From d3a398281c6fd1d3672036cb2d63f842d2cb28c5 Mon Sep 17 00:00:00 2001 From: Anton Samokhvalov Date: Thu, 10 Feb 2022 16:45:17 +0300 Subject: Restoring authorship annotation for Anton Samokhvalov . Commit 2 of 2. --- build/plugins/_common.py | 4 +- build/plugins/_custom_command.py | 24 +- build/plugins/_import_wrapper.py | 32 +- build/plugins/_unpickler.py | 2 +- build/plugins/bundle.py | 8 +- build/plugins/cp.py | 2 +- build/plugins/cpp_style.py | 36 +- build/plugins/create_init_py.py | 4 +- build/plugins/mx_archive.py | 20 +- build/plugins/pybuild.py | 24 +- build/plugins/res.py | 2 +- build/plugins/rodata.py | 98 +- build/rules/contrib_deps.policy | 2 +- build/scripts/check_config_h.py | 138 +- build/scripts/f2c.py | 20 +- build/scripts/fetch_from_sandbox.py | 6 +- build/scripts/fs_tools.py | 10 +- build/scripts/gen_join_srcs.py | 24 +- build/scripts/gen_mx_table.py | 112 +- build/scripts/gen_py_reg.py | 22 +- build/scripts/gen_ub.py | 172 +- build/scripts/link_dyn_lib.py | 112 +- build/scripts/link_lib.py | 22 +- build/scripts/mkver.py | 8 +- build/scripts/perl_wrapper.py | 12 +- build/scripts/preprocess.py | 88 +- build/scripts/run_msvc_wine.py | 710 +- build/scripts/run_tool.py | 6 +- build/scripts/xargs.py | 16 +- build/scripts/yield_line.py | 6 +- build/scripts/yndexer.py | 2 +- build/stdafx.hpp | 630 +- build/ya.conf.json | 142 +- build/ymake.core.conf | 148 +- build/ymake_conf.py | 44 +- certs/cacert.pem | 128 +- certs/update-certs.py | 2 +- contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make | 2 +- contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make | 4 +- contrib/libs/base64/plain32/codec_plain.c | 2 +- contrib/libs/base64/plain32/dec_uint32.c | 8 +- contrib/libs/base64/plain32/enc_uint32.c | 8 +- contrib/libs/base64/plain64/codec_plain.c | 2 +- contrib/libs/base64/plain64/dec_uint64.c | 8 +- contrib/libs/base64/plain64/enc_uint64.c | 8 +- contrib/libs/brotli/LICENSE | 6 +- contrib/libs/brotli/README.md | 22 +- contrib/libs/brotli/common/ya.make | 2 +- 
contrib/libs/brotli/dec/bit_reader.c | 44 +- contrib/libs/brotli/dec/bit_reader.h | 232 +- contrib/libs/brotli/dec/decode.c | 1044 +- contrib/libs/brotli/dec/huffman.c | 312 +- contrib/libs/brotli/dec/huffman.h | 72 +- contrib/libs/brotli/dec/prefix.h | 1474 +- contrib/libs/brotli/dec/state.c | 168 +- contrib/libs/brotli/dec/state.h | 288 +- contrib/libs/brotli/dec/ya.make | 30 +- contrib/libs/brotli/enc/backward_references.h | 18 +- contrib/libs/brotli/enc/bit_cost.h | 60 +- contrib/libs/brotli/enc/block_splitter.h | 24 +- contrib/libs/brotli/enc/brotli_bit_stream.h | 30 +- contrib/libs/brotli/enc/cluster.h | 32 +- contrib/libs/brotli/enc/command.h | 80 +- contrib/libs/brotli/enc/dictionary_hash.h | 16 +- contrib/libs/brotli/enc/entropy_encode.h | 28 +- contrib/libs/brotli/enc/fast_log.h | 224 +- contrib/libs/brotli/enc/find_match_length.h | 78 +- contrib/libs/brotli/enc/hash.h | 124 +- contrib/libs/brotli/enc/histogram.h | 24 +- contrib/libs/brotli/enc/literal_cost.h | 16 +- contrib/libs/brotli/enc/metablock.h | 34 +- contrib/libs/brotli/enc/prefix.h | 26 +- contrib/libs/brotli/enc/ringbuffer.h | 34 +- contrib/libs/brotli/enc/static_dict.h | 18 +- contrib/libs/brotli/enc/static_dict_lut.h | 26 +- contrib/libs/brotli/enc/write_bits.h | 46 +- contrib/libs/brotli/enc/ya.make | 30 +- contrib/libs/brotli/ya.make | 10 +- contrib/libs/c-ares/ares_build.h | 4 +- contrib/libs/c-ares/ares_config.h | 36 +- contrib/libs/c-ares/ares_setup.h | 2 +- contrib/libs/cctz/tzdata/ya.make | 2 +- contrib/libs/crcutil/ya.make | 30 +- .../builtins/Darwin-excludes/10.4-x86_64.txt | 70 +- .../libs/cxxsupp/builtins/Darwin-excludes/10.4.txt | 192 +- .../builtins/Darwin-excludes/CMakeLists.txt | 8 +- .../cxxsupp/builtins/Darwin-excludes/README.TXT | 22 +- .../cxxsupp/builtins/Darwin-excludes/ios-armv7.txt | 114 +- .../builtins/Darwin-excludes/ios-armv7s.txt | 114 +- .../libs/cxxsupp/builtins/Darwin-excludes/ios.txt | 2 +- .../builtins/Darwin-excludes/ios6-armv7.txt | 240 +- 
.../builtins/Darwin-excludes/ios6-armv7s.txt | 240 +- .../builtins/Darwin-excludes/ios7-arm64.txt | 32 +- .../builtins/Darwin-excludes/iossim-i386.txt | 164 +- .../builtins/Darwin-excludes/iossim-x86_64.txt | 24 +- .../cxxsupp/builtins/Darwin-excludes/iossim.txt | 2 +- .../cxxsupp/builtins/Darwin-excludes/osx-i386.txt | 164 +- .../builtins/Darwin-excludes/osx-x86_64.txt | 24 +- .../libs/cxxsupp/builtins/Darwin-excludes/osx.txt | 2 +- contrib/libs/cxxsupp/builtins/README.txt | 690 +- contrib/libs/cxxsupp/builtins/absvdi2.c | 58 +- contrib/libs/cxxsupp/builtins/absvsi2.c | 58 +- contrib/libs/cxxsupp/builtins/absvti2.c | 68 +- contrib/libs/cxxsupp/builtins/adddf3.c | 44 +- contrib/libs/cxxsupp/builtins/addsf3.c | 44 +- contrib/libs/cxxsupp/builtins/addtf3.c | 50 +- contrib/libs/cxxsupp/builtins/addvdi3.c | 72 +- contrib/libs/cxxsupp/builtins/addvsi3.c | 72 +- contrib/libs/cxxsupp/builtins/addvti3.c | 80 +- contrib/libs/cxxsupp/builtins/apple_versioning.c | 700 +- contrib/libs/cxxsupp/builtins/arm/Makefile.mk | 40 +- contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S | 192 +- .../cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c | 32 +- contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S | 182 +- .../cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c | 32 +- contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S | 80 +- contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c | 86 +- contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c | 38 +- contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S | 80 +- contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c | 38 +- contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S | 56 +- contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S | 62 +- contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S | 40 +- contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S | 40 +- contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S | 40 +- contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S | 68 +- 
contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S | 58 +- contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S | 62 +- contrib/libs/cxxsupp/builtins/arm/bswapdi2.S | 94 +- contrib/libs/cxxsupp/builtins/arm/bswapsi2.S | 78 +- contrib/libs/cxxsupp/builtins/arm/clzdi2.S | 194 +- contrib/libs/cxxsupp/builtins/arm/clzsi2.S | 152 +- contrib/libs/cxxsupp/builtins/arm/comparesf2.S | 296 +- contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/divmodsi4.S | 148 +- contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/divsi3.S | 130 +- contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S | 54 +- contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S | 54 +- contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S | 52 +- .../libs/cxxsupp/builtins/arm/floatunssidfvfp.S | 52 +- .../libs/cxxsupp/builtins/arm/floatunssisfvfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/modsi3.S | 126 +- contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S | 46 +- contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S | 46 +- 
contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S | 58 +- .../cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S | 66 +- .../cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S | 66 +- .../libs/cxxsupp/builtins/arm/softfloat-alias.list | 42 +- contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S | 54 +- contrib/libs/cxxsupp/builtins/arm/switch16.S | 88 +- contrib/libs/cxxsupp/builtins/arm/switch32.S | 88 +- contrib/libs/cxxsupp/builtins/arm/switch8.S | 84 +- contrib/libs/cxxsupp/builtins/arm/switchu8.S | 84 +- contrib/libs/cxxsupp/builtins/arm/sync-ops.h | 128 +- .../cxxsupp/builtins/arm/sync_fetch_and_add_4.S | 42 +- .../cxxsupp/builtins/arm/sync_fetch_and_add_8.S | 48 +- .../cxxsupp/builtins/arm/sync_fetch_and_and_4.S | 38 +- .../cxxsupp/builtins/arm/sync_fetch_and_and_8.S | 46 +- .../cxxsupp/builtins/arm/sync_fetch_and_max_4.S | 40 +- .../cxxsupp/builtins/arm/sync_fetch_and_max_8.S | 42 +- .../cxxsupp/builtins/arm/sync_fetch_and_min_4.S | 40 +- .../cxxsupp/builtins/arm/sync_fetch_and_min_8.S | 42 +- .../cxxsupp/builtins/arm/sync_fetch_and_nand_4.S | 40 +- .../cxxsupp/builtins/arm/sync_fetch_and_nand_8.S | 48 +- .../cxxsupp/builtins/arm/sync_fetch_and_or_4.S | 40 +- .../cxxsupp/builtins/arm/sync_fetch_and_or_8.S | 48 +- .../cxxsupp/builtins/arm/sync_fetch_and_sub_4.S | 42 +- .../cxxsupp/builtins/arm/sync_fetch_and_sub_8.S | 48 +- .../cxxsupp/builtins/arm/sync_fetch_and_umax_4.S | 40 +- .../cxxsupp/builtins/arm/sync_fetch_and_umax_8.S | 42 +- .../cxxsupp/builtins/arm/sync_fetch_and_umin_4.S | 40 +- .../cxxsupp/builtins/arm/sync_fetch_and_umin_8.S | 42 +- .../cxxsupp/builtins/arm/sync_fetch_and_xor_4.S | 40 +- .../cxxsupp/builtins/arm/sync_fetch_and_xor_8.S | 48 +- .../libs/cxxsupp/builtins/arm/sync_synchronize.S | 70 +- contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S | 52 +- contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S | 368 +- contrib/libs/cxxsupp/builtins/arm/udivsi3.S | 340 +- contrib/libs/cxxsupp/builtins/arm/umodsi3.S | 322 
+- contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S | 58 +- contrib/libs/cxxsupp/builtins/arm64/Makefile.mk | 40 +- contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk | 40 +- contrib/libs/cxxsupp/builtins/ashldi3.c | 86 +- contrib/libs/cxxsupp/builtins/ashlti3.c | 90 +- contrib/libs/cxxsupp/builtins/ashrdi3.c | 88 +- contrib/libs/cxxsupp/builtins/ashrti3.c | 92 +- contrib/libs/cxxsupp/builtins/assembly.h | 316 +- contrib/libs/cxxsupp/builtins/atomic.c | 658 +- contrib/libs/cxxsupp/builtins/atomic_flag_clear.c | 54 +- .../cxxsupp/builtins/atomic_flag_clear_explicit.c | 56 +- .../cxxsupp/builtins/atomic_flag_test_and_set.c | 54 +- .../builtins/atomic_flag_test_and_set_explicit.c | 56 +- .../libs/cxxsupp/builtins/atomic_signal_fence.c | 54 +- .../libs/cxxsupp/builtins/atomic_thread_fence.c | 54 +- contrib/libs/cxxsupp/builtins/clear_cache.c | 316 +- contrib/libs/cxxsupp/builtins/clzdi2.c | 58 +- contrib/libs/cxxsupp/builtins/clzsi2.c | 106 +- contrib/libs/cxxsupp/builtins/clzti2.c | 66 +- contrib/libs/cxxsupp/builtins/cmpdi2.c | 102 +- contrib/libs/cxxsupp/builtins/cmpti2.c | 84 +- contrib/libs/cxxsupp/builtins/comparedf2.c | 292 +- contrib/libs/cxxsupp/builtins/comparesf2.c | 290 +- contrib/libs/cxxsupp/builtins/comparetf2.c | 276 +- contrib/libs/cxxsupp/builtins/ctzdi2.c | 58 +- contrib/libs/cxxsupp/builtins/ctzsi2.c | 114 +- contrib/libs/cxxsupp/builtins/ctzti2.c | 66 +- contrib/libs/cxxsupp/builtins/divdc3.c | 120 +- contrib/libs/cxxsupp/builtins/divdf3.c | 368 +- contrib/libs/cxxsupp/builtins/divdi3.c | 58 +- contrib/libs/cxxsupp/builtins/divmoddi4.c | 50 +- contrib/libs/cxxsupp/builtins/divmodsi4.c | 54 +- contrib/libs/cxxsupp/builtins/divsc3.c | 120 +- contrib/libs/cxxsupp/builtins/divsf3.c | 336 +- contrib/libs/cxxsupp/builtins/divsi3.c | 74 +- contrib/libs/cxxsupp/builtins/divtc3.c | 120 +- contrib/libs/cxxsupp/builtins/divtf3.c | 406 +- contrib/libs/cxxsupp/builtins/divti3.c | 66 +- 
contrib/libs/cxxsupp/builtins/divxc3.c | 126 +- contrib/libs/cxxsupp/builtins/emutls.c | 366 +- .../libs/cxxsupp/builtins/enable_execute_stack.c | 144 +- contrib/libs/cxxsupp/builtins/eprintf.c | 70 +- contrib/libs/cxxsupp/builtins/extenddftf2.c | 46 +- contrib/libs/cxxsupp/builtins/extendhfsf2.c | 50 +- contrib/libs/cxxsupp/builtins/extendsfdf2.c | 38 +- contrib/libs/cxxsupp/builtins/extendsftf2.c | 46 +- contrib/libs/cxxsupp/builtins/ffsdi2.c | 66 +- contrib/libs/cxxsupp/builtins/ffsti2.c | 74 +- contrib/libs/cxxsupp/builtins/fixdfdi.c | 92 +- contrib/libs/cxxsupp/builtins/fixdfsi.c | 44 +- contrib/libs/cxxsupp/builtins/fixdfti.c | 52 +- contrib/libs/cxxsupp/builtins/fixsfdi.c | 94 +- contrib/libs/cxxsupp/builtins/fixsfsi.c | 44 +- contrib/libs/cxxsupp/builtins/fixsfti.c | 52 +- contrib/libs/cxxsupp/builtins/fixtfdi.c | 46 +- contrib/libs/cxxsupp/builtins/fixtfsi.c | 46 +- contrib/libs/cxxsupp/builtins/fixtfti.c | 46 +- contrib/libs/cxxsupp/builtins/fixunsdfdi.c | 88 +- contrib/libs/cxxsupp/builtins/fixunsdfsi.c | 42 +- contrib/libs/cxxsupp/builtins/fixunsdfti.c | 46 +- contrib/libs/cxxsupp/builtins/fixunssfdi.c | 90 +- contrib/libs/cxxsupp/builtins/fixunssfsi.c | 50 +- contrib/libs/cxxsupp/builtins/fixunssfti.c | 52 +- contrib/libs/cxxsupp/builtins/fixunstfdi.c | 44 +- contrib/libs/cxxsupp/builtins/fixunstfsi.c | 44 +- contrib/libs/cxxsupp/builtins/fixunstfti.c | 44 +- contrib/libs/cxxsupp/builtins/fixunsxfdi.c | 92 +- contrib/libs/cxxsupp/builtins/fixunsxfsi.c | 90 +- contrib/libs/cxxsupp/builtins/fixunsxfti.c | 100 +- contrib/libs/cxxsupp/builtins/fixxfdi.c | 96 +- contrib/libs/cxxsupp/builtins/fixxfti.c | 102 +- contrib/libs/cxxsupp/builtins/floatdidf.c | 214 +- contrib/libs/cxxsupp/builtins/floatdisf.c | 158 +- contrib/libs/cxxsupp/builtins/floatditf.c | 100 +- contrib/libs/cxxsupp/builtins/floatdixf.c | 90 +- contrib/libs/cxxsupp/builtins/floatsidf.c | 106 +- contrib/libs/cxxsupp/builtins/floatsisf.c | 118 +- contrib/libs/cxxsupp/builtins/floatsitf.c | 100 
+- contrib/libs/cxxsupp/builtins/floattidf.c | 164 +- contrib/libs/cxxsupp/builtins/floattisf.c | 164 +- contrib/libs/cxxsupp/builtins/floattixf.c | 168 +- contrib/libs/cxxsupp/builtins/floatundidf.c | 210 +- contrib/libs/cxxsupp/builtins/floatundisf.c | 154 +- contrib/libs/cxxsupp/builtins/floatunditf.c | 80 +- contrib/libs/cxxsupp/builtins/floatundixf.c | 84 +- contrib/libs/cxxsupp/builtins/floatunsidf.c | 84 +- contrib/libs/cxxsupp/builtins/floatunsisf.c | 100 +- contrib/libs/cxxsupp/builtins/floatunsitf.c | 80 +- contrib/libs/cxxsupp/builtins/floatuntidf.c | 160 +- contrib/libs/cxxsupp/builtins/floatuntisf.c | 158 +- contrib/libs/cxxsupp/builtins/floatuntixf.c | 162 +- contrib/libs/cxxsupp/builtins/fp_add_impl.inc | 288 +- contrib/libs/cxxsupp/builtins/fp_extend.h | 178 +- contrib/libs/cxxsupp/builtins/fp_extend_impl.inc | 216 +- contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc | 82 +- contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc | 78 +- contrib/libs/cxxsupp/builtins/fp_lib.h | 540 +- contrib/libs/cxxsupp/builtins/fp_mul_impl.inc | 232 +- contrib/libs/cxxsupp/builtins/fp_trunc.h | 152 +- contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc | 270 +- contrib/libs/cxxsupp/builtins/gcc_personality_v0.c | 418 +- contrib/libs/cxxsupp/builtins/i386/Makefile.mk | 40 +- contrib/libs/cxxsupp/builtins/i386/ashldi3.S | 116 +- contrib/libs/cxxsupp/builtins/i386/ashrdi3.S | 138 +- contrib/libs/cxxsupp/builtins/i386/chkstk.S | 68 +- contrib/libs/cxxsupp/builtins/i386/chkstk2.S | 80 +- contrib/libs/cxxsupp/builtins/i386/divdi3.S | 322 +- contrib/libs/cxxsupp/builtins/i386/floatdidf.S | 78 +- contrib/libs/cxxsupp/builtins/i386/floatdisf.S | 64 +- contrib/libs/cxxsupp/builtins/i386/floatdixf.S | 60 +- contrib/libs/cxxsupp/builtins/i386/floatundidf.S | 104 +- contrib/libs/cxxsupp/builtins/i386/floatundisf.S | 210 +- contrib/libs/cxxsupp/builtins/i386/floatundixf.S | 86 +- contrib/libs/cxxsupp/builtins/i386/lshrdi3.S | 118 +- contrib/libs/cxxsupp/builtins/i386/moddi3.S | 330 +- 
contrib/libs/cxxsupp/builtins/i386/muldi3.S | 60 +- contrib/libs/cxxsupp/builtins/i386/udivdi3.S | 228 +- contrib/libs/cxxsupp/builtins/i386/umoddi3.S | 250 +- contrib/libs/cxxsupp/builtins/int_endianness.h | 232 +- contrib/libs/cxxsupp/builtins/int_lib.h | 266 +- contrib/libs/cxxsupp/builtins/int_math.h | 228 +- contrib/libs/cxxsupp/builtins/int_types.h | 328 +- contrib/libs/cxxsupp/builtins/int_util.c | 122 +- contrib/libs/cxxsupp/builtins/int_util.h | 66 +- contrib/libs/cxxsupp/builtins/lshrdi3.c | 86 +- contrib/libs/cxxsupp/builtins/lshrti3.c | 90 +- .../cxxsupp/builtins/macho_embedded/CMakeLists.txt | 8 +- .../libs/cxxsupp/builtins/macho_embedded/arm.txt | 32 +- .../cxxsupp/builtins/macho_embedded/common.txt | 184 +- .../libs/cxxsupp/builtins/macho_embedded/i386.txt | 14 +- .../cxxsupp/builtins/macho_embedded/thumb2-64.txt | 20 +- .../cxxsupp/builtins/macho_embedded/thumb2.txt | 28 +- contrib/libs/cxxsupp/builtins/moddi3.c | 60 +- contrib/libs/cxxsupp/builtins/modsi3.c | 46 +- contrib/libs/cxxsupp/builtins/modti3.c | 68 +- contrib/libs/cxxsupp/builtins/muldc3.c | 146 +- contrib/libs/cxxsupp/builtins/muldf3.c | 44 +- contrib/libs/cxxsupp/builtins/muldi3.c | 112 +- contrib/libs/cxxsupp/builtins/mulodi4.c | 116 +- contrib/libs/cxxsupp/builtins/mulosi4.c | 116 +- contrib/libs/cxxsupp/builtins/muloti4.c | 124 +- contrib/libs/cxxsupp/builtins/mulsc3.c | 146 +- contrib/libs/cxxsupp/builtins/mulsf3.c | 44 +- contrib/libs/cxxsupp/builtins/multc3.c | 136 +- contrib/libs/cxxsupp/builtins/multf3.c | 50 +- contrib/libs/cxxsupp/builtins/multi3.c | 116 +- contrib/libs/cxxsupp/builtins/mulvdi3.c | 112 +- contrib/libs/cxxsupp/builtins/mulvsi3.c | 112 +- contrib/libs/cxxsupp/builtins/mulvti3.c | 120 +- contrib/libs/cxxsupp/builtins/mulxc3.c | 154 +- contrib/libs/cxxsupp/builtins/negdf2.c | 44 +- contrib/libs/cxxsupp/builtins/negdi2.c | 52 +- contrib/libs/cxxsupp/builtins/negsf2.c | 44 +- contrib/libs/cxxsupp/builtins/negti2.c | 60 +- contrib/libs/cxxsupp/builtins/negvdi2.c | 56 
+- contrib/libs/cxxsupp/builtins/negvsi2.c | 56 +- contrib/libs/cxxsupp/builtins/negvti2.c | 64 +- contrib/libs/cxxsupp/builtins/paritydi2.c | 50 +- contrib/libs/cxxsupp/builtins/paritysi2.c | 54 +- contrib/libs/cxxsupp/builtins/parityti2.c | 58 +- contrib/libs/cxxsupp/builtins/popcountdi2.c | 72 +- contrib/libs/cxxsupp/builtins/popcountsi2.c | 66 +- contrib/libs/cxxsupp/builtins/popcountti2.c | 88 +- contrib/libs/cxxsupp/builtins/powidf2.c | 68 +- contrib/libs/cxxsupp/builtins/powisf2.c | 68 +- contrib/libs/cxxsupp/builtins/powitf2.c | 76 +- contrib/libs/cxxsupp/builtins/powixf2.c | 76 +- contrib/libs/cxxsupp/builtins/ppc/DD.h | 88 +- contrib/libs/cxxsupp/builtins/ppc/Makefile.mk | 40 +- contrib/libs/cxxsupp/builtins/ppc/divtc3.c | 182 +- contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c | 206 +- contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c | 118 +- contrib/libs/cxxsupp/builtins/ppc/floatditf.c | 72 +- contrib/libs/cxxsupp/builtins/ppc/floatunditf.c | 82 +- contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c | 152 +- contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c | 110 +- contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c | 106 +- contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c | 152 +- contrib/libs/cxxsupp/builtins/ppc/multc3.c | 180 +- contrib/libs/cxxsupp/builtins/ppc/restFP.S | 86 +- contrib/libs/cxxsupp/builtins/ppc/saveFP.S | 80 +- contrib/libs/cxxsupp/builtins/subdf3.c | 50 +- contrib/libs/cxxsupp/builtins/subsf3.c | 50 +- contrib/libs/cxxsupp/builtins/subtf3.c | 54 +- contrib/libs/cxxsupp/builtins/subvdi3.c | 72 +- contrib/libs/cxxsupp/builtins/subvsi3.c | 72 +- contrib/libs/cxxsupp/builtins/subvti3.c | 80 +- contrib/libs/cxxsupp/builtins/trampoline_setup.c | 96 +- contrib/libs/cxxsupp/builtins/truncdfhf2.c | 36 +- contrib/libs/cxxsupp/builtins/truncdfsf2.c | 36 +- contrib/libs/cxxsupp/builtins/truncsfhf2.c | 48 +- contrib/libs/cxxsupp/builtins/trunctfdf2.c | 44 +- contrib/libs/cxxsupp/builtins/trunctfsf2.c | 44 +- contrib/libs/cxxsupp/builtins/ucmpdi2.c | 102 +- 
contrib/libs/cxxsupp/builtins/ucmpti2.c | 84 +- contrib/libs/cxxsupp/builtins/udivdi3.c | 46 +- contrib/libs/cxxsupp/builtins/udivmoddi4.c | 460 +- contrib/libs/cxxsupp/builtins/udivmodsi4.c | 54 +- contrib/libs/cxxsupp/builtins/udivmodti4.c | 474 +- contrib/libs/cxxsupp/builtins/udivsi3.c | 132 +- contrib/libs/cxxsupp/builtins/udivti3.c | 54 +- contrib/libs/cxxsupp/builtins/umoddi3.c | 50 +- contrib/libs/cxxsupp/builtins/umodsi3.c | 46 +- contrib/libs/cxxsupp/builtins/umodti3.c | 58 +- contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk | 40 +- contrib/libs/cxxsupp/builtins/x86_64/chkstk.S | 78 +- contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S | 84 +- contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c | 32 +- contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c | 28 +- contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c | 32 +- contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S | 98 +- contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S | 70 +- contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S | 136 +- contrib/libs/cxxsupp/builtins/ya.make | 22 +- contrib/libs/cxxsupp/libcxx/include/deque | 6 +- contrib/libs/cxxsupp/libcxx/ya.make | 4 +- contrib/libs/cxxsupp/libcxxrt/exception.cc | 224 +- contrib/libs/cxxsupp/libcxxrt/ya.make | 8 +- contrib/libs/cxxsupp/libsan/ya.make | 2 +- contrib/libs/cxxsupp/openmp/asm.S | 2 +- contrib/libs/cxxsupp/openmp/extractExternal.cpp | 994 +- contrib/libs/cxxsupp/openmp/i18n/en_US.txt | 950 +- contrib/libs/cxxsupp/openmp/include/30/omp.h.var | 328 +- .../libs/cxxsupp/openmp/include/30/omp_lib.f.var | 1266 +- .../libs/cxxsupp/openmp/include/30/omp_lib.f90.var | 716 +- .../libs/cxxsupp/openmp/include/30/omp_lib.h.var | 1276 +- contrib/libs/cxxsupp/openmp/include/30/ompt.h.var | 974 +- contrib/libs/cxxsupp/openmp/include/40/omp.h.var | 320 +- .../libs/cxxsupp/openmp/include/40/omp_lib.f.var | 1516 +- .../libs/cxxsupp/openmp/include/40/omp_lib.f90.var | 896 +- .../libs/cxxsupp/openmp/include/40/omp_lib.h.var | 1116 +- 
contrib/libs/cxxsupp/openmp/include/40/ompt.h.var | 974 +- contrib/libs/cxxsupp/openmp/include/41/omp.h.var | 352 +- .../libs/cxxsupp/openmp/include/41/omp_lib.f.var | 1576 +- .../libs/cxxsupp/openmp/include/41/omp_lib.f90.var | 940 +- .../libs/cxxsupp/openmp/include/41/omp_lib.h.var | 1168 +- contrib/libs/cxxsupp/openmp/include/41/ompt.h.var | 974 +- contrib/libs/cxxsupp/openmp/kmp.h | 7116 +++---- contrib/libs/cxxsupp/openmp/kmp_affinity.cpp | 9470 ++++----- contrib/libs/cxxsupp/openmp/kmp_affinity.h | 638 +- contrib/libs/cxxsupp/openmp/kmp_alloc.c | 4094 ++-- contrib/libs/cxxsupp/openmp/kmp_atomic.c | 5814 +++--- contrib/libs/cxxsupp/openmp/kmp_atomic.h | 2074 +- contrib/libs/cxxsupp/openmp/kmp_barrier.cpp | 3302 +-- contrib/libs/cxxsupp/openmp/kmp_cancel.cpp | 562 +- contrib/libs/cxxsupp/openmp/kmp_config.h | 198 +- contrib/libs/cxxsupp/openmp/kmp_csupport.c | 6092 +++--- contrib/libs/cxxsupp/openmp/kmp_debug.c | 284 +- contrib/libs/cxxsupp/openmp/kmp_debug.h | 260 +- contrib/libs/cxxsupp/openmp/kmp_debugger.c | 628 +- contrib/libs/cxxsupp/openmp/kmp_debugger.h | 102 +- contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp | 5302 ++--- contrib/libs/cxxsupp/openmp/kmp_environment.c | 1192 +- contrib/libs/cxxsupp/openmp/kmp_environment.h | 162 +- contrib/libs/cxxsupp/openmp/kmp_error.c | 1046 +- contrib/libs/cxxsupp/openmp/kmp_error.h | 114 +- contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c | 70 +- contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h | 2506 +-- contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c | 66 +- contrib/libs/cxxsupp/openmp/kmp_ftn_os.h | 1064 +- contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c | 70 +- contrib/libs/cxxsupp/openmp/kmp_global.c | 942 +- contrib/libs/cxxsupp/openmp/kmp_gsupport.c | 3208 +-- contrib/libs/cxxsupp/openmp/kmp_i18n.c | 1948 +- contrib/libs/cxxsupp/openmp/kmp_i18n.h | 386 +- contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc | 828 +- contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc | 762 +- contrib/libs/cxxsupp/openmp/kmp_import.c | 84 +- 
contrib/libs/cxxsupp/openmp/kmp_io.c | 494 +- contrib/libs/cxxsupp/openmp/kmp_io.h | 88 +- contrib/libs/cxxsupp/openmp/kmp_itt.c | 288 +- contrib/libs/cxxsupp/openmp/kmp_itt.h | 618 +- contrib/libs/cxxsupp/openmp/kmp_itt.inl | 2260 +-- contrib/libs/cxxsupp/openmp/kmp_lock.cpp | 8408 ++++---- contrib/libs/cxxsupp/openmp/kmp_lock.h | 2546 +-- contrib/libs/cxxsupp/openmp/kmp_omp.h | 466 +- contrib/libs/cxxsupp/openmp/kmp_os.h | 1452 +- contrib/libs/cxxsupp/openmp/kmp_platform.h | 336 +- contrib/libs/cxxsupp/openmp/kmp_runtime.c | 15306 +++++++------- contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h | 124 +- contrib/libs/cxxsupp/openmp/kmp_sched.cpp | 1828 +- contrib/libs/cxxsupp/openmp/kmp_settings.c | 10938 +++++----- contrib/libs/cxxsupp/openmp/kmp_settings.h | 100 +- contrib/libs/cxxsupp/openmp/kmp_stats.cpp | 1218 +- contrib/libs/cxxsupp/openmp/kmp_stats.h | 1496 +- contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp | 336 +- contrib/libs/cxxsupp/openmp/kmp_stats_timing.h | 220 +- contrib/libs/cxxsupp/openmp/kmp_str.c | 1766 +- contrib/libs/cxxsupp/openmp/kmp_str.h | 238 +- contrib/libs/cxxsupp/openmp/kmp_stub.c | 504 +- contrib/libs/cxxsupp/openmp/kmp_stub.h | 122 +- contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp | 1026 +- contrib/libs/cxxsupp/openmp/kmp_tasking.c | 5720 +++--- contrib/libs/cxxsupp/openmp/kmp_taskq.c | 4064 ++-- contrib/libs/cxxsupp/openmp/kmp_threadprivate.c | 1466 +- contrib/libs/cxxsupp/openmp/kmp_utility.c | 866 +- contrib/libs/cxxsupp/openmp/kmp_version.c | 422 +- contrib/libs/cxxsupp/openmp/kmp_version.h | 136 +- contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp | 100 +- contrib/libs/cxxsupp/openmp/kmp_wait_release.h | 1118 +- contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h | 112 +- contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h | 410 +- contrib/libs/cxxsupp/openmp/libomp.rc.var | 140 +- contrib/libs/cxxsupp/openmp/omp.h | 352 +- contrib/libs/cxxsupp/openmp/ompt-event-specific.h | 288 +- contrib/libs/cxxsupp/openmp/ompt-general.c | 1070 +- 
contrib/libs/cxxsupp/openmp/ompt-internal.h | 158 +- contrib/libs/cxxsupp/openmp/ompt-specific.c | 664 +- contrib/libs/cxxsupp/openmp/ompt-specific.h | 180 +- contrib/libs/cxxsupp/openmp/test-touch.c | 62 +- .../openmp/thirdparty/ittnotify/disable_warnings.h | 58 +- .../openmp/thirdparty/ittnotify/ittnotify.h | 7600 +++---- .../openmp/thirdparty/ittnotify/ittnotify_config.h | 954 +- .../openmp/thirdparty/ittnotify/ittnotify_static.c | 2094 +- .../openmp/thirdparty/ittnotify/ittnotify_static.h | 632 +- .../openmp/thirdparty/ittnotify/ittnotify_types.h | 134 +- .../openmp/thirdparty/ittnotify/legacy/ittnotify.h | 1942 +- contrib/libs/cxxsupp/openmp/ya.make | 110 +- contrib/libs/cxxsupp/openmp/z_Linux_asm.s | 2890 +-- contrib/libs/cxxsupp/openmp/z_Linux_util.c | 5412 ++--- .../libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm | 2804 +-- .../libs/cxxsupp/openmp/z_Windows_NT-586_util.c | 326 +- contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c | 3862 ++-- contrib/libs/cxxsupp/system_stl/ya.make | 2 +- contrib/libs/cxxsupp/ya.make | 6 +- contrib/libs/double-conversion/bignum-dtoa.cc | 1280 +- contrib/libs/double-conversion/bignum-dtoa.h | 168 +- contrib/libs/double-conversion/bignum.cc | 1524 +- contrib/libs/double-conversion/bignum.h | 286 +- contrib/libs/double-conversion/cached-powers.cc | 344 +- contrib/libs/double-conversion/cached-powers.h | 128 +- contrib/libs/double-conversion/diy-fp.cc | 114 +- contrib/libs/double-conversion/diy-fp.h | 236 +- .../libs/double-conversion/double-conversion.cc | 1872 +- contrib/libs/double-conversion/double-conversion.h | 1068 +- contrib/libs/double-conversion/fast-dtoa.cc | 1330 +- contrib/libs/double-conversion/fast-dtoa.h | 176 +- contrib/libs/double-conversion/fixed-dtoa.cc | 808 +- contrib/libs/double-conversion/fixed-dtoa.h | 112 +- contrib/libs/double-conversion/ieee.h | 800 +- contrib/libs/double-conversion/strtod.cc | 1092 +- contrib/libs/double-conversion/strtod.h | 90 +- contrib/libs/double-conversion/utils.h | 642 +- 
contrib/libs/double-conversion/ya.make | 36 +- contrib/libs/expat/ya.make | 2 +- contrib/libs/farmhash/arch/sse41/ya.make | 4 +- contrib/libs/farmhash/arch/sse42/ya.make | 4 +- contrib/libs/farmhash/arch/sse42_aesni/ya.make | 4 +- contrib/libs/fastlz/fastlz.c | 1104 +- contrib/libs/fastlz/fastlz.h | 204 +- contrib/libs/fastlz/rename.h | 16 +- contrib/libs/fastlz/ya.make | 18 +- contrib/libs/fmt/test/ya.make | 4 +- contrib/libs/grpc/grpc++/ya.make | 4 +- contrib/libs/grpc/grpc++_error_details/ya.make | 4 +- contrib/libs/grpc/grpc++_reflection/ya.make | 4 +- contrib/libs/grpc/grpc++_unsecure/ya.make | 4 +- contrib/libs/grpc/grpc/ya.make | 4 +- contrib/libs/grpc/grpc_unsecure/ya.make | 4 +- contrib/libs/grpc/grpcpp_channelz/ya.make | 4 +- contrib/libs/grpc/python/ya.make | 4 +- .../grpc/src/compiler/grpc_plugin_support/ya.make | 4 +- contrib/libs/grpc/src/core/lib/ya.make | 4 +- contrib/libs/grpc/src/proto/grpc/channelz/ya.make | 4 +- contrib/libs/grpc/src/proto/grpc/core/ya.make | 4 +- contrib/libs/grpc/src/proto/grpc/health/v1/ya.make | 4 +- .../grpc/src/proto/grpc/reflection/v1alpha/ya.make | 4 +- contrib/libs/grpc/src/proto/grpc/status/ya.make | 4 +- .../grpc/src/proto/grpc/testing/duplicate/ya.make | 4 +- .../libs/grpc/src/proto/grpc/testing/xds/ya.make | 4 +- contrib/libs/grpc/src/proto/grpc/testing/ya.make | 4 +- .../libs/grpc/src/python/grpcio_channelz/ya.make | 4 +- .../grpc/src/python/grpcio_health_checking/ya.make | 4 +- .../libs/grpc/src/python/grpcio_reflection/ya.make | 4 +- contrib/libs/grpc/src/python/grpcio_status/ya.make | 4 +- contrib/libs/grpc/test/core/util/ya.make | 4 +- contrib/libs/grpc/test/cpp/end2end/ya.make | 4 +- contrib/libs/grpc/test/cpp/util/ya.make | 4 +- contrib/libs/grpc/ya.make | 2 +- contrib/libs/hdr_histogram/ya.make | 2 +- contrib/libs/highwayhash/arch/avx2/ya.make | 4 +- contrib/libs/highwayhash/arch/sse41/ya.make | 4 +- contrib/libs/hyperscan/ya.make | 2 +- contrib/libs/jemalloc/hack.cpp | 28 +- contrib/libs/jemalloc/hack.h | 
30 +- .../internal/jemalloc_internal_defs-linux.h | 4 +- contrib/libs/jemalloc/reg_zone.cpp | 10 +- contrib/libs/jemalloc/spinlock.h | 30 +- contrib/libs/jemalloc/ya.make | 4 +- contrib/libs/jwt-cpp/ya.make | 2 +- contrib/libs/libaio/static/ya.make | 2 +- contrib/libs/libaio/ya.make | 2 +- contrib/libs/libbz2/blocksort.c | 2170 +- contrib/libs/libbz2/bzlib.c | 3122 +-- contrib/libs/libbz2/bzlib.h | 376 +- contrib/libs/libbz2/bzlib_private.h | 986 +- contrib/libs/libbz2/compress.c | 1324 +- contrib/libs/libbz2/crctable.c | 202 +- contrib/libs/libbz2/decompress.c | 1242 +- contrib/libs/libbz2/huffman.c | 406 +- contrib/libs/libbz2/randtable.c | 60 +- contrib/libs/libbz2/ya.make | 26 +- contrib/libs/libc_compat/ifaddrs.c | 1326 +- contrib/libs/libc_compat/ubuntu_14/ya.make | 4 +- contrib/libs/libevent/event_core/ya.make | 4 +- contrib/libs/libevent/event_extra/ya.make | 4 +- contrib/libs/libevent/event_openssl/ya.make | 4 +- contrib/libs/libevent/event_thread/ya.make | 4 +- contrib/libs/libidn/static/ya.make | 22 +- contrib/libs/libidn/unix/config.h | 8 +- contrib/libs/libidn/win/ac-stdint.h | 2 +- contrib/libs/libidn/ya.make | 30 +- .../libs/libunwind/include/__libunwind_config.h | 24 +- contrib/libs/libunwind/include/libunwind.h | 846 +- .../include/mach-o/compact_unwind_encoding.h | 914 +- contrib/libs/libunwind/include/unwind.h | 344 +- contrib/libs/libunwind/include/unwind_arm_ehabi.h | 300 +- contrib/libs/libunwind/include/unwind_itanium.h | 100 +- contrib/libs/libunwind/src/AddressSpace.hpp | 568 +- contrib/libs/libunwind/src/CompactUnwinder.hpp | 1292 +- contrib/libs/libunwind/src/DwarfInstructions.hpp | 1462 +- contrib/libs/libunwind/src/DwarfParser.hpp | 622 +- contrib/libs/libunwind/src/EHHeaderParser.hpp | 296 +- contrib/libs/libunwind/src/Registers.hpp | 3704 ++-- contrib/libs/libunwind/src/Unwind-EHABI.cpp | 1664 +- contrib/libs/libunwind/src/Unwind-EHABI.h | 88 +- contrib/libs/libunwind/src/Unwind-sjlj.c | 768 +- 
contrib/libs/libunwind/src/UnwindCursor.hpp | 2400 +-- contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c | 478 +- contrib/libs/libunwind/src/UnwindLevel1.c | 758 +- .../libs/libunwind/src/UnwindRegistersRestore.S | 650 +- contrib/libs/libunwind/src/UnwindRegistersSave.S | 542 +- contrib/libs/libunwind/src/Unwind_AppleExtras.cpp | 208 +- contrib/libs/libunwind/src/assembly.h | 122 +- contrib/libs/libunwind/src/config.h | 98 +- contrib/libs/libunwind/src/dwarf2.h | 462 +- contrib/libs/libunwind/src/libunwind.cpp | 394 +- contrib/libs/libunwind/src/libunwind_ext.h | 70 +- contrib/libs/libunwind/ya.make | 36 +- contrib/libs/linuxvdso/fake.cpp | 10 +- contrib/libs/linuxvdso/interface.cpp | 56 +- contrib/libs/linuxvdso/interface.h | 48 +- contrib/libs/linuxvdso/original/config.h | 22 +- contrib/libs/linuxvdso/original/elf_mem_image.cc | 870 +- contrib/libs/linuxvdso/original/elf_mem_image.h | 270 +- .../linuxvdso/original/linux_syscall_support.h | 4282 ++-- contrib/libs/linuxvdso/original/logging.h | 308 +- contrib/libs/linuxvdso/original/vdso_support.cc | 256 +- contrib/libs/linuxvdso/original/vdso_support.h | 260 +- contrib/libs/linuxvdso/original/ya.make | 20 +- contrib/libs/linuxvdso/ya.make | 30 +- contrib/libs/lz4/generated/gen.py | 8 +- contrib/libs/lz4/generated/iface.h | 30 +- contrib/libs/lz4/generated/lz4_ns.h | 8 +- contrib/libs/lz4/generated/ya.make | 4 +- contrib/libs/lz4/lz4.c | 2 +- contrib/libs/lzmasdk/7zStream.c | 352 +- contrib/libs/lzmasdk/7zTypes.h | 742 +- contrib/libs/lzmasdk/Alloc.c | 246 +- contrib/libs/lzmasdk/Alloc.h | 72 +- contrib/libs/lzmasdk/Compiler.h | 66 +- contrib/libs/lzmasdk/LzFind.c | 2032 +- contrib/libs/lzmasdk/LzFind.h | 242 +- contrib/libs/lzmasdk/LzHash.h | 114 +- contrib/libs/lzmasdk/LzmaDec.c | 1876 +- contrib/libs/lzmasdk/LzmaDec.h | 420 +- contrib/libs/lzmasdk/LzmaEnc.c | 2870 +-- contrib/libs/lzmasdk/LzmaEnc.h | 152 +- contrib/libs/lzmasdk/LzmaLib.c | 80 +- contrib/libs/lzmasdk/LzmaLib.h | 260 +- 
contrib/libs/lzmasdk/Precomp.h | 20 +- contrib/libs/lzmasdk/ya.make | 30 +- contrib/libs/nayuki_md5/md5-fast-x8664.S | 342 +- contrib/libs/nayuki_md5/md5.c | 268 +- contrib/libs/nayuki_md5/md5.h | 18 +- contrib/libs/nayuki_md5/ya.make | 16 +- contrib/libs/nghttp2/ya.make | 2 +- contrib/libs/openssl/crypto/ya.make | 2 +- contrib/libs/openssl/ya.make | 2 +- contrib/libs/pcre/pcre.h | 418 +- contrib/libs/pcre/pcre_chartables.c | 388 +- contrib/libs/pcre/pcre_compile.c | 7720 +++---- contrib/libs/pcre/pcre_config.c | 242 +- contrib/libs/pcre/pcre_config.h | 186 +- contrib/libs/pcre/pcre_dfa_exec.c | 4442 ++-- contrib/libs/pcre/pcre_exec.c | 6546 +++--- contrib/libs/pcre/pcre_fullinfo.c | 288 +- contrib/libs/pcre/pcre_get.c | 826 +- contrib/libs/pcre/pcre_globals.c | 106 +- contrib/libs/pcre/pcre_internal.h | 1344 +- contrib/libs/pcre/pcre_maketables.c | 254 +- contrib/libs/pcre/pcre_newline.c | 254 +- contrib/libs/pcre/pcre_ord2utf8.c | 144 +- contrib/libs/pcre/pcre_refcount.c | 156 +- contrib/libs/pcre/pcre_study.c | 834 +- contrib/libs/pcre/pcre_tables.c | 188 +- contrib/libs/pcre/pcre_valid_utf8.c | 220 +- contrib/libs/pcre/pcre_version.c | 174 +- contrib/libs/pcre/pcre_xclass.c | 252 +- contrib/libs/pcre/pcrecpp/ya.make | 4 +- contrib/libs/pcre/pcreposix.c | 576 +- contrib/libs/pcre/pcreposix.h | 228 +- contrib/libs/pcre/ucp.h | 248 +- contrib/libs/pcre/ya.make | 50 +- contrib/libs/pdqsort/ya.make | 4 +- contrib/libs/pire/pire/extra/count.cpp | 16 +- contrib/libs/pire/pire/extra/count.h | 14 +- contrib/libs/pire/pire/scanners/loaded.h | 20 +- contrib/libs/pire/pire/scanners/multi.h | 74 +- contrib/libs/pire/pire/scanners/simple.h | 12 +- contrib/libs/pire/pire/scanners/slow.h | 26 +- contrib/libs/pire/pire/stub/stl.h | 12 +- contrib/libs/pire/pire/stub/utf8.h | 6 +- contrib/libs/poco/Crypto/ya.make | 2 +- contrib/libs/poco/Foundation/ya.make | 2 +- contrib/libs/poco/JSON/ya.make | 2 +- contrib/libs/poco/Net/ya.make | 2 +- contrib/libs/poco/NetSSL_OpenSSL/ya.make | 
2 +- contrib/libs/poco/Util/ya.make | 2 +- contrib/libs/poco/XML/ya.make | 2 +- contrib/libs/python/Include/ya.make | 2 +- contrib/libs/python/ut/lib/ya.make | 8 +- contrib/libs/python/ya.make | 2 +- contrib/libs/re2/re2/parse.cc | 22 +- contrib/libs/re2/re2/perl_groups.cc | 42 +- contrib/libs/re2/re2/re2.cc | 2 +- contrib/libs/re2/re2/unicode_casefold.h | 4 +- contrib/libs/re2/re2/unicode_groups.h | 16 +- contrib/libs/re2/util/utf.h | 2 +- contrib/libs/re2/ya.make | 2 +- contrib/libs/snappy/snappy-c.cc | 180 +- contrib/libs/snappy/snappy-c.h | 268 +- contrib/libs/snappy/snappy-internal.h | 232 +- contrib/libs/snappy/snappy-sinksource.cc | 142 +- contrib/libs/snappy/snappy-sinksource.h | 262 +- contrib/libs/snappy/snappy-stubs-internal.cc | 78 +- contrib/libs/snappy/snappy-stubs-internal.h | 720 +- contrib/libs/snappy/snappy-stubs-public.h | 92 +- contrib/libs/snappy/snappy.cc | 1374 +- contrib/libs/snappy/snappy.h | 266 +- contrib/libs/snappy/ya.make | 18 +- contrib/libs/sqlite3/ya.make | 2 +- contrib/libs/tcmalloc/tcmalloc/libc_override.h | 2 +- .../tcmalloc/tcmalloc/libc_override_redefine.h | 6 +- contrib/libs/tcmalloc/ya.make | 2 +- contrib/libs/utf8proc/ya.make | 2 +- contrib/libs/xz/common/ya.make | 2 +- contrib/libs/xz/liblzma/ya.make | 2 +- contrib/libs/ya.make | 32 +- contrib/libs/yaml-cpp/ya.make | 4 +- contrib/libs/zlib/deflate.c | 2 +- contrib/libs/zstd06/LICENSE | 52 +- contrib/libs/zstd06/common/bitstream.h | 778 +- contrib/libs/zstd06/common/error_private.h | 230 +- contrib/libs/zstd06/common/error_public.h | 142 +- contrib/libs/zstd06/common/fse.h | 558 +- contrib/libs/zstd06/common/fse_static.h | 682 +- contrib/libs/zstd06/common/mem.h | 522 +- contrib/libs/zstd06/common/zbuff.h | 332 +- contrib/libs/zstd06/common/zbuff_static.h | 126 +- contrib/libs/zstd06/common/zstd.h | 292 +- contrib/libs/zstd06/common/zstd_internal.h | 488 +- contrib/libs/zstd06/common/zstd_static.h | 534 +- contrib/libs/zstd06/compress/zstd_compress.c | 5094 ++--- 
contrib/libs/zstd06/decompress/zstd_decompress.c | 2124 +- contrib/libs/zstd06/dictBuilder/divsufsort.c | 3826 ++-- contrib/libs/zstd06/dictBuilder/divsufsort.h | 134 +- contrib/libs/zstd06/dictBuilder/zdict.c | 1890 +- contrib/libs/zstd06/dictBuilder/zdict.h | 134 +- contrib/libs/zstd06/dictBuilder/zdict_static.h | 160 +- contrib/libs/zstd06/legacy/zstd_legacy.h | 144 +- contrib/libs/zstd06/legacy/zstd_v01.c | 4352 ++-- contrib/libs/zstd06/legacy/zstd_v01.h | 200 +- contrib/libs/zstd06/legacy/zstd_v02.c | 7490 +++---- contrib/libs/zstd06/legacy/zstd_v02.h | 198 +- contrib/libs/zstd06/legacy/zstd_v03.c | 6772 +++---- contrib/libs/zstd06/legacy/zstd_v03.h | 198 +- contrib/libs/zstd06/legacy/zstd_v04.c | 8070 ++++---- contrib/libs/zstd06/legacy/zstd_v04.h | 296 +- contrib/libs/zstd06/legacy/zstd_v05.c | 8618 ++++---- contrib/libs/zstd06/legacy/zstd_v05.h | 294 +- contrib/libs/zstd06/libzstd.pc.in | 28 +- contrib/libs/zstd06/ya.make | 38 +- contrib/python/Jinja2/py2/ya.make | 2 +- contrib/python/Jinja2/py3/ya.make | 2 +- contrib/python/Jinja2/ya.make | 2 +- contrib/python/MarkupSafe/py2/ya.make | 4 +- contrib/python/MarkupSafe/py3/ya.make | 2 +- contrib/python/MarkupSafe/ya.make | 2 +- contrib/python/PyHamcrest/tests/ya.make | 2 +- contrib/python/PyHamcrest/ya.make | 136 +- contrib/python/PyYAML/py2/ya.make | 4 +- contrib/python/PyYAML/py3/ya.make | 4 +- contrib/python/PyYAML/ya.make | 4 +- contrib/python/Pygments/py2/ya.make | 2 +- contrib/python/Pygments/py3/ya.make | 2 +- contrib/python/Pygments/ya.make | 2 +- contrib/python/attrs/ya.make | 4 +- contrib/python/boto3/ya.make | 2 +- contrib/python/botocore/ya.make | 2 +- contrib/python/certifi/ya.make | 2 +- contrib/python/cffi/gen/lib/ya.make | 4 +- contrib/python/cffi/ya.make | 6 +- contrib/python/cryptography/ya.make | 20 +- contrib/python/dateutil/ya.make | 10 +- contrib/python/decorator/ya.make | 4 +- contrib/python/future/ya.make | 4 +- contrib/python/idna/ya.make | 2 +- contrib/python/ipdb/ya.make | 4 +- 
contrib/python/ipython/py2/ya.make | 2 +- contrib/python/ipython/ya.make | 2 +- contrib/python/jedi/ya.make | 4 +- contrib/python/parso/py3/ya.make | 4 +- contrib/python/parso/ya.make | 2 +- contrib/python/pexpect/ya.make | 4 +- contrib/python/pickleshare/ya.make | 2 +- contrib/python/pluggy/ya.make | 2 +- contrib/python/prompt-toolkit/ya.make | 2 +- contrib/python/ptyprocess/ya.make | 2 +- contrib/python/py/ya.make | 2 +- contrib/python/pycparser/ya.make | 2 +- contrib/python/requests/ya.make | 2 +- contrib/python/s3transfer/py2/ya.make | 2 +- contrib/python/s3transfer/py3/ya.make | 2 +- contrib/python/s3transfer/ya.make | 2 +- contrib/python/six/ya.make | 4 +- contrib/python/toml/ya.make | 4 +- contrib/python/traitlets/ya.make | 2 +- contrib/python/wcwidth/ya.make | 4 +- contrib/python/ya.make | 4 +- contrib/restricted/libffi/include/ffi_common.h | 236 +- contrib/restricted/libffi/src/closures.c | 1172 +- contrib/restricted/libffi/src/dlmalloc.c | 10306 +++++----- contrib/restricted/libffi/src/java_raw_api.c | 702 +- contrib/restricted/libffi/src/prep_cif.c | 418 +- contrib/restricted/libffi/src/raw_api.c | 498 +- contrib/restricted/libffi/src/types.c | 124 +- contrib/restricted/libffi/src/x86/ffi.c | 334 +- contrib/restricted/libffi/src/x86/ffi64.c | 1102 +- contrib/restricted/libffi/src/x86/ffitarget.h | 200 +- contrib/restricted/libffi/src/x86/sysv.S | 186 +- contrib/restricted/libffi/src/x86/unix64.S | 286 +- contrib/restricted/libffi/src/x86/win64.S | 74 +- contrib/restricted/libffi/ya.make | 30 +- contrib/tools/bison/bison/src/files.c | 8 +- contrib/tools/bison/bison/src/parse-gram.y | 2 +- contrib/tools/bison/bison/src/symtab.c | 2 +- contrib/tools/bison/bison/ya.make | 14 +- contrib/tools/bison/gnulib/platform/posix/config.h | 242 +- contrib/tools/bison/gnulib/src/canonicalize-lgpl.c | 2 +- contrib/tools/bison/gnulib/src/execute.c | 2 +- contrib/tools/bison/gnulib/src/fpending.c | 8 +- contrib/tools/bison/gnulib/src/malloca.h | 2 +- 
contrib/tools/bison/gnulib/src/palloca.h | 14 +- contrib/tools/bison/gnulib/src/penviron.h | 8 +- contrib/tools/bison/gnulib/src/regex_internal.h | 2 +- contrib/tools/bison/gnulib/src/spawn-pipe.c | 2 +- contrib/tools/bison/gnulib/src/spawni.c | 2 +- contrib/tools/bison/gnulib/src/stpcpy.c | 2 +- contrib/tools/bison/gnulib/src/strsignal.c | 8 +- contrib/tools/bison/gnulib/src/timevar.c | 12 +- contrib/tools/bison/gnulib/src/vasnprintf.c | 2 +- contrib/tools/bison/gnulib/src/xstrndup.c | 34 +- contrib/tools/bison/gnulib/src/xvasprintf.c | 8 +- contrib/tools/bison/gnulib/ya.make | 126 +- contrib/tools/bison/m4/src/builtin.c | 2 +- contrib/tools/bison/m4/src/input.c | 2 +- contrib/tools/bison/m4/ya.make | 14 +- contrib/tools/bison/ya.make | 2 +- .../tools/cython/Cython/Build/BuildExecutable.py | 284 +- contrib/tools/cython/Cython/Build/Cythonize.py | 360 +- contrib/tools/cython/Cython/Build/Dependencies.py | 1596 +- contrib/tools/cython/Cython/Build/Inline.py | 524 +- contrib/tools/cython/Cython/Build/IpythonMagic.py | 562 +- .../tools/cython/Cython/Build/Tests/TestInline.py | 112 +- .../cython/Cython/Build/Tests/TestIpythonMagic.py | 136 +- .../cython/Cython/Build/Tests/TestStripLiterals.py | 110 +- .../tools/cython/Cython/Build/Tests/__init__.py | 2 +- contrib/tools/cython/Cython/Build/__init__.py | 2 +- contrib/tools/cython/Cython/CodeWriter.py | 1022 +- .../Cython/Compiler/AnalysedTreeTransforms.py | 198 +- contrib/tools/cython/Cython/Compiler/Annotate.py | 392 +- .../cython/Cython/Compiler/AutoDocTransforms.py | 334 +- contrib/tools/cython/Cython/Compiler/Buffer.py | 1364 +- contrib/tools/cython/Cython/Compiler/Builtin.py | 746 +- contrib/tools/cython/Cython/Compiler/CmdLine.py | 342 +- contrib/tools/cython/Cython/Compiler/Code.pxd | 156 +- contrib/tools/cython/Cython/Compiler/Code.py | 3960 ++-- .../tools/cython/Cython/Compiler/CodeGeneration.py | 68 +- .../tools/cython/Cython/Compiler/CythonScope.py | 314 +- contrib/tools/cython/Cython/Compiler/DebugFlags.py | 
42 +- contrib/tools/cython/Cython/Compiler/Errors.py | 486 +- contrib/tools/cython/Cython/Compiler/ExprNodes.py | 20094 +++++++++---------- .../tools/cython/Cython/Compiler/FlowControl.pxd | 208 +- .../tools/cython/Cython/Compiler/FlowControl.py | 2542 +-- contrib/tools/cython/Cython/Compiler/FusedNode.py | 1426 +- contrib/tools/cython/Cython/Compiler/Future.py | 26 +- .../tools/cython/Cython/Compiler/Interpreter.py | 128 +- contrib/tools/cython/Cython/Compiler/Lexicon.py | 248 +- contrib/tools/cython/Cython/Compiler/Main.py | 1138 +- contrib/tools/cython/Cython/Compiler/MemoryView.py | 1504 +- contrib/tools/cython/Cython/Compiler/ModuleNode.py | 4352 ++-- contrib/tools/cython/Cython/Compiler/Naming.py | 294 +- contrib/tools/cython/Cython/Compiler/Nodes.py | 14950 +++++++------- contrib/tools/cython/Cython/Compiler/Optimize.py | 7028 +++---- contrib/tools/cython/Cython/Compiler/Options.py | 558 +- .../cython/Cython/Compiler/ParseTreeTransforms.pxd | 134 +- .../cython/Cython/Compiler/ParseTreeTransforms.py | 5712 +++--- contrib/tools/cython/Cython/Compiler/Parsing.pxd | 362 +- contrib/tools/cython/Cython/Compiler/Parsing.py | 5894 +++--- contrib/tools/cython/Cython/Compiler/Pipeline.py | 594 +- contrib/tools/cython/Cython/Compiler/PyrexTypes.py | 6664 +++--- contrib/tools/cython/Cython/Compiler/Scanning.pxd | 84 +- contrib/tools/cython/Cython/Compiler/Scanning.py | 884 +- .../tools/cython/Cython/Compiler/StringEncoding.py | 608 +- contrib/tools/cython/Cython/Compiler/Symtab.py | 4148 ++-- .../cython/Cython/Compiler/Tests/TestBuffer.py | 188 +- .../cython/Cython/Compiler/Tests/TestMemView.py | 140 +- .../Compiler/Tests/TestParseTreeTransforms.py | 562 +- .../Cython/Compiler/Tests/TestSignatureMatching.py | 144 +- .../Cython/Compiler/Tests/TestTreeFragment.py | 116 +- .../cython/Cython/Compiler/Tests/TestTreePath.py | 116 +- .../Cython/Compiler/Tests/TestUtilityLoad.py | 182 +- .../cython/Cython/Compiler/Tests/TestVisitor.py | 122 +- 
.../tools/cython/Cython/Compiler/Tests/__init__.py | 2 +- .../tools/cython/Cython/Compiler/TreeFragment.py | 466 +- contrib/tools/cython/Cython/Compiler/TreePath.py | 550 +- .../tools/cython/Cython/Compiler/TypeInference.py | 1052 +- contrib/tools/cython/Cython/Compiler/TypeSlots.py | 1626 +- contrib/tools/cython/Cython/Compiler/UtilNodes.py | 674 +- .../tools/cython/Cython/Compiler/UtilityCode.py | 318 +- contrib/tools/cython/Cython/Compiler/Version.py | 18 +- contrib/tools/cython/Cython/Compiler/Visitor.pxd | 100 +- contrib/tools/cython/Cython/Compiler/Visitor.py | 1390 +- contrib/tools/cython/Cython/Compiler/__init__.py | 2 +- contrib/tools/cython/Cython/Debugger/Cygdb.py | 284 +- .../tools/cython/Cython/Debugger/DebugWriter.py | 126 +- .../cython/Cython/Debugger/Tests/TestLibCython.py | 508 +- .../tools/cython/Cython/Debugger/Tests/__init__.py | 2 +- .../tools/cython/Cython/Debugger/Tests/cfuncs.c | 16 +- .../tools/cython/Cython/Debugger/Tests/codefile | 94 +- .../Cython/Debugger/Tests/test_libcython_in_gdb.py | 970 +- .../Cython/Debugger/Tests/test_libpython_in_gdb.py | 208 +- contrib/tools/cython/Cython/Debugger/__init__.py | 2 +- contrib/tools/cython/Cython/Debugger/libcython.py | 2792 +-- contrib/tools/cython/Cython/Debugger/libpython.py | 4508 ++--- contrib/tools/cython/Cython/Debugging.py | 40 +- contrib/tools/cython/Cython/Distutils/__init__.py | 4 +- contrib/tools/cython/Cython/Distutils/build_ext.py | 8 +- contrib/tools/cython/Cython/Distutils/extension.py | 254 +- .../cython/Cython/Includes/Deprecated/python.pxd | 2 +- .../Cython/Includes/Deprecated/python_bool.pxd | 2 +- .../Cython/Includes/Deprecated/python_buffer.pxd | 2 +- .../Cython/Includes/Deprecated/python_bytes.pxd | 2 +- .../Cython/Includes/Deprecated/python_cobject.pxd | 2 +- .../Cython/Includes/Deprecated/python_complex.pxd | 2 +- .../Cython/Includes/Deprecated/python_dict.pxd | 2 +- .../Cython/Includes/Deprecated/python_exc.pxd | 2 +- .../Cython/Includes/Deprecated/python_float.pxd | 2 +- 
.../Cython/Includes/Deprecated/python_function.pxd | 2 +- .../Cython/Includes/Deprecated/python_getargs.pxd | 2 +- .../Cython/Includes/Deprecated/python_instance.pxd | 2 +- .../Cython/Includes/Deprecated/python_int.pxd | 2 +- .../Cython/Includes/Deprecated/python_iterator.pxd | 2 +- .../Cython/Includes/Deprecated/python_list.pxd | 2 +- .../Cython/Includes/Deprecated/python_long.pxd | 2 +- .../Cython/Includes/Deprecated/python_mapping.pxd | 2 +- .../Cython/Includes/Deprecated/python_mem.pxd | 2 +- .../Cython/Includes/Deprecated/python_method.pxd | 2 +- .../Cython/Includes/Deprecated/python_module.pxd | 2 +- .../Cython/Includes/Deprecated/python_number.pxd | 2 +- .../Cython/Includes/Deprecated/python_object.pxd | 2 +- .../Includes/Deprecated/python_oldbuffer.pxd | 2 +- .../Includes/Deprecated/python_pycapsule.pxd | 2 +- .../Cython/Includes/Deprecated/python_ref.pxd | 2 +- .../Cython/Includes/Deprecated/python_sequence.pxd | 2 +- .../Cython/Includes/Deprecated/python_set.pxd | 2 +- .../Cython/Includes/Deprecated/python_string.pxd | 2 +- .../Cython/Includes/Deprecated/python_tuple.pxd | 2 +- .../Cython/Includes/Deprecated/python_type.pxd | 2 +- .../Cython/Includes/Deprecated/python_unicode.pxd | 2 +- .../Cython/Includes/Deprecated/python_version.pxd | 2 +- .../Cython/Includes/Deprecated/python_weakref.pxd | 2 +- .../cython/Cython/Includes/Deprecated/stdio.pxd | 2 +- .../cython/Cython/Includes/Deprecated/stdlib.pxd | 2 +- .../cython/Cython/Includes/Deprecated/stl.pxd | 182 +- .../cython/Cython/Includes/cpython/__init__.pxd | 364 +- .../tools/cython/Cython/Includes/cpython/array.pxd | 270 +- .../tools/cython/Cython/Includes/cpython/bool.pxd | 76 +- .../cython/Cython/Includes/cpython/buffer.pxd | 216 +- .../tools/cython/Cython/Includes/cpython/bytes.pxd | 394 +- .../cython/Cython/Includes/cpython/cobject.pxd | 72 +- .../cython/Cython/Includes/cpython/complex.pxd | 100 +- .../cython/Cython/Includes/cpython/datetime.pxd | 410 +- 
.../tools/cython/Cython/Includes/cpython/dict.pxd | 322 +- .../tools/cython/Cython/Includes/cpython/exc.pxd | 496 +- .../tools/cython/Cython/Includes/cpython/float.pxd | 78 +- .../cython/Cython/Includes/cpython/function.pxd | 128 +- .../cython/Cython/Includes/cpython/getargs.pxd | 24 +- .../cython/Cython/Includes/cpython/instance.pxd | 50 +- .../tools/cython/Cython/Includes/cpython/int.pxd | 158 +- .../cython/Cython/Includes/cpython/iterator.pxd | 72 +- .../tools/cython/Cython/Includes/cpython/list.pxd | 182 +- .../tools/cython/Cython/Includes/cpython/long.pxd | 226 +- .../cython/Cython/Includes/cpython/mapping.pxd | 128 +- .../tools/cython/Cython/Includes/cpython/mem.pxd | 150 +- .../cython/Cython/Includes/cpython/method.pxd | 94 +- .../cython/Cython/Includes/cpython/module.pxd | 334 +- .../cython/Cython/Includes/cpython/number.pxd | 500 +- .../cython/Cython/Includes/cpython/object.pxd | 574 +- .../cython/Cython/Includes/cpython/oldbuffer.pxd | 126 +- .../cython/Cython/Includes/cpython/pycapsule.pxd | 276 +- .../cython/Cython/Includes/cpython/pystate.pxd | 168 +- .../cython/Cython/Includes/cpython/pythread.pxd | 74 +- .../tools/cython/Cython/Includes/cpython/ref.pxd | 98 +- .../cython/Cython/Includes/cpython/sequence.pxd | 270 +- .../tools/cython/Cython/Includes/cpython/set.pxd | 226 +- .../cython/Cython/Includes/cpython/string.pxd | 394 +- .../tools/cython/Cython/Includes/cpython/tuple.pxd | 138 +- .../tools/cython/Cython/Includes/cpython/type.pxd | 96 +- .../cython/Cython/Includes/cpython/unicode.pxd | 706 +- .../cython/Cython/Includes/cpython/version.pxd | 64 +- .../cython/Cython/Includes/cpython/weakref.pxd | 80 +- .../tools/cython/Cython/Includes/libc/__init__.pxd | 2 +- .../tools/cython/Cython/Includes/libc/errno.pxd | 252 +- .../tools/cython/Cython/Includes/libc/float.pxd | 26 +- .../tools/cython/Cython/Includes/libc/limits.pxd | 16 +- .../tools/cython/Cython/Includes/libc/locale.pxd | 90 +- contrib/tools/cython/Cython/Includes/libc/math.pxd | 158 +- 
.../tools/cython/Cython/Includes/libc/setjmp.pxd | 8 +- .../tools/cython/Cython/Includes/libc/signal.pxd | 28 +- .../tools/cython/Cython/Includes/libc/stddef.pxd | 16 +- .../tools/cython/Cython/Includes/libc/stdint.pxd | 202 +- .../tools/cython/Cython/Includes/libc/stdio.pxd | 158 +- .../tools/cython/Cython/Includes/libc/stdlib.pxd | 142 +- .../tools/cython/Cython/Includes/libc/string.pxd | 98 +- .../cython/Cython/Includes/libcpp/__init__.pxd | 4 +- .../cython/Cython/Includes/libcpp/algorithm.pxd | 24 +- .../tools/cython/Cython/Includes/libcpp/cast.pxd | 22 +- .../cython/Cython/Includes/libcpp/complex.pxd | 146 +- .../tools/cython/Cython/Includes/libcpp/deque.pxd | 102 +- .../tools/cython/Cython/Includes/libcpp/list.pxd | 120 +- .../tools/cython/Cython/Includes/libcpp/map.pxd | 86 +- .../tools/cython/Cython/Includes/libcpp/pair.pxd | 2 +- .../tools/cython/Cython/Includes/libcpp/queue.pxd | 40 +- .../tools/cython/Cython/Includes/libcpp/set.pxd | 82 +- .../tools/cython/Cython/Includes/libcpp/stack.pxd | 20 +- .../tools/cython/Cython/Includes/libcpp/string.pxd | 88 +- .../Cython/Includes/libcpp/unordered_map.pxd | 90 +- .../Cython/Includes/libcpp/unordered_set.pxd | 88 +- .../cython/Cython/Includes/libcpp/utility.pxd | 26 +- .../tools/cython/Cython/Includes/libcpp/vector.pxd | 86 +- contrib/tools/cython/Cython/Includes/numpy.pxd | 1812 +- .../cython/Cython/Includes/numpy/__init__.pxd | 1818 +- .../tools/cython/Cython/Includes/numpy/math.pxd | 232 +- contrib/tools/cython/Cython/Includes/openmp.pxd | 100 +- .../cython/Cython/Includes/posix/__init__.pxd | 2 +- .../tools/cython/Cython/Includes/posix/fcntl.pxd | 132 +- .../tools/cython/Cython/Includes/posix/ioctl.pxd | 6 +- .../cython/Cython/Includes/posix/resource.pxd | 106 +- .../tools/cython/Cython/Includes/posix/signal.pxd | 122 +- .../tools/cython/Cython/Includes/posix/stat.pxd | 128 +- .../tools/cython/Cython/Includes/posix/stdlib.pxd | 56 +- .../tools/cython/Cython/Includes/posix/time.pxd | 92 +- 
.../tools/cython/Cython/Includes/posix/types.pxd | 28 +- .../tools/cython/Cython/Includes/posix/unistd.pxd | 540 +- contrib/tools/cython/Cython/Plex/Actions.pxd | 50 +- contrib/tools/cython/Cython/Plex/Actions.py | 94 +- contrib/tools/cython/Cython/Plex/DFA.py | 64 +- contrib/tools/cython/Cython/Plex/Errors.py | 56 +- contrib/tools/cython/Cython/Plex/Lexicons.py | 124 +- contrib/tools/cython/Cython/Plex/Machines.py | 104 +- contrib/tools/cython/Cython/Plex/Regexps.py | 1084 +- contrib/tools/cython/Cython/Plex/Scanners.pxd | 78 +- contrib/tools/cython/Cython/Plex/Scanners.py | 108 +- contrib/tools/cython/Cython/Plex/Timing.py | 46 +- contrib/tools/cython/Cython/Plex/Traditional.py | 72 +- contrib/tools/cython/Cython/Plex/Transitions.py | 86 +- contrib/tools/cython/Cython/Plex/__init__.py | 78 +- contrib/tools/cython/Cython/Runtime/__init__.py | 2 +- contrib/tools/cython/Cython/Runtime/refnanny.pyx | 370 +- contrib/tools/cython/Cython/Shadow.py | 798 +- contrib/tools/cython/Cython/StringIOTree.py | 134 +- contrib/tools/cython/Cython/Tempita/__init__.py | 6 +- contrib/tools/cython/Cython/Tempita/_looper.py | 326 +- contrib/tools/cython/Cython/Tempita/_tempita.py | 2332 +-- contrib/tools/cython/Cython/Tempita/compat3.py | 88 +- contrib/tools/cython/Cython/TestUtils.py | 372 +- .../tools/cython/Cython/Tests/TestCodeWriter.py | 162 +- contrib/tools/cython/Cython/Tests/TestJediTyper.py | 266 +- .../tools/cython/Cython/Tests/TestStringIOTree.py | 132 +- contrib/tools/cython/Cython/Tests/__init__.py | 2 +- contrib/tools/cython/Cython/Tests/xmlrunner.py | 710 +- contrib/tools/cython/Cython/Utility/Buffer.c | 1608 +- contrib/tools/cython/Cython/Utility/Builtins.c | 648 +- contrib/tools/cython/Cython/Utility/Capsule.c | 40 +- contrib/tools/cython/Cython/Utility/CommonTypes.c | 96 +- contrib/tools/cython/Cython/Utility/CppConvert.pyx | 378 +- contrib/tools/cython/Cython/Utility/CppSupport.cpp | 92 +- .../tools/cython/Cython/Utility/CythonFunction.c | 2042 +- 
contrib/tools/cython/Cython/Utility/Embed.c | 362 +- contrib/tools/cython/Cython/Utility/Exceptions.c | 908 +- .../tools/cython/Cython/Utility/ExtensionTypes.c | 106 +- .../cython/Cython/Utility/FunctionArguments.c | 530 +- contrib/tools/cython/Cython/Utility/ImportExport.c | 952 +- contrib/tools/cython/Cython/Utility/MemoryView.pyx | 2668 +-- contrib/tools/cython/Cython/Utility/MemoryView_C.c | 1694 +- .../tools/cython/Cython/Utility/ModuleSetupCode.c | 960 +- .../tools/cython/Cython/Utility/ObjectHandling.c | 2418 +-- contrib/tools/cython/Cython/Utility/Optimize.c | 840 +- contrib/tools/cython/Cython/Utility/Overflow.c | 494 +- contrib/tools/cython/Cython/Utility/Printing.c | 352 +- contrib/tools/cython/Cython/Utility/Profile.c | 254 +- contrib/tools/cython/Cython/Utility/StringTools.c | 1372 +- .../cython/Cython/Utility/TestCyUtilityLoader.pyx | 16 +- .../cython/Cython/Utility/TestCythonScope.pyx | 128 +- .../cython/Cython/Utility/TestUtilityLoader.c | 24 +- .../tools/cython/Cython/Utility/TypeConversion.c | 854 +- contrib/tools/cython/Cython/Utility/arrayarray.h | 262 +- contrib/tools/cython/Cython/Utils.py | 532 +- contrib/tools/cython/Cython/__init__.py | 14 +- contrib/tools/cython/cygdb.py | 16 +- contrib/tools/cython/cython.py | 50 +- contrib/tools/protoc/ya.make | 2 +- contrib/tools/python/pyconfig.inc | 30 +- contrib/tools/python/src/Include/pyport.h | 4 +- contrib/tools/python/src/config_init.c | 12 +- contrib/tools/python/src/config_map.c | 12 +- contrib/tools/python/ya.make | 8 +- contrib/tools/ragel6/cdcodegen.cpp | 10 +- contrib/tools/ragel6/javacodegen.cpp | 10 +- contrib/tools/ragel6/rbxgoto.cpp | 2 +- contrib/tools/ragel6/rlscan.cpp | 94 +- contrib/tools/ragel6/rlscan.h | 2 +- contrib/tools/ya.make | 6 +- contrib/tools/yasm/bin/ya.make | 20 +- contrib/tools/yasm/util.h | 4 +- contrib/ya.make | 6 +- library/README.md | 4 +- library/cpp/actors/core/actor.h | 4 +- library/cpp/actors/core/actor_bootstrapped.h | 2 +- 
library/cpp/actors/core/actor_coroutine.cpp | 2 +- library/cpp/actors/core/actor_coroutine.h | 2 +- library/cpp/actors/core/actor_coroutine_ut.cpp | 2 +- library/cpp/actors/core/buffer.cpp | 2 +- library/cpp/actors/core/buffer.h | 2 +- library/cpp/actors/core/callstack.cpp | 4 +- library/cpp/actors/core/callstack.h | 4 +- library/cpp/actors/core/event.h | 4 +- library/cpp/actors/core/event_load.h | 4 +- library/cpp/actors/core/event_local.h | 2 +- library/cpp/actors/core/event_pb.cpp | 2 +- library/cpp/actors/core/event_pb.h | 52 +- library/cpp/actors/core/event_pb_ut.cpp | 20 +- library/cpp/actors/core/interconnect.h | 6 +- library/cpp/actors/core/log.cpp | 22 +- library/cpp/actors/core/log.h | 2 +- library/cpp/actors/core/log_iface.h | 166 +- library/cpp/actors/core/log_settings.cpp | 4 +- library/cpp/actors/core/log_settings.h | 10 +- library/cpp/actors/core/mon.h | 6 +- library/cpp/actors/core/mon_stats.h | 6 +- library/cpp/actors/core/probes.h | 4 +- library/cpp/actors/core/process_stats.cpp | 2 +- library/cpp/actors/core/process_stats.h | 2 +- library/cpp/actors/core/scheduler_actor.cpp | 30 +- library/cpp/actors/core/scheduler_actor.h | 10 +- library/cpp/actors/core/scheduler_actor_ut.cpp | 12 +- library/cpp/actors/core/scheduler_basic.cpp | 6 +- library/cpp/actors/core/scheduler_basic.h | 6 +- library/cpp/actors/dnscachelib/dnscache.cpp | 130 +- library/cpp/actors/dnscachelib/dnscache.h | 58 +- library/cpp/actors/dnscachelib/probes.h | 56 +- library/cpp/actors/dnscachelib/timekeeper.h | 10 +- library/cpp/actors/helpers/mon_histogram_helper.h | 100 +- .../cpp/actors/interconnect/event_holder_pool.h | 4 +- library/cpp/actors/interconnect/events_local.h | 550 +- library/cpp/actors/interconnect/interconnect.h | 188 +- .../actors/interconnect/interconnect_address.cpp | 60 +- .../cpp/actors/interconnect/interconnect_address.h | 22 +- .../actors/interconnect/interconnect_channel.cpp | 18 +- .../cpp/actors/interconnect/interconnect_channel.h | 44 +- 
.../cpp/actors/interconnect/interconnect_common.h | 56 +- .../actors/interconnect/interconnect_handshake.cpp | 18 +- .../actors/interconnect/interconnect_handshake.h | 6 +- .../cpp/actors/interconnect/interconnect_impl.h | 60 +- .../interconnect/interconnect_nameserver_table.cpp | 60 +- .../actors/interconnect/interconnect_stream.cpp | 340 +- .../cpp/actors/interconnect/interconnect_stream.h | 64 +- .../interconnect_tcp_input_session.cpp | 6 +- .../actors/interconnect/interconnect_tcp_proxy.cpp | 588 +- .../actors/interconnect/interconnect_tcp_proxy.h | 322 +- .../interconnect/interconnect_tcp_server.cpp | 24 +- .../actors/interconnect/interconnect_tcp_server.h | 38 +- .../interconnect/interconnect_tcp_session.cpp | 598 +- .../actors/interconnect/interconnect_tcp_session.h | 280 +- library/cpp/actors/interconnect/load.cpp | 44 +- library/cpp/actors/interconnect/load.h | 16 +- library/cpp/actors/interconnect/logging.h | 6 +- library/cpp/actors/interconnect/packet.h | 20 +- library/cpp/actors/interconnect/poller.h | 24 +- library/cpp/actors/interconnect/poller_actor.cpp | 6 +- library/cpp/actors/interconnect/poller_actor.h | 4 +- library/cpp/actors/interconnect/poller_tcp.cpp | 50 +- library/cpp/actors/interconnect/poller_tcp.h | 22 +- .../cpp/actors/interconnect/poller_tcp_unit.cpp | 192 +- library/cpp/actors/interconnect/poller_tcp_unit.h | 76 +- .../actors/interconnect/poller_tcp_unit_epoll.cpp | 192 +- .../actors/interconnect/poller_tcp_unit_epoll.h | 40 +- .../actors/interconnect/poller_tcp_unit_select.cpp | 102 +- .../actors/interconnect/poller_tcp_unit_select.h | 20 +- .../actors/interconnect/ut/lib/ic_test_cluster.h | 8 +- .../cpp/actors/interconnect/ut/lib/interrupter.h | 28 +- library/cpp/actors/interconnect/ut/lib/node.h | 4 +- .../cpp/actors/interconnect/ut/lib/test_actors.h | 92 +- .../cpp/actors/interconnect/ut/lib/test_events.h | 2 +- library/cpp/actors/interconnect/ut_fat/main.cpp | 4 +- library/cpp/actors/interconnect/watchdog_timer.h | 10 +- 
library/cpp/actors/memory_log/memlog.cpp | 64 +- library/cpp/actors/prof/tag.cpp | 2 +- library/cpp/actors/prof/tag.h | 2 +- library/cpp/actors/testlib/test_runtime.cpp | 4 +- library/cpp/actors/util/funnel_queue.h | 128 +- library/cpp/actors/util/queue_chunk.h | 4 +- library/cpp/actors/util/recentwnd.h | 48 +- library/cpp/actors/util/rope.h | 2 +- library/cpp/actors/util/threadparkpad.cpp | 152 +- library/cpp/actors/util/unordered_cache.h | 4 +- library/cpp/actors/wilson/wilson_event.h | 4 +- library/cpp/actors/wilson/wilson_trace.h | 2 +- library/cpp/archive/yarchive.cpp | 552 +- library/cpp/archive/yarchive.h | 40 +- library/cpp/archive/yarchive_ut.cpp | 88 +- library/cpp/balloc/balloc.cpp | 256 +- library/cpp/balloc/malloc-info.cpp | 26 +- library/cpp/binsaver/bin_saver.cpp | 18 +- library/cpp/binsaver/bin_saver.h | 290 +- library/cpp/binsaver/blob_io.h | 34 +- library/cpp/binsaver/buffered_io.cpp | 4 +- library/cpp/binsaver/buffered_io.h | 78 +- library/cpp/binsaver/class_factory.h | 116 +- library/cpp/binsaver/ut/binsaver_ut.cpp | 158 +- library/cpp/binsaver/util_stream_io.h | 76 +- library/cpp/binsaver/ya.make | 4 +- library/cpp/bit_io/bitinout_ut.cpp | 26 +- library/cpp/bit_io/bitinput.cpp | 2 +- library/cpp/bit_io/bitinput.h | 240 +- library/cpp/bit_io/bitinput_impl.cpp | 2 +- library/cpp/bit_io/bitinput_impl.h | 142 +- library/cpp/bit_io/bitoutput.cpp | 2 +- library/cpp/bit_io/bitoutput.h | 282 +- library/cpp/bit_io/ut/ya.make | 18 +- library/cpp/bit_io/ya.make | 32 +- .../codecs/legacy_zstd06/legacy_zstd06.cpp | 74 +- library/cpp/blockcodecs/codecs_ut.cpp | 396 +- library/cpp/blockcodecs/core/codecs.cpp | 176 +- library/cpp/blockcodecs/core/codecs.h | 110 +- library/cpp/blockcodecs/core/common.h | 138 +- library/cpp/blockcodecs/core/stream.cpp | 362 +- library/cpp/blockcodecs/core/stream.h | 66 +- library/cpp/blockcodecs/core/ya.make | 20 +- library/cpp/blockcodecs/fuzz/main.cpp | 14 +- library/cpp/blockcodecs/fuzz/ya.make | 6 +- 
library/cpp/blockcodecs/ut/ya.make | 10 +- library/cpp/bucket_quoter/bucket_quoter.cpp | 2 +- library/cpp/bucket_quoter/bucket_quoter.h | 18 +- library/cpp/build_info/build_info.h | 2 +- library/cpp/build_info/build_info_static.h | 6 +- library/cpp/build_info/sandbox.h | 2 +- library/cpp/build_info/ya.make | 2 +- library/cpp/cache/cache.h | 54 +- library/cpp/cache/thread_safe_cache.h | 14 +- library/cpp/cache/ut/cache_ut.cpp | 168 +- library/cpp/cgiparam/cgiparam.cpp | 172 +- library/cpp/cgiparam/cgiparam.h | 38 +- library/cpp/cgiparam/cgiparam_ut.cpp | 102 +- library/cpp/cgiparam/fuzz/main.cpp | 20 +- library/cpp/cgiparam/fuzz/ya.make | 14 +- library/cpp/charset/codepage.cpp | 278 +- library/cpp/charset/codepage.h | 90 +- library/cpp/charset/codepage_ut.cpp | 138 +- library/cpp/charset/cp_encrec.cpp | 10 +- library/cpp/charset/doccodes.cpp | 2 +- library/cpp/charset/doccodes.h | 72 +- library/cpp/charset/iconv.cpp | 186 +- library/cpp/charset/iconv.h | 82 +- library/cpp/charset/iconv_ut.cpp | 54 +- library/cpp/charset/recyr.hh | 34 +- library/cpp/charset/recyr_int.hh | 540 +- library/cpp/charset/wide.h | 20 +- library/cpp/charset/wide_ut.cpp | 96 +- library/cpp/charset/ya.make | 6 +- library/cpp/codecs/codecs.cpp | 264 +- library/cpp/codecs/codecs.h | 370 +- library/cpp/codecs/codecs_registry.cpp | 24 +- library/cpp/codecs/codecs_registry.h | 12 +- library/cpp/codecs/comptable_codec.cpp | 6 +- library/cpp/codecs/comptable_codec.h | 48 +- library/cpp/codecs/delta_codec.cpp | 32 +- library/cpp/codecs/delta_codec.h | 206 +- library/cpp/codecs/float_huffman.cpp | 2 +- library/cpp/codecs/greedy_dict/gd_builder.cpp | 190 +- library/cpp/codecs/greedy_dict/gd_builder.h | 168 +- library/cpp/codecs/greedy_dict/gd_entry.cpp | 126 +- library/cpp/codecs/greedy_dict/gd_entry.h | 126 +- library/cpp/codecs/greedy_dict/gd_stats.h | 116 +- .../cpp/codecs/greedy_dict/ut/greedy_dict_ut.cpp | 8 +- library/cpp/codecs/huffman_codec.cpp | 870 +- library/cpp/codecs/huffman_codec.h | 38 +- 
library/cpp/codecs/pfor_codec.cpp | 32 +- library/cpp/codecs/pfor_codec.h | 276 +- library/cpp/codecs/sample.h | 8 +- library/cpp/codecs/solar_codec.cpp | 170 +- library/cpp/codecs/solar_codec.h | 370 +- library/cpp/codecs/static/builder.h | 2 +- library/cpp/codecs/static/example/example.cpp | 8 +- library/cpp/codecs/static/example/example.h | 10 +- library/cpp/codecs/static/static.cpp | 4 +- .../static_codec_checker/static_codec_checker.cpp | 18 +- .../static_codec_generator.cpp | 26 +- library/cpp/codecs/static/ut/builder_ut.cpp | 6 +- library/cpp/codecs/static/ut/static_ut.cpp | 6 +- library/cpp/codecs/tls_cache.h | 16 +- library/cpp/codecs/ut/codecs_ut.cpp | 224 +- library/cpp/codecs/ut/float_huffman_ut.cpp | 6 +- library/cpp/codecs/ut/tls_cache_ut.cpp | 54 +- library/cpp/codecs/ya.make | 2 +- library/cpp/codecs/zstd_dict_codec.cpp | 20 +- library/cpp/codecs/zstd_dict_codec.h | 40 +- library/cpp/colorizer/colors.cpp | 50 +- library/cpp/colorizer/colors.h | 34 +- library/cpp/colorizer/fwd.h | 2 +- library/cpp/colorizer/output.cpp | 18 +- library/cpp/colorizer/output.h | 60 +- library/cpp/colorizer/ya.make | 20 +- library/cpp/compproto/bit.h | 654 +- library/cpp/compproto/compproto_ut.cpp | 86 +- library/cpp/compproto/compressor.h | 114 +- library/cpp/compproto/huff.h | 702 +- library/cpp/compproto/metainfo.h | 502 +- library/cpp/comptable/comptable.cpp | 730 +- library/cpp/comptable/comptable.h | 94 +- library/cpp/comptable/usage/usage.cpp | 30 +- library/cpp/comptable/ut/comptable_ut.cpp | 28 +- library/cpp/containers/2d_array/2d_array.h | 22 +- library/cpp/containers/2d_array/ya.make | 2 +- library/cpp/containers/atomizer/atomizer.cpp | 2 +- library/cpp/containers/atomizer/atomizer.h | 104 +- library/cpp/containers/atomizer/ya.make | 22 +- library/cpp/containers/bitseq/bititerator.h | 12 +- library/cpp/containers/bitseq/bititerator_ut.cpp | 2 +- library/cpp/containers/bitseq/bitvector.cpp | 2 +- library/cpp/containers/bitseq/bitvector.h | 18 +- 
library/cpp/containers/bitseq/readonly_bitvector.h | 4 +- library/cpp/containers/bitseq/ya.make | 16 +- .../cpp/containers/compact_vector/compact_vector.h | 14 +- library/cpp/containers/comptrie/array_with_size.h | 20 +- .../cpp/containers/comptrie/chunked_helpers_trie.h | 32 +- library/cpp/containers/comptrie/comptrie.cpp | 16 +- library/cpp/containers/comptrie/comptrie_builder.h | 10 +- .../cpp/containers/comptrie/comptrie_builder.inl | 20 +- library/cpp/containers/comptrie/comptrie_impl.cpp | 52 +- library/cpp/containers/comptrie/comptrie_impl.h | 34 +- library/cpp/containers/comptrie/comptrie_trie.h | 70 +- library/cpp/containers/comptrie/comptrie_ut.cpp | 464 +- .../containers/comptrie/first_symbol_iterator.h | 4 +- library/cpp/containers/comptrie/key_selector.h | 12 +- library/cpp/containers/comptrie/leaf_skipper.h | 20 +- library/cpp/containers/comptrie/loader/loader.h | 2 +- .../cpp/containers/comptrie/loader/loader_ut.cpp | 26 +- .../cpp/containers/comptrie/make_fast_layout.cpp | 852 +- library/cpp/containers/comptrie/make_fast_layout.h | 20 +- library/cpp/containers/comptrie/minimize.cpp | 678 +- library/cpp/containers/comptrie/minimize.h | 32 +- library/cpp/containers/comptrie/node.cpp | 124 +- library/cpp/containers/comptrie/node.h | 128 +- .../containers/comptrie/opaque_trie_iterator.cpp | 428 +- .../cpp/containers/comptrie/opaque_trie_iterator.h | 474 +- library/cpp/containers/comptrie/prefix_iterator.h | 6 +- library/cpp/containers/comptrie/protopacker.h | 2 +- library/cpp/containers/comptrie/search_iterator.h | 4 +- library/cpp/containers/comptrie/set.h | 16 +- .../containers/comptrie/write_trie_backwards.cpp | 188 +- library/cpp/containers/comptrie/writeable_node.cpp | 156 +- library/cpp/containers/comptrie/writeable_node.h | 30 +- library/cpp/containers/comptrie/ya.make | 6 +- .../cpp/containers/intrusive_avl_tree/avltree.cpp | 2 +- .../cpp/containers/intrusive_avl_tree/avltree.h | 1198 +- .../intrusive_avl_tree/ut/avltree_ut.cpp | 32 +- 
.../cpp/containers/intrusive_avl_tree/ut/ya.make | 14 +- library/cpp/containers/intrusive_avl_tree/ya.make | 16 +- .../intrusive_rb_tree/fuzz/rb_tree_fuzzing.cpp | 12 +- .../cpp/containers/intrusive_rb_tree/rb_tree.cpp | 2 +- library/cpp/containers/intrusive_rb_tree/rb_tree.h | 1262 +- .../containers/intrusive_rb_tree/rb_tree_ut.cpp | 316 +- .../cpp/containers/intrusive_rb_tree/ut/ya.make | 14 +- library/cpp/containers/intrusive_rb_tree/ya.make | 16 +- .../cpp/containers/paged_vector/paged_vector.cpp | 2 +- library/cpp/containers/paged_vector/paged_vector.h | 812 +- .../containers/paged_vector/ut/paged_vector_ut.cpp | 624 +- library/cpp/containers/paged_vector/ut/ya.make | 24 +- .../cpp/containers/sorted_vector/sorted_vector.cpp | 2 +- .../cpp/containers/sorted_vector/sorted_vector.h | 702 +- library/cpp/containers/sorted_vector/ya.make | 18 +- library/cpp/containers/stack_array/range_ops.cpp | 2 +- library/cpp/containers/stack_array/range_ops.h | 96 +- library/cpp/containers/stack_array/stack_array.cpp | 2 +- library/cpp/containers/stack_array/stack_array.h | 36 +- library/cpp/containers/stack_array/ut/tests_ut.cpp | 170 +- library/cpp/containers/stack_array/ut/ya.make | 16 +- library/cpp/containers/stack_array/ya.make | 20 +- library/cpp/containers/stack_vector/stack_vec.h | 26 +- library/cpp/containers/str_map/str_map.cpp | 2 +- library/cpp/containers/str_map/str_map.h | 66 +- library/cpp/containers/str_map/ya.make | 18 +- library/cpp/containers/top_keeper/top_keeper.h | 16 +- .../containers/top_keeper/top_keeper/top_keeper.h | 16 +- library/cpp/coroutine/engine/condvar.h | 52 +- library/cpp/coroutine/engine/cont_poller.cpp | 10 +- library/cpp/coroutine/engine/cont_poller.h | 112 +- library/cpp/coroutine/engine/coroutine_ut.cpp | 388 +- library/cpp/coroutine/engine/events.h | 216 +- library/cpp/coroutine/engine/impl.cpp | 36 +- library/cpp/coroutine/engine/impl.h | 216 +- library/cpp/coroutine/engine/iostatus.cpp | 2 +- library/cpp/coroutine/engine/iostatus.h | 
142 +- library/cpp/coroutine/engine/mutex.h | 74 +- library/cpp/coroutine/engine/network.cpp | 170 +- library/cpp/coroutine/engine/network.h | 34 +- library/cpp/coroutine/engine/poller.cpp | 538 +- library/cpp/coroutine/engine/poller.h | 66 +- library/cpp/coroutine/engine/sockmap.h | 42 +- library/cpp/coroutine/engine/sockpool.cpp | 2 +- library/cpp/coroutine/engine/sockpool.h | 366 +- library/cpp/coroutine/engine/trampoline.cpp | 8 +- library/cpp/coroutine/engine/trampoline.h | 20 +- library/cpp/coroutine/engine/ya.make | 26 +- library/cpp/coroutine/listener/listen.cpp | 332 +- library/cpp/coroutine/listener/listen.h | 126 +- library/cpp/coroutine/listener/ya.make | 24 +- library/cpp/coroutine/ya.make | 16 +- library/cpp/dbg_output/DONT_COMMIT.h | 14 +- library/cpp/dbg_output/auto.h | 12 +- library/cpp/dbg_output/colorscheme.h | 122 +- library/cpp/dbg_output/dump.cpp | 2 +- library/cpp/dbg_output/dump.h | 166 +- library/cpp/dbg_output/dumpers.cpp | 2 +- library/cpp/dbg_output/dumpers.h | 250 +- library/cpp/dbg_output/engine.cpp | 62 +- library/cpp/dbg_output/engine.h | 308 +- library/cpp/dbg_output/ut/dbg_output_ut.cpp | 60 +- library/cpp/dbg_output/ya.make | 20 +- library/cpp/deprecated/accessors/accessors.cpp | 2 +- library/cpp/deprecated/accessors/accessors.h | 38 +- .../cpp/deprecated/accessors/accessors_impl.cpp | 2 +- library/cpp/deprecated/accessors/accessors_impl.h | 254 +- library/cpp/deprecated/accessors/accessors_ut.cpp | 12 +- library/cpp/deprecated/accessors/memory_traits.cpp | 2 +- library/cpp/deprecated/accessors/memory_traits.h | 22 +- .../cpp/deprecated/enum_codegen/enum_codegen.cpp | 2 +- library/cpp/deprecated/enum_codegen/enum_codegen.h | 48 +- .../deprecated/enum_codegen/enum_codegen_ut.cpp | 16 +- library/cpp/deprecated/kmp/kmp.cpp | 4 +- library/cpp/deprecated/kmp/kmp.h | 6 +- library/cpp/deprecated/kmp/kmp_ut.cpp | 18 +- library/cpp/deprecated/mapped_file/mapped_file.cpp | 16 +- library/cpp/deprecated/mapped_file/mapped_file.h | 30 +- 
.../deprecated/mapped_file/ut/mapped_file_ut.cpp | 2 +- library/cpp/deprecated/split/delim_string_iter.h | 32 +- .../cpp/deprecated/split/delim_string_iter_ut.cpp | 22 +- library/cpp/deprecated/split/split_iterator.cpp | 128 +- library/cpp/deprecated/split/split_iterator.h | 120 +- library/cpp/deprecated/split/split_iterator_ut.cpp | 32 +- library/cpp/deprecated/ya.make | 2 +- library/cpp/diff/diff.cpp | 4 +- library/cpp/diff/diff.h | 2 +- library/cpp/digest/argonish/ut/ut.cpp | 212 +- library/cpp/digest/argonish/ut_fat/ut.cpp | 136 +- library/cpp/digest/crc32c/crc32c.cpp | 54 +- library/cpp/digest/crc32c/crc32c.h | 10 +- library/cpp/digest/crc32c/crc32c_ut.cpp | 18 +- library/cpp/digest/crc32c/ut/ya.make | 16 +- library/cpp/digest/crc32c/ya.make | 16 +- library/cpp/digest/lower_case/hash_ops.cpp | 44 +- library/cpp/digest/lower_case/hash_ops.h | 24 +- library/cpp/digest/lower_case/hash_ops_ut.cpp | 56 +- library/cpp/digest/lower_case/lchash.cpp | 2 +- library/cpp/digest/lower_case/lchash.h | 28 +- library/cpp/digest/lower_case/lchash_ut.cpp | 32 +- library/cpp/digest/lower_case/lciter.cpp | 2 +- library/cpp/digest/lower_case/lciter.h | 68 +- library/cpp/digest/lower_case/ut/ya.make | 16 +- library/cpp/digest/lower_case/ya.make | 26 +- library/cpp/digest/md5/bench/main.cpp | 24 +- library/cpp/digest/md5/bench/ya.make | 20 +- library/cpp/digest/md5/md5.cpp | 94 +- library/cpp/digest/md5/md5.h | 24 +- library/cpp/digest/md5/md5_medium_ut.cpp | 14 +- library/cpp/digest/md5/md5_ut.cpp | 26 +- library/cpp/digest/md5/medium_ut/ya.make | 12 +- library/cpp/digest/md5/ut/ya.make | 14 +- library/cpp/digest/md5/ya.make | 18 +- library/cpp/digest/old_crc/crc.cpp | 524 +- library/cpp/digest/old_crc/crc.h | 84 +- library/cpp/digest/old_crc/crc_ut.cpp | 144 +- library/cpp/digest/old_crc/gencrc/main.cpp | 46 +- library/cpp/digest/old_crc/gencrc/ya.make | 18 +- library/cpp/digest/old_crc/ut/ya.make | 14 +- library/cpp/digest/old_crc/ya.make | 18 +- library/cpp/digest/sfh/sfh.cpp | 
2 +- library/cpp/digest/sfh/sfh.h | 38 +- library/cpp/digest/sfh/sfh_ut.cpp | 76 +- library/cpp/digest/sfh/ut/ya.make | 14 +- library/cpp/digest/sfh/ya.make | 16 +- library/cpp/digest/ya.make | 28 +- library/cpp/dns/cache.cpp | 160 +- library/cpp/dns/cache.h | 48 +- library/cpp/dns/magic.cpp | 50 +- library/cpp/dns/magic.h | 30 +- library/cpp/dns/thread.cpp | 178 +- library/cpp/dns/ut/dns_ut.cpp | 14 +- library/cpp/dns/ut/ya.make | 18 +- library/cpp/enumbitset/enumbitset.h | 88 +- library/cpp/enumbitset/enumbitset_ut.cpp | 2 +- library/cpp/execprofile/autostart/start.cpp | 26 +- library/cpp/execprofile/autostart/ya.make | 24 +- library/cpp/execprofile/profile.cpp | 184 +- library/cpp/execprofile/ya.make | 2 +- library/cpp/getopt/last_getopt.h | 2 +- library/cpp/getopt/last_getopt_demo/demo.cpp | 2 +- library/cpp/getopt/last_getopt_support.h | 2 +- library/cpp/getopt/modchooser.h | 2 +- library/cpp/getopt/opt.h | 2 +- library/cpp/getopt/opt2.h | 2 +- library/cpp/getopt/posix_getopt.h | 2 +- library/cpp/getopt/print.cpp | 30 +- library/cpp/getopt/small/last_getopt.cpp | 12 +- library/cpp/getopt/small/last_getopt.h | 32 +- .../cpp/getopt/small/last_getopt_easy_setup.cpp | 72 +- library/cpp/getopt/small/last_getopt_easy_setup.h | 64 +- library/cpp/getopt/small/last_getopt_opt.cpp | 156 +- library/cpp/getopt/small/last_getopt_opt.h | 546 +- library/cpp/getopt/small/last_getopt_opts.cpp | 630 +- library/cpp/getopt/small/last_getopt_opts.h | 340 +- .../cpp/getopt/small/last_getopt_parse_result.cpp | 238 +- .../cpp/getopt/small/last_getopt_parse_result.h | 282 +- library/cpp/getopt/small/last_getopt_parser.cpp | 636 +- library/cpp/getopt/small/last_getopt_parser.h | 198 +- library/cpp/getopt/small/last_getopt_support.h | 324 +- library/cpp/getopt/small/modchooser.cpp | 2 +- library/cpp/getopt/small/modchooser.h | 8 +- library/cpp/getopt/small/opt.cpp | 48 +- library/cpp/getopt/small/opt.h | 94 +- library/cpp/getopt/small/opt2.cpp | 148 +- library/cpp/getopt/small/opt2.h | 
36 +- library/cpp/getopt/small/posix_getopt.cpp | 24 +- library/cpp/getopt/small/posix_getopt.h | 6 +- library/cpp/getopt/small/ygetopt.cpp | 194 +- library/cpp/getopt/small/ygetopt.h | 138 +- library/cpp/getopt/ut/last_getopt_ut.cpp | 58 +- library/cpp/getopt/ut/opt2_ut.cpp | 46 +- library/cpp/getopt/ut/opt_ut.cpp | 18 +- library/cpp/getopt/ut/posix_getopt_ut.cpp | 64 +- library/cpp/getopt/ut/ygetopt_ut.cpp | 80 +- library/cpp/getopt/ya.make | 2 +- library/cpp/getopt/ygetopt.h | 2 +- .../cpp/histogram/adaptive/adaptive_histogram.cpp | 22 +- .../cpp/histogram/adaptive/adaptive_histogram.h | 10 +- library/cpp/histogram/adaptive/auto_histogram.h | 6 +- library/cpp/histogram/adaptive/block_histogram.cpp | 28 +- library/cpp/histogram/adaptive/block_histogram.h | 26 +- library/cpp/histogram/adaptive/common.cpp | 4 +- library/cpp/histogram/adaptive/common.h | 2 +- .../cpp/histogram/adaptive/fixed_bin_histogram.cpp | 16 +- .../cpp/histogram/adaptive/fixed_bin_histogram.h | 6 +- library/cpp/histogram/adaptive/histogram.h | 12 +- library/cpp/histogram/adaptive/merger.h | 6 +- library/cpp/histogram/adaptive/multi_histogram.h | 10 +- library/cpp/html/escape/escape.cpp | 78 +- library/cpp/html/escape/escape.h | 6 +- library/cpp/html/pcdata/pcdata.cpp | 108 +- library/cpp/html/pcdata/pcdata.h | 8 +- library/cpp/html/pcdata/pcdata_ut.cpp | 54 +- library/cpp/http/fetch/exthttpcodes.cpp | 364 +- library/cpp/http/fetch/exthttpcodes.h | 214 +- library/cpp/http/fetch/http_digest.cpp | 156 +- library/cpp/http/fetch/http_digest.h | 56 +- library/cpp/http/fetch/http_socket.cpp | 162 +- library/cpp/http/fetch/httpfetcher.h | 56 +- library/cpp/http/fetch/httpfsm.h | 58 +- library/cpp/http/fetch/httpfsm_ut.cpp | 118 +- library/cpp/http/fetch/httpheader.h | 110 +- library/cpp/http/fetch/httpload.cpp | 312 +- library/cpp/http/fetch/httpload.h | 264 +- library/cpp/http/fetch/httpparser.h | 94 +- library/cpp/http/fetch/httpparser_ut.cpp | 172 +- library/cpp/http/fetch/httpzreader.h | 42 +- 
.../http/fetch/library-htfetch_ut_hreflang_in.h | 304 +- library/cpp/http/fetch/ya.make | 12 +- library/cpp/http/io/chunk.cpp | 398 +- library/cpp/http/io/chunk.h | 38 +- library/cpp/http/io/chunk_ut.cpp | 72 +- library/cpp/http/io/compression.cpp | 58 +- library/cpp/http/io/compression.h | 26 +- library/cpp/http/io/compression_ut.cpp | 20 +- library/cpp/http/io/fuzz/main.cpp | 28 +- library/cpp/http/io/fuzz/ya.make | 20 +- library/cpp/http/io/headers.cpp | 106 +- library/cpp/http/io/headers.h | 126 +- library/cpp/http/io/headers_ut.cpp | 10 +- library/cpp/http/io/list_codings/main.cpp | 14 +- library/cpp/http/io/list_codings/ya.make | 24 +- library/cpp/http/io/stream.cpp | 1302 +- library/cpp/http/io/stream.h | 180 +- library/cpp/http/io/stream_ut.cpp | 294 +- library/cpp/http/io/stream_ut_medium.cpp | 94 +- library/cpp/http/io/ut/medium/ya.make | 12 +- library/cpp/http/io/ut/ya.make | 16 +- library/cpp/http/io/ya.make | 24 +- library/cpp/http/misc/httpcodes.cpp | 88 +- library/cpp/http/misc/httpcodes.h | 84 +- library/cpp/http/misc/httpdate.cpp | 16 +- library/cpp/http/misc/httpdate.h | 8 +- library/cpp/http/misc/httpdate_ut.cpp | 18 +- library/cpp/http/misc/httpreqdata.cpp | 260 +- library/cpp/http/misc/httpreqdata.h | 68 +- library/cpp/http/misc/httpreqdata_ut.cpp | 90 +- library/cpp/http/misc/parsed_request.cpp | 54 +- library/cpp/http/misc/parsed_request.h | 50 +- library/cpp/http/misc/parsed_request_ut.cpp | 48 +- library/cpp/http/misc/ut/ya.make | 18 +- library/cpp/http/misc/ya.make | 22 +- library/cpp/http/server/conn.cpp | 84 +- library/cpp/http/server/conn.h | 56 +- library/cpp/http/server/http.cpp | 566 +- library/cpp/http/server/http.h | 86 +- library/cpp/http/server/http_ex.cpp | 120 +- library/cpp/http/server/http_ex.h | 24 +- library/cpp/http/server/http_ut.cpp | 140 +- library/cpp/http/server/options.cpp | 44 +- library/cpp/http/server/options.h | 130 +- library/cpp/http/server/response.cpp | 32 +- library/cpp/http/server/response.h | 26 +- 
library/cpp/http/server/response_ut.cpp | 26 +- library/cpp/http/server/ut/ya.make | 4 +- library/cpp/http/server/ya.make | 28 +- library/cpp/hyperloglog/hyperloglog.cpp | 50 +- library/cpp/hyperloglog/hyperloglog.h | 2 +- library/cpp/hyperloglog/hyperloglog_ut.cpp | 18 +- library/cpp/int128/ut/int128_old_ut.cpp | 20 +- library/cpp/ipv6_address/ipv6_address.cpp | 154 +- library/cpp/ipv6_address/ipv6_address.h | 76 +- library/cpp/ipv6_address/ut/ipv6_address_ut.cpp | 22 +- library/cpp/json/common/defs.cpp | 136 +- library/cpp/json/common/defs.h | 76 +- library/cpp/json/common/ya.make | 18 +- library/cpp/json/domscheme_traits.h | 22 +- library/cpp/json/easy_parse/json_easy_parser.cpp | 390 +- library/cpp/json/easy_parse/json_easy_parser.h | 14 +- .../cpp/json/easy_parse/json_easy_parser_impl.h | 6 +- library/cpp/json/fast_sax/parser.h | 22 +- library/cpp/json/fast_sax/parser.rl6 | 14 +- library/cpp/json/fast_sax/unescape.cpp | 12 +- library/cpp/json/fast_sax/unescape.h | 10 +- library/cpp/json/fast_sax/ya.make | 24 +- library/cpp/json/flex_buffers/cvt.cpp | 236 +- library/cpp/json/flex_buffers/cvt.h | 40 +- library/cpp/json/flex_buffers/ut/cvt_ut.cpp | 38 +- library/cpp/json/flex_buffers/ut/ya.make | 16 +- library/cpp/json/flex_buffers/ya.make | 28 +- library/cpp/json/fuzzy_test/main.cpp | 56 +- library/cpp/json/fuzzy_test/ya.make | 22 +- library/cpp/json/json_prettifier.cpp | 480 +- library/cpp/json/json_prettifier.h | 98 +- library/cpp/json/json_reader.cpp | 870 +- library/cpp/json/json_reader.h | 168 +- library/cpp/json/json_writer.cpp | 216 +- library/cpp/json/json_writer.h | 296 +- library/cpp/json/rapidjson_helpers.h | 4 +- library/cpp/json/ut/json_prettifier_ut.cpp | 218 +- library/cpp/json/ut/json_reader_fast_ut.cpp | 416 +- library/cpp/json/ut/json_reader_ut.cpp | 80 +- library/cpp/json/ut/json_writer_ut.cpp | 4 +- library/cpp/json/ut/ya.make | 6 +- library/cpp/json/writer/json.cpp | 902 +- library/cpp/json/writer/json.h | 36 +- 
library/cpp/json/writer/json_ut.cpp | 70 +- library/cpp/json/writer/json_value.cpp | 1648 +- library/cpp/json/writer/json_value.h | 448 +- library/cpp/json/writer/json_value_ut.cpp | 24 +- library/cpp/json/writer/ya.make | 4 +- library/cpp/json/ya.make | 2 +- library/cpp/lcs/lcs_via_lis.h | 336 +- library/cpp/lcs/lcs_via_lis_ut.cpp | 116 +- library/cpp/lcs/ya.make | 10 +- library/cpp/lfalloc/dbg_info/dbg_info.cpp | 180 +- library/cpp/lfalloc/dbg_info/dbg_info.h | 128 +- library/cpp/lfalloc/lf_allocX64.cpp | 18 +- library/cpp/lfalloc/lf_allocX64.h | 570 +- library/cpp/lfalloc/ya.make | 22 +- library/cpp/lfalloc/yt/ya.make | 22 +- library/cpp/linear_regression/benchmark/pool.cpp | 10 +- library/cpp/linear_regression/benchmark/pool.h | 10 +- library/cpp/linear_regression/linear_model.h | 62 +- .../cpp/linear_regression/linear_regression.cpp | 38 +- library/cpp/linear_regression/linear_regression.h | 46 +- .../cpp/linear_regression/linear_regression_ut.cpp | 6 +- library/cpp/linear_regression/welford.h | 10 +- library/cpp/logger/all.h | 14 +- library/cpp/logger/backend.cpp | 14 +- library/cpp/logger/backend.h | 20 +- library/cpp/logger/element.cpp | 46 +- library/cpp/logger/element.h | 78 +- library/cpp/logger/file.cpp | 98 +- library/cpp/logger/file.h | 24 +- library/cpp/logger/filter.h | 4 +- library/cpp/logger/global/common.h | 8 +- library/cpp/logger/global/global.cpp | 4 +- library/cpp/logger/global/global.h | 10 +- library/cpp/logger/global/rty_formater.cpp | 4 +- library/cpp/logger/global/rty_formater.h | 2 +- library/cpp/logger/global/rty_formater_ut.cpp | 2 +- library/cpp/logger/log.cpp | 228 +- library/cpp/logger/log.h | 54 +- library/cpp/logger/log_ut.cpp | 196 +- library/cpp/logger/null.cpp | 24 +- library/cpp/logger/null.h | 18 +- library/cpp/logger/priority.h | 20 +- library/cpp/logger/record.h | 26 +- library/cpp/logger/stream.cpp | 34 +- library/cpp/logger/stream.h | 22 +- library/cpp/logger/system.cpp | 94 +- library/cpp/logger/system.h | 90 +- 
library/cpp/logger/thread.cpp | 224 +- library/cpp/logger/thread.h | 38 +- library/cpp/logger/ya.make | 14 +- library/cpp/lwtrace/control.h | 12 +- library/cpp/lwtrace/example5/lwtrace_example5.cpp | 6 +- library/cpp/lwtrace/mon/analytics/all.h | 12 +- library/cpp/lwtrace/mon/analytics/csv_output.h | 2 +- library/cpp/lwtrace/mon/analytics/html_output.h | 2 +- library/cpp/lwtrace/mon/analytics/json_output.h | 4 +- library/cpp/lwtrace/mon/analytics/transform.h | 2 +- library/cpp/lwtrace/mon/analytics/util.h | 14 +- library/cpp/lwtrace/rwspinlock.h | 2 +- library/cpp/malloc/api/helpers/io.cpp | 14 +- library/cpp/malloc/api/helpers/ya.make | 20 +- library/cpp/malloc/api/malloc.cpp | 28 +- library/cpp/malloc/api/malloc.h | 4 +- library/cpp/malloc/api/ut/ya.make | 6 +- library/cpp/malloc/jemalloc/malloc-info.cpp | 114 +- library/cpp/malloc/jemalloc/ya.make | 22 +- library/cpp/malloc/ya.make | 14 +- library/cpp/messagebus/acceptor.cpp | 2 +- library/cpp/messagebus/acceptor.h | 66 +- library/cpp/messagebus/acceptor_status.cpp | 4 +- library/cpp/messagebus/acceptor_status.h | 38 +- library/cpp/messagebus/actor/actor.h | 218 +- library/cpp/messagebus/actor/actor_ut.cpp | 26 +- library/cpp/messagebus/actor/executor.cpp | 140 +- library/cpp/messagebus/actor/executor.h | 130 +- library/cpp/messagebus/actor/queue_for_actor.h | 92 +- library/cpp/messagebus/actor/queue_in_actor.h | 144 +- library/cpp/messagebus/actor/ring_buffer.h | 4 +- library/cpp/messagebus/actor/ring_buffer_ut.cpp | 10 +- .../messagebus/actor/ring_buffer_with_spin_lock.h | 4 +- library/cpp/messagebus/actor/tasks.h | 74 +- library/cpp/messagebus/actor/temp_tls_vector.h | 4 +- library/cpp/messagebus/actor/thread_extra.h | 40 +- library/cpp/messagebus/actor/what_thread_does.cpp | 10 +- .../cpp/messagebus/actor/what_thread_does_guard.h | 58 +- library/cpp/messagebus/async_result.h | 2 +- library/cpp/messagebus/base.h | 8 +- library/cpp/messagebus/cc_semaphore.h | 6 +- library/cpp/messagebus/cc_semaphore_ut.cpp | 
10 +- library/cpp/messagebus/config/defs.h | 90 +- library/cpp/messagebus/config/netaddr.cpp | 296 +- library/cpp/messagebus/config/netaddr.h | 134 +- library/cpp/messagebus/config/session_config.cpp | 108 +- library/cpp/messagebus/config/session_config.h | 90 +- library/cpp/messagebus/connection.h | 84 +- library/cpp/messagebus/coreconn.cpp | 34 +- library/cpp/messagebus/coreconn.h | 58 +- .../debug_receiver/debug_receiver_handler.cpp | 10 +- .../debug_receiver/debug_receiver_handler.h | 2 +- .../debug_receiver/debug_receiver_proto.cpp | 4 +- .../debug_receiver/debug_receiver_proto.h | 22 +- library/cpp/messagebus/defs.h | 2 +- library/cpp/messagebus/dummy_debugger.h | 4 +- library/cpp/messagebus/duration_histogram.cpp | 4 +- library/cpp/messagebus/duration_histogram_ut.cpp | 8 +- library/cpp/messagebus/event_loop.cpp | 8 +- library/cpp/messagebus/event_loop.h | 8 +- library/cpp/messagebus/extra_ref.h | 8 +- library/cpp/messagebus/futex_like.cpp | 12 +- library/cpp/messagebus/futex_like.h | 4 +- library/cpp/messagebus/handler.cpp | 36 +- library/cpp/messagebus/handler.h | 244 +- library/cpp/messagebus/handler_impl.h | 24 +- library/cpp/messagebus/hash.h | 22 +- library/cpp/messagebus/key_value_printer.cpp | 8 +- library/cpp/messagebus/key_value_printer.h | 2 +- library/cpp/messagebus/latch.h | 10 +- library/cpp/messagebus/left_right_buffer.h | 140 +- library/cpp/messagebus/lfqueue_batch.h | 8 +- library/cpp/messagebus/lfqueue_batch_ut.cpp | 6 +- library/cpp/messagebus/local_flags.cpp | 18 +- library/cpp/messagebus/local_flags.h | 32 +- library/cpp/messagebus/local_flags_ut.cpp | 6 +- library/cpp/messagebus/local_tasks.h | 4 +- library/cpp/messagebus/locator.cpp | 662 +- library/cpp/messagebus/locator.h | 116 +- library/cpp/messagebus/mb_lwtrace.h | 10 +- library/cpp/messagebus/memory.h | 8 +- library/cpp/messagebus/message.cpp | 324 +- library/cpp/messagebus/message.h | 436 +- library/cpp/messagebus/message_counter.cpp | 4 +- 
library/cpp/messagebus/message_counter.h | 58 +- library/cpp/messagebus/message_ptr_and_header.h | 44 +- library/cpp/messagebus/message_status.cpp | 6 +- library/cpp/messagebus/message_status.h | 76 +- library/cpp/messagebus/message_status_counter.cpp | 2 +- library/cpp/messagebus/message_status_counter.h | 40 +- library/cpp/messagebus/messqueue.cpp | 24 +- library/cpp/messagebus/misc/atomic_box.h | 18 +- library/cpp/messagebus/misc/granup.h | 28 +- library/cpp/messagebus/misc/test_sync.h | 6 +- library/cpp/messagebus/misc/tokenquota.h | 30 +- library/cpp/messagebus/misc/weak_ptr.h | 26 +- library/cpp/messagebus/misc/weak_ptr_ut.cpp | 14 +- library/cpp/messagebus/moved.h | 6 +- library/cpp/messagebus/moved_ut.cpp | 4 +- library/cpp/messagebus/network.cpp | 12 +- library/cpp/messagebus/network.h | 18 +- library/cpp/messagebus/network_ut.cpp | 6 +- library/cpp/messagebus/oldmodule/module.cpp | 1332 +- library/cpp/messagebus/oldmodule/module.h | 668 +- library/cpp/messagebus/oldmodule/startsession.cpp | 74 +- library/cpp/messagebus/oldmodule/startsession.h | 54 +- library/cpp/messagebus/protobuf/ybusbuf.cpp | 8 +- library/cpp/messagebus/protobuf/ybusbuf.h | 438 +- library/cpp/messagebus/queue_config.cpp | 8 +- library/cpp/messagebus/queue_config.h | 18 +- library/cpp/messagebus/rain_check/core/coro.cpp | 14 +- library/cpp/messagebus/rain_check/core/coro.h | 16 +- .../cpp/messagebus/rain_check/core/coro_stack.cpp | 4 +- .../cpp/messagebus/rain_check/core/coro_stack.h | 56 +- library/cpp/messagebus/rain_check/core/coro_ut.cpp | 8 +- library/cpp/messagebus/rain_check/core/env.h | 24 +- library/cpp/messagebus/rain_check/core/simple.cpp | 2 +- library/cpp/messagebus/rain_check/core/simple.h | 16 +- .../cpp/messagebus/rain_check/core/simple_ut.cpp | 6 +- library/cpp/messagebus/rain_check/core/sleep.cpp | 4 +- library/cpp/messagebus/rain_check/core/sleep.h | 2 +- .../cpp/messagebus/rain_check/core/sleep_ut.cpp | 4 +- library/cpp/messagebus/rain_check/core/spawn.cpp | 2 +- 
library/cpp/messagebus/rain_check/core/spawn.h | 2 +- .../cpp/messagebus/rain_check/core/spawn_ut.cpp | 24 +- library/cpp/messagebus/rain_check/core/task.cpp | 28 +- library/cpp/messagebus/rain_check/core/task.h | 82 +- library/cpp/messagebus/rain_check/core/track.cpp | 10 +- library/cpp/messagebus/rain_check/core/track.h | 32 +- .../cpp/messagebus/rain_check/core/track_ut.cpp | 8 +- library/cpp/messagebus/rain_check/http/client.cpp | 228 +- library/cpp/messagebus/rain_check/http/client.h | 94 +- .../cpp/messagebus/rain_check/http/client_ut.cpp | 204 +- .../rain_check/http/http_code_extractor.cpp | 32 +- .../rain_check/http/http_code_extractor.h | 6 +- .../rain_check/messagebus/messagebus_client.cpp | 38 +- .../rain_check/messagebus/messagebus_client.h | 18 +- .../rain_check/messagebus/messagebus_client_ut.cpp | 8 +- .../rain_check/messagebus/messagebus_server.cpp | 4 +- .../rain_check/messagebus/messagebus_server.h | 14 +- .../rain_check/messagebus/messagebus_server_ut.cpp | 2 +- .../cpp/messagebus/rain_check/test/helper/misc.cpp | 6 +- .../cpp/messagebus/rain_check/test/helper/misc.h | 20 +- .../rain_check/test/perftest/perftest.cpp | 28 +- library/cpp/messagebus/ref_counted.h | 6 +- .../cpp/messagebus/remote_client_connection.cpp | 56 +- library/cpp/messagebus/remote_client_connection.h | 66 +- library/cpp/messagebus/remote_client_session.cpp | 12 +- library/cpp/messagebus/remote_client_session.h | 62 +- .../messagebus/remote_client_session_semaphore.cpp | 2 +- .../messagebus/remote_client_session_semaphore.h | 66 +- library/cpp/messagebus/remote_connection.cpp | 1614 +- library/cpp/messagebus/remote_connection.h | 484 +- .../cpp/messagebus/remote_connection_status.cpp | 28 +- library/cpp/messagebus/remote_connection_status.h | 302 +- .../cpp/messagebus/remote_server_connection.cpp | 2 +- library/cpp/messagebus/remote_server_connection.h | 34 +- library/cpp/messagebus/remote_server_session.cpp | 66 +- library/cpp/messagebus/remote_server_session.h | 60 +- 
.../messagebus/remote_server_session_semaphore.cpp | 4 +- .../messagebus/remote_server_session_semaphore.h | 72 +- library/cpp/messagebus/scheduler/scheduler.cpp | 4 +- library/cpp/messagebus/scheduler/scheduler.h | 82 +- library/cpp/messagebus/scheduler/scheduler_ut.cpp | 4 +- library/cpp/messagebus/scheduler_actor.h | 132 +- library/cpp/messagebus/scheduler_actor_ut.cpp | 6 +- library/cpp/messagebus/session.cpp | 186 +- library/cpp/messagebus/session.h | 354 +- library/cpp/messagebus/session_impl.cpp | 54 +- library/cpp/messagebus/session_impl.h | 408 +- library/cpp/messagebus/session_job_count.cpp | 4 +- library/cpp/messagebus/session_job_count.h | 50 +- library/cpp/messagebus/shutdown_state.cpp | 8 +- library/cpp/messagebus/socket_addr.cpp | 76 +- library/cpp/messagebus/socket_addr.h | 168 +- library/cpp/messagebus/storage.cpp | 266 +- library/cpp/messagebus/storage.h | 114 +- library/cpp/messagebus/synchandler.cpp | 150 +- .../cpp/messagebus/test/example/client/client.cpp | 2 +- library/cpp/messagebus/test/example/common/proto.h | 2 +- .../cpp/messagebus/test/example/server/server.cpp | 4 +- library/cpp/messagebus/test/helper/alloc_counter.h | 10 +- library/cpp/messagebus/test/helper/example.cpp | 28 +- library/cpp/messagebus/test/helper/example.h | 200 +- .../cpp/messagebus/test/helper/example_module.cpp | 8 +- .../cpp/messagebus/test/helper/example_module.h | 44 +- library/cpp/messagebus/test/helper/fixed_port.cpp | 2 +- library/cpp/messagebus/test/helper/fixed_port.h | 14 +- .../cpp/messagebus/test/helper/hanging_server.cpp | 2 +- .../cpp/messagebus/test/helper/hanging_server.h | 2 +- .../messagebus/test/helper/message_handler_error.h | 26 +- .../messagebus/test/helper/object_count_check.h | 20 +- library/cpp/messagebus/test/helper/wait_for.h | 8 +- library/cpp/messagebus/test/perftest/perftest.cpp | 130 +- .../cpp/messagebus/test/perftest/simple_proto.cpp | 4 +- .../cpp/messagebus/test/perftest/simple_proto.h | 24 +- 
library/cpp/messagebus/test/ut/count_down_latch.h | 6 +- library/cpp/messagebus/test/ut/locator_uniq_ut.cpp | 10 +- library/cpp/messagebus/test/ut/messagebus_ut.cpp | 102 +- .../test/ut/module_client_one_way_ut.cpp | 8 +- .../cpp/messagebus/test/ut/module_client_ut.cpp | 74 +- .../cpp/messagebus/test/ut/module_server_ut.cpp | 8 +- library/cpp/messagebus/test/ut/moduletest.h | 406 +- library/cpp/messagebus/test/ut/one_way_ut.cpp | 48 +- library/cpp/messagebus/test/ut/starter_ut.cpp | 6 +- library/cpp/messagebus/test/ut/sync_client_ut.cpp | 98 +- library/cpp/messagebus/test/ya.make | 4 +- library/cpp/messagebus/test_utils.h | 16 +- library/cpp/messagebus/use_after_free_checker.cpp | 6 +- library/cpp/messagebus/use_after_free_checker.h | 4 +- library/cpp/messagebus/use_count_checker.cpp | 8 +- library/cpp/messagebus/use_count_checker.h | 4 +- library/cpp/messagebus/vector_swaps.h | 44 +- library/cpp/messagebus/vector_swaps_ut.cpp | 2 +- library/cpp/messagebus/www/concat_strings.h | 8 +- library/cpp/messagebus/www/html_output.cpp | 4 +- library/cpp/messagebus/www/html_output.h | 70 +- library/cpp/messagebus/www/www.cpp | 64 +- library/cpp/messagebus/www/www.h | 54 +- library/cpp/messagebus/ybus.h | 270 +- library/cpp/mime/types/mime.cpp | 146 +- library/cpp/mime/types/mime.h | 82 +- library/cpp/monlib/counters/timer.h | 8 +- library/cpp/monlib/dynamic_counters/counters.h | 2 +- library/cpp/monlib/encode/spack/compression.cpp | 16 +- library/cpp/monlib/service/service.cpp | 6 +- library/cpp/monlib/service/service.h | 2 +- library/cpp/object_factory/object_factory.h | 268 +- library/cpp/object_factory/object_factory_ut.cpp | 24 +- library/cpp/on_disk/chunks/chunked_helpers.cpp | 4 +- library/cpp/on_disk/chunks/chunked_helpers.h | 238 +- library/cpp/on_disk/chunks/chunks_ut.cpp | 32 +- library/cpp/on_disk/chunks/reader.h | 64 +- library/cpp/on_disk/chunks/writer.h | 64 +- library/cpp/on_disk/chunks/ya.make | 2 +- library/cpp/openssl/init/init.cpp | 124 +- 
library/cpp/openssl/init/init.h | 6 +- library/cpp/openssl/init/ya.make | 24 +- library/cpp/openssl/io/stream.cpp | 320 +- library/cpp/openssl/io/stream.h | 32 +- library/cpp/openssl/io/ya.make | 26 +- library/cpp/openssl/ya.make | 12 +- library/cpp/packedtypes/fixed_point.h | 108 +- library/cpp/packedtypes/longs.cpp | 2 +- library/cpp/packedtypes/longs.h | 416 +- library/cpp/packedtypes/longs_ut.cpp | 16 +- library/cpp/packedtypes/packed.h | 40 +- library/cpp/packedtypes/packed_ut.cpp | 24 +- library/cpp/packedtypes/packedfloat.cpp | 12 +- library/cpp/packedtypes/packedfloat.h | 210 +- library/cpp/packedtypes/packedfloat_ut.cpp | 36 +- library/cpp/packedtypes/ya.make | 2 +- library/cpp/packedtypes/zigzag.h | 4 +- library/cpp/packers/packers.cpp | 2 +- library/cpp/packers/packers.h | 124 +- library/cpp/packers/proto_packer.h | 2 +- library/cpp/packers/ut/packers_ut.cpp | 50 +- library/cpp/packers/ut/proto_packer_ut.cpp | 2 +- library/cpp/pop_count/benchmark/main.cpp | 94 +- library/cpp/pop_count/benchmark/ya.make | 20 +- library/cpp/pop_count/popcount.cpp | 52 +- library/cpp/pop_count/popcount.h | 92 +- library/cpp/pop_count/popcount_ut.cpp | 126 +- library/cpp/pop_count/ut/ya.make | 14 +- library/cpp/pop_count/ya.make | 16 +- library/cpp/protobuf/json/config.h | 210 +- library/cpp/protobuf/json/field_option.h | 56 +- library/cpp/protobuf/json/filter.h | 62 +- library/cpp/protobuf/json/inline.h | 110 +- library/cpp/protobuf/json/json2proto.cpp | 98 +- library/cpp/protobuf/json/json2proto.h | 214 +- library/cpp/protobuf/json/json_output.h | 126 +- library/cpp/protobuf/json/json_output_create.cpp | 24 +- library/cpp/protobuf/json/json_output_create.h | 20 +- library/cpp/protobuf/json/json_value_output.cpp | 198 +- library/cpp/protobuf/json/json_value_output.h | 100 +- library/cpp/protobuf/json/json_writer_output.cpp | 28 +- library/cpp/protobuf/json/json_writer_output.h | 164 +- library/cpp/protobuf/json/name_generator.h | 16 +- 
library/cpp/protobuf/json/proto2json.cpp | 76 +- library/cpp/protobuf/json/proto2json.h | 94 +- library/cpp/protobuf/json/proto2json_printer.cpp | 18 +- library/cpp/protobuf/json/string_transform.cpp | 72 +- library/cpp/protobuf/json/string_transform.h | 156 +- library/cpp/protobuf/json/ut/filter_ut.cpp | 112 +- library/cpp/protobuf/json/ut/inline_ut.cpp | 158 +- library/cpp/protobuf/json/ut/json.h | 72 +- library/cpp/protobuf/json/ut/json2proto_ut.cpp | 1286 +- library/cpp/protobuf/json/ut/proto.h | 92 +- library/cpp/protobuf/json/ut/proto2json_ut.cpp | 1456 +- .../cpp/protobuf/json/ut/string_transform_ut.cpp | 10 +- library/cpp/protobuf/util/cast.h | 272 +- library/cpp/protobuf/util/is_equal.cpp | 270 +- library/cpp/protobuf/util/is_equal.h | 22 +- library/cpp/protobuf/util/merge.cpp | 58 +- library/cpp/protobuf/util/merge.h | 14 +- library/cpp/protobuf/util/merge_ut.cpp | 8 +- library/cpp/protobuf/util/path.cpp | 98 +- library/cpp/protobuf/util/path.h | 64 +- library/cpp/protobuf/util/pb_io.cpp | 70 +- library/cpp/protobuf/util/pb_io.h | 4 +- library/cpp/protobuf/util/pb_io_ut.cpp | 20 +- library/cpp/protobuf/util/pb_utils.h | 14 +- library/cpp/protobuf/util/repeated_field_utils.h | 128 +- library/cpp/protobuf/util/simple_reflection.cpp | 108 +- library/cpp/protobuf/util/simple_reflection.h | 488 +- library/cpp/protobuf/util/simple_reflection_ut.cpp | 162 +- library/cpp/protobuf/util/sort.h | 30 +- library/cpp/protobuf/util/traits.h | 560 +- library/cpp/protobuf/util/walk.h | 4 +- library/cpp/protobuf/util/walk_ut.cpp | 10 +- library/cpp/protobuf/util/ya.make | 2 +- library/cpp/regex/hyperscan/hyperscan.cpp | 4 +- library/cpp/regex/hyperscan/hyperscan.h | 4 +- library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 18 +- library/cpp/regex/pcre/regexp.cpp | 122 +- library/cpp/regex/pcre/regexp.h | 28 +- library/cpp/regex/pcre/regexp_ut.cpp | 30 +- library/cpp/regex/pcre/ya.make | 2 +- library/cpp/regex/pire/extraencodings.cpp | 116 +- 
library/cpp/regex/pire/inline/ya.make | 4 +- library/cpp/regex/pire/pcre2pire.cpp | 2 +- library/cpp/regex/pire/pcre2pire.h | 4 +- library/cpp/regex/pire/pire.h | 14 +- library/cpp/regex/pire/regexp.h | 110 +- library/cpp/regex/pire/ut/regexp_ut.cpp | 36 +- library/cpp/regex/pire/ut/ya.make | 14 +- library/cpp/regex/pire/ya.make | 2 +- library/cpp/regex/ya.make | 16 +- library/cpp/resource/registry.cpp | 112 +- library/cpp/resource/registry.h | 50 +- library/cpp/resource/resource.cpp | 82 +- library/cpp/resource/resource.h | 28 +- library/cpp/resource/ut/lib/data | 2 +- library/cpp/resource/ut/lib/ya.make | 18 +- library/cpp/resource/ut/resource_ut.cpp | 8 +- library/cpp/resource/ut/ya.make | 16 +- library/cpp/resource/ya.make | 26 +- library/cpp/retry/retry.h | 4 +- library/cpp/retry/retry_ut.cpp | 4 +- library/cpp/retry/utils.h | 2 +- library/cpp/scheme/domscheme_traits.h | 46 +- library/cpp/scheme/scheme.cpp | 290 +- library/cpp/scheme/scheme.h | 122 +- library/cpp/scheme/scheme_cast.h | 82 +- library/cpp/scheme/scimpl.h | 134 +- library/cpp/scheme/scimpl_defs.h | 34 +- library/cpp/scheme/scimpl_json_read.cpp | 4 +- library/cpp/scheme/scimpl_json_write.cpp | 108 +- library/cpp/scheme/scimpl_private.cpp | 2 +- library/cpp/scheme/scimpl_private.h | 10 +- library/cpp/scheme/scimpl_protobuf.cpp | 264 +- library/cpp/scheme/tests/ut/scheme_cast_ut.cpp | 34 +- library/cpp/scheme/tests/ut/scheme_json_ut.cpp | 66 +- library/cpp/scheme/tests/ut/scheme_path_ut.cpp | 44 +- library/cpp/scheme/tests/ut/scheme_proto_ut.cpp | 206 +- library/cpp/scheme/tests/ut/scheme_ut.cpp | 28 +- library/cpp/scheme/ut_utils/scheme_ut_utils.cpp | 16 +- library/cpp/scheme/ut_utils/scheme_ut_utils.h | 16 +- library/cpp/sighandler/async_signals_handler.cpp | 314 +- library/cpp/sighandler/async_signals_handler.h | 4 +- library/cpp/sliding_window/sliding_window.h | 390 +- library/cpp/sliding_window/sliding_window_ut.cpp | 126 +- library/cpp/sse/sse.h | 2 +- library/cpp/sse/sse2neon.h | 100 +- 
library/cpp/sse/ut/test.cpp | 80 +- library/cpp/sse/ut/ya.make | 2 +- library/cpp/sse/ya.make | 14 +- library/cpp/streams/brotli/brotli.cpp | 6 +- library/cpp/streams/brotli/brotli.h | 4 +- library/cpp/streams/bzip2/bzip2.cpp | 342 +- library/cpp/streams/bzip2/bzip2.h | 68 +- library/cpp/streams/bzip2/bzip2_ut.cpp | 62 +- library/cpp/streams/bzip2/ut/ya.make | 14 +- library/cpp/streams/bzip2/ya.make | 24 +- library/cpp/streams/lz/lz.cpp | 1074 +- library/cpp/streams/lz/lz.h | 196 +- library/cpp/streams/lz/lz_ut.cpp | 218 +- library/cpp/streams/lz/ut/ya.make | 24 +- library/cpp/streams/lz/ya.make | 32 +- library/cpp/streams/lzma/lzma.cpp | 912 +- library/cpp/streams/lzma/lzma.h | 52 +- library/cpp/streams/lzma/lzma_ut.cpp | 176 +- library/cpp/streams/lzma/ut/ya.make | 20 +- library/cpp/streams/lzma/ya.make | 30 +- library/cpp/streams/ya.make | 36 +- library/cpp/streams/zc_memory_input/ya.make | 16 +- .../streams/zc_memory_input/zc_memory_input.cpp | 2 +- .../cpp/streams/zc_memory_input/zc_memory_input.h | 20 +- library/cpp/string_utils/base64/base64.cpp | 66 +- library/cpp/string_utils/base64/base64.h | 58 +- library/cpp/string_utils/base64/base64_ut.cpp | 20 +- .../cpp/string_utils/indent_text/indent_text.cpp | 4 +- library/cpp/string_utils/indent_text/ya.make | 18 +- .../levenshtein_diff/levenshtein_diff.h | 176 +- library/cpp/string_utils/levenshtein_diff/ya.make | 22 +- library/cpp/string_utils/parse_size/parse_size.cpp | 120 +- library/cpp/string_utils/parse_size/parse_size.h | 52 +- library/cpp/string_utils/quote/quote.cpp | 178 +- library/cpp/string_utils/quote/quote.h | 42 +- library/cpp/string_utils/quote/quote_ut.cpp | 36 +- .../relaxed_escaper/relaxed_escaper.cpp | 2 +- .../string_utils/relaxed_escaper/relaxed_escaper.h | 326 +- .../relaxed_escaper/relaxed_escaper_ut.cpp | 34 +- .../cpp/string_utils/relaxed_escaper/ut/ya.make | 16 +- library/cpp/string_utils/relaxed_escaper/ya.make | 18 +- library/cpp/string_utils/scan/scan.cpp | 2 +- 
library/cpp/string_utils/scan/scan.h | 28 +- library/cpp/string_utils/url/url.cpp | 84 +- library/cpp/string_utils/url/url_ut.cpp | 6 +- library/cpp/svnversion/svnversion.cpp | 14 +- library/cpp/svnversion/svnversion.h | 4 +- library/cpp/svnversion/ya.make | 2 +- .../terminate_handler/sample/pure-virtual/main.cpp | 4 +- library/cpp/terminate_handler/sample/segv/main.cpp | 2 +- library/cpp/testing/benchmark/bench.cpp | 648 +- library/cpp/testing/benchmark/bench.h | 72 +- library/cpp/testing/benchmark/examples/main.cpp | 332 +- library/cpp/testing/benchmark/examples/ya.make | 12 +- library/cpp/testing/benchmark/main/main.cpp | 6 +- library/cpp/testing/benchmark/main/ya.make | 2 +- library/cpp/testing/benchmark/ya.make | 6 +- library/cpp/testing/gmock_in_unittest/gmock.h | 2 +- .../cpp/testing/gmock_in_unittest/registration.cpp | 2 +- library/cpp/testing/unittest/checks.cpp | 2 +- library/cpp/testing/unittest/env.h | 2 +- library/cpp/testing/unittest/example_ut.cpp | 18 +- library/cpp/testing/unittest/gtest.cpp | 106 +- library/cpp/testing/unittest/gtest.h | 188 +- library/cpp/testing/unittest/plugin.cpp | 4 +- library/cpp/testing/unittest/plugin.h | 4 +- library/cpp/testing/unittest/registar.cpp | 58 +- library/cpp/testing/unittest/registar.h | 664 +- library/cpp/testing/unittest/tests_data.cpp | 2 +- library/cpp/testing/unittest/tests_data.h | 2 +- library/cpp/testing/unittest/ut/main.cpp | 50 +- library/cpp/testing/unittest/utmain.cpp | 516 +- library/cpp/testing/unittest/utmain.h | 10 +- library/cpp/testing/unittest/ya.make | 22 +- library/cpp/testing/unittest_main/main.cpp | 8 +- library/cpp/testing/unittest_main/ya.make | 24 +- library/cpp/threading/atomic/bool.h | 12 +- library/cpp/threading/chunk_queue/queue.h | 958 +- library/cpp/threading/chunk_queue/queue_ut.cpp | 284 +- library/cpp/threading/future/async.h | 14 +- library/cpp/threading/future/async_ut.cpp | 48 +- library/cpp/threading/future/core/future-inl.h | 1200 +- 
library/cpp/threading/future/core/future.h | 240 +- library/cpp/threading/future/future_ut.cpp | 474 +- library/cpp/threading/future/legacy_future.h | 104 +- library/cpp/threading/future/legacy_future_ut.cpp | 78 +- library/cpp/threading/future/perf/main.cpp | 20 +- library/cpp/threading/future/wait/wait-inl.h | 10 +- library/cpp/threading/future/wait/wait.cpp | 16 +- library/cpp/threading/future/wait/wait.h | 8 +- .../light_rw_lock/bench/lightrwlock_test.cpp | 2 +- .../cpp/threading/light_rw_lock/lightrwlock.cpp | 2 +- library/cpp/threading/light_rw_lock/lightrwlock.h | 144 +- .../threading/local_executor/local_executor.cpp | 2 +- .../cpp/threading/local_executor/local_executor.h | 6 +- .../local_executor/ut/local_executor_ut.cpp | 492 +- .../threading/poor_man_openmp/thread_helper.cpp | 12 +- .../cpp/threading/poor_man_openmp/thread_helper.h | 108 +- .../threading/poor_man_openmp/thread_helper_ut.cpp | 42 +- library/cpp/threading/poor_man_openmp/ut/ya.make | 14 +- library/cpp/threading/queue/mpsc_htswap.h | 12 +- .../cpp/threading/queue/mpsc_intrusive_unordered.h | 2 +- library/cpp/threading/queue/mpsc_read_as_filled.h | 14 +- .../cpp/threading/queue/mpsc_vinfarr_obstructive.h | 8 +- library/cpp/threading/queue/queue_ut.cpp | 2 +- library/cpp/threading/queue/tune.h | 16 +- library/cpp/threading/queue/unordered_ut.cpp | 8 +- library/cpp/threading/queue/ut_helpers.h | 12 +- library/cpp/threading/skip_list/compare.h | 114 +- library/cpp/threading/skip_list/perf/main.cpp | 586 +- library/cpp/threading/skip_list/skiplist.h | 642 +- library/cpp/threading/skip_list/skiplist_ut.cpp | 60 +- .../threading/task_scheduler/task_scheduler_ut.cpp | 24 +- library/cpp/timezone_conversion/civil-inl.h | 28 +- library/cpp/timezone_conversion/civil.cpp | 30 +- library/cpp/timezone_conversion/civil.h | 2 +- library/cpp/timezone_conversion/convert.cpp | 44 +- library/cpp/timezone_conversion/ut/civil_ut.cpp | 4 +- library/cpp/timezone_conversion/ut/convert_ut.cpp | 52 +- 
library/cpp/tld/gen_tld.py | 2 +- library/cpp/tld/tld.cpp | 70 +- library/cpp/tld/tld.h | 30 +- library/cpp/tld/tld_ut.cpp | 6 +- library/cpp/tld/ya.make | 2 +- .../cpp/unicode/normalization/custom_encoder.cpp | 30 +- library/cpp/unicode/normalization/custom_encoder.h | 4 +- .../unicode/normalization/decomposition_table.h | 36 +- library/cpp/unicode/normalization/normalization.h | 40 +- .../unicode/normalization/ut/normalization_ut.cpp | 2 +- library/cpp/unicode/punycode/punycode.h | 2 +- library/cpp/unicode/punycode/punycode_ut.cpp | 24 +- library/cpp/unicode/ya.make | 4 +- library/cpp/uri/assign.cpp | 652 +- library/cpp/uri/common.cpp | 184 +- library/cpp/uri/common.h | 790 +- library/cpp/uri/encode.cpp | 386 +- library/cpp/uri/encode.h | 534 +- library/cpp/uri/http_url.h | 38 +- library/cpp/uri/location.cpp | 44 +- library/cpp/uri/location.h | 4 +- library/cpp/uri/location_ut.cpp | 20 +- library/cpp/uri/other.cpp | 20 +- library/cpp/uri/other.h | 12 +- library/cpp/uri/parse.cpp | 336 +- library/cpp/uri/parse.h | 634 +- library/cpp/uri/qargs.cpp | 480 +- library/cpp/uri/qargs.h | 22 +- library/cpp/uri/uri-ru_ut.cpp | 190 +- library/cpp/uri/uri.cpp | 1004 +- library/cpp/uri/uri.h | 1152 +- library/cpp/uri/uri_ut.cpp | 1718 +- library/cpp/uri/uri_ut.h | 116 +- library/cpp/uri/ut/ya.make | 4 +- library/cpp/uri/ya.make | 8 +- library/cpp/xml/document/libxml-guards.h | 4 +- library/cpp/xml/document/node-attr.h | 96 +- library/cpp/xml/document/xml-document-decl.h | 144 +- library/cpp/xml/document/xml-document.cpp | 148 +- library/cpp/xml/document/xml-document_ut.cpp | 96 +- library/cpp/xml/document/xml-options.h | 2 +- library/cpp/xml/document/xml-textreader.cpp | 2 +- library/cpp/xml/document/xml-textreader.h | 2 +- library/cpp/xml/document/xml-textreader_ut.cpp | 78 +- library/cpp/xml/document/ya.make | 6 +- library/cpp/xml/init/init.cpp | 62 +- library/cpp/xml/init/init.h | 10 +- library/cpp/xml/init/ptr.cpp | 2 +- library/cpp/xml/init/ptr.h | 64 +- 
library/cpp/xml/init/ya.make | 28 +- library/cpp/xml/ya.make | 8 +- library/cpp/yson/consumer.cpp | 8 +- library/cpp/yson/consumer.h | 4 +- library/cpp/yson/detail.h | 1504 +- library/cpp/yson/format.h | 24 +- library/cpp/yson/json/json_writer.cpp | 320 +- library/cpp/yson/json/json_writer.h | 102 +- library/cpp/yson/json/yson2json_adapter.h | 2 +- library/cpp/yson/lexer.cpp | 58 +- library/cpp/yson/lexer.h | 22 +- library/cpp/yson/lexer_detail.h | 492 +- library/cpp/yson/parser.cpp | 244 +- library/cpp/yson/parser.h | 86 +- library/cpp/yson/parser_detail.h | 646 +- library/cpp/yson/public.h | 8 +- library/cpp/yson/token.cpp | 418 +- library/cpp/yson/token.h | 168 +- library/cpp/yson/tokenizer.cpp | 66 +- library/cpp/yson/tokenizer.h | 32 +- library/cpp/yson/varint.cpp | 102 +- library/cpp/yson/varint.h | 22 +- library/cpp/yson/writer.cpp | 510 +- library/cpp/yson/writer.h | 104 +- library/cpp/yson/zigzag.h | 38 +- library/cpp/yson_pull/bridge.h | 28 +- library/cpp/yson_pull/buffer.h | 122 +- library/cpp/yson_pull/consumer.cpp | 4 +- library/cpp/yson_pull/consumer.h | 10 +- library/cpp/yson_pull/detail/byte_reader.h | 108 +- library/cpp/yson_pull/detail/byte_writer.h | 112 +- library/cpp/yson_pull/detail/cescape.h | 172 +- library/cpp/yson_pull/detail/cescape_decode.h | 274 +- library/cpp/yson_pull/detail/cescape_encode.h | 188 +- library/cpp/yson_pull/detail/fail.h | 24 +- library/cpp/yson_pull/detail/format_string.h | 30 +- library/cpp/yson_pull/detail/input/buffered.h | 44 +- library/cpp/yson_pull/detail/input/stdio_file.h | 48 +- library/cpp/yson_pull/detail/input/stream.h | 8 +- library/cpp/yson_pull/detail/lexer_base.h | 518 +- library/cpp/yson_pull/detail/macros.h | 20 +- library/cpp/yson_pull/detail/number.h | 56 +- library/cpp/yson_pull/detail/output/buffered.h | 80 +- library/cpp/yson_pull/detail/output/stdio_file.h | 38 +- library/cpp/yson_pull/detail/output/stream.h | 8 +- library/cpp/yson_pull/detail/reader.h | 1082 +- 
library/cpp/yson_pull/detail/stream_counter.h | 86 +- library/cpp/yson_pull/detail/symbols.h | 92 +- library/cpp/yson_pull/detail/traits.h | 44 +- library/cpp/yson_pull/detail/varint.h | 434 +- library/cpp/yson_pull/detail/writer.h | 758 +- library/cpp/yson_pull/detail/zigzag.h | 18 +- library/cpp/yson_pull/event.cpp | 2 +- library/cpp/yson_pull/event.h | 80 +- library/cpp/yson_pull/exceptions.cpp | 4 +- library/cpp/yson_pull/exceptions.h | 60 +- library/cpp/yson_pull/input.h | 114 +- library/cpp/yson_pull/output.h | 68 +- library/cpp/yson_pull/position_info.h | 18 +- library/cpp/yson_pull/range.h | 32 +- library/cpp/yson_pull/read_ops.cpp | 46 +- library/cpp/yson_pull/read_ops.h | 112 +- library/cpp/yson_pull/reader.cpp | 12 +- library/cpp/yson_pull/reader.h | 34 +- library/cpp/yson_pull/scalar.cpp | 10 +- library/cpp/yson_pull/scalar.h | 98 +- library/cpp/yson_pull/stream_type.h | 12 +- library/cpp/yson_pull/ut/cescape_ut.cpp | 82 +- library/cpp/yson_pull/ut/loop_ut.cpp | 340 +- library/cpp/yson_pull/ut/reader_ut.cpp | 264 +- library/cpp/yson_pull/ut/writer_ut.cpp | 294 +- library/cpp/yson_pull/writer.cpp | 12 +- library/cpp/yson_pull/writer.h | 120 +- library/python/pytest/main.py | 40 +- library/python/pytest/plugins/ya.py | 2 +- library/python/pytest/pytest.yatest.ini | 4 +- library/python/resource/ut/lib/qw.txt | 2 +- library/python/resource/ut/lib/test_simple.py | 6 +- library/python/resource/ut/lib/ya.make | 26 +- library/python/resource/ut/py2/ya.make | 14 +- library/python/resource/ut/ya.make | 6 +- library/python/resource/ya.make | 10 +- library/python/runtime_py3/__res.pyx | 48 +- library/python/runtime_py3/importer.pxi | 54 +- library/python/runtime_py3/main/main.c | 8 +- library/python/runtime_py3/ya.make | 14 +- library/python/svn_version/__svn_version.pyx | 14 +- library/python/svn_version/ut/lib/test_simple.py | 8 +- library/python/svn_version/ut/lib/ya.make | 26 +- library/python/svn_version/ut/py2/ya.make | 16 +- 
library/python/svn_version/ut/py3/ya.make | 18 +- library/python/svn_version/ut/ya.make | 10 +- library/python/svn_version/ya.make | 24 +- library/python/symbols/libc/syms.cpp | 170 +- library/python/symbols/libc/ya.make | 8 +- library/python/symbols/module/__init__.py | 96 +- library/python/symbols/module/module.cpp | 90 +- library/python/symbols/module/ya.make | 36 +- library/python/symbols/registry/syms.cpp | 62 +- library/python/symbols/registry/syms.h | 48 +- library/python/symbols/registry/ya.make | 18 +- library/python/symbols/ya.make | 12 +- library/python/testing/import_test/import_test.py | 14 +- library/python/ya.make | 54 +- library/ya.make | 8 +- tools/archiver/main.cpp | 414 +- tools/archiver/ya.make | 8 +- tools/enum_parser/ya.make | 2 +- tools/ya.make | 12 +- util/README.md | 24 +- util/charset/benchmark/to_lower/main.cpp | 52 +- util/charset/benchmark/utf8_to_wide/main.cpp | 64 +- util/charset/generated/unidata.cpp | 2244 +-- util/charset/recode_result.cpp | 2 +- util/charset/recode_result.h | 2 +- util/charset/unicode_table.cpp | 2 +- util/charset/unicode_table.h | 6 +- util/charset/unidata.cpp | 2 +- util/charset/unidata.h | 190 +- util/charset/ut/ya.make | 4 +- util/charset/utf8.cpp | 2 +- util/charset/utf8.h | 34 +- util/charset/wide.cpp | 60 +- util/charset/wide.h | 72 +- util/charset/wide_sse41.cpp | 48 +- util/charset/wide_ut.cpp | 166 +- util/charset/ya.make | 16 +- util/datetime/base.cpp | 198 +- util/datetime/base.h | 268 +- util/datetime/base_ut.cpp | 424 +- util/datetime/constants.cpp | 2 +- util/datetime/cputimer.cpp | 42 +- util/datetime/cputimer.h | 66 +- util/datetime/parser.h | 26 +- util/datetime/parser.rl6 | 24 +- util/datetime/parser_ut.cpp | 80 +- util/datetime/strptime.cpp | 872 +- util/datetime/systime.cpp | 70 +- util/datetime/systime.h | 54 +- util/datetime/uptime.cpp | 8 +- util/datetime/ut/ya.make | 6 +- util/digest/benchmark/murmur/main.cpp | 14 +- util/digest/city.cpp | 622 +- util/digest/city.h | 64 +- 
util/digest/fnv.cpp | 2 +- util/digest/fnv.h | 108 +- util/digest/fnv_ut.cpp | 40 +- util/digest/multi.h | 4 +- util/digest/murmur.cpp | 4 +- util/digest/murmur.h | 32 +- util/digest/murmur_ut.cpp | 88 +- util/digest/numeric.cpp | 2 +- util/digest/numeric.h | 132 +- util/digest/sequence.h | 4 +- util/digest/sequence_ut.cpp | 4 +- util/digest/ut/ya.make | 8 +- util/draft/date.cpp | 36 +- util/draft/date.h | 46 +- util/draft/date_ut.cpp | 2 +- util/draft/datetime.cpp | 156 +- util/draft/datetime.h | 128 +- util/draft/datetime_ut.cpp | 26 +- util/draft/enum.cpp | 2 +- util/draft/enum.h | 26 +- util/draft/holder_vector.cpp | 2 +- util/draft/holder_vector.h | 32 +- util/draft/ip.cpp | 2 +- util/draft/ip.h | 18 +- util/draft/matrix.cpp | 2 +- util/draft/matrix.h | 52 +- util/draft/memory.cpp | 2 +- util/draft/memory.h | 30 +- util/draft/memory_ut.cpp | 76 +- util/draft/ut/ya.make | 2 +- util/draft/ya.make | 14 +- util/folder/dirent_win.c | 42 +- util/folder/dirent_win.h | 72 +- util/folder/dirut.cpp | 240 +- util/folder/dirut.h | 54 +- util/folder/dirut_ut.cpp | 224 +- util/folder/filelist.cpp | 28 +- util/folder/filelist.h | 60 +- util/folder/filelist_ut.cpp | 16 +- util/folder/fts.cpp | 572 +- util/folder/fts.h | 158 +- util/folder/fts_ut.cpp | 50 +- util/folder/iterator.cpp | 20 +- util/folder/iterator.h | 182 +- util/folder/iterator_ut.cpp | 298 +- util/folder/lstat_win.c | 8 +- util/folder/lstat_win.h | 14 +- util/folder/path.cpp | 178 +- util/folder/path.h | 66 +- util/folder/path_ut.cpp | 210 +- util/folder/pathsplit.cpp | 210 +- util/folder/pathsplit.h | 168 +- util/folder/pathsplit_ut.cpp | 362 +- util/folder/tempdir.cpp | 4 +- util/folder/ut/ya.make | 14 +- util/generic/adaptor.h | 24 +- util/generic/adaptor_ut.cpp | 4 +- util/generic/algorithm.cpp | 2 +- util/generic/algorithm.h | 374 +- util/generic/algorithm_ut.cpp | 118 +- util/generic/array_ref.cpp | 2 +- util/generic/array_ref.h | 30 +- util/generic/array_ref_ut.cpp | 34 +- util/generic/array_size.cpp | 2 
+- util/generic/array_size.h | 46 +- util/generic/array_size_ut.cpp | 28 +- util/generic/benchmark/cont_speed/main.cpp | 234 +- util/generic/benchmark/cont_speed/ya.make | 18 +- util/generic/benchmark/fastclp2/main.cpp | 6 +- util/generic/benchmark/rotate_bits/main.cpp | 4 +- util/generic/benchmark/singleton/f.cpp | 36 +- util/generic/benchmark/singleton/main.cpp | 106 +- util/generic/benchmark/singleton/ya.make | 16 +- util/generic/benchmark/smart_pointers/main.cpp | 26 +- util/generic/benchmark/smart_pointers/ya.make | 14 +- util/generic/benchmark/sort/main.cpp | 136 +- util/generic/benchmark/sort/ya.make | 14 +- util/generic/benchmark/string/benchmarks.h | 4 +- util/generic/benchmark/ya.make | 12 +- util/generic/bitmap.cpp | 2 +- util/generic/bitmap.h | 458 +- util/generic/bitmap_ut.cpp | 86 +- util/generic/bitops.cpp | 6 +- util/generic/bitops.h | 66 +- util/generic/bitops_ut.cpp | 578 +- util/generic/bt_exception.cpp | 2 +- util/generic/bt_exception.h | 42 +- util/generic/buffer.cpp | 64 +- util/generic/buffer.h | 306 +- util/generic/buffer_ut.cpp | 102 +- util/generic/cast.cpp | 2 +- util/generic/cast.h | 134 +- util/generic/cast_ut.cpp | 66 +- util/generic/deque.cpp | 2 +- util/generic/deque.h | 22 +- util/generic/deque_ut.cpp | 394 +- util/generic/explicit_type.h | 4 +- util/generic/explicit_type_ut.cpp | 40 +- util/generic/fastqueue.cpp | 2 +- util/generic/fastqueue.h | 10 +- util/generic/flags.h | 16 +- util/generic/flags_ut.cpp | 14 +- util/generic/function.cpp | 2 +- util/generic/function.h | 136 +- util/generic/function_ut.cpp | 118 +- util/generic/fuzz/vector/main.cpp | 90 +- util/generic/fuzz/vector/ya.make | 14 +- util/generic/fuzz/ya.make | 6 +- util/generic/fwd.cpp | 2 +- util/generic/fwd.h | 72 +- util/generic/guid.cpp | 54 +- util/generic/guid.h | 38 +- util/generic/guid_ut.cpp | 150 +- util/generic/hash.h | 2088 +- util/generic/hash_primes.cpp | 56 +- util/generic/hash_primes.h | 14 +- util/generic/hash_primes_ut.cpp | 18 +- 
util/generic/hash_set.cpp | 2 +- util/generic/hash_set.h | 486 +- util/generic/hash_ut.cpp | 530 +- util/generic/intrlist.cpp | 2 +- util/generic/intrlist.h | 422 +- util/generic/intrlist_ut.cpp | 372 +- util/generic/is_in.cpp | 2 +- util/generic/is_in.h | 44 +- util/generic/is_in_ut.cpp | 20 +- util/generic/iterator.cpp | 2 +- util/generic/iterator.h | 104 +- util/generic/iterator_range.h | 10 +- util/generic/iterator_ut.cpp | 14 +- util/generic/lazy_value.h | 14 +- util/generic/lazy_value_ut.cpp | 12 +- util/generic/list.cpp | 2 +- util/generic/list.h | 22 +- util/generic/map.cpp | 2 +- util/generic/map.h | 20 +- util/generic/map_ut.cpp | 664 +- util/generic/mapfindptr.cpp | 2 +- util/generic/mapfindptr.h | 18 +- util/generic/mapfindptr_ut.cpp | 44 +- util/generic/maybe.cpp | 2 +- util/generic/maybe.h | 100 +- util/generic/maybe_traits.h | 44 +- util/generic/maybe_ut.cpp | 138 +- util/generic/mem_copy.cpp | 2 +- util/generic/mem_copy.h | 102 +- util/generic/mem_copy_ut.cpp | 188 +- util/generic/noncopyable.cpp | 2 +- util/generic/noncopyable.h | 6 +- util/generic/object_counter.cpp | 2 +- util/generic/object_counter.h | 20 +- util/generic/overloaded.h | 4 +- util/generic/overloaded_ut.cpp | 12 +- util/generic/ptr.cpp | 24 +- util/generic/ptr.h | 1210 +- util/generic/ptr_ut.cpp | 422 +- util/generic/queue.cpp | 2 +- util/generic/queue.h | 54 +- util/generic/queue_ut.cpp | 202 +- util/generic/refcount.cpp | 2 +- util/generic/refcount.h | 110 +- util/generic/reserve.h | 2 +- util/generic/scope.h | 30 +- util/generic/scope_ut.cpp | 28 +- util/generic/serialized_enum.h | 14 +- util/generic/serialized_enum_ut.cpp | 10 +- util/generic/set.cpp | 2 +- util/generic/set.h | 20 +- util/generic/set_ut.cpp | 644 +- util/generic/singleton.cpp | 90 +- util/generic/singleton.h | 178 +- util/generic/singleton_ut.cpp | 24 +- util/generic/size_literals.h | 20 +- util/generic/stack.cpp | 2 +- util/generic/stack.h | 20 +- util/generic/stack_ut.cpp | 4 +- util/generic/store_policy.cpp 
| 2 +- util/generic/store_policy.h | 138 +- util/generic/store_policy_ut.cpp | 6 +- util/generic/strbase.h | 136 +- util/generic/strbuf.cpp | 4 +- util/generic/strbuf.h | 132 +- util/generic/strbuf_ut.cpp | 82 +- util/generic/strfcpy.cpp | 32 +- util/generic/strfcpy.h | 2 +- util/generic/string.cpp | 62 +- util/generic/string.h | 988 +- util/generic/string.pxd | 70 +- util/generic/string_hash.h | 4 +- util/generic/string_transparent_hash_ut.cpp | 6 +- util/generic/string_ut.cpp | 572 +- util/generic/string_ut.h | 86 +- util/generic/typelist.cpp | 4 +- util/generic/typelist.h | 110 +- util/generic/typelist_ut.cpp | 44 +- util/generic/typetraits.cpp | 2 +- util/generic/typetraits.h | 236 +- util/generic/typetraits_ut.cpp | 130 +- util/generic/ut/ya.make | 62 +- util/generic/utility.cpp | 4 +- util/generic/utility.h | 78 +- util/generic/utility_ut.cpp | 290 +- util/generic/va_args.cpp | 8 +- util/generic/va_args.h | 1178 +- util/generic/va_args_ut.cpp | 84 +- util/generic/variant.cpp | 2 +- util/generic/variant.h | 4 +- util/generic/vector.cpp | 2 +- util/generic/vector.h | 64 +- util/generic/vector.pxd | 62 +- util/generic/vector_ut.cpp | 686 +- util/generic/xrange.h | 52 +- util/generic/xrange_ut.cpp | 28 +- util/generic/yexception.cpp | 86 +- util/generic/yexception.h | 116 +- util/generic/yexception_ut.cpp | 356 +- util/generic/yexception_ut.h | 6 +- util/generic/ylimits.cpp | 2 +- util/generic/ylimits.h | 22 +- util/generic/ylimits_ut.cpp | 244 +- util/generic/ymath.cpp | 4 +- util/generic/ymath.h | 100 +- util/generic/ymath_ut.cpp | 220 +- util/memory/addstorage.cpp | 2 +- util/memory/addstorage.h | 90 +- util/memory/addstorage_ut.cpp | 46 +- util/memory/alloc.cpp | 38 +- util/memory/alloc.h | 40 +- util/memory/benchmark/pool/main.cpp | 20 +- util/memory/benchmark/ya.make | 2 +- util/memory/blob.cpp | 430 +- util/memory/blob.h | 262 +- util/memory/blob_ut.cpp | 122 +- util/memory/mmapalloc.cpp | 62 +- util/memory/mmapalloc.h | 14 +- util/memory/pool.cpp | 76 +- 
util/memory/pool.h | 430 +- util/memory/pool_ut.cpp | 150 +- util/memory/segmented_string_pool.cpp | 2 +- util/memory/segmented_string_pool.h | 82 +- util/memory/segpool_alloc.cpp | 2 +- util/memory/segpool_alloc.h | 52 +- util/memory/smallobj.cpp | 2 +- util/memory/smallobj.h | 182 +- util/memory/smallobj_ut.cpp | 126 +- util/memory/tempbuf.cpp | 484 +- util/memory/tempbuf.h | 132 +- util/memory/tempbuf_ut.cpp | 116 +- util/memory/ut/ya.make | 12 +- util/network/address.cpp | 206 +- util/network/address.h | 190 +- util/network/endpoint.h | 6 +- util/network/endpoint_ut.cpp | 52 +- util/network/hostip.cpp | 106 +- util/network/hostip.h | 20 +- util/network/init.cpp | 30 +- util/network/init.h | 76 +- util/network/interface.cpp | 92 +- util/network/interface.h | 4 +- util/network/iovec.cpp | 2 +- util/network/iovec.h | 110 +- util/network/ip.cpp | 2 +- util/network/ip.h | 158 +- util/network/ip_ut.cpp | 58 +- util/network/nonblock.cpp | 196 +- util/network/nonblock.h | 14 +- util/network/pair.cpp | 154 +- util/network/pair.h | 14 +- util/network/poller.cpp | 114 +- util/network/poller.h | 92 +- util/network/poller_ut.cpp | 14 +- util/network/pollerimpl.cpp | 2 +- util/network/pollerimpl.h | 984 +- util/network/sock.cpp | 2 +- util/network/sock.h | 112 +- util/network/sock_ut.cpp | 274 +- util/network/socket.cpp | 1230 +- util/network/socket.h | 580 +- util/network/socket_ut.cpp | 146 +- util/network/ut/ya.make | 20 +- util/random/benchmark/prng/main.cpp | 204 +- util/random/common_ops.cpp | 2 +- util/random/common_ops.h | 198 +- util/random/common_ops_ut.cpp | 124 +- util/random/easy.cpp | 2 +- util/random/easy.h | 92 +- util/random/easy_ut.cpp | 64 +- util/random/entropy.cpp | 382 +- util/random/entropy.h | 30 +- util/random/entropy_ut.cpp | 20 +- util/random/fast.cpp | 90 +- util/random/fast.h | 168 +- util/random/fast_ut.cpp | 168 +- util/random/init_atfork.cpp | 54 +- util/random/init_atfork.h | 6 +- util/random/lcg_engine.cpp | 4 +- util/random/lcg_engine.h | 
82 +- util/random/mersenne.h | 72 +- util/random/mersenne32.cpp | 186 +- util/random/mersenne32.h | 78 +- util/random/mersenne64.cpp | 190 +- util/random/mersenne64.h | 82 +- util/random/mersenne_ut.cpp | 144 +- util/random/normal.cpp | 54 +- util/random/normal.h | 76 +- util/random/normal_ut.cpp | 144 +- util/random/random.cpp | 194 +- util/random/random.h | 26 +- util/random/random_ut.cpp | 176 +- util/random/shuffle.cpp | 2 +- util/random/shuffle.h | 44 +- util/random/shuffle_ut.cpp | 98 +- util/random/ut/ya.make | 18 +- util/str_stl.cpp | 2 +- util/str_stl.h | 154 +- util/stream/aligned.cpp | 2 +- util/stream/aligned.h | 12 +- util/stream/aligned_ut.cpp | 16 +- util/stream/buffer.cpp | 80 +- util/stream/buffer.h | 82 +- util/stream/buffer_ut.cpp | 18 +- util/stream/buffered.cpp | 560 +- util/stream/buffered.h | 160 +- util/stream/buffered_ut.cpp | 116 +- util/stream/debug.cpp | 68 +- util/stream/debug.h | 22 +- util/stream/direct_io.cpp | 6 +- util/stream/file.cpp | 124 +- util/stream/file.h | 78 +- util/stream/format.cpp | 38 +- util/stream/format.h | 54 +- util/stream/format_ut.cpp | 14 +- util/stream/fwd.h | 4 +- util/stream/hex.cpp | 2 +- util/stream/hex_ut.cpp | 2 +- util/stream/holder.cpp | 2 +- util/stream/holder.h | 14 +- util/stream/input.cpp | 352 +- util/stream/input.h | 70 +- util/stream/input_ut.cpp | 40 +- util/stream/ios_ut.cpp | 468 +- util/stream/labeled.cpp | 2 +- util/stream/labeled.h | 2 +- util/stream/length.h | 40 +- util/stream/mem.cpp | 52 +- util/stream/mem.h | 56 +- util/stream/mem_ut.cpp | 20 +- util/stream/multi.cpp | 42 +- util/stream/multi.h | 22 +- util/stream/null.cpp | 20 +- util/stream/null.h | 36 +- util/stream/output.cpp | 308 +- util/stream/output.h | 224 +- util/stream/pipe.cpp | 88 +- util/stream/pipe.h | 50 +- util/stream/printf.cpp | 94 +- util/stream/printf.h | 8 +- util/stream/printf_ut.cpp | 36 +- util/stream/str.cpp | 34 +- util/stream/str.h | 122 +- util/stream/str_ut.cpp | 30 +- util/stream/tee.cpp | 22 +- 
util/stream/tee.h | 12 +- util/stream/tempbuf.cpp | 38 +- util/stream/tempbuf.h | 14 +- util/stream/tokenizer.cpp | 2 +- util/stream/tokenizer.h | 276 +- util/stream/trace.cpp | 2 +- util/stream/trace.h | 46 +- util/stream/ut/ya.make | 36 +- util/stream/walk.cpp | 28 +- util/stream/walk.h | 18 +- util/stream/walk_ut.cpp | 8 +- util/stream/zerocopy.cpp | 20 +- util/stream/zerocopy.h | 26 +- util/stream/zlib.cpp | 516 +- util/stream/zlib.h | 186 +- util/stream/zlib_ut.cpp | 44 +- util/string/ascii.cpp | 70 +- util/string/ascii.h | 178 +- util/string/ascii_ut.cpp | 42 +- util/string/benchmark/ascii/main.cpp | 174 +- util/string/benchmark/ascii/ya.make | 16 +- util/string/benchmark/cast/main.cpp | 104 +- util/string/benchmark/cast/ya.make | 16 +- util/string/benchmark/float_to_string/main.cpp | 8 +- util/string/benchmark/join/main.cpp | 26 +- util/string/benchmark/subst_global/main.cpp | 4 +- util/string/benchmark/ya.make | 4 +- util/string/builder.h | 32 +- util/string/cast.cpp | 774 +- util/string/cast.h | 188 +- util/string/cast.py | 54 +- util/string/cast_ut.cpp | 544 +- util/string/cstriter.cpp | 2 +- util/string/cstriter.h | 26 +- util/string/escape.cpp | 510 +- util/string/escape.h | 22 +- util/string/escape_ut.cpp | 54 +- util/string/fuzzing/collapse/main.cpp | 18 +- util/string/fuzzing/collapse/ya.make | 14 +- util/string/fuzzing/strtod/main.cpp | 18 +- util/string/fuzzing/strtod/ya.make | 14 +- util/string/fuzzing/ya.make | 6 +- util/string/hex.cpp | 32 +- util/string/hex.h | 40 +- util/string/hex_ut.cpp | 26 +- util/string/join.cpp | 2 +- util/string/join.h | 42 +- util/string/join_ut.cpp | 2 +- util/string/printf.cpp | 64 +- util/string/printf.h | 10 +- util/string/printf_ut.cpp | 24 +- util/string/split.cpp | 2 +- util/string/split.h | 454 +- util/string/split_ut.cpp | 154 +- util/string/strip.cpp | 24 +- util/string/strip.h | 142 +- util/string/strip_ut.cpp | 22 +- util/string/strspn.cpp | 2 +- util/string/strspn.h | 112 +- util/string/subst.cpp | 10 +- 
util/string/subst.h | 8 +- util/string/subst_ut.cpp | 14 +- util/string/type.cpp | 42 +- util/string/type.h | 34 +- util/string/type_ut.cpp | 8 +- util/string/ut/ya.make | 28 +- util/string/util.cpp | 54 +- util/string/util.h | 82 +- util/string/util_ut.cpp | 48 +- util/string/vector.cpp | 12 +- util/string/vector.h | 38 +- util/string/vector_ut.cpp | 38 +- util/system/align.cpp | 2 +- util/system/align.h | 54 +- util/system/align_ut.cpp | 54 +- util/system/atexit.cpp | 188 +- util/system/atexit.h | 18 +- util/system/atexit_ut.cpp | 36 +- util/system/atomic.cpp | 6 +- util/system/atomic.h | 80 +- util/system/atomic_gcc.h | 44 +- util/system/atomic_ops.h | 94 +- util/system/atomic_ut.cpp | 90 +- util/system/atomic_win.h | 82 +- util/system/backtrace.cpp | 414 +- util/system/backtrace.h | 28 +- util/system/backtrace_ut.cpp | 48 +- util/system/benchmark/rdtsc/main.cpp | 120 +- util/system/benchmark/rdtsc/ya.make | 14 +- util/system/benchmark/ya.make | 12 +- util/system/byteorder.cpp | 2 +- util/system/byteorder.h | 230 +- util/system/byteorder_ut.cpp | 40 +- util/system/compat.cpp | 22 +- util/system/compat.h | 96 +- util/system/compat_ut.cpp | 4 +- util/system/compiler.cpp | 2 +- util/system/compiler.h | 498 +- util/system/condvar.cpp | 218 +- util/system/condvar.h | 56 +- util/system/condvar_ut.cpp | 146 +- util/system/context.cpp | 544 +- util/system/context.h | 314 +- util/system/context_aarch64.S | 80 +- util/system/context_aarch64.h | 14 +- util/system/context_i686.asm | 6 +- util/system/context_i686.h | 12 +- util/system/context_ut.cpp | 122 +- util/system/context_x86.asm | 30 +- util/system/context_x86.h | 20 +- util/system/context_x86_64.asm | 64 +- util/system/context_x86_64.h | 10 +- util/system/cpu_id.cpp | 198 +- util/system/cpu_id.h | 208 +- util/system/cpu_id_ut.cpp | 194 +- util/system/daemon.cpp | 126 +- util/system/daemon.h | 24 +- util/system/daemon_ut.cpp | 2 +- util/system/datetime.cpp | 46 +- util/system/datetime.h | 70 +- util/system/defaults.c 
| 4 +- util/system/defaults.h | 120 +- util/system/direct_io.cpp | 102 +- util/system/direct_io.h | 12 +- util/system/direct_io_ut.cpp | 48 +- util/system/dynlib.cpp | 224 +- util/system/dynlib.h | 52 +- util/system/env.cpp | 8 +- util/system/env_ut.cpp | 2 +- util/system/err.cpp | 108 +- util/system/error.cpp | 116 +- util/system/error.h | 182 +- util/system/error_ut.cpp | 64 +- util/system/event.cpp | 178 +- util/system/event.h | 66 +- util/system/event_ut.cpp | 60 +- util/system/execpath.cpp | 120 +- util/system/execpath_ut.cpp | 18 +- util/system/fasttime.cpp | 52 +- util/system/fhandle.cpp | 2 +- util/system/fhandle.h | 38 +- util/system/file.cpp | 610 +- util/system/file.h | 72 +- util/system/file_lock.cpp | 2 +- util/system/file_lock.h | 4 +- util/system/file_ut.cpp | 278 +- util/system/filemap.cpp | 586 +- util/system/filemap.h | 280 +- util/system/filemap_ut.cpp | 144 +- util/system/flock.cpp | 102 +- util/system/flock.h | 36 +- util/system/flock_ut.cpp | 20 +- util/system/fs.cpp | 68 +- util/system/fs.h | 6 +- util/system/fs_ut.cpp | 20 +- util/system/fs_win.cpp | 204 +- util/system/fs_win.h | 4 +- util/system/fstat.cpp | 64 +- util/system/fstat.h | 10 +- util/system/fstat_ut.cpp | 86 +- util/system/getpid.cpp | 24 +- util/system/guard.cpp | 2 +- util/system/guard.h | 230 +- util/system/guard_ut.cpp | 140 +- util/system/hi_lo.h | 36 +- util/system/hi_lo_ut.cpp | 98 +- util/system/hostname.cpp | 60 +- util/system/hostname.h | 6 +- util/system/hostname_ut.cpp | 44 +- util/system/hp_timer.cpp | 104 +- util/system/hp_timer.h | 8 +- util/system/info.cpp | 356 +- util/system/info.h | 14 +- util/system/info_ut.cpp | 40 +- util/system/interrupt_signals.cpp | 2 +- util/system/interrupt_signals.h | 2 +- util/system/interrupt_signals_ut.cpp | 4 +- util/system/madvise.cpp | 22 +- util/system/maxlen.cpp | 2 +- util/system/maxlen.h | 28 +- util/system/mem_info.cpp | 208 +- util/system/mem_info.h | 22 +- util/system/mem_info_ut.cpp | 8 +- util/system/mincore.cpp | 10 +- 
util/system/mincore_ut.cpp | 2 +- util/system/mktemp.cpp | 30 +- util/system/mktemp_system.cpp | 196 +- util/system/mlock.cpp | 46 +- util/system/mutex.cpp | 214 +- util/system/mutex.h | 38 +- util/system/mutex_ut.cpp | 172 +- util/system/nice.cpp | 2 +- util/system/nice_ut.cpp | 22 +- util/system/pipe.cpp | 116 +- util/system/pipe.h | 44 +- util/system/pipe_ut.cpp | 4 +- util/system/platform.cpp | 36 +- util/system/platform.h | 144 +- util/system/platform_ut.cpp | 52 +- util/system/progname.cpp | 34 +- util/system/progname.h | 14 +- util/system/progname_ut.cpp | 24 +- util/system/protect.cpp | 80 +- util/system/protect.h | 8 +- util/system/rusage.cpp | 30 +- util/system/rusage.h | 6 +- util/system/rusage_ut.cpp | 4 +- util/system/rwlock.cpp | 218 +- util/system/rwlock.h | 68 +- util/system/rwlock_ut.cpp | 174 +- util/system/sanitizers.cpp | 152 +- util/system/sanitizers.h | 50 +- util/system/sem.cpp | 424 +- util/system/sem.h | 48 +- util/system/shellcommand.cpp | 338 +- util/system/shellcommand.h | 58 +- util/system/shellcommand_ut.cpp | 38 +- util/system/shmat.cpp | 256 +- util/system/shmat.h | 42 +- util/system/shmat_ut.cpp | 28 +- util/system/sigset.cpp | 2 +- util/system/sigset.h | 26 +- util/system/spin_wait.cpp | 76 +- util/system/spin_wait.h | 12 +- util/system/spinlock.cpp | 2 +- util/system/spinlock.h | 148 +- util/system/src_location.cpp | 12 +- util/system/src_location.h | 38 +- util/system/src_location_ut.cpp | 22 +- util/system/src_root.h | 54 +- util/system/src_root_ut.cpp | 10 +- util/system/sys_alloc.cpp | 2 +- util/system/sys_alloc.h | 28 +- util/system/sysstat.cpp | 8 +- util/system/sysstat.h | 28 +- util/system/tempfile.h | 44 +- util/system/thread.cpp | 634 +- util/system/thread.h | 138 +- util/system/thread.i | 82 +- util/system/thread_ut.cpp | 166 +- util/system/tls.cpp | 466 +- util/system/tls.h | 202 +- util/system/tls_ut.cpp | 106 +- util/system/type_name.cpp | 16 +- util/system/type_name_ut.cpp | 16 +- util/system/types.cpp | 32 +- 
util/system/types.h | 198 +- util/system/unaligned_mem.cpp | 2 +- util/system/unaligned_mem.h | 30 +- util/system/unaligned_mem_ut.cpp | 16 +- util/system/user.cpp | 26 +- util/system/user_ut.cpp | 12 +- util/system/ut/ya.make | 96 +- util/system/utime.cpp | 38 +- util/system/utime.h | 4 +- util/system/valgrind.cpp | 2 +- util/system/valgrind.h | 58 +- util/system/winint.cpp | 2 +- util/system/winint.h | 72 +- util/system/yassert.cpp | 54 +- util/system/yassert.h | 166 +- util/system/yassert_ut.cpp | 12 +- util/system/yield.cpp | 46 +- util/system/yield.h | 2 +- util/tests/benchmark/ya.make | 2 +- util/tests/fuzzing/ya.make | 2 +- util/tests/style/ya.make | 14 +- util/tests/sym_versions/test_glibc.py | 56 +- util/tests/sym_versions/ya.make | 36 +- util/tests/ut/ya.make | 4 +- util/tests/ya.make | 18 +- util/thread/factory.cpp | 128 +- util/thread/factory.h | 92 +- util/thread/factory_ut.cpp | 96 +- util/thread/lfqueue.cpp | 2 +- util/thread/lfqueue.h | 98 +- util/thread/lfqueue_ut.cpp | 18 +- util/thread/lfstack.cpp | 2 +- util/thread/lfstack.h | 66 +- util/thread/lfstack_ut.cpp | 28 +- util/thread/pool.cpp | 990 +- util/thread/pool.h | 260 +- util/thread/pool_ut.cpp | 88 +- util/thread/singleton.cpp | 2 +- util/thread/singleton.h | 48 +- util/thread/singleton_ut.cpp | 4 +- util/thread/ut/ya.make | 8 +- util/ya.make | 504 +- util/ysafeptr.cpp | 16 +- util/ysafeptr.h | 630 +- util/ysaveload.h | 662 +- util/ysaveload_ut.cpp | 480 +- ydb/core/base/logoblob.h | 2 +- ydb/core/base/statestorage.h | 2 +- ydb/core/base/tablet.h | 2 +- ydb/core/base/tablet_pipe.h | 2 +- ydb/core/base/tablet_resolver.h | 2 +- ydb/core/blobstorage/dsproxy/dsproxy_range.cpp | 2 +- .../groupinfo/blobstorage_groupinfo.cpp | 2 +- .../nodewarden/blobstorage_node_warden_ut.cpp | 12 +- .../blobstorage/testload/test_load_pdisk_write.cpp | 4 +- .../blobstorage/vdisk/common/vdisk_syncneighbors.h | 2 +- .../vdisk/hulldb/generic/hullds_sst_it.h | 2 +- .../blobstorage_hullcompactdeferredqueue_ut.cpp | 4 
+- .../vdisk/ingress/blobstorage_ingress.cpp | 2 +- .../vdisk/ingress/blobstorage_ingress_matrix.h | 8 +- ydb/core/blobstorage/ya.make | 2 +- ydb/core/mind/local.cpp | 2 +- .../mind/ut_fat/blobstorage_node_warden_ut_fat.cpp | 8 +- ydb/core/persqueue/type_codecs_ut.cpp | 26 +- ydb/core/tablet/tablet_impl.h | 2 +- ydb/core/tablet/tablet_pipe_ut.cpp | 4 +- ydb/core/tablet/tablet_resolver.cpp | 2 +- ydb/core/testlib/tablet_helpers.cpp | 2 +- ydb/core/tx/coordinator/coordinator.h | 2 +- ydb/core/tx/coordinator/coordinator_impl.h | 2 +- ydb/core/tx/mediator/mediator_impl.h | 2 +- ydb/core/tx/tx.h | 2 +- ydb/core/tx/tx_processing.h | 2 +- ydb/core/tx/tx_proxy/proxy.h | 4 +- ydb/core/ymq/client/bin/main.cpp | 4 +- ydb/library/yql/core/type_ann/type_ann_expr.cpp | 6 +- .../yql/udfs/common/topfreq/topfreq_udf_ut.cpp | 2 +- ydb/services/lib/actors/pq_schema_actor.cpp | 4 +- 3097 files changed, 365137 insertions(+), 365137 deletions(-) diff --git a/build/plugins/_common.py b/build/plugins/_common.py index 37b01baa2f7..2f831a94db6 100644 --- a/build/plugins/_common.py +++ b/build/plugins/_common.py @@ -105,8 +105,8 @@ def resolve_common_const(path): if path.startswith('${ARCADIA_BUILD_ROOT}'): return path.replace('${ARCADIA_BUILD_ROOT}', '$B', 1) return path - - + + def resolve_to_abs_path(path, source_root, build_root): if path.startswith('$S') and source_root is not None: return path.replace('$S', source_root, 1) diff --git a/build/plugins/_custom_command.py b/build/plugins/_custom_command.py index 5e554ee97f8..9692214b22e 100644 --- a/build/plugins/_custom_command.py +++ b/build/plugins/_custom_command.py @@ -1,7 +1,7 @@ import subprocess import sys -import os - +import os + import _common as common @@ -25,29 +25,29 @@ class CustomCommand(object): def call(self, args, **kwargs): cwd = self._get_call_specs('cwd', kwargs) stdout_path = self._get_call_specs('stdout', kwargs) - + resolved_args = [] - + for arg in args: - resolved_args.append(self.resolve_path(arg)) + 
resolved_args.append(self.resolve_path(arg)) if stdout_path: stdout = open(stdout_path, 'wb') else: stdout = None - env = os.environ.copy() - env['ASAN_OPTIONS'] = 'detect_leaks=0' + env = os.environ.copy() + env['ASAN_OPTIONS'] = 'detect_leaks=0' + + rc = subprocess.call(resolved_args, cwd=cwd, stdout=stdout, env=env) - rc = subprocess.call(resolved_args, cwd=cwd, stdout=stdout, env=env) - if stdout: stdout.close() if rc: sys.exit(rc) - def resolve_path(self, path): - return common.resolve_to_abs_path(path, self._source_root, self._build_root) + def resolve_path(self, path): + return common.resolve_to_abs_path(path, self._source_root, self._build_root) def _get_call_specs(self, name, kwargs): if isinstance(kwargs, dict): @@ -55,7 +55,7 @@ class CustomCommand(object): if param: return self.resolve_path(param) return None - + def addrule(*unused): pass diff --git a/build/plugins/_import_wrapper.py b/build/plugins/_import_wrapper.py index 2f26f909740..883f6623147 100644 --- a/build/plugins/_import_wrapper.py +++ b/build/plugins/_import_wrapper.py @@ -1,24 +1,24 @@ try: - from ymake import CustomCommand as RealCustomCommand + from ymake import CustomCommand as RealCustomCommand from ymake import addrule from ymake import addparser - from ymake import subst + from ymake import subst + + class CustomCommand(RealCustomCommand): + def __init__(self, *args, **kwargs): + RealCustomCommand.__init__(*args, **kwargs) + + def resolve_path(self, path): + return subst(path) - class CustomCommand(RealCustomCommand): - def __init__(self, *args, **kwargs): - RealCustomCommand.__init__(*args, **kwargs) - - def resolve_path(self, path): - return subst(path) - except ImportError: from _custom_command import CustomCommand # noqa from _custom_command import addrule # noqa from _custom_command import addparser # noqa - - -try: - from ymake import engine_version -except ImportError: - def engine_version(): - return -1 + + +try: + from ymake import engine_version +except ImportError: + def 
engine_version(): + return -1 diff --git a/build/plugins/_unpickler.py b/build/plugins/_unpickler.py index 5d0ec085fb1..e01e7b31181 100644 --- a/build/plugins/_unpickler.py +++ b/build/plugins/_unpickler.py @@ -27,7 +27,7 @@ def main(): assert (int(tools[0]) == len(tools[1:])), "tools quantity != tools number!" - cmd_object = pickle.loads(base64.b64decode(encoded_cmd)) + cmd_object = pickle.loads(base64.b64decode(encoded_cmd)) cmd_object.set_source_root(src_root) cmd_object.set_build_root(build_root) diff --git a/build/plugins/bundle.py b/build/plugins/bundle.py index c7864dc4f34..0bec8254eec 100644 --- a/build/plugins/bundle.py +++ b/build/plugins/bundle.py @@ -1,7 +1,7 @@ -import os - - -def onbundle(unit, *args): +import os + + +def onbundle(unit, *args): """ @usage BUNDLE(...) diff --git a/build/plugins/cp.py b/build/plugins/cp.py index 0685d5521a6..5c663a3bdd9 100644 --- a/build/plugins/cp.py +++ b/build/plugins/cp.py @@ -1,5 +1,5 @@ import os - + from _common import sort_by_keywords diff --git a/build/plugins/cpp_style.py b/build/plugins/cpp_style.py index feca9bc66f5..3ab78b7320c 100644 --- a/build/plugins/cpp_style.py +++ b/build/plugins/cpp_style.py @@ -1,19 +1,19 @@ -import os - -from _common import sort_by_keywords - - -def on_style(unit, *args): - def it(): - yield 'DONT_PARSE' - - for f in args: - f = f[len('${ARCADIA_ROOT}') + 1:] - - if '/generated/' in f: - continue - - yield f +import os + +from _common import sort_by_keywords + + +def on_style(unit, *args): + def it(): + yield 'DONT_PARSE' + + for f in args: + f = f[len('${ARCADIA_ROOT}') + 1:] + + if '/generated/' in f: + continue + + yield f yield '/cpp_style/files/' + f - - unit.onresource(list(it())) + + unit.onresource(list(it())) diff --git a/build/plugins/create_init_py.py b/build/plugins/create_init_py.py index 131bda1dbb3..e41a4d22dfe 100644 --- a/build/plugins/create_init_py.py +++ b/build/plugins/create_init_py.py @@ -1,8 +1,8 @@ import os - + from _common import sort_by_keywords - + 
def oncreate_init_py_structure(unit, *args): if unit.get('DISTBUILD'): return diff --git a/build/plugins/mx_archive.py b/build/plugins/mx_archive.py index ca32bfc8d1e..56b0d4d16ec 100644 --- a/build/plugins/mx_archive.py +++ b/build/plugins/mx_archive.py @@ -1,16 +1,16 @@ -def onmx_formulas(unit, *args): +def onmx_formulas(unit, *args): """ @usage: MX_FORMULAS(BinFiles...) # deprecated, matrixnet Create MatrixNet formulas archive """ - def iter_infos(): - for a in args: - if a.endswith('.bin'): + def iter_infos(): + for a in args: + if a.endswith('.bin'): unit.on_mx_bin_to_info([a]) - yield a[:-3] + 'info' - else: - yield a - - infos = list(iter_infos()) - unit.onarchive_asm(['NAME', 'MxFormulas'] + infos) + yield a[:-3] + 'info' + else: + yield a + + infos = list(iter_infos()) + unit.onarchive_asm(['NAME', 'MxFormulas'] + infos) unit.on_mx_gen_table(infos) diff --git a/build/plugins/pybuild.py b/build/plugins/pybuild.py index 621eb7805e7..f32a2d39a0e 100644 --- a/build/plugins/pybuild.py +++ b/build/plugins/pybuild.py @@ -5,7 +5,7 @@ from hashlib import md5 import ymake from _common import stripext, rootrel_arc_src, tobuilddir, listid, resolve_to_ymake_path, generate_chunks, pathid - + YA_IDE_VENV_VAR = 'YA_IDE_VENV' PY_NAMESPACE_PREFIX = 'py/namespace' BUILTIN_PROTO = 'builtin_proto' @@ -16,7 +16,7 @@ def is_arc_src(src, unit): src.startswith('${CURDIR}/') or unit.resolve_arc_path(src).startswith('$S/') ) - + def is_extended_source_search_enabled(path, unit): if not is_arc_src(path, unit): return False @@ -28,7 +28,7 @@ def to_build_root(path, unit): if is_arc_src(path, unit): return '${ARCADIA_BUILD_ROOT}/' + rootrel_arc_src(path, unit) return path - + def uniq_suffix(path, unit): upath = unit.path() if '/' not in path: @@ -168,7 +168,7 @@ def py_program(unit, py3): unit.onadd_check_py_imports() -def onpy_srcs(unit, *args): +def onpy_srcs(unit, *args): """ @usage PY_SRCS({| CYTHON_C} { | TOP_LEVEL | NAMESPACE ns} Files...) 
@@ -201,10 +201,10 @@ def onpy_srcs(unit, *args): venv = unit.get(YA_IDE_VENV_VAR) need_gazetteer_peerdir = False trim = 0 - + if not upath.startswith('contrib/tools/python') and not upath.startswith('library/python/runtime') and unit.get('NO_PYTHON_INCLS') != 'yes': unit.onpeerdir(['contrib/libs/python']) - + unit_needs_main = unit.get('MODULE_TYPE') in ('PROGRAM', 'DLL') if unit_needs_main: py_program(unit, py3) @@ -222,7 +222,7 @@ def onpy_srcs(unit, *args): cython_directives = [] if cython_coverage: cython_directives += ['-X', 'linetrace=True'] - + pyxs_c = [] pyxs_c_h = [] pyxs_c_api_h = [] @@ -236,7 +236,7 @@ def onpy_srcs(unit, *args): evs = [] fbss = [] py_namespaces = {} - + dump_dir = unit.get('PYTHON_BUILD_DUMP_DIR') dump_output = None if dump_dir: @@ -324,7 +324,7 @@ def onpy_srcs(unit, *args): ymake.report_configure_error('TOP_LEVEL __main__.py is not allowed in PY3_PROGRAM') pathmod = (path, mod) - + if dump_output is not None: dump_output.write('{path}\t{module}\n'.format(path=rootrel_arc_src(path, unit), module=mod)) @@ -493,10 +493,10 @@ def onpy_srcs(unit, *args): py_runtime_path = 'contrib/python/protobuf' builtin_proto_path = cpp_runtime_path + '/' + BUILTIN_PROTO - if protos: + if protos: if not upath.startswith(py_runtime_path) and not upath.startswith(builtin_proto_path): unit.onpeerdir(py_runtime_path) - + unit.onpeerdir(unit.get("PY_PROTO_DEPS").split()) proto_paths = [path for path, mod in protos] @@ -514,7 +514,7 @@ def onpy_srcs(unit, *args): unit.onpeerdir([cpp_runtime_path]) unit.on_generate_py_evs_internal([path for path, mod in evs]) unit.onpy_srcs([ev_arg(path, mod, unit) for path, mod in evs]) - + if fbss: unit.onpeerdir(unit.get('_PY_FBS_DEPS').split()) pysrc_base_name = listid(fbss) diff --git a/build/plugins/res.py b/build/plugins/res.py index 48fb57b4260..a937caba816 100644 --- a/build/plugins/res.py +++ b/build/plugins/res.py @@ -1,6 +1,6 @@ from _common import iterpair, listid, pathid, rootrel_arc_src, tobuilddir, 
filter_out_by_keyword - + def split(lst, limit): # paths are specified with replaceable prefix # real length is unknown at the moment, that why we use root_lenght diff --git a/build/plugins/rodata.py b/build/plugins/rodata.py index 98defaff654..3ecb0f9a839 100644 --- a/build/plugins/rodata.py +++ b/build/plugins/rodata.py @@ -1,15 +1,15 @@ import argparse -import os - -import _common as common -import _import_wrapper as iw - - +import os + +import _common as common +import _import_wrapper as iw + + class ROData(iw.CustomCommand): def __init__(self, path, unit): self._path = path self._flags = [] - + prefix = unit.get('ASM_PREFIX') if prefix: @@ -118,51 +118,51 @@ class ROData(iw.CustomCommand): self.call(cmd) -class RODataCXX(iw.CustomCommand): - def __init__(self, path, unit): - self._path = path - self._base = os.path.basename(common.stripext(self._path)) - - def descr(self): - return 'RD', self._path, 'light-green' - - def input(self): +class RODataCXX(iw.CustomCommand): + def __init__(self, path, unit): + self._path = path + self._base = os.path.basename(common.stripext(self._path)) + + def descr(self): + return 'RD', self._path, 'light-green' + + def input(self): return common.make_tuples([self._path]) - - def main_out(self): - return common.tobuilddir(common.stripext(self._path)) + '.cpp' - - def output(self): + + def main_out(self): + return common.tobuilddir(common.stripext(self._path)) + '.cpp' + + def output(self): return common.make_tuples([self.main_out()]) - + def run(self, extra_args, binary): - with open(self.resolve_path(self.main_out()), 'w') as f: - f.write('static_assert(sizeof(unsigned int) == 4, "ups, something gone wrong");\n\n') - f.write('extern "C" {\n') - f.write(' extern const unsigned char ' + self._base + '[] = {\n') - - cnt = 0 - - with open(self.resolve_path(self._path), 'r') as input: - for ch in input.read(): - f.write('0x%02x, ' % ord(ch)) - - cnt += 1 - - if cnt % 50 == 1: - f.write('\n') - - f.write(' };\n') - f.write(' extern 
const unsigned int ' + self._base + 'Size = sizeof(' + self._base + ');\n') - f.write('}\n') - - -def ro_data(path, unit): + with open(self.resolve_path(self.main_out()), 'w') as f: + f.write('static_assert(sizeof(unsigned int) == 4, "ups, something gone wrong");\n\n') + f.write('extern "C" {\n') + f.write(' extern const unsigned char ' + self._base + '[] = {\n') + + cnt = 0 + + with open(self.resolve_path(self._path), 'r') as input: + for ch in input.read(): + f.write('0x%02x, ' % ord(ch)) + + cnt += 1 + + if cnt % 50 == 1: + f.write('\n') + + f.write(' };\n') + f.write(' extern const unsigned int ' + self._base + 'Size = sizeof(' + self._base + ');\n') + f.write('}\n') + + +def ro_data(path, unit): if unit.enabled('ARCH_AARCH64') or unit.enabled('ARCH_ARM') or unit.enabled('ARCH_PPC64LE'): - return RODataCXX(path, unit) - - return ROData(path, unit) - - + return RODataCXX(path, unit) + + return ROData(path, unit) + + def init(): iw.addrule('rodata', ro_data) diff --git a/build/rules/contrib_deps.policy b/build/rules/contrib_deps.policy index b0c8fdfc544..9af4b85cc2c 100644 --- a/build/rules/contrib_deps.policy +++ b/build/rules/contrib_deps.policy @@ -1,7 +1,7 @@ ALLOW contrib/clickhouse -> library/cpp/consistent_hashing ALLOW contrib -> contrib ALLOW contrib -> build -ALLOW contrib -> library/python/resource +ALLOW contrib -> library/python/resource ALLOW contrib -> library/cpp/testing/unittest_main ALLOW contrib -> library/cpp/testing/unittest ALLOW contrib -> library/cpp/getopt diff --git a/build/scripts/check_config_h.py b/build/scripts/check_config_h.py index a19a405d0a0..07bc12e2308 100644 --- a/build/scripts/check_config_h.py +++ b/build/scripts/check_config_h.py @@ -1,88 +1,88 @@ -import sys - -data = """ -#if defined(SIZEOF_LONG) +import sys + +data = """ +#if defined(SIZEOF_LONG) static_assert(sizeof(long) == SIZEOF_LONG, "fixme 1"); -#endif - -#if defined(SIZEOF_PTHREAD_T) -#include - +#endif + +#if defined(SIZEOF_PTHREAD_T) +#include + 
static_assert(sizeof(pthread_t) == SIZEOF_PTHREAD_T, "fixme 2"); -#endif - -#if defined(SIZEOF_SIZE_T) -#include - +#endif + +#if defined(SIZEOF_SIZE_T) +#include + static_assert(sizeof(size_t) == SIZEOF_SIZE_T, "fixme 3"); -#endif - -#if defined(SIZEOF_TIME_T) -#include - +#endif + +#if defined(SIZEOF_TIME_T) +#include + static_assert(sizeof(time_t) == SIZEOF_TIME_T, "fixme 4"); -#endif - -#if defined(SIZEOF_UINTPTR_T) -#include - +#endif + +#if defined(SIZEOF_UINTPTR_T) +#include + static_assert(sizeof(uintptr_t) == SIZEOF_UINTPTR_T, "fixme 5"); -#endif - -#if defined(SIZEOF_VOID_P) +#endif + +#if defined(SIZEOF_VOID_P) static_assert(sizeof(void*) == SIZEOF_VOID_P, "fixme 6"); -#endif - -#if defined(SIZEOF_FPOS_T) -#include - +#endif + +#if defined(SIZEOF_FPOS_T) +#include + static_assert(sizeof(fpos_t) == SIZEOF_FPOS_T, "fixme 7"); -#endif - -#if defined(SIZEOF_DOUBLE) +#endif + +#if defined(SIZEOF_DOUBLE) static_assert(sizeof(double) == SIZEOF_DOUBLE, "fixme 8"); -#endif - -#if defined(SIZEOF_LONG_DOUBLE) +#endif + +#if defined(SIZEOF_LONG_DOUBLE) static_assert(sizeof(long double) == SIZEOF_LONG_DOUBLE, "fixme 9"); -#endif - -#if defined(SIZEOF_FLOAT) +#endif + +#if defined(SIZEOF_FLOAT) static_assert(sizeof(float) == SIZEOF_FLOAT, "fixme 10"); -#endif - -#if defined(SIZEOF_INT) +#endif + +#if defined(SIZEOF_INT) static_assert(sizeof(int) == SIZEOF_INT, "fixme 11"); -#endif - -#if defined(SIZEOF_LONG_LONG) +#endif + +#if defined(SIZEOF_LONG_LONG) static_assert(sizeof(long long) == SIZEOF_LONG_LONG, "fixme 12"); -#endif - -#if defined(SIZEOF_OFF_T) -#include - +#endif + +#if defined(SIZEOF_OFF_T) +#include + static_assert(sizeof(off_t) == SIZEOF_OFF_T, "fixme 13"); -#endif - -#if defined(SIZEOF_PID_T) -#include - +#endif + +#if defined(SIZEOF_PID_T) +#include + static_assert(sizeof(pid_t) == SIZEOF_PID_T, "fixme 14"); -#endif - -#if defined(SIZEOF_SHORT) +#endif + +#if defined(SIZEOF_SHORT) static_assert(sizeof(short) == SIZEOF_SHORT, "fixme 15"); -#endif - -#if 
defined(SIZEOF_WCHAR_T) +#endif + +#if defined(SIZEOF_WCHAR_T) static_assert(sizeof(wchar_t) == SIZEOF_WCHAR_T, "fixme 16"); -#endif - -#if defined(SIZEOF__BOOL) -//TODO -#endif -""" +#endif + +#if defined(SIZEOF__BOOL) +//TODO +#endif +""" if __name__ == '__main__': with open(sys.argv[2], 'w') as f: f.write('#include <' + sys.argv[1] + '>\n\n') diff --git a/build/scripts/f2c.py b/build/scripts/f2c.py index 243d3965a4b..7021e1391f1 100644 --- a/build/scripts/f2c.py +++ b/build/scripts/f2c.py @@ -1,8 +1,8 @@ -import sys -import subprocess -import argparse -import os - +import sys +import subprocess +import argparse +import os + header = '''\ #ifdef __GNUC__ @@ -30,11 +30,11 @@ def mkdir_p(directory): if __name__ == '__main__': parser = argparse.ArgumentParser() - + parser.add_argument('-t', '--tool') parser.add_argument('-c', '--input') parser.add_argument('-o', '--output') - + args = parser.parse_args() tmpdir = args.output + '.f2c' mkdir_p(tmpdir) @@ -44,14 +44,14 @@ if __name__ == '__main__': stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) stdout, stderr = p.communicate(input=open(args.input).read()) ret = p.wait() - + if ret: print >>sys.stderr, 'f2c failed: %s, %s' % (stderr, ret) sys.exit(ret) - + if 'Error' in stderr: print >>sys.stderr, stderr - + with open(args.output, 'w') as f: f.write(header) f.write(stdout) diff --git a/build/scripts/fetch_from_sandbox.py b/build/scripts/fetch_from_sandbox.py index bd84f7a4d2b..a99542e1743 100755 --- a/build/scripts/fetch_from_sandbox.py +++ b/build/scripts/fetch_from_sandbox.py @@ -6,12 +6,12 @@ import os import random import subprocess import sys -import time +import time import urllib2 import uuid - + import fetch_from - + ORIGIN_SUFFIX = '?origin=fetch-from-sandbox' MDS_PREFIX = 'http://storage-int.mds.yandex.net/get-sandbox/' diff --git a/build/scripts/fs_tools.py b/build/scripts/fs_tools.py index d420e85c50a..dec4c349c89 100644 --- a/build/scripts/fs_tools.py +++ b/build/scripts/fs_tools.py 
@@ -1,11 +1,11 @@ from __future__ import print_function -import os +import os import platform -import sys -import shutil +import sys +import shutil import errno - + import process_command_files as pcf @@ -26,7 +26,7 @@ def link_or_copy(src, dst): if __name__ == '__main__': mode = sys.argv[1] args = pcf.get_args(sys.argv[2:]) - + if mode == 'copy': shutil.copy(args[0], args[1]) elif mode == 'copy_tree_no_link': diff --git a/build/scripts/gen_join_srcs.py b/build/scripts/gen_join_srcs.py index b4ddbb587eb..e0c2df161a7 100644 --- a/build/scripts/gen_join_srcs.py +++ b/build/scripts/gen_join_srcs.py @@ -1,16 +1,16 @@ -import sys - +import sys + import process_command_files as pcf - -with open(sys.argv[1], 'w') as f: - f.write('#if defined(__GNUC__)\n') - f.write('#pragma GCC diagnostic ignored "-Wunknown-pragmas"\n') - f.write('#if defined(__clang__)\n') - f.write('#pragma GCC diagnostic ignored "-Wunknown-warning-option"\n') - f.write('#endif\n') - f.write('#pragma GCC diagnostic ignored "-Wsubobject-linkage"\n') - f.write('#endif\n\n') - + +with open(sys.argv[1], 'w') as f: + f.write('#if defined(__GNUC__)\n') + f.write('#pragma GCC diagnostic ignored "-Wunknown-pragmas"\n') + f.write('#if defined(__clang__)\n') + f.write('#pragma GCC diagnostic ignored "-Wunknown-warning-option"\n') + f.write('#endif\n') + f.write('#pragma GCC diagnostic ignored "-Wsubobject-linkage"\n') + f.write('#endif\n\n') + for arg in pcf.iter_args(sys.argv[2:]): f.write('#include "' + arg + '"\n') diff --git a/build/scripts/gen_mx_table.py b/build/scripts/gen_mx_table.py index cce69e5cfbc..187c21c539f 100644 --- a/build/scripts/gen_mx_table.py +++ b/build/scripts/gen_mx_table.py @@ -1,75 +1,75 @@ -import sys - -tmpl = """ -#include "yabs_mx_calc_table.h" - -#include - +import sys + +tmpl = """ +#include "yabs_mx_calc_table.h" + +#include + #include - -#include -#include -#include -#include - -using namespace NMatrixnet; - -extern "C" { - extern const unsigned char MxFormulas[]; - extern const 
ui32 MxFormulasSize; -} - -namespace { - struct TFml: public TBlob, public TMnSseInfo { - inline TFml(const TBlob& b) - : TBlob(b) - , TMnSseInfo(Data(), Size()) - { - } - }; - + +#include +#include +#include +#include + +using namespace NMatrixnet; + +extern "C" { + extern const unsigned char MxFormulas[]; + extern const ui32 MxFormulasSize; +} + +namespace { + struct TFml: public TBlob, public TMnSseInfo { + inline TFml(const TBlob& b) + : TBlob(b) + , TMnSseInfo(Data(), Size()) + { + } + }; + struct TFormulas: public THashMap> { - inline TFormulas() { - TBlob b = TBlob::NoCopy(MxFormulas, MxFormulasSize); - TArchiveReader ar(b); - %s - } - + inline TFormulas() { + TBlob b = TBlob::NoCopy(MxFormulas, MxFormulasSize); + TArchiveReader ar(b); + %s + } + inline const TMnSseInfo& at(size_t n) const noexcept { - return *find(n)->second; - } - }; - - %s - - static func_descr_t yabs_funcs[] = { - %s - }; -} - -yabs_mx_calc_table_t yabs_mx_calc_table = {YABS_MX_CALC_VERSION, 10000, 0, yabs_funcs}; -""" - + return *find(n)->second; + } + }; + + %s + + static func_descr_t yabs_funcs[] = { + %s + }; +} + +yabs_mx_calc_table_t yabs_mx_calc_table = {YABS_MX_CALC_VERSION, 10000, 0, yabs_funcs}; +""" + if __name__ == '__main__': init = [] body = [] defs = {} - + for i in sys.argv[1:]: name = i.replace('.', '_') num = long(name.split('_')[1]) - + init.append('(*this)[%s] = new TFml(ar.ObjectBlobByKey("%s"));' % (num, '/' + i)) - + f1 = 'static void yabs_%s(size_t count, const float** args, double* res) {Singleton()->at(%s).DoCalcRelevs(args, res, count);}' % (name, num) f2 = 'static size_t yabs_%s_factor_count() {return Singleton()->at(%s).MaxFactorIndex() + 1;}' % (name, num) - + body.append(f1) body.append(f2) - + d1 = 'yabs_%s' % name d2 = 'yabs_%s_factor_count' % name - + defs[num] = '{%s, %s}' % (d1, d2) - + print tmpl % ('\n'.join(init), '\n\n'.join(body), ',\n'.join((defs.get(i, '{nullptr, nullptr}') for i in range(0, 10000)))) diff --git a/build/scripts/gen_py_reg.py 
b/build/scripts/gen_py_reg.py index 7526198395c..1560135ae81 100644 --- a/build/scripts/gen_py_reg.py +++ b/build/scripts/gen_py_reg.py @@ -1,19 +1,19 @@ import sys -template = ''' +template = ''' extern "C" void PyImport_AppendInittab(const char* name, void (*fn)(void)); extern "C" void {1}(); - -namespace { - struct TRegistrar { - inline TRegistrar() { + +namespace { + struct TRegistrar { + inline TRegistrar() { PyImport_AppendInittab("{0}", {1}); - } - } REG; -} -''' - - + } + } REG; +} +''' + + def mangle(name): if '.' not in name: return name diff --git a/build/scripts/gen_ub.py b/build/scripts/gen_ub.py index cced92e99bd..ad79cda9261 100644 --- a/build/scripts/gen_ub.py +++ b/build/scripts/gen_ub.py @@ -1,86 +1,86 @@ -import argparse -import os -import tarfile -import contextlib -import hashlib -import base64 -import io - - -stub = """#!/usr/bin/env python - -info = {info} -data = "{data}" - -import platform -import os -import sys -import tarfile -import contextlib -import io -import base64 - - -def current_platform(): - arch = platform.machine().upper() - - if arch == 'AMD64': - arch = 'X86_64' - - platf = platform.system().upper() - - if platf.startswith('WIN'): - platf = 'WIN' - - return (platf + '-' + arch).lower() - - -def extract_file(fname): - with contextlib.closing(tarfile.open(fileobj=io.BytesIO(base64.b64decode(data)))) as f: - return f.extractfile(fname).read() - - -fname = info[current_platform()] -my_path = os.path.realpath(os.path.abspath(__file__)) -tmp_path = my_path + '.tmp' - -with open(tmp_path, 'wb') as f: - f.write(extract_file(fname)) - -os.rename(tmp_path, my_path) -os.chmod(my_path, 0775) -os.execv(sys.argv[0], sys.argv) -""" - - -def gen_ub(output, data): - info = {} - binary = io.BytesIO() - - with contextlib.closing(tarfile.open(mode='w:bz2', fileobj=binary, dereference=True)) as f: - for pl, path in data: - fname = os.path.basename(path) - pl = pl.split('-') - pl = pl[1] + '-' + pl[2] - info[pl] = fname - f.add(path, 
arcname=fname) - - binary = binary.getvalue() - info['md5'] = hashlib.md5(binary).hexdigest() - - with open(output, 'w') as f: - f.write(stub.format(info=info, data=base64.b64encode(binary))) - - os.chmod(output, 0775) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - parser.add_argument('--path', action='append') - parser.add_argument('--platform', action='append') - parser.add_argument('--output', action='store') - - args = parser.parse_args() - - gen_ub(args.output, zip(args.platform, args.path)) +import argparse +import os +import tarfile +import contextlib +import hashlib +import base64 +import io + + +stub = """#!/usr/bin/env python + +info = {info} +data = "{data}" + +import platform +import os +import sys +import tarfile +import contextlib +import io +import base64 + + +def current_platform(): + arch = platform.machine().upper() + + if arch == 'AMD64': + arch = 'X86_64' + + platf = platform.system().upper() + + if platf.startswith('WIN'): + platf = 'WIN' + + return (platf + '-' + arch).lower() + + +def extract_file(fname): + with contextlib.closing(tarfile.open(fileobj=io.BytesIO(base64.b64decode(data)))) as f: + return f.extractfile(fname).read() + + +fname = info[current_platform()] +my_path = os.path.realpath(os.path.abspath(__file__)) +tmp_path = my_path + '.tmp' + +with open(tmp_path, 'wb') as f: + f.write(extract_file(fname)) + +os.rename(tmp_path, my_path) +os.chmod(my_path, 0775) +os.execv(sys.argv[0], sys.argv) +""" + + +def gen_ub(output, data): + info = {} + binary = io.BytesIO() + + with contextlib.closing(tarfile.open(mode='w:bz2', fileobj=binary, dereference=True)) as f: + for pl, path in data: + fname = os.path.basename(path) + pl = pl.split('-') + pl = pl[1] + '-' + pl[2] + info[pl] = fname + f.add(path, arcname=fname) + + binary = binary.getvalue() + info['md5'] = hashlib.md5(binary).hexdigest() + + with open(output, 'w') as f: + f.write(stub.format(info=info, data=base64.b64encode(binary))) + + os.chmod(output, 0775) 
+ + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('--path', action='append') + parser.add_argument('--platform', action='append') + parser.add_argument('--output', action='store') + + args = parser.parse_args() + + gen_ub(args.output, zip(args.platform, args.path)) diff --git a/build/scripts/link_dyn_lib.py b/build/scripts/link_dyn_lib.py index 58ae45d101d..23487f5c1ef 100644 --- a/build/scripts/link_dyn_lib.py +++ b/build/scripts/link_dyn_lib.py @@ -1,13 +1,13 @@ -import sys -import os +import sys +import os import subprocess -import tempfile -import collections +import tempfile +import collections import optparse import pipes - + from process_whole_archive_option import ProcessWholeArchiveOption - + def shlex_join(cmd): # equivalent to shlex.join() in python 3 @@ -17,12 +17,12 @@ def shlex_join(cmd): ) -def parse_export_file(p): - with open(p, 'r') as f: +def parse_export_file(p): + with open(p, 'r') as f: for l in f: - l = l.strip() - - if l and '#' not in l: + l = l.strip() + + if l and '#' not in l: words = l.split() if len(words) == 2 and words[0] == 'linux_version': yield {'linux_version': words[1]} @@ -32,8 +32,8 @@ def parse_export_file(p): yield {'lang': 'C', 'sym': words[0]} else: raise Exception('unsupported exports line: ' + l) - - + + def to_c(sym): symbols = collections.deque(sym.split('::')) c_prefixes = [ # demangle prefixes for c++ symbols @@ -60,24 +60,24 @@ def to_c(sym): return ['{prefix}{sym}'.format(prefix=prefix, sym=c_sym) for prefix in c_prefixes] -def fix_darwin_param(ex): - for item in ex: +def fix_darwin_param(ex): + for item in ex: if item.get('linux_version'): continue - if item['lang'] == 'C': - yield '-Wl,-exported_symbol,_' + item['sym'] + if item['lang'] == 'C': + yield '-Wl,-exported_symbol,_' + item['sym'] elif item['lang'] == 'C++': for sym in to_c(item['sym']): yield '-Wl,-exported_symbol,_' + sym - else: - raise Exception('unsupported lang: ' + item['lang']) - - -def 
fix_gnu_param(arch, ex): - d = collections.defaultdict(list) + else: + raise Exception('unsupported lang: ' + item['lang']) + + +def fix_gnu_param(arch, ex): + d = collections.defaultdict(list) version = None - for item in ex: + for item in ex: if item.get('linux_version'): if not version: version = item.get('linux_version') @@ -87,31 +87,31 @@ def fix_gnu_param(arch, ex): d['C'].extend(to_c(item['sym'])) else: d[item['lang']].append(item['sym']) - + with tempfile.NamedTemporaryFile(mode='wt', delete=False) as f: if version: f.write('{} {{\nglobal:\n'.format(version)) else: f.write('{\nglobal:\n') - - for k, v in d.items(): - f.write(' extern "' + k + '" {\n') - - for x in v: + + for k, v in d.items(): + f.write(' extern "' + k + '" {\n') + + for x in v: f.write(' ' + x + ';\n') - - f.write(' };\n') - - f.write('local: *;\n};\n') - - ret = ['-Wl,--version-script=' + f.name] - - if arch == 'ANDROID': - ret += ['-Wl,--export-dynamic'] - - return ret - - + + f.write(' };\n') + + f.write('local: *;\n};\n') + + ret = ['-Wl,--version-script=' + f.name] + + if arch == 'ANDROID': + ret += ['-Wl,--export-dynamic'] + + return ret + + def fix_windows_param(ex): with tempfile.NamedTemporaryFile(delete=False) as def_file: exports = [] @@ -131,33 +131,33 @@ def fix_cmd(arch, musl, c): if arch == 'WINDOWS': prefix = '/DEF:' f = fix_windows_param - else: + else: prefix = '-Wl,--version-script=' if arch in ('DARWIN', 'IOS'): f = fix_darwin_param else: f = lambda x: fix_gnu_param(arch, x) - - def do_fix(p): + + def do_fix(p): if musl and p in musl_libs: return [] - if p.startswith(prefix) and p.endswith('.exports'): - fname = p[len(prefix):] - - return list(f(list(parse_export_file(fname)))) - + if p.startswith(prefix) and p.endswith('.exports'): + fname = p[len(prefix):] + + return list(f(list(parse_export_file(fname)))) + if p.endswith('.supp'): return [] if p.endswith('.pkg.fake'): return [] - return [p] - - return sum((do_fix(x) for x in c), []) - - + return [p] + + return 
sum((do_fix(x) for x in c), []) + + def parse_args(): parser = optparse.OptionParser() parser.disable_interspersed_args() diff --git a/build/scripts/link_lib.py b/build/scripts/link_lib.py index c3ef7b9607e..344d50d4ebb 100644 --- a/build/scripts/link_lib.py +++ b/build/scripts/link_lib.py @@ -49,18 +49,18 @@ if __name__ == "__main__": os.environ['SYM64_THRESHOLD'] = '31' def call(): - try: - p = subprocess.Popen(cmd, stdin=stdin, cwd=opts.build_root) - rc = p.wait() - return rc - except OSError as e: - raise Exception('while running %s: %s' % (' '.join(cmd), e)) + try: + p = subprocess.Popen(cmd, stdin=stdin, cwd=opts.build_root) + rc = p.wait() + return rc + except OSError as e: + raise Exception('while running %s: %s' % (' '.join(cmd), e)) + + try: + os.unlink(opts.output) + except OSError: + pass - try: - os.unlink(opts.output) - except OSError: - pass - if not opts.libs: cmd = [opts.archiver] + opts.create_flags + opts.plugin_flags + [opts.output] + opts.objs stdin = None diff --git a/build/scripts/mkver.py b/build/scripts/mkver.py index 6b4ac58bf75..321cdaade16 100755 --- a/build/scripts/mkver.py +++ b/build/scripts/mkver.py @@ -1,12 +1,12 @@ -import sys - +import sys + if __name__ == '__main__': with open(sys.argv[1], 'r') as f: data = f.readline() - + beg = data.find('(') + 1 end = data.find(')') version = data[beg:end] - + print '#pragma once' print '#define DEBIAN_VERSION "%s"' % version diff --git a/build/scripts/perl_wrapper.py b/build/scripts/perl_wrapper.py index cd456e30568..cb4027f1d36 100644 --- a/build/scripts/perl_wrapper.py +++ b/build/scripts/perl_wrapper.py @@ -1,24 +1,24 @@ import os import sys -import shutil +import shutil if __name__ == '__main__': path = sys.argv[1] to = sys.argv[-1] fr = sys.argv[-2] to_dir = os.path.dirname(to) - + os.chdir(to_dir) - + f1 = os.path.basename(fr) fr_ = os.path.dirname(fr) f2 = os.path.basename(fr_) fr_ = os.path.dirname(fr_) - + os.makedirs(f2) shutil.copyfile(fr, os.path.join(f2, f1)) - + if path[0] != 
'/': path = os.path.join(os.path.dirname(__file__), path) - + os.execv(path, [path] + sys.argv[2:]) diff --git a/build/scripts/preprocess.py b/build/scripts/preprocess.py index 24846b4c238..4657bef732e 100644 --- a/build/scripts/preprocess.py +++ b/build/scripts/preprocess.py @@ -1,48 +1,48 @@ import sys import os - - -def load_file(p): - with open(p, 'r') as f: - return f.read() - - -def step(base, data, hh): - def flt(): - for l in data.split('\n'): - if l in hh: - pp = os.path.join(base, hh[l]) - - yield '\n\n' + load_file(pp) + '\n\n' - - os.unlink(pp) - else: - yield l - - return '\n'.join(flt()) - - -def subst_headers(path, headers): - hh = dict() - - for h in headers: - hh['# include "' + h + '"'] = h - - data = load_file(path) - prev = data - - while True: - ret = step(os.path.dirname(path), prev, hh) - - if ret == prev: - break - - prev = ret - - if data != prev: - with open(path, 'w') as f: - f.write(prev) - - + + +def load_file(p): + with open(p, 'r') as f: + return f.read() + + +def step(base, data, hh): + def flt(): + for l in data.split('\n'): + if l in hh: + pp = os.path.join(base, hh[l]) + + yield '\n\n' + load_file(pp) + '\n\n' + + os.unlink(pp) + else: + yield l + + return '\n'.join(flt()) + + +def subst_headers(path, headers): + hh = dict() + + for h in headers: + hh['# include "' + h + '"'] = h + + data = load_file(path) + prev = data + + while True: + ret = step(os.path.dirname(path), prev, hh) + + if ret == prev: + break + + prev = ret + + if data != prev: + with open(path, 'w') as f: + f.write(prev) + + if __name__ == '__main__': subst_headers(sys.argv[1], ['stack.hh', 'position.hh', 'location.hh']) diff --git a/build/scripts/run_msvc_wine.py b/build/scripts/run_msvc_wine.py index 4763a39f70f..439d1f88316 100644 --- a/build/scripts/run_msvc_wine.py +++ b/build/scripts/run_msvc_wine.py @@ -1,60 +1,60 @@ -import sys -import os +import sys +import os import re -import subprocess -import signal -import time -import json +import subprocess +import 
signal +import time +import json import argparse import errno - + import process_command_files as pcf import process_whole_archive_option as pwa - -procs = [] -build_kekeke = 45 - - + +procs = [] +build_kekeke = 45 + + def stringize(s): return s.encode('utf-8') if isinstance(s, unicode) else s -def run_subprocess(*args, **kwargs): +def run_subprocess(*args, **kwargs): if 'env' in kwargs: kwargs['env'] = {stringize(k): stringize(v) for k, v in kwargs['env'].iteritems()} - p = subprocess.Popen(*args, **kwargs) - - procs.append(p) - - return p - - -def terminate_slaves(): - for p in procs: - try: - p.terminate() - except Exception: - pass - - -def sig_term(sig, fr): - terminate_slaves() - sys.exit(sig) - - -def subst_path(l): - if len(l) > 3: - if l[:3].lower() in ('z:\\', 'z:/'): - return l[2:].replace('\\', '/') - - return l - - + p = subprocess.Popen(*args, **kwargs) + + procs.append(p) + + return p + + +def terminate_slaves(): + for p in procs: + try: + p.terminate() + except Exception: + pass + + +def sig_term(sig, fr): + terminate_slaves() + sys.exit(sig) + + +def subst_path(l): + if len(l) > 3: + if l[:3].lower() in ('z:\\', 'z:/'): + return l[2:].replace('\\', '/') + + return l + + def call_wine_cmd_once(wine, cmd, env, mode): p = run_subprocess(wine + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env, close_fds=True, shell=False) - + output = find_cmd_out(cmd) error = None if output is not None and os.path.exists(output): @@ -69,15 +69,15 @@ def call_wine_cmd_once(wine, cmd, env, mode): if error is not None: print >> sys.stderr, 'Output {} already exists and we have failed to remove it: {}'.format(output, error) - # print >>sys.stderr, cmd, env, wine - + # print >>sys.stderr, cmd, env, wine + stdout_and_stderr, _ = p.communicate() - + return_code = p.returncode if not stdout_and_stderr: if return_code != 0: raise Exception('wine did something strange') - + return return_code elif ' : fatal error ' in stdout_and_stderr: return_code = 1 @@ -85,27 
+85,27 @@ def call_wine_cmd_once(wine, cmd, env, mode): return_code = 2 lines = [x.strip() for x in stdout_and_stderr.split('\n')] - - prefixes = [ - 'Microsoft (R)', - 'Copyright (C)', - 'Application tried to create a window', - 'The graphics driver is missing', - 'Could not load wine-gecko', - 'wine: configuration in', - 'wine: created the configuration directory', + + prefixes = [ + 'Microsoft (R)', + 'Copyright (C)', + 'Application tried to create a window', + 'The graphics driver is missing', + 'Could not load wine-gecko', + 'wine: configuration in', + 'wine: created the configuration directory', 'libpng warning:' - ] - - suffixes = [ - '.c', - '.cxx', - '.cc', - '.cpp', - '.masm', - ] - - substrs = [ + ] + + suffixes = [ + '.c', + '.cxx', + '.cc', + '.cpp', + '.masm', + ] + + substrs = [ 'Creating library Z:', 'err:heap', 'err:menubuilder:', @@ -113,211 +113,211 @@ def call_wine_cmd_once(wine, cmd, env, mode): 'err:ole:', 'err:wincodecs:', 'err:winediag:', - ] - - def good_line(l): - for x in prefixes: - if l.startswith(x): - return False - - for x in suffixes: - if l.endswith(x): - return False - - for x in substrs: - if x in l: - return False - - return True - - def filter_lines(): - for l in lines: - if good_line(l): - yield subst_path(l.strip()) - + ] + + def good_line(l): + for x in prefixes: + if l.startswith(x): + return False + + for x in suffixes: + if l.endswith(x): + return False + + for x in substrs: + if x in l: + return False + + return True + + def filter_lines(): + for l in lines: + if good_line(l): + yield subst_path(l.strip()) + stdout_and_stderr = '\n'.join(filter_lines()).strip() - + if stdout_and_stderr: print >>sys.stderr, stdout_and_stderr - + return return_code - - -def prepare_vc(fr, to): - for p in os.listdir(fr): - fr_p = os.path.join(fr, p) - to_p = os.path.join(to, p) - - if not os.path.exists(to_p): - print >>sys.stderr, 'install %s -> %s' % (fr_p, to_p) - - os.link(fr_p, to_p) - - -def run_slave(): + + +def prepare_vc(fr, to): + 
for p in os.listdir(fr): + fr_p = os.path.join(fr, p) + to_p = os.path.join(to, p) + + if not os.path.exists(to_p): + print >>sys.stderr, 'install %s -> %s' % (fr_p, to_p) + + os.link(fr_p, to_p) + + +def run_slave(): args = json.loads(sys.argv[3]) wine = sys.argv[1] - - signal.signal(signal.SIGTERM, sig_term) - - if args.get('tout', None): - signal.signal(signal.SIGALRM, sig_term) - signal.alarm(args['tout']) - - tout = 0.1 - - while True: - try: + + signal.signal(signal.SIGTERM, sig_term) + + if args.get('tout', None): + signal.signal(signal.SIGALRM, sig_term) + signal.alarm(args['tout']) + + tout = 0.1 + + while True: + try: return call_wine_cmd_once([wine], args['cmd'], args['env'], args['mode']) - except Exception as e: - print >>sys.stderr, '%s, will retry in %s' % (str(e), tout) - - time.sleep(tout) - tout = min(2 * tout, 4) - - -def find_cmd_out(args): - for arg in args: - if arg.startswith('/Fo'): - return arg[3:] - - if arg.startswith('/OUT:'): - return arg[5:] - - -def calc_zero_cnt(data): - zero_cnt = 0 - - for ch in data: - if ch == chr(0): - zero_cnt += 1 - - return zero_cnt - - -def is_good_file(p): - if not os.path.isfile(p): - return False - - if os.path.getsize(p) < 300: - return False - + except Exception as e: + print >>sys.stderr, '%s, will retry in %s' % (str(e), tout) + + time.sleep(tout) + tout = min(2 * tout, 4) + + +def find_cmd_out(args): + for arg in args: + if arg.startswith('/Fo'): + return arg[3:] + + if arg.startswith('/OUT:'): + return arg[5:] + + +def calc_zero_cnt(data): + zero_cnt = 0 + + for ch in data: + if ch == chr(0): + zero_cnt += 1 + + return zero_cnt + + +def is_good_file(p): + if not os.path.isfile(p): + return False + + if os.path.getsize(p) < 300: + return False + asm_pattern = re.compile('asm(\.\w+)?\.obj$') if asm_pattern.search(p): - pass - elif p.endswith('.obj'): - with open(p, 'rb') as f: - prefix = f.read(200) - - if ord(prefix[0]) != 0: - return False - - if ord(prefix[1]) != 0: - return False - - if 
ord(prefix[2]) != 0xFF: - return False - - if ord(prefix[3]) != 0xFF: - return False - - if calc_zero_cnt(prefix) > 195: - return False - - f.seek(-100, os.SEEK_END) - last = f.read(100) - - if calc_zero_cnt(last) > 95: - return False - - if last[-1] != chr(0): - return False - elif p.endswith('.lib'): - with open(p, 'rb') as f: - if f.read(7) != '!': - return False - - return True - - -RED = '\x1b[31;1m' -GRAY = '\x1b[30;1m' -RST = '\x1b[0m' -MGT = '\x1b[35m' -YEL = '\x1b[33m' -GRN = '\x1b[32m' -CYA = '\x1b[36m' - - -def colorize_strings(l): - p = l.find("'") - - if p >= 0: - yield l[:p] - - l = l[p + 1:] - - p = l.find("'") - - if p >= 0: - yield CYA + "'" + subst_path(l[:p]) + "'" + RST - - for x in colorize_strings(l[p + 1:]): - yield x - else: - yield "'" + l - else: - yield l - - -def colorize_line(l): - lll = l - - try: - parts = [] - - if l.startswith('(compiler file'): - return ''.join(colorize_strings(l)) - - if l.startswith('/'): - p = l.find('(') - parts.append(GRAY + l[:p] + RST) - l = l[p:] - - if l and l.startswith('('): - p = l.find(')') - parts.append(':' + MGT + l[1:p] + RST) - l = l[p + 1:] - - if l: - if l.startswith(' : '): - l = l[1:] - - if l.startswith(': error'): - parts.append(': ' + RED + 'error' + RST) - l = l[7:] - elif l.startswith(': warning'): - parts.append(': ' + YEL + 'warning' + RST) - l = l[9:] - elif l.startswith(': note'): - parts.append(': ' + GRN + 'note' + RST) - l = l[6:] - elif l.startswith('fatal error'): - parts.append(RED + 'fatal error' + RST) - l = l[11:] - - if l: - parts.extend(colorize_strings(l)) - - return ''.join(parts) + pass + elif p.endswith('.obj'): + with open(p, 'rb') as f: + prefix = f.read(200) + + if ord(prefix[0]) != 0: + return False + + if ord(prefix[1]) != 0: + return False + + if ord(prefix[2]) != 0xFF: + return False + + if ord(prefix[3]) != 0xFF: + return False + + if calc_zero_cnt(prefix) > 195: + return False + + f.seek(-100, os.SEEK_END) + last = f.read(100) + + if calc_zero_cnt(last) > 95: + 
return False + + if last[-1] != chr(0): + return False + elif p.endswith('.lib'): + with open(p, 'rb') as f: + if f.read(7) != '!': + return False + + return True + + +RED = '\x1b[31;1m' +GRAY = '\x1b[30;1m' +RST = '\x1b[0m' +MGT = '\x1b[35m' +YEL = '\x1b[33m' +GRN = '\x1b[32m' +CYA = '\x1b[36m' + + +def colorize_strings(l): + p = l.find("'") + + if p >= 0: + yield l[:p] + + l = l[p + 1:] + + p = l.find("'") + + if p >= 0: + yield CYA + "'" + subst_path(l[:p]) + "'" + RST + + for x in colorize_strings(l[p + 1:]): + yield x + else: + yield "'" + l + else: + yield l + + +def colorize_line(l): + lll = l + + try: + parts = [] + + if l.startswith('(compiler file'): + return ''.join(colorize_strings(l)) + + if l.startswith('/'): + p = l.find('(') + parts.append(GRAY + l[:p] + RST) + l = l[p:] + + if l and l.startswith('('): + p = l.find(')') + parts.append(':' + MGT + l[1:p] + RST) + l = l[p + 1:] + + if l: + if l.startswith(' : '): + l = l[1:] + + if l.startswith(': error'): + parts.append(': ' + RED + 'error' + RST) + l = l[7:] + elif l.startswith(': warning'): + parts.append(': ' + YEL + 'warning' + RST) + l = l[9:] + elif l.startswith(': note'): + parts.append(': ' + GRN + 'note' + RST) + l = l[6:] + elif l.startswith('fatal error'): + parts.append(RED + 'fatal error' + RST) + l = l[11:] + + if l: + parts.extend(colorize_strings(l)) + + return ''.join(parts) except Exception: - return lll - - -def colorize(out): - return '\n'.join(colorize_line(l) for l in out.split('\n')) - - + return lll + + +def colorize(out): + return '\n'.join(colorize_line(l) for l in out.split('\n')) + + def trim_path(path, winepath): p1 = run_subprocess([winepath, '-w', path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) p1_stdout, p1_stderr = p1.communicate() @@ -376,7 +376,7 @@ def fix_path(p): if p.startswith('/Fo'): return '/Fo' + p[3:].replace('/', '\\') return p - + def process_free_args(args, wine, bld_root, mode): whole_archive_prefix = '/WHOLEARCHIVE:' short_names = {} @@ -385,16 
+385,16 @@ def process_free_args(args, wine, bld_root, mode): # Slow for no benefit. # arc_root = args.arcadia_root # short_names[arc_root] = trim_path(arc_root, winepath) - + free_args, wa_peers, wa_libs = pwa.get_whole_archive_peers_and_libs(pcf.skip_markers(args)) - + process_link = lambda x: make_full_path_arg(x, bld_root, short_names[bld_root]) if mode in ('link', 'lib') else x def process_arg(arg): with_wa_prefix = arg.startswith(whole_archive_prefix) prefix = whole_archive_prefix if with_wa_prefix else '' without_prefix_arg = arg[len(prefix):] return prefix + fix_path(process_link(downsize_path(without_prefix_arg, short_names))) - + result = [] for arg in free_args: if pcf.is_cmdfile_arg(arg): @@ -430,51 +430,51 @@ def run_main(): free_args = args.free_args wine_dir = os.path.dirname(os.path.dirname(wine)) - bin_dir = os.path.dirname(binary) + bin_dir = os.path.dirname(binary) tc_dir = os.path.dirname(os.path.dirname(os.path.dirname(bin_dir))) if not incl_paths: incl_paths = [tc_dir + '/VC/include', tc_dir + '/include'] - + cmd_out = find_cmd_out(free_args) - env = os.environ.copy() - - env.pop('DISPLAY', None) - + env = os.environ.copy() + + env.pop('DISPLAY', None) + env['WINEDLLOVERRIDES'] = 'msvcr{}=n'.format(version) - env['WINEDEBUG'] = 'fixme-all' + env['WINEDEBUG'] = 'fixme-all' env['INCLUDE'] = ';'.join(fix_path(p) for p in incl_paths) - env['VSINSTALLDIR'] = fix_path(tc_dir) - env['VCINSTALLDIR'] = fix_path(tc_dir + '/VC') - env['WindowsSdkDir'] = fix_path(tc_dir) + env['VSINSTALLDIR'] = fix_path(tc_dir) + env['VCINSTALLDIR'] = fix_path(tc_dir + '/VC') + env['WindowsSdkDir'] = fix_path(tc_dir) env['LIBPATH'] = fix_path(tc_dir + '/VC/lib/amd64') env['LIB'] = fix_path(tc_dir + '/VC/lib/amd64') env['LD_LIBRARY_PATH'] = ':'.join(wine_dir + d for d in ['/lib', '/lib64', '/lib64/wine']) - + cmd = [binary] + process_free_args(free_args, wine, bld_root, mode) for x in ('/NOLOGO', '/nologo', '/FD'): - try: - cmd.remove(x) - except ValueError: - pass - - def 
run_process(sleep, tout): - if sleep: - time.sleep(sleep) - - args = { - 'cmd': cmd, - 'env': env, - 'mode': mode, - 'tout': tout - } - + try: + cmd.remove(x) + except ValueError: + pass + + def run_process(sleep, tout): + if sleep: + time.sleep(sleep) + + args = { + 'cmd': cmd, + 'env': env, + 'mode': mode, + 'tout': tout + } + slave_cmd = [sys.executable, sys.argv[0], wine, 'slave', json.dumps(args)] p = run_subprocess(slave_cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=False) - out, _ = p.communicate() - return p.wait(), out - + out, _ = p.communicate() + return p.wait(), out + def print_err_log(log): if not log: return @@ -483,74 +483,74 @@ def run_main(): print >>sys.stderr, log tout = 200 - - while True: - rc, out = run_process(0, tout) - - if rc in (-signal.SIGALRM, signal.SIGALRM): + + while True: + rc, out = run_process(0, tout) + + if rc in (-signal.SIGALRM, signal.SIGALRM): print_err_log(out) - print >>sys.stderr, '##append_tag##time out' + print >>sys.stderr, '##append_tag##time out' elif out and ' stack overflow ' in out: - print >>sys.stderr, '##append_tag##stack overflow' - elif out and 'recvmsg: Connection reset by peer' in out: - print >>sys.stderr, '##append_tag##wine gone' - elif out and 'D8037' in out: - print >>sys.stderr, '##append_tag##repair wine' - - try: - os.unlink(os.path.join(os.environ['WINEPREFIX'], '.update-timestamp')) - except Exception as e: - print >>sys.stderr, e - - else: + print >>sys.stderr, '##append_tag##stack overflow' + elif out and 'recvmsg: Connection reset by peer' in out: + print >>sys.stderr, '##append_tag##wine gone' + elif out and 'D8037' in out: + print >>sys.stderr, '##append_tag##repair wine' + + try: + os.unlink(os.path.join(os.environ['WINEPREFIX'], '.update-timestamp')) + except Exception as e: + print >>sys.stderr, e + + else: print_err_log(out) - - # non-zero return code - bad, return it immediately - if rc: + + # non-zero return code - bad, return it immediately + if rc: print >>sys.stderr, 
'##win_cmd##' + ' '.join(cmd) print >>sys.stderr, '##args##' + ' '.join(free_args) - return rc - - # check for output existence(if we expect it!) and real length - if cmd_out: - if is_good_file(cmd_out): - return 0 - else: - # retry! - print >>sys.stderr, '##append_tag##no output' - else: - return 0 - - tout *= 3 - - -def main(): + return rc + + # check for output existence(if we expect it!) and real length + if cmd_out: + if is_good_file(cmd_out): + return 0 + else: + # retry! + print >>sys.stderr, '##append_tag##no output' + else: + return 0 + + tout *= 3 + + +def main(): prefix_suffix = os.environ.pop('WINEPREFIX_SUFFIX', None) if prefix_suffix is not None: prefix = os.environ.pop('WINEPREFIX', None) if prefix is not None: os.environ['WINEPREFIX'] = os.path.join(prefix, prefix_suffix) - # just in case - signal.alarm(2000) - - if sys.argv[2] == 'slave': - func = run_slave - else: - func = run_main - - try: - try: - sys.exit(func()) - finally: - terminate_slaves() - except KeyboardInterrupt: - sys.exit(4) - except Exception as e: - print >>sys.stderr, str(e) - - sys.exit(3) - - -if __name__ == '__main__': - main() + # just in case + signal.alarm(2000) + + if sys.argv[2] == 'slave': + func = run_slave + else: + func = run_main + + try: + try: + sys.exit(func()) + finally: + terminate_slaves() + except KeyboardInterrupt: + sys.exit(4) + except Exception as e: + print >>sys.stderr, str(e) + + sys.exit(3) + + +if __name__ == '__main__': + main() diff --git a/build/scripts/run_tool.py b/build/scripts/run_tool.py index fe4b5446173..00e3ff6f1ea 100755 --- a/build/scripts/run_tool.py +++ b/build/scripts/run_tool.py @@ -1,7 +1,7 @@ -import sys -import subprocess +import sys +import subprocess import os - + if __name__ == '__main__': env = os.environ.copy() diff --git a/build/scripts/xargs.py b/build/scripts/xargs.py index 64295d594f6..5d68929eccd 100644 --- a/build/scripts/xargs.py +++ b/build/scripts/xargs.py @@ -1,18 +1,18 @@ -import sys -import os -import subprocess - 
+import sys +import os +import subprocess + if __name__ == '__main__': pos = sys.argv.index('--') fname = sys.argv[pos + 1] cmd = sys.argv[pos + 2:] - + with open(fname, 'r') as f: args = [x.strip() for x in f] - + os.remove(fname) - + p = subprocess.Popen(cmd + args, shell=False, stderr=sys.stderr, stdout=sys.stdout) p.communicate() - + sys.exit(p.returncode) diff --git a/build/scripts/yield_line.py b/build/scripts/yield_line.py index e95ab5ec62c..9c1c5391460 100644 --- a/build/scripts/yield_line.py +++ b/build/scripts/yield_line.py @@ -1,7 +1,7 @@ -import sys - +import sys + if __name__ == '__main__': pos = sys.argv.index('--') - + with open(sys.argv[pos + 1], 'a') as f: f.write(' '.join(sys.argv[pos + 2:]) + '\n') diff --git a/build/scripts/yndexer.py b/build/scripts/yndexer.py index 82defb59f88..a38e28ba99f 100644 --- a/build/scripts/yndexer.py +++ b/build/scripts/yndexer.py @@ -17,7 +17,7 @@ def _try_to_kill(process): def touch(path): if not os.path.exists(path): - with open(path, 'w'): + with open(path, 'w'): pass diff --git a/build/stdafx.hpp b/build/stdafx.hpp index 17f7f5c5f51..98b15adda05 100644 --- a/build/stdafx.hpp +++ b/build/stdafx.hpp @@ -1,332 +1,332 @@ -#if !defined(LIBCXXRT) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include -//#include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#if !defined(LIBCXXRT) +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +//#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include -//#include -#include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +//#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include +#include +#include #include -#include -#include -#include 
+#include +#include +#include #include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include -#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include 
+#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include #include -#include -#include -#include -#include -//#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +//#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif diff --git a/build/ya.conf.json b/build/ya.conf.json index 739feadf3b3..5f7cc875d61 100644 --- a/build/ya.conf.json +++ b/build/ya.conf.json @@ -2646,14 +2646,14 @@ "android_ndk_version": 24 } }, - "bloat": { - "tools": { + "bloat": { + "tools": { "bloat": { "bottle": "bloat", 
"executable": "bloat" } - }, - "platforms": [ + }, + "platforms": [ { "host": { "os": "LINUX" @@ -2666,8 +2666,8 @@ }, "default": true } - ] - }, + ] + }, "pprof": { "tools": { "pprof": { @@ -2925,13 +2925,13 @@ } ] }, - "deploy": { - "tools": { + "deploy": { + "tools": { "deploy": { "bottle": "deploy", "executable": "deploy" } - }, + }, "platforms": [ { "host": { @@ -2940,15 +2940,15 @@ "default": true } ] - }, - "yt": { - "tools": { + }, + "yt": { + "tools": { "yt": { "bottle": "yt", "executable": "yt" } - }, - "platforms": [ + }, + "platforms": [ { "host": { "os": "LINUX" @@ -2961,8 +2961,8 @@ }, "default": true } - ] - }, + ] + }, "yndexer": { "tools": { "yndexer": { @@ -3079,13 +3079,13 @@ } ] }, - "uc": { - "tools": { + "uc": { + "tools": { "uc": { "bottle": "uc", "executable": "uc" } - }, + }, "platforms": [ { "host": { @@ -3094,14 +3094,14 @@ "default": true } ] - }, - "perf": { - "tools": { + }, + "perf": { + "tools": { "perf": { "bottle": "perf", "executable": "perf" } - }, + }, "platforms": [ { "host": { @@ -3110,7 +3110,7 @@ "default": true } ] - }, + }, "cuda-check": { "tools": { "cuda-check": { @@ -3143,14 +3143,14 @@ } ] }, - "samogonctl": { - "tools": { + "samogonctl": { + "tools": { "samogonctl": { "bottle": "samogonctl", "executable": "samogonctl" } - }, - "platforms": [ + }, + "platforms": [ { "host": { "os": "LINUX" @@ -3163,8 +3163,8 @@ }, "default": true } - ] - }, + ] + }, "jdk": { "tools": { "javac": { @@ -5944,19 +5944,19 @@ ] } }, - "bloat": { - "formula": { + "bloat": { + "formula": { "sandbox_id": [ 801218211 ], - "match": "Bloat" - }, - "executable": { + "match": "Bloat" + }, + "executable": { "bloat": [ "bloat" ] - } - }, + } + }, "jdk": { "formula": { "sandbox_id": [ @@ -6507,31 +6507,31 @@ ] } }, - "deploy": { - "formula": { + "deploy": { + "formula": { "sandbox_id": 243231127, - "match": "DEPLOY" - }, - "executable": { + "match": "DEPLOY" + }, + "executable": { "deploy": [ "samogon", "deploy" ] - } - }, - "yt": { - "formula": { + } 
+ }, + "yt": { + "formula": { "sandbox_id": [ 1195249182 ], - "match": "YT" - }, - "executable": { + "match": "YT" + }, + "executable": { "yt": [ "yt" ] - } - }, + } + }, "yndexer": { "formula": { "sandbox_id": 992600186, @@ -6587,28 +6587,28 @@ ] } }, - "uc": { - "formula": { + "uc": { + "formula": { "sandbox_id": 505682252, - "match": "UC" - }, - "executable": { + "match": "UC" + }, + "executable": { "uc": [ "uc" ] - } - }, - "perf": { - "formula": { + } + }, + "perf": { + "formula": { "sandbox_id": 1130176134, "match": "infra/kernel/tools/perf/build/perf-static.tar.gz" - }, - "executable": { + }, + "executable": { "perf": [ "perf" ] - } - }, + } + }, "cuda-check": { "formula": { "sandbox_id": "1128218000", @@ -6635,17 +6635,17 @@ ] } }, - "samogonctl": { - "formula": { + "samogonctl": { + "formula": { "sandbox_id": 438053038, - "match": "SAMOGONCTL" - }, - "executable": { + "match": "SAMOGONCTL" + }, + "executable": { "samogonctl": [ "samogonctl" ] - } - }, + } + }, "gcc61": { "formula": { "sandbox_id": 63189766, diff --git a/build/ymake.core.conf b/build/ymake.core.conf index 650d90caf6b..081833998b8 100644 --- a/build/ymake.core.conf +++ b/build/ymake.core.conf @@ -6,7 +6,7 @@ # - Distributed (YT) cache warmup will take signigicant time to catch up and will need to recache everithing. # - Autocheck will rebuild and recache everything. # Use this with extreme care and only change if it is utlimately needed. Consider more specific XXX_FAKEIDs below instead. 
-FAKEID=3141592653 +FAKEID=3141592653 SANDBOX_FAKEID=${FAKEID}.7600000 CPP_FAKEID=9107927 @@ -136,18 +136,18 @@ when ($OS_CYGWIN == "yes") { USE_ASMLIB=no FSTACK= } - + CFLAGS+=$COVERAGE_FLAGS LDFLAGS+=$COVERAGE_FLAGS CHECKFLAG= LEX_FLAGS= NO_MAPREDUCE= - + when ($NO_MAPREDUCE == "yes") { C_DEFINES+=-DNO_MAPREDUCE } - + when ($OS_ANDROID == "yes") { PIE=yes } @@ -416,12 +416,12 @@ DEFAULT_ALLOCATOR=LF when ($OS_ANDROID == "yes" || $MSVC == "yes") { DEFAULT_ALLOCATOR=J } - + # tag:allocator when ($OS_CYGWIN == "yes" || $ARCH_PPC64LE == "yes") { DEFAULT_ALLOCATOR=SYSTEM -} - +} + # tag:allocator when ($OS_DARWIN == "yes") { DEFAULT_ALLOCATOR=SYSTEM @@ -991,19 +991,19 @@ RUN_NO_SANITIZE=$YMAKE_PYTHON ${input:"build/scripts/run_tool.py"} -- when ($IS_CROSS_SANITIZE) { RUN_NO_SANITIZE= } - + YIELD=$YMAKE_PYTHON ${input:"build/scripts/yield_line.py"} -- ${BINDIR}/__args XARGS=$YMAKE_PYTHON ${input:"build/scripts/xargs.py"} -- ${BINDIR}/__args - + RESPFILE_CMD=$YMAKE_PYTHON ${input:"build/scripts/writer.py"} FS_TOOLS=$YMAKE_PYTHON ${input:"build/scripts/fs_tools.py"} - -COPY_CMD=$FS_TOOLS copy + +COPY_CMD=$FS_TOOLS copy LINK_OR_COPY_CMD=$FS_TOOLS link_or_copy -REMOVE_FILE=$FS_TOOLS remove -MOVE_FILE=$FS_TOOLS rename - +REMOVE_FILE=$FS_TOOLS remove +MOVE_FILE=$FS_TOOLS rename + # tag:allocator tag:windows-specific MSVC_DYNAMICBASE=/DYNAMICBASE when ($ALLOCATOR == "LF") { @@ -1174,7 +1174,7 @@ module _BASE_UNIT: _BARE_UNIT { } SANITIZER_DEFINED=no - + when ($SANITIZER_TYPE && $SANITIZER_TYPE != "no") { CFLAGS+=-fsanitize=$SANITIZER_TYPE -D${SANITIZER_TYPE}_sanitizer_enabled $SANITIZER_CFLAGS -fno-omit-frame-pointer LDFLAGS+=-fsanitize=$SANITIZER_TYPE @@ -1185,7 +1185,7 @@ module _BASE_UNIT: _BARE_UNIT { LDFLAGS+=-fsanitize-blacklist=${input:"build/sanitize-blacklist.txt"} } } - + when ($SANITIZE_COVERAGE && $SANITIZE_COVERAGE != "no") { CFLAGS+=-fsanitize-coverage=$SANITIZE_COVERAGE LDFLAGS+=-fsanitize-coverage=$SANITIZE_COVERAGE @@ -1426,12 +1426,12 @@ when 
($COMMON_LINK_SETTINGS == "yes") { } otherwise { select ($ALLOCATOR) { - "MIM" ? { - PEERDIR+=library/cpp/malloc/mimalloc - } - "HU" ? { - PEERDIR+=library/cpp/malloc/hu - } + "MIM" ? { + PEERDIR+=library/cpp/malloc/mimalloc + } + "HU" ? { + PEERDIR+=library/cpp/malloc/hu + } "TCMALLOC_256K" ? { PEERDIR+=library/cpp/malloc/tcmalloc PEERDIR+=contrib/libs/tcmalloc @@ -1628,12 +1628,12 @@ module _BASE_PROGRAM: _LINK_UNIT { } when ($MUSL == "yes") { - when ($MUSL_LITE == "yes") { - PEERDIR += contrib/libs/musl - } - otherwise { - PEERDIR += contrib/libs/musl/full - } + when ($MUSL_LITE == "yes") { + PEERDIR += contrib/libs/musl + } + otherwise { + PEERDIR += contrib/libs/musl/full + } } DEFAULT(CPU_CHECK yes) @@ -2234,15 +2234,15 @@ multimodule PY3TEST { # tag:cpp-specific tag:test module CPP_STYLE_TEST: PY3TEST_BIN { - DEPENDS(contrib/libs/clang12/tools/clang-format) - PEERDIR+=library/python/cpp_test -} - -macro STYLE(Globs...) { - _GLOB(STYLE_SRCS_GLOB ${pre=${ARCADIA_ROOT}:Globs}) - _STYLE(${STYLE_SRCS_GLOB}) -} - + DEPENDS(contrib/libs/clang12/tools/clang-format) + PEERDIR+=library/python/cpp_test +} + +macro STYLE(Globs...) { + _GLOB(STYLE_SRCS_GLOB ${pre=${ARCADIA_ROOT}:Globs}) + _STYLE(${STYLE_SRCS_GLOB}) +} + # tag:cpp-specific tag:deprecated tag:test ### @usage: GTEST_UGLY([name]) ### @@ -2305,8 +2305,8 @@ module EXECTEST: _BARE_UNIT { module Y_BENCHMARK: PROGRAM { PEERDIR(library/cpp/testing/benchmark/main) SET(MODULE_LANG CPP) -} - +} + # tag:cpp-specific tag:test ### @usage: G_BENCHMARK([benchmarkname]) ### @@ -2685,7 +2685,7 @@ module PY_ANY_MODULE: DLL_UNIT { } _SONAME=$SONAME$MODULE_VERSION - # -bundle + # -bundle when ($DARWIN == "yes") { LDFLAGS+=-flat_namespace } @@ -5258,7 +5258,7 @@ macro ADD_PERL_MODULE(Dir, Module) { ### Set memory allocator implementation for the PROGRAM()/DLL() module. ### This may only be specified for programs and dlls, use in other modules leads to configuration errors. 
### -### Available allocators are: "LF", "LF_YT", "LF_DBG", "YT", "J", "B", "BM", "C", "TCMALLOC", "GOOGLE", "LOCKLESS", "SYSTEM", "FAKE", "MIM", "HU". +### Available allocators are: "LF", "LF_YT", "LF_DBG", "YT", "J", "B", "BM", "C", "TCMALLOC", "GOOGLE", "LOCKLESS", "SYSTEM", "FAKE", "MIM", "HU". ### - LF - lfalloc (https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/lfalloc) ### - LF_YT - Allocator selection for YT (https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/lfalloc/yt/ya.make) ### - LF_DBG - Debug allocator selection (https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/lfalloc/dbg/ya.make) @@ -5269,7 +5269,7 @@ macro ADD_PERL_MODULE(Dir, Module) { ### - Code: https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/balloc ### - BM - The balloc for market (agri@ commits from july 2018 till November 2018 saved) ### - C - Like B, but can be disabled for each thread to LF or SYSTEM one (B can be disabled only to SYSTEM) -### - MIM - Microsoft's mimalloc (actual version) (https://a.yandex-team.ru/arc/trunk/arcadia/library/malloc/mimalloc) +### - MIM - Microsoft's mimalloc (actual version) (https://a.yandex-team.ru/arc/trunk/arcadia/library/malloc/mimalloc) ### - TCMALLOC - Google TCMalloc (actual version) (https://a.yandex-team.ru/arc/trunk/arcadia/library/malloc/tcmalloc) ### - GOOGLE - Google TCMalloc (https://a.yandex-team.ru/arc/trunk/arcadia/library/malloc/galloc) ### - LOCKLESS - Allocator based upon lockless queues (https://a.yandex-team.ru/arc/trunk/arcadia/library/malloc/lockless) @@ -5394,8 +5394,8 @@ when ($SPLIT_DWARF_VALUE == "yes" && $NO_SPLIT_DWARF != "yes" && $NO_DEBUGINFO ! ### Add the specified external static libraries to the program link macro EXTRALIBS_STATIC(Args...) { LDFLAGS(-Wl,-Bstatic ${Args} -Wl,-Bdynamic) -} - +} + ### @usage ADD_COMPILABLE_TRANSLATE(Dict Name Options...) 
### ### Generate translation dictionary code to transdict.LOWER(Name).cpp that will than be compiled into library @@ -5578,18 +5578,18 @@ macro JOIN_SRCS(Out, Src...) { .CMD=$YMAKE_PYTHON ${input:"build/scripts/gen_join_srcs.py"} ${output:Out} --ya-start-command-file ${input;rootrel:Src} --ya-end-command-file ${output_include;hide:Src} ${kv;hide:"p JS"} ${kv;hide:"pc magenta"} .SEM=target_joined_source $Out ${input:Src} ${output;hide;suf=.o:Out} && modules_required yandex_common.cmake _CONDITIONAL_SRCS($TIDY_VALUE $Src) -} - +} + ### @usage: JOIN_SRCS_GLOBAL(Out Src...) ### ### Join set of sources into single file named Out and send it for further processing as if it were listed as SRCS(GLOBAL Out). ### This macro doesn't place all file into Out, it emits #include... Use the for C++ source files only. ### You should specify file name with the extension as Out. Further processing will be done according to this extension. -macro JOIN_SRCS_GLOBAL(Out, Src...) { +macro JOIN_SRCS_GLOBAL(Out, Src...) { .CMD=$YMAKE_PYTHON ${input:"build/scripts/gen_join_srcs.py"} ${output;noauto:Out} --ya-start-command-file ${input;rootrel:Src} --ya-end-command-file ${output_include;hide:Src} ${kv;hide:"p JS"} ${kv;hide:"pc magenta"} - SRCS(GLOBAL $Out) -} - + SRCS(GLOBAL $Out) +} + ### @usage: FLAT_JOIN_SRCS_GLOBAL(Out Src...) ### ### Join set of sources into single file named Out and send it for further processing as if it were listed as SRCS(GLOBAL Out). @@ -6351,9 +6351,9 @@ macro _SWIG_PYTHON_C(Src, DstSubPrefix) { macro BUILDWITH_RAGEL6(Src, Options...) { .CMD=$RUN_NO_SANITIZE ${tool:"contrib/tools/ragel6"} $RAGEL6_FLAGS ${Options} -I${ARCADIA_ROOT} -o ${output;nopath;noext;defext=.rl6.cpp:Src} ${input:Src} ${kv;hide:"p R6"} ${kv;hide:"pc yellow"} } - + # tag:python-processing tag:internal -# TODO: use it in [.pyx] cmd +# TODO: use it in [.pyx] cmd ### @usage: _PY_REGISTER() # internal ### ### Register Python 2.x module in internal resource file system. 
Arcadia Python 2.x importer will be retrieve these on import directive. @@ -6362,8 +6362,8 @@ macro BUILDWITH_RAGEL6(Src, Options...) { macro _PY_REGISTER(Func) { .CMD=$YMAKE_PYTHON ${input:"build/scripts/gen_py_reg.py"} $Func ${output;noauto:Func.reg.cpp} ${requirements;hide:PY_REQUIREMENTS} ${kv;hide:"p PY"} ${kv;hide:"pc yellow"} SRCS(GLOBAL $Func.reg.cpp) -} - +} + # tag:python-processing tag:internal ### @usage: _PY3_REGISTER() # internal ### @@ -6402,8 +6402,8 @@ macro _PY3_COMPILE_BYTECODE(SrcX, Src, Dst) { ### 2. Different syntax (see examples in codesearch or users/pg/tests/archive_test) macro ARCHIVE_ASM(NAME="", DONTCOMPRESS?"-p":"", REQUIREMENTS[], Files...) { .CMD=$ARCH_TOOL -q $DONTCOMPRESS ${input;join=\: :Files}: -o ${output;chksum;suf=$OBJ_SUF.rodata:NAME} ${requirements;hide:REQUIREMENTS} ${kv;hide:"p AR"} ${kv;hide:"pc light-cyan"} -} - +} + # tag:yweb-specific macro PIRE_INLINE_CMD(SRC) { .CMD=${tool:"library/cpp/regex/pire/inline"} -o ${output:SRC} ${input:SRC} ${output_include;hide:SRC} ${kv;hide:"p PI"} ${kv;hide:"pc yellow"} @@ -6787,21 +6787,21 @@ macro _LUAJIT_21_OBJDUMP(Src, OUT="") { macro _MX_BIN_TO_INFO(Src) { .CMD=${tool:"tools/mx_bin2info"} ${input:Src} ${output;nopath;noext;noauto:Src.info} ${kv;hide:"p MX"} ${kv;hide:"pc yellow"} } - -MX_GEN_TABLE_INCLS=${output_include;hide:"yabs_mx_calc_table.h"} \ -${output_include;hide:"kernel/matrixnet/mn_sse.h"} \ + +MX_GEN_TABLE_INCLS=${output_include;hide:"yabs_mx_calc_table.h"} \ +${output_include;hide:"kernel/matrixnet/mn_sse.h"} \ ${output_include;hide:"library/cpp/archive/yarchive.h"} \ -${output_include;hide:"util/memory/blob.h"} \ -${output_include;hide:"util/generic/hash.h"} \ -${output_include;hide:"util/generic/ptr.h"} \ -${output_include;hide:"util/generic/singleton.h"} - +${output_include;hide:"util/memory/blob.h"} \ +${output_include;hide:"util/generic/hash.h"} \ +${output_include;hide:"util/generic/ptr.h"} \ +${output_include;hide:"util/generic/singleton.h"} + macro 
_MX_GEN_TABLE(Srcs...) { .CMD=$YMAKE_PYTHON ${input:"build/scripts/gen_mx_table.py"} $Srcs ${output;stdout:"mx_tables.cpp"} $MX_GEN_TABLE_INCLS ${kv;hide:"p MX"} ${kv;hide:"pc yellow"} PEERDIR(kernel/matrixnet) PEERDIR(library/cpp/archive) -} - +} + RELEV_FML_CODEGEN_INCLS=${output_include;hide:"kernel/relevfml/relev_fml.h"} ${output_include;hide:"library/cpp/sse/sse.h"} ### @usage: GENERATE_ENUM_SERIALIZATION(File.h) @@ -6826,7 +6826,7 @@ macro GENERATE_ENUM_SERIALIZATION_WITH_HEADER(File) { .SEM=generate_enum_serilization ${input:File} ${output;hide;suf=_serialized.o:File} GEN_HEADER ${output;suf=_serialized.h:File} INCLUDE_HEADERS ${input;rootrel:File} ${tool;hide:"tools/enum_parser/enum_parser/bin"} PEERDIR(tools/enum_parser/enum_serialization_runtime) } - + ### @usage: DEB_VERSION(File) ### ### Creates a header file DebianVersion.h define the DEBIAN_VERSION taken from the File. @@ -7039,8 +7039,8 @@ macro LUA(ScriptPath, IN{input}[], OUT{output}[], OUT_NOAUTO{output}[], TOOL{too ### ${CURDIR} and ${BINDIR} which are expanded where the outputs are used. macro PYTHON(ScriptPath, IN{input}[], OUT{output}[], OUT_NOAUTO{output}[], TOOL{tool}[], OUTPUT_INCLUDES[], IN_DEPS[], STDOUT="", STDOUT_NOAUTO="", CWD="", ENV[], REQUIREMENTS[], Args...) { .CMD=${cwd:CWD} ${env:ENV} $YMAKE_PYTHON ${input:ScriptPath} $Args ${input;hide:IN} ${input;hide:IN_DEPS} ${output_include;hide:OUTPUT_INCLUDES} ${tool;hide:TOOL} ${output;hide:OUT} ${output;noauto;hide:OUT_NOAUTO} ${output;stdout:STDOUT} ${output;stdout;noauto:STDOUT_NOAUTO} ${requirements;hide:REQUIREMENTS} ${requirements;hide:"network:restricted"} ${kv;hide:"p PY"} ${kv;hide:"pc yellow"} ${kv;hide:"show_out"} -} - +} + # tag:java-specific macro _RUN_JAVA(IN{input}[], OUT{output}[], OUT_NOAUTO{output}[], OUTPUT_INCLUDES[], TOOL[], STDOUT="", STDOUT_NOAUTO="", CWD="", ENV[], HIDE_OUTPUT?"stderr2stdout":"stdout2stderr", REQUIREMENTS[], Args...) 
{ PEERDIR(build/platform/java/jdk $JDK_RESOURCE_PEERDIR) @@ -7068,8 +7068,8 @@ macro _RUN_JAVA(IN{input}[], OUT{output}[], OUT_NOAUTO{output}[], OUTPUT_INCLUDE macro FROM_SANDBOX(Id, OUT{output}[], OUT_NOAUTO{output}[], OUTPUT_INCLUDES[], FILE?"--copy-to-dir":"--untar-to", AUTOUPDATED="", PREFIX=".", RENAME[], EXECUTABLE?"--executable":"", SBR="sbr:", REQUIREMENTS[]) { .CMD=${hide:SANDBOX_FAKEID} ${cwd:BINDIR} ${resource;pre=$SBR:Id} $YMAKE_PYTHON ${input:"build/scripts/fetch_from_sandbox.py"} --resource-file $(RESOURCE_ROOT)/sbr/$Id/resource --resource-id $Id $FILE $PREFIX ${pre=--rename :RENAME} $EXECUTABLE -- $OUT $OUT_NOAUTO ${input;hide:"build/scripts/fetch_from.py"} ${output_include;hide:OUTPUT_INCLUDES} ${output;hide:OUT} ${output;noauto;hide:OUT_NOAUTO} ${requirements;hide:REQUIREMENTS} ${requirements;hide:"network:full"} ${kv;hide:"p SB"} ${kv;hide:"pc yellow"} ${kv;hide:"show_out"} ADD_CHECK(check.resource $Id) -} - +} + ### @usage: FROM_MDS([FILE] key [RENAME ] OUT_[NOAUTO] [EXECUTABLE]) ### ### Download resource from MDS with the specified key and process like [FROM_SANDBOX()](#macro_FROM_SANDBOX). @@ -7120,8 +7120,8 @@ otherwise { ### Compile .c files as .cpp ones within a module. macro COMPILE_C_AS_CXX() { SET(EXTRA_C_FLAGS $C_AS_CXX_FLAGS) -} - +} + ### @usage: NO_DEBUG_INFO() ### ### Compile files without debug info collection. @@ -7249,7 +7249,7 @@ when ($CLANG && $DEBUGINFO_LINES_ONLY == "yes" && $NO_DEBUGINFO != "yes") { } # TODO: configurable tar and touch -PACK_TGZ=${cwd:ARCADIA_BUILD_ROOT} tar -czf ${rootrel:OUTPUT} ${rootrel:INPUT} ${kv;hide:"p AR"} ${kv;hide:"pc light-red"} +PACK_TGZ=${cwd:ARCADIA_BUILD_ROOT} tar -czf ${rootrel:OUTPUT} ${rootrel:INPUT} ${kv;hide:"p AR"} ${kv;hide:"pc light-red"} # tag:internal ### @usage TOUCH(Outputs...) 
# internal diff --git a/build/ymake_conf.py b/build/ymake_conf.py index f186dc95c72..30219eb85ed 100755 --- a/build/ymake_conf.py +++ b/build/ymake_conf.py @@ -488,10 +488,10 @@ class Build(object): def print_build(self): self._print_build_settings() - + host_os = System(self.host) host_os.print_host_settings() - + target_os = System(self.target) target_os.print_target_settings() @@ -522,21 +522,21 @@ class Build(object): emit('DISTBUILD', 'yes') elif self.build_system != 'ymake': raise ConfigureError() - + python_bin = preset('BUILD_PYTHON_BIN', '$(PYTHON)/python') emit('YMAKE_PYTHON', python_bin) emit('YMAKE_UNPICKLER', python_bin, '$ARCADIA_ROOT/build/plugins/_unpickler.py') - + @property def is_release(self): # TODO(somov): Проверить, бывают ли тут суффиксы на самом деле return self.build_type in ('release', 'relwithdebinfo', 'minsizerel', 'profile', 'gprof') or self.build_type.endswith('-release') - + @property def is_debug(self): return self.build_type in ('debug', 'debugnoasserts', 'fastdebug') or self.build_type.endswith('-debug') - + @property def is_fast_debug(self): return self.build_type == 'fastdebug' @@ -548,7 +548,7 @@ class Build(object): @property def is_coverage(self): return self.build_type == 'coverage' - + @property def is_sanitized(self): sanitizer = preset('SANITIZER_TYPE') @@ -684,7 +684,7 @@ class Build(object): return un_unicode(json.loads(base64.b64decode(base64str))) - + class YMake(object): def __init__(self, arcadia): self.arcadia = arcadia @@ -1415,11 +1415,11 @@ class GnuCompiler(Compiler): self.debug_info_flags.append('-ggnu-pubnames') self.cross_suffix = '' if is_positive('FORCE_NO_PIC') else '.pic' - + self.optimize = None - + self.configure_build_type() - + if self.tc.is_clang: self.sfdl_flags.append('-Qunused-arguments') @@ -1456,7 +1456,7 @@ class GnuCompiler(Compiler): def configure_build_type(self): if self.build.is_valgrind: self.c_defines.append('-DWITH_VALGRIND=1') - + if self.build.is_debug: 
self.c_foptions.append('$FSTACK') @@ -1516,7 +1516,7 @@ class GnuCompiler(Compiler): CFLAGS+=-fPIE LDFLAGS+=-fPIE -pie }''') - + append('CFLAGS', self.c_flags, '$DEBUG_INFO_FLAGS', self.c_foptions, '$C_WARNING_OPTS', '$GCC_PREPROCESSOR_OPTS', '$USER_CFLAGS', '$USER_CFLAGS_GLOBAL') append('CXXFLAGS', '$CFLAGS', '-std=' + self.tc.cxx_std, '$CXX_WARNING_OPTS', '$USER_CXXFLAGS', '$USER_CXXFLAGS_GLOBAL') append('CONLYFLAGS', '$USER_CONLYFLAGS', '$USER_CONLYFLAGS_GLOBAL') @@ -1528,7 +1528,7 @@ class GnuCompiler(Compiler): # TODO(somov): Убрать чтение настройки из os.environ emit('USE_ARC_PROFILE', 'yes' if preset('USE_ARC_PROFILE') or os.environ.get('USE_ARC_PROFILE') else 'no') emit('DEBUG_INFO_FLAGS', self.debug_info_flags) - + emit_big(''' when ($NO_WSHADOW == "yes") { C_WARNING_OPTS += -Wno-shadow @@ -1663,7 +1663,7 @@ class GnuCompiler(Compiler): emit('COMPILER_TIME_TRACE_POSTPROCESS') append('EXTRA_OUTPUT') - + style = ['${requirements;hide:CC_REQUIREMENTS} ${hide;kv:"p CC"} ${hide;kv:"pc green"}'] cxx_args = [ '$CLANG_TIDY_ARGS', @@ -1683,7 +1683,7 @@ class GnuCompiler(Compiler): '$YNDEXER_OUTPUT', '&& $COMPILER_TIME_TRACE_POSTPROCESS', ] + style - + c_args = [ '$CLANG_TIDY_ARGS', '$YNDEXER_ARGS', @@ -1722,7 +1722,7 @@ class GnuCompiler(Compiler): emit('_SRC_C_CMD', ' '.join(c_args)) emit('_SRC_M_CMD', '$SRC_c($SRC $SRCFLAGS)') emit('_SRC_MASM_CMD', '$_EMPTY_CMD') - + # fuzzing configuration if self.tc.is_clang: if self.tc.version_at_least(12): @@ -1823,7 +1823,7 @@ class LD(Linker): self.ar = '{}/gcc/bin/gcc-ar'.format(self.tc.name_marker) else: self.ar = 'ar' - + self.ar_type = 'GNU_AR' self.llvm_ar_format = 'None' @@ -1887,7 +1887,7 @@ class LD(Linker): self.ld_flags.append('-Wl,-no_deduplicate') if not self.tc.is_clang: self.ld_flags.append('-Wl,-no_compact_unwind') - + self.thread_library = select([ (target.is_linux or target.is_macos, '-lpthread'), ]) @@ -1906,7 +1906,7 @@ class LD(Linker): if target.is_linux or target.is_android: self.ld_export_dynamic_flag 
= '-rdynamic' self.use_stdlib = '-nodefaultlibs' - + if target.is_linux or target.is_android or target.is_cygwin or target.is_none: self.start_group = '-Wl,--start-group' self.end_group = '-Wl,--end-group' @@ -1914,13 +1914,13 @@ class LD(Linker): self.no_whole_archive = '-Wl,--no-whole-archive' self.ld_stripflag = '-s' self.soname_option = '-soname' - + if target.is_macos or target.is_ios: self.use_stdlib = '-nodefaultlibs' self.soname_option = '-install_name' if not preset('NO_DEBUGINFO'): self.dwarf_command = '$DWARF_TOOL $TARGET -o ${output;pre=$MODULE_PREFIX$REALPRJNAME.dSYM/Contents/Resources/DWARF/$MODULE_PREFIX:REALPRJNAME}' - + if self.target.is_ios and preset('MAPSMOBI_BUILD_TARGET') and self.target.is_arm: self.ld_flags.extend(('-fembed-bitcode', '-Wl,-bitcode_verify')) diff --git a/certs/cacert.pem b/certs/cacert.pem index 32de2c9e32f..54f82bc73cf 100644 --- a/certs/cacert.pem +++ b/certs/cacert.pem @@ -4165,69 +4165,69 @@ MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUW8pe5d7SgarNqC1kUbbZcpuX ytRrJPOwPYdGWBrssd9v+1a6cGvHOMzosYxPD/fxZ3YOg9AeUY8CMD32IygmTMZg h5Mmm7I1HrrW9zzRHM76JTymGoEVW/MSD2zuZYrJh6j5B+BimoxcSg== -----END CERTIFICATE----- - -# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6 -# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6 -# Label: "GlobalSign Root CA - R6" -# Serial: 1417766617973444989252670301619537 -# MD5 Fingerprint: 4f:dd:07:e4:d4:22:64:39:1e:0c:37:42:ea:d1:c6:ae -# SHA1 Fingerprint: 80:94:64:0e:b5:a7:a1:ca:11:9c:1f:dd:d5:9f:81:02:63:a7:fb:d1 -# SHA256 Fingerprint: 2c:ab:ea:fe:37:d0:6c:a2:2a:ba:73:91:c0:03:3d:25:98:29:52:c4:53:64:73:49:76:3a:3a:b5:ad:6c:cf:69 ------BEGIN CERTIFICATE----- -MIIFgzCCA2ugAwIBAgIORea7A4Mzw4VlSOb/RVEwDQYJKoZIhvcNAQEMBQAwTDEg -MB4GA1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjYxEzARBgNVBAoTCkdsb2Jh -bFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTQxMjEwMDAwMDAwWhcNMzQx -MjEwMDAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSNjET 
-MBEGA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCAiIwDQYJ -KoZIhvcNAQEBBQADggIPADCCAgoCggIBAJUH6HPKZvnsFMp7PPcNCPG0RQssgrRI -xutbPK6DuEGSMxSkb3/pKszGsIhrxbaJ0cay/xTOURQh7ErdG1rG1ofuTToVBu1k -ZguSgMpE3nOUTvOniX9PeGMIyBJQbUJmL025eShNUhqKGoC3GYEOfsSKvGRMIRxD -aNc9PIrFsmbVkJq3MQbFvuJtMgamHvm566qjuL++gmNQ0PAYid/kD3n16qIfKtJw -LnvnvJO7bVPiSHyMEAc4/2ayd2F+4OqMPKq0pPbzlUoSB239jLKJz9CgYXfIWHSw -1CM69106yqLbnQneXUQtkPGBzVeS+n68UARjNN9rkxi+azayOeSsJDa38O+2HBNX -k7besvjihbdzorg1qkXy4J02oW9UivFyVm4uiMVRQkQVlO6jxTiWm05OWgtH8wY2 -SXcwvHE35absIQh1/OZhFj931dmRl4QKbNQCTXTAFO39OfuD8l4UoQSwC+n+7o/h -bguyCLNhZglqsQY6ZZZZwPA1/cnaKI0aEYdwgQqomnUdnjqGBQCe24DWJfncBZ4n -WUx2OVvq+aWh2IMP0f/fMBH5hc8zSPXKbWQULHpYT9NLCEnFlWQaYw55PfWzjMpY -rZxCRXluDocZXFSxZba/jJvcE+kNb7gu3GduyYsRtYQUigAZcIN5kZeR1Bonvzce -MgfYFGM8KEyvAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTAD -AQH/MB0GA1UdDgQWBBSubAWjkxPioufi1xzWx/B/yGdToDAfBgNVHSMEGDAWgBSu -bAWjkxPioufi1xzWx/B/yGdToDANBgkqhkiG9w0BAQwFAAOCAgEAgyXt6NH9lVLN -nsAEoJFp5lzQhN7craJP6Ed41mWYqVuoPId8AorRbrcWc+ZfwFSY1XS+wc3iEZGt -Ixg93eFyRJa0lV7Ae46ZeBZDE1ZXs6KzO7V33EByrKPrmzU+sQghoefEQzd5Mr61 -55wsTLxDKZmOMNOsIeDjHfrYBzN2VAAiKrlNIC5waNrlU/yDXNOd8v9EDERm8tLj -vUYAGm0CuiVdjaExUd1URhxN25mW7xocBFymFe944Hn+Xds+qkxV/ZoVqW/hpvvf -cDDpw+5CRu3CkwWJ+n1jez/QcYF8AOiYrg54NMMl+68KnyBr3TsTjxKM4kEaSHpz -oHdpx7Zcf4LIHv5YGygrqGytXm3ABdJ7t+uA/iU3/gKbaKxCXcPu9czc8FB10jZp -nOZ7BN9uBmm23goJSFmH63sUYHpkqmlD75HHTOwY3WzvUy2MmeFe8nI+z1TIvWfs -pA9MRf/TuTAjB0yPEL+GltmZWrSZVxykzLsViVO6LAUP5MSeGbEYNNVMnbrt9x+v -JJUEeKgDu+6B5dpffItKoZB0JaezPkvILFa9x8jvOOJckvB595yEunQtYQEgfn7R -8k8HWV+LLUNS60YMlOH1Zkd5d9VUWx+tJDfLRVpOoERIyNiwmcUVhAn21klJwGW4 -5hpxbqCo8YLoRT5s1gLXCmeDBVrJpBA= ------END CERTIFICATE----- - -# Issuer: CN=OISTE WISeKey Global Root GC CA O=WISeKey OU=OISTE Foundation Endorsed -# Subject: CN=OISTE WISeKey Global Root GC CA O=WISeKey OU=OISTE Foundation Endorsed -# Label: "OISTE WISeKey Global Root GC CA" -# Serial: 44084345621038548146064804565436152554 -# MD5 Fingerprint: 
a9:d6:b9:2d:2f:93:64:f8:a5:69:ca:91:e9:68:07:23 -# SHA1 Fingerprint: e0:11:84:5e:34:de:be:88:81:b9:9c:f6:16:26:d1:96:1f:c3:b9:31 -# SHA256 Fingerprint: 85:60:f9:1c:36:24:da:ba:95:70:b5:fe:a0:db:e3:6f:f1:1a:83:23:be:94:86:85:4f:b3:f3:4a:55:71:19:8d ------BEGIN CERTIFICATE----- -MIICaTCCAe+gAwIBAgIQISpWDK7aDKtARb8roi066jAKBggqhkjOPQQDAzBtMQsw -CQYDVQQGEwJDSDEQMA4GA1UEChMHV0lTZUtleTEiMCAGA1UECxMZT0lTVEUgRm91 -bmRhdGlvbiBFbmRvcnNlZDEoMCYGA1UEAxMfT0lTVEUgV0lTZUtleSBHbG9iYWwg -Um9vdCBHQyBDQTAeFw0xNzA1MDkwOTQ4MzRaFw00MjA1MDkwOTU4MzNaMG0xCzAJ -BgNVBAYTAkNIMRAwDgYDVQQKEwdXSVNlS2V5MSIwIAYDVQQLExlPSVNURSBGb3Vu -ZGF0aW9uIEVuZG9yc2VkMSgwJgYDVQQDEx9PSVNURSBXSVNlS2V5IEdsb2JhbCBS -b290IEdDIENBMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAETOlQwMYPchi82PG6s4ni -eUqjFqdrVCTbUf/q9Akkwwsin8tqJ4KBDdLArzHkdIJuyiXZjHWd8dvQmqJLIX4W -p2OQ0jnUsYd4XxiWD1AbNTcPasbc2RNNpI6QN+a9WzGRo1QwUjAOBgNVHQ8BAf8E -BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUSIcUrOPDnpBgOtfKie7T -rYy0UGYwEAYJKwYBBAGCNxUBBAMCAQAwCgYIKoZIzj0EAwMDaAAwZQIwJsdpW9zV -57LnyAyMjMPdeYwbY9XJUpROTYJKcx6ygISpJcBMWm1JKWB4E+J+SOtkAjEA2zQg -Mgj/mkkCtojeFK9dbJlxjRo/i9fgojaGHAeCOnZT/cKi7e97sIBPWA9LUzm9 ------END CERTIFICATE----- + +# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6 +# Label: "GlobalSign Root CA - R6" +# Serial: 1417766617973444989252670301619537 +# MD5 Fingerprint: 4f:dd:07:e4:d4:22:64:39:1e:0c:37:42:ea:d1:c6:ae +# SHA1 Fingerprint: 80:94:64:0e:b5:a7:a1:ca:11:9c:1f:dd:d5:9f:81:02:63:a7:fb:d1 +# SHA256 Fingerprint: 2c:ab:ea:fe:37:d0:6c:a2:2a:ba:73:91:c0:03:3d:25:98:29:52:c4:53:64:73:49:76:3a:3a:b5:ad:6c:cf:69 +-----BEGIN CERTIFICATE----- +MIIFgzCCA2ugAwIBAgIORea7A4Mzw4VlSOb/RVEwDQYJKoZIhvcNAQEMBQAwTDEg +MB4GA1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjYxEzARBgNVBAoTCkdsb2Jh +bFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTQxMjEwMDAwMDAwWhcNMzQx +MjEwMDAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSNjET +MBEGA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCAiIwDQYJ 
+KoZIhvcNAQEBBQADggIPADCCAgoCggIBAJUH6HPKZvnsFMp7PPcNCPG0RQssgrRI +xutbPK6DuEGSMxSkb3/pKszGsIhrxbaJ0cay/xTOURQh7ErdG1rG1ofuTToVBu1k +ZguSgMpE3nOUTvOniX9PeGMIyBJQbUJmL025eShNUhqKGoC3GYEOfsSKvGRMIRxD +aNc9PIrFsmbVkJq3MQbFvuJtMgamHvm566qjuL++gmNQ0PAYid/kD3n16qIfKtJw +LnvnvJO7bVPiSHyMEAc4/2ayd2F+4OqMPKq0pPbzlUoSB239jLKJz9CgYXfIWHSw +1CM69106yqLbnQneXUQtkPGBzVeS+n68UARjNN9rkxi+azayOeSsJDa38O+2HBNX +k7besvjihbdzorg1qkXy4J02oW9UivFyVm4uiMVRQkQVlO6jxTiWm05OWgtH8wY2 +SXcwvHE35absIQh1/OZhFj931dmRl4QKbNQCTXTAFO39OfuD8l4UoQSwC+n+7o/h +bguyCLNhZglqsQY6ZZZZwPA1/cnaKI0aEYdwgQqomnUdnjqGBQCe24DWJfncBZ4n +WUx2OVvq+aWh2IMP0f/fMBH5hc8zSPXKbWQULHpYT9NLCEnFlWQaYw55PfWzjMpY +rZxCRXluDocZXFSxZba/jJvcE+kNb7gu3GduyYsRtYQUigAZcIN5kZeR1Bonvzce +MgfYFGM8KEyvAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTAD +AQH/MB0GA1UdDgQWBBSubAWjkxPioufi1xzWx/B/yGdToDAfBgNVHSMEGDAWgBSu +bAWjkxPioufi1xzWx/B/yGdToDANBgkqhkiG9w0BAQwFAAOCAgEAgyXt6NH9lVLN +nsAEoJFp5lzQhN7craJP6Ed41mWYqVuoPId8AorRbrcWc+ZfwFSY1XS+wc3iEZGt +Ixg93eFyRJa0lV7Ae46ZeBZDE1ZXs6KzO7V33EByrKPrmzU+sQghoefEQzd5Mr61 +55wsTLxDKZmOMNOsIeDjHfrYBzN2VAAiKrlNIC5waNrlU/yDXNOd8v9EDERm8tLj +vUYAGm0CuiVdjaExUd1URhxN25mW7xocBFymFe944Hn+Xds+qkxV/ZoVqW/hpvvf +cDDpw+5CRu3CkwWJ+n1jez/QcYF8AOiYrg54NMMl+68KnyBr3TsTjxKM4kEaSHpz +oHdpx7Zcf4LIHv5YGygrqGytXm3ABdJ7t+uA/iU3/gKbaKxCXcPu9czc8FB10jZp +nOZ7BN9uBmm23goJSFmH63sUYHpkqmlD75HHTOwY3WzvUy2MmeFe8nI+z1TIvWfs +pA9MRf/TuTAjB0yPEL+GltmZWrSZVxykzLsViVO6LAUP5MSeGbEYNNVMnbrt9x+v +JJUEeKgDu+6B5dpffItKoZB0JaezPkvILFa9x8jvOOJckvB595yEunQtYQEgfn7R +8k8HWV+LLUNS60YMlOH1Zkd5d9VUWx+tJDfLRVpOoERIyNiwmcUVhAn21klJwGW4 +5hpxbqCo8YLoRT5s1gLXCmeDBVrJpBA= +-----END CERTIFICATE----- + +# Issuer: CN=OISTE WISeKey Global Root GC CA O=WISeKey OU=OISTE Foundation Endorsed +# Subject: CN=OISTE WISeKey Global Root GC CA O=WISeKey OU=OISTE Foundation Endorsed +# Label: "OISTE WISeKey Global Root GC CA" +# Serial: 44084345621038548146064804565436152554 +# MD5 Fingerprint: a9:d6:b9:2d:2f:93:64:f8:a5:69:ca:91:e9:68:07:23 +# SHA1 
Fingerprint: e0:11:84:5e:34:de:be:88:81:b9:9c:f6:16:26:d1:96:1f:c3:b9:31 +# SHA256 Fingerprint: 85:60:f9:1c:36:24:da:ba:95:70:b5:fe:a0:db:e3:6f:f1:1a:83:23:be:94:86:85:4f:b3:f3:4a:55:71:19:8d +-----BEGIN CERTIFICATE----- +MIICaTCCAe+gAwIBAgIQISpWDK7aDKtARb8roi066jAKBggqhkjOPQQDAzBtMQsw +CQYDVQQGEwJDSDEQMA4GA1UEChMHV0lTZUtleTEiMCAGA1UECxMZT0lTVEUgRm91 +bmRhdGlvbiBFbmRvcnNlZDEoMCYGA1UEAxMfT0lTVEUgV0lTZUtleSBHbG9iYWwg +Um9vdCBHQyBDQTAeFw0xNzA1MDkwOTQ4MzRaFw00MjA1MDkwOTU4MzNaMG0xCzAJ +BgNVBAYTAkNIMRAwDgYDVQQKEwdXSVNlS2V5MSIwIAYDVQQLExlPSVNURSBGb3Vu +ZGF0aW9uIEVuZG9yc2VkMSgwJgYDVQQDEx9PSVNURSBXSVNlS2V5IEdsb2JhbCBS +b290IEdDIENBMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAETOlQwMYPchi82PG6s4ni +eUqjFqdrVCTbUf/q9Akkwwsin8tqJ4KBDdLArzHkdIJuyiXZjHWd8dvQmqJLIX4W +p2OQ0jnUsYd4XxiWD1AbNTcPasbc2RNNpI6QN+a9WzGRo1QwUjAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUSIcUrOPDnpBgOtfKie7T +rYy0UGYwEAYJKwYBBAGCNxUBBAMCAQAwCgYIKoZIzj0EAwMDaAAwZQIwJsdpW9zV +57LnyAyMjMPdeYwbY9XJUpROTYJKcx6ygISpJcBMWm1JKWB4E+J+SOtkAjEA2zQg +Mgj/mkkCtojeFK9dbJlxjRo/i9fgojaGHAeCOnZT/cKi7e97sIBPWA9LUzm9 +-----END CERTIFICATE----- # Issuer: CN=GTS Root R1 O=Google Trust Services LLC # Subject: CN=GTS Root R1 O=Google Trust Services LLC @@ -4236,7 +4236,7 @@ Mgj/mkkCtojeFK9dbJlxjRo/i9fgojaGHAeCOnZT/cKi7e97sIBPWA9LUzm9 # MD5 Fingerprint: 82:1a:ef:d4:d2:4a:f2:9f:e2:3d:97:06:14:70:72:85 # SHA1 Fingerprint: e1:c9:50:e6:ef:22:f8:4c:56:45:72:8b:92:20:60:d7:d5:a7:a3:e8 # SHA256 Fingerprint: 2a:57:54:71:e3:13:40:bc:21:58:1c:bd:2c:f1:3e:15:84:63:20:3e:ce:94:bc:f9:d3:cc:19:6b:f0:9a:54:72 ------BEGIN CERTIFICATE----- +-----BEGIN CERTIFICATE----- MIIFWjCCA0KgAwIBAgIQbkepxUtHDA3sM9CJuRz04TANBgkqhkiG9w0BAQwFADBH MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy diff --git a/certs/update-certs.py b/certs/update-certs.py index 045daa64ae1..13a2dabc941 100755 --- a/certs/update-certs.py +++ b/certs/update-certs.py @@ -11,7 +11,7 @@ def get_text(url): 
common_root_cas = get_text(COMMON_ROOT_CAS_URL) yandex_internal_cas = get_text(YANDEX_INTERNAL_CAS_URL) - + with open("cacert.pem", "wt") as target: target.write(common_root_cas) target.write(yandex_internal_cas) diff --git a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make index 74e8bab7081..d6b0823d455 100644 --- a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make +++ b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make @@ -13,7 +13,7 @@ LICENSE( MIT AND Zlib ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make index 7bc59e52a58..df8594d6cca 100644 --- a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make +++ b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make @@ -8,8 +8,8 @@ OWNER( g:cpp-contrib ) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/base64/plain32/codec_plain.c b/contrib/libs/base64/plain32/codec_plain.c index 25ce09e3a14..740d343468a 100644 --- a/contrib/libs/base64/plain32/codec_plain.c +++ b/contrib/libs/base64/plain32/codec_plain.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include "libbase64.h" #include "codecs.h" diff --git a/contrib/libs/base64/plain32/dec_uint32.c b/contrib/libs/base64/plain32/dec_uint32.c index e2829633a21..db701d73d41 100644 --- a/contrib/libs/base64/plain32/dec_uint32.c +++ b/contrib/libs/base64/plain32/dec_uint32.c @@ -8,8 +8,8 @@ while (srclen >= 8) uint32_t str, res, dec; // Load string: - //str = *(uint32_t *)c; - memcpy(&str, c, sizeof(str)); + //str = *(uint32_t *)c; + memcpy(&str, c, sizeof(str)); // Shuffle bytes to 32-bit bigendian: str = cpu_to_be32(str); @@ -40,8 +40,8 @@ while (srclen >= 8) res = be32_to_cpu(res); // Store back: - //*(uint32_t *)o = res; - memcpy(o, &res, sizeof(res)); + //*(uint32_t *)o = res; + memcpy(o, &res, sizeof(res)); c += 4; 
o += 3; diff --git a/contrib/libs/base64/plain32/enc_uint32.c b/contrib/libs/base64/plain32/enc_uint32.c index 9e117f06028..1dbe5fbe536 100644 --- a/contrib/libs/base64/plain32/enc_uint32.c +++ b/contrib/libs/base64/plain32/enc_uint32.c @@ -3,11 +3,11 @@ while (srclen >= 4) { // Load string: - //uint32_t str = *(uint32_t *)c; - uint32_t str; + //uint32_t str = *(uint32_t *)c; + uint32_t str; + + memcpy(&str, c, sizeof(str)); - memcpy(&str, c, sizeof(str)); - // Reorder to 32-bit big-endian, if not already in that format. The // workset must be in big-endian, otherwise the shifted bits do not // carry over properly among adjacent bytes: diff --git a/contrib/libs/base64/plain64/codec_plain.c b/contrib/libs/base64/plain64/codec_plain.c index 67e5ed88f5d..26a5af90979 100644 --- a/contrib/libs/base64/plain64/codec_plain.c +++ b/contrib/libs/base64/plain64/codec_plain.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include "libbase64.h" #include "codecs.h" diff --git a/contrib/libs/base64/plain64/dec_uint64.c b/contrib/libs/base64/plain64/dec_uint64.c index 8dc787dd8d4..fe26e9881bf 100644 --- a/contrib/libs/base64/plain64/dec_uint64.c +++ b/contrib/libs/base64/plain64/dec_uint64.c @@ -8,8 +8,8 @@ while (srclen >= 13) uint64_t str, res, dec; // Load string: - //str = *(uint64_t *)c; - memcpy(&str, c, sizeof(str)); + //str = *(uint64_t *)c; + memcpy(&str, c, sizeof(str)); // Shuffle bytes to 64-bit bigendian: str = cpu_to_be64(str); @@ -60,8 +60,8 @@ while (srclen >= 13) res = be64_to_cpu(res); // Store back: - //*(uint64_t *)o = res; - memcpy(o, &res, sizeof(res)); + //*(uint64_t *)o = res; + memcpy(o, &res, sizeof(res)); c += 8; o += 6; diff --git a/contrib/libs/base64/plain64/enc_uint64.c b/contrib/libs/base64/plain64/enc_uint64.c index 67636b12a01..3d5955af24d 100644 --- a/contrib/libs/base64/plain64/enc_uint64.c +++ b/contrib/libs/base64/plain64/enc_uint64.c @@ -3,11 +3,11 @@ while (srclen >= 8) { // Load string: - //uint64_t str = *(uint64_t *)c; - 
uint64_t str; + //uint64_t str = *(uint64_t *)c; + uint64_t str; + + memcpy(&str, c, sizeof(str)); - memcpy(&str, c, sizeof(str)); - // Reorder to 64-bit big-endian, if not already in that format. The // workset must be in big-endian, otherwise the shifted bits do not // carry over properly among adjacent bytes: diff --git a/contrib/libs/brotli/LICENSE b/contrib/libs/brotli/LICENSE index 981ec3b4f3b..33b7cdd2dba 100644 --- a/contrib/libs/brotli/LICENSE +++ b/contrib/libs/brotli/LICENSE @@ -1,15 +1,15 @@ Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. - + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/contrib/libs/brotli/README.md b/contrib/libs/brotli/README.md index 3874ddfa0f7..6b9b0cf0e19 100644 --- a/contrib/libs/brotli/README.md +++ b/contrib/libs/brotli/README.md @@ -1,19 +1,19 @@

Brotli

- + ### Introduction -Brotli is a generic-purpose lossless compression algorithm that compresses data -using a combination of a modern variant of the LZ77 algorithm, Huffman coding -and 2nd order context modeling, with a compression ratio comparable to the best -currently available general-purpose compression methods. It is similar in speed -with deflate but offers more dense compression. - +Brotli is a generic-purpose lossless compression algorithm that compresses data +using a combination of a modern variant of the LZ77 algorithm, Huffman coding +and 2nd order context modeling, with a compression ratio comparable to the best +currently available general-purpose compression methods. It is similar in speed +with deflate but offers more dense compression. + The specification of the Brotli Compressed Data Format is defined in [RFC 7932](https://tools.ietf.org/html/rfc7932). - + Brotli is open-sourced under the MIT License, see the LICENSE file. - -Brotli mailing list: -https://groups.google.com/forum/#!forum/brotli + +Brotli mailing list: +https://groups.google.com/forum/#!forum/brotli [![TravisCI Build Status](https://travis-ci.org/google/brotli.svg?branch=master)](https://travis-ci.org/google/brotli) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/google/brotli?branch=master&svg=true)](https://ci.appveyor.com/project/szabadka/brotli) diff --git a/contrib/libs/brotli/common/ya.make b/contrib/libs/brotli/common/ya.make index 7419a29f5a7..6c4157831c9 100644 --- a/contrib/libs/brotli/common/ya.make +++ b/contrib/libs/brotli/common/ya.make @@ -1,6 +1,6 @@ LIBRARY() -LICENSE(MIT) +LICENSE(MIT) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) diff --git a/contrib/libs/brotli/dec/bit_reader.c b/contrib/libs/brotli/dec/bit_reader.c index a685fca6c2b..722fd906dd4 100644 --- a/contrib/libs/brotli/dec/bit_reader.c +++ b/contrib/libs/brotli/dec/bit_reader.c @@ -1,25 +1,25 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. 
- +/* Copyright 2013 Google Inc. All Rights Reserved. + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Bit reading helpers */ - +*/ + +/* Bit reading helpers */ + #include "./bit_reader.h" - + #include "../common/platform.h" #include - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + void BrotliInitBitReader(BrotliBitReader* const br) { - br->val_ = 0; - br->bit_pos_ = sizeof(br->val_) << 3; -} - + br->val_ = 0; + br->bit_pos_ = sizeof(br->val_) << 3; +} + BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) { size_t aligned_read_mask = (sizeof(br->val_) >> 1) - 1; /* Fixing alignment after unaligned BrotliFillWindow would result accumulator @@ -31,8 +31,8 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) { if (BrotliGetAvailableBits(br) == 0) { if (!BrotliPullByte(br)) { return BROTLI_FALSE; - } - } + } + } while ((((size_t)br->next_in) & aligned_read_mask) != 0) { if (!BrotliPullByte(br)) { @@ -41,8 +41,8 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) { } } return BROTLI_TRUE; -} - -#if defined(__cplusplus) || defined(c_plusplus) +} + +#if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ -#endif +#endif diff --git a/contrib/libs/brotli/dec/bit_reader.h b/contrib/libs/brotli/dec/bit_reader.h index 732072bfdd8..c06e91419f2 100644 --- a/contrib/libs/brotli/dec/bit_reader.h +++ b/contrib/libs/brotli/dec/bit_reader.h @@ -1,25 +1,25 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - +/* Copyright 2013 Google Inc. All Rights Reserved. + Distributed under MIT license. 
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Bit reading helpers */ - -#ifndef BROTLI_DEC_BIT_READER_H_ -#define BROTLI_DEC_BIT_READER_H_ - +*/ + +/* Bit reading helpers */ + +#ifndef BROTLI_DEC_BIT_READER_H_ +#define BROTLI_DEC_BIT_READER_H_ + #include /* memcpy */ - + #include "../common/platform.h" #include - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + #define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1) - + static const uint32_t kBitMask[33] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF, @@ -30,7 +30,7 @@ static const uint32_t kBitMask[33] = { 0x00000000, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; - + static BROTLI_INLINE uint32_t BitMask(uint32_t n) { if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) { /* Masking with this expression turns to a single @@ -41,20 +41,20 @@ static BROTLI_INLINE uint32_t BitMask(uint32_t n) { } } -typedef struct { +typedef struct { brotli_reg_t val_; /* pre-fetched bits */ uint32_t bit_pos_; /* current bit-reading position in val_ */ const uint8_t* next_in; /* the byte we're reading from */ size_t avail_in; -} BrotliBitReader; - +} BrotliBitReader; + typedef struct { brotli_reg_t val_; uint32_t bit_pos_; const uint8_t* next_in; size_t avail_in; } BrotliBitReaderState; - + /* Initializes the BrotliBitReader fields. */ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br); @@ -64,7 +64,7 @@ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br); For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned reading. 
*/ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br); - + static BROTLI_INLINE void BrotliBitReaderSaveState( BrotliBitReader* const from, BrotliBitReaderState* to) { to->val_ = from->val_; @@ -72,7 +72,7 @@ static BROTLI_INLINE void BrotliBitReaderSaveState( to->next_in = from->next_in; to->avail_in = from->avail_in; } - + static BROTLI_INLINE void BrotliBitReaderRestoreState( BrotliBitReader* const to, BrotliBitReaderState* from) { to->val_ = from->val_; @@ -80,15 +80,15 @@ static BROTLI_INLINE void BrotliBitReaderRestoreState( to->next_in = from->next_in; to->avail_in = from->avail_in; } - + static BROTLI_INLINE uint32_t BrotliGetAvailableBits( const BrotliBitReader* br) { return (BROTLI_64_BITS ? 64 : 32) - br->bit_pos_; -} - -/* Returns amount of unread bytes the bit reader still has buffered from the - BrotliInput, including whole bytes in br->val_. */ -static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) { +} + +/* Returns amount of unread bytes the bit reader still has buffered from the + BrotliInput, including whole bytes in br->val_. */ +static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) { return br->avail_in + (BrotliGetAvailableBits(br) >> 3); } @@ -100,59 +100,59 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount( } /* Guarantees that there are at least |n_bits| + 1 bits in accumulator. - Precondition: accumulator contains at least 1 bit. + Precondition: accumulator contains at least 1 bit. |n_bits| should be in the range [1..24] for regular build. For portable non-64-bit little-endian build only 16 bits are safe to request. 
*/ -static BROTLI_INLINE void BrotliFillBitWindow( +static BROTLI_INLINE void BrotliFillBitWindow( BrotliBitReader* const br, uint32_t n_bits) { #if (BROTLI_64_BITS) if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { - if (br->bit_pos_ >= 56) { - br->val_ >>= 56; - br->bit_pos_ ^= 56; /* here same as -= 56 because of the if condition */ + if (br->bit_pos_ >= 56) { + br->val_ >>= 56; + br->bit_pos_ ^= 56; /* here same as -= 56 because of the if condition */ br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8; - br->avail_in -= 7; - br->next_in += 7; - } + br->avail_in -= 7; + br->next_in += 7; + } } else if ( !BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) { - if (br->bit_pos_ >= 48) { - br->val_ >>= 48; - br->bit_pos_ ^= 48; /* here same as -= 48 because of the if condition */ + if (br->bit_pos_ >= 48) { + br->val_ >>= 48; + br->bit_pos_ ^= 48; /* here same as -= 48 because of the if condition */ br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16; - br->avail_in -= 6; - br->next_in += 6; - } - } else { - if (br->bit_pos_ >= 32) { - br->val_ >>= 32; - br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */ + br->avail_in -= 6; + br->next_in += 6; + } + } else { + if (br->bit_pos_ >= 32) { + br->val_ >>= 32; + br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */ br->val_ |= ((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32; br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ; - } - } + } + } #else if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { - if (br->bit_pos_ >= 24) { - br->val_ >>= 24; - br->bit_pos_ ^= 24; /* here same as -= 24 because of the if condition */ + if (br->bit_pos_ >= 24) { + br->val_ >>= 24; + br->bit_pos_ ^= 24; /* here same as -= 24 because of the if condition */ br->val_ |= BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8; - br->avail_in -= 3; - br->next_in += 3; - } - } else { 
- if (br->bit_pos_ >= 16) { - br->val_ >>= 16; - br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */ + br->avail_in -= 3; + br->next_in += 3; + } + } else { + if (br->bit_pos_ >= 16) { + br->val_ >>= 16; + br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */ br->val_ |= ((uint32_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16; br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ; - } - } -#endif -} - + } + } +#endif +} + /* Mostly like BrotliFillBitWindow, but guarantees only 16 bits and reads no more than BROTLI_SHORT_FILL_BIT_WINDOW_READ bytes of input. */ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) { @@ -165,25 +165,25 @@ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) { if (br->avail_in == 0) { return BROTLI_FALSE; } - br->val_ >>= 8; + br->val_ >>= 8; #if (BROTLI_64_BITS) br->val_ |= ((uint64_t)*br->next_in) << 56; -#else +#else br->val_ |= ((uint32_t)*br->next_in) << 24; -#endif - br->bit_pos_ -= 8; - --br->avail_in; - ++br->next_in; +#endif + br->bit_pos_ -= 8; + --br->avail_in; + ++br->next_in; return BROTLI_TRUE; -} - +} + /* Returns currently available bits. The number of valid bits could be calculated by BrotliGetAvailableBits. */ static BROTLI_INLINE brotli_reg_t BrotliGetBitsUnmasked( BrotliBitReader* const br) { return br->val_ >> br->bit_pos_; -} - +} + /* Like BrotliGetBits, but does not mask the result. The result contains at least 16 valid bits. */ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked( @@ -194,12 +194,12 @@ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked( /* Returns the specified number of bits from |br| without advancing bit position. 
*/ -static BROTLI_INLINE uint32_t BrotliGetBits( +static BROTLI_INLINE uint32_t BrotliGetBits( BrotliBitReader* const br, uint32_t n_bits) { - BrotliFillBitWindow(br, n_bits); + BrotliFillBitWindow(br, n_bits); return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); -} - +} + /* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there is not enough input. */ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( @@ -214,11 +214,11 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( } /* Advances the bit pos by |n_bits|. */ -static BROTLI_INLINE void BrotliDropBits( +static BROTLI_INLINE void BrotliDropBits( BrotliBitReader* const br, uint32_t n_bits) { br->bit_pos_ += n_bits; -} - +} + static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) { uint32_t unused_bytes = BrotliGetAvailableBits(br) >> 3; uint32_t unused_bits = unused_bytes << 3; @@ -234,17 +234,17 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) { /* Reads the specified number of bits from |br| and advances the bit pos. Precondition: accumulator MUST contain at least |n_bits|. */ -static BROTLI_INLINE void BrotliTakeBits( +static BROTLI_INLINE void BrotliTakeBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); BROTLI_LOG(("[BrotliReadBits] %d %d %d val: %6x\n", (int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val)); BrotliDropBits(br, n_bits); -} - +} + /* Reads the specified number of bits from |br| and advances the bit pos. - Assumes that there is enough input to perform BrotliFillBitWindow. */ -static BROTLI_INLINE uint32_t BrotliReadBits( + Assumes that there is enough input to perform BrotliFillBitWindow. 
*/ +static BROTLI_INLINE uint32_t BrotliReadBits( BrotliBitReader* const br, uint32_t n_bits) { if (BROTLI_64_BITS || (n_bits <= 16)) { uint32_t val; @@ -260,8 +260,8 @@ static BROTLI_INLINE uint32_t BrotliReadBits( BrotliTakeBits(br, n_bits - 16, &high_val); return low_val | (high_val << 16); } -} - +} + /* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there is not enough input. |n_bits| MUST be positive. */ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits( @@ -269,41 +269,41 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits( while (BrotliGetAvailableBits(br) < n_bits) { if (!BrotliPullByte(br)) { return BROTLI_FALSE; - } - } - BrotliTakeBits(br, n_bits, val); + } + } + BrotliTakeBits(br, n_bits, val); return BROTLI_TRUE; -} - -/* Advances the bit reader position to the next byte boundary and verifies - that any skipped bits are set to zero. */ +} + +/* Advances the bit reader position to the next byte boundary and verifies + that any skipped bits are set to zero. */ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) { uint32_t pad_bits_count = BrotliGetAvailableBits(br) & 0x7; - uint32_t pad_bits = 0; - if (pad_bits_count != 0) { - BrotliTakeBits(br, pad_bits_count, &pad_bits); - } + uint32_t pad_bits = 0; + if (pad_bits_count != 0) { + BrotliTakeBits(br, pad_bits_count, &pad_bits); + } return TO_BROTLI_BOOL(pad_bits == 0); -} - -/* Copies remaining input bytes stored in the bit reader to the output. Value +} + +/* Copies remaining input bytes stored in the bit reader to the output. Value |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be - warmed up again after this. */ -static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest, - BrotliBitReader* br, size_t num) { + warmed up again after this. 
*/ +static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest, + BrotliBitReader* br, size_t num) { while (BrotliGetAvailableBits(br) >= 8 && num > 0) { *dest = (uint8_t)BrotliGetBitsUnmasked(br); BrotliDropBits(br, 8); - ++dest; - --num; - } - memcpy(dest, br->next_in, num); + ++dest; + --num; + } + memcpy(dest, br->next_in, num); br->avail_in -= num; - br->next_in += num; -} - -#if defined(__cplusplus) || defined(c_plusplus) + br->next_in += num; +} + +#if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ -#endif - -#endif /* BROTLI_DEC_BIT_READER_H_ */ +#endif + +#endif /* BROTLI_DEC_BIT_READER_H_ */ diff --git a/contrib/libs/brotli/dec/decode.c b/contrib/libs/brotli/dec/decode.c index ee898d4372e..08bd76ca16e 100644 --- a/contrib/libs/brotli/dec/decode.c +++ b/contrib/libs/brotli/dec/decode.c @@ -1,11 +1,11 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - +/* Copyright 2013 Google Inc. All Rights Reserved. + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ - + #include - + #include /* free, malloc */ #include /* memcpy, memset */ @@ -15,48 +15,48 @@ #include "../common/platform.h" #include "../common/transform.h" #include "../common/version.h" -#include "./bit_reader.h" +#include "./bit_reader.h" #include "./huffman.h" #include "./prefix.h" #include "./state.h" - + #if defined(BROTLI_TARGET_NEON) #include #endif -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + #define BROTLI_FAILURE(CODE) (BROTLI_DUMP(), CODE) - + #define BROTLI_LOG_UINT(name) \ BROTLI_LOG(("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name))) #define BROTLI_LOG_ARRAY_INDEX(array_name, idx) \ BROTLI_LOG(("[%s] %s[%lu] = %lu\n", __func__, #array_name, \ (unsigned long)(idx), (unsigned long)array_name[idx])) - + #define HUFFMAN_TABLE_BITS 8U #define HUFFMAN_TABLE_MASK 0xFF - + /* We need the slack region for the 
following reasons: - doing up to two 16-byte copies for fast backward copying - inserting transformed dictionary word (5 prefix + 24 base + 8 suffix) */ static const uint32_t kRingBufferWriteAheadSlack = 42; static const uint8_t kCodeLengthCodeOrder[BROTLI_CODE_LENGTH_CODES] = { - 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, -}; - -/* Static prefix code for the complex code length code lengths. */ -static const uint8_t kCodeLengthPrefixLength[16] = { - 2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 4, -}; - -static const uint8_t kCodeLengthPrefixValue[16] = { - 0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5, -}; - + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, +}; + +/* Static prefix code for the complex code length code lengths. */ +static const uint8_t kCodeLengthPrefixLength[16] = { + 2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 4, +}; + +static const uint8_t kCodeLengthPrefixValue[16] = { + 0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5, +}; + BROTLI_BOOL BrotliDecoderSetParameter( BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) { if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE; @@ -64,7 +64,7 @@ BROTLI_BOOL BrotliDecoderSetParameter( case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION: state->canny_ringbuffer_allocation = !!value ? 0 : 1; return BROTLI_TRUE; - + case BROTLI_DECODER_PARAM_LARGE_WINDOW: state->large_window = TO_BROTLI_BOOL(!!value); return BROTLI_TRUE; @@ -132,20 +132,20 @@ static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode( Precondition: bit-reader accumulator has at least 8 bits. 
*/ static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s, BrotliBitReader* br) { - uint32_t n; + uint32_t n; BROTLI_BOOL large_window = s->large_window; s->large_window = BROTLI_FALSE; - BrotliTakeBits(br, 1, &n); - if (n == 0) { + BrotliTakeBits(br, 1, &n); + if (n == 0) { s->window_bits = 16; return BROTLI_DECODER_SUCCESS; - } - BrotliTakeBits(br, 3, &n); - if (n != 0) { + } + BrotliTakeBits(br, 3, &n); + if (n != 0) { s->window_bits = 17 + n; return BROTLI_DECODER_SUCCESS; - } - BrotliTakeBits(br, 3, &n); + } + BrotliTakeBits(br, 3, &n); if (n == 1) { if (large_window) { BrotliTakeBits(br, 1, &n); @@ -158,188 +158,188 @@ static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s, return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); } } - if (n != 0) { + if (n != 0) { s->window_bits = 8 + n; return BROTLI_DECODER_SUCCESS; - } + } s->window_bits = 17; return BROTLI_DECODER_SUCCESS; -} - +} + static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) { #if defined(BROTLI_TARGET_NEON) - vst1q_u8(dst, vld1q_u8(src)); -#else + vst1q_u8(dst, vld1q_u8(src)); +#else uint32_t buffer[4]; memcpy(buffer, src, 16); memcpy(dst, buffer, 16); -#endif -} - -/* Decodes a number in the range [0..255], by reading 1 - 11 bits. */ +#endif +} + +/* Decodes a number in the range [0..255], by reading 1 - 11 bits. */ static BROTLI_NOINLINE BrotliDecoderErrorCode DecodeVarLenUint8( BrotliDecoderState* s, BrotliBitReader* br, uint32_t* value) { - uint32_t bits; - switch (s->substate_decode_uint8) { - case BROTLI_STATE_DECODE_UINT8_NONE: + uint32_t bits; + switch (s->substate_decode_uint8) { + case BROTLI_STATE_DECODE_UINT8_NONE: if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 1, &bits))) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - if (bits == 0) { - *value = 0; + } + if (bits == 0) { + *value = 0; return BROTLI_DECODER_SUCCESS; - } + } /* Fall through. 
*/ - - case BROTLI_STATE_DECODE_UINT8_SHORT: + + case BROTLI_STATE_DECODE_UINT8_SHORT: if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 3, &bits))) { - s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_SHORT; + s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_SHORT; return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - if (bits == 0) { - *value = 1; - s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE; + } + if (bits == 0) { + *value = 1; + s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE; return BROTLI_DECODER_SUCCESS; - } - /* Use output value as a temporary storage. It MUST be persisted. */ + } + /* Use output value as a temporary storage. It MUST be persisted. */ *value = bits; /* Fall through. */ - - case BROTLI_STATE_DECODE_UINT8_LONG: + + case BROTLI_STATE_DECODE_UINT8_LONG: if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, *value, &bits))) { - s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG; + s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG; return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } *value = (1U << *value) + bits; - s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE; + s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE; return BROTLI_DECODER_SUCCESS; - - default: + + default: return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE); - } -} - -/* Decodes a metablock length and flags by reading 2 - 31 bits. */ + } +} + +/* Decodes a metablock length and flags by reading 2 - 31 bits. */ static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength( BrotliDecoderState* s, BrotliBitReader* br) { - uint32_t bits; - int i; - for (;;) { - switch (s->substate_metablock_header) { - case BROTLI_STATE_METABLOCK_HEADER_NONE: - if (!BrotliSafeReadBits(br, 1, &bits)) { + uint32_t bits; + int i; + for (;;) { + switch (s->substate_metablock_header) { + case BROTLI_STATE_METABLOCK_HEADER_NONE: + if (!BrotliSafeReadBits(br, 1, &bits)) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } s->is_last_metablock = bits ? 
1 : 0; - s->meta_block_remaining_len = 0; - s->is_uncompressed = 0; - s->is_metadata = 0; - if (!s->is_last_metablock) { - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES; - break; - } - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_EMPTY; + s->meta_block_remaining_len = 0; + s->is_uncompressed = 0; + s->is_metadata = 0; + if (!s->is_last_metablock) { + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES; + break; + } + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_EMPTY; /* Fall through. */ - - case BROTLI_STATE_METABLOCK_HEADER_EMPTY: - if (!BrotliSafeReadBits(br, 1, &bits)) { + + case BROTLI_STATE_METABLOCK_HEADER_EMPTY: + if (!BrotliSafeReadBits(br, 1, &bits)) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - if (bits) { - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; + } + if (bits) { + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; return BROTLI_DECODER_SUCCESS; - } - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES; + } + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES; /* Fall through. */ - - case BROTLI_STATE_METABLOCK_HEADER_NIBBLES: - if (!BrotliSafeReadBits(br, 2, &bits)) { + + case BROTLI_STATE_METABLOCK_HEADER_NIBBLES: + if (!BrotliSafeReadBits(br, 2, &bits)) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - s->size_nibbles = (uint8_t)(bits + 4); - s->loop_counter = 0; - if (bits == 3) { - s->is_metadata = 1; - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_RESERVED; - break; - } - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_SIZE; + } + s->size_nibbles = (uint8_t)(bits + 4); + s->loop_counter = 0; + if (bits == 3) { + s->is_metadata = 1; + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_RESERVED; + break; + } + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_SIZE; /* Fall through. 
*/ - - case BROTLI_STATE_METABLOCK_HEADER_SIZE: - i = s->loop_counter; + + case BROTLI_STATE_METABLOCK_HEADER_SIZE: + i = s->loop_counter; for (; i < (int)s->size_nibbles; ++i) { - if (!BrotliSafeReadBits(br, 4, &bits)) { - s->loop_counter = i; + if (!BrotliSafeReadBits(br, 4, &bits)) { + s->loop_counter = i; return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - if (i + 1 == s->size_nibbles && s->size_nibbles > 4 && bits == 0) { + } + if (i + 1 == s->size_nibbles && s->size_nibbles > 4 && bits == 0) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE); - } - s->meta_block_remaining_len |= (int)(bits << (i * 4)); - } - s->substate_metablock_header = - BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED; + } + s->meta_block_remaining_len |= (int)(bits << (i * 4)); + } + s->substate_metablock_header = + BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED; /* Fall through. */ - - case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED: + + case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED: if (!s->is_last_metablock) { - if (!BrotliSafeReadBits(br, 1, &bits)) { + if (!BrotliSafeReadBits(br, 1, &bits)) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } s->is_uncompressed = bits ? 1 : 0; - } - ++s->meta_block_remaining_len; - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; + } + ++s->meta_block_remaining_len; + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; return BROTLI_DECODER_SUCCESS; - - case BROTLI_STATE_METABLOCK_HEADER_RESERVED: - if (!BrotliSafeReadBits(br, 1, &bits)) { + + case BROTLI_STATE_METABLOCK_HEADER_RESERVED: + if (!BrotliSafeReadBits(br, 1, &bits)) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - if (bits != 0) { + } + if (bits != 0) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_RESERVED); - } - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_BYTES; + } + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_BYTES; /* Fall through. 
*/ - - case BROTLI_STATE_METABLOCK_HEADER_BYTES: - if (!BrotliSafeReadBits(br, 2, &bits)) { + + case BROTLI_STATE_METABLOCK_HEADER_BYTES: + if (!BrotliSafeReadBits(br, 2, &bits)) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - if (bits == 0) { - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; + } + if (bits == 0) { + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; return BROTLI_DECODER_SUCCESS; - } - s->size_nibbles = (uint8_t)bits; - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_METADATA; + } + s->size_nibbles = (uint8_t)bits; + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_METADATA; /* Fall through. */ - - case BROTLI_STATE_METABLOCK_HEADER_METADATA: - i = s->loop_counter; + + case BROTLI_STATE_METABLOCK_HEADER_METADATA: + i = s->loop_counter; for (; i < (int)s->size_nibbles; ++i) { - if (!BrotliSafeReadBits(br, 8, &bits)) { - s->loop_counter = i; + if (!BrotliSafeReadBits(br, 8, &bits)) { + s->loop_counter = i; return BROTLI_DECODER_NEEDS_MORE_INPUT; - } - if (i + 1 == s->size_nibbles && s->size_nibbles > 1 && bits == 0) { + } + if (i + 1 == s->size_nibbles && s->size_nibbles > 1 && bits == 0) { return BROTLI_FAILURE( BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE); - } - s->meta_block_remaining_len |= (int)(bits << (i * 8)); - } + } + s->meta_block_remaining_len |= (int)(bits << (i * 8)); + } ++s->meta_block_remaining_len; s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; return BROTLI_DECODER_SUCCESS; - - default: + + default: return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE); - } - } -} - + } + } +} + /* Decodes the Huffman code. This method doesn't read data from the bit reader, BUT drops the amount of bits that correspond to the decoded symbol. 
@@ -351,15 +351,15 @@ static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits, BROTLI_HC_ADJUST_TABLE_INDEX(table, bits & HUFFMAN_TABLE_MASK); if (BROTLI_HC_FAST_LOAD_BITS(table) > HUFFMAN_TABLE_BITS) { uint32_t nbits = BROTLI_HC_FAST_LOAD_BITS(table) - HUFFMAN_TABLE_BITS; - BrotliDropBits(br, HUFFMAN_TABLE_BITS); + BrotliDropBits(br, HUFFMAN_TABLE_BITS); BROTLI_HC_ADJUST_TABLE_INDEX(table, BROTLI_HC_FAST_LOAD_VALUE(table) + ((bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits))); - } + } BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table)); return BROTLI_HC_FAST_LOAD_VALUE(table); -} - +} + /* Reads and decodes the next Huffman code from bit-stream. This method peeks 16 bits of input and drops 0 - 15 of them. */ static BROTLI_INLINE uint32_t ReadSymbol(const HuffmanCode* table, @@ -419,10 +419,10 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadSymbol( return SafeDecodeSymbol(table, br, result); } -/* Makes a look-up in first level Huffman table. Peeks 8 bits. */ +/* Makes a look-up in first level Huffman table. Peeks 8 bits. */ static BROTLI_INLINE void PreloadSymbol(int safe, const HuffmanCode* table, - BrotliBitReader* br, + BrotliBitReader* br, uint32_t* bits, uint32_t* value) { if (safe) { @@ -432,40 +432,40 @@ static BROTLI_INLINE void PreloadSymbol(int safe, BROTLI_HC_ADJUST_TABLE_INDEX(table, BrotliGetBits(br, HUFFMAN_TABLE_BITS)); *bits = BROTLI_HC_FAST_LOAD_BITS(table); *value = BROTLI_HC_FAST_LOAD_VALUE(table); -} - -/* Decodes the next Huffman code using data prepared by PreloadSymbol. - Reads 0 - 15 bits. Also peeks 8 following bits. */ +} + +/* Decodes the next Huffman code using data prepared by PreloadSymbol. + Reads 0 - 15 bits. Also peeks 8 following bits. 
*/ static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table, - BrotliBitReader* br, + BrotliBitReader* br, uint32_t* bits, uint32_t* value) { uint32_t result = *value; if (BROTLI_PREDICT_FALSE(*bits > HUFFMAN_TABLE_BITS)) { uint32_t val = BrotliGet16BitsUnmasked(br); - const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value; + const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value; uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS)); BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(ext); - BrotliDropBits(br, HUFFMAN_TABLE_BITS); + BrotliDropBits(br, HUFFMAN_TABLE_BITS); BROTLI_HC_ADJUST_TABLE_INDEX(ext, (val >> HUFFMAN_TABLE_BITS) & mask); BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(ext)); result = BROTLI_HC_FAST_LOAD_VALUE(ext); - } else { + } else { BrotliDropBits(br, *bits); - } + } PreloadSymbol(0, table, br, bits, value); - return result; -} - + return result; +} + static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) { uint32_t result = 0; - while (x) { - x >>= 1; - ++result; - } - return result; -} - + while (x) { + x >>= 1; + ++result; + } + return result; +} + /* Reads (s->symbol + 1) symbols. Totally 1..4 symbols are read, 1..11 bits each. The list of symbols MUST NOT contain duplicates. */ @@ -726,24 +726,24 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) { return BROTLI_DECODER_SUCCESS; } -/* Decodes the Huffman tables. - There are 2 scenarios: - A) Huffman code contains only few symbols (1..4). Those symbols are read - directly; their code lengths are defined by the number of symbols. +/* Decodes the Huffman tables. + There are 2 scenarios: + A) Huffman code contains only few symbols (1..4). Those symbols are read + directly; their code lengths are defined by the number of symbols. For this scenario 4 - 49 bits will be read. - - B) 2-phase decoding: - B.1) Small Huffman table is decoded; it is specified with code lengths - encoded with predefined entropy code. 32 - 74 bits are used. 
- B.2) Decoded table is used to decode code lengths of symbols in resulting + + B) 2-phase decoding: + B.1) Small Huffman table is decoded; it is specified with code lengths + encoded with predefined entropy code. 32 - 74 bits are used. + B.2) Decoded table is used to decode code lengths of symbols in resulting Huffman table. In worst case 3520 bits are read. */ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, uint32_t max_symbol, HuffmanCode* table, uint32_t* opt_table_size, BrotliDecoderState* s) { - BrotliBitReader* br = &s->br; - /* Unnecessary masking, but might be good for safety. */ + BrotliBitReader* br = &s->br; + /* Unnecessary masking, but might be good for safety. */ alphabet_size &= 0x7FF; /* State machine. */ for (;;) { @@ -769,11 +769,11 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, /* Fall through. */ case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE: - /* Read symbols, codes & code lengths directly. */ + /* Read symbols, codes & code lengths directly. */ if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */ s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE; return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } s->sub_loop_counter = 0; /* Fall through. 
*/ @@ -793,16 +793,16 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, if (!BrotliSafeReadBits(br, 1, &bits)) { s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD; return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } s->symbol += bits; - } + } BROTLI_LOG_UINT(s->symbol); table_size = BrotliBuildSimpleHuffmanTable( table, HUFFMAN_TABLE_BITS, s->symbols_lists_array, s->symbol); - if (opt_table_size) { + if (opt_table_size) { *opt_table_size = table_size; - } - s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE; + } + s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE; return BROTLI_DECODER_SUCCESS; } @@ -812,7 +812,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, BrotliDecoderErrorCode result = ReadCodeLengthCodeLengths(s); if (result != BROTLI_DECODER_SUCCESS) { return result; - } + } BrotliBuildCodeLengthsHuffmanTable(s->table, s->code_length_code_lengths, s->code_length_histo); @@ -820,7 +820,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, for (i = 0; i <= BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++i) { s->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1); s->symbol_lists[s->next_symbol[i]] = 0xFFFF; - } + } s->symbol = 0; s->prev_code_len = BROTLI_INITIAL_REPEATED_CODE_LENGTH; @@ -828,7 +828,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, s->repeat_code_len = 0; s->space = 32768; s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS; - } + } /* Fall through. 
*/ case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: { @@ -840,37 +840,37 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, if (result != BROTLI_DECODER_SUCCESS) { return result; } - + if (s->space != 0) { BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", (int)s->space)); return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE); - } + } table_size = BrotliBuildHuffmanTable( table, HUFFMAN_TABLE_BITS, s->symbol_lists, s->code_length_histo); - if (opt_table_size) { - *opt_table_size = table_size; - } + if (opt_table_size) { + *opt_table_size = table_size; + } s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE; return BROTLI_DECODER_SUCCESS; - } + } default: return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE); - } - } -} - -/* Decodes a block length by reading 3..39 bits. */ + } + } +} + +/* Decodes a block length by reading 3..39 bits. */ static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table, BrotliBitReader* br) { uint32_t code; uint32_t nbits; - code = ReadSymbol(table, br); + code = ReadSymbol(table, br); nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */ return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits); -} - +} + /* WARNING: if state is not BROTLI_STATE_READ_BLOCK_LENGTH_NONE, then reading can't be continued with ReadBlockLength. */ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( @@ -898,114 +898,114 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( } } -/* Transform: - 1) initialize list L with values 0, 1,... 255 - 2) For each input element X: - 2.1) let Y = L[X] - 2.2) remove X-th element from L - 2.3) prepend Y to L - 2.4) append Y to output - - In most cases max(Y) <= 7, so most of L remains intact. - To reduce the cost of initialization, we reuse L, remember the upper bound - of Y values, and reinitialize only first elements in L. - - Most of input values are 0 and 1. To reduce number of branches, we replace +/* Transform: + 1) initialize list L with values 0, 1,... 
255 + 2) For each input element X: + 2.1) let Y = L[X] + 2.2) remove X-th element from L + 2.3) prepend Y to L + 2.4) append Y to output + + In most cases max(Y) <= 7, so most of L remains intact. + To reduce the cost of initialization, we reuse L, remember the upper bound + of Y values, and reinitialize only first elements in L. + + Most of input values are 0 and 1. To reduce number of branches, we replace inner for loop with do-while. */ static BROTLI_NOINLINE void InverseMoveToFrontTransform( uint8_t* v, uint32_t v_len, BrotliDecoderState* state) { - /* Reinitialize elements that could have been changed. */ + /* Reinitialize elements that could have been changed. */ uint32_t i = 1; uint32_t upper_bound = state->mtf_upper_bound; uint32_t* mtf = &state->mtf[1]; /* Make mtf[-1] addressable. */ uint8_t* mtf_u8 = (uint8_t*)mtf; - /* Load endian-aware constant. */ - const uint8_t b0123[4] = {0, 1, 2, 3}; - uint32_t pattern; - memcpy(&pattern, &b0123, 4); - - /* Initialize list using 4 consequent values pattern. */ + /* Load endian-aware constant. */ + const uint8_t b0123[4] = {0, 1, 2, 3}; + uint32_t pattern; + memcpy(&pattern, &b0123, 4); + + /* Initialize list using 4 consequent values pattern. */ mtf[0] = pattern; - do { + do { pattern += 0x04040404; /* Advance all 4 values by 4. */ mtf[i] = pattern; i++; - } while (i <= upper_bound); - - /* Transform the input. */ - upper_bound = 0; - for (i = 0; i < v_len; ++i) { - int index = v[i]; + } while (i <= upper_bound); + + /* Transform the input. */ + upper_bound = 0; + for (i = 0; i < v_len; ++i) { + int index = v[i]; uint8_t value = mtf_u8[index]; upper_bound |= v[i]; - v[i] = value; + v[i] = value; mtf_u8[-1] = value; - do { - index--; + do { + index--; mtf_u8[index + 1] = mtf_u8[index]; } while (index >= 0); - } - /* Remember amount of elements to be reinitialized. */ + } + /* Remember amount of elements to be reinitialized. 
*/ state->mtf_upper_bound = upper_bound >> 2; -} - -/* Decodes a series of Huffman table using ReadHuffmanCode function. */ +} + +/* Decodes a series of Huffman table using ReadHuffmanCode function. */ static BrotliDecoderErrorCode HuffmanTreeGroupDecode( HuffmanTreeGroup* group, BrotliDecoderState* s) { - if (s->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) { - s->next = group->codes; - s->htree_index = 0; - s->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP; - } - while (s->htree_index < group->num_htrees) { + if (s->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) { + s->next = group->codes; + s->htree_index = 0; + s->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP; + } + while (s->htree_index < group->num_htrees) { uint32_t table_size; BrotliDecoderErrorCode result = ReadHuffmanCode(group->alphabet_size, group->max_symbol, s->next, &table_size, s); if (result != BROTLI_DECODER_SUCCESS) return result; - group->htrees[s->htree_index] = s->next; - s->next += table_size; - ++s->htree_index; - } - s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE; + group->htrees[s->htree_index] = s->next; + s->next += table_size; + ++s->htree_index; + } + s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE; return BROTLI_DECODER_SUCCESS; -} - -/* Decodes a context map. - Decoding is done in 4 phases: - 1) Read auxiliary information (6..16 bits) and allocate memory. - In case of trivial context map, decoding is finished at this phase. - 2) Decode Huffman table using ReadHuffmanCode function. - This table will be used for reading context map items. - 3) Read context map items; "0" values could be run-length encoded. +} + +/* Decodes a context map. + Decoding is done in 4 phases: + 1) Read auxiliary information (6..16 bits) and allocate memory. + In case of trivial context map, decoding is finished at this phase. + 2) Decode Huffman table using ReadHuffmanCode function. + This table will be used for reading context map items. 
+ 3) Read context map items; "0" values could be run-length encoded. 4) Optionally, apply InverseMoveToFront transform to the resulting map. */ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, uint32_t* num_htrees, uint8_t** context_map_arg, BrotliDecoderState* s) { - BrotliBitReader* br = &s->br; + BrotliBitReader* br = &s->br; BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS; - + switch ((int)s->substate_context_map) { - case BROTLI_STATE_CONTEXT_MAP_NONE: - result = DecodeVarLenUint8(s, br, num_htrees); + case BROTLI_STATE_CONTEXT_MAP_NONE: + result = DecodeVarLenUint8(s, br, num_htrees); if (result != BROTLI_DECODER_SUCCESS) { - return result; - } - (*num_htrees)++; - s->context_index = 0; - BROTLI_LOG_UINT(context_map_size); - BROTLI_LOG_UINT(*num_htrees); + return result; + } + (*num_htrees)++; + s->context_index = 0; + BROTLI_LOG_UINT(context_map_size); + BROTLI_LOG_UINT(*num_htrees); *context_map_arg = (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)context_map_size); - if (*context_map_arg == 0) { + if (*context_map_arg == 0) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP); - } - if (*num_htrees <= 1) { - memset(*context_map_arg, 0, (size_t)context_map_size); + } + if (*num_htrees <= 1) { + memset(*context_map_arg, 0, (size_t)context_map_size); return BROTLI_DECODER_SUCCESS; - } - s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX; + } + s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX; /* Fall through. */ case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: { @@ -1014,33 +1014,33 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, to peek 4 bits ahead. */ if (!BrotliSafeGetBits(br, 5, &bits)) { return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } if ((bits & 1) != 0) { /* Use RLE for zeros. 
*/ s->max_run_length_prefix = (bits >> 1) + 1; BrotliDropBits(br, 5); - } else { - s->max_run_length_prefix = 0; + } else { + s->max_run_length_prefix = 0; BrotliDropBits(br, 1); - } - BROTLI_LOG_UINT(s->max_run_length_prefix); - s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN; + } + BROTLI_LOG_UINT(s->max_run_length_prefix); + s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN; } /* Fall through. */ case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: { uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix; result = ReadHuffmanCode(alphabet_size, alphabet_size, - s->context_map_table, NULL, s); + s->context_map_table, NULL, s); if (result != BROTLI_DECODER_SUCCESS) return result; s->code = 0xFFFF; - s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE; + s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE; } /* Fall through. */ - case BROTLI_STATE_CONTEXT_MAP_DECODE: { + case BROTLI_STATE_CONTEXT_MAP_DECODE: { uint32_t context_index = s->context_index; uint32_t max_run_length_prefix = s->max_run_length_prefix; - uint8_t* context_map = *context_map_arg; + uint8_t* context_map = *context_map_arg; uint32_t code = s->code; BROTLI_BOOL skip_preamble = (code != 0xFFFF); while (context_index < context_map_size || skip_preamble) { @@ -1063,7 +1063,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, } } else { skip_preamble = BROTLI_FALSE; - } + } /* RLE sub-stage. 
*/ { uint32_t reps; @@ -1073,16 +1073,16 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, return BROTLI_DECODER_NEEDS_MORE_INPUT; } reps += 1U << code; - BROTLI_LOG_UINT(reps); - if (context_index + reps > context_map_size) { + BROTLI_LOG_UINT(reps); + if (context_index + reps > context_map_size) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT); - } - do { - context_map[context_index++] = 0; - } while (--reps); - } - } + } + do { + context_map[context_index++] = 0; + } while (--reps); + } + } } /* Fall through. */ @@ -1091,20 +1091,20 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, if (!BrotliSafeReadBits(br, 1, &bits)) { s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM; return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } if (bits != 0) { InverseMoveToFrontTransform(*context_map_arg, context_map_size, s); } - s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; + s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; return BROTLI_DECODER_SUCCESS; - } + } default: return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE); - } -} - + } +} + /* Decodes a command or literal and updates block type ring-buffer. Reads 3..54 bits. 
*/ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength( @@ -1137,20 +1137,20 @@ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength( } if (block_type == 1) { - block_type = ringbuffer[1] + 1; + block_type = ringbuffer[1] + 1; } else if (block_type == 0) { - block_type = ringbuffer[0]; + block_type = ringbuffer[0]; } else { block_type -= 2; - } - if (block_type >= max_block_type) { - block_type -= max_block_type; - } - ringbuffer[0] = ringbuffer[1]; - ringbuffer[1] = block_type; + } + if (block_type >= max_block_type) { + block_type -= max_block_type; + } + ringbuffer[0] = ringbuffer[1]; + ringbuffer[1] = block_type; return BROTLI_TRUE; -} - +} + static BROTLI_INLINE void DetectTrivialLiteralBlockTypes( BrotliDecoderState* s) { size_t i; @@ -1170,18 +1170,18 @@ static BROTLI_INLINE void DetectTrivialLiteralBlockTypes( } static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) { - uint8_t context_mode; + uint8_t context_mode; size_t trivial; uint32_t block_type = s->block_type_rb[1]; uint32_t context_offset = block_type << BROTLI_LITERAL_CONTEXT_BITS; - s->context_map_slice = s->context_map + context_offset; + s->context_map_slice = s->context_map + context_offset; trivial = s->trivial_literal_contexts[block_type >> 5]; s->trivial_literal_context = (trivial >> (block_type & 31)) & 1; s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]]; context_mode = s->context_modes[block_type] & 3; s->context_lookup = BROTLI_CONTEXT_LUT(context_mode); -} - +} + /* Decodes the block type and updates the state for literal context. Reads 3..54 bits. 
*/ static BROTLI_INLINE BROTLI_BOOL DecodeLiteralBlockSwitchInternal( @@ -1264,9 +1264,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer( if (num_written > to_write) { num_written = to_write; } - if (s->meta_block_remaining_len < 0) { + if (s->meta_block_remaining_len < 0) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1); - } + } if (next_out && !*next_out) { *next_out = start; } else { @@ -1277,18 +1277,18 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer( } *available_out -= num_written; BROTLI_LOG_UINT(to_write); - BROTLI_LOG_UINT(num_written); + BROTLI_LOG_UINT(num_written); s->partial_pos_out += num_written; if (total_out) { *total_out = s->partial_pos_out; - } + } if (num_written < to_write) { if (s->ringbuffer_size == (1 << s->window_bits) || force) { return BROTLI_DECODER_NEEDS_MORE_OUTPUT; } else { return BROTLI_DECODER_SUCCESS; } - } + } /* Wrap ring buffer only if it has reached its maximal size. */ if (s->ringbuffer_size == (1 << s->window_bits) && s->pos >= s->ringbuffer_size) { @@ -1297,8 +1297,8 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer( s->should_wrap_ringbuffer = (size_t)s->pos != 0 ? 
1 : 0; } return BROTLI_DECODER_SUCCESS; -} - +} + static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) { if (s->should_wrap_ringbuffer) { memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)s->pos); @@ -1350,17 +1350,17 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1); } - /* State machine */ - for (;;) { + /* State machine */ + for (;;) { switch (s->substate_uncompressed) { case BROTLI_STATE_UNCOMPRESSED_NONE: { int nbytes = (int)BrotliGetRemainingBytes(&s->br); if (nbytes > s->meta_block_remaining_len) { nbytes = s->meta_block_remaining_len; - } + } if (s->pos + nbytes > s->ringbuffer_size) { nbytes = s->ringbuffer_size - s->pos; - } + } /* Copy remaining bytes from s->br.buf_ to ring-buffer. */ BrotliCopyBytes(&s->ringbuffer[s->pos], &s->br, (size_t)nbytes); s->pos += nbytes; @@ -1368,9 +1368,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( if (s->pos < 1 << s->window_bits) { if (s->meta_block_remaining_len == 0) { return BROTLI_DECODER_SUCCESS; - } + } return BROTLI_DECODER_NEEDS_MORE_INPUT; - } + } s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE; } /* Fall through. */ @@ -1380,19 +1380,19 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( result = WriteRingBuffer( s, available_out, next_out, total_out, BROTLI_FALSE); if (result != BROTLI_DECODER_SUCCESS) { - return result; - } + return result; + } if (s->ringbuffer_size == 1 << s->window_bits) { s->max_distance = s->max_backward_distance; - } + } s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE; - break; + break; } - } - } + } + } BROTLI_DCHECK(0); /* Unreachable */ -} - +} + /* Calculates the smallest feasible ring buffer. 
If we know the data size is small, do not allocate more ring buffer @@ -1411,18 +1411,18 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( /* If maximum is already reached, no further extension is retired. */ if (s->ringbuffer_size == window_size) { return; - } + } /* Metadata blocks does not touch ring buffer. */ if (s->is_metadata) { return; - } + } if (!s->ringbuffer) { output_size = 0; } else { output_size = s->pos; - } + } output_size += s->meta_block_remaining_len; min_size = min_size < output_size ? output_size : min_size; @@ -1433,16 +1433,16 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( while ((new_ringbuffer_size >> 1) >= min_size) { new_ringbuffer_size >>= 1; } - } + } s->new_ringbuffer_size = new_ringbuffer_size; -} - +} + /* Reads 1..256 2-bit context modes. */ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) { BrotliBitReader* br = &s->br; int i = s->loop_counter; - + while (i < (int)s->num_block_types[0]) { uint32_t bits; if (!BrotliSafeReadBits(br, 2, &bits)) { @@ -1455,7 +1455,7 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) { } return BROTLI_DECODER_SUCCESS; } - + static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { if (s->distance_code == 0) { --s->dist_rb_idx; @@ -1482,11 +1482,11 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { /* A huge distance will cause a BROTLI_FAILURE() soon. This is a little faster than failing here. */ s->distance_code = 0x7FFFFFFF; - } - } - } + } + } + } } - + static BROTLI_INLINE BROTLI_BOOL SafeReadBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { if (n_bits != 0) { @@ -1494,9 +1494,9 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits( } else { *val = 0; return BROTLI_TRUE; - } + } } - + /* Precondition: s->distance_code < 0. 
*/ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( int safe, BrotliDecoderState* s, BrotliBitReader* br) { @@ -1512,7 +1512,7 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( return BROTLI_FALSE; } s->distance_code = (int)code; - } + } /* Convert the distance code to the actual distance by possibly looking up past distances from the s->ringbuffer. */ s->distance_context = 0; @@ -1555,7 +1555,7 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( --s->block_length[2]; return BROTLI_TRUE; } - + static BROTLI_INLINE void ReadDistance( BrotliDecoderState* s, BrotliBitReader* br) { ReadDistanceInternal(0, s, br); @@ -1580,7 +1580,7 @@ static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal( if (!SafeReadSymbol(s->htree_command, br, &cmd_code)) { return BROTLI_FALSE; } - } + } v = kCmdLut[cmd_code]; s->distance_code = v.distance_code; s->distance_context = v.context; @@ -1597,28 +1597,28 @@ static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal( BrotliBitReaderRestoreState(br, &memento); return BROTLI_FALSE; } - } + } s->copy_length = (int)copy_length + v.copy_len_offset; --s->block_length[1]; *insert_length += (int)insert_len_extra; return BROTLI_TRUE; } - + static BROTLI_INLINE void ReadCommand( BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) { ReadCommandInternal(0, s, br, insert_length); -} - +} + static BROTLI_INLINE BROTLI_BOOL SafeReadCommand( BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) { return ReadCommandInternal(1, s, br, insert_length); -} - +} + static BROTLI_INLINE BROTLI_BOOL CheckInputAmount( int safe, BrotliBitReader* const br, size_t num) { if (safe) { return BROTLI_TRUE; - } + } return BrotliCheckInputAmount(br, num); } @@ -1920,9 +1920,9 @@ CommandPostWrapCopy: saveStateAndReturn: s->pos = pos; s->loop_counter = i; - return result; -} - + return result; +} + #undef BROTLI_SAFE static BROTLI_NOINLINE BrotliDecoderErrorCode ProcessCommands( @@ -1970,9 +1970,9 @@ BrotliDecoderResult 
BrotliDecoderDecompress( if (result != BROTLI_DECODER_RESULT_SUCCESS) { result = BROTLI_DECODER_RESULT_ERROR; } - return result; -} - + return result; +} + /* Invariant: input stream is never overconsumed: - invalid input implies that the whole stream is invalid -> any amount of input could be read and discarded @@ -1988,7 +1988,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in, size_t* available_out, uint8_t** next_out, size_t* total_out) { BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS; - BrotliBitReader* br = &s->br; + BrotliBitReader* br = &s->br; /* Ensure that |total_out| is set, even if no data will ever be pushed out. */ if (total_out) { *total_out = s->partial_pos_out; @@ -2012,8 +2012,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = BROTLI_DECODER_NEEDS_MORE_INPUT; br->next_in = &s->buffer.u8[0]; } - /* State machine */ - for (;;) { + /* State machine */ + for (;;) { if (result != BROTLI_DECODER_SUCCESS) { /* Error, needs more input/output. */ if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { @@ -2025,7 +2025,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = intermediate_result; break; } - } + } if (s->buffer_length != 0) { /* Used with internal buffer. */ if (br->avail_in == 0) { /* Successfully finished read transaction. @@ -2062,9 +2062,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream( (*available_in)--; } break; - } + } /* Unreachable. */ - } + } /* Fail or needs more output. */ @@ -2081,15 +2081,15 @@ BrotliDecoderResult BrotliDecoderDecompressStream( *next_in = br->next_in; } break; - } - switch (s->state) { - case BROTLI_STATE_UNINITED: - /* Prepare to the first read. */ - if (!BrotliWarmupBitReader(br)) { + } + switch (s->state) { + case BROTLI_STATE_UNINITED: + /* Prepare to the first read. */ + if (!BrotliWarmupBitReader(br)) { result = BROTLI_DECODER_NEEDS_MORE_INPUT; - break; - } - /* Decode window size. 
*/ + break; + } + /* Decode window size. */ result = DecodeWindowBits(s, br); /* Reads 1..8 bits. */ if (result != BROTLI_DECODER_SUCCESS) { break; @@ -2109,8 +2109,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( if (s->window_bits < BROTLI_LARGE_MIN_WBITS || s->window_bits > BROTLI_LARGE_MAX_WBITS) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); - break; - } + break; + } s->state = BROTLI_STATE_INITIALIZE; /* Fall through. */ @@ -2118,99 +2118,99 @@ BrotliDecoderResult BrotliDecoderDecompressStream( BROTLI_LOG_UINT(s->window_bits); /* Maximum distance, see section 9.1. of the spec. */ s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP; - - /* Allocate memory for both block_type_trees and block_len_trees. */ + + /* Allocate memory for both block_type_trees and block_len_trees. */ s->block_type_trees = (HuffmanCode*)BROTLI_DECODER_ALLOC(s, sizeof(HuffmanCode) * 3 * (BROTLI_HUFFMAN_MAX_SIZE_258 + BROTLI_HUFFMAN_MAX_SIZE_26)); if (s->block_type_trees == 0) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES); - break; - } + break; + } s->block_len_trees = s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258; - - s->state = BROTLI_STATE_METABLOCK_BEGIN; + + s->state = BROTLI_STATE_METABLOCK_BEGIN; /* Fall through. */ - case BROTLI_STATE_METABLOCK_BEGIN: + case BROTLI_STATE_METABLOCK_BEGIN: BrotliDecoderStateMetablockBegin(s); BROTLI_LOG_UINT(s->pos); - s->state = BROTLI_STATE_METABLOCK_HEADER; + s->state = BROTLI_STATE_METABLOCK_HEADER; /* Fall through. */ - case BROTLI_STATE_METABLOCK_HEADER: + case BROTLI_STATE_METABLOCK_HEADER: result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. 
*/ if (result != BROTLI_DECODER_SUCCESS) { - break; - } - BROTLI_LOG_UINT(s->is_last_metablock); - BROTLI_LOG_UINT(s->meta_block_remaining_len); - BROTLI_LOG_UINT(s->is_metadata); - BROTLI_LOG_UINT(s->is_uncompressed); - if (s->is_metadata || s->is_uncompressed) { - if (!BrotliJumpToByteBoundary(br)) { + break; + } + BROTLI_LOG_UINT(s->is_last_metablock); + BROTLI_LOG_UINT(s->meta_block_remaining_len); + BROTLI_LOG_UINT(s->is_metadata); + BROTLI_LOG_UINT(s->is_uncompressed); + if (s->is_metadata || s->is_uncompressed) { + if (!BrotliJumpToByteBoundary(br)) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_1); - break; - } - } - if (s->is_metadata) { - s->state = BROTLI_STATE_METADATA; - break; - } - if (s->meta_block_remaining_len == 0) { - s->state = BROTLI_STATE_METABLOCK_DONE; - break; - } + break; + } + } + if (s->is_metadata) { + s->state = BROTLI_STATE_METADATA; + break; + } + if (s->meta_block_remaining_len == 0) { + s->state = BROTLI_STATE_METABLOCK_DONE; + break; + } BrotliCalculateRingBufferSize(s); - if (s->is_uncompressed) { - s->state = BROTLI_STATE_UNCOMPRESSED; - break; - } + if (s->is_uncompressed) { + s->state = BROTLI_STATE_UNCOMPRESSED; + break; + } s->loop_counter = 0; - s->state = BROTLI_STATE_HUFFMAN_CODE_0; - break; + s->state = BROTLI_STATE_HUFFMAN_CODE_0; + break; case BROTLI_STATE_UNCOMPRESSED: { result = CopyUncompressedBlockToOutput( available_out, next_out, total_out, s); if (result != BROTLI_DECODER_SUCCESS) { - break; - } - s->state = BROTLI_STATE_METABLOCK_DONE; - break; + break; + } + s->state = BROTLI_STATE_METABLOCK_DONE; + break; } - case BROTLI_STATE_METADATA: - for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) { - uint32_t bits; - /* Read one byte and ignore it. */ - if (!BrotliSafeReadBits(br, 8, &bits)) { + case BROTLI_STATE_METADATA: + for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) { + uint32_t bits; + /* Read one byte and ignore it. 
*/ + if (!BrotliSafeReadBits(br, 8, &bits)) { result = BROTLI_DECODER_NEEDS_MORE_INPUT; - break; - } - } + break; + } + } if (result == BROTLI_DECODER_SUCCESS) { - s->state = BROTLI_STATE_METABLOCK_DONE; - } - break; + s->state = BROTLI_STATE_METABLOCK_DONE; + } + break; - case BROTLI_STATE_HUFFMAN_CODE_0: + case BROTLI_STATE_HUFFMAN_CODE_0: if (s->loop_counter >= 3) { s->state = BROTLI_STATE_METABLOCK_HEADER_2; - break; - } - /* Reads 1..11 bits. */ + break; + } + /* Reads 1..11 bits. */ result = DecodeVarLenUint8(s, br, &s->num_block_types[s->loop_counter]); if (result != BROTLI_DECODER_SUCCESS) { - break; - } + break; + } s->num_block_types[s->loop_counter]++; BROTLI_LOG_UINT(s->num_block_types[s->loop_counter]); if (s->num_block_types[s->loop_counter] < 2) { s->loop_counter++; - break; - } + break; + } s->state = BROTLI_STATE_HUFFMAN_CODE_1; /* Fall through. */ @@ -2230,7 +2230,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = ReadHuffmanCode(alphabet_size, alphabet_size, &s->block_len_trees[tree_offset], NULL, s); if (result != BROTLI_DECODER_SUCCESS) break; - s->state = BROTLI_STATE_HUFFMAN_CODE_3; + s->state = BROTLI_STATE_HUFFMAN_CODE_3; } /* Fall through. 
*/ @@ -2239,33 +2239,33 @@ BrotliDecoderResult BrotliDecoderDecompressStream( if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter], &s->block_len_trees[tree_offset], br)) { result = BROTLI_DECODER_NEEDS_MORE_INPUT; - break; - } + break; + } BROTLI_LOG_UINT(s->block_length[s->loop_counter]); s->loop_counter++; - s->state = BROTLI_STATE_HUFFMAN_CODE_0; - break; + s->state = BROTLI_STATE_HUFFMAN_CODE_0; + break; } case BROTLI_STATE_METABLOCK_HEADER_2: { uint32_t bits; if (!BrotliSafeReadBits(br, 6, &bits)) { result = BROTLI_DECODER_NEEDS_MORE_INPUT; - break; - } + break; + } s->distance_postfix_bits = bits & BitMask(2); bits >>= 2; s->num_direct_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES + (bits << s->distance_postfix_bits); - BROTLI_LOG_UINT(s->num_direct_distance_codes); - BROTLI_LOG_UINT(s->distance_postfix_bits); - s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits); + BROTLI_LOG_UINT(s->num_direct_distance_codes); + BROTLI_LOG_UINT(s->distance_postfix_bits); + s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits); s->context_modes = (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)s->num_block_types[0]); - if (s->context_modes == 0) { + if (s->context_modes == 0) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES); - break; - } + break; + } s->loop_counter = 0; s->state = BROTLI_STATE_CONTEXT_MODES; } @@ -2275,19 +2275,19 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = ReadContextModes(s); if (result != BROTLI_DECODER_SUCCESS) { break; - } - s->state = BROTLI_STATE_CONTEXT_MAP_1; + } + s->state = BROTLI_STATE_CONTEXT_MAP_1; /* Fall through. 
*/ - case BROTLI_STATE_CONTEXT_MAP_1: + case BROTLI_STATE_CONTEXT_MAP_1: result = DecodeContextMap( s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS, &s->num_literal_htrees, &s->context_map, s); if (result != BROTLI_DECODER_SUCCESS) { - break; - } + break; + } DetectTrivialLiteralBlockTypes(s); - s->state = BROTLI_STATE_CONTEXT_MAP_2; + s->state = BROTLI_STATE_CONTEXT_MAP_2; /* Fall through. */ case BROTLI_STATE_CONTEXT_MAP_2: { @@ -2307,7 +2307,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( &s->num_dist_htrees, &s->dist_context_map, s); if (result != BROTLI_DECODER_SUCCESS) { break; - } + } allocation_success &= BrotliDecoderHuffmanTreeGroupInit( s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees); @@ -2322,7 +2322,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); } s->loop_counter = 0; - s->state = BROTLI_STATE_TREE_GROUP; + s->state = BROTLI_STATE_TREE_GROUP; } /* Fall through. */ @@ -2334,83 +2334,83 @@ BrotliDecoderResult BrotliDecoderDecompressStream( case 2: hgroup = &s->distance_hgroup; break; default: return SaveErrorCode(s, BROTLI_FAILURE( BROTLI_DECODER_ERROR_UNREACHABLE)); - } + } result = HuffmanTreeGroupDecode(hgroup, s); if (result != BROTLI_DECODER_SUCCESS) break; s->loop_counter++; if (s->loop_counter >= 3) { PrepareLiteralDecoding(s); - s->dist_context_map_slice = s->dist_context_map; - s->htree_command = s->insert_copy_hgroup.htrees[0]; + s->dist_context_map_slice = s->dist_context_map; + s->htree_command = s->insert_copy_hgroup.htrees[0]; if (!BrotliEnsureRingBuffer(s)) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2); break; } - s->state = BROTLI_STATE_COMMAND_BEGIN; - } - break; + s->state = BROTLI_STATE_COMMAND_BEGIN; + } + break; } - case BROTLI_STATE_COMMAND_BEGIN: + case BROTLI_STATE_COMMAND_BEGIN: /* Fall through. 
*/ - case BROTLI_STATE_COMMAND_INNER: + case BROTLI_STATE_COMMAND_INNER: /* Fall through. */ case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS: /* Fall through. */ - case BROTLI_STATE_COMMAND_POST_WRAP_COPY: + case BROTLI_STATE_COMMAND_POST_WRAP_COPY: result = ProcessCommands(s); if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { result = SafeProcessCommands(s); - } - break; + } + break; - case BROTLI_STATE_COMMAND_INNER_WRITE: + case BROTLI_STATE_COMMAND_INNER_WRITE: /* Fall through. */ - case BROTLI_STATE_COMMAND_POST_WRITE_1: + case BROTLI_STATE_COMMAND_POST_WRITE_1: /* Fall through. */ - case BROTLI_STATE_COMMAND_POST_WRITE_2: + case BROTLI_STATE_COMMAND_POST_WRITE_2: result = WriteRingBuffer( s, available_out, next_out, total_out, BROTLI_FALSE); if (result != BROTLI_DECODER_SUCCESS) { - break; - } + break; + } WrapRingBuffer(s); if (s->ringbuffer_size == 1 << s->window_bits) { s->max_distance = s->max_backward_distance; } - if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) { + if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) { if (s->meta_block_remaining_len == 0) { /* Next metablock, if any. 
*/ - s->state = BROTLI_STATE_METABLOCK_DONE; - } else { + s->state = BROTLI_STATE_METABLOCK_DONE; + } else { s->state = BROTLI_STATE_COMMAND_BEGIN; - } + } break; - } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) { - s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY; - } else { /* BROTLI_STATE_COMMAND_INNER_WRITE */ + } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) { + s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY; + } else { /* BROTLI_STATE_COMMAND_INNER_WRITE */ if (s->loop_counter == 0) { if (s->meta_block_remaining_len == 0) { - s->state = BROTLI_STATE_METABLOCK_DONE; + s->state = BROTLI_STATE_METABLOCK_DONE; } else { s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS; - } + } break; - } - s->state = BROTLI_STATE_COMMAND_INNER; - } - break; + } + s->state = BROTLI_STATE_COMMAND_INNER; + } + break; - case BROTLI_STATE_METABLOCK_DONE: + case BROTLI_STATE_METABLOCK_DONE: if (s->meta_block_remaining_len < 0) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2); break; } BrotliDecoderStateCleanupAfterMetablock(s); - if (!s->is_last_metablock) { - s->state = BROTLI_STATE_METABLOCK_BEGIN; - break; - } + if (!s->is_last_metablock) { + s->state = BROTLI_STATE_METABLOCK_BEGIN; + break; + } if (!BrotliJumpToByteBoundary(br)) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_2); break; @@ -2420,20 +2420,20 @@ BrotliDecoderResult BrotliDecoderDecompressStream( *available_in = br->avail_in; *next_in = br->next_in; } - s->state = BROTLI_STATE_DONE; + s->state = BROTLI_STATE_DONE; /* Fall through. 
*/ - case BROTLI_STATE_DONE: - if (s->ringbuffer != 0) { + case BROTLI_STATE_DONE: + if (s->ringbuffer != 0) { result = WriteRingBuffer( s, available_out, next_out, total_out, BROTLI_TRUE); if (result != BROTLI_DECODER_SUCCESS) { - break; - } - } + break; + } + } return SaveErrorCode(s, result); - } - } + } + } return SaveErrorCode(s, result); } @@ -2468,19 +2468,19 @@ const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) { *size = 0; result = 0; } - return result; -} - + return result; +} + BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* s) { return TO_BROTLI_BOOL(s->state != BROTLI_STATE_UNINITED || BrotliGetAvailableBits(&s->br) != 0); -} - +} + BROTLI_BOOL BrotliDecoderIsFinished(const BrotliDecoderState* s) { return TO_BROTLI_BOOL(s->state == BROTLI_STATE_DONE) && !BrotliDecoderHasMoreOutput(s); } - + BrotliDecoderErrorCode BrotliDecoderGetErrorCode(const BrotliDecoderState* s) { return (BrotliDecoderErrorCode)s->error_code; } @@ -2501,6 +2501,6 @@ uint32_t BrotliDecoderVersion() { return BROTLI_VERSION; } -#if defined(__cplusplus) || defined(c_plusplus) +#if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ -#endif +#endif diff --git a/contrib/libs/brotli/dec/huffman.c b/contrib/libs/brotli/dec/huffman.c index 02e5b15c229..30c40d33f20 100644 --- a/contrib/libs/brotli/dec/huffman.c +++ b/contrib/libs/brotli/dec/huffman.c @@ -1,29 +1,29 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - +/* Copyright 2013 Google Inc. All Rights Reserved. + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Utilities for building Huffman decoding tables. */ - -#include "./huffman.h" +*/ + +/* Utilities for building Huffman decoding tables. 
*/ + +#include "./huffman.h" #include /* memcpy, memset */ #include "../common/constants.h" #include "../common/platform.h" #include - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + #define BROTLI_REVERSE_BITS_MAX 8 #if defined(BROTLI_RBIT) #define BROTLI_REVERSE_BITS_BASE \ ((sizeof(brotli_reg_t) << 3) - BROTLI_REVERSE_BITS_MAX) -#else +#else #define BROTLI_REVERSE_BITS_BASE 0 static uint8_t kReverseBits[1 << BROTLI_REVERSE_BITS_MAX] = { 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, @@ -72,37 +72,37 @@ static BROTLI_INLINE brotli_reg_t BrotliReverseBits(brotli_reg_t num) { return BROTLI_RBIT(num); #else return kReverseBits[num]; -#endif -} - -/* Stores code in table[0], table[step], table[2*step], ..., table[end] */ -/* Assumes that end is an integer multiple of step */ -static BROTLI_INLINE void ReplicateValue(HuffmanCode* table, - int step, int end, - HuffmanCode code) { - do { - end -= step; - table[end] = code; - } while (end > 0); -} - +#endif +} + +/* Stores code in table[0], table[step], table[2*step], ..., table[end] */ +/* Assumes that end is an integer multiple of step */ +static BROTLI_INLINE void ReplicateValue(HuffmanCode* table, + int step, int end, + HuffmanCode code) { + do { + end -= step; + table[end] = code; + } while (end > 0); +} + /* Returns the table width of the next 2nd level table. |count| is the histogram of bit lengths for the remaining symbols, |len| is the code length of the next processed symbol. 
*/ -static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count, - int len, int root_bits) { - int left = 1 << (len - root_bits); - while (len < BROTLI_HUFFMAN_MAX_CODE_LENGTH) { - left -= count[len]; - if (left <= 0) break; - ++len; - left <<= 1; - } - return len - root_bits; -} - -void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, - const uint8_t* const code_lengths, +static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count, + int len, int root_bits) { + int left = 1 << (len - root_bits); + while (len < BROTLI_HUFFMAN_MAX_CODE_LENGTH) { + left -= count[len]; + if (left <= 0) break; + ++len; + left <<= 1; + } + return len - root_bits; +} + +void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, + const uint8_t* const code_lengths, uint16_t* count) { HuffmanCode code; /* current table entry */ int symbol; /* symbol index in original or sorted table */ @@ -111,61 +111,61 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, int step; /* step size to replicate values in current table */ int table_size; /* size of current table */ int sorted[BROTLI_CODE_LENGTH_CODES]; /* symbols sorted by code length */ - /* offsets in sorted table for each length */ - int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1]; - int bits; - int bits_count; + /* offsets in sorted table for each length */ + int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1]; + int bits; + int bits_count; BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <= BROTLI_REVERSE_BITS_MAX); - + /* Generate offsets into sorted symbol table by code length. */ - symbol = -1; - bits = 1; - BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, { - symbol += count[bits]; - offset[bits] = symbol; - bits++; - }); + symbol = -1; + bits = 1; + BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, { + symbol += count[bits]; + offset[bits] = symbol; + bits++; + }); /* Symbols with code length 0 are placed after all other symbols. 
*/ offset[0] = BROTLI_CODE_LENGTH_CODES - 1; - + /* Sort symbols by length, by symbol order within each length. */ symbol = BROTLI_CODE_LENGTH_CODES; - do { - BROTLI_REPEAT(6, { - symbol--; - sorted[offset[code_lengths[symbol]]--] = symbol; - }); - } while (symbol != 0); - - table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH; - + do { + BROTLI_REPEAT(6, { + symbol--; + sorted[offset[code_lengths[symbol]]--] = symbol; + }); + } while (symbol != 0); + + table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH; + /* Special case: all symbols but one have 0 code length. */ if (offset[0] == 0) { code = ConstructHuffmanCode(0, (uint16_t)sorted[0]); for (key = 0; key < (brotli_reg_t)table_size; ++key) { - table[key] = code; - } - return; - } - + table[key] = code; + } + return; + } + /* Fill in table. */ - key = 0; + key = 0; key_step = BROTLI_REVERSE_BITS_LOWEST; - symbol = 0; - bits = 1; - step = 2; - do { - for (bits_count = count[bits]; bits_count != 0; --bits_count) { + symbol = 0; + bits = 1; + step = 2; + do { + for (bits_count = count[bits]; bits_count != 0; --bits_count) { code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)sorted[symbol++]); ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code); key += key_step; - } - step <<= 1; + } + step <<= 1; key_step >>= 1; - } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH); -} - + } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH); +} + uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, int root_bits, const uint16_t* const symbol_lists, @@ -182,114 +182,114 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, int table_bits; /* key length of current table */ int table_size; /* size of current table */ int total_size; /* sum of root table size and 2nd level table sizes */ - int max_length = -1; - int bits; - int bits_count; - + int max_length = -1; + int bits; + int bits_count; + BROTLI_DCHECK(root_bits <= BROTLI_REVERSE_BITS_MAX); 
BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH - root_bits <= BROTLI_REVERSE_BITS_MAX); - while (symbol_lists[max_length] == 0xFFFF) max_length--; - max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1; - - table = root_table; - table_bits = root_bits; - table_size = 1 << table_bits; - total_size = table_size; - + while (symbol_lists[max_length] == 0xFFFF) max_length--; + max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1; + + table = root_table; + table_bits = root_bits; + table_size = 1 << table_bits; + total_size = table_size; + /* Fill in the root table. Reduce the table size to if possible, and create the repetitions by memcpy. */ - if (table_bits > max_length) { - table_bits = max_length; - table_size = 1 << table_bits; - } - key = 0; + if (table_bits > max_length) { + table_bits = max_length; + table_size = 1 << table_bits; + } + key = 0; key_step = BROTLI_REVERSE_BITS_LOWEST; - bits = 1; - step = 2; - do { - symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1); - for (bits_count = count[bits]; bits_count != 0; --bits_count) { - symbol = symbol_lists[symbol]; + bits = 1; + step = 2; + do { + symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1); + for (bits_count = count[bits]; bits_count != 0; --bits_count) { + symbol = symbol_lists[symbol]; code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)symbol); ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code); key += key_step; - } - step <<= 1; + } + step <<= 1; key_step >>= 1; - } while (++bits <= table_bits); - + } while (++bits <= table_bits); + /* If root_bits != table_bits then replicate to fill the remaining slots. */ - while (total_size != table_size) { - memcpy(&table[table_size], &table[0], - (size_t)table_size * sizeof(table[0])); - table_size <<= 1; - } - + while (total_size != table_size) { + memcpy(&table[table_size], &table[0], + (size_t)table_size * sizeof(table[0])); + table_size <<= 1; + } + /* Fill in 2nd level tables and add pointers to root table. 
*/ key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1); sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1); sub_key_step = BROTLI_REVERSE_BITS_LOWEST; for (len = root_bits + 1, step = 2; len <= max_length; ++len) { - symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1); - for (; count[len] != 0; --count[len]) { + symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1); + for (; count[len] != 0; --count[len]) { if (sub_key == (BROTLI_REVERSE_BITS_LOWEST << 1U)) { - table += table_size; - table_bits = NextTableBitSize(count, len, root_bits); - table_size = 1 << table_bits; - total_size += table_size; + table += table_size; + table_bits = NextTableBitSize(count, len, root_bits); + table_size = 1 << table_bits; + total_size += table_size; sub_key = BrotliReverseBits(key); key += key_step; root_table[sub_key] = ConstructHuffmanCode( (uint8_t)(table_bits + root_bits), (uint16_t)(((size_t)(table - root_table)) - sub_key)); sub_key = 0; - } - symbol = symbol_lists[symbol]; + } + symbol = symbol_lists[symbol]; code = ConstructHuffmanCode((uint8_t)(len - root_bits), (uint16_t)symbol); ReplicateValue( &table[BrotliReverseBits(sub_key)], step, table_size, code); sub_key += sub_key_step; - } + } step <<= 1; sub_key_step >>= 1; - } + } return (uint32_t)total_size; -} - +} + uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, int root_bits, uint16_t* val, uint32_t num_symbols) { uint32_t table_size = 1; const uint32_t goal_size = 1U << root_bits; - switch (num_symbols) { - case 0: + switch (num_symbols) { + case 0: table[0] = ConstructHuffmanCode(0, val[0]); - break; - case 1: - if (val[1] > val[0]) { + break; + case 1: + if (val[1] > val[0]) { table[0] = ConstructHuffmanCode(1, val[0]); table[1] = ConstructHuffmanCode(1, val[1]); - } else { + } else { table[0] = ConstructHuffmanCode(1, val[1]); table[1] = ConstructHuffmanCode(1, val[0]); - } - table_size = 2; - break; - case 2: + } + table_size = 2; + break; + case 2: table[0] = ConstructHuffmanCode(1, val[0]); table[2] = 
ConstructHuffmanCode(1, val[0]); - if (val[2] > val[1]) { + if (val[2] > val[1]) { table[1] = ConstructHuffmanCode(2, val[1]); table[3] = ConstructHuffmanCode(2, val[2]); - } else { + } else { table[1] = ConstructHuffmanCode(2, val[2]); table[3] = ConstructHuffmanCode(2, val[1]); - } - table_size = 4; - break; + } + table_size = 4; + break; case 3: { int i, k; for (i = 0; i < 3; ++i) { @@ -298,22 +298,22 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, uint16_t t = val[k]; val[k] = val[i]; val[i] = t; - } - } - } + } + } + } table[0] = ConstructHuffmanCode(2, val[0]); table[2] = ConstructHuffmanCode(2, val[1]); table[1] = ConstructHuffmanCode(2, val[2]); table[3] = ConstructHuffmanCode(2, val[3]); table_size = 4; - break; + break; } case 4: { if (val[3] < val[2]) { uint16_t t = val[3]; val[3] = val[2]; val[2] = t; - } + } table[0] = ConstructHuffmanCode(1, val[0]); table[1] = ConstructHuffmanCode(2, val[1]); table[2] = ConstructHuffmanCode(1, val[0]); @@ -323,17 +323,17 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, table[6] = ConstructHuffmanCode(1, val[0]); table[7] = ConstructHuffmanCode(3, val[3]); table_size = 8; - break; + break; } - } - while (table_size != goal_size) { - memcpy(&table[table_size], &table[0], - (size_t)table_size * sizeof(table[0])); - table_size <<= 1; - } - return goal_size; -} - -#if defined(__cplusplus) || defined(c_plusplus) + } + while (table_size != goal_size) { + memcpy(&table[table_size], &table[0], + (size_t)table_size * sizeof(table[0])); + table_size <<= 1; + } + return goal_size; +} + +#if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ -#endif +#endif diff --git a/contrib/libs/brotli/dec/huffman.h b/contrib/libs/brotli/dec/huffman.h index 9951f8e15d8..b9f0716c160 100644 --- a/contrib/libs/brotli/dec/huffman.h +++ b/contrib/libs/brotli/dec/huffman.h @@ -1,23 +1,23 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - +/* Copyright 2013 Google Inc. All Rights Reserved. 
+ Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Utilities for building Huffman decoding tables. */ - -#ifndef BROTLI_DEC_HUFFMAN_H_ -#define BROTLI_DEC_HUFFMAN_H_ - +*/ + +/* Utilities for building Huffman decoding tables. */ + +#ifndef BROTLI_DEC_HUFFMAN_H_ +#define BROTLI_DEC_HUFFMAN_H_ + #include "../common/platform.h" #include - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15 - + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15 + /* Maximum possible Huffman table size for an alphabet size of (index * 32), max code length 15 and root table bits 8. */ static const uint16_t kMaxHuffmanTableSize[] = { @@ -30,9 +30,9 @@ static const uint16_t kMaxHuffmanTableSize[] = { #define BROTLI_HUFFMAN_MAX_SIZE_258 632 /* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */ #define BROTLI_HUFFMAN_MAX_SIZE_272 646 - -#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5 - + +#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5 + #if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \ BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)) #define BROTLI_HUFFMAN_CODE_FAST_LOAD @@ -41,11 +41,11 @@ static const uint16_t kMaxHuffmanTableSize[] = { #if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD) /* Do not create this struct directly - use the ConstructHuffmanCode * constructor below! 
*/ -typedef struct { +typedef struct { uint8_t bits; /* number of bits used for this symbol */ uint16_t value; /* symbol value or table offset */ -} HuffmanCode; - +} HuffmanCode; + static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits, const uint16_t value) { HuffmanCode h; @@ -93,35 +93,35 @@ static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits, #define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16) #endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */ -/* Builds Huffman lookup table assuming code lengths are in symbol order. */ +/* Builds Huffman lookup table assuming code lengths are in symbol order. */ BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table, const uint8_t* const code_lengths, uint16_t* count); - + /* Builds Huffman lookup table assuming code lengths are in symbol order. Returns size of resulting table. */ BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg); - + /* Builds a simple Huffman table. The |num_symbols| parameter is to be interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, 2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2], 4 means 4 symbols with lengths [1, 2, 3, 3]. */ BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, int root_bits, uint16_t* symbols, uint32_t num_symbols); - -/* Contains a collection of Huffman trees with the same alphabet size. */ + +/* Contains a collection of Huffman trees with the same alphabet size. */ /* max_symbol is needed due to simple codes since log2(alphabet_size) could be greater than log2(max_symbol). 
*/ -typedef struct { - HuffmanCode** htrees; - HuffmanCode* codes; +typedef struct { + HuffmanCode** htrees; + HuffmanCode* codes; uint16_t alphabet_size; uint16_t max_symbol; uint16_t num_htrees; -} HuffmanTreeGroup; - -#if defined(__cplusplus) || defined(c_plusplus) +} HuffmanTreeGroup; + +#if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ -#endif - -#endif /* BROTLI_DEC_HUFFMAN_H_ */ +#endif + +#endif /* BROTLI_DEC_HUFFMAN_H_ */ diff --git a/contrib/libs/brotli/dec/prefix.h b/contrib/libs/brotli/dec/prefix.h index b4ceebfbd4b..3ea062d84a2 100644 --- a/contrib/libs/brotli/dec/prefix.h +++ b/contrib/libs/brotli/dec/prefix.h @@ -1,750 +1,750 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - +/* Copyright 2013 Google Inc. All Rights Reserved. + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Lookup tables to map prefix codes to value ranges. This is used during +*/ + +/* Lookup tables to map prefix codes to value ranges. This is used during decoding of the block lengths, literal insertion lengths and copy lengths. 
*/ - -#ifndef BROTLI_DEC_PREFIX_H_ -#define BROTLI_DEC_PREFIX_H_ - + +#ifndef BROTLI_DEC_PREFIX_H_ +#define BROTLI_DEC_PREFIX_H_ + #include "../common/constants.h" #include /* Represents the range of values belonging to a prefix code: [offset, offset + 2^nbits) */ -struct PrefixCodeRange { +struct PrefixCodeRange { uint16_t offset; uint8_t nbits; -}; - +}; + static const struct PrefixCodeRange kBlockLengthPrefixCode[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = { - { 1, 2}, { 5, 2}, { 9, 2}, { 13, 2}, - { 17, 3}, { 25, 3}, { 33, 3}, { 41, 3}, - { 49, 4}, { 65, 4}, { 81, 4}, { 97, 4}, - { 113, 5}, { 145, 5}, { 177, 5}, { 209, 5}, - { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8}, - { 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12}, - {8433, 13}, {16625, 24} -}; - -typedef struct CmdLutElement { - uint8_t insert_len_extra_bits; - uint8_t copy_len_extra_bits; - int8_t distance_code; - uint8_t context; - uint16_t insert_len_offset; - uint16_t copy_len_offset; -} CmdLutElement; - + { 1, 2}, { 5, 2}, { 9, 2}, { 13, 2}, + { 17, 3}, { 25, 3}, { 33, 3}, { 41, 3}, + { 49, 4}, { 65, 4}, { 81, 4}, { 97, 4}, + { 113, 5}, { 145, 5}, { 177, 5}, { 209, 5}, + { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8}, + { 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12}, + {8433, 13}, {16625, 24} +}; + +typedef struct CmdLutElement { + uint8_t insert_len_extra_bits; + uint8_t copy_len_extra_bits; + int8_t distance_code; + uint8_t context; + uint16_t insert_len_offset; + uint16_t copy_len_offset; +} CmdLutElement; + static const CmdLutElement kCmdLut[BROTLI_NUM_COMMAND_SYMBOLS] = { - { 0x00, 0x00, 0, 0x00, 0x0000, 0x0002 }, - { 0x00, 0x00, 0, 0x01, 0x0000, 0x0003 }, - { 0x00, 0x00, 0, 0x02, 0x0000, 0x0004 }, - { 0x00, 0x00, 0, 0x03, 0x0000, 0x0005 }, - { 0x00, 0x00, 0, 0x03, 0x0000, 0x0006 }, - { 0x00, 0x00, 0, 0x03, 0x0000, 0x0007 }, - { 0x00, 0x00, 0, 0x03, 0x0000, 0x0008 }, - { 0x00, 0x00, 0, 0x03, 0x0000, 0x0009 }, - { 0x00, 0x00, 0, 0x00, 0x0001, 0x0002 }, - { 0x00, 0x00, 0, 0x01, 0x0001, 0x0003 }, - { 0x00, 0x00, 0, 
0x02, 0x0001, 0x0004 }, - { 0x00, 0x00, 0, 0x03, 0x0001, 0x0005 }, - { 0x00, 0x00, 0, 0x03, 0x0001, 0x0006 }, - { 0x00, 0x00, 0, 0x03, 0x0001, 0x0007 }, - { 0x00, 0x00, 0, 0x03, 0x0001, 0x0008 }, - { 0x00, 0x00, 0, 0x03, 0x0001, 0x0009 }, - { 0x00, 0x00, 0, 0x00, 0x0002, 0x0002 }, - { 0x00, 0x00, 0, 0x01, 0x0002, 0x0003 }, - { 0x00, 0x00, 0, 0x02, 0x0002, 0x0004 }, - { 0x00, 0x00, 0, 0x03, 0x0002, 0x0005 }, - { 0x00, 0x00, 0, 0x03, 0x0002, 0x0006 }, - { 0x00, 0x00, 0, 0x03, 0x0002, 0x0007 }, - { 0x00, 0x00, 0, 0x03, 0x0002, 0x0008 }, - { 0x00, 0x00, 0, 0x03, 0x0002, 0x0009 }, - { 0x00, 0x00, 0, 0x00, 0x0003, 0x0002 }, - { 0x00, 0x00, 0, 0x01, 0x0003, 0x0003 }, - { 0x00, 0x00, 0, 0x02, 0x0003, 0x0004 }, - { 0x00, 0x00, 0, 0x03, 0x0003, 0x0005 }, - { 0x00, 0x00, 0, 0x03, 0x0003, 0x0006 }, - { 0x00, 0x00, 0, 0x03, 0x0003, 0x0007 }, - { 0x00, 0x00, 0, 0x03, 0x0003, 0x0008 }, - { 0x00, 0x00, 0, 0x03, 0x0003, 0x0009 }, - { 0x00, 0x00, 0, 0x00, 0x0004, 0x0002 }, - { 0x00, 0x00, 0, 0x01, 0x0004, 0x0003 }, - { 0x00, 0x00, 0, 0x02, 0x0004, 0x0004 }, - { 0x00, 0x00, 0, 0x03, 0x0004, 0x0005 }, - { 0x00, 0x00, 0, 0x03, 0x0004, 0x0006 }, - { 0x00, 0x00, 0, 0x03, 0x0004, 0x0007 }, - { 0x00, 0x00, 0, 0x03, 0x0004, 0x0008 }, - { 0x00, 0x00, 0, 0x03, 0x0004, 0x0009 }, - { 0x00, 0x00, 0, 0x00, 0x0005, 0x0002 }, - { 0x00, 0x00, 0, 0x01, 0x0005, 0x0003 }, - { 0x00, 0x00, 0, 0x02, 0x0005, 0x0004 }, - { 0x00, 0x00, 0, 0x03, 0x0005, 0x0005 }, - { 0x00, 0x00, 0, 0x03, 0x0005, 0x0006 }, - { 0x00, 0x00, 0, 0x03, 0x0005, 0x0007 }, - { 0x00, 0x00, 0, 0x03, 0x0005, 0x0008 }, - { 0x00, 0x00, 0, 0x03, 0x0005, 0x0009 }, - { 0x01, 0x00, 0, 0x00, 0x0006, 0x0002 }, - { 0x01, 0x00, 0, 0x01, 0x0006, 0x0003 }, - { 0x01, 0x00, 0, 0x02, 0x0006, 0x0004 }, - { 0x01, 0x00, 0, 0x03, 0x0006, 0x0005 }, - { 0x01, 0x00, 0, 0x03, 0x0006, 0x0006 }, - { 0x01, 0x00, 0, 0x03, 0x0006, 0x0007 }, - { 0x01, 0x00, 0, 0x03, 0x0006, 0x0008 }, - { 0x01, 0x00, 0, 0x03, 0x0006, 0x0009 }, - { 0x01, 0x00, 0, 0x00, 0x0008, 0x0002 
}, - { 0x01, 0x00, 0, 0x01, 0x0008, 0x0003 }, - { 0x01, 0x00, 0, 0x02, 0x0008, 0x0004 }, - { 0x01, 0x00, 0, 0x03, 0x0008, 0x0005 }, - { 0x01, 0x00, 0, 0x03, 0x0008, 0x0006 }, - { 0x01, 0x00, 0, 0x03, 0x0008, 0x0007 }, - { 0x01, 0x00, 0, 0x03, 0x0008, 0x0008 }, - { 0x01, 0x00, 0, 0x03, 0x0008, 0x0009 }, - { 0x00, 0x01, 0, 0x03, 0x0000, 0x000a }, - { 0x00, 0x01, 0, 0x03, 0x0000, 0x000c }, - { 0x00, 0x02, 0, 0x03, 0x0000, 0x000e }, - { 0x00, 0x02, 0, 0x03, 0x0000, 0x0012 }, - { 0x00, 0x03, 0, 0x03, 0x0000, 0x0016 }, - { 0x00, 0x03, 0, 0x03, 0x0000, 0x001e }, - { 0x00, 0x04, 0, 0x03, 0x0000, 0x0026 }, - { 0x00, 0x04, 0, 0x03, 0x0000, 0x0036 }, - { 0x00, 0x01, 0, 0x03, 0x0001, 0x000a }, - { 0x00, 0x01, 0, 0x03, 0x0001, 0x000c }, - { 0x00, 0x02, 0, 0x03, 0x0001, 0x000e }, - { 0x00, 0x02, 0, 0x03, 0x0001, 0x0012 }, - { 0x00, 0x03, 0, 0x03, 0x0001, 0x0016 }, - { 0x00, 0x03, 0, 0x03, 0x0001, 0x001e }, - { 0x00, 0x04, 0, 0x03, 0x0001, 0x0026 }, - { 0x00, 0x04, 0, 0x03, 0x0001, 0x0036 }, - { 0x00, 0x01, 0, 0x03, 0x0002, 0x000a }, - { 0x00, 0x01, 0, 0x03, 0x0002, 0x000c }, - { 0x00, 0x02, 0, 0x03, 0x0002, 0x000e }, - { 0x00, 0x02, 0, 0x03, 0x0002, 0x0012 }, - { 0x00, 0x03, 0, 0x03, 0x0002, 0x0016 }, - { 0x00, 0x03, 0, 0x03, 0x0002, 0x001e }, - { 0x00, 0x04, 0, 0x03, 0x0002, 0x0026 }, - { 0x00, 0x04, 0, 0x03, 0x0002, 0x0036 }, - { 0x00, 0x01, 0, 0x03, 0x0003, 0x000a }, - { 0x00, 0x01, 0, 0x03, 0x0003, 0x000c }, - { 0x00, 0x02, 0, 0x03, 0x0003, 0x000e }, - { 0x00, 0x02, 0, 0x03, 0x0003, 0x0012 }, - { 0x00, 0x03, 0, 0x03, 0x0003, 0x0016 }, - { 0x00, 0x03, 0, 0x03, 0x0003, 0x001e }, - { 0x00, 0x04, 0, 0x03, 0x0003, 0x0026 }, - { 0x00, 0x04, 0, 0x03, 0x0003, 0x0036 }, - { 0x00, 0x01, 0, 0x03, 0x0004, 0x000a }, - { 0x00, 0x01, 0, 0x03, 0x0004, 0x000c }, - { 0x00, 0x02, 0, 0x03, 0x0004, 0x000e }, - { 0x00, 0x02, 0, 0x03, 0x0004, 0x0012 }, - { 0x00, 0x03, 0, 0x03, 0x0004, 0x0016 }, - { 0x00, 0x03, 0, 0x03, 0x0004, 0x001e }, - { 0x00, 0x04, 0, 0x03, 0x0004, 0x0026 }, - { 0x00, 0x04, 0, 
0x03, 0x0004, 0x0036 }, - { 0x00, 0x01, 0, 0x03, 0x0005, 0x000a }, - { 0x00, 0x01, 0, 0x03, 0x0005, 0x000c }, - { 0x00, 0x02, 0, 0x03, 0x0005, 0x000e }, - { 0x00, 0x02, 0, 0x03, 0x0005, 0x0012 }, - { 0x00, 0x03, 0, 0x03, 0x0005, 0x0016 }, - { 0x00, 0x03, 0, 0x03, 0x0005, 0x001e }, - { 0x00, 0x04, 0, 0x03, 0x0005, 0x0026 }, - { 0x00, 0x04, 0, 0x03, 0x0005, 0x0036 }, - { 0x01, 0x01, 0, 0x03, 0x0006, 0x000a }, - { 0x01, 0x01, 0, 0x03, 0x0006, 0x000c }, - { 0x01, 0x02, 0, 0x03, 0x0006, 0x000e }, - { 0x01, 0x02, 0, 0x03, 0x0006, 0x0012 }, - { 0x01, 0x03, 0, 0x03, 0x0006, 0x0016 }, - { 0x01, 0x03, 0, 0x03, 0x0006, 0x001e }, - { 0x01, 0x04, 0, 0x03, 0x0006, 0x0026 }, - { 0x01, 0x04, 0, 0x03, 0x0006, 0x0036 }, - { 0x01, 0x01, 0, 0x03, 0x0008, 0x000a }, - { 0x01, 0x01, 0, 0x03, 0x0008, 0x000c }, - { 0x01, 0x02, 0, 0x03, 0x0008, 0x000e }, - { 0x01, 0x02, 0, 0x03, 0x0008, 0x0012 }, - { 0x01, 0x03, 0, 0x03, 0x0008, 0x0016 }, - { 0x01, 0x03, 0, 0x03, 0x0008, 0x001e }, - { 0x01, 0x04, 0, 0x03, 0x0008, 0x0026 }, - { 0x01, 0x04, 0, 0x03, 0x0008, 0x0036 }, - { 0x00, 0x00, -1, 0x00, 0x0000, 0x0002 }, - { 0x00, 0x00, -1, 0x01, 0x0000, 0x0003 }, - { 0x00, 0x00, -1, 0x02, 0x0000, 0x0004 }, - { 0x00, 0x00, -1, 0x03, 0x0000, 0x0005 }, - { 0x00, 0x00, -1, 0x03, 0x0000, 0x0006 }, - { 0x00, 0x00, -1, 0x03, 0x0000, 0x0007 }, - { 0x00, 0x00, -1, 0x03, 0x0000, 0x0008 }, - { 0x00, 0x00, -1, 0x03, 0x0000, 0x0009 }, - { 0x00, 0x00, -1, 0x00, 0x0001, 0x0002 }, - { 0x00, 0x00, -1, 0x01, 0x0001, 0x0003 }, - { 0x00, 0x00, -1, 0x02, 0x0001, 0x0004 }, - { 0x00, 0x00, -1, 0x03, 0x0001, 0x0005 }, - { 0x00, 0x00, -1, 0x03, 0x0001, 0x0006 }, - { 0x00, 0x00, -1, 0x03, 0x0001, 0x0007 }, - { 0x00, 0x00, -1, 0x03, 0x0001, 0x0008 }, - { 0x00, 0x00, -1, 0x03, 0x0001, 0x0009 }, - { 0x00, 0x00, -1, 0x00, 0x0002, 0x0002 }, - { 0x00, 0x00, -1, 0x01, 0x0002, 0x0003 }, - { 0x00, 0x00, -1, 0x02, 0x0002, 0x0004 }, - { 0x00, 0x00, -1, 0x03, 0x0002, 0x0005 }, - { 0x00, 0x00, -1, 0x03, 0x0002, 0x0006 }, - { 0x00, 0x00, -1, 
0x03, 0x0002, 0x0007 }, - { 0x00, 0x00, -1, 0x03, 0x0002, 0x0008 }, - { 0x00, 0x00, -1, 0x03, 0x0002, 0x0009 }, - { 0x00, 0x00, -1, 0x00, 0x0003, 0x0002 }, - { 0x00, 0x00, -1, 0x01, 0x0003, 0x0003 }, - { 0x00, 0x00, -1, 0x02, 0x0003, 0x0004 }, - { 0x00, 0x00, -1, 0x03, 0x0003, 0x0005 }, - { 0x00, 0x00, -1, 0x03, 0x0003, 0x0006 }, - { 0x00, 0x00, -1, 0x03, 0x0003, 0x0007 }, - { 0x00, 0x00, -1, 0x03, 0x0003, 0x0008 }, - { 0x00, 0x00, -1, 0x03, 0x0003, 0x0009 }, - { 0x00, 0x00, -1, 0x00, 0x0004, 0x0002 }, - { 0x00, 0x00, -1, 0x01, 0x0004, 0x0003 }, - { 0x00, 0x00, -1, 0x02, 0x0004, 0x0004 }, - { 0x00, 0x00, -1, 0x03, 0x0004, 0x0005 }, - { 0x00, 0x00, -1, 0x03, 0x0004, 0x0006 }, - { 0x00, 0x00, -1, 0x03, 0x0004, 0x0007 }, - { 0x00, 0x00, -1, 0x03, 0x0004, 0x0008 }, - { 0x00, 0x00, -1, 0x03, 0x0004, 0x0009 }, - { 0x00, 0x00, -1, 0x00, 0x0005, 0x0002 }, - { 0x00, 0x00, -1, 0x01, 0x0005, 0x0003 }, - { 0x00, 0x00, -1, 0x02, 0x0005, 0x0004 }, - { 0x00, 0x00, -1, 0x03, 0x0005, 0x0005 }, - { 0x00, 0x00, -1, 0x03, 0x0005, 0x0006 }, - { 0x00, 0x00, -1, 0x03, 0x0005, 0x0007 }, - { 0x00, 0x00, -1, 0x03, 0x0005, 0x0008 }, - { 0x00, 0x00, -1, 0x03, 0x0005, 0x0009 }, - { 0x01, 0x00, -1, 0x00, 0x0006, 0x0002 }, - { 0x01, 0x00, -1, 0x01, 0x0006, 0x0003 }, - { 0x01, 0x00, -1, 0x02, 0x0006, 0x0004 }, - { 0x01, 0x00, -1, 0x03, 0x0006, 0x0005 }, - { 0x01, 0x00, -1, 0x03, 0x0006, 0x0006 }, - { 0x01, 0x00, -1, 0x03, 0x0006, 0x0007 }, - { 0x01, 0x00, -1, 0x03, 0x0006, 0x0008 }, - { 0x01, 0x00, -1, 0x03, 0x0006, 0x0009 }, - { 0x01, 0x00, -1, 0x00, 0x0008, 0x0002 }, - { 0x01, 0x00, -1, 0x01, 0x0008, 0x0003 }, - { 0x01, 0x00, -1, 0x02, 0x0008, 0x0004 }, - { 0x01, 0x00, -1, 0x03, 0x0008, 0x0005 }, - { 0x01, 0x00, -1, 0x03, 0x0008, 0x0006 }, - { 0x01, 0x00, -1, 0x03, 0x0008, 0x0007 }, - { 0x01, 0x00, -1, 0x03, 0x0008, 0x0008 }, - { 0x01, 0x00, -1, 0x03, 0x0008, 0x0009 }, - { 0x00, 0x01, -1, 0x03, 0x0000, 0x000a }, - { 0x00, 0x01, -1, 0x03, 0x0000, 0x000c }, - { 0x00, 0x02, -1, 0x03, 0x0000, 
0x000e }, - { 0x00, 0x02, -1, 0x03, 0x0000, 0x0012 }, - { 0x00, 0x03, -1, 0x03, 0x0000, 0x0016 }, - { 0x00, 0x03, -1, 0x03, 0x0000, 0x001e }, - { 0x00, 0x04, -1, 0x03, 0x0000, 0x0026 }, - { 0x00, 0x04, -1, 0x03, 0x0000, 0x0036 }, - { 0x00, 0x01, -1, 0x03, 0x0001, 0x000a }, - { 0x00, 0x01, -1, 0x03, 0x0001, 0x000c }, - { 0x00, 0x02, -1, 0x03, 0x0001, 0x000e }, - { 0x00, 0x02, -1, 0x03, 0x0001, 0x0012 }, - { 0x00, 0x03, -1, 0x03, 0x0001, 0x0016 }, - { 0x00, 0x03, -1, 0x03, 0x0001, 0x001e }, - { 0x00, 0x04, -1, 0x03, 0x0001, 0x0026 }, - { 0x00, 0x04, -1, 0x03, 0x0001, 0x0036 }, - { 0x00, 0x01, -1, 0x03, 0x0002, 0x000a }, - { 0x00, 0x01, -1, 0x03, 0x0002, 0x000c }, - { 0x00, 0x02, -1, 0x03, 0x0002, 0x000e }, - { 0x00, 0x02, -1, 0x03, 0x0002, 0x0012 }, - { 0x00, 0x03, -1, 0x03, 0x0002, 0x0016 }, - { 0x00, 0x03, -1, 0x03, 0x0002, 0x001e }, - { 0x00, 0x04, -1, 0x03, 0x0002, 0x0026 }, - { 0x00, 0x04, -1, 0x03, 0x0002, 0x0036 }, - { 0x00, 0x01, -1, 0x03, 0x0003, 0x000a }, - { 0x00, 0x01, -1, 0x03, 0x0003, 0x000c }, - { 0x00, 0x02, -1, 0x03, 0x0003, 0x000e }, - { 0x00, 0x02, -1, 0x03, 0x0003, 0x0012 }, - { 0x00, 0x03, -1, 0x03, 0x0003, 0x0016 }, - { 0x00, 0x03, -1, 0x03, 0x0003, 0x001e }, - { 0x00, 0x04, -1, 0x03, 0x0003, 0x0026 }, - { 0x00, 0x04, -1, 0x03, 0x0003, 0x0036 }, - { 0x00, 0x01, -1, 0x03, 0x0004, 0x000a }, - { 0x00, 0x01, -1, 0x03, 0x0004, 0x000c }, - { 0x00, 0x02, -1, 0x03, 0x0004, 0x000e }, - { 0x00, 0x02, -1, 0x03, 0x0004, 0x0012 }, - { 0x00, 0x03, -1, 0x03, 0x0004, 0x0016 }, - { 0x00, 0x03, -1, 0x03, 0x0004, 0x001e }, - { 0x00, 0x04, -1, 0x03, 0x0004, 0x0026 }, - { 0x00, 0x04, -1, 0x03, 0x0004, 0x0036 }, - { 0x00, 0x01, -1, 0x03, 0x0005, 0x000a }, - { 0x00, 0x01, -1, 0x03, 0x0005, 0x000c }, - { 0x00, 0x02, -1, 0x03, 0x0005, 0x000e }, - { 0x00, 0x02, -1, 0x03, 0x0005, 0x0012 }, - { 0x00, 0x03, -1, 0x03, 0x0005, 0x0016 }, - { 0x00, 0x03, -1, 0x03, 0x0005, 0x001e }, - { 0x00, 0x04, -1, 0x03, 0x0005, 0x0026 }, - { 0x00, 0x04, -1, 0x03, 0x0005, 0x0036 }, - { 0x01, 
0x01, -1, 0x03, 0x0006, 0x000a }, - { 0x01, 0x01, -1, 0x03, 0x0006, 0x000c }, - { 0x01, 0x02, -1, 0x03, 0x0006, 0x000e }, - { 0x01, 0x02, -1, 0x03, 0x0006, 0x0012 }, - { 0x01, 0x03, -1, 0x03, 0x0006, 0x0016 }, - { 0x01, 0x03, -1, 0x03, 0x0006, 0x001e }, - { 0x01, 0x04, -1, 0x03, 0x0006, 0x0026 }, - { 0x01, 0x04, -1, 0x03, 0x0006, 0x0036 }, - { 0x01, 0x01, -1, 0x03, 0x0008, 0x000a }, - { 0x01, 0x01, -1, 0x03, 0x0008, 0x000c }, - { 0x01, 0x02, -1, 0x03, 0x0008, 0x000e }, - { 0x01, 0x02, -1, 0x03, 0x0008, 0x0012 }, - { 0x01, 0x03, -1, 0x03, 0x0008, 0x0016 }, - { 0x01, 0x03, -1, 0x03, 0x0008, 0x001e }, - { 0x01, 0x04, -1, 0x03, 0x0008, 0x0026 }, - { 0x01, 0x04, -1, 0x03, 0x0008, 0x0036 }, - { 0x02, 0x00, -1, 0x00, 0x000a, 0x0002 }, - { 0x02, 0x00, -1, 0x01, 0x000a, 0x0003 }, - { 0x02, 0x00, -1, 0x02, 0x000a, 0x0004 }, - { 0x02, 0x00, -1, 0x03, 0x000a, 0x0005 }, - { 0x02, 0x00, -1, 0x03, 0x000a, 0x0006 }, - { 0x02, 0x00, -1, 0x03, 0x000a, 0x0007 }, - { 0x02, 0x00, -1, 0x03, 0x000a, 0x0008 }, - { 0x02, 0x00, -1, 0x03, 0x000a, 0x0009 }, - { 0x02, 0x00, -1, 0x00, 0x000e, 0x0002 }, - { 0x02, 0x00, -1, 0x01, 0x000e, 0x0003 }, - { 0x02, 0x00, -1, 0x02, 0x000e, 0x0004 }, - { 0x02, 0x00, -1, 0x03, 0x000e, 0x0005 }, - { 0x02, 0x00, -1, 0x03, 0x000e, 0x0006 }, - { 0x02, 0x00, -1, 0x03, 0x000e, 0x0007 }, - { 0x02, 0x00, -1, 0x03, 0x000e, 0x0008 }, - { 0x02, 0x00, -1, 0x03, 0x000e, 0x0009 }, - { 0x03, 0x00, -1, 0x00, 0x0012, 0x0002 }, - { 0x03, 0x00, -1, 0x01, 0x0012, 0x0003 }, - { 0x03, 0x00, -1, 0x02, 0x0012, 0x0004 }, - { 0x03, 0x00, -1, 0x03, 0x0012, 0x0005 }, - { 0x03, 0x00, -1, 0x03, 0x0012, 0x0006 }, - { 0x03, 0x00, -1, 0x03, 0x0012, 0x0007 }, - { 0x03, 0x00, -1, 0x03, 0x0012, 0x0008 }, - { 0x03, 0x00, -1, 0x03, 0x0012, 0x0009 }, - { 0x03, 0x00, -1, 0x00, 0x001a, 0x0002 }, - { 0x03, 0x00, -1, 0x01, 0x001a, 0x0003 }, - { 0x03, 0x00, -1, 0x02, 0x001a, 0x0004 }, - { 0x03, 0x00, -1, 0x03, 0x001a, 0x0005 }, - { 0x03, 0x00, -1, 0x03, 0x001a, 0x0006 }, - { 0x03, 0x00, -1, 0x03, 
0x001a, 0x0007 }, - { 0x03, 0x00, -1, 0x03, 0x001a, 0x0008 }, - { 0x03, 0x00, -1, 0x03, 0x001a, 0x0009 }, - { 0x04, 0x00, -1, 0x00, 0x0022, 0x0002 }, - { 0x04, 0x00, -1, 0x01, 0x0022, 0x0003 }, - { 0x04, 0x00, -1, 0x02, 0x0022, 0x0004 }, - { 0x04, 0x00, -1, 0x03, 0x0022, 0x0005 }, - { 0x04, 0x00, -1, 0x03, 0x0022, 0x0006 }, - { 0x04, 0x00, -1, 0x03, 0x0022, 0x0007 }, - { 0x04, 0x00, -1, 0x03, 0x0022, 0x0008 }, - { 0x04, 0x00, -1, 0x03, 0x0022, 0x0009 }, - { 0x04, 0x00, -1, 0x00, 0x0032, 0x0002 }, - { 0x04, 0x00, -1, 0x01, 0x0032, 0x0003 }, - { 0x04, 0x00, -1, 0x02, 0x0032, 0x0004 }, - { 0x04, 0x00, -1, 0x03, 0x0032, 0x0005 }, - { 0x04, 0x00, -1, 0x03, 0x0032, 0x0006 }, - { 0x04, 0x00, -1, 0x03, 0x0032, 0x0007 }, - { 0x04, 0x00, -1, 0x03, 0x0032, 0x0008 }, - { 0x04, 0x00, -1, 0x03, 0x0032, 0x0009 }, - { 0x05, 0x00, -1, 0x00, 0x0042, 0x0002 }, - { 0x05, 0x00, -1, 0x01, 0x0042, 0x0003 }, - { 0x05, 0x00, -1, 0x02, 0x0042, 0x0004 }, - { 0x05, 0x00, -1, 0x03, 0x0042, 0x0005 }, - { 0x05, 0x00, -1, 0x03, 0x0042, 0x0006 }, - { 0x05, 0x00, -1, 0x03, 0x0042, 0x0007 }, - { 0x05, 0x00, -1, 0x03, 0x0042, 0x0008 }, - { 0x05, 0x00, -1, 0x03, 0x0042, 0x0009 }, - { 0x05, 0x00, -1, 0x00, 0x0062, 0x0002 }, - { 0x05, 0x00, -1, 0x01, 0x0062, 0x0003 }, - { 0x05, 0x00, -1, 0x02, 0x0062, 0x0004 }, - { 0x05, 0x00, -1, 0x03, 0x0062, 0x0005 }, - { 0x05, 0x00, -1, 0x03, 0x0062, 0x0006 }, - { 0x05, 0x00, -1, 0x03, 0x0062, 0x0007 }, - { 0x05, 0x00, -1, 0x03, 0x0062, 0x0008 }, - { 0x05, 0x00, -1, 0x03, 0x0062, 0x0009 }, - { 0x02, 0x01, -1, 0x03, 0x000a, 0x000a }, - { 0x02, 0x01, -1, 0x03, 0x000a, 0x000c }, - { 0x02, 0x02, -1, 0x03, 0x000a, 0x000e }, - { 0x02, 0x02, -1, 0x03, 0x000a, 0x0012 }, - { 0x02, 0x03, -1, 0x03, 0x000a, 0x0016 }, - { 0x02, 0x03, -1, 0x03, 0x000a, 0x001e }, - { 0x02, 0x04, -1, 0x03, 0x000a, 0x0026 }, - { 0x02, 0x04, -1, 0x03, 0x000a, 0x0036 }, - { 0x02, 0x01, -1, 0x03, 0x000e, 0x000a }, - { 0x02, 0x01, -1, 0x03, 0x000e, 0x000c }, - { 0x02, 0x02, -1, 0x03, 0x000e, 0x000e }, - 
{ 0x02, 0x02, -1, 0x03, 0x000e, 0x0012 }, - { 0x02, 0x03, -1, 0x03, 0x000e, 0x0016 }, - { 0x02, 0x03, -1, 0x03, 0x000e, 0x001e }, - { 0x02, 0x04, -1, 0x03, 0x000e, 0x0026 }, - { 0x02, 0x04, -1, 0x03, 0x000e, 0x0036 }, - { 0x03, 0x01, -1, 0x03, 0x0012, 0x000a }, - { 0x03, 0x01, -1, 0x03, 0x0012, 0x000c }, - { 0x03, 0x02, -1, 0x03, 0x0012, 0x000e }, - { 0x03, 0x02, -1, 0x03, 0x0012, 0x0012 }, - { 0x03, 0x03, -1, 0x03, 0x0012, 0x0016 }, - { 0x03, 0x03, -1, 0x03, 0x0012, 0x001e }, - { 0x03, 0x04, -1, 0x03, 0x0012, 0x0026 }, - { 0x03, 0x04, -1, 0x03, 0x0012, 0x0036 }, - { 0x03, 0x01, -1, 0x03, 0x001a, 0x000a }, - { 0x03, 0x01, -1, 0x03, 0x001a, 0x000c }, - { 0x03, 0x02, -1, 0x03, 0x001a, 0x000e }, - { 0x03, 0x02, -1, 0x03, 0x001a, 0x0012 }, - { 0x03, 0x03, -1, 0x03, 0x001a, 0x0016 }, - { 0x03, 0x03, -1, 0x03, 0x001a, 0x001e }, - { 0x03, 0x04, -1, 0x03, 0x001a, 0x0026 }, - { 0x03, 0x04, -1, 0x03, 0x001a, 0x0036 }, - { 0x04, 0x01, -1, 0x03, 0x0022, 0x000a }, - { 0x04, 0x01, -1, 0x03, 0x0022, 0x000c }, - { 0x04, 0x02, -1, 0x03, 0x0022, 0x000e }, - { 0x04, 0x02, -1, 0x03, 0x0022, 0x0012 }, - { 0x04, 0x03, -1, 0x03, 0x0022, 0x0016 }, - { 0x04, 0x03, -1, 0x03, 0x0022, 0x001e }, - { 0x04, 0x04, -1, 0x03, 0x0022, 0x0026 }, - { 0x04, 0x04, -1, 0x03, 0x0022, 0x0036 }, - { 0x04, 0x01, -1, 0x03, 0x0032, 0x000a }, - { 0x04, 0x01, -1, 0x03, 0x0032, 0x000c }, - { 0x04, 0x02, -1, 0x03, 0x0032, 0x000e }, - { 0x04, 0x02, -1, 0x03, 0x0032, 0x0012 }, - { 0x04, 0x03, -1, 0x03, 0x0032, 0x0016 }, - { 0x04, 0x03, -1, 0x03, 0x0032, 0x001e }, - { 0x04, 0x04, -1, 0x03, 0x0032, 0x0026 }, - { 0x04, 0x04, -1, 0x03, 0x0032, 0x0036 }, - { 0x05, 0x01, -1, 0x03, 0x0042, 0x000a }, - { 0x05, 0x01, -1, 0x03, 0x0042, 0x000c }, - { 0x05, 0x02, -1, 0x03, 0x0042, 0x000e }, - { 0x05, 0x02, -1, 0x03, 0x0042, 0x0012 }, - { 0x05, 0x03, -1, 0x03, 0x0042, 0x0016 }, - { 0x05, 0x03, -1, 0x03, 0x0042, 0x001e }, - { 0x05, 0x04, -1, 0x03, 0x0042, 0x0026 }, - { 0x05, 0x04, -1, 0x03, 0x0042, 0x0036 }, - { 0x05, 0x01, -1, 
0x03, 0x0062, 0x000a }, - { 0x05, 0x01, -1, 0x03, 0x0062, 0x000c }, - { 0x05, 0x02, -1, 0x03, 0x0062, 0x000e }, - { 0x05, 0x02, -1, 0x03, 0x0062, 0x0012 }, - { 0x05, 0x03, -1, 0x03, 0x0062, 0x0016 }, - { 0x05, 0x03, -1, 0x03, 0x0062, 0x001e }, - { 0x05, 0x04, -1, 0x03, 0x0062, 0x0026 }, - { 0x05, 0x04, -1, 0x03, 0x0062, 0x0036 }, - { 0x00, 0x05, -1, 0x03, 0x0000, 0x0046 }, - { 0x00, 0x05, -1, 0x03, 0x0000, 0x0066 }, - { 0x00, 0x06, -1, 0x03, 0x0000, 0x0086 }, - { 0x00, 0x07, -1, 0x03, 0x0000, 0x00c6 }, - { 0x00, 0x08, -1, 0x03, 0x0000, 0x0146 }, - { 0x00, 0x09, -1, 0x03, 0x0000, 0x0246 }, - { 0x00, 0x0a, -1, 0x03, 0x0000, 0x0446 }, - { 0x00, 0x18, -1, 0x03, 0x0000, 0x0846 }, - { 0x00, 0x05, -1, 0x03, 0x0001, 0x0046 }, - { 0x00, 0x05, -1, 0x03, 0x0001, 0x0066 }, - { 0x00, 0x06, -1, 0x03, 0x0001, 0x0086 }, - { 0x00, 0x07, -1, 0x03, 0x0001, 0x00c6 }, - { 0x00, 0x08, -1, 0x03, 0x0001, 0x0146 }, - { 0x00, 0x09, -1, 0x03, 0x0001, 0x0246 }, - { 0x00, 0x0a, -1, 0x03, 0x0001, 0x0446 }, - { 0x00, 0x18, -1, 0x03, 0x0001, 0x0846 }, - { 0x00, 0x05, -1, 0x03, 0x0002, 0x0046 }, - { 0x00, 0x05, -1, 0x03, 0x0002, 0x0066 }, - { 0x00, 0x06, -1, 0x03, 0x0002, 0x0086 }, - { 0x00, 0x07, -1, 0x03, 0x0002, 0x00c6 }, - { 0x00, 0x08, -1, 0x03, 0x0002, 0x0146 }, - { 0x00, 0x09, -1, 0x03, 0x0002, 0x0246 }, - { 0x00, 0x0a, -1, 0x03, 0x0002, 0x0446 }, - { 0x00, 0x18, -1, 0x03, 0x0002, 0x0846 }, - { 0x00, 0x05, -1, 0x03, 0x0003, 0x0046 }, - { 0x00, 0x05, -1, 0x03, 0x0003, 0x0066 }, - { 0x00, 0x06, -1, 0x03, 0x0003, 0x0086 }, - { 0x00, 0x07, -1, 0x03, 0x0003, 0x00c6 }, - { 0x00, 0x08, -1, 0x03, 0x0003, 0x0146 }, - { 0x00, 0x09, -1, 0x03, 0x0003, 0x0246 }, - { 0x00, 0x0a, -1, 0x03, 0x0003, 0x0446 }, - { 0x00, 0x18, -1, 0x03, 0x0003, 0x0846 }, - { 0x00, 0x05, -1, 0x03, 0x0004, 0x0046 }, - { 0x00, 0x05, -1, 0x03, 0x0004, 0x0066 }, - { 0x00, 0x06, -1, 0x03, 0x0004, 0x0086 }, - { 0x00, 0x07, -1, 0x03, 0x0004, 0x00c6 }, - { 0x00, 0x08, -1, 0x03, 0x0004, 0x0146 }, - { 0x00, 0x09, -1, 0x03, 0x0004, 
0x0246 }, - { 0x00, 0x0a, -1, 0x03, 0x0004, 0x0446 }, - { 0x00, 0x18, -1, 0x03, 0x0004, 0x0846 }, - { 0x00, 0x05, -1, 0x03, 0x0005, 0x0046 }, - { 0x00, 0x05, -1, 0x03, 0x0005, 0x0066 }, - { 0x00, 0x06, -1, 0x03, 0x0005, 0x0086 }, - { 0x00, 0x07, -1, 0x03, 0x0005, 0x00c6 }, - { 0x00, 0x08, -1, 0x03, 0x0005, 0x0146 }, - { 0x00, 0x09, -1, 0x03, 0x0005, 0x0246 }, - { 0x00, 0x0a, -1, 0x03, 0x0005, 0x0446 }, - { 0x00, 0x18, -1, 0x03, 0x0005, 0x0846 }, - { 0x01, 0x05, -1, 0x03, 0x0006, 0x0046 }, - { 0x01, 0x05, -1, 0x03, 0x0006, 0x0066 }, - { 0x01, 0x06, -1, 0x03, 0x0006, 0x0086 }, - { 0x01, 0x07, -1, 0x03, 0x0006, 0x00c6 }, - { 0x01, 0x08, -1, 0x03, 0x0006, 0x0146 }, - { 0x01, 0x09, -1, 0x03, 0x0006, 0x0246 }, - { 0x01, 0x0a, -1, 0x03, 0x0006, 0x0446 }, - { 0x01, 0x18, -1, 0x03, 0x0006, 0x0846 }, - { 0x01, 0x05, -1, 0x03, 0x0008, 0x0046 }, - { 0x01, 0x05, -1, 0x03, 0x0008, 0x0066 }, - { 0x01, 0x06, -1, 0x03, 0x0008, 0x0086 }, - { 0x01, 0x07, -1, 0x03, 0x0008, 0x00c6 }, - { 0x01, 0x08, -1, 0x03, 0x0008, 0x0146 }, - { 0x01, 0x09, -1, 0x03, 0x0008, 0x0246 }, - { 0x01, 0x0a, -1, 0x03, 0x0008, 0x0446 }, - { 0x01, 0x18, -1, 0x03, 0x0008, 0x0846 }, - { 0x06, 0x00, -1, 0x00, 0x0082, 0x0002 }, - { 0x06, 0x00, -1, 0x01, 0x0082, 0x0003 }, - { 0x06, 0x00, -1, 0x02, 0x0082, 0x0004 }, - { 0x06, 0x00, -1, 0x03, 0x0082, 0x0005 }, - { 0x06, 0x00, -1, 0x03, 0x0082, 0x0006 }, - { 0x06, 0x00, -1, 0x03, 0x0082, 0x0007 }, - { 0x06, 0x00, -1, 0x03, 0x0082, 0x0008 }, - { 0x06, 0x00, -1, 0x03, 0x0082, 0x0009 }, - { 0x07, 0x00, -1, 0x00, 0x00c2, 0x0002 }, - { 0x07, 0x00, -1, 0x01, 0x00c2, 0x0003 }, - { 0x07, 0x00, -1, 0x02, 0x00c2, 0x0004 }, - { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0005 }, - { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0006 }, - { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0007 }, - { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0008 }, - { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0009 }, - { 0x08, 0x00, -1, 0x00, 0x0142, 0x0002 }, - { 0x08, 0x00, -1, 0x01, 0x0142, 0x0003 }, - { 0x08, 0x00, -1, 0x02, 0x0142, 0x0004 }, - { 0x08, 
0x00, -1, 0x03, 0x0142, 0x0005 }, - { 0x08, 0x00, -1, 0x03, 0x0142, 0x0006 }, - { 0x08, 0x00, -1, 0x03, 0x0142, 0x0007 }, - { 0x08, 0x00, -1, 0x03, 0x0142, 0x0008 }, - { 0x08, 0x00, -1, 0x03, 0x0142, 0x0009 }, - { 0x09, 0x00, -1, 0x00, 0x0242, 0x0002 }, - { 0x09, 0x00, -1, 0x01, 0x0242, 0x0003 }, - { 0x09, 0x00, -1, 0x02, 0x0242, 0x0004 }, - { 0x09, 0x00, -1, 0x03, 0x0242, 0x0005 }, - { 0x09, 0x00, -1, 0x03, 0x0242, 0x0006 }, - { 0x09, 0x00, -1, 0x03, 0x0242, 0x0007 }, - { 0x09, 0x00, -1, 0x03, 0x0242, 0x0008 }, - { 0x09, 0x00, -1, 0x03, 0x0242, 0x0009 }, - { 0x0a, 0x00, -1, 0x00, 0x0442, 0x0002 }, - { 0x0a, 0x00, -1, 0x01, 0x0442, 0x0003 }, - { 0x0a, 0x00, -1, 0x02, 0x0442, 0x0004 }, - { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0005 }, - { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0006 }, - { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0007 }, - { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0008 }, - { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0009 }, - { 0x0c, 0x00, -1, 0x00, 0x0842, 0x0002 }, - { 0x0c, 0x00, -1, 0x01, 0x0842, 0x0003 }, - { 0x0c, 0x00, -1, 0x02, 0x0842, 0x0004 }, - { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0005 }, - { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0006 }, - { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0007 }, - { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0008 }, - { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0009 }, - { 0x0e, 0x00, -1, 0x00, 0x1842, 0x0002 }, - { 0x0e, 0x00, -1, 0x01, 0x1842, 0x0003 }, - { 0x0e, 0x00, -1, 0x02, 0x1842, 0x0004 }, - { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0005 }, - { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0006 }, - { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0007 }, - { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0008 }, - { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0009 }, - { 0x18, 0x00, -1, 0x00, 0x5842, 0x0002 }, - { 0x18, 0x00, -1, 0x01, 0x5842, 0x0003 }, - { 0x18, 0x00, -1, 0x02, 0x5842, 0x0004 }, - { 0x18, 0x00, -1, 0x03, 0x5842, 0x0005 }, - { 0x18, 0x00, -1, 0x03, 0x5842, 0x0006 }, - { 0x18, 0x00, -1, 0x03, 0x5842, 0x0007 }, - { 0x18, 0x00, -1, 0x03, 0x5842, 0x0008 }, - { 0x18, 0x00, -1, 0x03, 0x5842, 0x0009 }, - { 0x02, 0x05, -1, 0x03, 
0x000a, 0x0046 }, - { 0x02, 0x05, -1, 0x03, 0x000a, 0x0066 }, - { 0x02, 0x06, -1, 0x03, 0x000a, 0x0086 }, - { 0x02, 0x07, -1, 0x03, 0x000a, 0x00c6 }, - { 0x02, 0x08, -1, 0x03, 0x000a, 0x0146 }, - { 0x02, 0x09, -1, 0x03, 0x000a, 0x0246 }, - { 0x02, 0x0a, -1, 0x03, 0x000a, 0x0446 }, - { 0x02, 0x18, -1, 0x03, 0x000a, 0x0846 }, - { 0x02, 0x05, -1, 0x03, 0x000e, 0x0046 }, - { 0x02, 0x05, -1, 0x03, 0x000e, 0x0066 }, - { 0x02, 0x06, -1, 0x03, 0x000e, 0x0086 }, - { 0x02, 0x07, -1, 0x03, 0x000e, 0x00c6 }, - { 0x02, 0x08, -1, 0x03, 0x000e, 0x0146 }, - { 0x02, 0x09, -1, 0x03, 0x000e, 0x0246 }, - { 0x02, 0x0a, -1, 0x03, 0x000e, 0x0446 }, - { 0x02, 0x18, -1, 0x03, 0x000e, 0x0846 }, - { 0x03, 0x05, -1, 0x03, 0x0012, 0x0046 }, - { 0x03, 0x05, -1, 0x03, 0x0012, 0x0066 }, - { 0x03, 0x06, -1, 0x03, 0x0012, 0x0086 }, - { 0x03, 0x07, -1, 0x03, 0x0012, 0x00c6 }, - { 0x03, 0x08, -1, 0x03, 0x0012, 0x0146 }, - { 0x03, 0x09, -1, 0x03, 0x0012, 0x0246 }, - { 0x03, 0x0a, -1, 0x03, 0x0012, 0x0446 }, - { 0x03, 0x18, -1, 0x03, 0x0012, 0x0846 }, - { 0x03, 0x05, -1, 0x03, 0x001a, 0x0046 }, - { 0x03, 0x05, -1, 0x03, 0x001a, 0x0066 }, - { 0x03, 0x06, -1, 0x03, 0x001a, 0x0086 }, - { 0x03, 0x07, -1, 0x03, 0x001a, 0x00c6 }, - { 0x03, 0x08, -1, 0x03, 0x001a, 0x0146 }, - { 0x03, 0x09, -1, 0x03, 0x001a, 0x0246 }, - { 0x03, 0x0a, -1, 0x03, 0x001a, 0x0446 }, - { 0x03, 0x18, -1, 0x03, 0x001a, 0x0846 }, - { 0x04, 0x05, -1, 0x03, 0x0022, 0x0046 }, - { 0x04, 0x05, -1, 0x03, 0x0022, 0x0066 }, - { 0x04, 0x06, -1, 0x03, 0x0022, 0x0086 }, - { 0x04, 0x07, -1, 0x03, 0x0022, 0x00c6 }, - { 0x04, 0x08, -1, 0x03, 0x0022, 0x0146 }, - { 0x04, 0x09, -1, 0x03, 0x0022, 0x0246 }, - { 0x04, 0x0a, -1, 0x03, 0x0022, 0x0446 }, - { 0x04, 0x18, -1, 0x03, 0x0022, 0x0846 }, - { 0x04, 0x05, -1, 0x03, 0x0032, 0x0046 }, - { 0x04, 0x05, -1, 0x03, 0x0032, 0x0066 }, - { 0x04, 0x06, -1, 0x03, 0x0032, 0x0086 }, - { 0x04, 0x07, -1, 0x03, 0x0032, 0x00c6 }, - { 0x04, 0x08, -1, 0x03, 0x0032, 0x0146 }, - { 0x04, 0x09, -1, 0x03, 0x0032, 0x0246 }, - 
{ 0x04, 0x0a, -1, 0x03, 0x0032, 0x0446 }, - { 0x04, 0x18, -1, 0x03, 0x0032, 0x0846 }, - { 0x05, 0x05, -1, 0x03, 0x0042, 0x0046 }, - { 0x05, 0x05, -1, 0x03, 0x0042, 0x0066 }, - { 0x05, 0x06, -1, 0x03, 0x0042, 0x0086 }, - { 0x05, 0x07, -1, 0x03, 0x0042, 0x00c6 }, - { 0x05, 0x08, -1, 0x03, 0x0042, 0x0146 }, - { 0x05, 0x09, -1, 0x03, 0x0042, 0x0246 }, - { 0x05, 0x0a, -1, 0x03, 0x0042, 0x0446 }, - { 0x05, 0x18, -1, 0x03, 0x0042, 0x0846 }, - { 0x05, 0x05, -1, 0x03, 0x0062, 0x0046 }, - { 0x05, 0x05, -1, 0x03, 0x0062, 0x0066 }, - { 0x05, 0x06, -1, 0x03, 0x0062, 0x0086 }, - { 0x05, 0x07, -1, 0x03, 0x0062, 0x00c6 }, - { 0x05, 0x08, -1, 0x03, 0x0062, 0x0146 }, - { 0x05, 0x09, -1, 0x03, 0x0062, 0x0246 }, - { 0x05, 0x0a, -1, 0x03, 0x0062, 0x0446 }, - { 0x05, 0x18, -1, 0x03, 0x0062, 0x0846 }, - { 0x06, 0x01, -1, 0x03, 0x0082, 0x000a }, - { 0x06, 0x01, -1, 0x03, 0x0082, 0x000c }, - { 0x06, 0x02, -1, 0x03, 0x0082, 0x000e }, - { 0x06, 0x02, -1, 0x03, 0x0082, 0x0012 }, - { 0x06, 0x03, -1, 0x03, 0x0082, 0x0016 }, - { 0x06, 0x03, -1, 0x03, 0x0082, 0x001e }, - { 0x06, 0x04, -1, 0x03, 0x0082, 0x0026 }, - { 0x06, 0x04, -1, 0x03, 0x0082, 0x0036 }, - { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000a }, - { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000c }, - { 0x07, 0x02, -1, 0x03, 0x00c2, 0x000e }, - { 0x07, 0x02, -1, 0x03, 0x00c2, 0x0012 }, - { 0x07, 0x03, -1, 0x03, 0x00c2, 0x0016 }, - { 0x07, 0x03, -1, 0x03, 0x00c2, 0x001e }, - { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0026 }, - { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0036 }, - { 0x08, 0x01, -1, 0x03, 0x0142, 0x000a }, - { 0x08, 0x01, -1, 0x03, 0x0142, 0x000c }, - { 0x08, 0x02, -1, 0x03, 0x0142, 0x000e }, - { 0x08, 0x02, -1, 0x03, 0x0142, 0x0012 }, - { 0x08, 0x03, -1, 0x03, 0x0142, 0x0016 }, - { 0x08, 0x03, -1, 0x03, 0x0142, 0x001e }, - { 0x08, 0x04, -1, 0x03, 0x0142, 0x0026 }, - { 0x08, 0x04, -1, 0x03, 0x0142, 0x0036 }, - { 0x09, 0x01, -1, 0x03, 0x0242, 0x000a }, - { 0x09, 0x01, -1, 0x03, 0x0242, 0x000c }, - { 0x09, 0x02, -1, 0x03, 0x0242, 0x000e }, - { 0x09, 0x02, -1, 
0x03, 0x0242, 0x0012 }, - { 0x09, 0x03, -1, 0x03, 0x0242, 0x0016 }, - { 0x09, 0x03, -1, 0x03, 0x0242, 0x001e }, - { 0x09, 0x04, -1, 0x03, 0x0242, 0x0026 }, - { 0x09, 0x04, -1, 0x03, 0x0242, 0x0036 }, - { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000a }, - { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000c }, - { 0x0a, 0x02, -1, 0x03, 0x0442, 0x000e }, - { 0x0a, 0x02, -1, 0x03, 0x0442, 0x0012 }, - { 0x0a, 0x03, -1, 0x03, 0x0442, 0x0016 }, - { 0x0a, 0x03, -1, 0x03, 0x0442, 0x001e }, - { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0026 }, - { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0036 }, - { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000a }, - { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000c }, - { 0x0c, 0x02, -1, 0x03, 0x0842, 0x000e }, - { 0x0c, 0x02, -1, 0x03, 0x0842, 0x0012 }, - { 0x0c, 0x03, -1, 0x03, 0x0842, 0x0016 }, - { 0x0c, 0x03, -1, 0x03, 0x0842, 0x001e }, - { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0026 }, - { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0036 }, - { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000a }, - { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000c }, - { 0x0e, 0x02, -1, 0x03, 0x1842, 0x000e }, - { 0x0e, 0x02, -1, 0x03, 0x1842, 0x0012 }, - { 0x0e, 0x03, -1, 0x03, 0x1842, 0x0016 }, - { 0x0e, 0x03, -1, 0x03, 0x1842, 0x001e }, - { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0026 }, - { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0036 }, - { 0x18, 0x01, -1, 0x03, 0x5842, 0x000a }, - { 0x18, 0x01, -1, 0x03, 0x5842, 0x000c }, - { 0x18, 0x02, -1, 0x03, 0x5842, 0x000e }, - { 0x18, 0x02, -1, 0x03, 0x5842, 0x0012 }, - { 0x18, 0x03, -1, 0x03, 0x5842, 0x0016 }, - { 0x18, 0x03, -1, 0x03, 0x5842, 0x001e }, - { 0x18, 0x04, -1, 0x03, 0x5842, 0x0026 }, - { 0x18, 0x04, -1, 0x03, 0x5842, 0x0036 }, - { 0x06, 0x05, -1, 0x03, 0x0082, 0x0046 }, - { 0x06, 0x05, -1, 0x03, 0x0082, 0x0066 }, - { 0x06, 0x06, -1, 0x03, 0x0082, 0x0086 }, - { 0x06, 0x07, -1, 0x03, 0x0082, 0x00c6 }, - { 0x06, 0x08, -1, 0x03, 0x0082, 0x0146 }, - { 0x06, 0x09, -1, 0x03, 0x0082, 0x0246 }, - { 0x06, 0x0a, -1, 0x03, 0x0082, 0x0446 }, - { 0x06, 0x18, -1, 0x03, 0x0082, 0x0846 }, - { 0x07, 0x05, -1, 0x03, 0x00c2, 
0x0046 }, - { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0066 }, - { 0x07, 0x06, -1, 0x03, 0x00c2, 0x0086 }, - { 0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6 }, - { 0x07, 0x08, -1, 0x03, 0x00c2, 0x0146 }, - { 0x07, 0x09, -1, 0x03, 0x00c2, 0x0246 }, - { 0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446 }, - { 0x07, 0x18, -1, 0x03, 0x00c2, 0x0846 }, - { 0x08, 0x05, -1, 0x03, 0x0142, 0x0046 }, - { 0x08, 0x05, -1, 0x03, 0x0142, 0x0066 }, - { 0x08, 0x06, -1, 0x03, 0x0142, 0x0086 }, - { 0x08, 0x07, -1, 0x03, 0x0142, 0x00c6 }, - { 0x08, 0x08, -1, 0x03, 0x0142, 0x0146 }, - { 0x08, 0x09, -1, 0x03, 0x0142, 0x0246 }, - { 0x08, 0x0a, -1, 0x03, 0x0142, 0x0446 }, - { 0x08, 0x18, -1, 0x03, 0x0142, 0x0846 }, - { 0x09, 0x05, -1, 0x03, 0x0242, 0x0046 }, - { 0x09, 0x05, -1, 0x03, 0x0242, 0x0066 }, - { 0x09, 0x06, -1, 0x03, 0x0242, 0x0086 }, - { 0x09, 0x07, -1, 0x03, 0x0242, 0x00c6 }, - { 0x09, 0x08, -1, 0x03, 0x0242, 0x0146 }, - { 0x09, 0x09, -1, 0x03, 0x0242, 0x0246 }, - { 0x09, 0x0a, -1, 0x03, 0x0242, 0x0446 }, - { 0x09, 0x18, -1, 0x03, 0x0242, 0x0846 }, - { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0046 }, - { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0066 }, - { 0x0a, 0x06, -1, 0x03, 0x0442, 0x0086 }, - { 0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6 }, - { 0x0a, 0x08, -1, 0x03, 0x0442, 0x0146 }, - { 0x0a, 0x09, -1, 0x03, 0x0442, 0x0246 }, - { 0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446 }, - { 0x0a, 0x18, -1, 0x03, 0x0442, 0x0846 }, - { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0046 }, - { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0066 }, - { 0x0c, 0x06, -1, 0x03, 0x0842, 0x0086 }, - { 0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6 }, - { 0x0c, 0x08, -1, 0x03, 0x0842, 0x0146 }, - { 0x0c, 0x09, -1, 0x03, 0x0842, 0x0246 }, - { 0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446 }, - { 0x0c, 0x18, -1, 0x03, 0x0842, 0x0846 }, - { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0046 }, - { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0066 }, - { 0x0e, 0x06, -1, 0x03, 0x1842, 0x0086 }, - { 0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6 }, - { 0x0e, 0x08, -1, 0x03, 0x1842, 0x0146 }, - { 0x0e, 0x09, -1, 0x03, 0x1842, 0x0246 }, - { 0x0e, 
0x0a, -1, 0x03, 0x1842, 0x0446 }, - { 0x0e, 0x18, -1, 0x03, 0x1842, 0x0846 }, - { 0x18, 0x05, -1, 0x03, 0x5842, 0x0046 }, - { 0x18, 0x05, -1, 0x03, 0x5842, 0x0066 }, - { 0x18, 0x06, -1, 0x03, 0x5842, 0x0086 }, - { 0x18, 0x07, -1, 0x03, 0x5842, 0x00c6 }, - { 0x18, 0x08, -1, 0x03, 0x5842, 0x0146 }, - { 0x18, 0x09, -1, 0x03, 0x5842, 0x0246 }, - { 0x18, 0x0a, -1, 0x03, 0x5842, 0x0446 }, - { 0x18, 0x18, -1, 0x03, 0x5842, 0x0846 }, -}; - -#endif /* BROTLI_DEC_PREFIX_H_ */ + { 0x00, 0x00, 0, 0x00, 0x0000, 0x0002 }, + { 0x00, 0x00, 0, 0x01, 0x0000, 0x0003 }, + { 0x00, 0x00, 0, 0x02, 0x0000, 0x0004 }, + { 0x00, 0x00, 0, 0x03, 0x0000, 0x0005 }, + { 0x00, 0x00, 0, 0x03, 0x0000, 0x0006 }, + { 0x00, 0x00, 0, 0x03, 0x0000, 0x0007 }, + { 0x00, 0x00, 0, 0x03, 0x0000, 0x0008 }, + { 0x00, 0x00, 0, 0x03, 0x0000, 0x0009 }, + { 0x00, 0x00, 0, 0x00, 0x0001, 0x0002 }, + { 0x00, 0x00, 0, 0x01, 0x0001, 0x0003 }, + { 0x00, 0x00, 0, 0x02, 0x0001, 0x0004 }, + { 0x00, 0x00, 0, 0x03, 0x0001, 0x0005 }, + { 0x00, 0x00, 0, 0x03, 0x0001, 0x0006 }, + { 0x00, 0x00, 0, 0x03, 0x0001, 0x0007 }, + { 0x00, 0x00, 0, 0x03, 0x0001, 0x0008 }, + { 0x00, 0x00, 0, 0x03, 0x0001, 0x0009 }, + { 0x00, 0x00, 0, 0x00, 0x0002, 0x0002 }, + { 0x00, 0x00, 0, 0x01, 0x0002, 0x0003 }, + { 0x00, 0x00, 0, 0x02, 0x0002, 0x0004 }, + { 0x00, 0x00, 0, 0x03, 0x0002, 0x0005 }, + { 0x00, 0x00, 0, 0x03, 0x0002, 0x0006 }, + { 0x00, 0x00, 0, 0x03, 0x0002, 0x0007 }, + { 0x00, 0x00, 0, 0x03, 0x0002, 0x0008 }, + { 0x00, 0x00, 0, 0x03, 0x0002, 0x0009 }, + { 0x00, 0x00, 0, 0x00, 0x0003, 0x0002 }, + { 0x00, 0x00, 0, 0x01, 0x0003, 0x0003 }, + { 0x00, 0x00, 0, 0x02, 0x0003, 0x0004 }, + { 0x00, 0x00, 0, 0x03, 0x0003, 0x0005 }, + { 0x00, 0x00, 0, 0x03, 0x0003, 0x0006 }, + { 0x00, 0x00, 0, 0x03, 0x0003, 0x0007 }, + { 0x00, 0x00, 0, 0x03, 0x0003, 0x0008 }, + { 0x00, 0x00, 0, 0x03, 0x0003, 0x0009 }, + { 0x00, 0x00, 0, 0x00, 0x0004, 0x0002 }, + { 0x00, 0x00, 0, 0x01, 0x0004, 0x0003 }, + { 0x00, 0x00, 0, 0x02, 0x0004, 0x0004 }, + { 0x00, 0x00, 0, 
0x03, 0x0004, 0x0005 }, + { 0x00, 0x00, 0, 0x03, 0x0004, 0x0006 }, + { 0x00, 0x00, 0, 0x03, 0x0004, 0x0007 }, + { 0x00, 0x00, 0, 0x03, 0x0004, 0x0008 }, + { 0x00, 0x00, 0, 0x03, 0x0004, 0x0009 }, + { 0x00, 0x00, 0, 0x00, 0x0005, 0x0002 }, + { 0x00, 0x00, 0, 0x01, 0x0005, 0x0003 }, + { 0x00, 0x00, 0, 0x02, 0x0005, 0x0004 }, + { 0x00, 0x00, 0, 0x03, 0x0005, 0x0005 }, + { 0x00, 0x00, 0, 0x03, 0x0005, 0x0006 }, + { 0x00, 0x00, 0, 0x03, 0x0005, 0x0007 }, + { 0x00, 0x00, 0, 0x03, 0x0005, 0x0008 }, + { 0x00, 0x00, 0, 0x03, 0x0005, 0x0009 }, + { 0x01, 0x00, 0, 0x00, 0x0006, 0x0002 }, + { 0x01, 0x00, 0, 0x01, 0x0006, 0x0003 }, + { 0x01, 0x00, 0, 0x02, 0x0006, 0x0004 }, + { 0x01, 0x00, 0, 0x03, 0x0006, 0x0005 }, + { 0x01, 0x00, 0, 0x03, 0x0006, 0x0006 }, + { 0x01, 0x00, 0, 0x03, 0x0006, 0x0007 }, + { 0x01, 0x00, 0, 0x03, 0x0006, 0x0008 }, + { 0x01, 0x00, 0, 0x03, 0x0006, 0x0009 }, + { 0x01, 0x00, 0, 0x00, 0x0008, 0x0002 }, + { 0x01, 0x00, 0, 0x01, 0x0008, 0x0003 }, + { 0x01, 0x00, 0, 0x02, 0x0008, 0x0004 }, + { 0x01, 0x00, 0, 0x03, 0x0008, 0x0005 }, + { 0x01, 0x00, 0, 0x03, 0x0008, 0x0006 }, + { 0x01, 0x00, 0, 0x03, 0x0008, 0x0007 }, + { 0x01, 0x00, 0, 0x03, 0x0008, 0x0008 }, + { 0x01, 0x00, 0, 0x03, 0x0008, 0x0009 }, + { 0x00, 0x01, 0, 0x03, 0x0000, 0x000a }, + { 0x00, 0x01, 0, 0x03, 0x0000, 0x000c }, + { 0x00, 0x02, 0, 0x03, 0x0000, 0x000e }, + { 0x00, 0x02, 0, 0x03, 0x0000, 0x0012 }, + { 0x00, 0x03, 0, 0x03, 0x0000, 0x0016 }, + { 0x00, 0x03, 0, 0x03, 0x0000, 0x001e }, + { 0x00, 0x04, 0, 0x03, 0x0000, 0x0026 }, + { 0x00, 0x04, 0, 0x03, 0x0000, 0x0036 }, + { 0x00, 0x01, 0, 0x03, 0x0001, 0x000a }, + { 0x00, 0x01, 0, 0x03, 0x0001, 0x000c }, + { 0x00, 0x02, 0, 0x03, 0x0001, 0x000e }, + { 0x00, 0x02, 0, 0x03, 0x0001, 0x0012 }, + { 0x00, 0x03, 0, 0x03, 0x0001, 0x0016 }, + { 0x00, 0x03, 0, 0x03, 0x0001, 0x001e }, + { 0x00, 0x04, 0, 0x03, 0x0001, 0x0026 }, + { 0x00, 0x04, 0, 0x03, 0x0001, 0x0036 }, + { 0x00, 0x01, 0, 0x03, 0x0002, 0x000a }, + { 0x00, 0x01, 0, 0x03, 0x0002, 0x000c 
}, + { 0x00, 0x02, 0, 0x03, 0x0002, 0x000e }, + { 0x00, 0x02, 0, 0x03, 0x0002, 0x0012 }, + { 0x00, 0x03, 0, 0x03, 0x0002, 0x0016 }, + { 0x00, 0x03, 0, 0x03, 0x0002, 0x001e }, + { 0x00, 0x04, 0, 0x03, 0x0002, 0x0026 }, + { 0x00, 0x04, 0, 0x03, 0x0002, 0x0036 }, + { 0x00, 0x01, 0, 0x03, 0x0003, 0x000a }, + { 0x00, 0x01, 0, 0x03, 0x0003, 0x000c }, + { 0x00, 0x02, 0, 0x03, 0x0003, 0x000e }, + { 0x00, 0x02, 0, 0x03, 0x0003, 0x0012 }, + { 0x00, 0x03, 0, 0x03, 0x0003, 0x0016 }, + { 0x00, 0x03, 0, 0x03, 0x0003, 0x001e }, + { 0x00, 0x04, 0, 0x03, 0x0003, 0x0026 }, + { 0x00, 0x04, 0, 0x03, 0x0003, 0x0036 }, + { 0x00, 0x01, 0, 0x03, 0x0004, 0x000a }, + { 0x00, 0x01, 0, 0x03, 0x0004, 0x000c }, + { 0x00, 0x02, 0, 0x03, 0x0004, 0x000e }, + { 0x00, 0x02, 0, 0x03, 0x0004, 0x0012 }, + { 0x00, 0x03, 0, 0x03, 0x0004, 0x0016 }, + { 0x00, 0x03, 0, 0x03, 0x0004, 0x001e }, + { 0x00, 0x04, 0, 0x03, 0x0004, 0x0026 }, + { 0x00, 0x04, 0, 0x03, 0x0004, 0x0036 }, + { 0x00, 0x01, 0, 0x03, 0x0005, 0x000a }, + { 0x00, 0x01, 0, 0x03, 0x0005, 0x000c }, + { 0x00, 0x02, 0, 0x03, 0x0005, 0x000e }, + { 0x00, 0x02, 0, 0x03, 0x0005, 0x0012 }, + { 0x00, 0x03, 0, 0x03, 0x0005, 0x0016 }, + { 0x00, 0x03, 0, 0x03, 0x0005, 0x001e }, + { 0x00, 0x04, 0, 0x03, 0x0005, 0x0026 }, + { 0x00, 0x04, 0, 0x03, 0x0005, 0x0036 }, + { 0x01, 0x01, 0, 0x03, 0x0006, 0x000a }, + { 0x01, 0x01, 0, 0x03, 0x0006, 0x000c }, + { 0x01, 0x02, 0, 0x03, 0x0006, 0x000e }, + { 0x01, 0x02, 0, 0x03, 0x0006, 0x0012 }, + { 0x01, 0x03, 0, 0x03, 0x0006, 0x0016 }, + { 0x01, 0x03, 0, 0x03, 0x0006, 0x001e }, + { 0x01, 0x04, 0, 0x03, 0x0006, 0x0026 }, + { 0x01, 0x04, 0, 0x03, 0x0006, 0x0036 }, + { 0x01, 0x01, 0, 0x03, 0x0008, 0x000a }, + { 0x01, 0x01, 0, 0x03, 0x0008, 0x000c }, + { 0x01, 0x02, 0, 0x03, 0x0008, 0x000e }, + { 0x01, 0x02, 0, 0x03, 0x0008, 0x0012 }, + { 0x01, 0x03, 0, 0x03, 0x0008, 0x0016 }, + { 0x01, 0x03, 0, 0x03, 0x0008, 0x001e }, + { 0x01, 0x04, 0, 0x03, 0x0008, 0x0026 }, + { 0x01, 0x04, 0, 0x03, 0x0008, 0x0036 }, + { 0x00, 0x00, 
-1, 0x00, 0x0000, 0x0002 }, + { 0x00, 0x00, -1, 0x01, 0x0000, 0x0003 }, + { 0x00, 0x00, -1, 0x02, 0x0000, 0x0004 }, + { 0x00, 0x00, -1, 0x03, 0x0000, 0x0005 }, + { 0x00, 0x00, -1, 0x03, 0x0000, 0x0006 }, + { 0x00, 0x00, -1, 0x03, 0x0000, 0x0007 }, + { 0x00, 0x00, -1, 0x03, 0x0000, 0x0008 }, + { 0x00, 0x00, -1, 0x03, 0x0000, 0x0009 }, + { 0x00, 0x00, -1, 0x00, 0x0001, 0x0002 }, + { 0x00, 0x00, -1, 0x01, 0x0001, 0x0003 }, + { 0x00, 0x00, -1, 0x02, 0x0001, 0x0004 }, + { 0x00, 0x00, -1, 0x03, 0x0001, 0x0005 }, + { 0x00, 0x00, -1, 0x03, 0x0001, 0x0006 }, + { 0x00, 0x00, -1, 0x03, 0x0001, 0x0007 }, + { 0x00, 0x00, -1, 0x03, 0x0001, 0x0008 }, + { 0x00, 0x00, -1, 0x03, 0x0001, 0x0009 }, + { 0x00, 0x00, -1, 0x00, 0x0002, 0x0002 }, + { 0x00, 0x00, -1, 0x01, 0x0002, 0x0003 }, + { 0x00, 0x00, -1, 0x02, 0x0002, 0x0004 }, + { 0x00, 0x00, -1, 0x03, 0x0002, 0x0005 }, + { 0x00, 0x00, -1, 0x03, 0x0002, 0x0006 }, + { 0x00, 0x00, -1, 0x03, 0x0002, 0x0007 }, + { 0x00, 0x00, -1, 0x03, 0x0002, 0x0008 }, + { 0x00, 0x00, -1, 0x03, 0x0002, 0x0009 }, + { 0x00, 0x00, -1, 0x00, 0x0003, 0x0002 }, + { 0x00, 0x00, -1, 0x01, 0x0003, 0x0003 }, + { 0x00, 0x00, -1, 0x02, 0x0003, 0x0004 }, + { 0x00, 0x00, -1, 0x03, 0x0003, 0x0005 }, + { 0x00, 0x00, -1, 0x03, 0x0003, 0x0006 }, + { 0x00, 0x00, -1, 0x03, 0x0003, 0x0007 }, + { 0x00, 0x00, -1, 0x03, 0x0003, 0x0008 }, + { 0x00, 0x00, -1, 0x03, 0x0003, 0x0009 }, + { 0x00, 0x00, -1, 0x00, 0x0004, 0x0002 }, + { 0x00, 0x00, -1, 0x01, 0x0004, 0x0003 }, + { 0x00, 0x00, -1, 0x02, 0x0004, 0x0004 }, + { 0x00, 0x00, -1, 0x03, 0x0004, 0x0005 }, + { 0x00, 0x00, -1, 0x03, 0x0004, 0x0006 }, + { 0x00, 0x00, -1, 0x03, 0x0004, 0x0007 }, + { 0x00, 0x00, -1, 0x03, 0x0004, 0x0008 }, + { 0x00, 0x00, -1, 0x03, 0x0004, 0x0009 }, + { 0x00, 0x00, -1, 0x00, 0x0005, 0x0002 }, + { 0x00, 0x00, -1, 0x01, 0x0005, 0x0003 }, + { 0x00, 0x00, -1, 0x02, 0x0005, 0x0004 }, + { 0x00, 0x00, -1, 0x03, 0x0005, 0x0005 }, + { 0x00, 0x00, -1, 0x03, 0x0005, 0x0006 }, + { 0x00, 0x00, -1, 0x03, 0x0005, 
0x0007 }, + { 0x00, 0x00, -1, 0x03, 0x0005, 0x0008 }, + { 0x00, 0x00, -1, 0x03, 0x0005, 0x0009 }, + { 0x01, 0x00, -1, 0x00, 0x0006, 0x0002 }, + { 0x01, 0x00, -1, 0x01, 0x0006, 0x0003 }, + { 0x01, 0x00, -1, 0x02, 0x0006, 0x0004 }, + { 0x01, 0x00, -1, 0x03, 0x0006, 0x0005 }, + { 0x01, 0x00, -1, 0x03, 0x0006, 0x0006 }, + { 0x01, 0x00, -1, 0x03, 0x0006, 0x0007 }, + { 0x01, 0x00, -1, 0x03, 0x0006, 0x0008 }, + { 0x01, 0x00, -1, 0x03, 0x0006, 0x0009 }, + { 0x01, 0x00, -1, 0x00, 0x0008, 0x0002 }, + { 0x01, 0x00, -1, 0x01, 0x0008, 0x0003 }, + { 0x01, 0x00, -1, 0x02, 0x0008, 0x0004 }, + { 0x01, 0x00, -1, 0x03, 0x0008, 0x0005 }, + { 0x01, 0x00, -1, 0x03, 0x0008, 0x0006 }, + { 0x01, 0x00, -1, 0x03, 0x0008, 0x0007 }, + { 0x01, 0x00, -1, 0x03, 0x0008, 0x0008 }, + { 0x01, 0x00, -1, 0x03, 0x0008, 0x0009 }, + { 0x00, 0x01, -1, 0x03, 0x0000, 0x000a }, + { 0x00, 0x01, -1, 0x03, 0x0000, 0x000c }, + { 0x00, 0x02, -1, 0x03, 0x0000, 0x000e }, + { 0x00, 0x02, -1, 0x03, 0x0000, 0x0012 }, + { 0x00, 0x03, -1, 0x03, 0x0000, 0x0016 }, + { 0x00, 0x03, -1, 0x03, 0x0000, 0x001e }, + { 0x00, 0x04, -1, 0x03, 0x0000, 0x0026 }, + { 0x00, 0x04, -1, 0x03, 0x0000, 0x0036 }, + { 0x00, 0x01, -1, 0x03, 0x0001, 0x000a }, + { 0x00, 0x01, -1, 0x03, 0x0001, 0x000c }, + { 0x00, 0x02, -1, 0x03, 0x0001, 0x000e }, + { 0x00, 0x02, -1, 0x03, 0x0001, 0x0012 }, + { 0x00, 0x03, -1, 0x03, 0x0001, 0x0016 }, + { 0x00, 0x03, -1, 0x03, 0x0001, 0x001e }, + { 0x00, 0x04, -1, 0x03, 0x0001, 0x0026 }, + { 0x00, 0x04, -1, 0x03, 0x0001, 0x0036 }, + { 0x00, 0x01, -1, 0x03, 0x0002, 0x000a }, + { 0x00, 0x01, -1, 0x03, 0x0002, 0x000c }, + { 0x00, 0x02, -1, 0x03, 0x0002, 0x000e }, + { 0x00, 0x02, -1, 0x03, 0x0002, 0x0012 }, + { 0x00, 0x03, -1, 0x03, 0x0002, 0x0016 }, + { 0x00, 0x03, -1, 0x03, 0x0002, 0x001e }, + { 0x00, 0x04, -1, 0x03, 0x0002, 0x0026 }, + { 0x00, 0x04, -1, 0x03, 0x0002, 0x0036 }, + { 0x00, 0x01, -1, 0x03, 0x0003, 0x000a }, + { 0x00, 0x01, -1, 0x03, 0x0003, 0x000c }, + { 0x00, 0x02, -1, 0x03, 0x0003, 0x000e }, + { 0x00, 
0x02, -1, 0x03, 0x0003, 0x0012 }, + { 0x00, 0x03, -1, 0x03, 0x0003, 0x0016 }, + { 0x00, 0x03, -1, 0x03, 0x0003, 0x001e }, + { 0x00, 0x04, -1, 0x03, 0x0003, 0x0026 }, + { 0x00, 0x04, -1, 0x03, 0x0003, 0x0036 }, + { 0x00, 0x01, -1, 0x03, 0x0004, 0x000a }, + { 0x00, 0x01, -1, 0x03, 0x0004, 0x000c }, + { 0x00, 0x02, -1, 0x03, 0x0004, 0x000e }, + { 0x00, 0x02, -1, 0x03, 0x0004, 0x0012 }, + { 0x00, 0x03, -1, 0x03, 0x0004, 0x0016 }, + { 0x00, 0x03, -1, 0x03, 0x0004, 0x001e }, + { 0x00, 0x04, -1, 0x03, 0x0004, 0x0026 }, + { 0x00, 0x04, -1, 0x03, 0x0004, 0x0036 }, + { 0x00, 0x01, -1, 0x03, 0x0005, 0x000a }, + { 0x00, 0x01, -1, 0x03, 0x0005, 0x000c }, + { 0x00, 0x02, -1, 0x03, 0x0005, 0x000e }, + { 0x00, 0x02, -1, 0x03, 0x0005, 0x0012 }, + { 0x00, 0x03, -1, 0x03, 0x0005, 0x0016 }, + { 0x00, 0x03, -1, 0x03, 0x0005, 0x001e }, + { 0x00, 0x04, -1, 0x03, 0x0005, 0x0026 }, + { 0x00, 0x04, -1, 0x03, 0x0005, 0x0036 }, + { 0x01, 0x01, -1, 0x03, 0x0006, 0x000a }, + { 0x01, 0x01, -1, 0x03, 0x0006, 0x000c }, + { 0x01, 0x02, -1, 0x03, 0x0006, 0x000e }, + { 0x01, 0x02, -1, 0x03, 0x0006, 0x0012 }, + { 0x01, 0x03, -1, 0x03, 0x0006, 0x0016 }, + { 0x01, 0x03, -1, 0x03, 0x0006, 0x001e }, + { 0x01, 0x04, -1, 0x03, 0x0006, 0x0026 }, + { 0x01, 0x04, -1, 0x03, 0x0006, 0x0036 }, + { 0x01, 0x01, -1, 0x03, 0x0008, 0x000a }, + { 0x01, 0x01, -1, 0x03, 0x0008, 0x000c }, + { 0x01, 0x02, -1, 0x03, 0x0008, 0x000e }, + { 0x01, 0x02, -1, 0x03, 0x0008, 0x0012 }, + { 0x01, 0x03, -1, 0x03, 0x0008, 0x0016 }, + { 0x01, 0x03, -1, 0x03, 0x0008, 0x001e }, + { 0x01, 0x04, -1, 0x03, 0x0008, 0x0026 }, + { 0x01, 0x04, -1, 0x03, 0x0008, 0x0036 }, + { 0x02, 0x00, -1, 0x00, 0x000a, 0x0002 }, + { 0x02, 0x00, -1, 0x01, 0x000a, 0x0003 }, + { 0x02, 0x00, -1, 0x02, 0x000a, 0x0004 }, + { 0x02, 0x00, -1, 0x03, 0x000a, 0x0005 }, + { 0x02, 0x00, -1, 0x03, 0x000a, 0x0006 }, + { 0x02, 0x00, -1, 0x03, 0x000a, 0x0007 }, + { 0x02, 0x00, -1, 0x03, 0x000a, 0x0008 }, + { 0x02, 0x00, -1, 0x03, 0x000a, 0x0009 }, + { 0x02, 0x00, -1, 0x00, 
0x000e, 0x0002 }, + { 0x02, 0x00, -1, 0x01, 0x000e, 0x0003 }, + { 0x02, 0x00, -1, 0x02, 0x000e, 0x0004 }, + { 0x02, 0x00, -1, 0x03, 0x000e, 0x0005 }, + { 0x02, 0x00, -1, 0x03, 0x000e, 0x0006 }, + { 0x02, 0x00, -1, 0x03, 0x000e, 0x0007 }, + { 0x02, 0x00, -1, 0x03, 0x000e, 0x0008 }, + { 0x02, 0x00, -1, 0x03, 0x000e, 0x0009 }, + { 0x03, 0x00, -1, 0x00, 0x0012, 0x0002 }, + { 0x03, 0x00, -1, 0x01, 0x0012, 0x0003 }, + { 0x03, 0x00, -1, 0x02, 0x0012, 0x0004 }, + { 0x03, 0x00, -1, 0x03, 0x0012, 0x0005 }, + { 0x03, 0x00, -1, 0x03, 0x0012, 0x0006 }, + { 0x03, 0x00, -1, 0x03, 0x0012, 0x0007 }, + { 0x03, 0x00, -1, 0x03, 0x0012, 0x0008 }, + { 0x03, 0x00, -1, 0x03, 0x0012, 0x0009 }, + { 0x03, 0x00, -1, 0x00, 0x001a, 0x0002 }, + { 0x03, 0x00, -1, 0x01, 0x001a, 0x0003 }, + { 0x03, 0x00, -1, 0x02, 0x001a, 0x0004 }, + { 0x03, 0x00, -1, 0x03, 0x001a, 0x0005 }, + { 0x03, 0x00, -1, 0x03, 0x001a, 0x0006 }, + { 0x03, 0x00, -1, 0x03, 0x001a, 0x0007 }, + { 0x03, 0x00, -1, 0x03, 0x001a, 0x0008 }, + { 0x03, 0x00, -1, 0x03, 0x001a, 0x0009 }, + { 0x04, 0x00, -1, 0x00, 0x0022, 0x0002 }, + { 0x04, 0x00, -1, 0x01, 0x0022, 0x0003 }, + { 0x04, 0x00, -1, 0x02, 0x0022, 0x0004 }, + { 0x04, 0x00, -1, 0x03, 0x0022, 0x0005 }, + { 0x04, 0x00, -1, 0x03, 0x0022, 0x0006 }, + { 0x04, 0x00, -1, 0x03, 0x0022, 0x0007 }, + { 0x04, 0x00, -1, 0x03, 0x0022, 0x0008 }, + { 0x04, 0x00, -1, 0x03, 0x0022, 0x0009 }, + { 0x04, 0x00, -1, 0x00, 0x0032, 0x0002 }, + { 0x04, 0x00, -1, 0x01, 0x0032, 0x0003 }, + { 0x04, 0x00, -1, 0x02, 0x0032, 0x0004 }, + { 0x04, 0x00, -1, 0x03, 0x0032, 0x0005 }, + { 0x04, 0x00, -1, 0x03, 0x0032, 0x0006 }, + { 0x04, 0x00, -1, 0x03, 0x0032, 0x0007 }, + { 0x04, 0x00, -1, 0x03, 0x0032, 0x0008 }, + { 0x04, 0x00, -1, 0x03, 0x0032, 0x0009 }, + { 0x05, 0x00, -1, 0x00, 0x0042, 0x0002 }, + { 0x05, 0x00, -1, 0x01, 0x0042, 0x0003 }, + { 0x05, 0x00, -1, 0x02, 0x0042, 0x0004 }, + { 0x05, 0x00, -1, 0x03, 0x0042, 0x0005 }, + { 0x05, 0x00, -1, 0x03, 0x0042, 0x0006 }, + { 0x05, 0x00, -1, 0x03, 0x0042, 0x0007 }, + 
{ 0x05, 0x00, -1, 0x03, 0x0042, 0x0008 }, + { 0x05, 0x00, -1, 0x03, 0x0042, 0x0009 }, + { 0x05, 0x00, -1, 0x00, 0x0062, 0x0002 }, + { 0x05, 0x00, -1, 0x01, 0x0062, 0x0003 }, + { 0x05, 0x00, -1, 0x02, 0x0062, 0x0004 }, + { 0x05, 0x00, -1, 0x03, 0x0062, 0x0005 }, + { 0x05, 0x00, -1, 0x03, 0x0062, 0x0006 }, + { 0x05, 0x00, -1, 0x03, 0x0062, 0x0007 }, + { 0x05, 0x00, -1, 0x03, 0x0062, 0x0008 }, + { 0x05, 0x00, -1, 0x03, 0x0062, 0x0009 }, + { 0x02, 0x01, -1, 0x03, 0x000a, 0x000a }, + { 0x02, 0x01, -1, 0x03, 0x000a, 0x000c }, + { 0x02, 0x02, -1, 0x03, 0x000a, 0x000e }, + { 0x02, 0x02, -1, 0x03, 0x000a, 0x0012 }, + { 0x02, 0x03, -1, 0x03, 0x000a, 0x0016 }, + { 0x02, 0x03, -1, 0x03, 0x000a, 0x001e }, + { 0x02, 0x04, -1, 0x03, 0x000a, 0x0026 }, + { 0x02, 0x04, -1, 0x03, 0x000a, 0x0036 }, + { 0x02, 0x01, -1, 0x03, 0x000e, 0x000a }, + { 0x02, 0x01, -1, 0x03, 0x000e, 0x000c }, + { 0x02, 0x02, -1, 0x03, 0x000e, 0x000e }, + { 0x02, 0x02, -1, 0x03, 0x000e, 0x0012 }, + { 0x02, 0x03, -1, 0x03, 0x000e, 0x0016 }, + { 0x02, 0x03, -1, 0x03, 0x000e, 0x001e }, + { 0x02, 0x04, -1, 0x03, 0x000e, 0x0026 }, + { 0x02, 0x04, -1, 0x03, 0x000e, 0x0036 }, + { 0x03, 0x01, -1, 0x03, 0x0012, 0x000a }, + { 0x03, 0x01, -1, 0x03, 0x0012, 0x000c }, + { 0x03, 0x02, -1, 0x03, 0x0012, 0x000e }, + { 0x03, 0x02, -1, 0x03, 0x0012, 0x0012 }, + { 0x03, 0x03, -1, 0x03, 0x0012, 0x0016 }, + { 0x03, 0x03, -1, 0x03, 0x0012, 0x001e }, + { 0x03, 0x04, -1, 0x03, 0x0012, 0x0026 }, + { 0x03, 0x04, -1, 0x03, 0x0012, 0x0036 }, + { 0x03, 0x01, -1, 0x03, 0x001a, 0x000a }, + { 0x03, 0x01, -1, 0x03, 0x001a, 0x000c }, + { 0x03, 0x02, -1, 0x03, 0x001a, 0x000e }, + { 0x03, 0x02, -1, 0x03, 0x001a, 0x0012 }, + { 0x03, 0x03, -1, 0x03, 0x001a, 0x0016 }, + { 0x03, 0x03, -1, 0x03, 0x001a, 0x001e }, + { 0x03, 0x04, -1, 0x03, 0x001a, 0x0026 }, + { 0x03, 0x04, -1, 0x03, 0x001a, 0x0036 }, + { 0x04, 0x01, -1, 0x03, 0x0022, 0x000a }, + { 0x04, 0x01, -1, 0x03, 0x0022, 0x000c }, + { 0x04, 0x02, -1, 0x03, 0x0022, 0x000e }, + { 0x04, 0x02, -1, 
0x03, 0x0022, 0x0012 }, + { 0x04, 0x03, -1, 0x03, 0x0022, 0x0016 }, + { 0x04, 0x03, -1, 0x03, 0x0022, 0x001e }, + { 0x04, 0x04, -1, 0x03, 0x0022, 0x0026 }, + { 0x04, 0x04, -1, 0x03, 0x0022, 0x0036 }, + { 0x04, 0x01, -1, 0x03, 0x0032, 0x000a }, + { 0x04, 0x01, -1, 0x03, 0x0032, 0x000c }, + { 0x04, 0x02, -1, 0x03, 0x0032, 0x000e }, + { 0x04, 0x02, -1, 0x03, 0x0032, 0x0012 }, + { 0x04, 0x03, -1, 0x03, 0x0032, 0x0016 }, + { 0x04, 0x03, -1, 0x03, 0x0032, 0x001e }, + { 0x04, 0x04, -1, 0x03, 0x0032, 0x0026 }, + { 0x04, 0x04, -1, 0x03, 0x0032, 0x0036 }, + { 0x05, 0x01, -1, 0x03, 0x0042, 0x000a }, + { 0x05, 0x01, -1, 0x03, 0x0042, 0x000c }, + { 0x05, 0x02, -1, 0x03, 0x0042, 0x000e }, + { 0x05, 0x02, -1, 0x03, 0x0042, 0x0012 }, + { 0x05, 0x03, -1, 0x03, 0x0042, 0x0016 }, + { 0x05, 0x03, -1, 0x03, 0x0042, 0x001e }, + { 0x05, 0x04, -1, 0x03, 0x0042, 0x0026 }, + { 0x05, 0x04, -1, 0x03, 0x0042, 0x0036 }, + { 0x05, 0x01, -1, 0x03, 0x0062, 0x000a }, + { 0x05, 0x01, -1, 0x03, 0x0062, 0x000c }, + { 0x05, 0x02, -1, 0x03, 0x0062, 0x000e }, + { 0x05, 0x02, -1, 0x03, 0x0062, 0x0012 }, + { 0x05, 0x03, -1, 0x03, 0x0062, 0x0016 }, + { 0x05, 0x03, -1, 0x03, 0x0062, 0x001e }, + { 0x05, 0x04, -1, 0x03, 0x0062, 0x0026 }, + { 0x05, 0x04, -1, 0x03, 0x0062, 0x0036 }, + { 0x00, 0x05, -1, 0x03, 0x0000, 0x0046 }, + { 0x00, 0x05, -1, 0x03, 0x0000, 0x0066 }, + { 0x00, 0x06, -1, 0x03, 0x0000, 0x0086 }, + { 0x00, 0x07, -1, 0x03, 0x0000, 0x00c6 }, + { 0x00, 0x08, -1, 0x03, 0x0000, 0x0146 }, + { 0x00, 0x09, -1, 0x03, 0x0000, 0x0246 }, + { 0x00, 0x0a, -1, 0x03, 0x0000, 0x0446 }, + { 0x00, 0x18, -1, 0x03, 0x0000, 0x0846 }, + { 0x00, 0x05, -1, 0x03, 0x0001, 0x0046 }, + { 0x00, 0x05, -1, 0x03, 0x0001, 0x0066 }, + { 0x00, 0x06, -1, 0x03, 0x0001, 0x0086 }, + { 0x00, 0x07, -1, 0x03, 0x0001, 0x00c6 }, + { 0x00, 0x08, -1, 0x03, 0x0001, 0x0146 }, + { 0x00, 0x09, -1, 0x03, 0x0001, 0x0246 }, + { 0x00, 0x0a, -1, 0x03, 0x0001, 0x0446 }, + { 0x00, 0x18, -1, 0x03, 0x0001, 0x0846 }, + { 0x00, 0x05, -1, 0x03, 0x0002, 
0x0046 }, + { 0x00, 0x05, -1, 0x03, 0x0002, 0x0066 }, + { 0x00, 0x06, -1, 0x03, 0x0002, 0x0086 }, + { 0x00, 0x07, -1, 0x03, 0x0002, 0x00c6 }, + { 0x00, 0x08, -1, 0x03, 0x0002, 0x0146 }, + { 0x00, 0x09, -1, 0x03, 0x0002, 0x0246 }, + { 0x00, 0x0a, -1, 0x03, 0x0002, 0x0446 }, + { 0x00, 0x18, -1, 0x03, 0x0002, 0x0846 }, + { 0x00, 0x05, -1, 0x03, 0x0003, 0x0046 }, + { 0x00, 0x05, -1, 0x03, 0x0003, 0x0066 }, + { 0x00, 0x06, -1, 0x03, 0x0003, 0x0086 }, + { 0x00, 0x07, -1, 0x03, 0x0003, 0x00c6 }, + { 0x00, 0x08, -1, 0x03, 0x0003, 0x0146 }, + { 0x00, 0x09, -1, 0x03, 0x0003, 0x0246 }, + { 0x00, 0x0a, -1, 0x03, 0x0003, 0x0446 }, + { 0x00, 0x18, -1, 0x03, 0x0003, 0x0846 }, + { 0x00, 0x05, -1, 0x03, 0x0004, 0x0046 }, + { 0x00, 0x05, -1, 0x03, 0x0004, 0x0066 }, + { 0x00, 0x06, -1, 0x03, 0x0004, 0x0086 }, + { 0x00, 0x07, -1, 0x03, 0x0004, 0x00c6 }, + { 0x00, 0x08, -1, 0x03, 0x0004, 0x0146 }, + { 0x00, 0x09, -1, 0x03, 0x0004, 0x0246 }, + { 0x00, 0x0a, -1, 0x03, 0x0004, 0x0446 }, + { 0x00, 0x18, -1, 0x03, 0x0004, 0x0846 }, + { 0x00, 0x05, -1, 0x03, 0x0005, 0x0046 }, + { 0x00, 0x05, -1, 0x03, 0x0005, 0x0066 }, + { 0x00, 0x06, -1, 0x03, 0x0005, 0x0086 }, + { 0x00, 0x07, -1, 0x03, 0x0005, 0x00c6 }, + { 0x00, 0x08, -1, 0x03, 0x0005, 0x0146 }, + { 0x00, 0x09, -1, 0x03, 0x0005, 0x0246 }, + { 0x00, 0x0a, -1, 0x03, 0x0005, 0x0446 }, + { 0x00, 0x18, -1, 0x03, 0x0005, 0x0846 }, + { 0x01, 0x05, -1, 0x03, 0x0006, 0x0046 }, + { 0x01, 0x05, -1, 0x03, 0x0006, 0x0066 }, + { 0x01, 0x06, -1, 0x03, 0x0006, 0x0086 }, + { 0x01, 0x07, -1, 0x03, 0x0006, 0x00c6 }, + { 0x01, 0x08, -1, 0x03, 0x0006, 0x0146 }, + { 0x01, 0x09, -1, 0x03, 0x0006, 0x0246 }, + { 0x01, 0x0a, -1, 0x03, 0x0006, 0x0446 }, + { 0x01, 0x18, -1, 0x03, 0x0006, 0x0846 }, + { 0x01, 0x05, -1, 0x03, 0x0008, 0x0046 }, + { 0x01, 0x05, -1, 0x03, 0x0008, 0x0066 }, + { 0x01, 0x06, -1, 0x03, 0x0008, 0x0086 }, + { 0x01, 0x07, -1, 0x03, 0x0008, 0x00c6 }, + { 0x01, 0x08, -1, 0x03, 0x0008, 0x0146 }, + { 0x01, 0x09, -1, 0x03, 0x0008, 0x0246 }, + { 0x01, 
0x0a, -1, 0x03, 0x0008, 0x0446 }, + { 0x01, 0x18, -1, 0x03, 0x0008, 0x0846 }, + { 0x06, 0x00, -1, 0x00, 0x0082, 0x0002 }, + { 0x06, 0x00, -1, 0x01, 0x0082, 0x0003 }, + { 0x06, 0x00, -1, 0x02, 0x0082, 0x0004 }, + { 0x06, 0x00, -1, 0x03, 0x0082, 0x0005 }, + { 0x06, 0x00, -1, 0x03, 0x0082, 0x0006 }, + { 0x06, 0x00, -1, 0x03, 0x0082, 0x0007 }, + { 0x06, 0x00, -1, 0x03, 0x0082, 0x0008 }, + { 0x06, 0x00, -1, 0x03, 0x0082, 0x0009 }, + { 0x07, 0x00, -1, 0x00, 0x00c2, 0x0002 }, + { 0x07, 0x00, -1, 0x01, 0x00c2, 0x0003 }, + { 0x07, 0x00, -1, 0x02, 0x00c2, 0x0004 }, + { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0005 }, + { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0006 }, + { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0007 }, + { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0008 }, + { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0009 }, + { 0x08, 0x00, -1, 0x00, 0x0142, 0x0002 }, + { 0x08, 0x00, -1, 0x01, 0x0142, 0x0003 }, + { 0x08, 0x00, -1, 0x02, 0x0142, 0x0004 }, + { 0x08, 0x00, -1, 0x03, 0x0142, 0x0005 }, + { 0x08, 0x00, -1, 0x03, 0x0142, 0x0006 }, + { 0x08, 0x00, -1, 0x03, 0x0142, 0x0007 }, + { 0x08, 0x00, -1, 0x03, 0x0142, 0x0008 }, + { 0x08, 0x00, -1, 0x03, 0x0142, 0x0009 }, + { 0x09, 0x00, -1, 0x00, 0x0242, 0x0002 }, + { 0x09, 0x00, -1, 0x01, 0x0242, 0x0003 }, + { 0x09, 0x00, -1, 0x02, 0x0242, 0x0004 }, + { 0x09, 0x00, -1, 0x03, 0x0242, 0x0005 }, + { 0x09, 0x00, -1, 0x03, 0x0242, 0x0006 }, + { 0x09, 0x00, -1, 0x03, 0x0242, 0x0007 }, + { 0x09, 0x00, -1, 0x03, 0x0242, 0x0008 }, + { 0x09, 0x00, -1, 0x03, 0x0242, 0x0009 }, + { 0x0a, 0x00, -1, 0x00, 0x0442, 0x0002 }, + { 0x0a, 0x00, -1, 0x01, 0x0442, 0x0003 }, + { 0x0a, 0x00, -1, 0x02, 0x0442, 0x0004 }, + { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0005 }, + { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0006 }, + { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0007 }, + { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0008 }, + { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0009 }, + { 0x0c, 0x00, -1, 0x00, 0x0842, 0x0002 }, + { 0x0c, 0x00, -1, 0x01, 0x0842, 0x0003 }, + { 0x0c, 0x00, -1, 0x02, 0x0842, 0x0004 }, + { 0x0c, 0x00, -1, 0x03, 
0x0842, 0x0005 }, + { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0006 }, + { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0007 }, + { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0008 }, + { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0009 }, + { 0x0e, 0x00, -1, 0x00, 0x1842, 0x0002 }, + { 0x0e, 0x00, -1, 0x01, 0x1842, 0x0003 }, + { 0x0e, 0x00, -1, 0x02, 0x1842, 0x0004 }, + { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0005 }, + { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0006 }, + { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0007 }, + { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0008 }, + { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0009 }, + { 0x18, 0x00, -1, 0x00, 0x5842, 0x0002 }, + { 0x18, 0x00, -1, 0x01, 0x5842, 0x0003 }, + { 0x18, 0x00, -1, 0x02, 0x5842, 0x0004 }, + { 0x18, 0x00, -1, 0x03, 0x5842, 0x0005 }, + { 0x18, 0x00, -1, 0x03, 0x5842, 0x0006 }, + { 0x18, 0x00, -1, 0x03, 0x5842, 0x0007 }, + { 0x18, 0x00, -1, 0x03, 0x5842, 0x0008 }, + { 0x18, 0x00, -1, 0x03, 0x5842, 0x0009 }, + { 0x02, 0x05, -1, 0x03, 0x000a, 0x0046 }, + { 0x02, 0x05, -1, 0x03, 0x000a, 0x0066 }, + { 0x02, 0x06, -1, 0x03, 0x000a, 0x0086 }, + { 0x02, 0x07, -1, 0x03, 0x000a, 0x00c6 }, + { 0x02, 0x08, -1, 0x03, 0x000a, 0x0146 }, + { 0x02, 0x09, -1, 0x03, 0x000a, 0x0246 }, + { 0x02, 0x0a, -1, 0x03, 0x000a, 0x0446 }, + { 0x02, 0x18, -1, 0x03, 0x000a, 0x0846 }, + { 0x02, 0x05, -1, 0x03, 0x000e, 0x0046 }, + { 0x02, 0x05, -1, 0x03, 0x000e, 0x0066 }, + { 0x02, 0x06, -1, 0x03, 0x000e, 0x0086 }, + { 0x02, 0x07, -1, 0x03, 0x000e, 0x00c6 }, + { 0x02, 0x08, -1, 0x03, 0x000e, 0x0146 }, + { 0x02, 0x09, -1, 0x03, 0x000e, 0x0246 }, + { 0x02, 0x0a, -1, 0x03, 0x000e, 0x0446 }, + { 0x02, 0x18, -1, 0x03, 0x000e, 0x0846 }, + { 0x03, 0x05, -1, 0x03, 0x0012, 0x0046 }, + { 0x03, 0x05, -1, 0x03, 0x0012, 0x0066 }, + { 0x03, 0x06, -1, 0x03, 0x0012, 0x0086 }, + { 0x03, 0x07, -1, 0x03, 0x0012, 0x00c6 }, + { 0x03, 0x08, -1, 0x03, 0x0012, 0x0146 }, + { 0x03, 0x09, -1, 0x03, 0x0012, 0x0246 }, + { 0x03, 0x0a, -1, 0x03, 0x0012, 0x0446 }, + { 0x03, 0x18, -1, 0x03, 0x0012, 0x0846 }, + { 0x03, 0x05, -1, 0x03, 0x001a, 0x0046 }, + 
{ 0x03, 0x05, -1, 0x03, 0x001a, 0x0066 }, + { 0x03, 0x06, -1, 0x03, 0x001a, 0x0086 }, + { 0x03, 0x07, -1, 0x03, 0x001a, 0x00c6 }, + { 0x03, 0x08, -1, 0x03, 0x001a, 0x0146 }, + { 0x03, 0x09, -1, 0x03, 0x001a, 0x0246 }, + { 0x03, 0x0a, -1, 0x03, 0x001a, 0x0446 }, + { 0x03, 0x18, -1, 0x03, 0x001a, 0x0846 }, + { 0x04, 0x05, -1, 0x03, 0x0022, 0x0046 }, + { 0x04, 0x05, -1, 0x03, 0x0022, 0x0066 }, + { 0x04, 0x06, -1, 0x03, 0x0022, 0x0086 }, + { 0x04, 0x07, -1, 0x03, 0x0022, 0x00c6 }, + { 0x04, 0x08, -1, 0x03, 0x0022, 0x0146 }, + { 0x04, 0x09, -1, 0x03, 0x0022, 0x0246 }, + { 0x04, 0x0a, -1, 0x03, 0x0022, 0x0446 }, + { 0x04, 0x18, -1, 0x03, 0x0022, 0x0846 }, + { 0x04, 0x05, -1, 0x03, 0x0032, 0x0046 }, + { 0x04, 0x05, -1, 0x03, 0x0032, 0x0066 }, + { 0x04, 0x06, -1, 0x03, 0x0032, 0x0086 }, + { 0x04, 0x07, -1, 0x03, 0x0032, 0x00c6 }, + { 0x04, 0x08, -1, 0x03, 0x0032, 0x0146 }, + { 0x04, 0x09, -1, 0x03, 0x0032, 0x0246 }, + { 0x04, 0x0a, -1, 0x03, 0x0032, 0x0446 }, + { 0x04, 0x18, -1, 0x03, 0x0032, 0x0846 }, + { 0x05, 0x05, -1, 0x03, 0x0042, 0x0046 }, + { 0x05, 0x05, -1, 0x03, 0x0042, 0x0066 }, + { 0x05, 0x06, -1, 0x03, 0x0042, 0x0086 }, + { 0x05, 0x07, -1, 0x03, 0x0042, 0x00c6 }, + { 0x05, 0x08, -1, 0x03, 0x0042, 0x0146 }, + { 0x05, 0x09, -1, 0x03, 0x0042, 0x0246 }, + { 0x05, 0x0a, -1, 0x03, 0x0042, 0x0446 }, + { 0x05, 0x18, -1, 0x03, 0x0042, 0x0846 }, + { 0x05, 0x05, -1, 0x03, 0x0062, 0x0046 }, + { 0x05, 0x05, -1, 0x03, 0x0062, 0x0066 }, + { 0x05, 0x06, -1, 0x03, 0x0062, 0x0086 }, + { 0x05, 0x07, -1, 0x03, 0x0062, 0x00c6 }, + { 0x05, 0x08, -1, 0x03, 0x0062, 0x0146 }, + { 0x05, 0x09, -1, 0x03, 0x0062, 0x0246 }, + { 0x05, 0x0a, -1, 0x03, 0x0062, 0x0446 }, + { 0x05, 0x18, -1, 0x03, 0x0062, 0x0846 }, + { 0x06, 0x01, -1, 0x03, 0x0082, 0x000a }, + { 0x06, 0x01, -1, 0x03, 0x0082, 0x000c }, + { 0x06, 0x02, -1, 0x03, 0x0082, 0x000e }, + { 0x06, 0x02, -1, 0x03, 0x0082, 0x0012 }, + { 0x06, 0x03, -1, 0x03, 0x0082, 0x0016 }, + { 0x06, 0x03, -1, 0x03, 0x0082, 0x001e }, + { 0x06, 0x04, -1, 
0x03, 0x0082, 0x0026 }, + { 0x06, 0x04, -1, 0x03, 0x0082, 0x0036 }, + { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000a }, + { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000c }, + { 0x07, 0x02, -1, 0x03, 0x00c2, 0x000e }, + { 0x07, 0x02, -1, 0x03, 0x00c2, 0x0012 }, + { 0x07, 0x03, -1, 0x03, 0x00c2, 0x0016 }, + { 0x07, 0x03, -1, 0x03, 0x00c2, 0x001e }, + { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0026 }, + { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0036 }, + { 0x08, 0x01, -1, 0x03, 0x0142, 0x000a }, + { 0x08, 0x01, -1, 0x03, 0x0142, 0x000c }, + { 0x08, 0x02, -1, 0x03, 0x0142, 0x000e }, + { 0x08, 0x02, -1, 0x03, 0x0142, 0x0012 }, + { 0x08, 0x03, -1, 0x03, 0x0142, 0x0016 }, + { 0x08, 0x03, -1, 0x03, 0x0142, 0x001e }, + { 0x08, 0x04, -1, 0x03, 0x0142, 0x0026 }, + { 0x08, 0x04, -1, 0x03, 0x0142, 0x0036 }, + { 0x09, 0x01, -1, 0x03, 0x0242, 0x000a }, + { 0x09, 0x01, -1, 0x03, 0x0242, 0x000c }, + { 0x09, 0x02, -1, 0x03, 0x0242, 0x000e }, + { 0x09, 0x02, -1, 0x03, 0x0242, 0x0012 }, + { 0x09, 0x03, -1, 0x03, 0x0242, 0x0016 }, + { 0x09, 0x03, -1, 0x03, 0x0242, 0x001e }, + { 0x09, 0x04, -1, 0x03, 0x0242, 0x0026 }, + { 0x09, 0x04, -1, 0x03, 0x0242, 0x0036 }, + { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000a }, + { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000c }, + { 0x0a, 0x02, -1, 0x03, 0x0442, 0x000e }, + { 0x0a, 0x02, -1, 0x03, 0x0442, 0x0012 }, + { 0x0a, 0x03, -1, 0x03, 0x0442, 0x0016 }, + { 0x0a, 0x03, -1, 0x03, 0x0442, 0x001e }, + { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0026 }, + { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0036 }, + { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000a }, + { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000c }, + { 0x0c, 0x02, -1, 0x03, 0x0842, 0x000e }, + { 0x0c, 0x02, -1, 0x03, 0x0842, 0x0012 }, + { 0x0c, 0x03, -1, 0x03, 0x0842, 0x0016 }, + { 0x0c, 0x03, -1, 0x03, 0x0842, 0x001e }, + { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0026 }, + { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0036 }, + { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000a }, + { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000c }, + { 0x0e, 0x02, -1, 0x03, 0x1842, 0x000e }, + { 0x0e, 0x02, -1, 0x03, 0x1842, 
0x0012 }, + { 0x0e, 0x03, -1, 0x03, 0x1842, 0x0016 }, + { 0x0e, 0x03, -1, 0x03, 0x1842, 0x001e }, + { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0026 }, + { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0036 }, + { 0x18, 0x01, -1, 0x03, 0x5842, 0x000a }, + { 0x18, 0x01, -1, 0x03, 0x5842, 0x000c }, + { 0x18, 0x02, -1, 0x03, 0x5842, 0x000e }, + { 0x18, 0x02, -1, 0x03, 0x5842, 0x0012 }, + { 0x18, 0x03, -1, 0x03, 0x5842, 0x0016 }, + { 0x18, 0x03, -1, 0x03, 0x5842, 0x001e }, + { 0x18, 0x04, -1, 0x03, 0x5842, 0x0026 }, + { 0x18, 0x04, -1, 0x03, 0x5842, 0x0036 }, + { 0x06, 0x05, -1, 0x03, 0x0082, 0x0046 }, + { 0x06, 0x05, -1, 0x03, 0x0082, 0x0066 }, + { 0x06, 0x06, -1, 0x03, 0x0082, 0x0086 }, + { 0x06, 0x07, -1, 0x03, 0x0082, 0x00c6 }, + { 0x06, 0x08, -1, 0x03, 0x0082, 0x0146 }, + { 0x06, 0x09, -1, 0x03, 0x0082, 0x0246 }, + { 0x06, 0x0a, -1, 0x03, 0x0082, 0x0446 }, + { 0x06, 0x18, -1, 0x03, 0x0082, 0x0846 }, + { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0046 }, + { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0066 }, + { 0x07, 0x06, -1, 0x03, 0x00c2, 0x0086 }, + { 0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6 }, + { 0x07, 0x08, -1, 0x03, 0x00c2, 0x0146 }, + { 0x07, 0x09, -1, 0x03, 0x00c2, 0x0246 }, + { 0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446 }, + { 0x07, 0x18, -1, 0x03, 0x00c2, 0x0846 }, + { 0x08, 0x05, -1, 0x03, 0x0142, 0x0046 }, + { 0x08, 0x05, -1, 0x03, 0x0142, 0x0066 }, + { 0x08, 0x06, -1, 0x03, 0x0142, 0x0086 }, + { 0x08, 0x07, -1, 0x03, 0x0142, 0x00c6 }, + { 0x08, 0x08, -1, 0x03, 0x0142, 0x0146 }, + { 0x08, 0x09, -1, 0x03, 0x0142, 0x0246 }, + { 0x08, 0x0a, -1, 0x03, 0x0142, 0x0446 }, + { 0x08, 0x18, -1, 0x03, 0x0142, 0x0846 }, + { 0x09, 0x05, -1, 0x03, 0x0242, 0x0046 }, + { 0x09, 0x05, -1, 0x03, 0x0242, 0x0066 }, + { 0x09, 0x06, -1, 0x03, 0x0242, 0x0086 }, + { 0x09, 0x07, -1, 0x03, 0x0242, 0x00c6 }, + { 0x09, 0x08, -1, 0x03, 0x0242, 0x0146 }, + { 0x09, 0x09, -1, 0x03, 0x0242, 0x0246 }, + { 0x09, 0x0a, -1, 0x03, 0x0242, 0x0446 }, + { 0x09, 0x18, -1, 0x03, 0x0242, 0x0846 }, + { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0046 }, + { 0x0a, 
0x05, -1, 0x03, 0x0442, 0x0066 }, + { 0x0a, 0x06, -1, 0x03, 0x0442, 0x0086 }, + { 0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6 }, + { 0x0a, 0x08, -1, 0x03, 0x0442, 0x0146 }, + { 0x0a, 0x09, -1, 0x03, 0x0442, 0x0246 }, + { 0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446 }, + { 0x0a, 0x18, -1, 0x03, 0x0442, 0x0846 }, + { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0046 }, + { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0066 }, + { 0x0c, 0x06, -1, 0x03, 0x0842, 0x0086 }, + { 0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6 }, + { 0x0c, 0x08, -1, 0x03, 0x0842, 0x0146 }, + { 0x0c, 0x09, -1, 0x03, 0x0842, 0x0246 }, + { 0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446 }, + { 0x0c, 0x18, -1, 0x03, 0x0842, 0x0846 }, + { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0046 }, + { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0066 }, + { 0x0e, 0x06, -1, 0x03, 0x1842, 0x0086 }, + { 0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6 }, + { 0x0e, 0x08, -1, 0x03, 0x1842, 0x0146 }, + { 0x0e, 0x09, -1, 0x03, 0x1842, 0x0246 }, + { 0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446 }, + { 0x0e, 0x18, -1, 0x03, 0x1842, 0x0846 }, + { 0x18, 0x05, -1, 0x03, 0x5842, 0x0046 }, + { 0x18, 0x05, -1, 0x03, 0x5842, 0x0066 }, + { 0x18, 0x06, -1, 0x03, 0x5842, 0x0086 }, + { 0x18, 0x07, -1, 0x03, 0x5842, 0x00c6 }, + { 0x18, 0x08, -1, 0x03, 0x5842, 0x0146 }, + { 0x18, 0x09, -1, 0x03, 0x5842, 0x0246 }, + { 0x18, 0x0a, -1, 0x03, 0x5842, 0x0446 }, + { 0x18, 0x18, -1, 0x03, 0x5842, 0x0846 }, +}; + +#endif /* BROTLI_DEC_PREFIX_H_ */ diff --git a/contrib/libs/brotli/dec/state.c b/contrib/libs/brotli/dec/state.c index aebdee19c44..e0b37c2dcd0 100644 --- a/contrib/libs/brotli/dec/state.c +++ b/contrib/libs/brotli/dec/state.c @@ -1,20 +1,20 @@ -/* Copyright 2015 Google Inc. All Rights Reserved. - +/* Copyright 2015 Google Inc. All Rights Reserved. + Distributed under MIT license. 
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ - + #include "./state.h" - + #include /* free, malloc */ - + #include -#include "./huffman.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - +#include "./huffman.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { if (!alloc_func) { @@ -30,101 +30,101 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, s->error_code = 0; /* BROTLI_DECODER_NO_ERROR */ BrotliInitBitReader(&s->br); - s->state = BROTLI_STATE_UNINITED; + s->state = BROTLI_STATE_UNINITED; s->large_window = 0; - s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; - s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE; - s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; - s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE; - s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE; - s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE; + s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; + s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE; + s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; + s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE; + s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE; + s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE; s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE; - + s->buffer_length = 0; s->loop_counter = 0; s->pos = 0; s->rb_roundtrips = 0; s->partial_pos_out = 0; - s->block_type_trees = NULL; - s->block_len_trees = NULL; - s->ringbuffer = NULL; + s->block_type_trees = NULL; + s->block_len_trees = NULL; + s->ringbuffer = NULL; s->ringbuffer_size = 0; s->new_ringbuffer_size = 0; s->ringbuffer_mask = 0; - - s->context_map = NULL; - s->context_modes = NULL; - s->dist_context_map = NULL; - s->context_map_slice = NULL; - 
s->dist_context_map_slice = NULL; - + + s->context_map = NULL; + s->context_modes = NULL; + s->dist_context_map = NULL; + s->context_map_slice = NULL; + s->dist_context_map_slice = NULL; + s->sub_loop_counter = 0; - s->literal_hgroup.codes = NULL; - s->literal_hgroup.htrees = NULL; - s->insert_copy_hgroup.codes = NULL; - s->insert_copy_hgroup.htrees = NULL; - s->distance_hgroup.codes = NULL; - s->distance_hgroup.htrees = NULL; - + s->literal_hgroup.codes = NULL; + s->literal_hgroup.htrees = NULL; + s->insert_copy_hgroup.codes = NULL; + s->insert_copy_hgroup.htrees = NULL; + s->distance_hgroup.codes = NULL; + s->distance_hgroup.htrees = NULL; + s->is_last_metablock = 0; s->is_uncompressed = 0; s->is_metadata = 0; s->should_wrap_ringbuffer = 0; s->canny_ringbuffer_allocation = 1; - - s->window_bits = 0; - s->max_distance = 0; - s->dist_rb[0] = 16; - s->dist_rb[1] = 15; - s->dist_rb[2] = 11; - s->dist_rb[3] = 4; - s->dist_rb_idx = 0; - s->block_type_trees = NULL; - s->block_len_trees = NULL; - - /* Make small negative indexes addressable. */ - s->symbol_lists = &s->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1]; - + + s->window_bits = 0; + s->max_distance = 0; + s->dist_rb[0] = 16; + s->dist_rb[1] = 15; + s->dist_rb[2] = 11; + s->dist_rb[3] = 4; + s->dist_rb_idx = 0; + s->block_type_trees = NULL; + s->block_len_trees = NULL; + + /* Make small negative indexes addressable. 
*/ + s->symbol_lists = &s->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1]; + s->mtf_upper_bound = 63; s->dictionary = BrotliGetDictionary(); s->transforms = BrotliGetTransforms(); return BROTLI_TRUE; -} - +} + void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) { - s->meta_block_remaining_len = 0; + s->meta_block_remaining_len = 0; s->block_length[0] = 1U << 24; s->block_length[1] = 1U << 24; s->block_length[2] = 1U << 24; - s->num_block_types[0] = 1; - s->num_block_types[1] = 1; - s->num_block_types[2] = 1; - s->block_type_rb[0] = 1; - s->block_type_rb[1] = 0; - s->block_type_rb[2] = 1; - s->block_type_rb[3] = 0; - s->block_type_rb[4] = 1; - s->block_type_rb[5] = 0; - s->context_map = NULL; - s->context_modes = NULL; - s->dist_context_map = NULL; - s->context_map_slice = NULL; - s->literal_htree = NULL; - s->dist_context_map_slice = NULL; - s->dist_htree_index = 0; + s->num_block_types[0] = 1; + s->num_block_types[1] = 1; + s->num_block_types[2] = 1; + s->block_type_rb[0] = 1; + s->block_type_rb[1] = 0; + s->block_type_rb[2] = 1; + s->block_type_rb[3] = 0; + s->block_type_rb[4] = 1; + s->block_type_rb[5] = 0; + s->context_map = NULL; + s->context_modes = NULL; + s->dist_context_map = NULL; + s->context_map_slice = NULL; + s->literal_htree = NULL; + s->dist_context_map_slice = NULL; + s->dist_htree_index = 0; s->context_lookup = NULL; - s->literal_hgroup.codes = NULL; - s->literal_hgroup.htrees = NULL; - s->insert_copy_hgroup.codes = NULL; - s->insert_copy_hgroup.htrees = NULL; - s->distance_hgroup.codes = NULL; - s->distance_hgroup.htrees = NULL; -} - + s->literal_hgroup.codes = NULL; + s->literal_hgroup.htrees = NULL; + s->insert_copy_hgroup.codes = NULL; + s->insert_copy_hgroup.htrees = NULL; + s->distance_hgroup.codes = NULL; + s->distance_hgroup.htrees = NULL; +} + void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) { BROTLI_DECODER_FREE(s, s->context_modes); BROTLI_DECODER_FREE(s, s->context_map); @@ -132,15 +132,15 @@ void 
BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) { BROTLI_DECODER_FREE(s, s->literal_hgroup.htrees); BROTLI_DECODER_FREE(s, s->insert_copy_hgroup.htrees); BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees); -} - +} + void BrotliDecoderStateCleanup(BrotliDecoderState* s) { BrotliDecoderStateCleanupAfterMetablock(s); - + BROTLI_DECODER_FREE(s, s->ringbuffer); BROTLI_DECODER_FREE(s, s->block_type_trees); -} - +} + BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol, uint32_t ntrees) { @@ -159,6 +159,6 @@ BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, return !!p; } -#if defined(__cplusplus) || defined(c_plusplus) +#if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ -#endif +#endif diff --git a/contrib/libs/brotli/dec/state.h b/contrib/libs/brotli/dec/state.h index b21553ee72d..d28b63920ef 100644 --- a/contrib/libs/brotli/dec/state.h +++ b/contrib/libs/brotli/dec/state.h @@ -1,110 +1,110 @@ -/* Copyright 2015 Google Inc. All Rights Reserved. - +/* Copyright 2015 Google Inc. All Rights Reserved. + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Brotli state for partial streaming decoding. */ - -#ifndef BROTLI_DEC_STATE_H_ -#define BROTLI_DEC_STATE_H_ - +*/ + +/* Brotli state for partial streaming decoding. 
*/ + +#ifndef BROTLI_DEC_STATE_H_ +#define BROTLI_DEC_STATE_H_ + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" #include "../common/transform.h" #include -#include "./bit_reader.h" -#include "./huffman.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -typedef enum { - BROTLI_STATE_UNINITED, +#include "./bit_reader.h" +#include "./huffman.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +typedef enum { + BROTLI_STATE_UNINITED, BROTLI_STATE_LARGE_WINDOW_BITS, BROTLI_STATE_INITIALIZE, - BROTLI_STATE_METABLOCK_BEGIN, - BROTLI_STATE_METABLOCK_HEADER, + BROTLI_STATE_METABLOCK_BEGIN, + BROTLI_STATE_METABLOCK_HEADER, BROTLI_STATE_METABLOCK_HEADER_2, - BROTLI_STATE_CONTEXT_MODES, - BROTLI_STATE_COMMAND_BEGIN, - BROTLI_STATE_COMMAND_INNER, + BROTLI_STATE_CONTEXT_MODES, + BROTLI_STATE_COMMAND_BEGIN, + BROTLI_STATE_COMMAND_INNER, BROTLI_STATE_COMMAND_POST_DECODE_LITERALS, BROTLI_STATE_COMMAND_POST_WRAP_COPY, - BROTLI_STATE_UNCOMPRESSED, - BROTLI_STATE_METADATA, - BROTLI_STATE_COMMAND_INNER_WRITE, - BROTLI_STATE_METABLOCK_DONE, - BROTLI_STATE_COMMAND_POST_WRITE_1, - BROTLI_STATE_COMMAND_POST_WRITE_2, - BROTLI_STATE_HUFFMAN_CODE_0, - BROTLI_STATE_HUFFMAN_CODE_1, - BROTLI_STATE_HUFFMAN_CODE_2, - BROTLI_STATE_HUFFMAN_CODE_3, - BROTLI_STATE_CONTEXT_MAP_1, - BROTLI_STATE_CONTEXT_MAP_2, - BROTLI_STATE_TREE_GROUP, - BROTLI_STATE_DONE -} BrotliRunningState; - -typedef enum { - BROTLI_STATE_METABLOCK_HEADER_NONE, - BROTLI_STATE_METABLOCK_HEADER_EMPTY, - BROTLI_STATE_METABLOCK_HEADER_NIBBLES, - BROTLI_STATE_METABLOCK_HEADER_SIZE, - BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED, - BROTLI_STATE_METABLOCK_HEADER_RESERVED, - BROTLI_STATE_METABLOCK_HEADER_BYTES, - BROTLI_STATE_METABLOCK_HEADER_METADATA -} BrotliRunningMetablockHeaderState; - -typedef enum { - BROTLI_STATE_UNCOMPRESSED_NONE, + BROTLI_STATE_UNCOMPRESSED, + BROTLI_STATE_METADATA, + BROTLI_STATE_COMMAND_INNER_WRITE, + 
BROTLI_STATE_METABLOCK_DONE, + BROTLI_STATE_COMMAND_POST_WRITE_1, + BROTLI_STATE_COMMAND_POST_WRITE_2, + BROTLI_STATE_HUFFMAN_CODE_0, + BROTLI_STATE_HUFFMAN_CODE_1, + BROTLI_STATE_HUFFMAN_CODE_2, + BROTLI_STATE_HUFFMAN_CODE_3, + BROTLI_STATE_CONTEXT_MAP_1, + BROTLI_STATE_CONTEXT_MAP_2, + BROTLI_STATE_TREE_GROUP, + BROTLI_STATE_DONE +} BrotliRunningState; + +typedef enum { + BROTLI_STATE_METABLOCK_HEADER_NONE, + BROTLI_STATE_METABLOCK_HEADER_EMPTY, + BROTLI_STATE_METABLOCK_HEADER_NIBBLES, + BROTLI_STATE_METABLOCK_HEADER_SIZE, + BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED, + BROTLI_STATE_METABLOCK_HEADER_RESERVED, + BROTLI_STATE_METABLOCK_HEADER_BYTES, + BROTLI_STATE_METABLOCK_HEADER_METADATA +} BrotliRunningMetablockHeaderState; + +typedef enum { + BROTLI_STATE_UNCOMPRESSED_NONE, BROTLI_STATE_UNCOMPRESSED_WRITE -} BrotliRunningUncompressedState; - -typedef enum { - BROTLI_STATE_TREE_GROUP_NONE, - BROTLI_STATE_TREE_GROUP_LOOP -} BrotliRunningTreeGroupState; - -typedef enum { - BROTLI_STATE_CONTEXT_MAP_NONE, - BROTLI_STATE_CONTEXT_MAP_READ_PREFIX, - BROTLI_STATE_CONTEXT_MAP_HUFFMAN, +} BrotliRunningUncompressedState; + +typedef enum { + BROTLI_STATE_TREE_GROUP_NONE, + BROTLI_STATE_TREE_GROUP_LOOP +} BrotliRunningTreeGroupState; + +typedef enum { + BROTLI_STATE_CONTEXT_MAP_NONE, + BROTLI_STATE_CONTEXT_MAP_READ_PREFIX, + BROTLI_STATE_CONTEXT_MAP_HUFFMAN, BROTLI_STATE_CONTEXT_MAP_DECODE, BROTLI_STATE_CONTEXT_MAP_TRANSFORM -} BrotliRunningContextMapState; - -typedef enum { - BROTLI_STATE_HUFFMAN_NONE, +} BrotliRunningContextMapState; + +typedef enum { + BROTLI_STATE_HUFFMAN_NONE, BROTLI_STATE_HUFFMAN_SIMPLE_SIZE, BROTLI_STATE_HUFFMAN_SIMPLE_READ, BROTLI_STATE_HUFFMAN_SIMPLE_BUILD, BROTLI_STATE_HUFFMAN_COMPLEX, - BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS -} BrotliRunningHuffmanState; - -typedef enum { - BROTLI_STATE_DECODE_UINT8_NONE, - BROTLI_STATE_DECODE_UINT8_SHORT, - BROTLI_STATE_DECODE_UINT8_LONG -} BrotliRunningDecodeUint8State; - + BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS +} 
BrotliRunningHuffmanState; + +typedef enum { + BROTLI_STATE_DECODE_UINT8_NONE, + BROTLI_STATE_DECODE_UINT8_SHORT, + BROTLI_STATE_DECODE_UINT8_LONG +} BrotliRunningDecodeUint8State; + typedef enum { BROTLI_STATE_READ_BLOCK_LENGTH_NONE, BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX } BrotliRunningReadBlockLengthState; struct BrotliDecoderStateStruct { - BrotliRunningState state; + BrotliRunningState state; - /* This counter is reused for several disjoint loops. */ + /* This counter is reused for several disjoint loops. */ int loop_counter; - BrotliBitReader br; + BrotliBitReader br; brotli_alloc_func alloc_func; brotli_free_func free_func; @@ -117,99 +117,99 @@ struct BrotliDecoderStateStruct { } buffer; uint32_t buffer_length; - int pos; - int max_backward_distance; - int max_distance; - int ringbuffer_size; - int ringbuffer_mask; - int dist_rb_idx; - int dist_rb[4]; + int pos; + int max_backward_distance; + int max_distance; + int ringbuffer_size; + int ringbuffer_mask; + int dist_rb_idx; + int dist_rb[4]; int error_code; uint32_t sub_loop_counter; - uint8_t* ringbuffer; - uint8_t* ringbuffer_end; - HuffmanCode* htree_command; + uint8_t* ringbuffer; + uint8_t* ringbuffer_end; + HuffmanCode* htree_command; const uint8_t* context_lookup; - uint8_t* context_map_slice; - uint8_t* dist_context_map_slice; - + uint8_t* context_map_slice; + uint8_t* dist_context_map_slice; + /* This ring buffer holds a few past copy distances that will be used by some special distance codes. 
*/ - HuffmanTreeGroup literal_hgroup; - HuffmanTreeGroup insert_copy_hgroup; - HuffmanTreeGroup distance_hgroup; - HuffmanCode* block_type_trees; - HuffmanCode* block_len_trees; - /* This is true if the literal context map histogram type always matches the + HuffmanTreeGroup literal_hgroup; + HuffmanTreeGroup insert_copy_hgroup; + HuffmanTreeGroup distance_hgroup; + HuffmanCode* block_type_trees; + HuffmanCode* block_len_trees; + /* This is true if the literal context map histogram type always matches the block type. It is then not needed to keep the context (faster decoding). */ - int trivial_literal_context; + int trivial_literal_context; /* Distance context is actual after command is decoded and before distance is computed. After distance computation it is used as a temporary variable. */ - int distance_context; - int meta_block_remaining_len; + int distance_context; + int meta_block_remaining_len; uint32_t block_length_index; uint32_t block_length[3]; uint32_t num_block_types[3]; uint32_t block_type_rb[6]; uint32_t distance_postfix_bits; uint32_t num_direct_distance_codes; - int distance_postfix_mask; + int distance_postfix_mask; uint32_t num_dist_htrees; - uint8_t* dist_context_map; + uint8_t* dist_context_map; HuffmanCode* literal_htree; - uint8_t dist_htree_index; + uint8_t dist_htree_index; uint32_t repeat_code_len; uint32_t prev_code_len; - - int copy_length; - int distance_code; - + + int copy_length; + int distance_code; + /* For partial write operations. */ size_t rb_roundtrips; /* how many times we went around the ring-buffer */ size_t partial_pos_out; /* how much output to the user in total */ - + /* For ReadHuffmanCode. */ - uint32_t symbol; - uint32_t repeat; - uint32_t space; - - HuffmanCode table[32]; + uint32_t symbol; + uint32_t repeat; + uint32_t space; + + HuffmanCode table[32]; /* List of heads of symbol chains. */ - uint16_t* symbol_lists; - /* Storage from symbol_lists. 
*/ - uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 + + uint16_t* symbol_lists; + /* Storage from symbol_lists. */ + uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 + BROTLI_NUM_COMMAND_SYMBOLS]; - /* Tails of symbol chains. */ - int next_symbol[32]; + /* Tails of symbol chains. */ + int next_symbol[32]; uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES]; /* Population counts for the code lengths. */ - uint16_t code_length_histo[16]; - + uint16_t code_length_histo[16]; + /* For HuffmanTreeGroupDecode. */ - int htree_index; - HuffmanCode* next; - + int htree_index; + HuffmanCode* next; + /* For DecodeContextMap. */ uint32_t context_index; uint32_t max_run_length_prefix; uint32_t code; HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272]; - + /* For InverseMoveToFrontTransform. */ uint32_t mtf_upper_bound; uint32_t mtf[64 + 1]; - + /* Less used attributes are at the end of this struct. */ /* States inside function calls. */ - BrotliRunningMetablockHeaderState substate_metablock_header; - BrotliRunningTreeGroupState substate_tree_group; - BrotliRunningContextMapState substate_context_map; - BrotliRunningUncompressedState substate_uncompressed; - BrotliRunningHuffmanState substate_huffman; - BrotliRunningDecodeUint8State substate_decode_uint8; + BrotliRunningMetablockHeaderState substate_metablock_header; + BrotliRunningTreeGroupState substate_tree_group; + BrotliRunningContextMapState substate_context_map; + BrotliRunningUncompressedState substate_uncompressed; + BrotliRunningHuffmanState substate_huffman; + BrotliRunningDecodeUint8State substate_decode_uint8; BrotliRunningReadBlockLengthState substate_read_block_length; - + unsigned int is_last_metablock : 1; unsigned int is_uncompressed : 1; unsigned int is_metadata : 1; @@ -217,20 +217,20 @@ struct BrotliDecoderStateStruct { unsigned int canny_ringbuffer_allocation : 1; unsigned int large_window : 1; unsigned int size_nibbles : 8; - uint32_t window_bits; - + uint32_t 
window_bits; + int new_ringbuffer_size; - + uint32_t num_literal_htrees; - uint8_t* context_map; - uint8_t* context_modes; + uint8_t* context_map; + uint8_t* context_modes; const BrotliDictionary* dictionary; const BrotliTransforms* transforms; - + uint32_t trivial_literal_contexts[8]; /* 256 bits */ }; - + typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal; #define BrotliDecoderState BrotliDecoderStateInternal @@ -251,8 +251,8 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit( X = NULL; \ } -#if defined(__cplusplus) || defined(c_plusplus) +#if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ -#endif - -#endif /* BROTLI_DEC_STATE_H_ */ +#endif + +#endif /* BROTLI_DEC_STATE_H_ */ diff --git a/contrib/libs/brotli/dec/ya.make b/contrib/libs/brotli/dec/ya.make index c510ee5a5d0..0f482f36ed2 100644 --- a/contrib/libs/brotli/dec/ya.make +++ b/contrib/libs/brotli/dec/ya.make @@ -1,6 +1,6 @@ -LIBRARY() - -LICENSE(MIT) +LIBRARY() + +LICENSE(MIT) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) @@ -9,22 +9,22 @@ OWNER( g:contrib g:cpp-contrib ) - -NO_UTIL() -NO_COMPILER_WARNINGS() - +NO_UTIL() + +NO_COMPILER_WARNINGS() + ADDINCL(GLOBAL contrib/libs/brotli/include) PEERDIR( contrib/libs/brotli/common ) -SRCS( - bit_reader.c - decode.c - huffman.c - state.c -) - -END() +SRCS( + bit_reader.c + decode.c + huffman.c + state.c +) + +END() diff --git a/contrib/libs/brotli/enc/backward_references.h b/contrib/libs/brotli/enc/backward_references.h index 7b3c04a8ff5..3a4146647c4 100644 --- a/contrib/libs/brotli/enc/backward_references.h +++ b/contrib/libs/brotli/enc/backward_references.h @@ -1,26 +1,26 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Function to find backward reference copies. 
*/ -#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_ -#define BROTLI_ENC_BACKWARD_REFERENCES_H_ - +#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_ +#define BROTLI_ENC_BACKWARD_REFERENCES_H_ + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" #include #include "./command.h" -#include "./hash.h" +#include "./hash.h" #include "./quality.h" - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* "commands" points to the next output command to write to, "*num_commands" is initially the total amount of commands output by previous CreateBackwardReferences calls, and must be incremented by the amount written @@ -30,9 +30,9 @@ BROTLI_INTERNAL void BrotliCreateBackwardReferences( size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, Command* commands, size_t* num_commands, size_t* num_literals); - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */ diff --git a/contrib/libs/brotli/enc/bit_cost.h b/contrib/libs/brotli/enc/bit_cost.h index 30324d38c7b..6586469e62f 100644 --- a/contrib/libs/brotli/enc/bit_cost.h +++ b/contrib/libs/brotli/enc/bit_cost.h @@ -1,63 +1,63 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Functions to estimate the bit cost of Huffman trees. 
*/ -#ifndef BROTLI_ENC_BIT_COST_H_ -#define BROTLI_ENC_BIT_COST_H_ - +#ifndef BROTLI_ENC_BIT_COST_H_ +#define BROTLI_ENC_BIT_COST_H_ + #include "../common/platform.h" #include -#include "./fast_log.h" +#include "./fast_log.h" #include "./histogram.h" - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + static BROTLI_INLINE double ShannonEntropy( const uint32_t* population, size_t size, size_t* total) { size_t sum = 0; - double retval = 0; + double retval = 0; const uint32_t* population_end = population + size; size_t p; - if (size & 1) { - goto odd_number_of_elements_left; - } - while (population < population_end) { - p = *population++; - sum += p; + if (size & 1) { + goto odd_number_of_elements_left; + } + while (population < population_end) { + p = *population++; + sum += p; retval -= (double)p * FastLog2(p); - odd_number_of_elements_left: - p = *population++; - sum += p; + odd_number_of_elements_left: + p = *population++; + sum += p; retval -= (double)p * FastLog2(p); - } + } if (sum) retval += (double)sum * FastLog2(sum); - *total = sum; - return retval; -} - + *total = sum; + return retval; +} + static BROTLI_INLINE double BitsEntropy( const uint32_t* population, size_t size) { size_t sum; - double retval = ShannonEntropy(population, size, &sum); - if (retval < sum) { + double retval = ShannonEntropy(population, size, &sum); + if (retval < sum) { /* At least one bit per literal is needed. 
*/ retval = (double)sum; - } - return retval; -} - + } + return retval; +} + BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*); BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*); BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*); - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_BIT_COST_H_ */ diff --git a/contrib/libs/brotli/enc/block_splitter.h b/contrib/libs/brotli/enc/block_splitter.h index 2fd1cb417ac..a5e006c4b30 100644 --- a/contrib/libs/brotli/enc/block_splitter.h +++ b/contrib/libs/brotli/enc/block_splitter.h @@ -1,38 +1,38 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Block split point selection utilities. */ -#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_ -#define BROTLI_ENC_BLOCK_SPLITTER_H_ - +#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_ +#define BROTLI_ENC_BLOCK_SPLITTER_H_ + #include "../common/platform.h" #include -#include "./command.h" +#include "./command.h" #include "./memory.h" #include "./quality.h" - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + typedef struct BlockSplit { size_t num_types; /* Amount of distinct types */ size_t num_blocks; /* Amount of values in types and length */ uint8_t* types; uint32_t* lengths; - + size_t types_alloc_size; size_t lengths_alloc_size; } BlockSplit; - + BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self); BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self); - + BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m, const Command* cmds, const size_t num_commands, @@ -43,9 +43,9 @@ BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m, BlockSplit* literal_split, BlockSplit* insert_and_copy_split, BlockSplit* dist_split); - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif 
/* BROTLI_ENC_BLOCK_SPLITTER_H_ */ diff --git a/contrib/libs/brotli/enc/brotli_bit_stream.h b/contrib/libs/brotli/enc/brotli_bit_stream.h index 42663c67042..2ed703bf799 100644 --- a/contrib/libs/brotli/enc/brotli_bit_stream.h +++ b/contrib/libs/brotli/enc/brotli_bit_stream.h @@ -1,5 +1,5 @@ /* Copyright 2014 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ @@ -13,32 +13,32 @@ is called "storage" and the index to the bit is called storage_ix in function arguments. */ -#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ -#define BROTLI_ENC_BROTLI_BIT_STREAM_H_ - +#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#define BROTLI_ENC_BROTLI_BIT_STREAM_H_ + #include "../common/context.h" #include "../common/platform.h" #include #include "./command.h" #include "./entropy_encode.h" #include "./memory.h" -#include "./metablock.h" - +#include "./metablock.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* All Store functions here will use a storage_ix, which is always the bit position for the current storage. 
*/ - + BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree, size_t* storage_ix, uint8_t* storage); - + BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast( MemoryManager* m, const uint32_t* histogram, const size_t histogram_total, const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix, uint8_t* storage); - + /* REQUIRES: length > 0 */ /* REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m, @@ -47,7 +47,7 @@ BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m, const BrotliEncoderParams* params, ContextType literal_context_mode, const Command* commands, size_t n_commands, const MetaBlockSplit* mb, size_t* storage_ix, uint8_t* storage); - + /* Stores the meta-block without doing any block splitting, just collects one histogram per block category and uses that for entropy coding. REQUIRES: length > 0 @@ -57,7 +57,7 @@ BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m, BROTLI_BOOL is_last, const BrotliEncoderParams* params, const Command* commands, size_t n_commands, size_t* storage_ix, uint8_t* storage); - + /* Same as above, but uses static prefix codes for histograms with a only a few symbols, and uses static code length prefix codes for all other histograms. REQUIRES: length > 0 @@ -67,7 +67,7 @@ BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m, BROTLI_BOOL is_last, const BrotliEncoderParams* params, const Command* commands, size_t n_commands, size_t* storage_ix, uint8_t* storage); - + /* This is for storing uncompressed blocks (simple raw storage of bytes-as-bytes). 
REQUIRES: length > 0 @@ -76,9 +76,9 @@ BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock( BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input, size_t position, size_t mask, size_t len, size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage); - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */ diff --git a/contrib/libs/brotli/enc/cluster.h b/contrib/libs/brotli/enc/cluster.h index daf573dc656..bb26124d24d 100644 --- a/contrib/libs/brotli/enc/cluster.h +++ b/contrib/libs/brotli/enc/cluster.h @@ -1,48 +1,48 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Functions for clustering similar histograms together. */ -#ifndef BROTLI_ENC_CLUSTER_H_ -#define BROTLI_ENC_CLUSTER_H_ - +#ifndef BROTLI_ENC_CLUSTER_H_ +#define BROTLI_ENC_CLUSTER_H_ + #include "../common/platform.h" #include -#include "./histogram.h" +#include "./histogram.h" #include "./memory.h" - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + typedef struct HistogramPair { uint32_t idx1; uint32_t idx2; - double cost_combo; - double cost_diff; + double cost_combo; + double cost_diff; } HistogramPair; - + #define CODE(X) /* Declaration */; - + #define FN(X) X ## Literal #include "./cluster_inc.h" /* NOLINT(build/include) */ #undef FN - + #define FN(X) X ## Command #include "./cluster_inc.h" /* NOLINT(build/include) */ #undef FN - + #define FN(X) X ## Distance #include "./cluster_inc.h" /* NOLINT(build/include) */ #undef FN - + #undef CODE - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_CLUSTER_H_ */ diff --git a/contrib/libs/brotli/enc/command.h b/contrib/libs/brotli/enc/command.h index 181510cd671..1aac85689be 100644 --- a/contrib/libs/brotli/enc/command.h +++ b/contrib/libs/brotli/enc/command.h @@ -1,25 +1,25 
@@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* This class models a sequence of literals and a backward reference copy. */ -#ifndef BROTLI_ENC_COMMAND_H_ -#define BROTLI_ENC_COMMAND_H_ - +#ifndef BROTLI_ENC_COMMAND_H_ +#define BROTLI_ENC_COMMAND_H_ + #include "../common/constants.h" #include "../common/platform.h" #include -#include "./fast_log.h" +#include "./fast_log.h" #include "./params.h" -#include "./prefix.h" - +#include "./prefix.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 }; static uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, @@ -28,44 +28,44 @@ static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 }; static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 }; - + static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) { - if (insertlen < 6) { + if (insertlen < 6) { return (uint16_t)insertlen; - } else if (insertlen < 130) { + } else if (insertlen < 130) { uint32_t nbits = Log2FloorNonZero(insertlen - 2) - 1u; return (uint16_t)((nbits << 1) + ((insertlen - 2) >> nbits) + 2); - } else if (insertlen < 2114) { + } else if (insertlen < 2114) { return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10); - } else if (insertlen < 6210) { + } else if (insertlen < 6210) { return 21u; - } else if (insertlen < 22594) { + } else if (insertlen < 22594) { return 22u; - } else { + } else { return 23u; - } -} - + } +} + static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) { - if (copylen < 10) { + if (copylen < 10) { return (uint16_t)(copylen - 2); - } else if (copylen < 134) { + } else if (copylen < 134) { uint32_t nbits = 
Log2FloorNonZero(copylen - 6) - 1u; return (uint16_t)((nbits << 1) + ((copylen - 6) >> nbits) + 4); - } else if (copylen < 2118) { + } else if (copylen < 2118) { return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12); - } else { + } else { return 23u; - } -} - + } +} + static BROTLI_INLINE uint16_t CombineLengthCodes( uint16_t inscode, uint16_t copycode, BROTLI_BOOL use_last_distance) { uint16_t bits64 = (uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3u)); if (use_last_distance && inscode < 8u && copycode < 16u) { return (copycode < 8u) ? bits64 : (bits64 | 64u); - } else { + } else { /* Specification: 5 Encoding of ... (last table) */ /* offset = 2 * index, where index is in range [0..8] */ uint32_t offset = 2u * ((copycode >> 3u) + 3u * (inscode >> 3u)); @@ -77,29 +77,29 @@ static BROTLI_INLINE uint16_t CombineLengthCodes( Magic constant is shifted 6 bits left, to avoid final multiplication. */ offset = (offset << 5u) + 0x40u + ((0x520D40u >> offset) & 0xC0u); return (uint16_t)(offset | bits64); - } -} - + } +} + static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen, BROTLI_BOOL use_last_distance, uint16_t* code) { uint16_t inscode = GetInsertLengthCode(insertlen); uint16_t copycode = GetCopyLengthCode(copylen); *code = CombineLengthCodes(inscode, copycode, use_last_distance); -} - +} + static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) { return kInsBase[inscode]; } - + static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) { return kInsExtra[inscode]; } - + static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) { return kCopyBase[copycode]; } - + static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) { return kCopyExtra[copycode]; } @@ -161,28 +161,28 @@ static BROTLI_INLINE uint32_t CommandRestoreDistanceCode( uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U; return ((offset + extra) << dist->distance_postfix_bits) + lcode + dist->num_direct_distance_codes + BROTLI_NUM_DISTANCE_SHORT_CODES; - } + } } 
- + static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) { uint32_t r = self->cmd_prefix_ >> 6; uint32_t c = self->cmd_prefix_ & 7; if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) { return c; - } + } return 3; } - + static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) { return self->copy_len_ & 0x1FFFFFF; } - + static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) { uint32_t modifier = self->copy_len_ >> 25; int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1))); return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta); } - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif diff --git a/contrib/libs/brotli/enc/dictionary_hash.h b/contrib/libs/brotli/enc/dictionary_hash.h index a7fafbe0656..b3bb9599f43 100644 --- a/contrib/libs/brotli/enc/dictionary_hash.h +++ b/contrib/libs/brotli/enc/dictionary_hash.h @@ -1,24 +1,24 @@ /* Copyright 2015 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Hash table on the 4-byte prefixes of static dictionary words. */ -#ifndef BROTLI_ENC_DICTIONARY_HASH_H_ -#define BROTLI_ENC_DICTIONARY_HASH_H_ - +#ifndef BROTLI_ENC_DICTIONARY_HASH_H_ +#define BROTLI_ENC_DICTIONARY_HASH_H_ + #include - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + extern const uint16_t kStaticDictionaryHash[32768]; - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */ diff --git a/contrib/libs/brotli/enc/entropy_encode.h b/contrib/libs/brotli/enc/entropy_encode.h index b1f02d5a35e..f23d9c379d0 100644 --- a/contrib/libs/brotli/enc/entropy_encode.h +++ b/contrib/libs/brotli/enc/entropy_encode.h @@ -1,57 +1,57 @@ /* Copyright 2010 Google Inc. All Rights Reserved. - + Distributed under MIT license. 
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Entropy encoding (Huffman) utilities. */ -#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_ -#define BROTLI_ENC_ENTROPY_ENCODE_H_ - +#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_ +#define BROTLI_ENC_ENTROPY_ENCODE_H_ + #include "../common/platform.h" #include - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* A node of a Huffman tree. */ typedef struct HuffmanTree { uint32_t total_count_; int16_t index_left_; int16_t index_right_or_value_; } HuffmanTree; - + static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count, int16_t left, int16_t right) { self->total_count_ = count; self->index_left_ = left; self->index_right_or_value_ = right; } - + /* Returns 1 is assignment of depths succeeded, otherwise 0. */ BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth( int p, HuffmanTree* pool, uint8_t* depth, int max_depth); - + /* This function will create a Huffman tree. - + The (data,length) contains the population counts. The tree_limit is the maximum bit depth of the Huffman codes. - + The depth contains the tree, i.e., how many bits are used for the symbol. - + The actual Huffman tree is constructed in the tree[] array, which has to be at least 2 * length + 1 long. - + See http://en.wikipedia.org/wiki/Huffman_coding */ BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data, const size_t length, const int tree_limit, HuffmanTree* tree, uint8_t* depth); - + /* Change the population counts in a way that the consequent Huffman tree compression, especially its RLE-part will be more likely to compress this data more efficiently. diff --git a/contrib/libs/brotli/enc/fast_log.h b/contrib/libs/brotli/enc/fast_log.h index 7b5d067de6b..cade1235ade 100644 --- a/contrib/libs/brotli/enc/fast_log.h +++ b/contrib/libs/brotli/enc/fast_log.h @@ -1,147 +1,147 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. 
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Utilities for fast computation of logarithms. */ -#ifndef BROTLI_ENC_FAST_LOG_H_ -#define BROTLI_ENC_FAST_LOG_H_ - -#include - +#ifndef BROTLI_ENC_FAST_LOG_H_ +#define BROTLI_ENC_FAST_LOG_H_ + +#include + #include "../common/platform.h" #include - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { -#endif - +#endif + static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) { /* TODO: generalize and move to platform.h */ #if BROTLI_GNUC_HAS_BUILTIN(__builtin_clz, 3, 4, 0) || \ BROTLI_INTEL_VERSION_CHECK(16, 0, 0) return 31u ^ (uint32_t)__builtin_clz((uint32_t)n); -#else +#else uint32_t result = 0; - while (n >>= 1) result++; - return result; -#endif -} - + while (n >>= 1) result++; + return result; +#endif +} + /* A lookup table for small values of log2(int) to be used in entropy computation. - + ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */ -static const float kLog2Table[] = { - 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f, - 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f, - 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f, - 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f, - 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f, - 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f, - 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f, - 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f, - 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f, - 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f, - 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f, - 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f, - 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f, - 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f, - 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f, - 
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f, - 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f, - 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f, - 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f, - 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f, - 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f, - 5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f, - 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f, - 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f, - 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f, - 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f, - 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f, - 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f, - 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f, - 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f, - 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f, - 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f, - 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f, - 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f, - 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f, - 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f, - 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f, - 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f, - 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f, - 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f, - 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f, - 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f, - 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f, - 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f, - 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f, - 7.0768155970508317f, 7.0874628412503400f, 
7.0980320829605272f, - 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f, - 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f, - 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f, - 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f, - 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f, - 7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f, - 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f, - 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f, - 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f, - 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f, - 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f, - 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f, - 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f, - 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f, - 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f, - 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f, - 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f, - 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f, - 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f, - 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f, - 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f, - 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f, - 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f, - 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f, - 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f, - 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f, - 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f, - 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f, - 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f, - 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f, - 7.8328900141647422f, 
7.8392037880969445f, 7.8454900509443757f, - 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f, - 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f, - 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f, - 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f, - 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f, - 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f, - 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f, - 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f, - 7.9943534368588578f -}; - +static const float kLog2Table[] = { + 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f, + 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f, + 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f, + 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f, + 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f, + 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f, + 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f, + 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f, + 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f, + 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f, + 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f, + 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f, + 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f, + 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f, + 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f, + 5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f, + 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f, + 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f, + 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f, + 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f, + 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f, + 
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f, + 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f, + 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f, + 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f, + 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f, + 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f, + 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f, + 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f, + 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f, + 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f, + 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f, + 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f, + 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f, + 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f, + 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f, + 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f, + 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f, + 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f, + 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f, + 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f, + 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f, + 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f, + 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f, + 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f, + 7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f, + 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f, + 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f, + 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f, + 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f, + 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f, + 7.2573878426926521f, 7.2667865406949019f, 
7.2761244052742384f, + 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f, + 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f, + 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f, + 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f, + 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f, + 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f, + 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f, + 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f, + 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f, + 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f, + 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f, + 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f, + 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f, + 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f, + 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f, + 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f, + 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f, + 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f, + 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f, + 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f, + 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f, + 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f, + 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f, + 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f, + 7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f, + 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f, + 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f, + 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f, + 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f, + 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f, + 7.9425145053392399f, 
7.9483672315846778f, 7.9541963103868758f, + 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f, + 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f, + 7.9943534368588578f +}; + #define LOG_2_INV 1.4426950408889634 /* Faster logarithm for small integers, with the property of log2(0) == 0. */ static BROTLI_INLINE double FastLog2(size_t v) { if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) { - return kLog2Table[v]; - } + return kLog2Table[v]; + } #if (defined(_MSC_VER) && _MSC_VER <= 1700) || \ (defined(__ANDROID_API__) && __ANDROID_API__ < 18) /* Visual Studio 2012 and Android API levels < 18 do not have the log2() * function defined, so we use log() and a multiplication instead. */ return log((double)v) * LOG_2_INV; -#else +#else return log2((double)v); -#endif -} - +#endif +} + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_FAST_LOG_H_ */ diff --git a/contrib/libs/brotli/enc/find_match_length.h b/contrib/libs/brotli/enc/find_match_length.h index 5dd2bbb52e6..bc428cffdaf 100644 --- a/contrib/libs/brotli/enc/find_match_length.h +++ b/contrib/libs/brotli/enc/find_match_length.h @@ -1,24 +1,24 @@ /* Copyright 2010 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Function to find maximal matching prefixes of strings. */ -#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_ -#define BROTLI_ENC_FIND_MATCH_LENGTH_H_ - +#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_ +#define BROTLI_ENC_FIND_MATCH_LENGTH_H_ + #include "../common/platform.h" #include - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* Separate implementation for little-endian 64-bit targets, for speed. 
*/ #if defined(__GNUC__) && defined(_LP64) && defined(BROTLI_LITTLE_ENDIAN) - + static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1, const uint8_t* s2, size_t limit) { @@ -27,54 +27,54 @@ static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1, while (BROTLI_PREDICT_TRUE(--limit2)) { if (BROTLI_PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64LE(s2) == BROTLI_UNALIGNED_LOAD64LE(s1 + matched))) { - s2 += 8; - matched += 8; - } else { + s2 += 8; + matched += 8; + } else { uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^ BROTLI_UNALIGNED_LOAD64LE(s1 + matched); size_t matching_bits = (size_t)__builtin_ctzll(x); - matched += matching_bits >> 3; - return matched; - } - } + matched += matching_bits >> 3; + return matched; + } + } limit = (limit & 7) + 1; /* + 1 is for pre-decrement in while */ - while (--limit) { + while (--limit) { if (BROTLI_PREDICT_TRUE(s1[matched] == *s2)) { - ++s2; - ++matched; - } else { - return matched; - } - } - return matched; -} -#else + ++s2; + ++matched; + } else { + return matched; + } + } + return matched; +} +#else static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1, const uint8_t* s2, size_t limit) { size_t matched = 0; - const uint8_t* s2_limit = s2 + limit; - const uint8_t* s2_ptr = s2; + const uint8_t* s2_limit = s2 + limit; + const uint8_t* s2_ptr = s2; /* Find out how long the match is. We loop over the data 32 bits at a time until we find a 32-bit block that doesn't match; then we find the first non-matching bit and use that to calculate the total length of the match. 
*/ - while (s2_ptr <= s2_limit - 4 && + while (s2_ptr <= s2_limit - 4 && BrotliUnalignedRead32(s2_ptr) == BrotliUnalignedRead32(s1 + matched)) { - s2_ptr += 4; - matched += 4; - } - while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) { - ++s2_ptr; - ++matched; - } - return matched; -} -#endif - + s2_ptr += 4; + matched += 4; + } + while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) { + ++s2_ptr; + ++matched; + } + return matched; +} +#endif + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */ diff --git a/contrib/libs/brotli/enc/hash.h b/contrib/libs/brotli/enc/hash.h index c4945fb1016..8c5a7bb5ad5 100644 --- a/contrib/libs/brotli/enc/hash.h +++ b/contrib/libs/brotli/enc/hash.h @@ -1,5 +1,5 @@ /* Copyright 2010 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ @@ -7,26 +7,26 @@ /* A (forgetful) hash table to the data seen by the compressor, to help create backward references to previous data. */ -#ifndef BROTLI_ENC_HASH_H_ -#define BROTLI_ENC_HASH_H_ - +#ifndef BROTLI_ENC_HASH_H_ +#define BROTLI_ENC_HASH_H_ + #include /* memcmp, memset */ - + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" #include #include "./encoder_dict.h" -#include "./fast_log.h" -#include "./find_match_length.h" +#include "./fast_log.h" +#include "./find_match_length.h" #include "./memory.h" #include "./quality.h" -#include "./static_dict.h" - +#include "./static_dict.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* Pointer to hasher data. * * Excluding initialization and destruction, hasher can be passed as @@ -40,10 +40,10 @@ extern "C" { * Using "define" instead of "typedef", because on MSVC __restrict does not work * on typedef pointer types. 
*/ #define HasherHandle uint8_t* - + typedef struct { BrotliHasherParams params; - + /* False if hasher needs to be "prepared" before use. */ BROTLI_BOOL is_prepared_; @@ -80,14 +80,14 @@ static const uint32_t kHashMul32 = 0x1E35A7BD; static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); static const uint64_t kHashMul64Long = BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); - + static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ return h >> (32 - 14); -} - +} + static BROTLI_INLINE void PrepareDistanceCache( int* BROTLI_RESTRICT distance_cache, const int num_distances) { if (num_distances > 4) { @@ -108,8 +108,8 @@ static BROTLI_INLINE void PrepareDistanceCache( distance_cache[15] = next_last_distance + 3; } } -} - +} + #define BROTLI_LITERAL_BYTE_SCORE 135 #define BROTLI_DISTANCE_BIT_PENALTY 30 /* Score must be positive after applying maximal penalty. 
*/ @@ -135,19 +135,19 @@ static BROTLI_INLINE score_t BackwardReferenceScore( size_t copy_length, size_t backward_reference_offset) { return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length - BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset); -} - +} + static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance( size_t copy_length) { return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + BROTLI_SCORE_BASE + 15; } - + static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( size_t distance_short_code) { return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE); } - + static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data, size_t max_length, size_t max_backward, @@ -164,33 +164,33 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( if (len > max_length) { return BROTLI_FALSE; } - + matchlen = FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { return BROTLI_FALSE; - } + } { size_t cut = len - matchlen; size_t transform_id = (cut << 2) + (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); backward = max_backward + 1 + word_idx + (transform_id << dictionary->words->size_bits_by_length[len]); - } + } if (backward > max_distance) { return BROTLI_FALSE; - } + } score = BackwardReferenceScore(matchlen, backward); if (score < out->score) { return BROTLI_FALSE; - } + } out->len = matchlen; out->len_code_delta = (int)len - (int)matchlen; out->distance = backward; out->score = score; return BROTLI_TRUE; } - + static BROTLI_INLINE void SearchInStaticDictionary( const BrotliEncoderDictionary* dictionary, HasherHandle handle, const uint8_t* data, size_t max_length, @@ -201,7 +201,7 @@ static BROTLI_INLINE void SearchInStaticDictionary( HasherCommon* self = GetHasherCommon(handle); if (self->dict_num_matches 
< (self->dict_num_lookups >> 7)) { return; - } + } key = Hash14(data) << 1; for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { size_t item = dictionary->hash_table[key]; @@ -212,42 +212,42 @@ static BROTLI_INLINE void SearchInStaticDictionary( max_length, max_backward, max_distance, out); if (item_matches) { self->dict_num_matches++; - } - } - } + } + } + } } - + typedef struct BackwardMatch { uint32_t distance; uint32_t length_and_code; } BackwardMatch; - + static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self, size_t dist, size_t len) { self->distance = (uint32_t)dist; self->length_and_code = (uint32_t)(len << 5); } - + static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self, size_t dist, size_t len, size_t len_code) { self->distance = (uint32_t)dist; self->length_and_code = (uint32_t)((len << 5) | (len == len_code ? 0 : len_code)); } - + static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) { return self->length_and_code >> 5; } - + static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { size_t code = self->length_and_code & 31; return code ? code : BackwardMatchLength(self); } - + #define EXPAND_CAT(a, b) CAT(a, b) #define CAT(a, b) a ## b #define FN(X) EXPAND_CAT(X, HASHER()) - + #define HASHER() H10 #define BUCKET_BITS 17 #define MAX_TREE_SEARCH_DEPTH 64 @@ -259,11 +259,11 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef HASHER /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */ #define MAX_NUM_MATCHES_H10 128 - + /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression a little faster (0.5% - 1%) and it compresses 0.15% better on small text and HTML inputs. 
*/ - + #define HASHER() H2 #define BUCKET_BITS 16 #define BUCKET_SWEEP 1 @@ -273,7 +273,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef BUCKET_SWEEP #undef USE_DICTIONARY #undef HASHER - + #define HASHER() H3 #define BUCKET_SWEEP 2 #define USE_DICTIONARY 0 @@ -282,7 +282,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef BUCKET_SWEEP #undef BUCKET_BITS #undef HASHER - + #define HASHER() H4 #define BUCKET_BITS 17 #define BUCKET_SWEEP 4 @@ -293,17 +293,17 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef BUCKET_SWEEP #undef BUCKET_BITS #undef HASHER - + #define HASHER() H5 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ #undef HASHER - + #define HASHER() H6 #include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */ #undef HASHER - + #define BUCKET_BITS 15 - + #define NUM_LAST_DISTANCES_TO_CHECK 4 #define NUM_BANKS 1 #define BANK_BITS 16 @@ -311,7 +311,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ #undef HASHER #undef NUM_LAST_DISTANCES_TO_CHECK - + #define NUM_LAST_DISTANCES_TO_CHECK 10 #define HASHER() H41 #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ @@ -319,7 +319,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef NUM_LAST_DISTANCES_TO_CHECK #undef NUM_BANKS #undef BANK_BITS - + #define NUM_LAST_DISTANCES_TO_CHECK 16 #define NUM_BANKS 512 #define BANK_BITS 9 @@ -329,9 +329,9 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef NUM_LAST_DISTANCES_TO_CHECK #undef NUM_BANKS #undef BANK_BITS - + #undef BUCKET_BITS - + #define HASHER() H54 #define BUCKET_BITS 20 #define BUCKET_SWEEP 4 @@ -343,7 +343,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef BUCKET_SWEEP #undef 
BUCKET_BITS #undef HASHER - + /* fast large window hashers */ #define HASHER() HROLLING_FAST @@ -420,10 +420,10 @@ static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params, #undef SIZE_ default: break; - } + } return result; } - + static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle, BrotliEncoderParams* params, const uint8_t* data, size_t position, size_t input_size, BROTLI_BOOL is_last) { @@ -448,10 +448,10 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle, #undef INITIALIZE_ default: break; - } + } HasherReset(*handle); - } - + } + self = *handle; common = GetHasherCommon(self); if (!common->is_prepared_) { @@ -462,16 +462,16 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle, break; FOR_ALL_HASHERS(PREPARE_) #undef PREPARE_ - default: break; - } + default: break; + } if (position == 0) { common->dict_num_lookups = 0; common->dict_num_matches = 0; } common->is_prepared_ = BROTLI_TRUE; - } + } } - + static BROTLI_INLINE void InitOrStitchToPreviousBlock( MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask, BrotliEncoderParams* params, size_t position, size_t input_size, @@ -490,9 +490,9 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock( default: break; } } - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_HASH_H_ */ diff --git a/contrib/libs/brotli/enc/histogram.h b/contrib/libs/brotli/enc/histogram.h index a522ca7aa7c..42af3c3f9d1 100644 --- a/contrib/libs/brotli/enc/histogram.h +++ b/contrib/libs/brotli/enc/histogram.h @@ -1,14 +1,14 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Models the histograms of literals, commands and distance codes. 
*/ -#ifndef BROTLI_ENC_HISTOGRAM_H_ -#define BROTLI_ENC_HISTOGRAM_H_ - +#ifndef BROTLI_ENC_HISTOGRAM_H_ +#define BROTLI_ENC_HISTOGRAM_H_ + #include /* memset */ #include "../common/constants.h" @@ -16,12 +16,12 @@ #include "../common/platform.h" #include #include "./block_splitter.h" -#include "./command.h" - +#include "./command.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* The distance symbols effectively used by "Large Window Brotli" (32-bit). */ #define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544 @@ -32,21 +32,21 @@ extern "C" { #undef DataType #undef DATA_SIZE #undef FN - + #define FN(X) X ## Command #define DataType uint16_t #define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS #include "./histogram_inc.h" /* NOLINT(build/include) */ #undef DATA_SIZE #undef FN - + #define FN(X) X ## Distance #define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS #include "./histogram_inc.h" /* NOLINT(build/include) */ #undef DataType #undef DATA_SIZE #undef FN - + BROTLI_INTERNAL void BrotliBuildHistogramsWithContext( const Command* cmds, const size_t num_commands, const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split, @@ -55,9 +55,9 @@ BROTLI_INTERNAL void BrotliBuildHistogramsWithContext( const ContextType* context_modes, HistogramLiteral* literal_histograms, HistogramCommand* insert_and_copy_histograms, HistogramDistance* copy_dist_histograms); - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_HISTOGRAM_H_ */ diff --git a/contrib/libs/brotli/enc/literal_cost.h b/contrib/libs/brotli/enc/literal_cost.h index 412c1558908..8f53f39d3f2 100644 --- a/contrib/libs/brotli/enc/literal_cost.h +++ b/contrib/libs/brotli/enc/literal_cost.h @@ -1,5 +1,5 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. 
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ @@ -7,24 +7,24 @@ /* Literal cost model to allow backward reference replacement to be efficient. */ -#ifndef BROTLI_ENC_LITERAL_COST_H_ -#define BROTLI_ENC_LITERAL_COST_H_ - +#ifndef BROTLI_ENC_LITERAL_COST_H_ +#define BROTLI_ENC_LITERAL_COST_H_ + #include "../common/platform.h" #include - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* Estimates how many bits the literals in the interval [pos, pos + len) in the ring-buffer (data, mask) will take entropy coded and writes these estimates to the cost[0..len) array. */ BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals( size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost); - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_LITERAL_COST_H_ */ diff --git a/contrib/libs/brotli/enc/metablock.h b/contrib/libs/brotli/enc/metablock.h index add40a056c3..334a79a443c 100644 --- a/contrib/libs/brotli/enc/metablock.h +++ b/contrib/libs/brotli/enc/metablock.h @@ -1,5 +1,5 @@ /* Copyright 2015 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ @@ -7,26 +7,26 @@ /* Algorithms for distributing the literals and commands of a metablock between block types and contexts. 
*/ -#ifndef BROTLI_ENC_METABLOCK_H_ -#define BROTLI_ENC_METABLOCK_H_ - +#ifndef BROTLI_ENC_METABLOCK_H_ +#define BROTLI_ENC_METABLOCK_H_ + #include "../common/context.h" #include "../common/platform.h" #include #include "./block_splitter.h" -#include "./command.h" -#include "./histogram.h" +#include "./command.h" +#include "./histogram.h" #include "./memory.h" #include "./quality.h" - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + typedef struct MetaBlockSplit { - BlockSplit literal_split; - BlockSplit command_split; - BlockSplit distance_split; + BlockSplit literal_split; + BlockSplit command_split; + BlockSplit distance_split; uint32_t* literal_context_map; size_t literal_context_map_size; uint32_t* distance_context_map; @@ -38,7 +38,7 @@ typedef struct MetaBlockSplit { HistogramDistance* distance_histograms; size_t distance_histograms_size; } MetaBlockSplit; - + static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) { BrotliInitBlockSplit(&mb->literal_split); BrotliInitBlockSplit(&mb->command_split); @@ -54,7 +54,7 @@ static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) { mb->distance_histograms = 0; mb->distance_histograms_size = 0; } - + static BROTLI_INLINE void DestroyMetaBlockSplit( MemoryManager* m, MetaBlockSplit* mb) { BrotliDestroyBlockSplit(m, &mb->literal_split); @@ -66,7 +66,7 @@ static BROTLI_INLINE void DestroyMetaBlockSplit( BROTLI_FREE(m, mb->command_histograms); BROTLI_FREE(m, mb->distance_histograms); } - + /* Uses the slow shortest-path block splitter and does context clustering. The distance parameters are dynamically selected based on the commands which get recomputed under the new distance parameters. 
The new distance @@ -82,7 +82,7 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m, size_t num_commands, ContextType literal_context_mode, MetaBlockSplit* mb); - + /* Uses a fast greedy block splitter that tries to merge current block with the last or the second last block and uses a static context clustering which is the same for all block types. */ @@ -91,10 +91,10 @@ BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy( uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut, size_t num_contexts, const uint32_t* static_context_map, const Command* commands, size_t n_commands, MetaBlockSplit* mb); - + BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes, MetaBlockSplit* mb); - + BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params, uint32_t npostfix, uint32_t ndirect); diff --git a/contrib/libs/brotli/enc/prefix.h b/contrib/libs/brotli/enc/prefix.h index 56b89fa4663..fd359a478d4 100644 --- a/contrib/libs/brotli/enc/prefix.h +++ b/contrib/libs/brotli/enc/prefix.h @@ -1,5 +1,5 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ @@ -7,18 +7,18 @@ /* Functions for encoding of integers into prefix codes the amount of extra bits, and the actual values of the extra bits. */ -#ifndef BROTLI_ENC_PREFIX_H_ -#define BROTLI_ENC_PREFIX_H_ - +#ifndef BROTLI_ENC_PREFIX_H_ +#define BROTLI_ENC_PREFIX_H_ + #include "../common/constants.h" #include "../common/platform.h" #include -#include "./fast_log.h" - +#include "./fast_log.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* Here distance_code is an intermediate code, i.e. one of the special codes or the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. 
*/ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code, @@ -28,8 +28,8 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code, uint32_t* extra_bits) { if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) { *code = (uint16_t)distance_code; - *extra_bits = 0; - return; + *extra_bits = 0; + return; } else { size_t dist = ((size_t)1 << (postfix_bits + 2u)) + (distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes); @@ -43,11 +43,11 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code, (BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes + ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix)); *extra_bits = (uint32_t)((dist - offset) >> postfix_bits); - } -} - + } +} + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_PREFIX_H_ */ diff --git a/contrib/libs/brotli/enc/ringbuffer.h b/contrib/libs/brotli/enc/ringbuffer.h index 1ee7688e543..86079a89d35 100644 --- a/contrib/libs/brotli/enc/ringbuffer.h +++ b/contrib/libs/brotli/enc/ringbuffer.h @@ -1,25 +1,25 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Sliding window over the input data. */ -#ifndef BROTLI_ENC_RINGBUFFER_H_ -#define BROTLI_ENC_RINGBUFFER_H_ - +#ifndef BROTLI_ENC_RINGBUFFER_H_ +#define BROTLI_ENC_RINGBUFFER_H_ + #include /* memcpy */ - + #include "../common/platform.h" #include #include "./memory.h" #include "./quality.h" - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of data in a circular manner: writing a byte writes it to: `position() % (1 << window_bits)'. 
@@ -80,16 +80,16 @@ static BROTLI_INLINE void RingBufferInitBuffer( memcpy(new_data, rb->data_, 2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere); BROTLI_FREE(m, rb->data_); - } + } rb->data_ = new_data; rb->cur_size_ = buflen; rb->buffer_ = rb->data_ + 2; rb->buffer_[-2] = rb->buffer_[-1] = 0; for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) { rb->buffer_[rb->cur_size_ + i] = 0; - } + } } - + static BROTLI_INLINE void RingBufferWriteTail( const uint8_t* bytes, size_t n, RingBuffer* rb) { const size_t masked_pos = rb->pos_ & rb->mask_; @@ -98,9 +98,9 @@ static BROTLI_INLINE void RingBufferWriteTail( const size_t p = rb->size_ + masked_pos; memcpy(&rb->buffer_[p], bytes, BROTLI_MIN(size_t, n, rb->tail_size_ - masked_pos)); - } + } } - + /* Push bytes into the ring buffer. */ static BROTLI_INLINE void RingBufferWrite( MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) { @@ -116,7 +116,7 @@ static BROTLI_INLINE void RingBufferWrite( if (BROTLI_IS_OOM(m)) return; memcpy(rb->buffer_, bytes, n); return; - } + } if (rb->cur_size_ < rb->total_size_) { /* Lazily allocate the full buffer. */ RingBufferInitBuffer(m, rb->total_size_, rb); @@ -142,8 +142,8 @@ static BROTLI_INLINE void RingBufferWrite( /* Copy into the beginning of the buffer */ memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos), n - (rb->size_ - masked_pos)); - } - } + } + } { BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0; uint32_t rb_pos_mask = (1u << 31) - 1; @@ -156,9 +156,9 @@ static BROTLI_INLINE void RingBufferWrite( } } } - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_RINGBUFFER_H_ */ diff --git a/contrib/libs/brotli/enc/static_dict.h b/contrib/libs/brotli/enc/static_dict.h index a5e06f43c53..6b5d4eb0c98 100644 --- a/contrib/libs/brotli/enc/static_dict.h +++ b/contrib/libs/brotli/enc/static_dict.h @@ -1,26 +1,26 @@ /* Copyright 2013 Google Inc. All Rights Reserved. - + Distributed under MIT license. 
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Class to model the static dictionary. */ -#ifndef BROTLI_ENC_STATIC_DICT_H_ -#define BROTLI_ENC_STATIC_DICT_H_ - +#ifndef BROTLI_ENC_STATIC_DICT_H_ +#define BROTLI_ENC_STATIC_DICT_H_ + #include "../common/dictionary.h" #include "../common/platform.h" #include #include "./encoder_dict.h" - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + #define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37 static const uint32_t kInvalidMatch = 0xFFFFFFF; - + /* Matches data against static dictionary words, and for each length l, for which a match is found, updates matches[l] to be the minimum possible (distance << 5) + len_code. @@ -32,9 +32,9 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches); - + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_STATIC_DICT_H_ */ diff --git a/contrib/libs/brotli/enc/static_dict_lut.h b/contrib/libs/brotli/enc/static_dict_lut.h index aba9af45086..e299cda6d82 100644 --- a/contrib/libs/brotli/enc/static_dict_lut.h +++ b/contrib/libs/brotli/enc/static_dict_lut.h @@ -1,30 +1,30 @@ /* Copyright 2015 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ - + /* Lookup table for static dictionary and transforms. */ - + #ifndef BROTLI_ENC_STATIC_DICT_LUT_H_ #define BROTLI_ENC_STATIC_DICT_LUT_H_ - + #include - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif typedef struct DictWord { /* Highest bit is used to indicate end of bucket. 
*/ - uint8_t len; - uint8_t transform; - uint16_t idx; + uint8_t len; + uint8_t transform; + uint16_t idx; } DictWord; - + static const int kDictNumBits = 15; static const uint32_t kDictHashMul32 = 0x1E35A7BD; - + static const uint16_t kStaticDictionaryBuckets[32768] = { 1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29, 0,53,0,0,0,0,0,0,55,0,0,0,0,0,0,61,76,0,0,0,94,0,0,0,0,0,0,96,0,97,0,98,0,0,0,0, @@ -5855,10 +5855,10 @@ static const DictWord kStaticDictionaryWords[31705] = { 458},{12,0,756},{132,10,420},{134,0,1504},{6,0,757},{133,11,383},{6,0,1266},{135 ,0,1735},{5,0,598},{7,0,791},{8,0,108},{9,0,123},{7,10,1570},{140,10,542},{142, 11,410},{9,11,660},{138,11,347} -}; - +}; + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */ diff --git a/contrib/libs/brotli/enc/write_bits.h b/contrib/libs/brotli/enc/write_bits.h index 6f6080b5c04..36515a6893f 100644 --- a/contrib/libs/brotli/enc/write_bits.h +++ b/contrib/libs/brotli/enc/write_bits.h @@ -1,23 +1,23 @@ /* Copyright 2010 Google Inc. All Rights Reserved. - + Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Write bits into a byte array. */ -#ifndef BROTLI_ENC_WRITE_BITS_H_ -#define BROTLI_ENC_WRITE_BITS_H_ - +#ifndef BROTLI_ENC_WRITE_BITS_H_ +#define BROTLI_ENC_WRITE_BITS_H_ + #include "../common/platform.h" #include - + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif - + /*#define BIT_WRITER_DEBUG */ - + /* This function writes bits into bytes in increasing addresses, and within a byte least-significant-bit first. @@ -50,36 +50,36 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, (int)*pos)); BROTLI_DCHECK((bits >> n_bits) == 0); BROTLI_DCHECK(n_bits <= 56); - v |= bits << (*pos & 7); + v |= bits << (*pos & 7); BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. 
*/ - *pos += n_bits; -#else + *pos += n_bits; +#else /* implicit & 0xFF is assumed for uint8_t arithmetics */ uint8_t* array_pos = &array[*pos >> 3]; const size_t bits_reserved_in_first_byte = (*pos & 7); size_t bits_left_to_write; - bits <<= bits_reserved_in_first_byte; + bits <<= bits_reserved_in_first_byte; *array_pos++ |= (uint8_t)bits; for (bits_left_to_write = n_bits + bits_reserved_in_first_byte; bits_left_to_write >= 9; - bits_left_to_write -= 8) { - bits >>= 8; + bits_left_to_write -= 8) { + bits >>= 8; *array_pos++ = (uint8_t)bits; - } - *array_pos = 0; - *pos += n_bits; -#endif -} - + } + *array_pos = 0; + *pos += n_bits; +#endif +} + static BROTLI_INLINE void BrotliWriteBitsPrepareStorage( size_t pos, uint8_t* array) { BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos)); BROTLI_DCHECK((pos & 7) == 0); - array[pos >> 3] = 0; -} - + array[pos >> 3] = 0; +} + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif - + #endif /* BROTLI_ENC_WRITE_BITS_H_ */ diff --git a/contrib/libs/brotli/enc/ya.make b/contrib/libs/brotli/enc/ya.make index 6d415989e7f..67da82ec4d5 100644 --- a/contrib/libs/brotli/enc/ya.make +++ b/contrib/libs/brotli/enc/ya.make @@ -1,6 +1,6 @@ -LIBRARY() - -LICENSE(MIT) +LIBRARY() + +LICENSE(MIT) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) @@ -9,19 +9,19 @@ OWNER( g:contrib g:cpp-contrib ) - -NO_UTIL() -NO_COMPILER_WARNINGS() - +NO_UTIL() + +NO_COMPILER_WARNINGS() + ADDINCL(GLOBAL contrib/libs/brotli/include) -PEERDIR( +PEERDIR( contrib/libs/brotli/common - contrib/libs/brotli/dec -) - -SRCS( + contrib/libs/brotli/dec +) + +SRCS( backward_references.c backward_references_hq.c bit_cost.c @@ -40,8 +40,8 @@ SRCS( metablock.c static_dict.c utf8_util.c -) - +) + CFLAGS(-DBROTLI_BUILD_PORTABLE) -END() +END() diff --git a/contrib/libs/brotli/ya.make b/contrib/libs/brotli/ya.make index f1b05ee869e..f0941aa6381 100644 --- a/contrib/libs/brotli/ya.make +++ b/contrib/libs/brotli/ya.make @@ -1,10 +1,10 @@ VERSION(1.0.1) 
-RECURSE( +RECURSE( common - dec - enc - tools + dec + enc + tools python java -) +) diff --git a/contrib/libs/c-ares/ares_build.h b/contrib/libs/c-ares/ares_build.h index 26ad1251764..e0a2ed784b5 100644 --- a/contrib/libs/c-ares/ares_build.h +++ b/contrib/libs/c-ares/ares_build.h @@ -202,9 +202,9 @@ # else # define CARES_TYPEOF_ARES_SSIZE_T long # endif -#else +#else # define CARES_TYPEOF_ARES_SSIZE_T ssize_t -#endif +#endif typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t; diff --git a/contrib/libs/c-ares/ares_config.h b/contrib/libs/c-ares/ares_config.h index 1e8def9f5a8..6695a6ad21b 100644 --- a/contrib/libs/c-ares/ares_config.h +++ b/contrib/libs/c-ares/ares_config.h @@ -46,11 +46,11 @@ /* Define to 1 if you have the header file. */ #define HAVE_ARPA_INET_H 1 -#if defined(__ANDROID__) -#else +#if defined(__ANDROID__) +#else /* Define to 1 if you have the header file. */ #define HAVE_ARPA_NAMESER_COMPAT_H 1 -#endif +#endif /* Define to 1 if you have the header file. */ #define HAVE_ARPA_NAMESER_H 1 @@ -118,10 +118,10 @@ /* Define to 1 if you have the getservbyport_r function. */ #define HAVE_GETSERVBYPORT_R 1 -#if defined(__APPLE__) -#undef HAVE_GETSERVBYPORT_R -#endif - +#if defined(__APPLE__) +#undef HAVE_GETSERVBYPORT_R +#endif + #if defined(__ANDROID__) #undef HAVE_GETSERVBYPORT_R #endif @@ -385,7 +385,7 @@ #define RANDOM_FILE "/dev/urandom" /* Define to the type qualifier pointed by arg 5 for recvfrom. */ -#define RECVFROM_QUAL_ARG5 +#define RECVFROM_QUAL_ARG5 /* Define to the type of arg 1 for recvfrom. */ #define RECVFROM_TYPE_ARG1 int @@ -456,24 +456,24 @@ /* The size of `int', as computed by sizeof. */ #define SIZEOF_INT 4 -#if defined(_MSC_VER) -#define SIZEOF_LONG 4 -#elif defined(__SIZEOF_LONG__) -#define SIZEOF_LONG __SIZEOF_LONG__ -#else +#if defined(_MSC_VER) +#define SIZEOF_LONG 4 +#elif defined(__SIZEOF_LONG__) +#define SIZEOF_LONG __SIZEOF_LONG__ +#else /* The size of `long', as computed by sizeof. 
*/ #define SIZEOF_LONG 8 -#endif +#endif /* The size of `short', as computed by sizeof. */ #define SIZEOF_SHORT 2 -#if defined(__SIZEOF_SIZE_T__) -#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ -#else +#if defined(__SIZEOF_SIZE_T__) +#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ +#else /* The size of `size_t', as computed by sizeof. */ #define SIZEOF_SIZE_T 8 -#endif +#endif /* The size of `struct in6_addr', as computed by sizeof. */ #define SIZEOF_STRUCT_IN6_ADDR 16 diff --git a/contrib/libs/c-ares/ares_setup.h b/contrib/libs/c-ares/ares_setup.h index 9a9badb9d96..b9a1fd5b528 100644 --- a/contrib/libs/c-ares/ares_setup.h +++ b/contrib/libs/c-ares/ares_setup.h @@ -28,7 +28,7 @@ * configuration file for platforms which lack config tool. */ -#if defined(HAVE_CONFIG_H) && !defined(_MSC_VER) +#if defined(HAVE_CONFIG_H) && !defined(_MSC_VER) #include "ares_config.h" #else diff --git a/contrib/libs/cctz/tzdata/ya.make b/contrib/libs/cctz/tzdata/ya.make index 8b8be805f0a..3b9ff5cc673 100644 --- a/contrib/libs/cctz/tzdata/ya.make +++ b/contrib/libs/cctz/tzdata/ya.make @@ -2,7 +2,7 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) +LICENSE(Apache-2.0) OWNER( dfyz diff --git a/contrib/libs/crcutil/ya.make b/contrib/libs/crcutil/ya.make index 1d6fbdfeecd..2da8ef940f7 100644 --- a/contrib/libs/crcutil/ya.make +++ b/contrib/libs/crcutil/ya.make @@ -1,6 +1,6 @@ LIBRARY() -LICENSE(Apache-2.0) +LICENSE(Apache-2.0) VERSION(1.0) @@ -10,18 +10,18 @@ OWNER( g:contrib g:cpp-contrib ) - + NO_UTIL() NO_COMPILER_WARNINGS() NO_JOIN_SRC() -IF (GCC AND USE_LTO) - CFLAGS(-DCRCUTIL_FORCE_ASM_CRC32C=1) +IF (GCC AND USE_LTO) + CFLAGS(-DCRCUTIL_FORCE_ASM_CRC32C=1) ENDIF() - -IF (ARCH_I386 OR ARCH_X86_64) + +IF (ARCH_I386 OR ARCH_X86_64) IF (OS_WINDOWS) SRCS( multiword_64_64_cl_i386_mmx.cc @@ -29,7 +29,7 @@ IF (ARCH_I386 OR ARCH_X86_64) ELSEIF (OS_ANDROID AND ARCH_I386) # 32-bit Android has some problems with register allocation, so we fall back to default implementation ELSE() - IF (CLANG) + IF 
(CLANG) CFLAGS(-DCRCUTIL_USE_MM_CRC32=1) IF (ARCH_I386) # clang doesn't support this as optimization attribute and has problems with register allocation @@ -42,17 +42,17 @@ IF (ARCH_I386 OR ARCH_X86_64) multiword_64_64_gcc_i386_mmx.cc ) ENDIF() - ELSE() - CFLAGS( + ELSE() + CFLAGS( -mcrc32 -DCRCUTIL_USE_MM_CRC32=1 - ) - ENDIF() - SRCS( - multiword_128_64_gcc_amd64_sse2.cc - multiword_64_64_gcc_amd64_asm.cc + ) + ENDIF() + SRCS( + multiword_128_64_gcc_amd64_sse2.cc + multiword_64_64_gcc_amd64_asm.cc ) - ENDIF() + ENDIF() IF (OS_WINDOWS) SRCS( crc32c_sse4.cc diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt index 718c123c48a..f2ee7fef0c6 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt @@ -1,35 +1,35 @@ -absvti2 -addvti3 -ashlti3 -ashrti3 -clzti2 -cmpti2 -ctzti2 -divti3 -ffsti2 -fixdfti -fixsfti -fixunsdfti -fixunssfti -fixunsxfti -fixxfti -floattidf -floattisf -floattixf -floatuntidf -floatuntisf -floatuntixf -lshrti3 -modti3 -muloti4 -multi3 -mulvti3 -negti2 -negvti2 -parityti2 -popcountti2 -subvti3 -ucmpti2 -udivmodti4 -udivti3 -umodti3 +absvti2 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +subvti3 +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt index 844be5088f6..70d3644f271 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt @@ -1,96 +1,96 @@ -apple_versioning -absvdi2 -absvsi2 -adddf3 -addsf3 -addvdi3 -addvsi3 -ashldi3 -ashrdi3 -clear_cache -clzdi2 -clzsi2 
-cmpdi2 -ctzdi2 -ctzsi2 -divdc3 -divdf3 -divdi3 -divmoddi4 -divmodsi4 -divsc3 -divsf3 -divsi3 -divxc3 -enable_execute_stack -comparedf2 -comparesf2 -extendhfsf2 -extendsfdf2 -ffsdi2 -fixdfdi -fixdfsi -fixsfdi -fixsfsi -fixunsdfdi -fixunsdfsi -fixunssfdi -fixunssfsi -fixunsxfdi -fixunsxfsi -fixxfdi -floatdidf -floatdisf -floatdixf -floatsidf -floatsisf -floatunsidf -floatunsisf -gcc_personality_v0 -gnu_f2h_ieee -gnu_h2f_ieee -lshrdi3 -moddi3 -modsi3 -muldc3 -muldf3 -muldi3 -mulodi4 -mulosi4 -mulsc3 -mulsf3 -mulvdi3 -mulvsi3 -mulxc3 -negdf2 -negdi2 -negsf2 -negvdi2 -negvsi2 -paritydi2 -paritysi2 -popcountdi2 -popcountsi2 -powidf2 -powisf2 -powixf2 -subdf3 -subsf3 -subvdi3 -subvsi3 -truncdfhf2 -truncdfsf2 -truncsfhf2 -ucmpdi2 -udivdi3 -udivmoddi4 -udivmodsi4 -udivsi3 -umoddi3 -umodsi3 -atomic_flag_clear -atomic_flag_clear_explicit -atomic_flag_test_and_set -atomic_flag_test_and_set_explicit -atomic_signal_fence -atomic_thread_fence \ No newline at end of file +apple_versioning +absvdi2 +absvsi2 +adddf3 +addsf3 +addvdi3 +addvsi3 +ashldi3 +ashrdi3 +clear_cache +clzdi2 +clzsi2 +cmpdi2 +ctzdi2 +ctzsi2 +divdc3 +divdf3 +divdi3 +divmoddi4 +divmodsi4 +divsc3 +divsf3 +divsi3 +divxc3 +enable_execute_stack +comparedf2 +comparesf2 +extendhfsf2 +extendsfdf2 +ffsdi2 +fixdfdi +fixdfsi +fixsfdi +fixsfsi +fixunsdfdi +fixunsdfsi +fixunssfdi +fixunssfsi +fixunsxfdi +fixunsxfsi +fixxfdi +floatdidf +floatdisf +floatdixf +floatsidf +floatsisf +floatunsidf +floatunsisf +gcc_personality_v0 +gnu_f2h_ieee +gnu_h2f_ieee +lshrdi3 +moddi3 +modsi3 +muldc3 +muldf3 +muldi3 +mulodi4 +mulosi4 +mulsc3 +mulsf3 +mulvdi3 +mulvsi3 +mulxc3 +negdf2 +negdi2 +negsf2 +negvdi2 +negvsi2 +paritydi2 +paritysi2 +popcountdi2 +popcountsi2 +powidf2 +powisf2 +powixf2 +subdf3 +subsf3 +subvdi3 +subvsi3 +truncdfhf2 +truncdfsf2 +truncsfhf2 +ucmpdi2 +udivdi3 +udivmoddi4 +udivmodsi4 +udivsi3 +umoddi3 +umodsi3 +atomic_flag_clear +atomic_flag_clear_explicit +atomic_flag_test_and_set +atomic_flag_test_and_set_explicit 
+atomic_signal_fence +atomic_thread_fence \ No newline at end of file diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt index bc6fcefc207..266e4221524 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt @@ -1,4 +1,4 @@ -file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt) -foreach(filter_file ${filter_files}) - set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file}) -endforeach() +file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt) +foreach(filter_file ${filter_files}) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file}) +endforeach() diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT b/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT index 50b1e2d5136..173eccca6de 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT @@ -1,11 +1,11 @@ -This folder contains list of symbols that should be excluded from the builtin -libraries for Darwin. There are two reasons symbols are excluded: - -(1) They aren't supported on Darwin -(2) They are contained within the OS on the minimum supported target - -The builtin libraries must contain all symbols not provided by the lowest -supported target OS. Meaning if minimum deployment target is iOS 6, all builtins -not included in the ios6-.txt files need to be included. The one catch is -that this is per-architecture. Since iOS 6 doesn't support arm64, when supporting -iOS 6, the minimum deployment target for arm64 binaries is iOS 7. +This folder contains list of symbols that should be excluded from the builtin +libraries for Darwin. 
There are two reasons symbols are excluded: + +(1) They aren't supported on Darwin +(2) They are contained within the OS on the minimum supported target + +The builtin libraries must contain all symbols not provided by the lowest +supported target OS. Meaning if minimum deployment target is iOS 6, all builtins +not included in the ios6-.txt files need to be included. The one catch is +that this is per-architecture. Since iOS 6 doesn't support arm64, when supporting +iOS 6, the minimum deployment target for arm64 binaries is iOS 7. diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt index 4500f5f7208..6aa542f7fe4 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt @@ -1,57 +1,57 @@ -absvti2 -addtf3 -addvti3 -aeabi_cdcmp -aeabi_cdcmpeq_check_nan -aeabi_cfcmp -aeabi_cfcmpeq_check_nan -aeabi_dcmp -aeabi_div0 -aeabi_drsub -aeabi_fcmp -aeabi_frsub -aeabi_idivmod -aeabi_ldivmod -aeabi_memcmp -aeabi_memcpy -aeabi_memmove -aeabi_memset -aeabi_uidivmod -aeabi_uldivmod -ashlti3 -ashrti3 -clzti2 -cmpti2 -ctzti2 -divtf3 -divti3 -ffsti2 -fixdfti -fixsfti -fixunsdfti -fixunssfti -fixunsxfti -fixxfti -floattidf -floattisf -floattixf -floatuntidf -floatuntisf -floatuntixf -lshrti3 -modti3 -multf3 -multi3 -mulvti3 -negti2 -negvti2 -parityti2 -popcountti2 -powitf2 -subtf3 -subvti3 -trampoline_setup -ucmpti2 -udivmodti4 -udivti3 -umodti3 +absvti2 +addtf3 +addvti3 +aeabi_cdcmp +aeabi_cdcmpeq_check_nan +aeabi_cfcmp +aeabi_cfcmpeq_check_nan +aeabi_dcmp +aeabi_div0 +aeabi_drsub +aeabi_fcmp +aeabi_frsub +aeabi_idivmod +aeabi_ldivmod +aeabi_memcmp +aeabi_memcpy +aeabi_memmove +aeabi_memset +aeabi_uidivmod +aeabi_uldivmod +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divtf3 +divti3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf 
+lshrti3 +modti3 +multf3 +multi3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subtf3 +subvti3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt index 735a87b05a3..28167aa4c5d 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt @@ -1,57 +1,57 @@ -absvti2 -addtf3 -addvti3 -aeabi_cdcmp -aeabi_cdcmpeq_check_nan -aeabi_cfcmp -aeabi_cfcmpeq_check_nan -aeabi_dcmp -aeabi_div0 -aeabi_drsub -aeabi_fcmp -aeabi_frsub -aeabi_idivmod -aeabi_ldivmod -aeabi_memcmp -aeabi_memcpy -aeabi_memmove -aeabi_memset -aeabi_uidivmod -aeabi_uldivmod -ashlti3 -ashrti3 -clzti2 -cmpti2 -ctzti2 -divtf3 -divti3 -ffsti2 -fixdfti -fixsfti -fixunsdfti -fixunssfti -fixunsxfti -fixxfti -floattidf -floattisf -floattixf -floatuntidf -floatuntisf -floatuntixf -lshrti3 -modti3 -multf -multi3 -mulvti3 -negti2 -negvti2 -parityti2 -popcountti2 -powitf2 -subtf3 -subvti3 -trampoline_setup -ucmpti2 -udivmodti4 -udivti3 -umodti3 +absvti2 +addtf3 +addvti3 +aeabi_cdcmp +aeabi_cdcmpeq_check_nan +aeabi_cfcmp +aeabi_cfcmpeq_check_nan +aeabi_dcmp +aeabi_div0 +aeabi_drsub +aeabi_fcmp +aeabi_frsub +aeabi_idivmod +aeabi_ldivmod +aeabi_memcmp +aeabi_memcpy +aeabi_memmove +aeabi_memset +aeabi_uidivmod +aeabi_uldivmod +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divtf3 +divti3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +multf +multi3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subtf3 +subvti3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt index 8e02d082995..5db24000a17 100644 --- 
a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt @@ -1 +1 @@ -apple_versioning +apple_versioning diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt index bc83c6dd0a9..b01fa711a35 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt @@ -1,120 +1,120 @@ -absvdi2 -absvsi2 -adddf3 -adddf3vfp -addsf3 -addsf3vfp -addvdi3 -addvsi3 -ashldi3 -ashrdi3 -bswapdi2 -bswapsi2 -clzdi2 -clzsi2 -cmpdi2 -ctzdi2 -ctzsi2 -divdc3 -divdf3 -divdf3vfp -divdi3 -divmodsi4 -divsc3 -divsf3 -divsf3vfp -divsi3 -eqdf2 -eqdf2vfp -eqsf2 -eqsf2vfp -extendsfdf2 -extendsfdf2vfp -ffsdi2 -fixdfdi -fixdfsi -fixdfsivfp -fixsfdi -fixsfsi -fixsfsivfp -fixunsdfdi -fixunsdfsi -fixunsdfsivfp -fixunssfdi -fixunssfsi -fixunssfsivfp -floatdidf -floatdisf -floatsidf -floatsidfvfp -floatsisf -floatsisfvfp -floatundidf -floatundisf -floatunsidf -floatunsisf -floatunssidfvfp -floatunssisfvfp -gcc_personality_sj0 -gedf2 -gedf2vfp -gesf2 -gesf2vfp -gtdf2 -gtdf2vfp -gtsf2 -gtsf2vfp -ledf2 -ledf2vfp -lesf2 -lesf2vfp -lshrdi3 -ltdf2 -ltdf2vfp -ltsf2 -ltsf2vfp -moddi3 -modsi3 -muldc3 -muldf3 -muldf3vfp -muldi3 -mulodi4 -mulosi4 -mulsc3 -mulsf3 -mulsf3vfp -mulvdi3 -mulvsi3 -nedf2 -nedf2vfp -negdi2 -negvdi2 -negvsi2 -nesf2 -nesf2vfp -paritydi2 -paritysi2 -popcountdi2 -popcountsi2 -powidf2 -powisf2 -subdf3 -subdf3vfp -subsf3 -subsf3vfp -subvdi3 -subvsi3 -truncdfsf2 -truncdfsf2vfp -ucmpdi2 -udivdi3 -udivmoddi4 -udivmodsi4 -udivsi3 -umoddi3 -umodsi3 -unorddf2 -unorddf2vfp -unordsf2 -unordsf2vfp +absvdi2 +absvsi2 +adddf3 +adddf3vfp +addsf3 +addsf3vfp +addvdi3 +addvsi3 +ashldi3 +ashrdi3 +bswapdi2 +bswapsi2 +clzdi2 +clzsi2 +cmpdi2 +ctzdi2 +ctzsi2 +divdc3 +divdf3 +divdf3vfp +divdi3 +divmodsi4 +divsc3 +divsf3 +divsf3vfp +divsi3 +eqdf2 +eqdf2vfp +eqsf2 +eqsf2vfp +extendsfdf2 +extendsfdf2vfp 
+ffsdi2 +fixdfdi +fixdfsi +fixdfsivfp +fixsfdi +fixsfsi +fixsfsivfp +fixunsdfdi +fixunsdfsi +fixunsdfsivfp +fixunssfdi +fixunssfsi +fixunssfsivfp +floatdidf +floatdisf +floatsidf +floatsidfvfp +floatsisf +floatsisfvfp +floatundidf +floatundisf +floatunsidf +floatunsisf +floatunssidfvfp +floatunssisfvfp +gcc_personality_sj0 +gedf2 +gedf2vfp +gesf2 +gesf2vfp +gtdf2 +gtdf2vfp +gtsf2 +gtsf2vfp +ledf2 +ledf2vfp +lesf2 +lesf2vfp +lshrdi3 +ltdf2 +ltdf2vfp +ltsf2 +ltsf2vfp +moddi3 +modsi3 +muldc3 +muldf3 +muldf3vfp +muldi3 +mulodi4 +mulosi4 +mulsc3 +mulsf3 +mulsf3vfp +mulvdi3 +mulvsi3 +nedf2 +nedf2vfp +negdi2 +negvdi2 +negvsi2 +nesf2 +nesf2vfp +paritydi2 +paritysi2 +popcountdi2 +popcountsi2 +powidf2 +powisf2 +subdf3 +subdf3vfp +subsf3 +subsf3vfp +subvdi3 +subvsi3 +truncdfsf2 +truncdfsf2vfp +ucmpdi2 +udivdi3 +udivmoddi4 +udivmodsi4 +udivsi3 +umoddi3 +umodsi3 +unorddf2 +unorddf2vfp +unordsf2 +unordsf2vfp diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt index bc83c6dd0a9..b01fa711a35 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt @@ -1,120 +1,120 @@ -absvdi2 -absvsi2 -adddf3 -adddf3vfp -addsf3 -addsf3vfp -addvdi3 -addvsi3 -ashldi3 -ashrdi3 -bswapdi2 -bswapsi2 -clzdi2 -clzsi2 -cmpdi2 -ctzdi2 -ctzsi2 -divdc3 -divdf3 -divdf3vfp -divdi3 -divmodsi4 -divsc3 -divsf3 -divsf3vfp -divsi3 -eqdf2 -eqdf2vfp -eqsf2 -eqsf2vfp -extendsfdf2 -extendsfdf2vfp -ffsdi2 -fixdfdi -fixdfsi -fixdfsivfp -fixsfdi -fixsfsi -fixsfsivfp -fixunsdfdi -fixunsdfsi -fixunsdfsivfp -fixunssfdi -fixunssfsi -fixunssfsivfp -floatdidf -floatdisf -floatsidf -floatsidfvfp -floatsisf -floatsisfvfp -floatundidf -floatundisf -floatunsidf -floatunsisf -floatunssidfvfp -floatunssisfvfp -gcc_personality_sj0 -gedf2 -gedf2vfp -gesf2 -gesf2vfp -gtdf2 -gtdf2vfp -gtsf2 -gtsf2vfp -ledf2 -ledf2vfp -lesf2 -lesf2vfp -lshrdi3 -ltdf2 -ltdf2vfp -ltsf2 
-ltsf2vfp -moddi3 -modsi3 -muldc3 -muldf3 -muldf3vfp -muldi3 -mulodi4 -mulosi4 -mulsc3 -mulsf3 -mulsf3vfp -mulvdi3 -mulvsi3 -nedf2 -nedf2vfp -negdi2 -negvdi2 -negvsi2 -nesf2 -nesf2vfp -paritydi2 -paritysi2 -popcountdi2 -popcountsi2 -powidf2 -powisf2 -subdf3 -subdf3vfp -subsf3 -subsf3vfp -subvdi3 -subvsi3 -truncdfsf2 -truncdfsf2vfp -ucmpdi2 -udivdi3 -udivmoddi4 -udivmodsi4 -udivsi3 -umoddi3 -umodsi3 -unorddf2 -unorddf2vfp -unordsf2 -unordsf2vfp +absvdi2 +absvsi2 +adddf3 +adddf3vfp +addsf3 +addsf3vfp +addvdi3 +addvsi3 +ashldi3 +ashrdi3 +bswapdi2 +bswapsi2 +clzdi2 +clzsi2 +cmpdi2 +ctzdi2 +ctzsi2 +divdc3 +divdf3 +divdf3vfp +divdi3 +divmodsi4 +divsc3 +divsf3 +divsf3vfp +divsi3 +eqdf2 +eqdf2vfp +eqsf2 +eqsf2vfp +extendsfdf2 +extendsfdf2vfp +ffsdi2 +fixdfdi +fixdfsi +fixdfsivfp +fixsfdi +fixsfsi +fixsfsivfp +fixunsdfdi +fixunsdfsi +fixunsdfsivfp +fixunssfdi +fixunssfsi +fixunssfsivfp +floatdidf +floatdisf +floatsidf +floatsidfvfp +floatsisf +floatsisfvfp +floatundidf +floatundisf +floatunsidf +floatunsisf +floatunssidfvfp +floatunssisfvfp +gcc_personality_sj0 +gedf2 +gedf2vfp +gesf2 +gesf2vfp +gtdf2 +gtdf2vfp +gtsf2 +gtsf2vfp +ledf2 +ledf2vfp +lesf2 +lesf2vfp +lshrdi3 +ltdf2 +ltdf2vfp +ltsf2 +ltsf2vfp +moddi3 +modsi3 +muldc3 +muldf3 +muldf3vfp +muldi3 +mulodi4 +mulosi4 +mulsc3 +mulsf3 +mulsf3vfp +mulvdi3 +mulvsi3 +nedf2 +nedf2vfp +negdi2 +negvdi2 +negvsi2 +nesf2 +nesf2vfp +paritydi2 +paritysi2 +popcountdi2 +popcountsi2 +powidf2 +powisf2 +subdf3 +subdf3vfp +subsf3 +subsf3vfp +subvdi3 +subvsi3 +truncdfsf2 +truncdfsf2vfp +ucmpdi2 +udivdi3 +udivmoddi4 +udivmodsi4 +udivsi3 +umoddi3 +umodsi3 +unorddf2 +unorddf2vfp +unordsf2 +unordsf2vfp diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt index 3e0c2b25a72..5e4caf9e9fb 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt @@ -1,16 +1,16 @@ -clzti2 -divti3 
-fixdfti -fixsfti -fixunsdfti -floattidf -floattisf -floatuntidf -floatuntisf -gcc_personality_v0 -modti3 -powidf2 -powisf2 -udivmodti4 -udivti3 -umodti3 +clzti2 +divti3 +fixdfti +fixsfti +fixunsdfti +floattidf +floattisf +floatuntidf +floatuntisf +gcc_personality_v0 +modti3 +powidf2 +powisf2 +udivmodti4 +udivti3 +umodti3 diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt index fc9372cc7ef..60c0e2d6505 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt @@ -1,82 +1,82 @@ -absvti2 -addtf3 -addvti3 -ashlti3 -ashrti3 -clzti2 -cmpti2 -ctzti2 -divti3 -divtf3 -ffsti2 -fixdfti -fixsfti -fixunsdfti -fixunssfti -fixunsxfti -fixxfti -floattidf -floattisf -floattixf -floatuntidf -floatuntisf -floatuntixf -lshrti3 -modti3 -muloti4 -multi3 -multf3 -mulvti3 -negti2 -negvti2 -parityti2 -popcountti2 -powitf2 -subvti3 -subtf3 -trampoline_setup -ucmpti2 -udivmodti4 -udivti3 -umodti3 -absvti2 -addtf3 -addvti3 -ashlti3 -ashrti3 -clzti2 -cmpti2 -ctzti2 -divti3 -divtf3 -ffsti2 -fixdfti -fixsfti -fixunsdfti -fixunssfti -fixunsxfti -fixxfti -floattidf -floattisf -floattixf -floatuntidf -floatuntisf -floatuntixf -lshrti3 -modti3 -muloti4 -multi3 -multf3 -mulvti3 -negti2 -negvti2 -parityti2 -popcountti2 -powitf2 -subvti3 -subtf3 -trampoline_setup -ucmpti2 -udivmodti4 -udivti3 -umodti3 +absvti2 +addtf3 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +divtf3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +multf3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subvti3 +subtf3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 +absvti2 +addtf3 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +divtf3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti 
+fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +multf3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subvti3 +subtf3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt index 5a25ce40419..de1574e6ce3 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt @@ -1,12 +1,12 @@ -addtf3 -divtf3 -multf3 -powitf2 -subtf3 -trampoline_setup -addtf3 -divtf3 -multf3 -powitf2 -subtf3 -trampoline_setup +addtf3 +divtf3 +multf3 +powitf2 +subtf3 +trampoline_setup +addtf3 +divtf3 +multf3 +powitf2 +subtf3 +trampoline_setup diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt index 8e02d082995..5db24000a17 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt @@ -1 +1 @@ -apple_versioning +apple_versioning diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt index fc9372cc7ef..60c0e2d6505 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt @@ -1,82 +1,82 @@ -absvti2 -addtf3 -addvti3 -ashlti3 -ashrti3 -clzti2 -cmpti2 -ctzti2 -divti3 -divtf3 -ffsti2 -fixdfti -fixsfti -fixunsdfti -fixunssfti -fixunsxfti -fixxfti -floattidf -floattisf -floattixf -floatuntidf -floatuntisf -floatuntixf -lshrti3 -modti3 -muloti4 -multi3 -multf3 -mulvti3 -negti2 -negvti2 -parityti2 -popcountti2 -powitf2 -subvti3 -subtf3 -trampoline_setup -ucmpti2 -udivmodti4 -udivti3 -umodti3 -absvti2 -addtf3 -addvti3 -ashlti3 -ashrti3 -clzti2 -cmpti2 -ctzti2 -divti3 -divtf3 -ffsti2 
-fixdfti -fixsfti -fixunsdfti -fixunssfti -fixunsxfti -fixxfti -floattidf -floattisf -floattixf -floatuntidf -floatuntisf -floatuntixf -lshrti3 -modti3 -muloti4 -multi3 -multf3 -mulvti3 -negti2 -negvti2 -parityti2 -popcountti2 -powitf2 -subvti3 -subtf3 -trampoline_setup -ucmpti2 -udivmodti4 -udivti3 -umodti3 +absvti2 +addtf3 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +divtf3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +multf3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subvti3 +subtf3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 +absvti2 +addtf3 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +divtf3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +multf3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subvti3 +subtf3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt index 5a25ce40419..de1574e6ce3 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt @@ -1,12 +1,12 @@ -addtf3 -divtf3 -multf3 -powitf2 -subtf3 -trampoline_setup -addtf3 -divtf3 -multf3 -powitf2 -subtf3 -trampoline_setup +addtf3 +divtf3 +multf3 +powitf2 +subtf3 +trampoline_setup +addtf3 +divtf3 +multf3 +powitf2 +subtf3 +trampoline_setup diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt index 8e02d082995..5db24000a17 100644 --- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt +++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt @@ -1 +1 @@ -apple_versioning 
+apple_versioning diff --git a/contrib/libs/cxxsupp/builtins/README.txt b/contrib/libs/cxxsupp/builtins/README.txt index c5d95c2c96d..ad36e4e5279 100644 --- a/contrib/libs/cxxsupp/builtins/README.txt +++ b/contrib/libs/cxxsupp/builtins/README.txt @@ -1,345 +1,345 @@ -Compiler-RT -================================ - -This directory and its subdirectories contain source code for the compiler -support routines. - -Compiler-RT is open source software. You may freely distribute it under the -terms of the license agreement found in LICENSE.txt. - -================================ - -This is a replacement library for libgcc. Each function is contained -in its own file. Each function has a corresponding unit test under -test/Unit. - -A rudimentary script to test each file is in the file called -test/Unit/test. - -Here is the specification for this library: - -http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc - -Here is a synopsis of the contents of this library: - -typedef int si_int; -typedef unsigned su_int; - -typedef long long di_int; -typedef unsigned long long du_int; - -// Integral bit manipulation - -di_int __ashldi3(di_int a, si_int b); // a << b -ti_int __ashlti3(ti_int a, si_int b); // a << b - -di_int __ashrdi3(di_int a, si_int b); // a >> b arithmetic (sign fill) -ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill) -di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill) -ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill) - -si_int __clzsi2(si_int a); // count leading zeros -si_int __clzdi2(di_int a); // count leading zeros -si_int __clzti2(ti_int a); // count leading zeros -si_int __ctzsi2(si_int a); // count trailing zeros -si_int __ctzdi2(di_int a); // count trailing zeros -si_int __ctzti2(ti_int a); // count trailing zeros - -si_int __ffsdi2(di_int a); // find least significant 1 bit -si_int __ffsti2(ti_int a); // find least significant 1 bit - -si_int __paritysi2(si_int a); // bit parity -si_int 
__paritydi2(di_int a); // bit parity -si_int __parityti2(ti_int a); // bit parity - -si_int __popcountsi2(si_int a); // bit population -si_int __popcountdi2(di_int a); // bit population -si_int __popcountti2(ti_int a); // bit population - -uint32_t __bswapsi2(uint32_t a); // a byteswapped, arm only -uint64_t __bswapdi2(uint64_t a); // a byteswapped, arm only - -// Integral arithmetic - -di_int __negdi2 (di_int a); // -a -ti_int __negti2 (ti_int a); // -a -di_int __muldi3 (di_int a, di_int b); // a * b -ti_int __multi3 (ti_int a, ti_int b); // a * b -si_int __divsi3 (si_int a, si_int b); // a / b signed -di_int __divdi3 (di_int a, di_int b); // a / b signed -ti_int __divti3 (ti_int a, ti_int b); // a / b signed -su_int __udivsi3 (su_int n, su_int d); // a / b unsigned -du_int __udivdi3 (du_int a, du_int b); // a / b unsigned -tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned -si_int __modsi3 (si_int a, si_int b); // a % b signed -di_int __moddi3 (di_int a, di_int b); // a % b signed -ti_int __modti3 (ti_int a, ti_int b); // a % b signed -su_int __umodsi3 (su_int a, su_int b); // a % b unsigned -du_int __umoddi3 (du_int a, du_int b); // a % b unsigned -tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned -du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned -tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned -su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned -si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed - - - -// Integral arithmetic with trapping overflow - -si_int __absvsi2(si_int a); // abs(a) -di_int __absvdi2(di_int a); // abs(a) -ti_int __absvti2(ti_int a); // abs(a) - -si_int __negvsi2(si_int a); // -a -di_int __negvdi2(di_int a); // -a -ti_int __negvti2(ti_int a); // -a - -si_int __addvsi3(si_int a, si_int b); // a + b -di_int __addvdi3(di_int a, di_int b); // a + b -ti_int __addvti3(ti_int a, ti_int b); // a 
+ b - -si_int __subvsi3(si_int a, si_int b); // a - b -di_int __subvdi3(di_int a, di_int b); // a - b -ti_int __subvti3(ti_int a, ti_int b); // a - b - -si_int __mulvsi3(si_int a, si_int b); // a * b -di_int __mulvdi3(di_int a, di_int b); // a * b -ti_int __mulvti3(ti_int a, ti_int b); // a * b - - -// Integral arithmetic which returns if overflow - -si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range -di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range -ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to - one if result not in signed range - - -// Integral comparison: a < b -> 0 -// a == b -> 1 -// a > b -> 2 - -si_int __cmpdi2 (di_int a, di_int b); -si_int __cmpti2 (ti_int a, ti_int b); -si_int __ucmpdi2(du_int a, du_int b); -si_int __ucmpti2(tu_int a, tu_int b); - -// Integral / floating point conversion - -di_int __fixsfdi( float a); -di_int __fixdfdi( double a); -di_int __fixxfdi(long double a); - -ti_int __fixsfti( float a); -ti_int __fixdfti( double a); -ti_int __fixxfti(long double a); -uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation - -su_int __fixunssfsi( float a); -su_int __fixunsdfsi( double a); -su_int __fixunsxfsi(long double a); - -du_int __fixunssfdi( float a); -du_int __fixunsdfdi( double a); -du_int __fixunsxfdi(long double a); - -tu_int __fixunssfti( float a); -tu_int __fixunsdfti( double a); -tu_int __fixunsxfti(long double a); -uint64_t __fixunstfdi(long double input); // ppc only - -float __floatdisf(di_int a); -double __floatdidf(di_int a); -long double __floatdixf(di_int a); -long double __floatditf(int64_t a); // ppc only - -float __floattisf(ti_int a); -double __floattidf(ti_int a); -long double __floattixf(ti_int a); - -float __floatundisf(du_int a); -double __floatundidf(du_int a); -long double __floatundixf(du_int a); -long double __floatunditf(uint64_t a); 
// ppc only - -float __floatuntisf(tu_int a); -double __floatuntidf(tu_int a); -long double __floatuntixf(tu_int a); - -// Floating point raised to integer power - -float __powisf2( float a, si_int b); // a ^ b -double __powidf2( double a, si_int b); // a ^ b -long double __powixf2(long double a, si_int b); // a ^ b -long double __powitf2(long double a, si_int b); // ppc only, a ^ b - -// Complex arithmetic - -// (a + ib) * (c + id) - - float _Complex __mulsc3( float a, float b, float c, float d); - double _Complex __muldc3(double a, double b, double c, double d); -long double _Complex __mulxc3(long double a, long double b, - long double c, long double d); -long double _Complex __multc3(long double a, long double b, - long double c, long double d); // ppc only - -// (a + ib) / (c + id) - - float _Complex __divsc3( float a, float b, float c, float d); - double _Complex __divdc3(double a, double b, double c, double d); -long double _Complex __divxc3(long double a, long double b, - long double c, long double d); -long double _Complex __divtc3(long double a, long double b, - long double c, long double d); // ppc only - - -// Runtime support - -// __clear_cache() is used to tell process that new instructions have been -// written to an address range. Necessary on processors that do not have -// a unified instruction and data cache. -void __clear_cache(void* start, void* end); - -// __enable_execute_stack() is used with nested functions when a trampoline -// function is written onto the stack and that page range needs to be made -// executable. -void __enable_execute_stack(void* addr); - -// __gcc_personality_v0() is normally only called by the system unwinder. -// C code (as opposed to C++) normally does not need a personality function -// because there are no catch clauses or destructors to be run. 
But there -// is a C language extension __attribute__((cleanup(func))) which marks local -// variables as needing the cleanup function "func" to be run when the -// variable goes out of scope. That includes when an exception is thrown, -// so a personality handler is needed. -_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions, - uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, - _Unwind_Context_t context); - -// for use with some implementations of assert() in -void __eprintf(const char* format, const char* assertion_expression, - const char* line, const char* file); - -// for systems with emulated thread local storage -void* __emutls_get_address(struct __emutls_control*); - - -// Power PC specific functions - -// There is no C interface to the saveFP/restFP functions. They are helper -// functions called by the prolog and epilog of functions that need to save -// a number of non-volatile float point registers. -saveFP -restFP - -// PowerPC has a standard template for trampoline functions. This function -// generates a custom trampoline function with the specific realFunc -// and localsPtr values. -void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, - const void* realFunc, void* localsPtr); - -// adds two 128-bit double-double precision values ( x + y ) -long double __gcc_qadd(long double x, long double y); - -// subtracts two 128-bit double-double precision values ( x - y ) -long double __gcc_qsub(long double x, long double y); - -// multiples two 128-bit double-double precision values ( x * y ) -long double __gcc_qmul(long double x, long double y); - -// divides two 128-bit double-double precision values ( x / y ) -long double __gcc_qdiv(long double a, long double b); - - -// ARM specific functions - -// There is no C interface to the switch* functions. These helper functions -// are only needed by Thumb1 code for efficient switch table generation. 
-switch16 -switch32 -switch8 -switchu8 - -// There is no C interface to the *_vfp_d8_d15_regs functions. There are -// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use -// SJLJ for exceptions, each function with a catch clause or destuctors needs -// to save and restore all registers in it prolog and epliog. But there is -// no way to access vector and high float registers from thumb1 code, so the -// compiler must add call outs to these helper functions in the prolog and -// epilog. -restore_vfp_d8_d15_regs -save_vfp_d8_d15_regs - - -// Note: long ago ARM processors did not have floating point hardware support. -// Floating point was done in software and floating point parameters were -// passed in integer registers. When hardware support was added for floating -// point, new *vfp functions were added to do the same operations but with -// floating point parameters in floating point registers. - -// Undocumented functions - -float __addsf3vfp(float a, float b); // Appears to return a + b -double __adddf3vfp(double a, double b); // Appears to return a + b -float __divsf3vfp(float a, float b); // Appears to return a / b -double __divdf3vfp(double a, double b); // Appears to return a / b -int __eqsf2vfp(float a, float b); // Appears to return one - // iff a == b and neither is NaN. -int __eqdf2vfp(double a, double b); // Appears to return one - // iff a == b and neither is NaN. -double __extendsfdf2vfp(float a); // Appears to convert from - // float to double. -int __fixdfsivfp(double a); // Appears to convert from - // double to int. -int __fixsfsivfp(float a); // Appears to convert from - // float to int. -unsigned int __fixunssfsivfp(float a); // Appears to convert from - // float to unsigned int. -unsigned int __fixunsdfsivfp(double a); // Appears to convert from - // double to unsigned int. -double __floatsidfvfp(int a); // Appears to convert from - // int to double. 
-float __floatsisfvfp(int a); // Appears to convert from - // int to float. -double __floatunssidfvfp(unsigned int a); // Appears to convert from - // unisgned int to double. -float __floatunssisfvfp(unsigned int a); // Appears to convert from - // unisgned int to float. -int __gedf2vfp(double a, double b); // Appears to return __gedf2 - // (a >= b) -int __gesf2vfp(float a, float b); // Appears to return __gesf2 - // (a >= b) -int __gtdf2vfp(double a, double b); // Appears to return __gtdf2 - // (a > b) -int __gtsf2vfp(float a, float b); // Appears to return __gtsf2 - // (a > b) -int __ledf2vfp(double a, double b); // Appears to return __ledf2 - // (a <= b) -int __lesf2vfp(float a, float b); // Appears to return __lesf2 - // (a <= b) -int __ltdf2vfp(double a, double b); // Appears to return __ltdf2 - // (a < b) -int __ltsf2vfp(float a, float b); // Appears to return __ltsf2 - // (a < b) -double __muldf3vfp(double a, double b); // Appears to return a * b -float __mulsf3vfp(float a, float b); // Appears to return a * b -int __nedf2vfp(double a, double b); // Appears to return __nedf2 - // (a != b) -double __negdf2vfp(double a); // Appears to return -a -float __negsf2vfp(float a); // Appears to return -a -float __negsf2vfp(float a); // Appears to return -a -double __subdf3vfp(double a, double b); // Appears to return a - b -float __subsf3vfp(float a, float b); // Appears to return a - b -float __truncdfsf2vfp(double a); // Appears to convert from - // double to float. -int __unorddf2vfp(double a, double b); // Appears to return __unorddf2 -int __unordsf2vfp(float a, float b); // Appears to return __unordsf2 - - -Preconditions are listed for each function at the definition when there are any. -Any preconditions reflect the specification at -http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc. - -Assumptions are listed in "int_lib.h", and in individual files. Where possible -assumptions are checked at compile time. 
+Compiler-RT +================================ + +This directory and its subdirectories contain source code for the compiler +support routines. + +Compiler-RT is open source software. You may freely distribute it under the +terms of the license agreement found in LICENSE.txt. + +================================ + +This is a replacement library for libgcc. Each function is contained +in its own file. Each function has a corresponding unit test under +test/Unit. + +A rudimentary script to test each file is in the file called +test/Unit/test. + +Here is the specification for this library: + +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc + +Here is a synopsis of the contents of this library: + +typedef int si_int; +typedef unsigned su_int; + +typedef long long di_int; +typedef unsigned long long du_int; + +// Integral bit manipulation + +di_int __ashldi3(di_int a, si_int b); // a << b +ti_int __ashlti3(ti_int a, si_int b); // a << b + +di_int __ashrdi3(di_int a, si_int b); // a >> b arithmetic (sign fill) +ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill) +di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill) +ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill) + +si_int __clzsi2(si_int a); // count leading zeros +si_int __clzdi2(di_int a); // count leading zeros +si_int __clzti2(ti_int a); // count leading zeros +si_int __ctzsi2(si_int a); // count trailing zeros +si_int __ctzdi2(di_int a); // count trailing zeros +si_int __ctzti2(ti_int a); // count trailing zeros + +si_int __ffsdi2(di_int a); // find least significant 1 bit +si_int __ffsti2(ti_int a); // find least significant 1 bit + +si_int __paritysi2(si_int a); // bit parity +si_int __paritydi2(di_int a); // bit parity +si_int __parityti2(ti_int a); // bit parity + +si_int __popcountsi2(si_int a); // bit population +si_int __popcountdi2(di_int a); // bit population +si_int __popcountti2(ti_int a); // bit population + +uint32_t __bswapsi2(uint32_t a); 
// a byteswapped, arm only +uint64_t __bswapdi2(uint64_t a); // a byteswapped, arm only + +// Integral arithmetic + +di_int __negdi2 (di_int a); // -a +ti_int __negti2 (ti_int a); // -a +di_int __muldi3 (di_int a, di_int b); // a * b +ti_int __multi3 (ti_int a, ti_int b); // a * b +si_int __divsi3 (si_int a, si_int b); // a / b signed +di_int __divdi3 (di_int a, di_int b); // a / b signed +ti_int __divti3 (ti_int a, ti_int b); // a / b signed +su_int __udivsi3 (su_int n, su_int d); // a / b unsigned +du_int __udivdi3 (du_int a, du_int b); // a / b unsigned +tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned +si_int __modsi3 (si_int a, si_int b); // a % b signed +di_int __moddi3 (di_int a, di_int b); // a % b signed +ti_int __modti3 (ti_int a, ti_int b); // a % b signed +su_int __umodsi3 (su_int a, su_int b); // a % b unsigned +du_int __umoddi3 (du_int a, du_int b); // a % b unsigned +tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned +du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned +tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned +su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned +si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed + + + +// Integral arithmetic with trapping overflow + +si_int __absvsi2(si_int a); // abs(a) +di_int __absvdi2(di_int a); // abs(a) +ti_int __absvti2(ti_int a); // abs(a) + +si_int __negvsi2(si_int a); // -a +di_int __negvdi2(di_int a); // -a +ti_int __negvti2(ti_int a); // -a + +si_int __addvsi3(si_int a, si_int b); // a + b +di_int __addvdi3(di_int a, di_int b); // a + b +ti_int __addvti3(ti_int a, ti_int b); // a + b + +si_int __subvsi3(si_int a, si_int b); // a - b +di_int __subvdi3(di_int a, di_int b); // a - b +ti_int __subvti3(ti_int a, ti_int b); // a - b + +si_int __mulvsi3(si_int a, si_int b); // a * b +di_int __mulvdi3(di_int a, di_int b); // a * b +ti_int __mulvti3(ti_int 
a, ti_int b); // a * b + + +// Integral arithmetic which returns if overflow + +si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range +di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range +ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to + one if result not in signed range + + +// Integral comparison: a < b -> 0 +// a == b -> 1 +// a > b -> 2 + +si_int __cmpdi2 (di_int a, di_int b); +si_int __cmpti2 (ti_int a, ti_int b); +si_int __ucmpdi2(du_int a, du_int b); +si_int __ucmpti2(tu_int a, tu_int b); + +// Integral / floating point conversion + +di_int __fixsfdi( float a); +di_int __fixdfdi( double a); +di_int __fixxfdi(long double a); + +ti_int __fixsfti( float a); +ti_int __fixdfti( double a); +ti_int __fixxfti(long double a); +uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation + +su_int __fixunssfsi( float a); +su_int __fixunsdfsi( double a); +su_int __fixunsxfsi(long double a); + +du_int __fixunssfdi( float a); +du_int __fixunsdfdi( double a); +du_int __fixunsxfdi(long double a); + +tu_int __fixunssfti( float a); +tu_int __fixunsdfti( double a); +tu_int __fixunsxfti(long double a); +uint64_t __fixunstfdi(long double input); // ppc only + +float __floatdisf(di_int a); +double __floatdidf(di_int a); +long double __floatdixf(di_int a); +long double __floatditf(int64_t a); // ppc only + +float __floattisf(ti_int a); +double __floattidf(ti_int a); +long double __floattixf(ti_int a); + +float __floatundisf(du_int a); +double __floatundidf(du_int a); +long double __floatundixf(du_int a); +long double __floatunditf(uint64_t a); // ppc only + +float __floatuntisf(tu_int a); +double __floatuntidf(tu_int a); +long double __floatuntixf(tu_int a); + +// Floating point raised to integer power + +float __powisf2( float a, si_int b); // a ^ b +double __powidf2( double a, si_int b); // a ^ b +long double 
__powixf2(long double a, si_int b); // a ^ b +long double __powitf2(long double a, si_int b); // ppc only, a ^ b + +// Complex arithmetic + +// (a + ib) * (c + id) + + float _Complex __mulsc3( float a, float b, float c, float d); + double _Complex __muldc3(double a, double b, double c, double d); +long double _Complex __mulxc3(long double a, long double b, + long double c, long double d); +long double _Complex __multc3(long double a, long double b, + long double c, long double d); // ppc only + +// (a + ib) / (c + id) + + float _Complex __divsc3( float a, float b, float c, float d); + double _Complex __divdc3(double a, double b, double c, double d); +long double _Complex __divxc3(long double a, long double b, + long double c, long double d); +long double _Complex __divtc3(long double a, long double b, + long double c, long double d); // ppc only + + +// Runtime support + +// __clear_cache() is used to tell process that new instructions have been +// written to an address range. Necessary on processors that do not have +// a unified instruction and data cache. +void __clear_cache(void* start, void* end); + +// __enable_execute_stack() is used with nested functions when a trampoline +// function is written onto the stack and that page range needs to be made +// executable. +void __enable_execute_stack(void* addr); + +// __gcc_personality_v0() is normally only called by the system unwinder. +// C code (as opposed to C++) normally does not need a personality function +// because there are no catch clauses or destructors to be run. But there +// is a C language extension __attribute__((cleanup(func))) which marks local +// variables as needing the cleanup function "func" to be run when the +// variable goes out of scope. That includes when an exception is thrown, +// so a personality handler is needed. 
+_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context); + +// for use with some implementations of assert() in +void __eprintf(const char* format, const char* assertion_expression, + const char* line, const char* file); + +// for systems with emulated thread local storage +void* __emutls_get_address(struct __emutls_control*); + + +// Power PC specific functions + +// There is no C interface to the saveFP/restFP functions. They are helper +// functions called by the prolog and epilog of functions that need to save +// a number of non-volatile float point registers. +saveFP +restFP + +// PowerPC has a standard template for trampoline functions. This function +// generates a custom trampoline function with the specific realFunc +// and localsPtr values. +void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr); + +// adds two 128-bit double-double precision values ( x + y ) +long double __gcc_qadd(long double x, long double y); + +// subtracts two 128-bit double-double precision values ( x - y ) +long double __gcc_qsub(long double x, long double y); + +// multiples two 128-bit double-double precision values ( x * y ) +long double __gcc_qmul(long double x, long double y); + +// divides two 128-bit double-double precision values ( x / y ) +long double __gcc_qdiv(long double a, long double b); + + +// ARM specific functions + +// There is no C interface to the switch* functions. These helper functions +// are only needed by Thumb1 code for efficient switch table generation. +switch16 +switch32 +switch8 +switchu8 + +// There is no C interface to the *_vfp_d8_d15_regs functions. There are +// called in the prolog and epilog of Thumb1 functions. 
When the C++ ABI use +// SJLJ for exceptions, each function with a catch clause or destuctors needs +// to save and restore all registers in it prolog and epliog. But there is +// no way to access vector and high float registers from thumb1 code, so the +// compiler must add call outs to these helper functions in the prolog and +// epilog. +restore_vfp_d8_d15_regs +save_vfp_d8_d15_regs + + +// Note: long ago ARM processors did not have floating point hardware support. +// Floating point was done in software and floating point parameters were +// passed in integer registers. When hardware support was added for floating +// point, new *vfp functions were added to do the same operations but with +// floating point parameters in floating point registers. + +// Undocumented functions + +float __addsf3vfp(float a, float b); // Appears to return a + b +double __adddf3vfp(double a, double b); // Appears to return a + b +float __divsf3vfp(float a, float b); // Appears to return a / b +double __divdf3vfp(double a, double b); // Appears to return a / b +int __eqsf2vfp(float a, float b); // Appears to return one + // iff a == b and neither is NaN. +int __eqdf2vfp(double a, double b); // Appears to return one + // iff a == b and neither is NaN. +double __extendsfdf2vfp(float a); // Appears to convert from + // float to double. +int __fixdfsivfp(double a); // Appears to convert from + // double to int. +int __fixsfsivfp(float a); // Appears to convert from + // float to int. +unsigned int __fixunssfsivfp(float a); // Appears to convert from + // float to unsigned int. +unsigned int __fixunsdfsivfp(double a); // Appears to convert from + // double to unsigned int. +double __floatsidfvfp(int a); // Appears to convert from + // int to double. +float __floatsisfvfp(int a); // Appears to convert from + // int to float. +double __floatunssidfvfp(unsigned int a); // Appears to convert from + // unisgned int to double. 
+float __floatunssisfvfp(unsigned int a); // Appears to convert from + // unisgned int to float. +int __gedf2vfp(double a, double b); // Appears to return __gedf2 + // (a >= b) +int __gesf2vfp(float a, float b); // Appears to return __gesf2 + // (a >= b) +int __gtdf2vfp(double a, double b); // Appears to return __gtdf2 + // (a > b) +int __gtsf2vfp(float a, float b); // Appears to return __gtsf2 + // (a > b) +int __ledf2vfp(double a, double b); // Appears to return __ledf2 + // (a <= b) +int __lesf2vfp(float a, float b); // Appears to return __lesf2 + // (a <= b) +int __ltdf2vfp(double a, double b); // Appears to return __ltdf2 + // (a < b) +int __ltsf2vfp(float a, float b); // Appears to return __ltsf2 + // (a < b) +double __muldf3vfp(double a, double b); // Appears to return a * b +float __mulsf3vfp(float a, float b); // Appears to return a * b +int __nedf2vfp(double a, double b); // Appears to return __nedf2 + // (a != b) +double __negdf2vfp(double a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +double __subdf3vfp(double a, double b); // Appears to return a - b +float __subsf3vfp(float a, float b); // Appears to return a - b +float __truncdfsf2vfp(double a); // Appears to convert from + // double to float. +int __unorddf2vfp(double a, double b); // Appears to return __unorddf2 +int __unordsf2vfp(float a, float b); // Appears to return __unordsf2 + + +Preconditions are listed for each function at the definition when there are any. +Any preconditions reflect the specification at +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc. + +Assumptions are listed in "int_lib.h", and in individual files. Where possible +assumptions are checked at compile time. 
diff --git a/contrib/libs/cxxsupp/builtins/absvdi2.c b/contrib/libs/cxxsupp/builtins/absvdi2.c index f87098395ac..682c2355d2a 100644 --- a/contrib/libs/cxxsupp/builtins/absvdi2.c +++ b/contrib/libs/cxxsupp/builtins/absvdi2.c @@ -1,29 +1,29 @@ -/*===-- absvdi2.c - Implement __absvdi2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------=== - * - * This file implements __absvdi2 for the compiler_rt library. - * - *===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: absolute value */ - -/* Effects: aborts if abs(x) < 0 */ - -COMPILER_RT_ABI di_int -__absvdi2(di_int a) -{ - const int N = (int)(sizeof(di_int) * CHAR_BIT); - if (a == ((di_int)1 << (N-1))) - compilerrt_abort(); - const di_int t = a >> (N - 1); - return (a ^ t) - t; -} +/*===-- absvdi2.c - Implement __absvdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * This file implements __absvdi2 for the compiler_rt library. 
+ * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: absolute value */ + +/* Effects: aborts if abs(x) < 0 */ + +COMPILER_RT_ABI di_int +__absvdi2(di_int a) +{ + const int N = (int)(sizeof(di_int) * CHAR_BIT); + if (a == ((di_int)1 << (N-1))) + compilerrt_abort(); + const di_int t = a >> (N - 1); + return (a ^ t) - t; +} diff --git a/contrib/libs/cxxsupp/builtins/absvsi2.c b/contrib/libs/cxxsupp/builtins/absvsi2.c index e7d2a82c981..4812af81598 100644 --- a/contrib/libs/cxxsupp/builtins/absvsi2.c +++ b/contrib/libs/cxxsupp/builtins/absvsi2.c @@ -1,29 +1,29 @@ -/* ===-- absvsi2.c - Implement __absvsi2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __absvsi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: absolute value */ - -/* Effects: aborts if abs(x) < 0 */ - -COMPILER_RT_ABI si_int -__absvsi2(si_int a) -{ - const int N = (int)(sizeof(si_int) * CHAR_BIT); - if (a == (1 << (N-1))) - compilerrt_abort(); - const si_int t = a >> (N - 1); - return (a ^ t) - t; -} +/* ===-- absvsi2.c - Implement __absvsi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __absvsi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: absolute value */ + +/* Effects: aborts if abs(x) < 0 */ + +COMPILER_RT_ABI si_int +__absvsi2(si_int a) +{ + const int N = (int)(sizeof(si_int) * CHAR_BIT); + if (a == (1 << (N-1))) + compilerrt_abort(); + const si_int t = a >> (N - 1); + return (a ^ t) - t; +} diff --git a/contrib/libs/cxxsupp/builtins/absvti2.c b/contrib/libs/cxxsupp/builtins/absvti2.c index 945673abffb..7927770c9ab 100644 --- a/contrib/libs/cxxsupp/builtins/absvti2.c +++ b/contrib/libs/cxxsupp/builtins/absvti2.c @@ -1,34 +1,34 @@ -/* ===-- absvti2.c - Implement __absvdi2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __absvti2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: absolute value */ - -/* Effects: aborts if abs(x) < 0 */ - -COMPILER_RT_ABI ti_int -__absvti2(ti_int a) -{ - const int N = (int)(sizeof(ti_int) * CHAR_BIT); - if (a == ((ti_int)1 << (N-1))) - compilerrt_abort(); - const ti_int s = a >> (N - 1); - return (a ^ s) - s; -} - -#endif /* CRT_HAS_128BIT */ - +/* ===-- absvti2.c - Implement __absvdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __absvti2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: absolute value */ + +/* Effects: aborts if abs(x) < 0 */ + +COMPILER_RT_ABI ti_int +__absvti2(ti_int a) +{ + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + if (a == ((ti_int)1 << (N-1))) + compilerrt_abort(); + const ti_int s = a >> (N - 1); + return (a ^ s) - s; +} + +#endif /* CRT_HAS_128BIT */ + diff --git a/contrib/libs/cxxsupp/builtins/adddf3.c b/contrib/libs/cxxsupp/builtins/adddf3.c index 7d80a363941..8b7aae0a6f8 100644 --- a/contrib/libs/cxxsupp/builtins/adddf3.c +++ b/contrib/libs/cxxsupp/builtins/adddf3.c @@ -1,22 +1,22 @@ -//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements double-precision soft-float addition with the IEEE-754 -// default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_add_impl.inc" - -ARM_EABI_FNALIAS(dadd, adddf3) - -COMPILER_RT_ABI double __adddf3(double a, double b){ - return __addXf3__(a, b); -} +//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float addition with the IEEE-754 +// default rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_add_impl.inc" + +ARM_EABI_FNALIAS(dadd, adddf3) + +COMPILER_RT_ABI double __adddf3(double a, double b){ + return __addXf3__(a, b); +} diff --git a/contrib/libs/cxxsupp/builtins/addsf3.c b/contrib/libs/cxxsupp/builtins/addsf3.c index eddb1c63607..0f5d6ea4097 100644 --- a/contrib/libs/cxxsupp/builtins/addsf3.c +++ b/contrib/libs/cxxsupp/builtins/addsf3.c @@ -1,22 +1,22 @@ -//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float addition with the IEEE-754 -// default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_add_impl.inc" - -ARM_EABI_FNALIAS(fadd, addsf3) - -COMPILER_RT_ABI float __addsf3(float a, float b) { - return __addXf3__(a, b); -} +//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float addition with the IEEE-754 +// default rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_add_impl.inc" + +ARM_EABI_FNALIAS(fadd, addsf3) + +COMPILER_RT_ABI float __addsf3(float a, float b) { + return __addXf3__(a, b); +} diff --git a/contrib/libs/cxxsupp/builtins/addtf3.c b/contrib/libs/cxxsupp/builtins/addtf3.c index 7841492b1c9..e4bbe0227ae 100644 --- a/contrib/libs/cxxsupp/builtins/addtf3.c +++ b/contrib/libs/cxxsupp/builtins/addtf3.c @@ -1,25 +1,25 @@ -//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements quad-precision soft-float addition with the IEEE-754 -// default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -#include "fp_add_impl.inc" - -COMPILER_RT_ABI long double __addtf3(long double a, long double b){ - return __addXf3__(a, b); -} - -#endif +//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float addition with the IEEE-754 +// default rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#include "fp_add_impl.inc" + +COMPILER_RT_ABI long double __addtf3(long double a, long double b){ + return __addXf3__(a, b); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/addvdi3.c b/contrib/libs/cxxsupp/builtins/addvdi3.c index 323d2eba79a..0da38945679 100644 --- a/contrib/libs/cxxsupp/builtins/addvdi3.c +++ b/contrib/libs/cxxsupp/builtins/addvdi3.c @@ -1,36 +1,36 @@ -/* ===-- addvdi3.c - Implement __addvdi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __addvdi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a + b */ - -/* Effects: aborts if a + b overflows */ - -COMPILER_RT_ABI di_int -__addvdi3(di_int a, di_int b) -{ - di_int s = (du_int) a + (du_int) b; - if (b >= 0) - { - if (s < a) - compilerrt_abort(); - } - else - { - if (s >= a) - compilerrt_abort(); - } - return s; -} +/* ===-- addvdi3.c - Implement __addvdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __addvdi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a + b */ + +/* Effects: aborts if a + b overflows */ + +COMPILER_RT_ABI di_int +__addvdi3(di_int a, di_int b) +{ + di_int s = (du_int) a + (du_int) b; + if (b >= 0) + { + if (s < a) + compilerrt_abort(); + } + else + { + if (s >= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/libs/cxxsupp/builtins/addvsi3.c b/contrib/libs/cxxsupp/builtins/addvsi3.c index 55d14ecbdb7..94ca726f42b 100644 --- a/contrib/libs/cxxsupp/builtins/addvsi3.c +++ b/contrib/libs/cxxsupp/builtins/addvsi3.c @@ -1,36 +1,36 @@ -/* ===-- addvsi3.c - Implement __addvsi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __addvsi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a + b */ - -/* Effects: aborts if a + b overflows */ - -COMPILER_RT_ABI si_int -__addvsi3(si_int a, si_int b) -{ - si_int s = (su_int) a + (su_int) b; - if (b >= 0) - { - if (s < a) - compilerrt_abort(); - } - else - { - if (s >= a) - compilerrt_abort(); - } - return s; -} +/* ===-- addvsi3.c - Implement __addvsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __addvsi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a + b */ + +/* Effects: aborts if a + b overflows */ + +COMPILER_RT_ABI si_int +__addvsi3(si_int a, si_int b) +{ + si_int s = (su_int) a + (su_int) b; + if (b >= 0) + { + if (s < a) + compilerrt_abort(); + } + else + { + if (s >= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/libs/cxxsupp/builtins/addvti3.c b/contrib/libs/cxxsupp/builtins/addvti3.c index fb2acff91af..c224de60aab 100644 --- a/contrib/libs/cxxsupp/builtins/addvti3.c +++ b/contrib/libs/cxxsupp/builtins/addvti3.c @@ -1,40 +1,40 @@ -/* ===-- addvti3.c - Implement __addvti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __addvti3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a + b */ - -/* Effects: aborts if a + b overflows */ - -COMPILER_RT_ABI ti_int -__addvti3(ti_int a, ti_int b) -{ - ti_int s = (tu_int) a + (tu_int) b; - if (b >= 0) - { - if (s < a) - compilerrt_abort(); - } - else - { - if (s >= a) - compilerrt_abort(); - } - return s; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- addvti3.c - Implement __addvti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __addvti3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a + b */ + +/* Effects: aborts if a + b overflows */ + +COMPILER_RT_ABI ti_int +__addvti3(ti_int a, ti_int b) +{ + ti_int s = (tu_int) a + (tu_int) b; + if (b >= 0) + { + if (s < a) + compilerrt_abort(); + } + else + { + if (s >= a) + compilerrt_abort(); + } + return s; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/apple_versioning.c b/contrib/libs/cxxsupp/builtins/apple_versioning.c index 2b852f87ffe..3797a1ab02d 100644 --- a/contrib/libs/cxxsupp/builtins/apple_versioning.c +++ b/contrib/libs/cxxsupp/builtins/apple_versioning.c @@ -1,350 +1,350 @@ -/* ===-- apple_versioning.c - Adds versioning symbols for ld ---------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - - -#if __APPLE__ - #include - - #if __IPHONE_OS_VERSION_MIN_REQUIRED - #define NOT_HERE_BEFORE_10_6(sym) - #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ - extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \ - extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \ - extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \ - extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp50 = 0; - #else - #define NOT_HERE_BEFORE_10_6(sym) \ - extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ - extern const char 
sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp5 = 0; - #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ - extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \ - extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \ - extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp6 = 0; - #endif - - -/* Symbols in libSystem.dylib in 10.6 and later, - * but are in libgcc_s.dylib in earlier versions - */ - -NOT_HERE_BEFORE_10_6(__absvdi2) -NOT_HERE_BEFORE_10_6(__absvsi2) -NOT_HERE_BEFORE_10_6(__absvti2) -NOT_HERE_BEFORE_10_6(__addvdi3) -NOT_HERE_BEFORE_10_6(__addvsi3) -NOT_HERE_BEFORE_10_6(__addvti3) -NOT_HERE_BEFORE_10_6(__ashldi3) -NOT_HERE_BEFORE_10_6(__ashlti3) -NOT_HERE_BEFORE_10_6(__ashrdi3) -NOT_HERE_BEFORE_10_6(__ashrti3) -NOT_HERE_BEFORE_10_6(__clear_cache) -NOT_HERE_BEFORE_10_6(__clzdi2) -NOT_HERE_BEFORE_10_6(__clzsi2) -NOT_HERE_BEFORE_10_6(__clzti2) -NOT_HERE_BEFORE_10_6(__cmpdi2) -NOT_HERE_BEFORE_10_6(__cmpti2) -NOT_HERE_BEFORE_10_6(__ctzdi2) -NOT_HERE_BEFORE_10_6(__ctzsi2) -NOT_HERE_BEFORE_10_6(__ctzti2) -NOT_HERE_BEFORE_10_6(__divdc3) -NOT_HERE_BEFORE_10_6(__divdi3) -NOT_HERE_BEFORE_10_6(__divsc3) -NOT_HERE_BEFORE_10_6(__divtc3) -NOT_HERE_BEFORE_10_6(__divti3) -NOT_HERE_BEFORE_10_6(__divxc3) -NOT_HERE_BEFORE_10_6(__enable_execute_stack) -NOT_HERE_BEFORE_10_6(__ffsdi2) -NOT_HERE_BEFORE_10_6(__ffsti2) -NOT_HERE_BEFORE_10_6(__fixdfdi) -NOT_HERE_BEFORE_10_6(__fixdfti) -NOT_HERE_BEFORE_10_6(__fixsfdi) -NOT_HERE_BEFORE_10_6(__fixsfti) -NOT_HERE_BEFORE_10_6(__fixtfdi) -NOT_HERE_BEFORE_10_6(__fixunsdfdi) -NOT_HERE_BEFORE_10_6(__fixunsdfsi) -NOT_HERE_BEFORE_10_6(__fixunsdfti) -NOT_HERE_BEFORE_10_6(__fixunssfdi) -NOT_HERE_BEFORE_10_6(__fixunssfsi) -NOT_HERE_BEFORE_10_6(__fixunssfti) 
-NOT_HERE_BEFORE_10_6(__fixunstfdi) -NOT_HERE_BEFORE_10_6(__fixunsxfdi) -NOT_HERE_BEFORE_10_6(__fixunsxfsi) -NOT_HERE_BEFORE_10_6(__fixunsxfti) -NOT_HERE_BEFORE_10_6(__fixxfdi) -NOT_HERE_BEFORE_10_6(__fixxfti) -NOT_HERE_BEFORE_10_6(__floatdidf) -NOT_HERE_BEFORE_10_6(__floatdisf) -NOT_HERE_BEFORE_10_6(__floatditf) -NOT_HERE_BEFORE_10_6(__floatdixf) -NOT_HERE_BEFORE_10_6(__floattidf) -NOT_HERE_BEFORE_10_6(__floattisf) -NOT_HERE_BEFORE_10_6(__floattixf) -NOT_HERE_BEFORE_10_6(__floatundidf) -NOT_HERE_BEFORE_10_6(__floatundisf) -NOT_HERE_BEFORE_10_6(__floatunditf) -NOT_HERE_BEFORE_10_6(__floatundixf) -NOT_HERE_BEFORE_10_6(__floatuntidf) -NOT_HERE_BEFORE_10_6(__floatuntisf) -NOT_HERE_BEFORE_10_6(__floatuntixf) -NOT_HERE_BEFORE_10_6(__gcc_personality_v0) -NOT_HERE_BEFORE_10_6(__lshrdi3) -NOT_HERE_BEFORE_10_6(__lshrti3) -NOT_HERE_BEFORE_10_6(__moddi3) -NOT_HERE_BEFORE_10_6(__modti3) -NOT_HERE_BEFORE_10_6(__muldc3) -NOT_HERE_BEFORE_10_6(__muldi3) -NOT_HERE_BEFORE_10_6(__mulsc3) -NOT_HERE_BEFORE_10_6(__multc3) -NOT_HERE_BEFORE_10_6(__multi3) -NOT_HERE_BEFORE_10_6(__mulvdi3) -NOT_HERE_BEFORE_10_6(__mulvsi3) -NOT_HERE_BEFORE_10_6(__mulvti3) -NOT_HERE_BEFORE_10_6(__mulxc3) -NOT_HERE_BEFORE_10_6(__negdi2) -NOT_HERE_BEFORE_10_6(__negti2) -NOT_HERE_BEFORE_10_6(__negvdi2) -NOT_HERE_BEFORE_10_6(__negvsi2) -NOT_HERE_BEFORE_10_6(__negvti2) -NOT_HERE_BEFORE_10_6(__paritydi2) -NOT_HERE_BEFORE_10_6(__paritysi2) -NOT_HERE_BEFORE_10_6(__parityti2) -NOT_HERE_BEFORE_10_6(__popcountdi2) -NOT_HERE_BEFORE_10_6(__popcountsi2) -NOT_HERE_BEFORE_10_6(__popcountti2) -NOT_HERE_BEFORE_10_6(__powidf2) -NOT_HERE_BEFORE_10_6(__powisf2) -NOT_HERE_BEFORE_10_6(__powitf2) -NOT_HERE_BEFORE_10_6(__powixf2) -NOT_HERE_BEFORE_10_6(__subvdi3) -NOT_HERE_BEFORE_10_6(__subvsi3) -NOT_HERE_BEFORE_10_6(__subvti3) -NOT_HERE_BEFORE_10_6(__ucmpdi2) -NOT_HERE_BEFORE_10_6(__ucmpti2) -NOT_HERE_BEFORE_10_6(__udivdi3) -NOT_HERE_BEFORE_10_6(__udivmoddi4) -NOT_HERE_BEFORE_10_6(__udivmodti4) -NOT_HERE_BEFORE_10_6(__udivti3) 
-NOT_HERE_BEFORE_10_6(__umoddi3) -NOT_HERE_BEFORE_10_6(__umodti3) - - -#if __ppc__ -NOT_HERE_BEFORE_10_6(__gcc_qadd) -NOT_HERE_BEFORE_10_6(__gcc_qdiv) -NOT_HERE_BEFORE_10_6(__gcc_qmul) -NOT_HERE_BEFORE_10_6(__gcc_qsub) -NOT_HERE_BEFORE_10_6(__trampoline_setup) -#endif /* __ppc__ */ - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_1) 
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_8) - -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_1) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_2) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4) -NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8) - - -#if __arm__ && __DYNAMIC__ - #define NOT_HERE_UNTIL_AFTER_4_3(sym) \ - extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \ - extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \ - extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \ - extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ - extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ - extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \ - extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp7 = 0; - -NOT_HERE_UNTIL_AFTER_4_3(__absvdi2) -NOT_HERE_UNTIL_AFTER_4_3(__absvsi2) -NOT_HERE_UNTIL_AFTER_4_3(__adddf3) -NOT_HERE_UNTIL_AFTER_4_3(__adddf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__addsf3) -NOT_HERE_UNTIL_AFTER_4_3(__addsf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__addvdi3) -NOT_HERE_UNTIL_AFTER_4_3(__addvsi3) -NOT_HERE_UNTIL_AFTER_4_3(__ashldi3) -NOT_HERE_UNTIL_AFTER_4_3(__ashrdi3) -NOT_HERE_UNTIL_AFTER_4_3(__bswapdi2) -NOT_HERE_UNTIL_AFTER_4_3(__bswapsi2) -NOT_HERE_UNTIL_AFTER_4_3(__clzdi2) -NOT_HERE_UNTIL_AFTER_4_3(__clzsi2) -NOT_HERE_UNTIL_AFTER_4_3(__cmpdi2) -NOT_HERE_UNTIL_AFTER_4_3(__ctzdi2) 
-NOT_HERE_UNTIL_AFTER_4_3(__ctzsi2) -NOT_HERE_UNTIL_AFTER_4_3(__divdc3) -NOT_HERE_UNTIL_AFTER_4_3(__divdf3) -NOT_HERE_UNTIL_AFTER_4_3(__divdf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__divdi3) -NOT_HERE_UNTIL_AFTER_4_3(__divsc3) -NOT_HERE_UNTIL_AFTER_4_3(__divsf3) -NOT_HERE_UNTIL_AFTER_4_3(__divsf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__divsi3) -NOT_HERE_UNTIL_AFTER_4_3(__eqdf2) -NOT_HERE_UNTIL_AFTER_4_3(__eqdf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__eqsf2) -NOT_HERE_UNTIL_AFTER_4_3(__eqsf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2) -NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__ffsdi2) -NOT_HERE_UNTIL_AFTER_4_3(__fixdfdi) -NOT_HERE_UNTIL_AFTER_4_3(__fixdfsi) -NOT_HERE_UNTIL_AFTER_4_3(__fixdfsivfp) -NOT_HERE_UNTIL_AFTER_4_3(__fixsfdi) -NOT_HERE_UNTIL_AFTER_4_3(__fixsfsi) -NOT_HERE_UNTIL_AFTER_4_3(__fixsfsivfp) -NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfdi) -NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsi) -NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsivfp) -NOT_HERE_UNTIL_AFTER_4_3(__fixunssfdi) -NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsi) -NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsivfp) -NOT_HERE_UNTIL_AFTER_4_3(__floatdidf) -NOT_HERE_UNTIL_AFTER_4_3(__floatdisf) -NOT_HERE_UNTIL_AFTER_4_3(__floatsidf) -NOT_HERE_UNTIL_AFTER_4_3(__floatsidfvfp) -NOT_HERE_UNTIL_AFTER_4_3(__floatsisf) -NOT_HERE_UNTIL_AFTER_4_3(__floatsisfvfp) -NOT_HERE_UNTIL_AFTER_4_3(__floatundidf) -NOT_HERE_UNTIL_AFTER_4_3(__floatundisf) -NOT_HERE_UNTIL_AFTER_4_3(__floatunsidf) -NOT_HERE_UNTIL_AFTER_4_3(__floatunsisf) -NOT_HERE_UNTIL_AFTER_4_3(__floatunssidfvfp) -NOT_HERE_UNTIL_AFTER_4_3(__floatunssisfvfp) -NOT_HERE_UNTIL_AFTER_4_3(__gedf2) -NOT_HERE_UNTIL_AFTER_4_3(__gedf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__gesf2) -NOT_HERE_UNTIL_AFTER_4_3(__gesf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__gtdf2) -NOT_HERE_UNTIL_AFTER_4_3(__gtdf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__gtsf2) -NOT_HERE_UNTIL_AFTER_4_3(__gtsf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__ledf2) -NOT_HERE_UNTIL_AFTER_4_3(__ledf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__lesf2) 
-NOT_HERE_UNTIL_AFTER_4_3(__lesf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__lshrdi3) -NOT_HERE_UNTIL_AFTER_4_3(__ltdf2) -NOT_HERE_UNTIL_AFTER_4_3(__ltdf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__ltsf2) -NOT_HERE_UNTIL_AFTER_4_3(__ltsf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__moddi3) -NOT_HERE_UNTIL_AFTER_4_3(__modsi3) -NOT_HERE_UNTIL_AFTER_4_3(__muldc3) -NOT_HERE_UNTIL_AFTER_4_3(__muldf3) -NOT_HERE_UNTIL_AFTER_4_3(__muldf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__muldi3) -NOT_HERE_UNTIL_AFTER_4_3(__mulsc3) -NOT_HERE_UNTIL_AFTER_4_3(__mulsf3) -NOT_HERE_UNTIL_AFTER_4_3(__mulsf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__mulvdi3) -NOT_HERE_UNTIL_AFTER_4_3(__mulvsi3) -NOT_HERE_UNTIL_AFTER_4_3(__nedf2) -NOT_HERE_UNTIL_AFTER_4_3(__nedf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__negdi2) -NOT_HERE_UNTIL_AFTER_4_3(__negvdi2) -NOT_HERE_UNTIL_AFTER_4_3(__negvsi2) -NOT_HERE_UNTIL_AFTER_4_3(__nesf2) -NOT_HERE_UNTIL_AFTER_4_3(__nesf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__paritydi2) -NOT_HERE_UNTIL_AFTER_4_3(__paritysi2) -NOT_HERE_UNTIL_AFTER_4_3(__popcountdi2) -NOT_HERE_UNTIL_AFTER_4_3(__popcountsi2) -NOT_HERE_UNTIL_AFTER_4_3(__powidf2) -NOT_HERE_UNTIL_AFTER_4_3(__powisf2) -NOT_HERE_UNTIL_AFTER_4_3(__subdf3) -NOT_HERE_UNTIL_AFTER_4_3(__subdf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__subsf3) -NOT_HERE_UNTIL_AFTER_4_3(__subsf3vfp) -NOT_HERE_UNTIL_AFTER_4_3(__subvdi3) -NOT_HERE_UNTIL_AFTER_4_3(__subvsi3) -NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2) -NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__ucmpdi2) -NOT_HERE_UNTIL_AFTER_4_3(__udivdi3) -NOT_HERE_UNTIL_AFTER_4_3(__udivmoddi4) -NOT_HERE_UNTIL_AFTER_4_3(__udivsi3) -NOT_HERE_UNTIL_AFTER_4_3(__umoddi3) -NOT_HERE_UNTIL_AFTER_4_3(__umodsi3) -NOT_HERE_UNTIL_AFTER_4_3(__unorddf2) -NOT_HERE_UNTIL_AFTER_4_3(__unorddf2vfp) -NOT_HERE_UNTIL_AFTER_4_3(__unordsf2) -NOT_HERE_UNTIL_AFTER_4_3(__unordsf2vfp) - -NOT_HERE_UNTIL_AFTER_4_3(__divmodsi4) -NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4) -#endif // __arm__ && __DYNAMIC__ - - - - - -#else /* !__APPLE__ */ - -extern int avoid_empty_file; - -#endif /* 
!__APPLE__*/ +/* ===-- apple_versioning.c - Adds versioning symbols for ld ---------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + + +#if __APPLE__ + #include + + #if __IPHONE_OS_VERSION_MIN_REQUIRED + #define NOT_HERE_BEFORE_10_6(sym) + #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \ + extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \ + extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \ + extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp50 = 0; + #else + #define NOT_HERE_BEFORE_10_6(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; + #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; + #endif + + +/* Symbols in libSystem.dylib in 10.6 and later, + * but are in libgcc_s.dylib in earlier versions + */ + +NOT_HERE_BEFORE_10_6(__absvdi2) 
+NOT_HERE_BEFORE_10_6(__absvsi2) +NOT_HERE_BEFORE_10_6(__absvti2) +NOT_HERE_BEFORE_10_6(__addvdi3) +NOT_HERE_BEFORE_10_6(__addvsi3) +NOT_HERE_BEFORE_10_6(__addvti3) +NOT_HERE_BEFORE_10_6(__ashldi3) +NOT_HERE_BEFORE_10_6(__ashlti3) +NOT_HERE_BEFORE_10_6(__ashrdi3) +NOT_HERE_BEFORE_10_6(__ashrti3) +NOT_HERE_BEFORE_10_6(__clear_cache) +NOT_HERE_BEFORE_10_6(__clzdi2) +NOT_HERE_BEFORE_10_6(__clzsi2) +NOT_HERE_BEFORE_10_6(__clzti2) +NOT_HERE_BEFORE_10_6(__cmpdi2) +NOT_HERE_BEFORE_10_6(__cmpti2) +NOT_HERE_BEFORE_10_6(__ctzdi2) +NOT_HERE_BEFORE_10_6(__ctzsi2) +NOT_HERE_BEFORE_10_6(__ctzti2) +NOT_HERE_BEFORE_10_6(__divdc3) +NOT_HERE_BEFORE_10_6(__divdi3) +NOT_HERE_BEFORE_10_6(__divsc3) +NOT_HERE_BEFORE_10_6(__divtc3) +NOT_HERE_BEFORE_10_6(__divti3) +NOT_HERE_BEFORE_10_6(__divxc3) +NOT_HERE_BEFORE_10_6(__enable_execute_stack) +NOT_HERE_BEFORE_10_6(__ffsdi2) +NOT_HERE_BEFORE_10_6(__ffsti2) +NOT_HERE_BEFORE_10_6(__fixdfdi) +NOT_HERE_BEFORE_10_6(__fixdfti) +NOT_HERE_BEFORE_10_6(__fixsfdi) +NOT_HERE_BEFORE_10_6(__fixsfti) +NOT_HERE_BEFORE_10_6(__fixtfdi) +NOT_HERE_BEFORE_10_6(__fixunsdfdi) +NOT_HERE_BEFORE_10_6(__fixunsdfsi) +NOT_HERE_BEFORE_10_6(__fixunsdfti) +NOT_HERE_BEFORE_10_6(__fixunssfdi) +NOT_HERE_BEFORE_10_6(__fixunssfsi) +NOT_HERE_BEFORE_10_6(__fixunssfti) +NOT_HERE_BEFORE_10_6(__fixunstfdi) +NOT_HERE_BEFORE_10_6(__fixunsxfdi) +NOT_HERE_BEFORE_10_6(__fixunsxfsi) +NOT_HERE_BEFORE_10_6(__fixunsxfti) +NOT_HERE_BEFORE_10_6(__fixxfdi) +NOT_HERE_BEFORE_10_6(__fixxfti) +NOT_HERE_BEFORE_10_6(__floatdidf) +NOT_HERE_BEFORE_10_6(__floatdisf) +NOT_HERE_BEFORE_10_6(__floatditf) +NOT_HERE_BEFORE_10_6(__floatdixf) +NOT_HERE_BEFORE_10_6(__floattidf) +NOT_HERE_BEFORE_10_6(__floattisf) +NOT_HERE_BEFORE_10_6(__floattixf) +NOT_HERE_BEFORE_10_6(__floatundidf) +NOT_HERE_BEFORE_10_6(__floatundisf) +NOT_HERE_BEFORE_10_6(__floatunditf) +NOT_HERE_BEFORE_10_6(__floatundixf) +NOT_HERE_BEFORE_10_6(__floatuntidf) +NOT_HERE_BEFORE_10_6(__floatuntisf) +NOT_HERE_BEFORE_10_6(__floatuntixf) 
+NOT_HERE_BEFORE_10_6(__gcc_personality_v0) +NOT_HERE_BEFORE_10_6(__lshrdi3) +NOT_HERE_BEFORE_10_6(__lshrti3) +NOT_HERE_BEFORE_10_6(__moddi3) +NOT_HERE_BEFORE_10_6(__modti3) +NOT_HERE_BEFORE_10_6(__muldc3) +NOT_HERE_BEFORE_10_6(__muldi3) +NOT_HERE_BEFORE_10_6(__mulsc3) +NOT_HERE_BEFORE_10_6(__multc3) +NOT_HERE_BEFORE_10_6(__multi3) +NOT_HERE_BEFORE_10_6(__mulvdi3) +NOT_HERE_BEFORE_10_6(__mulvsi3) +NOT_HERE_BEFORE_10_6(__mulvti3) +NOT_HERE_BEFORE_10_6(__mulxc3) +NOT_HERE_BEFORE_10_6(__negdi2) +NOT_HERE_BEFORE_10_6(__negti2) +NOT_HERE_BEFORE_10_6(__negvdi2) +NOT_HERE_BEFORE_10_6(__negvsi2) +NOT_HERE_BEFORE_10_6(__negvti2) +NOT_HERE_BEFORE_10_6(__paritydi2) +NOT_HERE_BEFORE_10_6(__paritysi2) +NOT_HERE_BEFORE_10_6(__parityti2) +NOT_HERE_BEFORE_10_6(__popcountdi2) +NOT_HERE_BEFORE_10_6(__popcountsi2) +NOT_HERE_BEFORE_10_6(__popcountti2) +NOT_HERE_BEFORE_10_6(__powidf2) +NOT_HERE_BEFORE_10_6(__powisf2) +NOT_HERE_BEFORE_10_6(__powitf2) +NOT_HERE_BEFORE_10_6(__powixf2) +NOT_HERE_BEFORE_10_6(__subvdi3) +NOT_HERE_BEFORE_10_6(__subvsi3) +NOT_HERE_BEFORE_10_6(__subvti3) +NOT_HERE_BEFORE_10_6(__ucmpdi2) +NOT_HERE_BEFORE_10_6(__ucmpti2) +NOT_HERE_BEFORE_10_6(__udivdi3) +NOT_HERE_BEFORE_10_6(__udivmoddi4) +NOT_HERE_BEFORE_10_6(__udivmodti4) +NOT_HERE_BEFORE_10_6(__udivti3) +NOT_HERE_BEFORE_10_6(__umoddi3) +NOT_HERE_BEFORE_10_6(__umodti3) + + +#if __ppc__ +NOT_HERE_BEFORE_10_6(__gcc_qadd) +NOT_HERE_BEFORE_10_6(__gcc_qdiv) +NOT_HERE_BEFORE_10_6(__gcc_qmul) +NOT_HERE_BEFORE_10_6(__gcc_qsub) +NOT_HERE_BEFORE_10_6(__trampoline_setup) +#endif /* __ppc__ */ + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_1) 
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8) + + +#if __arm__ && __DYNAMIC__ + #define NOT_HERE_UNTIL_AFTER_4_3(sym) \ + extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \ + extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym ); \ + __attribute__((visibility("default"))) const 
char sym##_tmp2 = 0; \ + extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \ + extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; + +NOT_HERE_UNTIL_AFTER_4_3(__absvdi2) +NOT_HERE_UNTIL_AFTER_4_3(__absvsi2) +NOT_HERE_UNTIL_AFTER_4_3(__adddf3) +NOT_HERE_UNTIL_AFTER_4_3(__adddf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__addsf3) +NOT_HERE_UNTIL_AFTER_4_3(__addsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__addvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__addvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__ashldi3) +NOT_HERE_UNTIL_AFTER_4_3(__ashrdi3) +NOT_HERE_UNTIL_AFTER_4_3(__bswapdi2) +NOT_HERE_UNTIL_AFTER_4_3(__bswapsi2) +NOT_HERE_UNTIL_AFTER_4_3(__clzdi2) +NOT_HERE_UNTIL_AFTER_4_3(__clzsi2) +NOT_HERE_UNTIL_AFTER_4_3(__cmpdi2) +NOT_HERE_UNTIL_AFTER_4_3(__ctzdi2) +NOT_HERE_UNTIL_AFTER_4_3(__ctzsi2) +NOT_HERE_UNTIL_AFTER_4_3(__divdc3) +NOT_HERE_UNTIL_AFTER_4_3(__divdf3) +NOT_HERE_UNTIL_AFTER_4_3(__divdf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__divdi3) +NOT_HERE_UNTIL_AFTER_4_3(__divsc3) +NOT_HERE_UNTIL_AFTER_4_3(__divsf3) +NOT_HERE_UNTIL_AFTER_4_3(__divsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__divsi3) +NOT_HERE_UNTIL_AFTER_4_3(__eqdf2) +NOT_HERE_UNTIL_AFTER_4_3(__eqdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__eqsf2) +NOT_HERE_UNTIL_AFTER_4_3(__eqsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2) +NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ffsdi2) +NOT_HERE_UNTIL_AFTER_4_3(__fixdfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixdfsi) 
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatdidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatdisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsidfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatsisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsisfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatundidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatundisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunsidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunsisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunssidfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatunssisfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__gedf2) +NOT_HERE_UNTIL_AFTER_4_3(__gedf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gesf2) +NOT_HERE_UNTIL_AFTER_4_3(__gesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gtdf2) +NOT_HERE_UNTIL_AFTER_4_3(__gtdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gtsf2) +NOT_HERE_UNTIL_AFTER_4_3(__gtsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ledf2) +NOT_HERE_UNTIL_AFTER_4_3(__ledf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__lesf2) +NOT_HERE_UNTIL_AFTER_4_3(__lesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__lshrdi3) +NOT_HERE_UNTIL_AFTER_4_3(__ltdf2) +NOT_HERE_UNTIL_AFTER_4_3(__ltdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ltsf2) +NOT_HERE_UNTIL_AFTER_4_3(__ltsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__moddi3) +NOT_HERE_UNTIL_AFTER_4_3(__modsi3) +NOT_HERE_UNTIL_AFTER_4_3(__muldc3) +NOT_HERE_UNTIL_AFTER_4_3(__muldf3) +NOT_HERE_UNTIL_AFTER_4_3(__muldf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__muldi3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsc3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsf3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__mulvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__mulvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__nedf2) +NOT_HERE_UNTIL_AFTER_4_3(__nedf2vfp) 
+NOT_HERE_UNTIL_AFTER_4_3(__negdi2) +NOT_HERE_UNTIL_AFTER_4_3(__negvdi2) +NOT_HERE_UNTIL_AFTER_4_3(__negvsi2) +NOT_HERE_UNTIL_AFTER_4_3(__nesf2) +NOT_HERE_UNTIL_AFTER_4_3(__nesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__paritydi2) +NOT_HERE_UNTIL_AFTER_4_3(__paritysi2) +NOT_HERE_UNTIL_AFTER_4_3(__popcountdi2) +NOT_HERE_UNTIL_AFTER_4_3(__popcountsi2) +NOT_HERE_UNTIL_AFTER_4_3(__powidf2) +NOT_HERE_UNTIL_AFTER_4_3(__powisf2) +NOT_HERE_UNTIL_AFTER_4_3(__subdf3) +NOT_HERE_UNTIL_AFTER_4_3(__subdf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__subsf3) +NOT_HERE_UNTIL_AFTER_4_3(__subsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__subvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__subvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2) +NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ucmpdi2) +NOT_HERE_UNTIL_AFTER_4_3(__udivdi3) +NOT_HERE_UNTIL_AFTER_4_3(__udivmoddi4) +NOT_HERE_UNTIL_AFTER_4_3(__udivsi3) +NOT_HERE_UNTIL_AFTER_4_3(__umoddi3) +NOT_HERE_UNTIL_AFTER_4_3(__umodsi3) +NOT_HERE_UNTIL_AFTER_4_3(__unorddf2) +NOT_HERE_UNTIL_AFTER_4_3(__unorddf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__unordsf2) +NOT_HERE_UNTIL_AFTER_4_3(__unordsf2vfp) + +NOT_HERE_UNTIL_AFTER_4_3(__divmodsi4) +NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4) +#endif // __arm__ && __DYNAMIC__ + + + + + +#else /* !__APPLE__ */ + +extern int avoid_empty_file; + +#endif /* !__APPLE__*/ diff --git a/contrib/libs/cxxsupp/builtins/arm/Makefile.mk b/contrib/libs/cxxsupp/builtins/arm/Makefile.mk index e41c9f2d0a2..ed2e8323e39 100644 --- a/contrib/libs/cxxsupp/builtins/arm/Makefile.mk +++ b/contrib/libs/cxxsupp/builtins/arm/Makefile.mk @@ -1,20 +1,20 @@ -#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) +#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +ModuleName := builtins +SubDirs := +OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) +Implementation := Optimized + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S index 2e8608704ad..2825ae92cd5 100644 --- a/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S @@ -1,26 +1,26 @@ -//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// double __adddf3vfp(double a, double b) { return a + b; } -// -// Adds two double precision floating point numbers using the Darwin -// calling convention where double arguments are passsed in GPR pairs -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) - vmov d6, r0, r1 // move first param from r0/r1 pair into d6 - vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vadd.f64 d6, d6, d7 - vmov r0, r1, d6 // move result back to r0/r1 pair - bx lr -END_COMPILERRT_FUNCTION(__adddf3vfp) +//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// double __adddf3vfp(double a, double b) { return a + b; } +// +// Adds two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vadd.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__adddf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S index 80c8d1b2f6d..bff5a7e0fbe 100644 --- a/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S @@ -1,26 +1,26 @@ -//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the 
University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __addsf3vfp(float a, float b); -// -// Adds two single precision floating point numbers using the Darwin -// calling convention where single arguments are passsed in GPRs -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) - vmov s14, r0 // move first param from r0 into float register - vmov s15, r1 // move second param from r1 into float register - vadd.f32 s14, s14, s15 - vmov r0, s14 // move result back to r0 - bx lr -END_COMPILERRT_FUNCTION(__addsf3vfp) +//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __addsf3vfp(float a, float b); +// +// Adds two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed in GPRs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vadd.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__addsf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S index ca2bd750bf4..036a6f542f7 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S @@ -1,96 +1,96 @@ -//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file 
is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ -#error big endian support not implemented -#endif - -#define APSR_Z (1 << 30) -#define APSR_C (1 << 29) - -// void __aeabi_cdcmpeq(double a, double b) { -// if (isnan(a) || isnan(b)) { -// Z = 0; C = 1; -// } else { -// __aeabi_cdcmple(a, b); -// } -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) - push {r0-r3, lr} - bl __aeabi_cdcmpeq_check_nan - cmp r0, #1 - pop {r0-r3, lr} - - // NaN has been ruled out, so __aeabi_cdcmple can't trap - bne __aeabi_cdcmple - - msr CPSR_f, #APSR_C - JMP(lr) -END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) - - -// void __aeabi_cdcmple(double a, double b) { -// if (__aeabi_dcmplt(a, b)) { -// Z = 0; C = 0; -// } else if (__aeabi_dcmpeq(a, b)) { -// Z = 1; C = 1; -// } else { -// Z = 0; C = 1; -// } -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) - // Per the RTABI, this function must preserve r0-r11. 
- // Save lr in the same instruction for compactness - push {r0-r3, lr} - - bl __aeabi_dcmplt - cmp r0, #1 - moveq ip, #0 - beq 1f - - ldm sp, {r0-r3} - bl __aeabi_dcmpeq - cmp r0, #1 - moveq ip, #(APSR_C | APSR_Z) - movne ip, #(APSR_C) - -1: - msr CPSR_f, ip - pop {r0-r3} - POP_PC() -END_COMPILERRT_FUNCTION(__aeabi_cdcmple) - -// int __aeabi_cdrcmple(double a, double b) { -// return __aeabi_cdcmple(b, a); -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple) - // Swap r0 and r2 - mov ip, r0 - mov r0, r2 - mov r2, ip - - // Swap r1 and r3 - mov ip, r1 - mov r1, r3 - mov r3, ip - - b __aeabi_cdcmple -END_COMPILERRT_FUNCTION(__aeabi_cdrcmple) - +//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ +#error big endian support not implemented +#endif + +#define APSR_Z (1 << 30) +#define APSR_C (1 << 29) + +// void __aeabi_cdcmpeq(double a, double b) { +// if (isnan(a) || isnan(b)) { +// Z = 0; C = 1; +// } else { +// __aeabi_cdcmple(a, b); +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) + push {r0-r3, lr} + bl __aeabi_cdcmpeq_check_nan + cmp r0, #1 + pop {r0-r3, lr} + + // NaN has been ruled out, so __aeabi_cdcmple can't trap + bne __aeabi_cdcmple + + msr CPSR_f, #APSR_C + JMP(lr) +END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) + + +// void __aeabi_cdcmple(double a, double b) { +// if (__aeabi_dcmplt(a, b)) { +// Z = 0; C = 0; +// } else if (__aeabi_dcmpeq(a, b)) { +// Z = 1; C = 1; +// } else { +// Z = 0; C = 1; +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) + // Per the RTABI, this function must 
preserve r0-r11. + // Save lr in the same instruction for compactness + push {r0-r3, lr} + + bl __aeabi_dcmplt + cmp r0, #1 + moveq ip, #0 + beq 1f + + ldm sp, {r0-r3} + bl __aeabi_dcmpeq + cmp r0, #1 + moveq ip, #(APSR_C | APSR_Z) + movne ip, #(APSR_C) + +1: + msr CPSR_f, ip + pop {r0-r3} + POP_PC() +END_COMPILERRT_FUNCTION(__aeabi_cdcmple) + +// int __aeabi_cdrcmple(double a, double b) { +// return __aeabi_cdcmple(b, a); +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple) + // Swap r0 and r2 + mov ip, r0 + mov r0, r2 + mov r2, ip + + // Swap r1 and r3 + mov ip, r1 + mov r1, r3 + mov r3, ip + + b __aeabi_cdcmple +END_COMPILERRT_FUNCTION(__aeabi_cdrcmple) + diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c index 85f484fea1e..577f6b2c553 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c @@ -1,16 +1,16 @@ -//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include - -__attribute__((pcs("aapcs"))) -__attribute__((visibility("hidden"))) -int __aeabi_cdcmpeq_check_nan(double a, double b) { - return __builtin_isnan(a) || __builtin_isnan(b); -} +//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include + +__attribute__((pcs("aapcs"))) +__attribute__((visibility("hidden"))) +int __aeabi_cdcmpeq_check_nan(double a, double b) { + return __builtin_isnan(a) || __builtin_isnan(b); +} diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S index af8a19b18a9..43594e5c393 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S @@ -1,91 +1,91 @@ -//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ -#error big endian support not implemented -#endif - -#define APSR_Z (1 << 30) -#define APSR_C (1 << 29) - -// void __aeabi_cfcmpeq(float a, float b) { -// if (isnan(a) || isnan(b)) { -// Z = 0; C = 1; -// } else { -// __aeabi_cfcmple(a, b); -// } -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) - push {r0-r3, lr} - bl __aeabi_cfcmpeq_check_nan - cmp r0, #1 - pop {r0-r3, lr} - - // NaN has been ruled out, so __aeabi_cfcmple can't trap - bne __aeabi_cfcmple - - msr CPSR_f, #APSR_C - JMP(lr) -END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) - - -// void __aeabi_cfcmple(float a, float b) { -// if (__aeabi_fcmplt(a, b)) { -// Z = 0; C = 0; -// } else if (__aeabi_fcmpeq(a, b)) { -// Z = 1; C = 1; -// } else { -// Z = 0; C = 1; -// } -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) - // Per the RTABI, this function must preserve r0-r11. 
- // Save lr in the same instruction for compactness - push {r0-r3, lr} - - bl __aeabi_fcmplt - cmp r0, #1 - moveq ip, #0 - beq 1f - - ldm sp, {r0-r3} - bl __aeabi_fcmpeq - cmp r0, #1 - moveq ip, #(APSR_C | APSR_Z) - movne ip, #(APSR_C) - -1: - msr CPSR_f, ip - pop {r0-r3} - POP_PC() -END_COMPILERRT_FUNCTION(__aeabi_cfcmple) - -// int __aeabi_cfrcmple(float a, float b) { -// return __aeabi_cfcmple(b, a); -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple) - // Swap r0 and r1 - mov ip, r0 - mov r0, r1 - mov r1, ip - - b __aeabi_cfcmple -END_COMPILERRT_FUNCTION(__aeabi_cfrcmple) - +//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ +#error big endian support not implemented +#endif + +#define APSR_Z (1 << 30) +#define APSR_C (1 << 29) + +// void __aeabi_cfcmpeq(float a, float b) { +// if (isnan(a) || isnan(b)) { +// Z = 0; C = 1; +// } else { +// __aeabi_cfcmple(a, b); +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) + push {r0-r3, lr} + bl __aeabi_cfcmpeq_check_nan + cmp r0, #1 + pop {r0-r3, lr} + + // NaN has been ruled out, so __aeabi_cfcmple can't trap + bne __aeabi_cfcmple + + msr CPSR_f, #APSR_C + JMP(lr) +END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) + + +// void __aeabi_cfcmple(float a, float b) { +// if (__aeabi_fcmplt(a, b)) { +// Z = 0; C = 0; +// } else if (__aeabi_fcmpeq(a, b)) { +// Z = 1; C = 1; +// } else { +// Z = 0; C = 1; +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) + // Per the RTABI, this function must preserve r0-r11. 
+ // Save lr in the same instruction for compactness + push {r0-r3, lr} + + bl __aeabi_fcmplt + cmp r0, #1 + moveq ip, #0 + beq 1f + + ldm sp, {r0-r3} + bl __aeabi_fcmpeq + cmp r0, #1 + moveq ip, #(APSR_C | APSR_Z) + movne ip, #(APSR_C) + +1: + msr CPSR_f, ip + pop {r0-r3} + POP_PC() +END_COMPILERRT_FUNCTION(__aeabi_cfcmple) + +// int __aeabi_cfrcmple(float a, float b) { +// return __aeabi_cfcmple(b, a); +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple) + // Swap r0 and r1 + mov ip, r0 + mov r0, r1 + mov r1, ip + + b __aeabi_cfcmple +END_COMPILERRT_FUNCTION(__aeabi_cfrcmple) + diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c index 7727bff2532..992e31fbd8d 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c @@ -1,16 +1,16 @@ -//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include - -__attribute__((pcs("aapcs"))) -__attribute__((visibility("hidden"))) -int __aeabi_cfcmpeq_check_nan(float a, float b) { - return __builtin_isnan(a) || __builtin_isnan(b); -} +//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include + +__attribute__((pcs("aapcs"))) +__attribute__((visibility("hidden"))) +int __aeabi_cfcmpeq_check_nan(float a, float b) { + return __builtin_isnan(a) || __builtin_isnan(b); +} diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S index eb413bd4dc6..310c35b7493 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S @@ -1,40 +1,40 @@ -//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) { -// int result = __{eq,lt,le,ge,gt}df2(a, b); -// if (result {==,<,<=,>=,>} 0) { -// return 1; -// } else { -// return 0; -// } -// } - -#define DEFINE_AEABI_DCMP(cond) \ - .syntax unified SEPARATOR \ - .p2align 2 SEPARATOR \ -DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ - push { r4, lr } SEPARATOR \ - bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ - cmp r0, #0 SEPARATOR \ - b ## cond 1f SEPARATOR \ - mov r0, #0 SEPARATOR \ - pop { r4, pc } SEPARATOR \ -1: SEPARATOR \ - mov r0, #1 SEPARATOR \ - pop { r4, pc } SEPARATOR \ -END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) - -DEFINE_AEABI_DCMP(eq) -DEFINE_AEABI_DCMP(lt) -DEFINE_AEABI_DCMP(le) -DEFINE_AEABI_DCMP(ge) -DEFINE_AEABI_DCMP(gt) +//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) { +// int result = __{eq,lt,le,ge,gt}df2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#define DEFINE_AEABI_DCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ + push { r4, lr } SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + mov r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + mov r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) + +DEFINE_AEABI_DCMP(eq) +DEFINE_AEABI_DCMP(lt) +DEFINE_AEABI_DCMP(le) +DEFINE_AEABI_DCMP(ge) +DEFINE_AEABI_DCMP(gt) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c index 3f2785cb072..ccc95fa5c12 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c @@ -1,43 +1,43 @@ -/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements the division by zero helper routines as specified by the - * Run-time ABI for the ARM Architecture. 
- * - * ===----------------------------------------------------------------------=== - */ - -/* - * RTABI 4.3.2 - Division by zero - * - * The *div0 functions: - * - Return the value passed to them as a parameter - * - Or, return a fixed value defined by the execution environment (such as 0) - * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return - * - * An application may provide its own implementations of the *div0 functions to - * for a particular behaviour from the *div and *divmod functions called out of - * line. - */ - -/* provide an unused declaration to pacify pendantic compilation */ -extern unsigned char declaration; - -#if defined(__ARM_EABI__) -int __attribute__((weak)) __attribute__((visibility("hidden"))) -__aeabi_idiv0(int return_value) { - return return_value; -} - -long long __attribute__((weak)) __attribute__((visibility("hidden"))) -__aeabi_ldiv0(long long return_value) { - return return_value; -} -#endif - +/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements the division by zero helper routines as specified by the + * Run-time ABI for the ARM Architecture. + * + * ===----------------------------------------------------------------------=== + */ + +/* + * RTABI 4.3.2 - Division by zero + * + * The *div0 functions: + * - Return the value passed to them as a parameter + * - Or, return a fixed value defined by the execution environment (such as 0) + * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return + * + * An application may provide its own implementations of the *div0 functions to + * for a particular behaviour from the *div and *divmod functions called out of + * line. 
+ */ + +/* provide an unused declaration to pacify pendantic compilation */ +extern unsigned char declaration; + +#if defined(__ARM_EABI__) +int __attribute__((weak)) __attribute__((visibility("hidden"))) +__aeabi_idiv0(int return_value) { + return return_value; +} + +long long __attribute__((weak)) __attribute__((visibility("hidden"))) +__aeabi_ldiv0(long long return_value) { + return return_value; +} +#endif + diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c index 8a39c6dac39..fc17d5a4cc7 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c @@ -1,19 +1,19 @@ -//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "../fp_lib.h" - -COMPILER_RT_ABI fp_t -__aeabi_dsub(fp_t, fp_t); - -COMPILER_RT_ABI fp_t -__aeabi_drsub(fp_t a, fp_t b) { - return __aeabi_dsub(b, a); -} +//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "../fp_lib.h" + +COMPILER_RT_ABI fp_t +__aeabi_dsub(fp_t, fp_t); + +COMPILER_RT_ABI fp_t +__aeabi_drsub(fp_t a, fp_t b) { + return __aeabi_dsub(b, a); +} diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S index 2dab884a489..55f49a2b5af 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S @@ -1,40 +1,40 @@ -//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) { -// int result = __{eq,lt,le,ge,gt}sf2(a, b); -// if (result {==,<,<=,>=,>} 0) { -// return 1; -// } else { -// return 0; -// } -// } - -#define DEFINE_AEABI_FCMP(cond) \ - .syntax unified SEPARATOR \ - .p2align 2 SEPARATOR \ -DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ - push { r4, lr } SEPARATOR \ - bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ - cmp r0, #0 SEPARATOR \ - b ## cond 1f SEPARATOR \ - mov r0, #0 SEPARATOR \ - pop { r4, pc } SEPARATOR \ -1: SEPARATOR \ - mov r0, #1 SEPARATOR \ - pop { r4, pc } SEPARATOR \ -END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) - -DEFINE_AEABI_FCMP(eq) -DEFINE_AEABI_FCMP(lt) -DEFINE_AEABI_FCMP(le) -DEFINE_AEABI_FCMP(ge) -DEFINE_AEABI_FCMP(gt) +//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) { +// int result = __{eq,lt,le,ge,gt}sf2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#define DEFINE_AEABI_FCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ + push { r4, lr } SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + mov r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + mov r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) + +DEFINE_AEABI_FCMP(eq) +DEFINE_AEABI_FCMP(lt) +DEFINE_AEABI_FCMP(le) +DEFINE_AEABI_FCMP(ge) +DEFINE_AEABI_FCMP(gt) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c index 1d019df8084..64258dc7e07 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c @@ -1,19 +1,19 @@ -//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "../fp_lib.h" - -COMPILER_RT_ABI fp_t -__aeabi_fsub(fp_t, fp_t); - -COMPILER_RT_ABI fp_t -__aeabi_frsub(fp_t a, fp_t b) { - return __aeabi_fsub(b, a); -} +//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "../fp_lib.h" + +COMPILER_RT_ABI fp_t +__aeabi_fsub(fp_t, fp_t); + +COMPILER_RT_ABI fp_t +__aeabi_frsub(fp_t a, fp_t b) { + return __aeabi_fsub(b, a); +} diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S index eb6d55529a4..384add38279 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S @@ -1,28 +1,28 @@ -//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) { -// int rem, quot; -// quot = __divmodsi4(numerator, denominator, &rem); -// return {quot, rem}; -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) - push { lr } - sub sp, sp, #4 - mov r2, sp - bl SYMBOL_NAME(__divmodsi4) - ldr r1, [sp] - add sp, sp, #4 - pop { pc } -END_COMPILERRT_FUNCTION(__aeabi_idivmod) +//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) { +// int rem, quot; +// quot = __divmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) + push { lr } + sub sp, sp, #4 + mov r2, sp + bl SYMBOL_NAME(__divmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +END_COMPILERRT_FUNCTION(__aeabi_idivmod) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S index e27f79a9c1a..ad06f1de2af 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S @@ -1,31 +1,31 @@ -//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// struct { int64_t quot, int64_t rem} -// __aeabi_ldivmod(int64_t numerator, int64_t denominator) { -// int64_t rem, quot; -// quot = __divmoddi4(numerator, denominator, &rem); -// return {quot, rem}; -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) - push {r11, lr} - sub sp, sp, #16 - add r12, sp, #8 - str r12, [sp] - bl SYMBOL_NAME(__divmoddi4) - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r11, pc} -END_COMPILERRT_FUNCTION(__aeabi_ldivmod) +//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int64_t quot, int64_t rem} +// __aeabi_ldivmod(int64_t numerator, int64_t denominator) { +// int64_t rem, quot; +// quot = __divmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) + push {r11, lr} + sub sp, sp, #16 + add r12, sp, #8 + str r12, [sp] + bl SYMBOL_NAME(__divmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r11, pc} +END_COMPILERRT_FUNCTION(__aeabi_ldivmod) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S index b4c7f31bd9c..051ce435bab 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S @@ -1,20 +1,20 @@ -//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); } - - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) - b memcmp -END_COMPILERRT_FUNCTION(__aeabi_memcmp) - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp) -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp) +//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) + b memcmp +END_COMPILERRT_FUNCTION(__aeabi_memcmp) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S index 1d3bbc8be59..cf02332490a 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S @@ -1,20 +1,20 @@ -//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } - - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) - b memcpy -END_COMPILERRT_FUNCTION(__aeabi_memcpy) - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy) +//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) + b memcpy +END_COMPILERRT_FUNCTION(__aeabi_memcpy) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S index 16f4da5409c..4dda06f75d0 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S @@ -1,20 +1,20 @@ -//===-- aeabi_memmove.S - EABI memmove implementation --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===---------------------------------------------------------------------===// - -#include "../assembly.h" - -// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } - - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) - b memmove -END_COMPILERRT_FUNCTION(__aeabi_memmove) - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove) +//===-- aeabi_memmove.S - EABI memmove implementation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===---------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) + b memmove +END_COMPILERRT_FUNCTION(__aeabi_memmove) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S index d0f8a587dc6..c8b49c7809a 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S @@ -1,34 +1,34 @@ -//===-- aeabi_memset.S - EABI memset implementation -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } -// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); } - - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) - mov r3, r1 - mov r1, r2 - mov r2, r3 - b memset -END_COMPILERRT_FUNCTION(__aeabi_memset) - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) - -DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) - mov r2, r1 - mov r1, #0 - b memset -END_COMPILERRT_FUNCTION(__aeabi_memclr) - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr) - +//===-- aeabi_memset.S - EABI memset implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source 
Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } +// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) + mov r3, r1 + mov r1, r2 + mov r2, r3 + b memset +END_COMPILERRT_FUNCTION(__aeabi_memset) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) + +DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) + mov r2, r1 + mov r1, #0 + b memset +END_COMPILERRT_FUNCTION(__aeabi_memclr) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr) + diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S index 0452978c51d..8ea474d91c6 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S @@ -1,29 +1,29 @@ -//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// struct { unsigned quot, unsigned rem} -// __aeabi_uidivmod(unsigned numerator, unsigned denominator) { -// unsigned rem, quot; -// quot = __udivmodsi4(numerator, denominator, &rem); -// return {quot, rem}; -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) - push { lr } - sub sp, sp, #4 - mov r2, sp - bl SYMBOL_NAME(__udivmodsi4) - ldr r1, [sp] - add sp, sp, #4 - pop { pc } -END_COMPILERRT_FUNCTION(__aeabi_uidivmod) +//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { unsigned quot, unsigned rem} +// __aeabi_uidivmod(unsigned numerator, unsigned denominator) { +// unsigned rem, quot; +// quot = __udivmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) + push { lr } + sub sp, sp, #4 + mov r2, sp + bl SYMBOL_NAME(__udivmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +END_COMPILERRT_FUNCTION(__aeabi_uidivmod) diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S index a0e2a57c664..4e1f8e2a673 100644 --- a/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S +++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S @@ -1,31 +1,31 @@ -//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// struct { uint64_t quot, uint64_t rem} -// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) { -// uint64_t rem, quot; -// quot = __udivmoddi4(numerator, denominator, &rem); -// return {quot, rem}; -// } - - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) - push {r11, lr} - sub sp, sp, #16 - add r12, sp, #8 - str r12, [sp] - bl SYMBOL_NAME(__udivmoddi4) - ldr r2, [sp, #8] - ldr r3, [sp, #12] - add sp, sp, #16 - pop {r11, pc} -END_COMPILERRT_FUNCTION(__aeabi_uldivmod) +//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { uint64_t quot, uint64_t rem} +// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) { +// uint64_t rem, quot; +// quot = __udivmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) + push {r11, lr} + sub sp, sp, #16 + add r12, sp, #8 + str r12, [sp] + bl SYMBOL_NAME(__udivmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r11, pc} +END_COMPILERRT_FUNCTION(__aeabi_uldivmod) diff --git a/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S b/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S index 02975c7f150..86f3bba8c29 100644 --- a/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S +++ b/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S @@ -1,47 +1,47 @@ -//===------- bswapdi2 - Implement bswapdi2 --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source 
Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - -// -// extern uint64_t __bswapdi2(uint64_t); -// -// Reverse all the bytes in a 64-bit integer. -// - .p2align 2 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapdi2) -#else -DEFINE_COMPILERRT_FUNCTION(__bswapdi2) -#endif -#if __ARM_ARCH < 6 - // before armv6 does not have "rev" instruction - // r2 = rev(r0) - eor r2, r0, r0, ror #16 - bic r2, r2, #0xff0000 - mov r2, r2, lsr #8 - eor r2, r2, r0, ror #8 - // r0 = rev(r1) - eor r0, r1, r1, ror #16 - bic r0, r0, #0xff0000 - mov r0, r0, lsr #8 - eor r0, r0, r1, ror #8 -#else - rev r2, r0 // r2 = rev(r0) - rev r0, r1 // r0 = rev(r1) -#endif - mov r1, r2 // r1 = r2 = rev(r0) - JMP(lr) -END_COMPILERRT_FUNCTION(__bswapdi2) +//===------- bswapdi2 - Implement bswapdi2 --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +// +// extern uint64_t __bswapdi2(uint64_t); +// +// Reverse all the bytes in a 64-bit integer. 
+// + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapdi2) +#else +DEFINE_COMPILERRT_FUNCTION(__bswapdi2) +#endif +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + // r2 = rev(r0) + eor r2, r0, r0, ror #16 + bic r2, r2, #0xff0000 + mov r2, r2, lsr #8 + eor r2, r2, r0, ror #8 + // r0 = rev(r1) + eor r0, r1, r1, ror #16 + bic r0, r0, #0xff0000 + mov r0, r0, lsr #8 + eor r0, r0, r1, ror #8 +#else + rev r2, r0 // r2 = rev(r0) + rev r0, r1 // r0 = rev(r1) +#endif + mov r1, r2 // r1 = r2 = rev(r0) + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapdi2) diff --git a/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S b/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S index cbb92169354..59ba8158fd5 100644 --- a/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S +++ b/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S @@ -1,39 +1,39 @@ -//===------- bswapsi2 - Implement bswapsi2 --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - -// -// extern uint32_t __bswapsi2(uint32_t); -// -// Reverse all the bytes in a 32-bit integer. 
-// - .p2align 2 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapsi2) -#else -DEFINE_COMPILERRT_FUNCTION(__bswapsi2) -#endif -#if __ARM_ARCH < 6 - // before armv6 does not have "rev" instruction - eor r1, r0, r0, ror #16 - bic r1, r1, #0xff0000 - mov r1, r1, lsr #8 - eor r0, r1, r0, ror #8 -#else - rev r0, r0 -#endif - JMP(lr) -END_COMPILERRT_FUNCTION(__bswapsi2) +//===------- bswapsi2 - Implement bswapsi2 --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +// +// extern uint32_t __bswapsi2(uint32_t); +// +// Reverse all the bytes in a 32-bit integer. +// + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapsi2) +#else +DEFINE_COMPILERRT_FUNCTION(__bswapsi2) +#endif +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + eor r1, r0, r0, ror #16 + bic r1, r1, #0xff0000 + mov r1, r1, lsr #8 + eor r0, r1, r0, ror #8 +#else + rev r0, r0 +#endif + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapsi2) diff --git a/contrib/libs/cxxsupp/builtins/arm/clzdi2.S b/contrib/libs/cxxsupp/builtins/arm/clzdi2.S index 28d4f8761ea..a55abac0469 100644 --- a/contrib/libs/cxxsupp/builtins/arm/clzdi2.S +++ b/contrib/libs/cxxsupp/builtins/arm/clzdi2.S @@ -1,97 +1,97 @@ -/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements count leading zeros for 64bit arguments. 
- * - * ===----------------------------------------------------------------------=== - */ -#include "../assembly.h" - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - - - .p2align 2 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__clzdi2) -#else -DEFINE_COMPILERRT_FUNCTION(__clzdi2) -#endif -#ifdef __ARM_FEATURE_CLZ -#ifdef __ARMEB__ - cmp r0, 0 - itee ne - clzne r0, r0 - clzeq r0, r1 - addeq r0, r0, 32 -#else - cmp r1, 0 - itee ne - clzne r0, r1 - clzeq r0, r0 - addeq r0, r0, 32 -#endif - JMP(lr) -#else - /* Assumption: n != 0 */ - - /* - * r0: n - * r1: upper half of n, overwritten after check - * r1: count of leading zeros in n + 1 - * r2: scratch register for shifted r0 - */ -#ifdef __ARMEB__ - cmp r0, 0 - moveq r0, r1 -#else - cmp r1, 0 - movne r0, r1 -#endif - movne r1, 1 - moveq r1, 33 - - /* - * Basic block: - * if ((r0 >> SHIFT) == 0) - * r1 += SHIFT; - * else - * r0 >>= SHIFT; - * for descending powers of two as SHIFT. - */ -#define BLOCK(shift) \ - lsrs r2, r0, shift; \ - movne r0, r2; \ - addeq r1, shift \ - - BLOCK(16) - BLOCK(8) - BLOCK(4) - BLOCK(2) - - /* - * The basic block invariants at this point are (r0 >> 2) == 0 and - * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. - * - * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) - * ---+----------------+----------------+------------+-------------- - * 1 | 1 | 0 | 0 | 1 - * 2 | 0 | 1 | -1 | 0 - * 3 | 0 | 1 | -1 | 0 - * - * The r1's initial value of 1 compensates for the 1 here. - */ - sub r0, r1, r0, lsr #1 - - JMP(lr) -#endif // __ARM_FEATURE_CLZ -END_COMPILERRT_FUNCTION(__clzdi2) +/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements count leading zeros for 64bit arguments. + * + * ===----------------------------------------------------------------------=== + */ +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__clzdi2) +#else +DEFINE_COMPILERRT_FUNCTION(__clzdi2) +#endif +#ifdef __ARM_FEATURE_CLZ +#ifdef __ARMEB__ + cmp r0, 0 + itee ne + clzne r0, r0 + clzeq r0, r1 + addeq r0, r0, 32 +#else + cmp r1, 0 + itee ne + clzne r0, r1 + clzeq r0, r0 + addeq r0, r0, 32 +#endif + JMP(lr) +#else + /* Assumption: n != 0 */ + + /* + * r0: n + * r1: upper half of n, overwritten after check + * r1: count of leading zeros in n + 1 + * r2: scratch register for shifted r0 + */ +#ifdef __ARMEB__ + cmp r0, 0 + moveq r0, r1 +#else + cmp r1, 0 + movne r0, r1 +#endif + movne r1, 1 + moveq r1, 33 + + /* + * Basic block: + * if ((r0 >> SHIFT) == 0) + * r1 += SHIFT; + * else + * r0 >>= SHIFT; + * for descending powers of two as SHIFT. + */ +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + /* + * The basic block invariants at this point are (r0 >> 2) == 0 and + * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + * + * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + * ---+----------------+----------------+------------+-------------- + * 1 | 1 | 0 | 0 | 1 + * 2 | 0 | 1 | -1 | 0 + * 3 | 0 | 1 | -1 | 0 + * + * The r1's initial value of 1 compensates for the 1 here. 
+ */ + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzdi2) diff --git a/contrib/libs/cxxsupp/builtins/arm/clzsi2.S b/contrib/libs/cxxsupp/builtins/arm/clzsi2.S index d396ebe99c4..1cd379bfb0a 100644 --- a/contrib/libs/cxxsupp/builtins/arm/clzsi2.S +++ b/contrib/libs/cxxsupp/builtins/arm/clzsi2.S @@ -1,76 +1,76 @@ -/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements count leading zeros for 32bit arguments. - * - * ===----------------------------------------------------------------------=== - */ -#include "../assembly.h" - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - - .p2align 2 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__clzsi2) -#else -DEFINE_COMPILERRT_FUNCTION(__clzsi2) -#endif -#ifdef __ARM_FEATURE_CLZ - clz r0, r0 - JMP(lr) -#else - /* Assumption: n != 0 */ - - /* - * r0: n - * r1: count of leading zeros in n + 1 - * r2: scratch register for shifted r0 - */ - mov r1, 1 - - /* - * Basic block: - * if ((r0 >> SHIFT) == 0) - * r1 += SHIFT; - * else - * r0 >>= SHIFT; - * for descending powers of two as SHIFT. - */ - -#define BLOCK(shift) \ - lsrs r2, r0, shift; \ - movne r0, r2; \ - addeq r1, shift \ - - BLOCK(16) - BLOCK(8) - BLOCK(4) - BLOCK(2) - - /* - * The basic block invariants at this point are (r0 >> 2) == 0 and - * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. - * - * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) - * ---+----------------+----------------+------------+-------------- - * 1 | 1 | 0 | 0 | 1 - * 2 | 0 | 1 | -1 | 0 - * 3 | 0 | 1 | -1 | 0 - * - * The r1's initial value of 1 compensates for the 1 here. 
- */ - sub r0, r1, r0, lsr #1 - - JMP(lr) -#endif // __ARM_FEATURE_CLZ -END_COMPILERRT_FUNCTION(__clzsi2) +/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements count leading zeros for 32bit arguments. + * + * ===----------------------------------------------------------------------=== + */ +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__clzsi2) +#else +DEFINE_COMPILERRT_FUNCTION(__clzsi2) +#endif +#ifdef __ARM_FEATURE_CLZ + clz r0, r0 + JMP(lr) +#else + /* Assumption: n != 0 */ + + /* + * r0: n + * r1: count of leading zeros in n + 1 + * r2: scratch register for shifted r0 + */ + mov r1, 1 + + /* + * Basic block: + * if ((r0 >> SHIFT) == 0) + * r1 += SHIFT; + * else + * r0 >>= SHIFT; + * for descending powers of two as SHIFT. + */ + +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + /* + * The basic block invariants at this point are (r0 >> 2) == 0 and + * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + * + * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + * ---+----------------+----------------+------------+-------------- + * 1 | 1 | 0 | 0 | 1 + * 2 | 0 | 1 | -1 | 0 + * 3 | 0 | 1 | -1 | 0 + * + * The r1's initial value of 1 compensates for the 1 here. 
+ */ + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzsi2) diff --git a/contrib/libs/cxxsupp/builtins/arm/comparesf2.S b/contrib/libs/cxxsupp/builtins/arm/comparesf2.S index 8effe8cd7eb..cf71d36e051 100644 --- a/contrib/libs/cxxsupp/builtins/arm/comparesf2.S +++ b/contrib/libs/cxxsupp/builtins/arm/comparesf2.S @@ -1,148 +1,148 @@ -//===-- comparesf2.S - Implement single-precision soft-float comparisons --===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the following soft-fp_t comparison routines: -// -// __eqsf2 __gesf2 __unordsf2 -// __lesf2 __gtsf2 -// __ltsf2 -// __nesf2 -// -// The semantics of the routines grouped in each column are identical, so there -// is a single implementation for each, with multiple names. -// -// The routines behave as follows: -// -// __lesf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// 1 if either a or b is NaN -// -// __gesf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// -1 if either a or b is NaN -// -// __unordsf2(a,b) returns 0 if both a and b are numbers -// 1 if either a or b is NaN -// -// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of -// NaN values. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" -.syntax unified - -.p2align 2 -DEFINE_COMPILERRT_FUNCTION(__eqsf2) - // Make copies of a and b with the sign bit shifted off the top. These will - // be used to detect zeros and NaNs. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - - // We do the comparison in three stages (ignoring NaN values for the time - // being). 
First, we orr the absolute values of a and b; this sets the Z - // flag if both a and b are zero (of either sign). The shift of r3 doesn't - // effect this at all, but it *does* make sure that the C flag is clear for - // the subsequent operations. - orrs r12, r2, r3, lsr #1 - - // Next, we check if a and b have the same or different signs. If they have - // opposite signs, this eor will set the N flag. - it ne - eorsne r12, r0, r1 - - // If a and b are equal (either both zeros or bit identical; again, we're - // ignoring NaNs for now), this subtract will zero out r0. If they have the - // same sign, the flags are updated as they would be for a comparison of the - // absolute values of a and b. - it pl - subspl r0, r2, r3 - - // If a is smaller in magnitude than b and both have the same sign, place - // the negation of the sign of b in r0. Thus, if both are negative and - // a > b, this sets r0 to 0; if both are positive and a < b, this sets - // r0 to -1. - // - // This is also done if a and b have opposite signs and are not both zero, - // because in that case the subtract was not performed and the C flag is - // still clear from the shift argument in orrs; if a is positive and b - // negative, this places 0 in r0; if a is negative and b positive, -1 is - // placed in r0. - it lo - mvnlo r0, r1, asr #31 - - // If a is greater in magnitude than b and both have the same sign, place - // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed - // in r0, which is the desired result. Conversely, if both are positive - // and a > b, zero is placed in r0. - it hi - movhi r0, r1, asr #31 - - // If you've been keeping track, at this point r0 contains -1 if a < b and - // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. - // If a == b, then the Z flag is set, so we can get the correct final value - // into r0 by simply or'ing with 1 if Z is clear. - it ne - orrne r0, r0, #1 - - // Finally, we need to deal with NaNs. 
If either argument is NaN, replace - // the value in r0 with 1. - cmp r2, #0xff000000 - ite ls - cmpls r3, #0xff000000 - movhi r0, #1 - JMP(lr) -END_COMPILERRT_FUNCTION(__eqsf2) -DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) -DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) -DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) - -.p2align 2 -DEFINE_COMPILERRT_FUNCTION(__gtsf2) - // Identical to the preceding except in that we return -1 for NaN values. - // Given that the two paths share so much code, one might be tempted to - // unify them; however, the extra code needed to do so makes the code size - // to performance tradeoff very hard to justify for such small functions. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - orrs r12, r2, r3, lsr #1 - it ne - eorsne r12, r0, r1 - it pl - subspl r0, r2, r3 - it lo - mvnlo r0, r1, asr #31 - it hi - movhi r0, r1, asr #31 - it ne - orrne r0, r0, #1 - cmp r2, #0xff000000 - ite ls - cmpls r3, #0xff000000 - movhi r0, #-1 - JMP(lr) -END_COMPILERRT_FUNCTION(__gtsf2) -DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) - -.p2align 2 -DEFINE_COMPILERRT_FUNCTION(__unordsf2) - // Return 1 for NaN values, 0 otherwise. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mov r0, #0 - cmp r2, #0xff000000 - ite ls - cmpls r3, #0xff000000 - movhi r0, #1 - JMP(lr) -END_COMPILERRT_FUNCTION(__unordsf2) - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) +//===-- comparesf2.S - Implement single-precision soft-float comparisons --===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the following soft-fp_t comparison routines: +// +// __eqsf2 __gesf2 __unordsf2 +// __lesf2 __gtsf2 +// __ltsf2 +// __nesf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, with multiple names. +// +// The routines behave as follows: +// +// __lesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordsf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +.syntax unified + +.p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqsf2) + // Make copies of a and b with the sign bit shifted off the top. These will + // be used to detect zeros and NaNs. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + + // We do the comparison in three stages (ignoring NaN values for the time + // being). First, we orr the absolute values of a and b; this sets the Z + // flag if both a and b are zero (of either sign). The shift of r3 doesn't + // effect this at all, but it *does* make sure that the C flag is clear for + // the subsequent operations. + orrs r12, r2, r3, lsr #1 + + // Next, we check if a and b have the same or different signs. If they have + // opposite signs, this eor will set the N flag. + it ne + eorsne r12, r0, r1 + + // If a and b are equal (either both zeros or bit identical; again, we're + // ignoring NaNs for now), this subtract will zero out r0. If they have the + // same sign, the flags are updated as they would be for a comparison of the + // absolute values of a and b. 
+ it pl + subspl r0, r2, r3 + + // If a is smaller in magnitude than b and both have the same sign, place + // the negation of the sign of b in r0. Thus, if both are negative and + // a > b, this sets r0 to 0; if both are positive and a < b, this sets + // r0 to -1. + // + // This is also done if a and b have opposite signs and are not both zero, + // because in that case the subtract was not performed and the C flag is + // still clear from the shift argument in orrs; if a is positive and b + // negative, this places 0 in r0; if a is negative and b positive, -1 is + // placed in r0. + it lo + mvnlo r0, r1, asr #31 + + // If a is greater in magnitude than b and both have the same sign, place + // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed + // in r0, which is the desired result. Conversely, if both are positive + // and a > b, zero is placed in r0. + it hi + movhi r0, r1, asr #31 + + // If you've been keeping track, at this point r0 contains -1 if a < b and + // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. + // If a == b, then the Z flag is set, so we can get the correct final value + // into r0 by simply or'ing with 1 if Z is clear. + it ne + orrne r0, r0, #1 + + // Finally, we need to deal with NaNs. If either argument is NaN, replace + // the value in r0 with 1. + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #1 + JMP(lr) +END_COMPILERRT_FUNCTION(__eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) + +.p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtsf2) + // Identical to the preceding except in that we return -1 for NaN values. + // Given that the two paths share so much code, one might be tempted to + // unify them; however, the extra code needed to do so makes the code size + // to performance tradeoff very hard to justify for such small functions. 
+ mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + orrs r12, r2, r3, lsr #1 + it ne + eorsne r12, r0, r1 + it pl + subspl r0, r2, r3 + it lo + mvnlo r0, r1, asr #31 + it hi + movhi r0, r1, asr #31 + it ne + orrne r0, r0, #1 + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #-1 + JMP(lr) +END_COMPILERRT_FUNCTION(__gtsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) + +.p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2) + // Return 1 for NaN values, 0 otherwise. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mov r0, #0 + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #1 + JMP(lr) +END_COMPILERRT_FUNCTION(__unordsf2) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) diff --git a/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S index 7266c235db4..6eebef167a2 100644 --- a/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S @@ -1,26 +1,26 @@ -//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __divdf3vfp(double a, double b); -// -// Divides two double precision floating point numbers using the Darwin -// calling convention where double arguments are passsed in GPR pairs -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) - vmov d6, r0, r1 // move first param from r0/r1 pair into d6 - vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vdiv.f64 d5, d6, d7 - vmov r0, r1, d5 // move result back to r0/r1 pair - bx lr -END_COMPILERRT_FUNCTION(__divdf3vfp) +//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __divdf3vfp(double a, double b); +// +// Divides two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vdiv.f64 d5, d6, d7 + vmov r0, r1, d5 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__divdf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S b/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S index 7d62a3d3ebc..646b9ab78fb 100644 --- a/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S +++ b/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S @@ -1,74 +1,74 @@ -/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of 
Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __divmodsi4 (32-bit signed integer divide and - * modulus) function for the ARM architecture. A naive digit-by-digit - * computation is employed for simplicity. - * - *===----------------------------------------------------------------------===*/ - -#include "../assembly.h" - -#define ESTABLISH_FRAME \ - push {r4-r7, lr} ;\ - add r7, sp, #12 -#define CLEAR_FRAME_AND_RETURN \ - pop {r4-r7, pc} - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - -@ int __divmodsi4(int divident, int divisor, int *remainder) -@ Calculate the quotient and remainder of the (signed) division. The return -@ value is the quotient, the remainder is placed in the variable. - - .p2align 3 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__divmodsi4) -#else -DEFINE_COMPILERRT_FUNCTION(__divmodsi4) -#endif -#if __ARM_ARCH_EXT_IDIV__ - tst r1, r1 - beq LOCAL_LABEL(divzero) - mov r3, r0 - sdiv r0, r3, r1 - mls r1, r0, r1, r3 - str r1, [r2] - bx lr -LOCAL_LABEL(divzero): - mov r0, #0 - bx lr -#else - ESTABLISH_FRAME -// Set aside the sign of the quotient and modulus, and the address for the -// modulus. - eor r4, r0, r1 - mov r5, r0 - mov r6, r2 -// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). 
- eor ip, r0, r0, asr #31 - eor lr, r1, r1, asr #31 - sub r0, ip, r0, asr #31 - sub r1, lr, r1, asr #31 -// Unsigned divmod: - bl SYMBOL_NAME(__udivmodsi4) -// Apply the sign of quotient and modulus - ldr r1, [r6] - eor r0, r0, r4, asr #31 - eor r1, r1, r5, asr #31 - sub r0, r0, r4, asr #31 - sub r1, r1, r5, asr #31 - str r1, [r6] - CLEAR_FRAME_AND_RETURN -#endif -END_COMPILERRT_FUNCTION(__divmodsi4) +/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __divmodsi4 (32-bit signed integer divide and + * modulus) function for the ARM architecture. A naive digit-by-digit + * computation is employed for simplicity. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4-r7, lr} ;\ + add r7, sp, #12 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4-r7, pc} + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ int __divmodsi4(int divident, int divisor, int *remainder) +@ Calculate the quotient and remainder of the (signed) division. The return +@ value is the quotient, the remainder is placed in the variable. + + .p2align 3 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__divmodsi4) +#else +DEFINE_COMPILERRT_FUNCTION(__divmodsi4) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divzero) + mov r3, r0 + sdiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +LOCAL_LABEL(divzero): + mov r0, #0 + bx lr +#else + ESTABLISH_FRAME +// Set aside the sign of the quotient and modulus, and the address for the +// modulus. 
+ eor r4, r0, r1 + mov r5, r0 + mov r6, r2 +// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor ip, r0, r0, asr #31 + eor lr, r1, r1, asr #31 + sub r0, ip, r0, asr #31 + sub r1, lr, r1, asr #31 +// Unsigned divmod: + bl SYMBOL_NAME(__udivmodsi4) +// Apply the sign of quotient and modulus + ldr r1, [r6] + eor r0, r0, r4, asr #31 + eor r1, r1, r5, asr #31 + sub r0, r0, r4, asr #31 + sub r1, r1, r5, asr #31 + str r1, [r6] + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__divmodsi4) diff --git a/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S index 55feaf46fcd..fdbaebc8837 100644 --- a/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S @@ -1,26 +1,26 @@ -//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __divsf3vfp(float a, float b); -// -// Divides two single precision floating point numbers using the Darwin -// calling convention where single arguments are passsed like 32-bit ints. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__divsf3vfp) - vmov s14, r0 // move first param from r0 into float register - vmov s15, r1 // move second param from r1 into float register - vdiv.f32 s13, s14, s15 - vmov r0, s13 // move result back to r0 - bx lr -END_COMPILERRT_FUNCTION(__divsf3vfp) +//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __divsf3vfp(float a, float b); +// +// Divides two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed like 32-bit ints. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__divsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vdiv.f32 s13, s14, s15 + vmov r0, s13 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__divsf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/divsi3.S b/contrib/libs/cxxsupp/builtins/arm/divsi3.S index 3ad8000dd6d..adf8f94fc7b 100644 --- a/contrib/libs/cxxsupp/builtins/arm/divsi3.S +++ b/contrib/libs/cxxsupp/builtins/arm/divsi3.S @@ -1,65 +1,65 @@ -/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __divsi3 (32-bit signed integer divide) function - * for the ARM architecture as a wrapper around the unsigned routine. - * - *===----------------------------------------------------------------------===*/ - -#include "../assembly.h" - -#define ESTABLISH_FRAME \ - push {r4, r7, lr} ;\ - add r7, sp, #4 -#define CLEAR_FRAME_AND_RETURN \ - pop {r4, r7, pc} - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - - .p2align 3 -// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine. -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3) - -@ int __divsi3(int divident, int divisor) -@ Calculate and return the quotient of the (signed) division. 
- -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__divsi3) -#else -DEFINE_COMPILERRT_FUNCTION(__divsi3) -#endif -#if __ARM_ARCH_EXT_IDIV__ - tst r1,r1 - beq LOCAL_LABEL(divzero) - sdiv r0, r0, r1 - bx lr -LOCAL_LABEL(divzero): - mov r0,#0 - bx lr -#else -ESTABLISH_FRAME -// Set aside the sign of the quotient. - eor r4, r0, r1 -// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). - eor r2, r0, r0, asr #31 - eor r3, r1, r1, asr #31 - sub r0, r2, r0, asr #31 - sub r1, r3, r1, asr #31 -// abs(a) / abs(b) - bl SYMBOL_NAME(__udivsi3) -// Apply sign of quotient to result and return. - eor r0, r0, r4, asr #31 - sub r0, r0, r4, asr #31 - CLEAR_FRAME_AND_RETURN -#endif -END_COMPILERRT_FUNCTION(__divsi3) +/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __divsi3 (32-bit signed integer divide) function + * for the ARM architecture as a wrapper around the unsigned routine. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4, r7, lr} ;\ + add r7, sp, #4 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4, r7, pc} + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + .p2align 3 +// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine. +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3) + +@ int __divsi3(int divident, int divisor) +@ Calculate and return the quotient of the (signed) division. 
+ +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__divsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__divsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1,r1 + beq LOCAL_LABEL(divzero) + sdiv r0, r0, r1 + bx lr +LOCAL_LABEL(divzero): + mov r0,#0 + bx lr +#else +ESTABLISH_FRAME +// Set aside the sign of the quotient. + eor r4, r0, r1 +// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor r2, r0, r0, asr #31 + eor r3, r1, r1, asr #31 + sub r0, r2, r0, asr #31 + sub r1, r3, r1, asr #31 +// abs(a) / abs(b) + bl SYMBOL_NAME(__udivsi3) +// Apply sign of quotient to result and return. + eor r0, r0, r4, asr #31 + sub r0, r0, r4, asr #31 + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__divsi3) diff --git a/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S index c3246f90eb2..7f2fbc3072d 100644 --- a/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S @@ -1,29 +1,29 @@ -//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __eqdf2vfp(double a, double b); -// -// Returns one iff a == b and neither is NaN. -// Uses Darwin calling convention where double precision arguments are passsed -// like in GPR pairs. 
-// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) - vmov d6, r0, r1 // load r0/r1 pair in double register - vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 - vmrs apsr_nzcv, fpscr - moveq r0, #1 // set result register to 1 if equal - movne r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__eqdf2vfp) +//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __eqdf2vfp(double a, double b); +// +// Returns one iff a == b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + moveq r0, #1 // set result register to 1 if equal + movne r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__eqdf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S index adcd7e8e815..a318b336ae9 100644 --- a/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S @@ -1,29 +1,29 @@ -//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __eqsf2vfp(float a, float b); -// -// Returns one iff a == b and neither is NaN. 
-// Uses Darwin calling convention where single precision arguments are passsed -// like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) - vmov s14, r0 // move from GPR 0 to float register - vmov s15, r1 // move from GPR 1 to float register - vcmp.f32 s14, s15 - vmrs apsr_nzcv, fpscr - moveq r0, #1 // set result register to 1 if equal - movne r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__eqsf2vfp) +//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __eqsf2vfp(float a, float b); +// +// Returns one iff a == b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + moveq r0, #1 // set result register to 1 if equal + movne r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__eqsf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S index 63eb10284e6..b998e589459 100644 --- a/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S @@ -1,26 +1,26 @@ -//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __extendsfdf2vfp(float a); -// -// Converts single precision float to double precision result. -// Uses Darwin calling convention where a single precision parameter is -// passed in a GPR and a double precision result is returned in R0/R1 pair. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp) - vmov s15, r0 // load float register from R0 - vcvt.f64.f32 d7, s15 // convert single to double - vmov r0, r1, d7 // return result in r0/r1 pair - bx lr -END_COMPILERRT_FUNCTION(__extendsfdf2vfp) +//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __extendsfdf2vfp(float a); +// +// Converts single precision float to double precision result. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR and a double precision result is returned in R0/R1 pair. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp) + vmov s15, r0 // load float register from R0 + vcvt.f64.f32 d7, s15 // convert single to double + vmov r0, r1, d7 // return result in r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__extendsfdf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S index 628e7462b65..e3bd8e05e01 100644 --- a/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S @@ -1,26 +1,26 @@ -//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __fixdfsivfp(double a); -// -// Converts double precision float to a 32-bit int rounding towards zero. -// Uses Darwin calling convention where a double precision parameter is -// passed in GPR register pair. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp) - vmov d7, r0, r1 // load double register from R0/R1 - vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15 - vmov r0, s15 // move s15 to result register - bx lr -END_COMPILERRT_FUNCTION(__fixdfsivfp) +//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __fixdfsivfp(double a); +// +// Converts double precision float to a 32-bit int rounding towards zero. 
+// Uses Darwin calling convention where a double precision parameter is +// passed in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp) + vmov d7, r0, r1 // load double register from R0/R1 + vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixdfsivfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S index 549d8b7ef79..3d0d0f56d23 100644 --- a/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S @@ -1,26 +1,26 @@ -//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __fixsfsivfp(float a); -// -// Converts single precision float to a 32-bit int rounding towards zero. -// Uses Darwin calling convention where a single precision parameter is -// passed in a GPR.. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp) - vmov s15, r0 // load float register from R0 - vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15 - vmov r0, s15 // move s15 to result register - bx lr -END_COMPILERRT_FUNCTION(__fixsfsivfp) +//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __fixsfsivfp(float a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp) + vmov s15, r0 // load float register from R0 + vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixsfsivfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S index 54668fbe6ef..35dda5b9b03 100644 --- a/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S @@ -1,27 +1,27 @@ -//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern unsigned int __fixunsdfsivfp(double a); -// -// Converts double precision float to a 32-bit unsigned int rounding towards -// zero. All negative values become zero. -// Uses Darwin calling convention where a double precision parameter is -// passed in GPR register pair. 
-// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp) - vmov d7, r0, r1 // load double register from R0/R1 - vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15 - vmov r0, s15 // move s15 to result register - bx lr -END_COMPILERRT_FUNCTION(__fixunsdfsivfp) +//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern unsigned int __fixunsdfsivfp(double a); +// +// Converts double precision float to a 32-bit unsigned int rounding towards +// zero. All negative values become zero. +// Uses Darwin calling convention where a double precision parameter is +// passed in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp) + vmov d7, r0, r1 // load double register from R0/R1 + vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixunsdfsivfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S index a5c4f109339..5c3a7d926fc 100644 --- a/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S @@ -1,27 +1,27 @@ -//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern unsigned int __fixunssfsivfp(float a); -// -// Converts single precision float to a 32-bit unsigned int rounding towards -// zero. All negative values become zero. -// Uses Darwin calling convention where a single precision parameter is -// passed in a GPR.. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp) - vmov s15, r0 // load float register from R0 - vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15 - vmov r0, s15 // move s15 to result register - bx lr -END_COMPILERRT_FUNCTION(__fixunssfsivfp) +//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern unsigned int __fixunssfsivfp(float a); +// +// Converts single precision float to a 32-bit unsigned int rounding towards +// zero. All negative values become zero. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR.. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp) + vmov s15, r0 // load float register from R0 + vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixunssfsivfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S index addd40640c2..d69184914cc 100644 --- a/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S @@ -1,26 +1,26 @@ -//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __floatsidfvfp(int a); -// -// Converts a 32-bit int to a double precision float. -// Uses Darwin calling convention where a double precision result is -// return in GPR register pair. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp) - vmov s15, r0 // move int to float register s15 - vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7 - vmov r0, r1, d7 // move d7 to result register pair r0/r1 - bx lr -END_COMPILERRT_FUNCTION(__floatsidfvfp) +//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __floatsidfvfp(int a); +// +// Converts a 32-bit int to a double precision float. 
+// Uses Darwin calling convention where a double precision result is +// return in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7 + vmov r0, r1, d7 // move d7 to result register pair r0/r1 + bx lr +END_COMPILERRT_FUNCTION(__floatsidfvfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S index 21dc2df5694..4a0cb39d0eb 100644 --- a/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S @@ -1,26 +1,26 @@ -//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __floatsisfvfp(int a); -// -// Converts single precision float to a 32-bit int rounding towards zero. -// Uses Darwin calling convention where a single precision result is -// return in a GPR.. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp) - vmov s15, r0 // move int to float register s15 - vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15 - vmov r0, s15 // move s15 to result register - bx lr -END_COMPILERRT_FUNCTION(__floatsisfvfp) +//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __floatsisfvfp(int a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision result is +// return in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__floatsisfvfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S index ea5bea97c8f..d92969ea345 100644 --- a/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S @@ -1,26 +1,26 @@ -//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __floatunssidfvfp(unsigned int a); -// -// Converts a 32-bit int to a double precision float. -// Uses Darwin calling convention where a double precision result is -// return in GPR register pair. 
-// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp) - vmov s15, r0 // move int to float register s15 - vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7 - vmov r0, r1, d7 // move d7 to result register pair r0/r1 - bx lr -END_COMPILERRT_FUNCTION(__floatunssidfvfp) +//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __floatunssidfvfp(unsigned int a); +// +// Converts a 32-bit int to a double precision float. +// Uses Darwin calling convention where a double precision result is +// return in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7 + vmov r0, r1, d7 // move d7 to result register pair r0/r1 + bx lr +END_COMPILERRT_FUNCTION(__floatunssidfvfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S index a031b33c179..f6aeba56ae1 100644 --- a/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S @@ -1,26 +1,26 @@ -//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __floatunssisfvfp(unsigned int a); -// -// Converts single precision float to a 32-bit int rounding towards zero. -// Uses Darwin calling convention where a single precision result is -// return in a GPR.. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp) - vmov s15, r0 // move int to float register s15 - vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15 - vmov r0, s15 // move s15 to result register - bx lr -END_COMPILERRT_FUNCTION(__floatunssisfvfp) +//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __floatunssisfvfp(unsigned int a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision result is +// return in a GPR.. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__floatunssisfvfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S index 77fbf64d473..9e235270175 100644 --- a/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S @@ -1,29 +1,29 @@ -//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __gedf2vfp(double a, double b); -// -// Returns one iff a >= b and neither is NaN. -// Uses Darwin calling convention where double precision arguments are passsed -// like in GPR pairs. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) - vmov d6, r0, r1 // load r0/r1 pair in double register - vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 - vmrs apsr_nzcv, fpscr - movge r0, #1 // set result register to 1 if greater than or equal - movlt r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__gedf2vfp) +//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gedf2vfp(double a, double b); +// +// Returns one iff a >= b and neither is NaN. 
+// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movge r0, #1 // set result register to 1 if greater than or equal + movlt r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gedf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S index 834f4b17d46..0ff60847788 100644 --- a/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S @@ -1,29 +1,29 @@ -//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __gesf2vfp(float a, float b); -// -// Returns one iff a >= b and neither is NaN. -// Uses Darwin calling convention where single precision arguments are passsed -// like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) - vmov s14, r0 // move from GPR 0 to float register - vmov s15, r1 // move from GPR 1 to float register - vcmp.f32 s14, s15 - vmrs apsr_nzcv, fpscr - movge r0, #1 // set result register to 1 if greater than or equal - movlt r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__gesf2vfp) +//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gesf2vfp(float a, float b); +// +// Returns one iff a >= b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movge r0, #1 // set result register to 1 if greater than or equal + movlt r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gesf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S index 329238d6e7b..3dc5d5b5922 100644 --- a/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S @@ -1,29 +1,29 @@ -//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __gtdf2vfp(double a, double b); -// -// Returns one iff a > b and neither is NaN. -// Uses Darwin calling convention where double precision arguments are passsed -// like in GPR pairs. 
-// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) - vmov d6, r0, r1 // load r0/r1 pair in double register - vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 - vmrs apsr_nzcv, fpscr - movgt r0, #1 // set result register to 1 if equal - movle r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__gtdf2vfp) +//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __gtdf2vfp(double a, double b); +// +// Returns one iff a > b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movgt r0, #1 // set result register to 1 if equal + movle r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gtdf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S index 74e0be62c29..ddd843acf59 100644 --- a/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S @@ -1,29 +1,29 @@ -//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __gtsf2vfp(float a, float b); -// -// Returns one iff a > b and neither is NaN. 
-// Uses Darwin calling convention where single precision arguments are passsed -// like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) - vmov s14, r0 // move from GPR 0 to float register - vmov s15, r1 // move from GPR 1 to float register - vcmp.f32 s14, s15 - vmrs apsr_nzcv, fpscr - movgt r0, #1 // set result register to 1 if equal - movle r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__gtsf2vfp) +//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gtsf2vfp(float a, float b); +// +// Returns one iff a > b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movgt r0, #1 // set result register to 1 if greater than + movle r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gtsf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S index 17b004e66ab..b06ff6db5a3 100644 --- a/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S @@ -1,29 +1,29 @@ -//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __ledf2vfp(double a, double b); -// -// Returns one iff a <= b and neither is NaN. -// Uses Darwin calling convention where double precision arguments are passsed -// like in GPR pairs. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) - vmov d6, r0, r1 // load r0/r1 pair in double register - vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 - vmrs apsr_nzcv, fpscr - movls r0, #1 // set result register to 1 if equal - movhi r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__ledf2vfp) +//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __ledf2vfp(double a, double b); +// +// Returns one iff a <= b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movls r0, #1 // set result register to 1 if less than or equal + movhi r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ledf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S index 86482ff638a..9b33c0c5369 100644 --- a/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S @@ -1,29 +1,29 @@ -//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __lesf2vfp(float a, float b); -// -// Returns one iff a <= b and neither is NaN. -// Uses Darwin calling convention where single precision arguments are passsed -// like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) - vmov s14, r0 // move from GPR 0 to float register - vmov s15, r1 // move from GPR 1 to float register - vcmp.f32 s14, s15 - vmrs apsr_nzcv, fpscr - movls r0, #1 // set result register to 1 if equal - movhi r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__lesf2vfp) +//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __lesf2vfp(float a, float b); +// +// Returns one iff a <= b and neither is NaN. 
+// Uses Darwin calling convention where single precision arguments are passed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movls r0, #1 // set result register to 1 if less than or equal + movhi r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__lesf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S index b91b7395604..9f794b026a4 100644 --- a/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S @@ -1,29 +1,29 @@ -//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __ltdf2vfp(double a, double b); -// -// Returns one iff a < b and neither is NaN. -// Uses Darwin calling convention where double precision arguments are passsed -// like in GPR pairs. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) - vmov d6, r0, r1 // load r0/r1 pair in double register - vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 - vmrs apsr_nzcv, fpscr - movmi r0, #1 // set result register to 1 if equal - movpl r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__ltdf2vfp) +//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __ltdf2vfp(double a, double b); +// +// Returns one iff a < b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passed +// in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movmi r0, #1 // set result register to 1 if less than + movpl r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ltdf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S index 6aee77c23f4..ba190d9d8dc 100644 --- a/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S @@ -1,29 +1,29 @@ -//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __ltsf2vfp(float a, float b); -// -// Returns one iff a < b and neither is NaN. 
-// Uses Darwin calling convention where single precision arguments are passsed -// like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) - vmov s14, r0 // move from GPR 0 to float register - vmov s15, r1 // move from GPR 1 to float register - vcmp.f32 s14, s15 - vmrs apsr_nzcv, fpscr - movmi r0, #1 // set result register to 1 if equal - movpl r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__ltsf2vfp) +//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __ltsf2vfp(float a, float b); +// +// Returns one iff a < b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movmi r0, #1 // set result register to 1 if less than + movpl r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ltsf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/modsi3.S b/contrib/libs/cxxsupp/builtins/arm/modsi3.S index d997107c28f..295a227d862 100644 --- a/contrib/libs/cxxsupp/builtins/arm/modsi3.S +++ b/contrib/libs/cxxsupp/builtins/arm/modsi3.S @@ -1,63 +1,63 @@ -/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - *===----------------------------------------------------------------------===// - * - * This file implements the __modsi3 (32-bit signed integer modulus) function - * for the ARM architecture as a wrapper around the unsigned routine. - * - *===----------------------------------------------------------------------===*/ - -#include "../assembly.h" - -#define ESTABLISH_FRAME \ - push {r4, r7, lr} ;\ - add r7, sp, #4 -#define CLEAR_FRAME_AND_RETURN \ - pop {r4, r7, pc} - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - -@ int __modsi3(int divident, int divisor) -@ Calculate and return the remainder of the (signed) division. - - .p2align 3 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__modsi3) -#else -DEFINE_COMPILERRT_FUNCTION(__modsi3) -#endif -#if __ARM_ARCH_EXT_IDIV__ - tst r1, r1 - beq LOCAL_LABEL(divzero) - sdiv r2, r0, r1 - mls r0, r2, r1, r0 - bx lr -LOCAL_LABEL(divzero): - mov r0, #0 - bx lr -#else - ESTABLISH_FRAME - // Set aside the sign of the dividend. - mov r4, r0 - // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). - eor r2, r0, r0, asr #31 - eor r3, r1, r1, asr #31 - sub r0, r2, r0, asr #31 - sub r1, r3, r1, asr #31 - // abs(a) % abs(b) - bl SYMBOL_NAME(__umodsi3) - // Apply sign of dividend to result and return. - eor r0, r0, r4, asr #31 - sub r0, r0, r4, asr #31 - CLEAR_FRAME_AND_RETURN -#endif -END_COMPILERRT_FUNCTION(__modsi3) +/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __modsi3 (32-bit signed integer modulus) function + * for the ARM architecture as a wrapper around the unsigned routine. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4, r7, lr} ;\ + add r7, sp, #4 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4, r7, pc} + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ int __modsi3(int dividend, int divisor) +@ Calculate and return the remainder of the (signed) division. + + .p2align 3 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__modsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__modsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divzero) + sdiv r2, r0, r1 + mls r0, r2, r1, r0 + bx lr +LOCAL_LABEL(divzero): + mov r0, #0 + bx lr +#else + ESTABLISH_FRAME + // Set aside the sign of the dividend. + mov r4, r0 + // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor r2, r0, r0, asr #31 + eor r3, r1, r1, asr #31 + sub r0, r2, r0, asr #31 + sub r1, r3, r1, asr #31 + // abs(a) % abs(b) + bl SYMBOL_NAME(__umodsi3) + // Apply sign of dividend to result and return. + eor r0, r0, r4, asr #31 + sub r0, r0, r4, asr #31 + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__modsi3) diff --git a/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S index 358051c55ac..636cc711ac1 100644 --- a/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S @@ -1,26 +1,26 @@ -//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __muldf3vfp(double a, double b); -// -// Multiplies two double precision floating point numbers using the Darwin -// calling convention where double arguments are passsed in GPR pairs -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__muldf3vfp) - vmov d6, r0, r1 // move first param from r0/r1 pair into d6 - vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vmul.f64 d6, d6, d7 - vmov r0, r1, d6 // move result back to r0/r1 pair - bx lr -END_COMPILERRT_FUNCTION(__muldf3vfp) +//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __muldf3vfp(double a, double b); +// +// Multiplies two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__muldf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vmul.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__muldf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S index f43b3dc033c..7f4008266bf 100644 --- a/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S @@ -1,26 +1,26 @@ -//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the 
University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __mulsf3vfp(float a, float b); -// -// Multiplies two single precision floating point numbers using the Darwin -// calling convention where single arguments are passsed like 32-bit ints. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp) - vmov s14, r0 // move first param from r0 into float register - vmov s15, r1 // move second param from r1 into float register - vmul.f32 s13, s14, s15 - vmov r0, s13 // move result back to r0 - bx lr -END_COMPILERRT_FUNCTION(__mulsf3vfp) +//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __mulsf3vfp(float a, float b); +// +// Multiplies two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed like 32-bit ints. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vmul.f32 s13, s14, s15 + vmov r0, s13 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__mulsf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S index 803cf492201..7ab2f5501ce 100644 --- a/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S @@ -1,29 +1,29 @@ -//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __nedf2vfp(double a, double b); -// -// Returns zero if a and b are unequal and neither is NaN. -// Uses Darwin calling convention where double precision arguments are passsed -// like in GPR pairs. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) - vmov d6, r0, r1 // load r0/r1 pair in double register - vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 - vmrs apsr_nzcv, fpscr - movne r0, #1 // set result register to 0 if unequal - moveq r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__nedf2vfp) +//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __nedf2vfp(double a, double b); +// +// Returns one if a and b are unequal and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passed +// in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movne r0, #1 // set result register to 1 if unequal + moveq r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__nedf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S index 742672acbc3..56d73c67617 100644 --- a/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S @@ -1,23 +1,23 @@ -//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __negdf2vfp(double a, double b); -// -// Returns the negation a double precision floating point numbers using the -// Darwin calling convention where double arguments are passsed in GPR pairs. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__negdf2vfp) - eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair - bx lr -END_COMPILERRT_FUNCTION(__negdf2vfp) +//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __negdf2vfp(double a); +// +// Returns the negation of a double precision floating point number using the +// Darwin calling convention where double arguments are passed in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__negdf2vfp) + eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__negdf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S index 1951043ba32..a6e32e1ff89 100644 --- a/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S @@ -1,23 +1,23 @@ -//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __negsf2vfp(float a); -// -// Returns the negation of a single precision floating point numbers using the -// Darwin calling convention where single arguments are passsed like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__negsf2vfp) - eor r0, r0, #-2147483648 // flip sign bit on float in r0 - bx lr -END_COMPILERRT_FUNCTION(__negsf2vfp) +//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __negsf2vfp(float a); +// +// Returns the negation of a single precision floating point numbers using the +// Darwin calling convention where single arguments are passsed like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__negsf2vfp) + eor r0, r0, #-2147483648 // flip sign bit on float in r0 + bx lr +END_COMPILERRT_FUNCTION(__negsf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S index 02a726e6027..9fe8ecdefb3 100644 --- a/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S @@ -1,29 +1,29 @@ -//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __nesf2vfp(float a, float b); -// -// Returns one iff a != b and neither is NaN. -// Uses Darwin calling convention where single precision arguments are passsed -// like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) - vmov s14, r0 // move from GPR 0 to float register - vmov s15, r1 // move from GPR 1 to float register - vcmp.f32 s14, s15 - vmrs apsr_nzcv, fpscr - movne r0, #1 // set result register to 1 if unequal - moveq r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__nesf2vfp) +//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __nesf2vfp(float a, float b); +// +// Returns one iff a != b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movne r0, #1 // set result register to 1 if unequal + moveq r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__nesf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S b/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S index a4866cc6f2e..0f6ea513616 100644 --- a/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S +++ b/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S @@ -1,33 +1,33 @@ -//===-- save_restore_regs.S - Implement save/restore* ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// When compiling C++ functions that need to handle thrown exceptions the -// compiler is required to save all registers and call __Unwind_SjLj_Register -// in the function prolog. But when compiling for thumb1, there are -// no instructions to access the floating point registers, so the -// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs -// written in ARM to save the float registers. In the epilog, the compiler -// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. 
-// - - .text - .syntax unified - -// -// Restore registers d8-d15 from stack -// - .p2align 2 -DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs) - vldmia sp!, {d8-d15} // pop registers d8-d15 off stack - bx lr // return to prolog -END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs) - +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. 
+// + + .text + .syntax unified + +// +// Restore registers d8-d15 from stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs) + vldmia sp!, {d8-d15} // pop registers d8-d15 off stack + bx lr // return to prolog +END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs) + diff --git a/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S b/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S index 9260d9c9a0c..f1d90e75808 100644 --- a/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S +++ b/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S @@ -1,33 +1,33 @@ -//===-- save_restore_regs.S - Implement save/restore* ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// When compiling C++ functions that need to handle thrown exceptions the -// compiler is required to save all registers and call __Unwind_SjLj_Register -// in the function prolog. But when compiling for thumb1, there are -// no instructions to access the floating point registers, so the -// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs -// written in ARM to save the float registers. In the epilog, the compiler -// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. 
-// - - .text - .syntax unified - -// -// Save registers d8-d15 onto stack -// - .p2align 2 -DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs) - vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack - bx lr // return to prolog -END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs) - +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. +// + + .text + .syntax unified + +// +// Save registers d8-d15 onto stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs) + vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack + bx lr // return to prolog +END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs) + diff --git a/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list b/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list index 6144dd57698..cc6a4b3cdd2 100644 --- a/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list +++ b/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list @@ -1,21 +1,21 @@ -# -# These are soft float functions which can be -# aliased to the *vfp functions on arm processors -# that support floating point instructions. 
-# -___adddf3vfp ___adddf3 -___addsf3vfp ___addsf3 -___divdf3vfp ___divdf3 -___divsf3vfp ___divsf3 -___extendsfdf2vfp ___extendsfdf2 -___fixdfsivfp ___fixdfsi -___fixsfsivfp ___fixsfsi -___floatsidfvfp ___floatsidf -___floatsisfvfp ___floatsisf -___muldf3vfp ___muldf3 -___mulsf3vfp ___mulsf3 -___subdf3vfp ___subdf3 -___subsf3vfp ___subsf3 -___truncdfsf2vfp ___truncdfsf2 -___floatunssidfvfp ___floatunsidf -___floatunssisfvfp ___floatunsisf +# +# These are soft float functions which can be +# aliased to the *vfp functions on arm processors +# that support floating point instructions. +# +___adddf3vfp ___adddf3 +___addsf3vfp ___addsf3 +___divdf3vfp ___divdf3 +___divsf3vfp ___divsf3 +___extendsfdf2vfp ___extendsfdf2 +___fixdfsivfp ___fixdfsi +___fixsfsivfp ___fixsfsi +___floatsidfvfp ___floatsidf +___floatsisfvfp ___floatsisf +___muldf3vfp ___muldf3 +___mulsf3vfp ___mulsf3 +___subdf3vfp ___subdf3 +___subsf3vfp ___subsf3 +___truncdfsf2vfp ___truncdfsf2 +___floatunssidfvfp ___floatunsidf +___floatunssisfvfp ___floatunsisf diff --git a/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S index b9d628a006d..5f3c0f70dbc 100644 --- a/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S @@ -1,26 +1,26 @@ -//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern double __subdf3vfp(double a, double b); -// -// Returns difference between two double precision floating point numbers using -// the Darwin calling convention where double arguments are passsed in GPR pairs -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) - vmov d6, r0, r1 // move first param from r0/r1 pair into d6 - vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vsub.f64 d6, d6, d7 - vmov r0, r1, d6 // move result back to r0/r1 pair - bx lr -END_COMPILERRT_FUNCTION(__subdf3vfp) +//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __subdf3vfp(double a, double b); +// +// Returns difference between two double precision floating point numbers using +// the Darwin calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vsub.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__subdf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S index 8d8e3d61551..d6e06df5192 100644 --- a/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S @@ -1,27 +1,27 @@ -//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual 
licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __subsf3vfp(float a, float b); -// -// Returns the difference between two single precision floating point numbers -// using the Darwin calling convention where single arguments are passsed -// like 32-bit ints. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) - vmov s14, r0 // move first param from r0 into float register - vmov s15, r1 // move second param from r1 into float register - vsub.f32 s14, s14, s15 - vmov r0, s14 // move result back to r0 - bx lr -END_COMPILERRT_FUNCTION(__subsf3vfp) +//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __subsf3vfp(float a, float b); +// +// Returns the difference between two single precision floating point numbers +// using the Darwin calling convention where single arguments are passsed +// like 32-bit ints. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vsub.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__subsf3vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/switch16.S b/contrib/libs/cxxsupp/builtins/arm/switch16.S index 4350d4fee36..3c3a6b10612 100644 --- a/contrib/libs/cxxsupp/builtins/arm/switch16.S +++ b/contrib/libs/cxxsupp/builtins/arm/switch16.S @@ -1,44 +1,44 @@ -//===-- switch.S - Implement switch* --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// When compiling switch statements in thumb mode, the compiler -// can use these __switch* helper functions The compiler emits a blx to -// the __switch* function followed by a table of displacements for each -// case statement. On entry, R0 is the index into the table. The __switch* -// function uses the return address in lr to find the start of the table. -// The first entry in the table is the count of the entries in the table. -// It then uses R0 to index into the table and get the displacement of the -// address to jump to. If R0 is greater than the size of the table, it jumps -// to the last entry in the table. Each displacement in the table is actually -// the distance from lr to the label, thus making the tables PIC. - - - .text - .syntax unified - -// -// The table contains signed 2-byte sized elements which are 1/2 the distance -// from lr to the target label. 
-// - .p2align 2 -DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16) - ldrh ip, [lr, #-1] // get first 16-bit word in table - cmp r0, ip // compare with index - add r0, lr, r0, lsl #1 // compute address of element in table - add ip, lr, ip, lsl #1 // compute address of last element in table - ite lo - ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range - ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range - add ip, lr, r0, lsl #1 // compute label = lr + element*2 - bx ip // jump to computed label -END_COMPILERRT_FUNCTION(__switch16) - +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed 2-byte sized elements which are 1/2 the distance +// from lr to the target label. 
+// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16) + ldrh ip, [lr, #-1] // get first 16-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #1 // compute address of element in table + add ip, lr, ip, lsl #1 // compute address of last element in table + ite lo + ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range + ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch16) + diff --git a/contrib/libs/cxxsupp/builtins/arm/switch32.S b/contrib/libs/cxxsupp/builtins/arm/switch32.S index 42e5782eb25..b38cd2b764a 100644 --- a/contrib/libs/cxxsupp/builtins/arm/switch32.S +++ b/contrib/libs/cxxsupp/builtins/arm/switch32.S @@ -1,44 +1,44 @@ -//===-- switch.S - Implement switch* --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// When compiling switch statements in thumb mode, the compiler -// can use these __switch* helper functions The compiler emits a blx to -// the __switch* function followed by a table of displacements for each -// case statement. On entry, R0 is the index into the table. The __switch* -// function uses the return address in lr to find the start of the table. -// The first entry in the table is the count of the entries in the table. -// It then uses R0 to index into the table and get the displacement of the -// address to jump to. If R0 is greater than the size of the table, it jumps -// to the last entry in the table. Each displacement in the table is actually -// the distance from lr to the label, thus making the tables PIC. 
- - - .text - .syntax unified - -// -// The table contains signed 4-byte sized elements which are the distance -// from lr to the target label. -// - .p2align 2 -DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32) - ldr ip, [lr, #-1] // get first 32-bit word in table - cmp r0, ip // compare with index - add r0, lr, r0, lsl #2 // compute address of element in table - add ip, lr, ip, lsl #2 // compute address of last element in table - ite lo - ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range - ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range - add ip, lr, r0 // compute label = lr + element - bx ip // jump to computed label -END_COMPILERRT_FUNCTION(__switch32) - +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed 4-byte sized elements which are the distance +// from lr to the target label. 
+// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32) + ldr ip, [lr, #-1] // get first 32-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #2 // compute address of element in table + add ip, lr, ip, lsl #2 // compute address of last element in table + ite lo + ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range + ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range + add ip, lr, r0 // compute label = lr + element + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch32) + diff --git a/contrib/libs/cxxsupp/builtins/arm/switch8.S b/contrib/libs/cxxsupp/builtins/arm/switch8.S index d33e545c734..d7c20423def 100644 --- a/contrib/libs/cxxsupp/builtins/arm/switch8.S +++ b/contrib/libs/cxxsupp/builtins/arm/switch8.S @@ -1,42 +1,42 @@ -//===-- switch.S - Implement switch* --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// When compiling switch statements in thumb mode, the compiler -// can use these __switch* helper functions The compiler emits a blx to -// the __switch* function followed by a table of displacements for each -// case statement. On entry, R0 is the index into the table. The __switch* -// function uses the return address in lr to find the start of the table. -// The first entry in the table is the count of the entries in the table. -// It then uses R0 to index into the table and get the displacement of the -// address to jump to. If R0 is greater than the size of the table, it jumps -// to the last entry in the table. Each displacement in the table is actually -// the distance from lr to the label, thus making the tables PIC. 
- - - .text - .syntax unified - -// -// The table contains signed byte sized elements which are 1/2 the distance -// from lr to the target label. -// - .p2align 2 -DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8) - ldrb ip, [lr, #-1] // get first byte in table - cmp r0, ip // signed compare with index - ite lo - ldrsblo r0, [lr, r0] // get indexed byte out of table - ldrsbhs r0, [lr, ip] // if out of range, use last entry in table - add ip, lr, r0, lsl #1 // compute label = lr + element*2 - bx ip // jump to computed label -END_COMPILERRT_FUNCTION(__switch8) - +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed byte sized elements which are 1/2 the distance +// from lr to the target label. 
+// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // signed compare with index + ite lo + ldrsblo r0, [lr, r0] // get indexed byte out of table + ldrsbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch8) + diff --git a/contrib/libs/cxxsupp/builtins/arm/switchu8.S b/contrib/libs/cxxsupp/builtins/arm/switchu8.S index af7ebedf83b..1844f11c604 100644 --- a/contrib/libs/cxxsupp/builtins/arm/switchu8.S +++ b/contrib/libs/cxxsupp/builtins/arm/switchu8.S @@ -1,42 +1,42 @@ -//===-- switch.S - Implement switch* --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// When compiling switch statements in thumb mode, the compiler -// can use these __switch* helper functions The compiler emits a blx to -// the __switch* function followed by a table of displacements for each -// case statement. On entry, R0 is the index into the table. The __switch* -// function uses the return address in lr to find the start of the table. -// The first entry in the table is the count of the entries in the table. -// It then uses R0 to index into the table and get the displacement of the -// address to jump to. If R0 is greater than the size of the table, it jumps -// to the last entry in the table. Each displacement in the table is actually -// the distance from lr to the label, thus making the tables PIC. - - - .text - .syntax unified - -// -// The table contains unsigned byte sized elements which are 1/2 the distance -// from lr to the target label. 
-// - .p2align 2 -DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8) - ldrb ip, [lr, #-1] // get first byte in table - cmp r0, ip // compare with index - ite lo - ldrblo r0, [lr, r0] // get indexed byte out of table - ldrbhs r0, [lr, ip] // if out of range, use last entry in table - add ip, lr, r0, lsl #1 // compute label = lr + element*2 - bx ip // jump to computed label -END_COMPILERRT_FUNCTION(__switchu8) - +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains unsigned byte sized elements which are 1/2 the distance +// from lr to the target label. 
+// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // compare with index + ite lo + ldrblo r0, [lr, r0] // get indexed byte out of table + ldrbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switchu8) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync-ops.h b/contrib/libs/cxxsupp/builtins/arm/sync-ops.h index 17d617ae4c8..ee02c30c6ea 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync-ops.h +++ b/contrib/libs/cxxsupp/builtins/arm/sync-ops.h @@ -1,64 +1,64 @@ -/*===-- sync-ops.h - --===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements outline macros for the __sync_fetch_and_* - * operations. Different instantiations will generate appropriate assembly for - * ARM and Thumb-2 versions of the functions. 
- * - *===----------------------------------------------------------------------===*/ - -#include "../assembly.h" - -#define SYNC_OP_4(op) \ - .p2align 2 ; \ - .thumb ; \ - .syntax unified ; \ - DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ - dmb ; \ - mov r12, r0 ; \ - LOCAL_LABEL(tryatomic_ ## op): \ - ldrex r0, [r12] ; \ - op(r2, r0, r1) ; \ - strex r3, r2, [r12] ; \ - cmp r3, #0 ; \ - bne LOCAL_LABEL(tryatomic_ ## op) ; \ - dmb ; \ - bx lr - -#define SYNC_OP_8(op) \ - .p2align 2 ; \ - .thumb ; \ - .syntax unified ; \ - DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ - push {r4, r5, r6, lr} ; \ - dmb ; \ - mov r12, r0 ; \ - LOCAL_LABEL(tryatomic_ ## op): \ - ldrexd r0, r1, [r12] ; \ - op(r4, r5, r0, r1, r2, r3) ; \ - strexd r6, r4, r5, [r12] ; \ - cmp r6, #0 ; \ - bne LOCAL_LABEL(tryatomic_ ## op) ; \ - dmb ; \ - pop {r4, r5, r6, pc} - -#define MINMAX_4(rD, rN, rM, cmp_kind) \ - cmp rN, rM ; \ - mov rD, rM ; \ - it cmp_kind ; \ - mov##cmp_kind rD, rN - -#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \ - cmp rN_LO, rM_LO ; \ - sbcs rN_HI, rM_HI ; \ - mov rD_LO, rM_LO ; \ - mov rD_HI, rM_HI ; \ - itt cmp_kind ; \ - mov##cmp_kind rD_LO, rN_LO ; \ - mov##cmp_kind rD_HI, rN_HI +/*===-- sync-ops.h - --===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements outline macros for the __sync_fetch_and_* + * operations. Different instantiations will generate appropriate assembly for + * ARM and Thumb-2 versions of the functions. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define SYNC_OP_4(op) \ + .p2align 2 ; \ + .thumb ; \ + .syntax unified ; \ + DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ + dmb ; \ + mov r12, r0 ; \ + LOCAL_LABEL(tryatomic_ ## op): \ + ldrex r0, [r12] ; \ + op(r2, r0, r1) ; \ + strex r3, r2, [r12] ; \ + cmp r3, #0 ; \ + bne LOCAL_LABEL(tryatomic_ ## op) ; \ + dmb ; \ + bx lr + +#define SYNC_OP_8(op) \ + .p2align 2 ; \ + .thumb ; \ + .syntax unified ; \ + DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ + push {r4, r5, r6, lr} ; \ + dmb ; \ + mov r12, r0 ; \ + LOCAL_LABEL(tryatomic_ ## op): \ + ldrexd r0, r1, [r12] ; \ + op(r4, r5, r0, r1, r2, r3) ; \ + strexd r6, r4, r5, [r12] ; \ + cmp r6, #0 ; \ + bne LOCAL_LABEL(tryatomic_ ## op) ; \ + dmb ; \ + pop {r4, r5, r6, pc} + +#define MINMAX_4(rD, rN, rM, cmp_kind) \ + cmp rN, rM ; \ + mov rD, rM ; \ + it cmp_kind ; \ + mov##cmp_kind rD, rN + +#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \ + cmp rN_LO, rM_LO ; \ + sbcs rN_HI, rM_HI ; \ + mov rD_LO, rM_LO ; \ + mov rD_HI, rM_HI ; \ + itt cmp_kind ; \ + mov##cmp_kind rD_LO, rN_LO ; \ + mov##cmp_kind rD_HI, rN_HI diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S index a9b7ef04a77..54c33e2d26b 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S @@ -1,21 +1,21 @@ -/*===-- sync_fetch_and_add_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_add_4 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -/* "adds" is 2 bytes shorter than "add". */ -#define add_4(rD, rN, rM) add rD, rN, rM - -SYNC_OP_4(add_4) - +/*===-- sync_fetch_and_add_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_add_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +/* "adds" is 2 bytes shorter than "add". */ +#define add_4(rD, rN, rM) add rD, rN, rM + +SYNC_OP_4(add_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S index 274c2989b51..5724bb148ba 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S @@ -1,24 +1,24 @@ -/*===-- sync_fetch_and_add_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_add_8 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ - adds rD_LO, rN_LO, rM_LO ; \ - adc rD_HI, rN_HI, rM_HI - -SYNC_OP_8(add_8) -#endif - +/*===-- sync_fetch_and_add_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_add_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + adds rD_LO, rN_LO, rM_LO ; \ + adc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(add_8) +#endif + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S index b7620e6e101..e2b77a1a87d 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S @@ -1,19 +1,19 @@ -/*===-- sync_fetch_and_and_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_and_4 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define and_4(rD, rN, rM) and rD, rN, rM - -SYNC_OP_4(and_4) +/*===-- sync_fetch_and_and_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_and_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define and_4(rD, rN, rM) and rD, rN, rM + +SYNC_OP_4(and_4) diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S index 29c4986a906..a74163a8600 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S @@ -1,23 +1,23 @@ -/*===-- sync_fetch_and_and_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_and_8 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ - and rD_LO, rN_LO, rM_LO ; \ - and rD_HI, rN_HI, rM_HI - -SYNC_OP_8(and_8) -#endif +/*===-- sync_fetch_and_and_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_and_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + and rD_LO, rN_LO, rM_LO ; \ + and rD_HI, rN_HI, rM_HI + +SYNC_OP_8(and_8) +#endif diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S index 6d16be3ec16..01e4f444c2f 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S @@ -1,20 +1,20 @@ -/*===-- sync_fetch_and_max_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_max_4 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt) - -SYNC_OP_4(max_4) - +/*===-- sync_fetch_and_max_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_max_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt) + +SYNC_OP_4(max_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S index 68a186e9bf7..1eef2b22366 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S @@ -1,21 +1,21 @@ -/*===-- sync_fetch_and_max_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_max_8 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt) - -SYNC_OP_8(max_8) -#endif +/*===-- sync_fetch_and_max_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_max_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt) + +SYNC_OP_8(max_8) +#endif diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S index 5333413c464..015626b63da 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S @@ -1,20 +1,20 @@ -/*===-- sync_fetch_and_min_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_min_4 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt) - -SYNC_OP_4(min_4) - +/*===-- sync_fetch_and_min_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_min_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt) + +SYNC_OP_4(min_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S index 18b7811f23c..ad5cce07544 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S @@ -1,21 +1,21 @@ -/*===-- sync_fetch_and_min_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_min_8 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt) - -SYNC_OP_8(min_8) -#endif +/*===-- sync_fetch_and_min_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_min_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt) + +SYNC_OP_8(min_8) +#endif diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S index f4f764ffced..b32a314b397 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S @@ -1,20 +1,20 @@ -/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_nand_4 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define nand_4(rD, rN, rM) bic rD, rN, rM - -SYNC_OP_4(nand_4) - +/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_nand_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define nand_4(rD, rN, rM) bic rD, rN, rM + +SYNC_OP_4(nand_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S index 2f8f707d19c..a2c17c09c08 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S @@ -1,24 +1,24 @@ -/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_nand_8 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ - bic rD_LO, rN_LO, rM_LO ; \ - bic rD_HI, rN_HI, rM_HI - -SYNC_OP_8(nand_8) -#endif - +/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_nand_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + bic rD_LO, rN_LO, rM_LO ; \ + bic rD_HI, rN_HI, rM_HI + +SYNC_OP_8(nand_8) +#endif + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S index 30b25890134..f2e08576aaa 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S @@ -1,20 +1,20 @@ -/*===-- sync_fetch_and_or_4.S - -------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_or_4 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define or_4(rD, rN, rM) orr rD, rN, rM - -SYNC_OP_4(or_4) - +/*===-- sync_fetch_and_or_4.S - -------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_or_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define or_4(rD, rN, rM) orr rD, rN, rM + +SYNC_OP_4(or_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S index 16af58e257c..87b940bf620 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S @@ -1,24 +1,24 @@ -/*===-- sync_fetch_and_or_8.S - -------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_or_8 function for the ARM - * architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ - orr rD_LO, rN_LO, rM_LO ; \ - orr rD_HI, rN_HI, rM_HI - -SYNC_OP_8(or_8) -#endif - +/*===-- sync_fetch_and_or_8.S - -------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_or_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + orr rD_LO, rN_LO, rM_LO ; \ + orr rD_HI, rN_HI, rM_HI + +SYNC_OP_8(or_8) +#endif + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S index 3e9d8a2e1a6..460b2bc1ed6 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S @@ -1,21 +1,21 @@ -/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_sub_4 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -/* "subs" is 2 bytes shorter than "sub". 
*/ -#define sub_4(rD, rN, rM) sub rD, rN, rM - -SYNC_OP_4(sub_4) - +/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_sub_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +/* "subs" is 2 bytes shorter than "sub". */ +#define sub_4(rD, rN, rM) sub rD, rN, rM + +SYNC_OP_4(sub_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S index 4dd26a5bfc3..a8035a27685 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S @@ -1,24 +1,24 @@ -/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_sub_8 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ - subs rD_LO, rN_LO, rM_LO ; \ - sbc rD_HI, rN_HI, rM_HI - -SYNC_OP_8(sub_8) -#endif - +/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. 
See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_sub_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + subs rD_LO, rN_LO, rM_LO ; \ + sbc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(sub_8) +#endif + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S index 627e7b72d7b..c5915303193 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S @@ -1,20 +1,20 @@ -/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_umax_4 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi) - -SYNC_OP_4(umax_4) - +/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umax_4 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi) + +SYNC_OP_4(umax_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S index 633538e4dec..d9b7965e52b 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S @@ -1,21 +1,21 @@ -/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_umax_8 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi) - -SYNC_OP_8(umax_8) -#endif +/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umax_8 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi) + +SYNC_OP_8(umax_8) +#endif diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S index 90c9c101d83..9f3896fca80 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S @@ -1,20 +1,20 @@ -/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_umin_4 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo) - -SYNC_OP_4(umin_4) - +/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umin_4 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo) + +SYNC_OP_4(umin_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S index 6c6214ef847..7bf5e235653 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S @@ -1,21 +1,21 @@ -/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_umin_8 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo) - -SYNC_OP_8(umin_8) -#endif +/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umin_8 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo) + +SYNC_OP_8(umin_8) +#endif diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S index 5c42ff54c76..7e7c90c9627 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S @@ -1,20 +1,20 @@ -/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_xor_4 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#define xor_4(rD, rN, rM) eor rD, rN, rM - -SYNC_OP_4(xor_4) - +/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_xor_4 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define xor_4(rD, rN, rM) eor rD, rN, rM + +SYNC_OP_4(xor_4) + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S index 34ae23ba00b..ea9aa6d4b0e 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S @@ -1,24 +1,24 @@ -/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __sync_fetch_and_xor_8 function for the ARM - * architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "sync-ops.h" - -#if __ARM_ARCH_PROFILE != 'M' -#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ - eor rD_LO, rN_LO, rM_LO ; \ - eor rD_HI, rN_HI, rM_HI - -SYNC_OP_8(xor_8) -#endif - +/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_xor_8 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + eor rD_LO, rN_LO, rM_LO ; \ + eor rD_HI, rN_HI, rM_HI + +SYNC_OP_8(xor_8) +#endif + diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S b/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S index 8d400bcfcbc..178f24534c7 100644 --- a/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S +++ b/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S @@ -1,35 +1,35 @@ -//===-- sync_synchronize - Implement memory barrier * ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode -// the compiler may emit a call to __sync_synchronize. -// On Darwin the implementation jumps to an OS supplied function named -// OSMemoryBarrier -// - - .text - .syntax unified - -#if __APPLE__ - - .p2align 2 -DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize) - stmfd sp!, {r7, lr} - add r7, sp, #0 - bl _OSMemoryBarrier - ldmfd sp!, {r7, pc} -END_COMPILERRT_FUNCTION(__sync_synchronize) - - // tell linker it can break up file at label boundaries - .subsections_via_symbols - -#endif +//===-- sync_synchronize - Implement memory barrier * ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode +// the compiler may emit a call to __sync_synchronize. +// On Darwin the implementation jumps to an OS supplied function named +// OSMemoryBarrier +// + + .text + .syntax unified + +#if __APPLE__ + + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize) + stmfd sp!, {r7, lr} + add r7, sp, #0 + bl _OSMemoryBarrier + ldmfd sp!, {r7, pc} +END_COMPILERRT_FUNCTION(__sync_synchronize) + + // tell linker it can break up file at label boundaries + .subsections_via_symbols + +#endif diff --git a/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S index a138dfceb2d..fa4362c45e7 100644 --- a/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S @@ -1,26 +1,26 @@ -//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern float __truncdfsf2vfp(double a); -// -// Converts double precision float to signle precision result. -// Uses Darwin calling convention where a double precision parameter is -// passed in a R0/R1 pair and a signle precision result is returned in R0. 
-// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) - vmov d7, r0, r1 // load double from r0/r1 pair - vcvt.f32.f64 s15, d7 // convert double to single (trucate precision) - vmov r0, s15 // return result in r0 - bx lr -END_COMPILERRT_FUNCTION(__truncdfsf2vfp) +//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __truncdfsf2vfp(double a); +// +// Converts double precision float to signle precision result. +// Uses Darwin calling convention where a double precision parameter is +// passed in a R0/R1 pair and a signle precision result is returned in R0. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) + vmov d7, r0, r1 // load double from r0/r1 pair + vcvt.f32.f64 s15, d7 // convert double to single (trucate precision) + vmov r0, s15 // return result in r0 + bx lr +END_COMPILERRT_FUNCTION(__truncdfsf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S b/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S index ae23f197533..85b84936c4b 100644 --- a/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S +++ b/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S @@ -1,184 +1,184 @@ -/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __udivmodsi4 (32-bit unsigned integer divide and - * modulus) function for the ARM 32-bit architecture. 
- * - *===----------------------------------------------------------------------===*/ - -#include "../assembly.h" - - .syntax unified - .text - -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - -@ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor, -@ unsigned int *remainder) -@ Calculate the quotient and remainder of the (unsigned) division. The return -@ value is the quotient, the remainder is placed in the variable. - - .p2align 2 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4) -#else -DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) -#endif -#if __ARM_ARCH_EXT_IDIV__ - tst r1, r1 - beq LOCAL_LABEL(divby0) - mov r3, r0 - udiv r0, r3, r1 - mls r1, r0, r1, r3 - str r1, [r2] - bx lr -#else - cmp r1, #1 - bcc LOCAL_LABEL(divby0) - beq LOCAL_LABEL(divby1) - cmp r0, r1 - bcc LOCAL_LABEL(quotient0) - /* - * Implement division using binary long division algorithm. - * - * r0 is the numerator, r1 the denominator. - * - * The code before JMP computes the correct shift I, so that - * r0 and (r1 << I) have the highest bit set in the same position. - * At the time of JMP, ip := .Ldiv0block - 12 * I. - * This depends on the fixed instruction size of block. - * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. - * - * block(shift) implements the test-and-update-quotient core. - * It assumes (r0 << shift) can be computed without overflow and - * that (r0 << shift) < 2 * r1. The quotient is stored in r3. - */ - -# ifdef __ARM_FEATURE_CLZ - clz ip, r0 - clz r3, r1 - /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ - sub r3, r3, ip -# if __ARM_ARCH_ISA_THUMB == 2 - adr ip, LOCAL_LABEL(div0block) + 1 - sub ip, ip, r3, lsl #1 -# else - adr ip, LOCAL_LABEL(div0block) -# endif - sub ip, ip, r3, lsl #2 - sub ip, ip, r3, lsl #3 - mov r3, #0 - bx ip -# else -# if __ARM_ARCH_ISA_THUMB == 2 -# error THUMB mode requires CLZ or UDIV -# endif - str r4, [sp, #-8]! 
- - mov r4, r0 - adr ip, LOCAL_LABEL(div0block) - - lsr r3, r4, #16 - cmp r3, r1 - movhs r4, r3 - subhs ip, ip, #(16 * 12) - - lsr r3, r4, #8 - cmp r3, r1 - movhs r4, r3 - subhs ip, ip, #(8 * 12) - - lsr r3, r4, #4 - cmp r3, r1 - movhs r4, r3 - subhs ip, #(4 * 12) - - lsr r3, r4, #2 - cmp r3, r1 - movhs r4, r3 - subhs ip, ip, #(2 * 12) - - /* Last block, no need to update r3 or r4. */ - cmp r1, r4, lsr #1 - subls ip, ip, #(1 * 12) - - ldr r4, [sp], #8 /* restore r4, we are done with it. */ - mov r3, #0 - - JMP(ip) -# endif - -#define IMM # - -#define block(shift) \ - cmp r0, r1, lsl IMM shift; \ - ITT(hs); \ - WIDE(addhs) r3, r3, IMM (1 << shift); \ - WIDE(subhs) r0, r0, r1, lsl IMM shift - - block(31) - block(30) - block(29) - block(28) - block(27) - block(26) - block(25) - block(24) - block(23) - block(22) - block(21) - block(20) - block(19) - block(18) - block(17) - block(16) - block(15) - block(14) - block(13) - block(12) - block(11) - block(10) - block(9) - block(8) - block(7) - block(6) - block(5) - block(4) - block(3) - block(2) - block(1) -LOCAL_LABEL(div0block): - block(0) - - str r0, [r2] - mov r0, r3 - JMP(lr) - -LOCAL_LABEL(quotient0): - str r0, [r2] - mov r0, #0 - JMP(lr) - -LOCAL_LABEL(divby1): - mov r3, #0 - str r3, [r2] - JMP(lr) -#endif /* __ARM_ARCH_EXT_IDIV__ */ - -LOCAL_LABEL(divby0): - mov r0, #0 -#ifdef __ARM_EABI__ - b __aeabi_idiv0 -#else - JMP(lr) -#endif - -END_COMPILERRT_FUNCTION(__udivmodsi4) +/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivmodsi4 (32-bit unsigned integer divide and + * modulus) function for the ARM 32-bit architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text + +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor, +@ unsigned int *remainder) +@ Calculate the quotient and remainder of the (unsigned) division. The return +@ value is the quotient, the remainder is placed in the variable. + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4) +#else +DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + mov r3, r0 + udiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + beq LOCAL_LABEL(divby1) + cmp r0, r1 + bcc LOCAL_LABEL(quotient0) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif + str r4, [sp, #-8]! 
+ + mov r4, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r4, #16 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r4, #8 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r4, #4 + cmp r3, r1 + movhs r4, r3 + subhs ip, #(4 * 12) + + lsr r3, r4, #2 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(2 * 12) + + /* Last block, no need to update r3 or r4. */ + cmp r1, r4, lsr #1 + subls ip, ip, #(1 * 12) + + ldr r4, [sp], #8 /* restore r4, we are done with it. */ + mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + str r0, [r2] + mov r0, r3 + JMP(lr) + +LOCAL_LABEL(quotient0): + str r0, [r2] + mov r0, #0 + JMP(lr) + +LOCAL_LABEL(divby1): + mov r3, #0 + str r3, [r2] + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivmodsi4) diff --git a/contrib/libs/cxxsupp/builtins/arm/udivsi3.S b/contrib/libs/cxxsupp/builtins/arm/udivsi3.S index dd6765bfbb5..165b2b58acb 100644 --- a/contrib/libs/cxxsupp/builtins/arm/udivsi3.S +++ b/contrib/libs/cxxsupp/builtins/arm/udivsi3.S @@ -1,170 +1,170 @@ -/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - *===----------------------------------------------------------------------===// - * - * This file implements the __udivsi3 (32-bit unsigned integer divide) - * function for the ARM 32-bit architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "../assembly.h" - - .syntax unified - .text - -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - - .p2align 2 -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) - -@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor) -@ Calculate and return the quotient of the (unsigned) division. - -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3) -#else -DEFINE_COMPILERRT_FUNCTION(__udivsi3) -#endif -#if __ARM_ARCH_EXT_IDIV__ - tst r1, r1 - beq LOCAL_LABEL(divby0) - udiv r0, r0, r1 - bx lr -#else - cmp r1, #1 - bcc LOCAL_LABEL(divby0) - IT(eq) - JMPc(lr, eq) - cmp r0, r1 - ITT(cc) - movcc r0, #0 - JMPc(lr, cc) - /* - * Implement division using binary long division algorithm. - * - * r0 is the numerator, r1 the denominator. - * - * The code before JMP computes the correct shift I, so that - * r0 and (r1 << I) have the highest bit set in the same position. - * At the time of JMP, ip := .Ldiv0block - 12 * I. - * This depends on the fixed instruction size of block. - * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. - * - * block(shift) implements the test-and-update-quotient core. - * It assumes (r0 << shift) can be computed without overflow and - * that (r0 << shift) < 2 * r1. The quotient is stored in r3. - */ - -# ifdef __ARM_FEATURE_CLZ - clz ip, r0 - clz r3, r1 - /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. 
*/ - sub r3, r3, ip -# if __ARM_ARCH_ISA_THUMB == 2 - adr ip, LOCAL_LABEL(div0block) + 1 - sub ip, ip, r3, lsl #1 -# else - adr ip, LOCAL_LABEL(div0block) -# endif - sub ip, ip, r3, lsl #2 - sub ip, ip, r3, lsl #3 - mov r3, #0 - bx ip -# else -# if __ARM_ARCH_ISA_THUMB == 2 -# error THUMB mode requires CLZ or UDIV -# endif - mov r2, r0 - adr ip, LOCAL_LABEL(div0block) - - lsr r3, r2, #16 - cmp r3, r1 - movhs r2, r3 - subhs ip, ip, #(16 * 12) - - lsr r3, r2, #8 - cmp r3, r1 - movhs r2, r3 - subhs ip, ip, #(8 * 12) - - lsr r3, r2, #4 - cmp r3, r1 - movhs r2, r3 - subhs ip, #(4 * 12) - - lsr r3, r2, #2 - cmp r3, r1 - movhs r2, r3 - subhs ip, ip, #(2 * 12) - - /* Last block, no need to update r2 or r3. */ - cmp r1, r2, lsr #1 - subls ip, ip, #(1 * 12) - - mov r3, #0 - - JMP(ip) -# endif - -#define IMM # - -#define block(shift) \ - cmp r0, r1, lsl IMM shift; \ - ITT(hs); \ - WIDE(addhs) r3, r3, IMM (1 << shift); \ - WIDE(subhs) r0, r0, r1, lsl IMM shift - - block(31) - block(30) - block(29) - block(28) - block(27) - block(26) - block(25) - block(24) - block(23) - block(22) - block(21) - block(20) - block(19) - block(18) - block(17) - block(16) - block(15) - block(14) - block(13) - block(12) - block(11) - block(10) - block(9) - block(8) - block(7) - block(6) - block(5) - block(4) - block(3) - block(2) - block(1) -LOCAL_LABEL(div0block): - block(0) - - mov r0, r3 - JMP(lr) -#endif /* __ARM_ARCH_EXT_IDIV__ */ - -LOCAL_LABEL(divby0): - mov r0, #0 -#ifdef __ARM_EABI__ - b __aeabi_idiv0 -#else - JMP(lr) -#endif - -END_COMPILERRT_FUNCTION(__udivsi3) +/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivsi3 (32-bit unsigned integer divide) + * function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text + +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + .p2align 2 +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) + +@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor) +@ Calculate and return the quotient of the (unsigned) division. + +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__udivsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r0, r0, r1 + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + IT(eq) + JMPc(lr, eq) + cmp r0, r1 + ITT(cc) + movcc r0, #0 + JMPc(lr, cc) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. 
*/ + sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif + mov r2, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r2, #16 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r2, #8 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r2, #4 + cmp r3, r1 + movhs r2, r3 + subhs ip, #(4 * 12) + + lsr r3, r2, #2 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(2 * 12) + + /* Last block, no need to update r2 or r3. */ + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * 12) + + mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + mov r0, r3 + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivsi3) diff --git a/contrib/libs/cxxsupp/builtins/arm/umodsi3.S b/contrib/libs/cxxsupp/builtins/arm/umodsi3.S index 6380d455d75..9e7a148ce46 100644 --- a/contrib/libs/cxxsupp/builtins/arm/umodsi3.S +++ b/contrib/libs/cxxsupp/builtins/arm/umodsi3.S @@ -1,161 +1,161 @@ -/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===// - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT 
and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===// - * - * This file implements the __umodsi3 (32-bit unsigned integer modulus) - * function for the ARM 32-bit architecture. - * - *===----------------------------------------------------------------------===*/ - -#include "../assembly.h" - - .syntax unified - .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif - -@ unsigned int __umodsi3(unsigned int divident, unsigned int divisor) -@ Calculate and return the remainder of the (unsigned) division. - - .p2align 2 -#if __ARM_ARCH_ISA_THUMB == 2 -DEFINE_COMPILERRT_THUMB_FUNCTION(__umodsi3) -#else -DEFINE_COMPILERRT_FUNCTION(__umodsi3) -#endif -#if __ARM_ARCH_EXT_IDIV__ - tst r1, r1 - beq LOCAL_LABEL(divby0) - udiv r2, r0, r1 - mls r0, r2, r1, r0 - bx lr -#else - cmp r1, #1 - bcc LOCAL_LABEL(divby0) - ITT(eq) - moveq r0, #0 - JMPc(lr, eq) - cmp r0, r1 - IT(cc) - JMPc(lr, cc) - /* - * Implement division using binary long division algorithm. - * - * r0 is the numerator, r1 the denominator. - * - * The code before JMP computes the correct shift I, so that - * r0 and (r1 << I) have the highest bit set in the same position. - * At the time of JMP, ip := .Ldiv0block - 8 * I. - * This depends on the fixed instruction size of block. - * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes. - * - * block(shift) implements the test-and-update-quotient core. - * It assumes (r0 << shift) can be computed without overflow and - * that (r0 << shift) < 2 * r1. The quotient is stored in r3. - */ - -# ifdef __ARM_FEATURE_CLZ - clz ip, r0 - clz r3, r1 - /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. 
*/ - sub r3, r3, ip -# if __ARM_ARCH_ISA_THUMB == 2 - adr ip, LOCAL_LABEL(div0block) + 1 - sub ip, ip, r3, lsl #1 -# else - adr ip, LOCAL_LABEL(div0block) -# endif - sub ip, ip, r3, lsl #3 - bx ip -# else -# if __ARM_ARCH_ISA_THUMB == 2 -# error THUMB mode requires CLZ or UDIV -# endif - mov r2, r0 - adr ip, LOCAL_LABEL(div0block) - - lsr r3, r2, #16 - cmp r3, r1 - movhs r2, r3 - subhs ip, ip, #(16 * 8) - - lsr r3, r2, #8 - cmp r3, r1 - movhs r2, r3 - subhs ip, ip, #(8 * 8) - - lsr r3, r2, #4 - cmp r3, r1 - movhs r2, r3 - subhs ip, #(4 * 8) - - lsr r3, r2, #2 - cmp r3, r1 - movhs r2, r3 - subhs ip, ip, #(2 * 8) - - /* Last block, no need to update r2 or r3. */ - cmp r1, r2, lsr #1 - subls ip, ip, #(1 * 8) - - JMP(ip) -# endif - -#define IMM # - -#define block(shift) \ - cmp r0, r1, lsl IMM shift; \ - IT(hs); \ - WIDE(subhs) r0, r0, r1, lsl IMM shift - - block(31) - block(30) - block(29) - block(28) - block(27) - block(26) - block(25) - block(24) - block(23) - block(22) - block(21) - block(20) - block(19) - block(18) - block(17) - block(16) - block(15) - block(14) - block(13) - block(12) - block(11) - block(10) - block(9) - block(8) - block(7) - block(6) - block(5) - block(4) - block(3) - block(2) - block(1) -LOCAL_LABEL(div0block): - block(0) - JMP(lr) -#endif /* __ARM_ARCH_EXT_IDIV__ */ - -LOCAL_LABEL(divby0): - mov r0, #0 -#ifdef __ARM_EABI__ - b __aeabi_idiv0 -#else - JMP(lr) -#endif - -END_COMPILERRT_FUNCTION(__umodsi3) +/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __umodsi3 (32-bit unsigned integer modulus) + * function for the ARM 32-bit architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ unsigned int __umodsi3(unsigned int divident, unsigned int divisor) +@ Calculate and return the remainder of the (unsigned) division. + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__umodsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__umodsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r2, r0, r1 + mls r0, r2, r1, r0 + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + ITT(eq) + moveq r0, #0 + JMPc(lr, eq) + cmp r0, r1 + IT(cc) + JMPc(lr, cc) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 8 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. 
*/ + sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #3 + bx ip +# else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif + mov r2, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r2, #16 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(16 * 8) + + lsr r3, r2, #8 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(8 * 8) + + lsr r3, r2, #4 + cmp r3, r1 + movhs r2, r3 + subhs ip, #(4 * 8) + + lsr r3, r2, #2 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(2 * 8) + + /* Last block, no need to update r2 or r3. */ + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * 8) + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + IT(hs); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__umodsi3) diff --git a/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S index 2efa2e6ebbb..c4bea2d5eeb 100644 --- a/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S @@ -1,29 +1,29 @@ -//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __unorddf2vfp(double a, double b); -// -// Returns one iff a or b is NaN -// Uses Darwin calling convention where double precision arguments are passsed -// like in GPR pairs. -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) - vmov d6, r0, r1 // load r0/r1 pair in double register - vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 - vmrs apsr_nzcv, fpscr - movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) - movvc r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__unorddf2vfp) +//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unorddf2vfp(double a, double b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__unorddf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S index d5e69f53824..886e9656810 100644 --- a/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S +++ b/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S @@ -1,29 +1,29 @@ -//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// extern int __unordsf2vfp(float a, float b); -// -// Returns one iff a or b is NaN -// Uses Darwin calling convention where single precision arguments are passsed -// like 32-bit ints -// - .syntax unified - .p2align 2 -DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) - vmov s14, r0 // move from GPR 0 to float register - vmov s15, r1 // move from GPR 1 to float register - vcmp.f32 s14, s15 - vmrs apsr_nzcv, fpscr - movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) - movvc r0, #0 - bx lr -END_COMPILERRT_FUNCTION(__unordsf2vfp) +//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unordsf2vfp(float a, float b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__unordsf2vfp) diff --git a/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk b/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk index 8c8768d8fd8..7f7e3866130 100644 --- a/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk +++ b/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk @@ -1,20 +1,20 @@ -#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := arm64 - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) +#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +#===------------------------------------------------------------------------===# + +ModuleName := builtins +SubDirs := +OnlyArchs := arm64 + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) +Implementation := Optimized + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk b/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk index 65185140188..f3c1807f01b 100644 --- a/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk +++ b/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk @@ -1,20 +1,20 @@ -#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := armv6m - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) +#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +#===------------------------------------------------------------------------===# + +ModuleName := builtins +SubDirs := +OnlyArchs := armv6m + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) +Implementation := Optimized + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/contrib/libs/cxxsupp/builtins/ashldi3.c b/contrib/libs/cxxsupp/builtins/ashldi3.c index f9f5bfe60e6..eb4698ac517 100644 --- a/contrib/libs/cxxsupp/builtins/ashldi3.c +++ b/contrib/libs/cxxsupp/builtins/ashldi3.c @@ -1,43 +1,43 @@ -/* ====-- ashldi3.c - Implement __ashldi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ashldi3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a << b */ - -/* Precondition: 0 <= b < bits_in_dword */ - -ARM_EABI_FNALIAS(llsl, ashldi3) - -COMPILER_RT_ABI di_int -__ashldi3(di_int a, si_int b) -{ - const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); - dwords input; - dwords result; - input.all = a; - if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ - { - result.s.low = 0; - result.s.high = input.s.low << (b - bits_in_word); - } - else /* 0 <= b < bits_in_word */ - { - if (b == 0) - return a; - result.s.low = input.s.low << b; - result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b)); - } - return result.all; -} +/* ====-- ashldi3.c - Implement __ashldi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashldi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a << b */ + +/* Precondition: 0 <= b < bits_in_dword */ + +ARM_EABI_FNALIAS(llsl, ashldi3) + +COMPILER_RT_ABI di_int +__ashldi3(di_int a, si_int b) +{ + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_word); + } + else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b)); + } + return result.all; +} diff --git a/contrib/libs/cxxsupp/builtins/ashlti3.c b/contrib/libs/cxxsupp/builtins/ashlti3.c index 9e33701d68d..638ae845ff0 100644 --- a/contrib/libs/cxxsupp/builtins/ashlti3.c +++ b/contrib/libs/cxxsupp/builtins/ashlti3.c @@ -1,45 +1,45 @@ -/* ===-- ashlti3.c - Implement __ashlti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ashlti3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a << b */ - -/* Precondition: 0 <= b < bits_in_tword */ - -COMPILER_RT_ABI ti_int -__ashlti3(ti_int a, si_int b) -{ - const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); - twords input; - twords result; - input.all = a; - if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ - { - result.s.low = 0; - result.s.high = input.s.low << (b - bits_in_dword); - } - else /* 0 <= b < bits_in_dword */ - { - if (b == 0) - return a; - result.s.low = input.s.low << b; - result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b)); - } - return result.all; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- ashlti3.c - Implement __ashlti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashlti3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a << b */ + +/* Precondition: 0 <= b < bits_in_tword */ + +COMPILER_RT_ABI ti_int +__ashlti3(ti_int a, si_int b) +{ + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_dword); + } + else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b)); + } + return result.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/ashrdi3.c b/contrib/libs/cxxsupp/builtins/ashrdi3.c index d750f193e34..14c878bb779 100644 --- a/contrib/libs/cxxsupp/builtins/ashrdi3.c +++ b/contrib/libs/cxxsupp/builtins/ashrdi3.c @@ -1,44 +1,44 @@ -/*===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ashrdi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: arithmetic a >> b */ - -/* Precondition: 0 <= b < bits_in_dword */ - -ARM_EABI_FNALIAS(lasr, ashrdi3) - -COMPILER_RT_ABI di_int -__ashrdi3(di_int a, si_int b) -{ - const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); - dwords input; - dwords result; - input.all = a; - if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ - { - /* result.s.high = input.s.high < 0 ? 
-1 : 0 */ - result.s.high = input.s.high >> (bits_in_word - 1); - result.s.low = input.s.high >> (b - bits_in_word); - } - else /* 0 <= b < bits_in_word */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); - } - return result.all; -} +/*===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashrdi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: arithmetic a >> b */ + +/* Precondition: 0 <= b < bits_in_dword */ + +ARM_EABI_FNALIAS(lasr, ashrdi3) + +COMPILER_RT_ABI di_int +__ashrdi3(di_int a, si_int b) +{ + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + /* result.s.high = input.s.high < 0 ? 
-1 : 0 */ + result.s.high = input.s.high >> (bits_in_word - 1); + result.s.low = input.s.high >> (b - bits_in_word); + } + else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; +} diff --git a/contrib/libs/cxxsupp/builtins/ashrti3.c b/contrib/libs/cxxsupp/builtins/ashrti3.c index 987f286ca3e..f78205d961e 100644 --- a/contrib/libs/cxxsupp/builtins/ashrti3.c +++ b/contrib/libs/cxxsupp/builtins/ashrti3.c @@ -1,46 +1,46 @@ -/* ===-- ashrti3.c - Implement __ashrti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ashrti3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: arithmetic a >> b */ - -/* Precondition: 0 <= b < bits_in_tword */ - -COMPILER_RT_ABI ti_int -__ashrti3(ti_int a, si_int b) -{ - const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); - twords input; - twords result; - input.all = a; - if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ - { - /* result.s.high = input.s.high < 0 ? 
-1 : 0 */ - result.s.high = input.s.high >> (bits_in_dword - 1); - result.s.low = input.s.high >> (b - bits_in_dword); - } - else /* 0 <= b < bits_in_dword */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); - } - return result.all; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- ashrti3.c - Implement __ashrti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashrti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: arithmetic a >> b */ + +/* Precondition: 0 <= b < bits_in_tword */ + +COMPILER_RT_ABI ti_int +__ashrti3(ti_int a, si_int b) +{ + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + /* result.s.high = input.s.high < 0 ? 
-1 : 0 */ + result.s.high = input.s.high >> (bits_in_dword - 1); + result.s.low = input.s.high >> (b - bits_in_dword); + } + else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/assembly.h b/contrib/libs/cxxsupp/builtins/assembly.h index e4a4d45b202..c28970534cc 100644 --- a/contrib/libs/cxxsupp/builtins/assembly.h +++ b/contrib/libs/cxxsupp/builtins/assembly.h @@ -1,158 +1,158 @@ -/* ===-- assembly.h - compiler-rt assembler support macros -----------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file defines macros for use in compiler-rt assembler source. - * This file is not part of the interface of this library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#ifndef COMPILERRT_ASSEMBLY_H -#define COMPILERRT_ASSEMBLY_H - -#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) -#define SEPARATOR @ -#else -#define SEPARATOR ; -#endif - -#if defined(__APPLE__) -#define HIDDEN(name) .private_extern name -#define LOCAL_LABEL(name) L_##name -// tell linker it can break up file at label boundaries -#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols -#define SYMBOL_IS_FUNC(name) -#define CONST_SECTION .const - -#elif defined(__ELF__) - -#define HIDDEN(name) .hidden name -#define LOCAL_LABEL(name) .L_##name -#define FILE_LEVEL_DIRECTIVE -#if defined(__arm__) -#define SYMBOL_IS_FUNC(name) .type name,%function -#else -#define SYMBOL_IS_FUNC(name) .type name,@function -#endif -#define CONST_SECTION .section .rodata - -#else // !__APPLE__ && !__ELF__ - -#define HIDDEN(name) -#define LOCAL_LABEL(name) .L ## name -#define FILE_LEVEL_DIRECTIVE -#define SYMBOL_IS_FUNC(name) \ - .def name SEPARATOR \ - .scl 2 SEPARATOR \ - .type 32 SEPARATOR \ - .endef -#define CONST_SECTION .section .rdata,"rd" - -#endif - -#if defined(__arm__) -#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 -#define ARM_HAS_BX -#endif -#if !defined(__ARM_FEATURE_CLZ) && \ - (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) -#define __ARM_FEATURE_CLZ -#endif - -#ifdef ARM_HAS_BX -#define JMP(r) bx r -#define JMPc(r, c) bx##c r -#else -#define JMP(r) mov pc, r -#define JMPc(r, c) mov##c pc, r -#endif - -// pop {pc} can't switch Thumb mode on ARMv4T -#if __ARM_ARCH >= 5 -#define POP_PC() pop {pc} -#else -#define POP_PC() \ - pop {ip}; \ - JMP(ip) -#endif - -#if __ARM_ARCH_ISA_THUMB == 2 -#define IT(cond) it cond -#define ITT(cond) itt cond -#else -#define IT(cond) -#define ITT(cond) -#endif - -#if __ARM_ARCH_ISA_THUMB == 2 -#define WIDE(op) op.w -#else -#define WIDE(op) op -#endif -#endif - -#define GLUE2(a, b) a##b -#define GLUE(a, b) GLUE2(a, 
b) -#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) - -#ifdef VISIBILITY_HIDDEN -#define DECLARE_SYMBOL_VISIBILITY(name) \ - HIDDEN(SYMBOL_NAME(name)) SEPARATOR -#else -#define DECLARE_SYMBOL_VISIBILITY(name) -#endif - -#define DEFINE_COMPILERRT_FUNCTION(name) \ - FILE_LEVEL_DIRECTIVE SEPARATOR \ - .globl SYMBOL_NAME(name) SEPARATOR \ - SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ - DECLARE_SYMBOL_VISIBILITY(name) \ - SYMBOL_NAME(name): - -#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \ - FILE_LEVEL_DIRECTIVE SEPARATOR \ - .globl SYMBOL_NAME(name) SEPARATOR \ - SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ - DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ - .thumb_func SEPARATOR \ - SYMBOL_NAME(name): - -#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \ - FILE_LEVEL_DIRECTIVE SEPARATOR \ - .globl SYMBOL_NAME(name) SEPARATOR \ - SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ - HIDDEN(SYMBOL_NAME(name)) SEPARATOR \ - SYMBOL_NAME(name): - -#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \ - .globl name SEPARATOR \ - SYMBOL_IS_FUNC(name) SEPARATOR \ - HIDDEN(name) SEPARATOR \ - name: - -#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ - .globl SYMBOL_NAME(name) SEPARATOR \ - SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ - .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR - -#if defined(__ARM_EABI__) -#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \ - DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) -#else -#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) -#endif - -#ifdef __ELF__ -#define END_COMPILERRT_FUNCTION(name) \ - .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) -#else -#define END_COMPILERRT_FUNCTION(name) -#endif - -#endif /* COMPILERRT_ASSEMBLY_H */ +/* ===-- assembly.h - compiler-rt assembler support macros -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file defines macros for use in compiler-rt assembler source. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef COMPILERRT_ASSEMBLY_H +#define COMPILERRT_ASSEMBLY_H + +#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) +#define SEPARATOR @ +#else +#define SEPARATOR ; +#endif + +#if defined(__APPLE__) +#define HIDDEN(name) .private_extern name +#define LOCAL_LABEL(name) L_##name +// tell linker it can break up file at label boundaries +#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols +#define SYMBOL_IS_FUNC(name) +#define CONST_SECTION .const + +#elif defined(__ELF__) + +#define HIDDEN(name) .hidden name +#define LOCAL_LABEL(name) .L_##name +#define FILE_LEVEL_DIRECTIVE +#if defined(__arm__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define CONST_SECTION .section .rodata + +#else // !__APPLE__ && !__ELF__ + +#define HIDDEN(name) +#define LOCAL_LABEL(name) .L ## name +#define FILE_LEVEL_DIRECTIVE +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define CONST_SECTION .section .rdata,"rd" + +#endif + +#if defined(__arm__) +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif +#if !defined(__ARM_FEATURE_CLZ) && \ + (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) +#define __ARM_FEATURE_CLZ +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#define JMPc(r, c) bx##c r +#else +#define JMP(r) mov pc, r +#define JMPc(r, c) mov##c pc, r +#endif + +// pop {pc} can't switch Thumb mode on ARMv4T +#if __ARM_ARCH >= 5 +#define POP_PC() pop {pc} +#else +#define POP_PC() \ + pop {ip}; \ + JMP(ip) +#endif + +#if __ARM_ARCH_ISA_THUMB == 2 +#define IT(cond) it cond +#define ITT(cond) 
itt cond +#else +#define IT(cond) +#define ITT(cond) +#endif + +#if __ARM_ARCH_ISA_THUMB == 2 +#define WIDE(op) op.w +#else +#define WIDE(op) op +#endif +#endif + +#define GLUE2(a, b) a##b +#define GLUE(a, b) GLUE2(a, b) +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#ifdef VISIBILITY_HIDDEN +#define DECLARE_SYMBOL_VISIBILITY(name) \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR +#else +#define DECLARE_SYMBOL_VISIBILITY(name) +#endif + +#define DEFINE_COMPILERRT_FUNCTION(name) \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .thumb_func SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + HIDDEN(name) SEPARATOR \ + name: + +#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR + +#if defined(__ARM_EABI__) +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \ + DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) +#else +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) +#endif + +#ifdef __ELF__ +#define END_COMPILERRT_FUNCTION(name) \ + .size SYMBOL_NAME(name), . 
- SYMBOL_NAME(name) +#else +#define END_COMPILERRT_FUNCTION(name) +#endif + +#endif /* COMPILERRT_ASSEMBLY_H */ diff --git a/contrib/libs/cxxsupp/builtins/atomic.c b/contrib/libs/cxxsupp/builtins/atomic.c index 42753ee8099..f1ddc3e0c52 100644 --- a/contrib/libs/cxxsupp/builtins/atomic.c +++ b/contrib/libs/cxxsupp/builtins/atomic.c @@ -1,331 +1,331 @@ -/*===-- atomic.c - Implement support functions for atomic operations.------=== +/*===-- atomic.c - Implement support functions for atomic operations.------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * atomic.c defines a set of functions for performing atomic accesses on + * arbitrary-sized memory locations. This design uses locks that should + * be fast in the uncontended case, for two reasons: * - * The LLVM Compiler Infrastructure + * 1) This code must work with C programs that do not link to anything + * (including pthreads) and so it should not depend on any pthread + * functions. + * 2) Atomic operations, rather than explicit mutexes, are most commonly used + * on code where contended operations are rate. * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------=== - * - * atomic.c defines a set of functions for performing atomic accesses on - * arbitrary-sized memory locations. This design uses locks that should - * be fast in the uncontended case, for two reasons: - * - * 1) This code must work with C programs that do not link to anything - * (including pthreads) and so it should not depend on any pthread - * functions. 
- * 2) Atomic operations, rather than explicit mutexes, are most commonly used - * on code where contended operations are rate. - * - * To avoid needing a per-object lock, this code allocates an array of - * locks and hashes the object pointers to find the one that it should use. - * For operations that must be atomic on two locations, the lower lock is - * always acquired first, to avoid deadlock. - * - *===----------------------------------------------------------------------=== - */ - -#include -#include - -#include "assembly.h" - -// Clang objects if you redefine a builtin. This little hack allows us to -// define a function with the same name as an intrinsic. -#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load) -#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store) -#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange) -#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(__atomic_compare_exchange) - -/// Number of locks. This allocates one page on 32-bit platforms, two on -/// 64-bit. This can be specified externally if a different trade between -/// memory usage and contention probability is required for a given platform. -#ifndef SPINLOCK_COUNT -#define SPINLOCK_COUNT (1<<10) -#endif -static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1; - -//////////////////////////////////////////////////////////////////////////////// -// Platform-specific lock implementation. Falls back to spinlocks if none is -// defined. Each platform should define the Lock type, and corresponding -// lock() and unlock() functions. 
-//////////////////////////////////////////////////////////////////////////////// -#ifdef __FreeBSD__ -#include -#include -#include -#include -typedef struct _usem Lock; -__inline static void unlock(Lock *l) { - __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE); - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - if (l->_has_waiters) - _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0); -} -__inline static void lock(Lock *l) { - uint32_t old = 1; - while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old, - 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { - _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0); - old = 1; - } -} -/// locks for atomic operations -static Lock locks[SPINLOCK_COUNT] = { [0 ... SPINLOCK_COUNT-1] = {0,1,0} }; - -#elif defined(__APPLE__) -#include -typedef OSSpinLock Lock; -__inline static void unlock(Lock *l) { - OSSpinLockUnlock(l); -} -/// Locks a lock. In the current implementation, this is potentially -/// unbounded in the contended case. -__inline static void lock(Lock *l) { - OSSpinLockLock(l); -} -static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0 - -#else -typedef _Atomic(uintptr_t) Lock; -/// Unlock a lock. This is a release operation. -__inline static void unlock(Lock *l) { - __c11_atomic_store(l, 0, __ATOMIC_RELEASE); -} -/// Locks a lock. In the current implementation, this is potentially -/// unbounded in the contended case. -__inline static void lock(Lock *l) { - uintptr_t old = 0; - while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE, - __ATOMIC_RELAXED)) - old = 0; -} -/// locks for atomic operations -static Lock locks[SPINLOCK_COUNT]; -#endif - - -/// Returns a lock to use for a given pointer. -static __inline Lock *lock_for_pointer(void *ptr) { - intptr_t hash = (intptr_t)ptr; - // Disregard the lowest 4 bits. We want all values that may be part of the - // same memory operation to hash to the same value and therefore use the same - // lock. 
- hash >>= 4; - // Use the next bits as the basis for the hash - intptr_t low = hash & SPINLOCK_MASK; - // Now use the high(er) set of bits to perturb the hash, so that we don't - // get collisions from atomic fields in a single object - hash >>= 16; - hash ^= low; - // Return a pointer to the word to use - return locks + (hash & SPINLOCK_MASK); -} - -/// Macros for determining whether a size is lock free. Clang can not yet -/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are -/// not lock free. -#define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1) -#define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2) -#define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4) -#define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8) -#define IS_LOCK_FREE_16 0 - -/// Macro that calls the compiler-generated lock-free versions of functions -/// when they exist. -#define LOCK_FREE_CASES() \ - do {\ - switch (size) {\ - case 2:\ - if (IS_LOCK_FREE_2) {\ - LOCK_FREE_ACTION(uint16_t);\ - }\ - case 4:\ - if (IS_LOCK_FREE_4) {\ - LOCK_FREE_ACTION(uint32_t);\ - }\ - case 8:\ - if (IS_LOCK_FREE_8) {\ - LOCK_FREE_ACTION(uint64_t);\ - }\ - case 16:\ - if (IS_LOCK_FREE_16) {\ - /* FIXME: __uint128_t isn't available on 32 bit platforms. - LOCK_FREE_ACTION(__uint128_t);*/\ - }\ - }\ - } while (0) - - -/// An atomic load operation. This is atomic with respect to the source -/// pointer only. -void __atomic_load_c(int size, void *src, void *dest, int model) { -#define LOCK_FREE_ACTION(type) \ - *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\ - return; - LOCK_FREE_CASES(); -#undef LOCK_FREE_ACTION - Lock *l = lock_for_pointer(src); - lock(l); - memcpy(dest, src, size); - unlock(l); -} - -/// An atomic store operation. This is atomic with respect to the destination -/// pointer only. 
-void __atomic_store_c(int size, void *dest, void *src, int model) { -#define LOCK_FREE_ACTION(type) \ - __c11_atomic_store((_Atomic(type)*)dest, *(type*)dest, model);\ - return; - LOCK_FREE_CASES(); -#undef LOCK_FREE_ACTION - Lock *l = lock_for_pointer(dest); - lock(l); - memcpy(dest, src, size); - unlock(l); -} - -/// Atomic compare and exchange operation. If the value at *ptr is identical -/// to the value at *expected, then this copies value at *desired to *ptr. If -/// they are not, then this stores the current value from *ptr in *expected. -/// -/// This function returns 1 if the exchange takes place or 0 if it fails. -int __atomic_compare_exchange_c(int size, void *ptr, void *expected, - void *desired, int success, int failure) { -#define LOCK_FREE_ACTION(type) \ - return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\ - *(type*)desired, success, failure) - LOCK_FREE_CASES(); -#undef LOCK_FREE_ACTION - Lock *l = lock_for_pointer(ptr); - lock(l); - if (memcmp(ptr, expected, size) == 0) { - memcpy(ptr, desired, size); - unlock(l); - return 1; - } - memcpy(expected, ptr, size); - unlock(l); - return 0; -} - -/// Performs an atomic exchange operation between two pointers. This is atomic -/// with respect to the target address. -void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) { -#define LOCK_FREE_ACTION(type) \ - *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\ - model);\ - return; - LOCK_FREE_CASES(); -#undef LOCK_FREE_ACTION - Lock *l = lock_for_pointer(ptr); - lock(l); - memcpy(old, ptr, size); - memcpy(ptr, val, size); - unlock(l); -} - -//////////////////////////////////////////////////////////////////////////////// -// Where the size is known at compile time, the compiler may emit calls to -// specialised versions of the above functions. 
-//////////////////////////////////////////////////////////////////////////////// -#define OPTIMISED_CASES\ - OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ - OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ - OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ - OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\ - /* FIXME: __uint128_t isn't available on 32 bit platforms. - OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\ - -#define OPTIMISED_CASE(n, lockfree, type)\ -type __atomic_load_##n(type *src, int model) {\ - if (lockfree)\ - return __c11_atomic_load((_Atomic(type)*)src, model);\ - Lock *l = lock_for_pointer(src);\ - lock(l);\ - type val = *src;\ - unlock(l);\ - return val;\ -} -OPTIMISED_CASES -#undef OPTIMISED_CASE - -#define OPTIMISED_CASE(n, lockfree, type)\ -void __atomic_store_##n(type *dest, type val, int model) {\ - if (lockfree) {\ - __c11_atomic_store((_Atomic(type)*)dest, val, model);\ - return;\ - }\ - Lock *l = lock_for_pointer(dest);\ - lock(l);\ - *dest = val;\ - unlock(l);\ - return;\ -} -OPTIMISED_CASES -#undef OPTIMISED_CASE - -#define OPTIMISED_CASE(n, lockfree, type)\ -type __atomic_exchange_##n(type *dest, type val, int model) {\ - if (lockfree)\ - return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\ - Lock *l = lock_for_pointer(dest);\ - lock(l);\ - type tmp = *dest;\ - *dest = val;\ - unlock(l);\ - return tmp;\ -} -OPTIMISED_CASES -#undef OPTIMISED_CASE - -#define OPTIMISED_CASE(n, lockfree, type)\ -int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\ - int success, int failure) {\ - if (lockfree)\ - return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\ - success, failure);\ - Lock *l = lock_for_pointer(ptr);\ - lock(l);\ - if (*ptr == *expected) {\ - *ptr = desired;\ - unlock(l);\ - return 1;\ - }\ - *expected = *ptr;\ - unlock(l);\ - return 0;\ -} -OPTIMISED_CASES -#undef OPTIMISED_CASE - -//////////////////////////////////////////////////////////////////////////////// -// Atomic 
read-modify-write operations for integers of various sizes. -//////////////////////////////////////////////////////////////////////////////// -#define ATOMIC_RMW(n, lockfree, type, opname, op) \ -type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\ - if (lockfree) \ - return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\ - Lock *l = lock_for_pointer(ptr);\ - lock(l);\ - type tmp = *ptr;\ - *ptr = tmp op val;\ - unlock(l);\ - return tmp;\ -} - -#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +) -OPTIMISED_CASES -#undef OPTIMISED_CASE -#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -) -OPTIMISED_CASES -#undef OPTIMISED_CASE -#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &) -OPTIMISED_CASES -#undef OPTIMISED_CASE -#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |) -OPTIMISED_CASES -#undef OPTIMISED_CASE -#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^) -OPTIMISED_CASES -#undef OPTIMISED_CASE + * To avoid needing a per-object lock, this code allocates an array of + * locks and hashes the object pointers to find the one that it should use. + * For operations that must be atomic on two locations, the lower lock is + * always acquired first, to avoid deadlock. + * + *===----------------------------------------------------------------------=== + */ + +#include +#include + +#include "assembly.h" + +// Clang objects if you redefine a builtin. This little hack allows us to +// define a function with the same name as an intrinsic. +#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load) +#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store) +#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange) +#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(__atomic_compare_exchange) + +/// Number of locks. 
This allocates one page on 32-bit platforms, two on +/// 64-bit. This can be specified externally if a different trade between +/// memory usage and contention probability is required for a given platform. +#ifndef SPINLOCK_COUNT +#define SPINLOCK_COUNT (1<<10) +#endif +static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1; + +//////////////////////////////////////////////////////////////////////////////// +// Platform-specific lock implementation. Falls back to spinlocks if none is +// defined. Each platform should define the Lock type, and corresponding +// lock() and unlock() functions. +//////////////////////////////////////////////////////////////////////////////// +#ifdef __FreeBSD__ +#include +#include +#include +#include +typedef struct _usem Lock; +__inline static void unlock(Lock *l) { + __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE); + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + if (l->_has_waiters) + _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0); +} +__inline static void lock(Lock *l) { + uint32_t old = 1; + while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old, + 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { + _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0); + old = 1; + } +} +/// locks for atomic operations +static Lock locks[SPINLOCK_COUNT] = { [0 ... SPINLOCK_COUNT-1] = {0,1,0} }; + +#elif defined(__APPLE__) +#include +typedef OSSpinLock Lock; +__inline static void unlock(Lock *l) { + OSSpinLockUnlock(l); +} +/// Locks a lock. In the current implementation, this is potentially +/// unbounded in the contended case. +__inline static void lock(Lock *l) { + OSSpinLockLock(l); +} +static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0 + +#else +typedef _Atomic(uintptr_t) Lock; +/// Unlock a lock. This is a release operation. +__inline static void unlock(Lock *l) { + __c11_atomic_store(l, 0, __ATOMIC_RELEASE); +} +/// Locks a lock. 
In the current implementation, this is potentially +/// unbounded in the contended case. +__inline static void lock(Lock *l) { + uintptr_t old = 0; + while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) + old = 0; +} +/// locks for atomic operations +static Lock locks[SPINLOCK_COUNT]; +#endif + + +/// Returns a lock to use for a given pointer. +static __inline Lock *lock_for_pointer(void *ptr) { + intptr_t hash = (intptr_t)ptr; + // Disregard the lowest 4 bits. We want all values that may be part of the + // same memory operation to hash to the same value and therefore use the same + // lock. + hash >>= 4; + // Use the next bits as the basis for the hash + intptr_t low = hash & SPINLOCK_MASK; + // Now use the high(er) set of bits to perturb the hash, so that we don't + // get collisions from atomic fields in a single object + hash >>= 16; + hash ^= low; + // Return a pointer to the word to use + return locks + (hash & SPINLOCK_MASK); +} + +/// Macros for determining whether a size is lock free. Clang can not yet +/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are +/// not lock free. +#define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1) +#define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2) +#define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4) +#define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8) +#define IS_LOCK_FREE_16 0 + +/// Macro that calls the compiler-generated lock-free versions of functions +/// when they exist. +#define LOCK_FREE_CASES() \ + do {\ + switch (size) {\ + case 2:\ + if (IS_LOCK_FREE_2) {\ + LOCK_FREE_ACTION(uint16_t);\ + }\ + case 4:\ + if (IS_LOCK_FREE_4) {\ + LOCK_FREE_ACTION(uint32_t);\ + }\ + case 8:\ + if (IS_LOCK_FREE_8) {\ + LOCK_FREE_ACTION(uint64_t);\ + }\ + case 16:\ + if (IS_LOCK_FREE_16) {\ + /* FIXME: __uint128_t isn't available on 32 bit platforms. + LOCK_FREE_ACTION(__uint128_t);*/\ + }\ + }\ + } while (0) + + +/// An atomic load operation. 
This is atomic with respect to the source +/// pointer only. +void __atomic_load_c(int size, void *src, void *dest, int model) { +#define LOCK_FREE_ACTION(type) \ + *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\ + return; + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(src); + lock(l); + memcpy(dest, src, size); + unlock(l); +} + +/// An atomic store operation. This is atomic with respect to the destination +/// pointer only. +void __atomic_store_c(int size, void *dest, void *src, int model) { +#define LOCK_FREE_ACTION(type) \ + __c11_atomic_store((_Atomic(type)*)dest, *(type*)dest, model);\ + return; + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(dest); + lock(l); + memcpy(dest, src, size); + unlock(l); +} + +/// Atomic compare and exchange operation. If the value at *ptr is identical +/// to the value at *expected, then this copies value at *desired to *ptr. If +/// they are not, then this stores the current value from *ptr in *expected. +/// +/// This function returns 1 if the exchange takes place or 0 if it fails. +int __atomic_compare_exchange_c(int size, void *ptr, void *expected, + void *desired, int success, int failure) { +#define LOCK_FREE_ACTION(type) \ + return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\ + *(type*)desired, success, failure) + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + if (memcmp(ptr, expected, size) == 0) { + memcpy(ptr, desired, size); + unlock(l); + return 1; + } + memcpy(expected, ptr, size); + unlock(l); + return 0; +} + +/// Performs an atomic exchange operation between two pointers. This is atomic +/// with respect to the target address. 
+void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) { +#define LOCK_FREE_ACTION(type) \ + *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\ + model);\ + return; + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + memcpy(old, ptr, size); + memcpy(ptr, val, size); + unlock(l); +} + +//////////////////////////////////////////////////////////////////////////////// +// Where the size is known at compile time, the compiler may emit calls to +// specialised versions of the above functions. +//////////////////////////////////////////////////////////////////////////////// +#define OPTIMISED_CASES\ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\ + /* FIXME: __uint128_t isn't available on 32 bit platforms. + OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\ + +#define OPTIMISED_CASE(n, lockfree, type)\ +type __atomic_load_##n(type *src, int model) {\ + if (lockfree)\ + return __c11_atomic_load((_Atomic(type)*)src, model);\ + Lock *l = lock_for_pointer(src);\ + lock(l);\ + type val = *src;\ + unlock(l);\ + return val;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type)\ +void __atomic_store_##n(type *dest, type val, int model) {\ + if (lockfree) {\ + __c11_atomic_store((_Atomic(type)*)dest, val, model);\ + return;\ + }\ + Lock *l = lock_for_pointer(dest);\ + lock(l);\ + *dest = val;\ + unlock(l);\ + return;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type)\ +type __atomic_exchange_##n(type *dest, type val, int model) {\ + if (lockfree)\ + return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\ + Lock *l = lock_for_pointer(dest);\ + lock(l);\ + type tmp = *dest;\ + *dest = val;\ + unlock(l);\ + return tmp;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define 
OPTIMISED_CASE(n, lockfree, type)\ +int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\ + int success, int failure) {\ + if (lockfree)\ + return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\ + success, failure);\ + Lock *l = lock_for_pointer(ptr);\ + lock(l);\ + if (*ptr == *expected) {\ + *ptr = desired;\ + unlock(l);\ + return 1;\ + }\ + *expected = *ptr;\ + unlock(l);\ + return 0;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +//////////////////////////////////////////////////////////////////////////////// +// Atomic read-modify-write operations for integers of various sizes. +//////////////////////////////////////////////////////////////////////////////// +#define ATOMIC_RMW(n, lockfree, type, opname, op) \ +type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\ + if (lockfree) \ + return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\ + Lock *l = lock_for_pointer(ptr);\ + lock(l);\ + type tmp = *ptr;\ + *ptr = tmp op val;\ + unlock(l);\ + return tmp;\ +} + +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^) +OPTIMISED_CASES +#undef OPTIMISED_CASE diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c b/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c index 58e2508cd0d..da912af6431 100644 --- a/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c +++ b/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c @@ -1,27 +1,27 @@ -/*===-- atomic_flag_clear.c 
-------------------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===------------------------------------------------------------------------=== - * - * This file implements atomic_flag_clear from C11's stdatomic.h. - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __has_include -#define __has_include(inc) 0 -#endif - -#if __has_include() - -#include -#undef atomic_flag_clear -void atomic_flag_clear(volatile atomic_flag *object) { - __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST); -} - -#endif +/*===-- atomic_flag_clear.c -------------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_clear from C11's stdatomic.h. 
+ * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include() + +#include +#undef atomic_flag_clear +void atomic_flag_clear(volatile atomic_flag *object) { + __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c b/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c index 3efab84f778..1059b787f16 100644 --- a/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c +++ b/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c @@ -1,28 +1,28 @@ -/*===-- atomic_flag_clear_explicit.c ----------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===------------------------------------------------------------------------=== - * - * This file implements atomic_flag_clear_explicit from C11's stdatomic.h. - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __has_include -#define __has_include(inc) 0 -#endif - -#if __has_include() - -#include -#undef atomic_flag_clear_explicit -void atomic_flag_clear_explicit(volatile atomic_flag *object, - memory_order order) { - __c11_atomic_store(&(object)->_Value, 0, order); -} - -#endif +/*===-- atomic_flag_clear_explicit.c ----------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_clear_explicit from C11's stdatomic.h. 
+ * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include() + +#include +#undef atomic_flag_clear_explicit +void atomic_flag_clear_explicit(volatile atomic_flag *object, + memory_order order) { + __c11_atomic_store(&(object)->_Value, 0, order); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c index d7fe7f8be27..e8811d39ef2 100644 --- a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c +++ b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c @@ -1,27 +1,27 @@ -/*===-- atomic_flag_test_and_set.c ------------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===------------------------------------------------------------------------=== - * - * This file implements atomic_flag_test_and_set from C11's stdatomic.h. - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __has_include -#define __has_include(inc) 0 -#endif - -#if __has_include() - -#include -#undef atomic_flag_test_and_set -_Bool atomic_flag_test_and_set(volatile atomic_flag *object) { - return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST); -} - -#endif +/*===-- atomic_flag_test_and_set.c ------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_test_and_set from C11's stdatomic.h. 
+ * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include() + +#include +#undef atomic_flag_test_and_set +_Bool atomic_flag_test_and_set(volatile atomic_flag *object) { + return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c index 273e9b651c9..5c8c2df9054 100644 --- a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c +++ b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c @@ -1,28 +1,28 @@ -/*===-- atomic_flag_test_and_set_explicit.c ---------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===------------------------------------------------------------------------=== - * - * This file implements atomic_flag_test_and_set_explicit from C11's stdatomic.h - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __has_include -#define __has_include(inc) 0 -#endif - -#if __has_include() - -#include -#undef atomic_flag_test_and_set_explicit -_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object, - memory_order order) { - return __c11_atomic_exchange(&(object)->_Value, 1, order); -} - -#endif +/*===-- atomic_flag_test_and_set_explicit.c ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_test_and_set_explicit from C11's stdatomic.h + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include() + +#include +#undef atomic_flag_test_and_set_explicit +_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object, + memory_order order) { + return __c11_atomic_exchange(&(object)->_Value, 1, order); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c b/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c index 23077c0aa37..9ccc2ae60ad 100644 --- a/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c +++ b/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c @@ -1,27 +1,27 @@ -/*===-- atomic_signal_fence.c -----------------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===------------------------------------------------------------------------=== - * - * This file implements atomic_signal_fence from C11's stdatomic.h. - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __has_include -#define __has_include(inc) 0 -#endif - -#if __has_include() - -#include -#undef atomic_signal_fence -void atomic_signal_fence(memory_order order) { - __c11_atomic_signal_fence(order); -} - -#endif +/*===-- atomic_signal_fence.c -----------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_signal_fence from C11's stdatomic.h. + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include() + +#include +#undef atomic_signal_fence +void atomic_signal_fence(memory_order order) { + __c11_atomic_signal_fence(order); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c b/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c index c81ae4f9bd4..d22560151bc 100644 --- a/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c +++ b/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c @@ -1,27 +1,27 @@ -/*===-- atomic_thread_fence.c -----------------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===------------------------------------------------------------------------=== - * - * This file implements atomic_thread_fence from C11's stdatomic.h. - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __has_include -#define __has_include(inc) 0 -#endif - -#if __has_include() - -#include -#undef atomic_thread_fence -void atomic_thread_fence(memory_order order) { - __c11_atomic_thread_fence(order); -} - -#endif +/*===-- atomic_thread_fence.c -----------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_thread_fence from C11's stdatomic.h. 
+ * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include() + +#include +#undef atomic_thread_fence +void atomic_thread_fence(memory_order order) { + __c11_atomic_thread_fence(order); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/clear_cache.c b/contrib/libs/cxxsupp/builtins/clear_cache.c index ad013eaded2..8eec0689394 100644 --- a/contrib/libs/cxxsupp/builtins/clear_cache.c +++ b/contrib/libs/cxxsupp/builtins/clear_cache.c @@ -1,159 +1,159 @@ -/* ===-- clear_cache.c - Implement __clear_cache ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include - -#if __APPLE__ - #include -#endif -#if (defined(__FreeBSD__) || defined(__Bitrig__)) && defined(__arm__) - #include - #include -#endif - -#if defined(__NetBSD__) && defined(__arm__) - #include -#endif - -#if defined(__mips__) - #include - #include - #include - #if defined(__ANDROID__) && defined(__LP64__) - /* - * clear_mips_cache - Invalidates instruction cache for Mips. - */ - static void clear_mips_cache(const void* Addr, size_t Size) { - asm volatile ( - ".set push\n" - ".set noreorder\n" - ".set noat\n" - "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */ - "nop\n" - "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */ - "rdhwr $v0, $1\n" /* Get step size for SYNCI. - $1 is $HW_SYNCI_Step */ - "beq $v0, $zero, 20f\n" /* If no caches require - synchronization, branch - around. */ - "nop\n" - "10:\n" - "synci 0(%[Addr])\n" /* Synchronize all caches around - address. */ - "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */ - "sltu $at, %[Addr], %[Size]\n" /* Compare current with end - address. 
*/ - "bne $at, $zero, 10b\n" /* Branch if more to do. */ - "nop\n" - "sync\n" /* Clear memory hazards. */ - "20:\n" - "bal 30f\n" - "nop\n" - "30:\n" - "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here. - Add offset of 12 to point to the - instruction after the last nop. - */ - "jr.hb $ra\n" /* Return, clearing instruction - hazards. */ - "nop\n" - ".set pop\n" - : [Addr] "+r"(Addr), [Size] "+r"(Size) - :: "at", "ra", "v0", "memory" - ); - } - #endif -#endif - -#if defined(__ANDROID__) && defined(__arm__) - #include -#endif - -/* - * The compiler generates calls to __clear_cache() when creating - * trampoline functions on the stack for use with nested functions. - * It is expected to invalidate the instruction cache for the - * specified range. - */ - -void __clear_cache(void *start, void *end) { -#if __i386__ || __x86_64__ -/* - * Intel processors have a unified instruction and data cache - * so there is nothing to do - */ -#elif defined(__arm__) && !defined(__APPLE__) - #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__Bitrig__) - struct arm_sync_icache_args arg; - - arg.addr = (uintptr_t)start; - arg.len = (uintptr_t)end - (uintptr_t)start; - - sysarch(ARM_SYNC_ICACHE, &arg); - #elif defined(__ANDROID__) +/* ===-- clear_cache.c - Implement __clear_cache ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include + +#if __APPLE__ + #include +#endif +#if (defined(__FreeBSD__) || defined(__Bitrig__)) && defined(__arm__) + #include + #include +#endif + +#if defined(__NetBSD__) && defined(__arm__) + #include +#endif + +#if defined(__mips__) + #include + #include + #include + #if defined(__ANDROID__) && defined(__LP64__) + /* + * clear_mips_cache - Invalidates instruction cache for Mips. + */ + static void clear_mips_cache(const void* Addr, size_t Size) { + asm volatile ( + ".set push\n" + ".set noreorder\n" + ".set noat\n" + "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */ + "nop\n" + "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */ + "rdhwr $v0, $1\n" /* Get step size for SYNCI. + $1 is $HW_SYNCI_Step */ + "beq $v0, $zero, 20f\n" /* If no caches require + synchronization, branch + around. */ + "nop\n" + "10:\n" + "synci 0(%[Addr])\n" /* Synchronize all caches around + address. */ + "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */ + "sltu $at, %[Addr], %[Size]\n" /* Compare current with end + address. */ + "bne $at, $zero, 10b\n" /* Branch if more to do. */ + "nop\n" + "sync\n" /* Clear memory hazards. */ + "20:\n" + "bal 30f\n" + "nop\n" + "30:\n" + "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here. + Add offset of 12 to point to the + instruction after the last nop. + */ + "jr.hb $ra\n" /* Return, clearing instruction + hazards. */ + "nop\n" + ".set pop\n" + : [Addr] "+r"(Addr), [Size] "+r"(Size) + :: "at", "ra", "v0", "memory" + ); + } + #endif +#endif + +#if defined(__ANDROID__) && defined(__arm__) + #include +#endif + +/* + * The compiler generates calls to __clear_cache() when creating + * trampoline functions on the stack for use with nested functions. + * It is expected to invalidate the instruction cache for the + * specified range. 
+ */ + +void __clear_cache(void *start, void *end) { +#if __i386__ || __x86_64__ +/* + * Intel processors have a unified instruction and data cache + * so there is nothing to do + */ +#elif defined(__arm__) && !defined(__APPLE__) + #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__Bitrig__) + struct arm_sync_icache_args arg; + + arg.addr = (uintptr_t)start; + arg.len = (uintptr_t)end - (uintptr_t)start; + + sysarch(ARM_SYNC_ICACHE, &arg); + #elif defined(__ANDROID__) int start_reg __asm("r0") = (int) (intptr_t) start; - const register int end_reg __asm("r1") = (int) (intptr_t) end; - const register int flags __asm("r2") = 0; - const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush; - __asm __volatile("svc 0x0" : "=r"(start_reg) - : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags) : "r0"); - if (start_reg != 0) { - compilerrt_abort(); - } - #else - compilerrt_abort(); - #endif -#elif defined(__mips__) - const uintptr_t start_int = (uintptr_t) start; - const uintptr_t end_int = (uintptr_t) end; - #if defined(__ANDROID__) && defined(__LP64__) - // Call synci implementation for short address range. - const uintptr_t address_range_limit = 256; - if ((end_int - start_int) <= address_range_limit) { - clear_mips_cache(start, (end_int - start_int)); - } else { - syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); - } - #else - syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); - #endif -#elif defined(__aarch64__) && !defined(__APPLE__) - uint64_t xstart = (uint64_t)(uintptr_t) start; - uint64_t xend = (uint64_t)(uintptr_t) end; - uint64_t addr; - - // Get Cache Type Info - uint64_t ctr_el0; - __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0)); - - /* - * dc & ic instructions must use 64bit registers so we don't use - * uintptr_t in case this runs in an IPL32 environment. 
- */ - const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15); - for (addr = xstart; addr < xend; addr += dcache_line_size) - __asm __volatile("dc cvau, %0" :: "r"(addr)); - __asm __volatile("dsb ish"); - - const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15); - for (addr = xstart; addr < xend; addr += icache_line_size) - __asm __volatile("ic ivau, %0" :: "r"(addr)); - __asm __volatile("isb sy"); -#else - #if __APPLE__ - /* On Darwin, sys_icache_invalidate() provides this functionality */ - sys_icache_invalidate(start, end-start); - #else - compilerrt_abort(); - #endif -#endif -} - + const register int end_reg __asm("r1") = (int) (intptr_t) end; + const register int flags __asm("r2") = 0; + const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush; + __asm __volatile("svc 0x0" : "=r"(start_reg) + : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags) : "r0"); + if (start_reg != 0) { + compilerrt_abort(); + } + #else + compilerrt_abort(); + #endif +#elif defined(__mips__) + const uintptr_t start_int = (uintptr_t) start; + const uintptr_t end_int = (uintptr_t) end; + #if defined(__ANDROID__) && defined(__LP64__) + // Call synci implementation for short address range. + const uintptr_t address_range_limit = 256; + if ((end_int - start_int) <= address_range_limit) { + clear_mips_cache(start, (end_int - start_int)); + } else { + syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); + } + #else + syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); + #endif +#elif defined(__aarch64__) && !defined(__APPLE__) + uint64_t xstart = (uint64_t)(uintptr_t) start; + uint64_t xend = (uint64_t)(uintptr_t) end; + uint64_t addr; + + // Get Cache Type Info + uint64_t ctr_el0; + __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0)); + + /* + * dc & ic instructions must use 64bit registers so we don't use + * uintptr_t in case this runs in an IPL32 environment. 
+ */ + const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15); + for (addr = xstart; addr < xend; addr += dcache_line_size) + __asm __volatile("dc cvau, %0" :: "r"(addr)); + __asm __volatile("dsb ish"); + + const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15); + for (addr = xstart; addr < xend; addr += icache_line_size) + __asm __volatile("ic ivau, %0" :: "r"(addr)); + __asm __volatile("isb sy"); +#else + #if __APPLE__ + /* On Darwin, sys_icache_invalidate() provides this functionality */ + sys_icache_invalidate(start, end-start); + #else + compilerrt_abort(); + #endif +#endif +} + diff --git a/contrib/libs/cxxsupp/builtins/clzdi2.c b/contrib/libs/cxxsupp/builtins/clzdi2.c index 5d1539ae5e8..b9e64da492b 100644 --- a/contrib/libs/cxxsupp/builtins/clzdi2.c +++ b/contrib/libs/cxxsupp/builtins/clzdi2.c @@ -1,29 +1,29 @@ -/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __clzdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: the number of leading 0-bits */ - -/* Precondition: a != 0 */ - -COMPILER_RT_ABI si_int -__clzdi2(di_int a) -{ - dwords x; - x.all = a; - const si_int f = -(x.s.high == 0); - return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) + - (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); -} +/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __clzdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of leading 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__clzdi2(di_int a) +{ + dwords x; + x.all = a; + const si_int f = -(x.s.high == 0); + return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/contrib/libs/cxxsupp/builtins/clzsi2.c b/contrib/libs/cxxsupp/builtins/clzsi2.c index 4dab1fbf5dc..25b8ed2c4c2 100644 --- a/contrib/libs/cxxsupp/builtins/clzsi2.c +++ b/contrib/libs/cxxsupp/builtins/clzsi2.c @@ -1,53 +1,53 @@ -/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __clzsi2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: the number of leading 0-bits */ - -/* Precondition: a != 0 */ - -COMPILER_RT_ABI si_int -__clzsi2(si_int a) -{ - su_int x = (su_int)a; - si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */ - x >>= 16 - t; /* x = [0 - 0xFFFF] */ - su_int r = t; /* r = [0, 16] */ - /* return r + clz(x) */ - t = ((x & 0xFF00) == 0) << 3; - x >>= 8 - t; /* x = [0 - 0xFF] */ - r += t; /* r = [0, 8, 16, 24] */ - /* return r + clz(x) */ - t = ((x & 0xF0) == 0) << 2; - x >>= 4 - t; /* x = [0 - 0xF] */ - r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ - /* return r + clz(x) */ - t = ((x & 0xC) == 0) << 1; - x >>= 2 - t; /* x = [0 - 3] */ - r += t; /* r = [0 - 30] and is even */ - /* return r + clz(x) */ -/* switch (x) - * { - * case 0: - * return r + 2; - * case 1: - * return r + 1; - * case 2: - * case 3: - * return r; - * } - */ - return r + ((2 - x) & -((x & 2) == 0)); -} +/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __clzsi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of leading 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__clzsi2(si_int a) +{ + su_int x = (su_int)a; + si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */ + x >>= 16 - t; /* x = [0 - 0xFFFF] */ + su_int r = t; /* r = [0, 16] */ + /* return r + clz(x) */ + t = ((x & 0xFF00) == 0) << 3; + x >>= 8 - t; /* x = [0 - 0xFF] */ + r += t; /* r = [0, 8, 16, 24] */ + /* return r + clz(x) */ + t = ((x & 0xF0) == 0) << 2; + x >>= 4 - t; /* x = [0 - 0xF] */ + r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ + /* return r + clz(x) */ + t = ((x & 0xC) == 0) << 1; + x >>= 2 - t; /* x = [0 - 3] */ + r += t; /* r = [0 - 30] and is even */ + /* return r + clz(x) */ +/* switch (x) + * { + * case 0: + * return r + 2; + * case 1: + * return r + 1; + * case 2: + * case 3: + * return r; + * } + */ + return r + ((2 - x) & -((x & 2) == 0)); +} diff --git a/contrib/libs/cxxsupp/builtins/clzti2.c b/contrib/libs/cxxsupp/builtins/clzti2.c index 8b8b928c2a8..15a7b3c9000 100644 --- a/contrib/libs/cxxsupp/builtins/clzti2.c +++ b/contrib/libs/cxxsupp/builtins/clzti2.c @@ -1,33 +1,33 @@ -/* ===-- clzti2.c - Implement __clzti2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __clzti2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: the number of leading 0-bits */ - -/* Precondition: a != 0 */ - -COMPILER_RT_ABI si_int -__clzti2(ti_int a) -{ - twords x; - x.all = a; - const di_int f = -(x.s.high == 0); - return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) + - ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- clzti2.c - Implement __clzti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __clzti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: the number of leading 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__clzti2(ti_int a) +{ + twords x; + x.all = a; + const di_int f = -(x.s.high == 0); + return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/cmpdi2.c b/contrib/libs/cxxsupp/builtins/cmpdi2.c index 7477eb8cc91..52634d9c336 100644 --- a/contrib/libs/cxxsupp/builtins/cmpdi2.c +++ b/contrib/libs/cxxsupp/builtins/cmpdi2.c @@ -1,51 +1,51 @@ -/* ===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - * - * This file implements __cmpdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: if (a < b) returns 0 -* if (a == b) returns 1 -* if (a > b) returns 2 -*/ - -COMPILER_RT_ABI si_int -__cmpdi2(di_int a, di_int b) -{ - dwords x; - x.all = a; - dwords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - return 1; -} - -#ifdef __ARM_EABI__ -/* Returns: if (a < b) returns -1 -* if (a == b) returns 0 -* if (a > b) returns 1 -*/ -COMPILER_RT_ABI si_int -__aeabi_lcmp(di_int a, di_int b) -{ - return __cmpdi2(a, b) - 1; -} -#endif - +/* ===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __cmpdi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: if (a < b) returns 0 +* if (a == b) returns 1 +* if (a > b) returns 2 +*/ + +COMPILER_RT_ABI si_int +__cmpdi2(di_int a, di_int b) +{ + dwords x; + x.all = a; + dwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#ifdef __ARM_EABI__ +/* Returns: if (a < b) returns -1 +* if (a == b) returns 0 +* if (a > b) returns 1 +*/ +COMPILER_RT_ABI si_int +__aeabi_lcmp(di_int a, di_int b) +{ + return __cmpdi2(a, b) - 1; +} +#endif + diff --git a/contrib/libs/cxxsupp/builtins/cmpti2.c b/contrib/libs/cxxsupp/builtins/cmpti2.c index 9280903fe17..2c8b56e29a0 100644 --- a/contrib/libs/cxxsupp/builtins/cmpti2.c +++ b/contrib/libs/cxxsupp/builtins/cmpti2.c @@ -1,42 +1,42 @@ -/* ===-- cmpti2.c - Implement __cmpti2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __cmpti2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: if (a < b) returns 0 - * if (a == b) returns 1 - * if (a > b) returns 2 - */ - -COMPILER_RT_ABI si_int -__cmpti2(ti_int a, ti_int b) -{ - twords x; - x.all = a; - twords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - return 1; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- cmpti2.c - Implement __cmpti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __cmpti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: if (a < b) returns 0 + * if (a == b) returns 1 + * if (a > b) returns 2 + */ + +COMPILER_RT_ABI si_int +__cmpti2(ti_int a, ti_int b) +{ + twords x; + x.all = a; + twords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/comparedf2.c b/contrib/libs/cxxsupp/builtins/comparedf2.c index 83503f2cabf..9e29752231e 100644 --- a/contrib/libs/cxxsupp/builtins/comparedf2.c +++ b/contrib/libs/cxxsupp/builtins/comparedf2.c @@ -1,146 +1,146 @@ -//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// // This file implements the following soft-float comparison routines: -// -// __eqdf2 __gedf2 __unorddf2 -// __ledf2 __gtdf2 -// __ltdf2 -// __nedf2 -// -// The semantics of the routines grouped in each column are identical, so there -// is a single implementation for each, and wrappers to provide the other names. -// -// The main routines behave as follows: -// -// __ledf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// 1 if either a or b is NaN -// -// __gedf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// -1 if either a or b is NaN -// -// __unorddf2(a,b) returns 0 if both a and b are numbers -// 1 if either a or b is NaN -// -// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of -// NaN values. -// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_lib.h" - -enum LE_RESULT { - LE_LESS = -1, - LE_EQUAL = 0, - LE_GREATER = 1, - LE_UNORDERED = 1 -}; - -COMPILER_RT_ABI enum LE_RESULT -__ledf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - // If either a or b is NaN, they are unordered. - if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; - - // If a and b are both zeros, they are equal. - if ((aAbs | bAbs) == 0) return LE_EQUAL; - - // If at least one of a and b is positive, we get the same result comparing - // a and b as signed integers as we would with a floating-point compare. - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } - - // Otherwise, both are negative, so we need to flip the sense of the - // comparison to get the correct result. 
(This assumes a twos- or ones- - // complement integer representation; if integers are represented in a - // sign-magnitude representation, then this flip is incorrect). - else { - if (aInt > bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } -} - -#if defined(__ELF__) -// Alias for libgcc compatibility -FNALIAS(__cmpdf2, __ledf2); -#endif - -enum GE_RESULT { - GE_LESS = -1, - GE_EQUAL = 0, - GE_GREATER = 1, - GE_UNORDERED = -1 // Note: different from LE_UNORDERED -}; - -COMPILER_RT_ABI enum GE_RESULT -__gedf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; - if ((aAbs | bAbs) == 0) return GE_EQUAL; - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } else { - if (aInt > bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } -} - -ARM_EABI_FNALIAS(dcmpun, unorddf2) - -COMPILER_RT_ABI int -__unorddf2(fp_t a, fp_t b) { - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - return aAbs > infRep || bAbs > infRep; -} - -// The following are alternative names for the preceding routines. - -COMPILER_RT_ABI enum LE_RESULT -__eqdf2(fp_t a, fp_t b) { - return __ledf2(a, b); -} - -COMPILER_RT_ABI enum LE_RESULT -__ltdf2(fp_t a, fp_t b) { - return __ledf2(a, b); -} - -COMPILER_RT_ABI enum LE_RESULT -__nedf2(fp_t a, fp_t b) { - return __ledf2(a, b); -} - -COMPILER_RT_ABI enum GE_RESULT -__gtdf2(fp_t a, fp_t b) { - return __gedf2(a, b); -} - +//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqdf2 __gedf2 __unorddf2 +// __ledf2 __gtdf2 +// __ltdf2 +// __nedf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __ledf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gedf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unorddf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +enum LE_RESULT { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1 +}; + +COMPILER_RT_ABI enum LE_RESULT +__ledf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } + + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. 
(This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + else { + if (aInt > bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } +} + +#if defined(__ELF__) +// Alias for libgcc compatibility +FNALIAS(__cmpdf2, __ledf2); +#endif + +enum GE_RESULT { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +COMPILER_RT_ABI enum GE_RESULT +__gedf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; + if ((aAbs | bAbs) == 0) return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } else { + if (aInt > bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } +} + +ARM_EABI_FNALIAS(dcmpun, unorddf2) + +COMPILER_RT_ABI int +__unorddf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} + +// The following are alternative names for the preceding routines. 
+ +COMPILER_RT_ABI enum LE_RESULT +__eqdf2(fp_t a, fp_t b) { + return __ledf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__ltdf2(fp_t a, fp_t b) { + return __ledf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__nedf2(fp_t a, fp_t b) { + return __ledf2(a, b); +} + +COMPILER_RT_ABI enum GE_RESULT +__gtdf2(fp_t a, fp_t b) { + return __gedf2(a, b); +} + diff --git a/contrib/libs/cxxsupp/builtins/comparesf2.c b/contrib/libs/cxxsupp/builtins/comparesf2.c index 0ed6dd6a4b3..1fd50636aba 100644 --- a/contrib/libs/cxxsupp/builtins/comparesf2.c +++ b/contrib/libs/cxxsupp/builtins/comparesf2.c @@ -1,145 +1,145 @@ -//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the following soft-fp_t comparison routines: -// -// __eqsf2 __gesf2 __unordsf2 -// __lesf2 __gtsf2 -// __ltsf2 -// __nesf2 -// -// The semantics of the routines grouped in each column are identical, so there -// is a single implementation for each, and wrappers to provide the other names. -// -// The main routines behave as follows: -// -// __lesf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// 1 if either a or b is NaN -// -// __gesf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// -1 if either a or b is NaN -// -// __unordsf2(a,b) returns 0 if both a and b are numbers -// 1 if either a or b is NaN -// -// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of -// NaN values. 
-// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_lib.h" - -enum LE_RESULT { - LE_LESS = -1, - LE_EQUAL = 0, - LE_GREATER = 1, - LE_UNORDERED = 1 -}; - -COMPILER_RT_ABI enum LE_RESULT -__lesf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - // If either a or b is NaN, they are unordered. - if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; - - // If a and b are both zeros, they are equal. - if ((aAbs | bAbs) == 0) return LE_EQUAL; - - // If at least one of a and b is positive, we get the same result comparing - // a and b as signed integers as we would with a fp_ting-point compare. - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } - - // Otherwise, both are negative, so we need to flip the sense of the - // comparison to get the correct result. (This assumes a twos- or ones- - // complement integer representation; if integers are represented in a - // sign-magnitude representation, then this flip is incorrect). 
- else { - if (aInt > bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } -} - -#if defined(__ELF__) -// Alias for libgcc compatibility -FNALIAS(__cmpsf2, __lesf2); -#endif - -enum GE_RESULT { - GE_LESS = -1, - GE_EQUAL = 0, - GE_GREATER = 1, - GE_UNORDERED = -1 // Note: different from LE_UNORDERED -}; - -COMPILER_RT_ABI enum GE_RESULT -__gesf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; - if ((aAbs | bAbs) == 0) return GE_EQUAL; - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } else { - if (aInt > bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } -} - -ARM_EABI_FNALIAS(fcmpun, unordsf2) - -COMPILER_RT_ABI int -__unordsf2(fp_t a, fp_t b) { - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - return aAbs > infRep || bAbs > infRep; -} - -// The following are alternative names for the preceding routines. - -COMPILER_RT_ABI enum LE_RESULT -__eqsf2(fp_t a, fp_t b) { - return __lesf2(a, b); -} - -COMPILER_RT_ABI enum LE_RESULT -__ltsf2(fp_t a, fp_t b) { - return __lesf2(a, b); -} - -COMPILER_RT_ABI enum LE_RESULT -__nesf2(fp_t a, fp_t b) { - return __lesf2(a, b); -} - -COMPILER_RT_ABI enum GE_RESULT -__gtsf2(fp_t a, fp_t b) { - return __gesf2(a, b); -} +//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the following soft-fp_t comparison routines: +// +// __eqsf2 __gesf2 __unordsf2 +// __lesf2 __gtsf2 +// __ltsf2 +// __nesf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __lesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordsf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +enum LE_RESULT { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1 +}; + +COMPILER_RT_ABI enum LE_RESULT +__lesf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } + + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. 
(This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + else { + if (aInt > bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } +} + +#if defined(__ELF__) +// Alias for libgcc compatibility +FNALIAS(__cmpsf2, __lesf2); +#endif + +enum GE_RESULT { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +COMPILER_RT_ABI enum GE_RESULT +__gesf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; + if ((aAbs | bAbs) == 0) return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } else { + if (aInt > bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } +} + +ARM_EABI_FNALIAS(fcmpun, unordsf2) + +COMPILER_RT_ABI int +__unordsf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} + +// The following are alternative names for the preceding routines. 
+ +COMPILER_RT_ABI enum LE_RESULT +__eqsf2(fp_t a, fp_t b) { + return __lesf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__ltsf2(fp_t a, fp_t b) { + return __lesf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__nesf2(fp_t a, fp_t b) { + return __lesf2(a, b); +} + +COMPILER_RT_ABI enum GE_RESULT +__gtsf2(fp_t a, fp_t b) { + return __gesf2(a, b); +} diff --git a/contrib/libs/cxxsupp/builtins/comparetf2.c b/contrib/libs/cxxsupp/builtins/comparetf2.c index 0b4c16b1e32..c0ad8ed0aec 100644 --- a/contrib/libs/cxxsupp/builtins/comparetf2.c +++ b/contrib/libs/cxxsupp/builtins/comparetf2.c @@ -1,138 +1,138 @@ -//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// // This file implements the following soft-float comparison routines: -// -// __eqtf2 __getf2 __unordtf2 -// __letf2 __gttf2 -// __lttf2 -// __netf2 -// -// The semantics of the routines grouped in each column are identical, so there -// is a single implementation for each, and wrappers to provide the other names. -// -// The main routines behave as follows: -// -// __letf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// 1 if either a or b is NaN -// -// __getf2(a,b) returns -1 if a < b -// 0 if a == b -// 1 if a > b -// -1 if either a or b is NaN -// -// __unordtf2(a,b) returns 0 if both a and b are numbers -// 1 if either a or b is NaN -// -// Note that __letf2( ) and __getf2( ) are identical except in their handling of -// NaN values. 
-// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -enum LE_RESULT { - LE_LESS = -1, - LE_EQUAL = 0, - LE_GREATER = 1, - LE_UNORDERED = 1 -}; - -COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - // If either a or b is NaN, they are unordered. - if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; - - // If a and b are both zeros, they are equal. - if ((aAbs | bAbs) == 0) return LE_EQUAL; - - // If at least one of a and b is positive, we get the same result comparing - // a and b as signed integers as we would with a floating-point compare. - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } - else { - // Otherwise, both are negative, so we need to flip the sense of the - // comparison to get the correct result. (This assumes a twos- or ones- - // complement integer representation; if integers are represented in a - // sign-magnitude representation, then this flip is incorrect). 
- if (aInt > bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } -} - -#if defined(__ELF__) -// Alias for libgcc compatibility -FNALIAS(__cmptf2, __letf2); -#endif - -enum GE_RESULT { - GE_LESS = -1, - GE_EQUAL = 0, - GE_GREATER = 1, - GE_UNORDERED = -1 // Note: different from LE_UNORDERED -}; - -COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; - if ((aAbs | bAbs) == 0) return GE_EQUAL; - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } else { - if (aInt > bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } -} - -COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) { - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - return aAbs > infRep || bAbs > infRep; -} - -// The following are alternative names for the preceding routines. - -COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) { - return __letf2(a, b); -} - -COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) { - return __letf2(a, b); -} - -COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) { - return __letf2(a, b); -} - -COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) { - return __getf2(a, b); -} - -#endif +//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqtf2 __getf2 __unordtf2 +// __letf2 __gttf2 +// __lttf2 +// __netf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __letf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __getf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordtf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __letf2( ) and __getf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +enum LE_RESULT { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1 +}; + +COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } + else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. 
(This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + if (aInt > bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } +} + +#if defined(__ELF__) +// Alias for libgcc compatibility +FNALIAS(__cmptf2, __letf2); +#endif + +enum GE_RESULT { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; + if ((aAbs | bAbs) == 0) return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } else { + if (aInt > bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } +} + +COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} + +// The following are alternative names for the preceding routines. 
+ +COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) { + return __letf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) { + return __letf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) { + return __letf2(a, b); +} + +COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) { + return __getf2(a, b); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/ctzdi2.c b/contrib/libs/cxxsupp/builtins/ctzdi2.c index 6474996b5f0..db3c6fdc08f 100644 --- a/contrib/libs/cxxsupp/builtins/ctzdi2.c +++ b/contrib/libs/cxxsupp/builtins/ctzdi2.c @@ -1,29 +1,29 @@ -/* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ctzdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: the number of trailing 0-bits */ - -/* Precondition: a != 0 */ - -COMPILER_RT_ABI si_int -__ctzdi2(di_int a) -{ - dwords x; - x.all = a; - const si_int f = -(x.s.low == 0); - return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) + - (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); -} +/* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ctzdi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of trailing 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__ctzdi2(di_int a) +{ + dwords x; + x.all = a; + const si_int f = -(x.s.low == 0); + return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/contrib/libs/cxxsupp/builtins/ctzsi2.c b/contrib/libs/cxxsupp/builtins/ctzsi2.c index bc31f8d4055..c69486ea445 100644 --- a/contrib/libs/cxxsupp/builtins/ctzsi2.c +++ b/contrib/libs/cxxsupp/builtins/ctzsi2.c @@ -1,57 +1,57 @@ -/* ===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ctzsi2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: the number of trailing 0-bits */ - -/* Precondition: a != 0 */ - -COMPILER_RT_ABI si_int -__ctzsi2(si_int a) -{ - su_int x = (su_int)a; - si_int t = ((x & 0x0000FFFF) == 0) << 4; /* if (x has no small bits) t = 16 else 0 */ - x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */ - su_int r = t; /* r = [0, 16] */ - /* return r + ctz(x) */ - t = ((x & 0x00FF) == 0) << 3; - x >>= t; /* x = [0 - 0xFF] + higher garbage bits */ - r += t; /* r = [0, 8, 16, 24] */ - /* return r + ctz(x) */ - t = ((x & 0x0F) == 0) << 2; - x >>= t; /* x = [0 - 0xF] + higher garbage bits */ - r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ - /* return r + ctz(x) */ - t = ((x & 0x3) == 0) << 1; - x >>= t; - x &= 3; /* x = [0 - 3] */ - r += t; /* r = [0 - 30] and is even */ - /* return r + ctz(x) */ - -/* The branch-less return statement below is equivalent - * to the following switch statement: - * switch (x) - * { - * case 0: - * return r + 2; - * case 2: - * return r + 1; - * case 1: - * case 3: - * return r; - * } - */ - return r + ((2 - (x >> 1)) & -((x & 1) == 0)); -} +/* ===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ctzsi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of trailing 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__ctzsi2(si_int a) +{ + su_int x = (su_int)a; + si_int t = ((x & 0x0000FFFF) == 0) << 4; /* if (x has no small bits) t = 16 else 0 */ + x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */ + su_int r = t; /* r = [0, 16] */ + /* return r + ctz(x) */ + t = ((x & 0x00FF) == 0) << 3; + x >>= t; /* x = [0 - 0xFF] + higher garbage bits */ + r += t; /* r = [0, 8, 16, 24] */ + /* return r + ctz(x) */ + t = ((x & 0x0F) == 0) << 2; + x >>= t; /* x = [0 - 0xF] + higher garbage bits */ + r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ + /* return r + ctz(x) */ + t = ((x & 0x3) == 0) << 1; + x >>= t; + x &= 3; /* x = [0 - 3] */ + r += t; /* r = [0 - 30] and is even */ + /* return r + ctz(x) */ + +/* The branch-less return statement below is equivalent + * to the following switch statement: + * switch (x) + * { + * case 0: + * return r + 2; + * case 2: + * return r + 1; + * case 1: + * case 3: + * return r; + * } + */ + return r + ((2 - (x >> 1)) & -((x & 1) == 0)); +} diff --git a/contrib/libs/cxxsupp/builtins/ctzti2.c b/contrib/libs/cxxsupp/builtins/ctzti2.c index b9f4f16123e..45de682700c 100644 --- a/contrib/libs/cxxsupp/builtins/ctzti2.c +++ b/contrib/libs/cxxsupp/builtins/ctzti2.c @@ -1,33 +1,33 @@ -/* ===-- ctzti2.c - Implement __ctzti2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ctzti2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: the number of trailing 0-bits */ - -/* Precondition: a != 0 */ - -COMPILER_RT_ABI si_int -__ctzti2(ti_int a) -{ - twords x; - x.all = a; - const di_int f = -(x.s.low == 0); - return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) + - ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- ctzti2.c - Implement __ctzti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ctzti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: the number of trailing 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__ctzti2(ti_int a) +{ + twords x; + x.all = a; + const di_int f = -(x.s.low == 0); + return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/divdc3.c b/contrib/libs/cxxsupp/builtins/divdc3.c index 57f4ce4c3c3..3c88390b5e7 100644 --- a/contrib/libs/cxxsupp/builtins/divdc3.c +++ b/contrib/libs/cxxsupp/builtins/divdc3.c @@ -1,60 +1,60 @@ -/* ===-- divdc3.c - Implement __divdc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - * - * This file implements __divdc3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the quotient of (a + ib) / (c + id) */ - -COMPILER_RT_ABI Dcomplex -__divdc3(double __a, double __b, double __c, double __d) -{ - int __ilogbw = 0; - double __logbw = crt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbn(__c, -__ilogbw); - __d = crt_scalbn(__d, -__ilogbw); - } - double __denom = __c * __c + __d * __d; - Dcomplex z; - COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); - COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a; - COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a); - __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b); - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0.0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c); - __d = crt_copysign(crt_isinf(__d) ? 
1.0 : 0.0, __d); - COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); - } - } - return z; -} +/* ===-- divdc3.c - Implement __divdc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divdc3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI Dcomplex +__divdc3(double __a, double __b, double __c, double __d) +{ + int __ilogbw = 0; + double __logbw = crt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbn(__c, -__ilogbw); + __d = crt_scalbn(__d, -__ilogbw); + } + double __denom = __c * __c + __d * __d; + Dcomplex z; + COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && crt_isfinite(__d)) + { + __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a); + __b = crt_copysign(crt_isinf(__b) ? 
1.0 : 0.0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0.0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c); + __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d); + COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); + } + } + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/divdf3.c b/contrib/libs/cxxsupp/builtins/divdf3.c index 21fb6d366bd..ab44c2b25fe 100644 --- a/contrib/libs/cxxsupp/builtins/divdf3.c +++ b/contrib/libs/cxxsupp/builtins/divdf3.c @@ -1,185 +1,185 @@ -//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements double-precision soft-float division -// with the IEEE-754 default rounding (to nearest, ties to even). -// -// For simplicity, this implementation currently flushes denormals to zero. -// It should be a fairly straightforward exercise to implement gradual -// underflow with correct rounding. 
-// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(ddiv, divdf3) - -COMPILER_RT_ABI fp_t -__divdf3(fp_t a, fp_t b) { - - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. - if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) return fromRep(qnanRep); - // zero / anything else = +/- zero - else return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) return fromRep(infRep | quotientSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale -= normalize(&bSignificand); - } - - // Or in the implicit significand bit. (If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) 
- aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. - const uint32_t q31b = bSignificand >> 21; - uint32_t recip32 = UINT32_C(0x7504f333) - q31b; - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: +//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +// For simplicity, this implementation currently flushes denormals to zero. +// It should be a fairly straightforward exercise to implement gradual +// underflow with correct rounding. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(ddiv, divdf3) + +COMPILER_RT_ABI fp_t +__divdf3(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. 
+ const uint32_t q31b = bSignificand >> 21; + uint32_t recip32 = UINT32_C(0x7504f333) - q31b; + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + uint32_t correction32; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + + // recip32 might have overflowed to exactly zero in the preceding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip32 downward by one bit. + recip32--; + + // We need to perform one more iteration to get us to 56 binary digits; + // The last iteration needs to happen with extra precision. + const uint32_t q63blo = bSignificand << 11; + uint64_t correction, reciprocal; + correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32)); + uint32_t cHi = correction >> 32; + uint32_t cLo = correction; + reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32); + + // We already adjusted the 32-bit estimate, now we need to adjust the final + // 64-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-56, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. 
Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q53 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-53 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + rep_t quotient, quotientLo; + wideMultiply(aSignificand << 2, reciprocal, "ient, "ientLo); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. - uint32_t correction32; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - - // recip32 might have overflowed to exactly zero in the preceding - // computation if the high word of b is exactly 1.0. This would sabotage - // the full-width final stage of the computation that follows, so we adjust - // recip32 downward by one bit. - recip32--; - - // We need to perform one more iteration to get us to 56 binary digits; - // The last iteration needs to happen with extra precision. 
- const uint32_t q63blo = bSignificand << 11; - uint64_t correction, reciprocal; - correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32)); - uint32_t cHi = correction >> 32; - uint32_t cLo = correction; - reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32); - - // We already adjusted the 32-bit estimate, now we need to adjust the final - // 64-bit reciprocal estimate downward to ensure that it is strictly smaller - // than the infinitely precise exact reciprocal. Because the computation - // of the Newton-Raphson step is truncating at every step, this adjustment - // is small; most of the work is already done. - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-56, lies in the - // interval [0.5, 1.0), and is strictly smaller than the true reciprocal - // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b - // in Q53 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0.5, 2.0) - // 3. the error in q is bounded away from 2^-53 (actually, we have a - // couple of bits to spare, but this is all we need). - - // We need a 64 x 64 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - rep_t quotient, quotientLo; - wideMultiply(aSignificand << 2, reciprocal, "ient, "ientLo); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. 
- rep_t residual; - if (quotient < (implicitBit << 1)) { - residual = (aSignificand << 53) - quotient * bSignificand; - quotientExponent--; - } else { - quotient >>= 1; - residual = (aSignificand << 52) - quotient * bSignificand; - } - - const int writtenExponent = quotientExponent + exponentBias; - - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } - - else if (writtenExponent < 1) { - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. - return fromRep(quotientSign); - } - - else { - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit - rep_t absResult = quotient & significandMask; - // Insert the exponent - absResult |= (rep_t)writtenExponent << significandBits; - // Round - absResult += round; - // Insert the sign and return - const double result = fromRep(absResult | quotientSign); - return result; - } -} + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + if (quotient < (implicitBit << 1)) { + residual = (aSignificand << 53) - quotient * bSignificand; + quotientExponent--; + } else { + quotient >>= 1; + residual = (aSignificand << 52) - quotient * bSignificand; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + + else if (writtenExponent < 1) { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. 
+ return fromRep(quotientSign); + } + + else { + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + const double result = fromRep(absResult | quotientSign); + return result; + } +} diff --git a/contrib/libs/cxxsupp/builtins/divdi3.c b/contrib/libs/cxxsupp/builtins/divdi3.c index d757ce5f220..b8eebcb2046 100644 --- a/contrib/libs/cxxsupp/builtins/divdi3.c +++ b/contrib/libs/cxxsupp/builtins/divdi3.c @@ -1,29 +1,29 @@ -/* ===-- divdi3.c - Implement __divdi3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divdi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a / b */ - -COMPILER_RT_ABI di_int -__divdi3(di_int a, di_int b) -{ - const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; - di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */ - di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */ - a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ - b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ - s_a ^= s_b; /*sign of quotient */ - return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ -} +/* ===-- divdi3.c - Implement __divdi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __divdi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +COMPILER_RT_ABI di_int +__divdi3(di_int a, di_int b) +{ + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */ + di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /*sign of quotient */ + return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ +} diff --git a/contrib/libs/cxxsupp/builtins/divmoddi4.c b/contrib/libs/cxxsupp/builtins/divmoddi4.c index f5ae0f5d133..0d4df67a63e 100644 --- a/contrib/libs/cxxsupp/builtins/divmoddi4.c +++ b/contrib/libs/cxxsupp/builtins/divmoddi4.c @@ -1,25 +1,25 @@ -/*===-- divmoddi4.c - Implement __divmoddi4 --------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divmoddi4 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a / b, *rem = a % b */ - -COMPILER_RT_ABI di_int -__divmoddi4(di_int a, di_int b, di_int* rem) -{ - di_int d = __divdi3(a,b); - *rem = a - (d*b); - return d; -} +/*===-- divmoddi4.c - Implement __divmoddi4 --------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __divmoddi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b, *rem = a % b */ + +COMPILER_RT_ABI di_int +__divmoddi4(di_int a, di_int b, di_int* rem) +{ + di_int d = __divdi3(a,b); + *rem = a - (d*b); + return d; +} diff --git a/contrib/libs/cxxsupp/builtins/divmodsi4.c b/contrib/libs/cxxsupp/builtins/divmodsi4.c index 8efe1567078..dabe2874397 100644 --- a/contrib/libs/cxxsupp/builtins/divmodsi4.c +++ b/contrib/libs/cxxsupp/builtins/divmodsi4.c @@ -1,27 +1,27 @@ -/*===-- divmodsi4.c - Implement __divmodsi4 --------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divmodsi4 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a / b, *rem = a % b */ - -COMPILER_RT_ABI si_int -__divmodsi4(si_int a, si_int b, si_int* rem) -{ - si_int d = __divsi3(a,b); - *rem = a - (d*b); - return d; -} - - +/*===-- divmodsi4.c - Implement __divmodsi4 --------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divmodsi4 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b, *rem = a % b */ + +COMPILER_RT_ABI si_int +__divmodsi4(si_int a, si_int b, si_int* rem) +{ + si_int d = __divsi3(a,b); + *rem = a - (d*b); + return d; +} + + diff --git a/contrib/libs/cxxsupp/builtins/divsc3.c b/contrib/libs/cxxsupp/builtins/divsc3.c index 933a3547446..42a48315e66 100644 --- a/contrib/libs/cxxsupp/builtins/divsc3.c +++ b/contrib/libs/cxxsupp/builtins/divsc3.c @@ -1,60 +1,60 @@ -/*===-- divsc3.c - Implement __divsc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divsc3 for the compiler_rt library. - * - *===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the quotient of (a + ib) / (c + id) */ - -COMPILER_RT_ABI Fcomplex -__divsc3(float __a, float __b, float __c, float __d) -{ - int __ilogbw = 0; - float __logbw = crt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbnf(__c, -__ilogbw); - __d = crt_scalbnf(__d, -__ilogbw); - } - float __denom = __c * __c + __d * __d; - Fcomplex z; - COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw); - COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a; - COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - 
crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d); - COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); - } - } - return z; -} +/*===-- divsc3.c - Implement __divsc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divsc3 for the compiler_rt library. 
+ * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI Fcomplex +__divsc3(float __a, float __b, float __c, float __d) +{ + int __ilogbw = 0; + float __logbw = crt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbnf(__c, -__ilogbw); + __d = crt_scalbnf(__d, -__ilogbw); + } + float __denom = __c * __c + __d * __d; + Fcomplex z; + COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && crt_isfinite(__d)) + { + __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignf(crt_isinf(__d) ? 
1 : 0, __d); + COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); + } + } + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/divsf3.c b/contrib/libs/cxxsupp/builtins/divsf3.c index d88b3048c79..de2e376125b 100644 --- a/contrib/libs/cxxsupp/builtins/divsf3.c +++ b/contrib/libs/cxxsupp/builtins/divsf3.c @@ -1,169 +1,169 @@ -//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float division -// with the IEEE-754 default rounding (to nearest, ties to even). -// -// For simplicity, this implementation currently flushes denormals to zero. -// It should be a fairly straightforward exercise to implement gradual -// underflow with correct rounding. -// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(fdiv, divsf3) - -COMPILER_RT_ABI fp_t -__divsf3(fp_t a, fp_t b) { - - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. 
- if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) return fromRep(qnanRep); - // zero / anything else = +/- zero - else return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) return fromRep(infRep | quotientSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale -= normalize(&bSignificand); - } - - // Or in the implicit significand bit. (If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. 
- uint32_t q31b = bSignificand << 8; - uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: +//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +// For simplicity, this implementation currently flushes denormals to zero. +// It should be a fairly straightforward exercise to implement gradual +// underflow with correct rounding. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(fdiv, divsf3) + +COMPILER_RT_ABI fp_t +__divsf3(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. 
+ uint32_t q31b = bSignificand << 8; + uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + uint32_t correction; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + + // Exhaustive testing shows that the error in reciprocal after three steps + // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our + // expectations. We bump the reciprocal by a tiny value to force the error + // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to + // be specific). This also causes 1/1 to give a sensible approximation + // instead of zero (due to overflow). + reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-28, lies in the + // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller + // than the true reciprocal of b. Multiplying a by this reciprocal thus + // gives a numerical q = a/b in Q24 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) + // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes + // from the fact that we truncate the product, and the 2^27 term + // is the error in the reciprocal of b scaled by the maximum + // possible value of a. 
As a consequence of this error bound, + // either q or nextafter(q) is the correctly rounded + rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32; + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. - uint32_t correction; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - - // Exhaustive testing shows that the error in reciprocal after three steps - // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our - // expectations. We bump the reciprocal by a tiny value to force the error - // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to - // be specific). This also causes 1/1 to give a sensible approximation - // instead of zero (due to overflow). - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-28, lies in the - // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller - // than the true reciprocal of b. Multiplying a by this reciprocal thus - // gives a numerical q = a/b in Q24 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) - // 3. 
the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes - // from the fact that we truncate the product, and the 2^27 term - // is the error in the reciprocal of b scaled by the maximum - // possible value of a. As a consequence of this error bound, - // either q or nextafter(q) is the correctly rounded - rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32; - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - if (quotient < (implicitBit << 1)) { - residual = (aSignificand << 24) - quotient * bSignificand; - quotientExponent--; - } else { - quotient >>= 1; - residual = (aSignificand << 23) - quotient * bSignificand; - } - - const int writtenExponent = quotientExponent + exponentBias; - - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } - - else if (writtenExponent < 1) { - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. - return fromRep(quotientSign); - } - - else { - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit - rep_t absResult = quotient & significandMask; - // Insert the exponent - absResult |= (rep_t)writtenExponent << significandBits; - // Round - absResult += round; - // Insert the sign and return - return fromRep(absResult | quotientSign); - } -} + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. 
The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + if (quotient < (implicitBit << 1)) { + residual = (aSignificand << 24) - quotient * bSignificand; + quotientExponent--; + } else { + quotient >>= 1; + residual = (aSignificand << 23) - quotient * bSignificand; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + + else if (writtenExponent < 1) { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return fromRep(quotientSign); + } + + else { + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + return fromRep(absResult | quotientSign); + } +} diff --git a/contrib/libs/cxxsupp/builtins/divsi3.c b/contrib/libs/cxxsupp/builtins/divsi3.c index dd8c6fb8919..bab4aefda30 100644 --- a/contrib/libs/cxxsupp/builtins/divsi3.c +++ b/contrib/libs/cxxsupp/builtins/divsi3.c @@ -1,37 +1,37 @@ -/* ===-- divsi3.c - Implement __divsi3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divsi3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a / b */ - -ARM_EABI_FNALIAS(idiv, divsi3) - -COMPILER_RT_ABI si_int -__divsi3(si_int a, si_int b) -{ - const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1; - si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */ - si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */ - a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ - b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ - s_a ^= s_b; /* sign of quotient */ - /* - * On CPUs without unsigned hardware division support, - * this calls __udivsi3 (notice the cast to su_int). - * On CPUs with unsigned hardware division support, - * this uses the unsigned division instruction. - */ - return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */ -} +/* ===-- divsi3.c - Implement __divsi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +ARM_EABI_FNALIAS(idiv, divsi3) + +COMPILER_RT_ABI si_int +__divsi3(si_int a, si_int b) +{ + const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1; + si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */ + si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /* sign of quotient */ + /* + * On CPUs without unsigned hardware division support, + * this calls __udivsi3 (notice the cast to su_int). 
+ * On CPUs with unsigned hardware division support, + * this uses the unsigned division instruction. + */ + return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */ +} diff --git a/contrib/libs/cxxsupp/builtins/divtc3.c b/contrib/libs/cxxsupp/builtins/divtc3.c index 72581de5eaf..04693df471f 100644 --- a/contrib/libs/cxxsupp/builtins/divtc3.c +++ b/contrib/libs/cxxsupp/builtins/divtc3.c @@ -1,60 +1,60 @@ -/*===-- divtc3.c - Implement __divtc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divtc3 for the compiler_rt library. - * - *===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the quotient of (a + ib) / (c + id) */ - -COMPILER_RT_ABI long double _Complex -__divtc3(long double __a, long double __b, long double __c, long double __d) -{ - int __ilogbw = 0; - long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbnl(__c, -__ilogbw); - __d = crt_scalbnl(__d, -__ilogbw); - } - long double __denom = __c * __c + __d * __d; - long double _Complex z; - __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) - { - if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a; - __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysignl(crt_isinf(__a) ? 
1.0 : 0.0, __a); - __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b); - __real__ z = CRT_INFINITY * (__a * __c + __b * __d); - __imag__ z = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0.0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c); - __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d); - __real__ z = 0.0 * (__a * __c + __b * __d); - __imag__ z = 0.0 * (__b * __c - __a * __d); - } - } - return z; -} +/*===-- divtc3.c - Implement __divtc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divtc3 for the compiler_rt library. + * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI long double _Complex +__divtc3(long double __a, long double __b, long double __c, long double __d) +{ + int __ilogbw = 0; + long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbnl(__c, -__ilogbw); + __d = crt_scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + long double _Complex z; + __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) + { + if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a; + __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && 
crt_isfinite(__d)) + { + __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b); + __real__ z = CRT_INFINITY * (__a * __c + __b * __d); + __imag__ z = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0.0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d); + __real__ z = 0.0 * (__a * __c + __b * __d); + __imag__ z = 0.0 * (__b * __c - __a * __d); + } + } + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/divtf3.c b/contrib/libs/cxxsupp/builtins/divtf3.c index 80471b381d9..e81dab826bd 100644 --- a/contrib/libs/cxxsupp/builtins/divtf3.c +++ b/contrib/libs/cxxsupp/builtins/divtf3.c @@ -1,203 +1,203 @@ -//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements quad-precision soft-float division -// with the IEEE-754 default rounding (to nearest, ties to even). -// -// For simplicity, this implementation currently flushes denormals to zero. -// It should be a fairly straightforward exercise to implement gradual -// underflow with correct rounding. 
-// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { - - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. - if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) return fromRep(qnanRep); - // zero / anything else = +/- zero - else return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) return fromRep(infRep | quotientSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale -= normalize(&bSignificand); - } - - // Or in the implicit significand bit. 
(If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q63 fixed-point number in the range - // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. - const uint64_t q63b = bSignificand >> 49; - uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b; - // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration. - uint64_t correction64; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - - // recip64 might have overflowed to exactly zero in the preceeding - // computation if the high word of b is exactly 1.0. This would sabotage - // the full-width final stage of the computation that follows, so we adjust - // recip64 downward by one bit. - recip64--; - - // We need to perform one more iteration to get us to 112 binary digits; - // The last iteration needs to happen with extra precision. 
- const uint64_t q127blo = bSignificand << 15; - rep_t correction, reciprocal; - - // NOTE: This operation is equivalent to __multi3, which is not implemented - // in some architechure - rep_t r64q63, r64q127, r64cH, r64cL, dummy; - wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63); - wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127); - - correction = -(r64q63 + (r64q127 >> 64)); - - uint64_t cHi = correction >> 64; - uint64_t cLo = correction; - - wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH); - wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL); - - reciprocal = r64cH + (r64cL >> 64); - - // We already adjusted the 64-bit estimate, now we need to adjust the final - // 128-bit reciprocal estimate downward to ensure that it is strictly smaller - // than the infinitely precise exact reciprocal. Because the computation - // of the Newton-Raphson step is truncating at every step, this adjustment - // is small; most of the work is already done. - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-112, lies in the - // interval [0.5, 1.0), and is strictly smaller than the true reciprocal - // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b - // in Q127 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0.5, 2.0) - // 3. the error in q is bounded away from 2^-113 (actually, we have a - // couple of bits to spare, but this is all we need). - - // We need a 128 x 128 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - rep_t quotient, quotientLo; - wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
- // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - rep_t qb; - - if (quotient < (implicitBit << 1)) { - wideMultiply(quotient, bSignificand, &dummy, &qb); - residual = (aSignificand << 113) - qb; - quotientExponent--; - } else { - quotient >>= 1; - wideMultiply(quotient, bSignificand, &dummy, &qb); - residual = (aSignificand << 112) - qb; - } - - const int writtenExponent = quotientExponent + exponentBias; - - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } - else if (writtenExponent < 1) { - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. - return fromRep(quotientSign); - } - else { - const bool round = (residual << 1) >= bSignificand; - // Clear the implicit bit - rep_t absResult = quotient & significandMask; - // Insert the exponent - absResult |= (rep_t)writtenExponent << significandBits; - // Round - absResult += round; - // Insert the sign and return - const long double result = fromRep(absResult | quotientSign); - return result; - } -} - -#endif +//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +// For simplicity, this implementation currently flushes denormals to zero. +// It should be a fairly straightforward exercise to implement gradual +// underflow with correct rounding. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q63 fixed-point number in the range + // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. 
+ const uint64_t q63b = bSignificand >> 49; + uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b; + // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration. + uint64_t correction64; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + + // recip64 might have overflowed to exactly zero in the preceeding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip64 downward by one bit. + recip64--; + + // We need to perform one more iteration to get us to 112 binary digits; + // The last iteration needs to happen with extra precision. 
+ const uint64_t q127blo = bSignificand << 15; + rep_t correction, reciprocal; + + // NOTE: This operation is equivalent to __multi3, which is not implemented + // in some architechure + rep_t r64q63, r64q127, r64cH, r64cL, dummy; + wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63); + wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127); + + correction = -(r64q63 + (r64q127 >> 64)); + + uint64_t cHi = correction >> 64; + uint64_t cLo = correction; + + wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH); + wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL); + + reciprocal = r64cH + (r64cL >> 64); + + // We already adjusted the 64-bit estimate, now we need to adjust the final + // 128-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-112, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q127 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-113 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 128 x 128 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + rep_t quotient, quotientLo; + wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+ // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + rep_t qb; + + if (quotient < (implicitBit << 1)) { + wideMultiply(quotient, bSignificand, &dummy, &qb); + residual = (aSignificand << 113) - qb; + quotientExponent--; + } else { + quotient >>= 1; + wideMultiply(quotient, bSignificand, &dummy, &qb); + residual = (aSignificand << 112) - qb; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + else if (writtenExponent < 1) { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. 
+ return fromRep(quotientSign); + } + else { + const bool round = (residual << 1) >= bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + const long double result = fromRep(absResult | quotientSign); + return result; + } +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/divti3.c b/contrib/libs/cxxsupp/builtins/divti3.c index 1bcf0d53a25..c73eae28fe0 100644 --- a/contrib/libs/cxxsupp/builtins/divti3.c +++ b/contrib/libs/cxxsupp/builtins/divti3.c @@ -1,33 +1,33 @@ -/* ===-- divti3.c - Implement __divti3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divti3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a / b */ - -COMPILER_RT_ABI ti_int -__divti3(ti_int a, ti_int b) -{ - const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; - ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */ - ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */ - a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ - b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ - s_a ^= s_b; /* sign of quotient */ - return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- divti3.c - Implement __divti3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __divti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a / b */ + +COMPILER_RT_ABI ti_int +__divti3(ti_int a, ti_int b) +{ + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */ + ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /* sign of quotient */ + return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/divxc3.c b/contrib/libs/cxxsupp/builtins/divxc3.c index 1c0705704bc..6f49280e5f6 100644 --- a/contrib/libs/cxxsupp/builtins/divxc3.c +++ b/contrib/libs/cxxsupp/builtins/divxc3.c @@ -1,63 +1,63 @@ -/* ===-- divxc3.c - Implement __divxc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __divxc3 for the compiler_rt library. 
- * - */ - -#if !_ARCH_PPC - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the quotient of (a + ib) / (c + id) */ - -COMPILER_RT_ABI Lcomplex -__divxc3(long double __a, long double __b, long double __c, long double __d) -{ - int __ilogbw = 0; - long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbnl(__c, -__ilogbw); - __d = crt_scalbnl(__d, -__ilogbw); - } - long double __denom = __c * __c + __d * __d; - Lcomplex z; - COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); - COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; - COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d); - COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); - } - } - return z; -} - -#endif +/* ===-- divxc3.c - Implement __divxc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __divxc3 for the compiler_rt library. + * + */ + +#if !_ARCH_PPC + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI Lcomplex +__divxc3(long double __a, long double __b, long double __c, long double __d) +{ + int __ilogbw = 0; + long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbnl(__c, -__ilogbw); + __d = crt_scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + Lcomplex z; + COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && crt_isfinite(__d)) + { + __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 
1 : 0, __d); + COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); + } + } + return z; +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/emutls.c b/contrib/libs/cxxsupp/builtins/emutls.c index c2c4318fd44..09e79568bd5 100644 --- a/contrib/libs/cxxsupp/builtins/emutls.c +++ b/contrib/libs/cxxsupp/builtins/emutls.c @@ -1,183 +1,183 @@ -/* ===---------- emutls.c - Implements __emutls_get_address ---------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ -#include -#include -#include -#include - -#include "int_lib.h" -#include "int_util.h" - -/* Default is not to use posix_memalign, so systems like Android - * can use thread local data without heavier POSIX memory allocators. - */ -#ifndef EMUTLS_USE_POSIX_MEMALIGN -#define EMUTLS_USE_POSIX_MEMALIGN 0 -#endif - -/* For every TLS variable xyz, - * there is one __emutls_control variable named __emutls_v.xyz. - * If xyz has non-zero initial value, __emutls_v.xyz's "value" - * will point to __emutls_t.xyz, which has the initial value. 
- */ -typedef struct __emutls_control { - size_t size; /* size of the object in bytes */ - size_t align; /* alignment of the object in bytes */ - union { - uintptr_t index; /* data[index-1] is the object address */ - void* address; /* object address, when in single thread env */ - } object; - void* value; /* null or non-zero initial value for the object */ -} __emutls_control; - -static __inline void *emutls_memalign_alloc(size_t align, size_t size) { - void *base; -#if EMUTLS_USE_POSIX_MEMALIGN - if (posix_memalign(&base, align, size) != 0) - abort(); -#else - #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*)) - char* object; - if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) - abort(); - base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) - & ~(uintptr_t)(align - 1)); - - ((void**)base)[-1] = object; -#endif - return base; -} - -static __inline void emutls_memalign_free(void *base) { -#if EMUTLS_USE_POSIX_MEMALIGN - free(base); -#else - /* The mallocated address is in ((void**)base)[-1] */ - free(((void**)base)[-1]); -#endif -} - -/* Emulated TLS objects are always allocated at run-time. */ -static __inline void *emutls_allocate_object(__emutls_control *control) { - /* Use standard C types, check with gcc's emutls.o. */ - typedef unsigned int gcc_word __attribute__((mode(word))); - typedef unsigned int gcc_pointer __attribute__((mode(pointer))); - COMPILE_TIME_ASSERT(sizeof(size_t) == sizeof(gcc_word)); - COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); - COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*)); - - size_t size = control->size; - size_t align = control->align; - if (align < sizeof(void*)) - align = sizeof(void*); - /* Make sure that align is power of 2. 
*/ - if ((align & (align - 1)) != 0) - abort(); - - void* base = emutls_memalign_alloc(align, size); - if (control->value) - memcpy(base, control->value, size); - else - memset(base, 0, size); - return base; -} - -static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; - -static size_t emutls_num_object = 0; /* number of allocated TLS objects */ - -typedef struct emutls_address_array { - uintptr_t size; /* number of elements in the 'data' array */ - void* data[]; -} emutls_address_array; - -static pthread_key_t emutls_pthread_key; - -static void emutls_key_destructor(void* ptr) { - emutls_address_array* array = (emutls_address_array*)ptr; - uintptr_t i; - for (i = 0; i < array->size; ++i) { - if (array->data[i]) - emutls_memalign_free(array->data[i]); - } - free(ptr); -} - -static void emutls_init(void) { - if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) - abort(); -} - -/* Returns control->object.index; set index if not allocated yet. */ -static __inline uintptr_t emutls_get_index(__emutls_control *control) { - uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); - if (!index) { - static pthread_once_t once = PTHREAD_ONCE_INIT; - pthread_once(&once, emutls_init); - pthread_mutex_lock(&emutls_mutex); - index = control->object.index; - if (!index) { - index = ++emutls_num_object; - __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); - } - pthread_mutex_unlock(&emutls_mutex); - } - return index; -} - -/* Updates newly allocated thread local emutls_address_array. */ -static __inline void emutls_check_array_set_size(emutls_address_array *array, - uintptr_t size) { - if (array == NULL) - abort(); - array->size = size; - pthread_setspecific(emutls_pthread_key, (void*)array); -} - -/* Returns the new 'data' array size, number of elements, - * which must be no smaller than the given index. 
- */ -static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { - /* Need to allocate emutls_address_array with one extra slot - * to store the data array size. - * Round up the emutls_address_array size to multiple of 16. - */ - return ((index + 1 + 15) & ~((uintptr_t)15)) - 1; -} - -/* Returns the thread local emutls_address_array. - * Extends its size if necessary to hold address at index. - */ -static __inline emutls_address_array * -emutls_get_address_array(uintptr_t index) { - emutls_address_array* array = pthread_getspecific(emutls_pthread_key); - if (array == NULL) { - uintptr_t new_size = emutls_new_data_array_size(index); - array = calloc(new_size + 1, sizeof(void*)); - emutls_check_array_set_size(array, new_size); - } else if (index > array->size) { - uintptr_t orig_size = array->size; - uintptr_t new_size = emutls_new_data_array_size(index); - array = realloc(array, (new_size + 1) * sizeof(void*)); - if (array) - memset(array->data + orig_size, 0, - (new_size - orig_size) * sizeof(void*)); - emutls_check_array_set_size(array, new_size); - } - return array; -} - -void* __emutls_get_address(__emutls_control* control) { - uintptr_t index = emutls_get_index(control); - emutls_address_array* array = emutls_get_address_array(index); - if (array->data[index - 1] == NULL) - array->data[index - 1] = emutls_allocate_object(control); - return array->data[index - 1]; -} +/* ===---------- emutls.c - Implements __emutls_get_address ---------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ +#include +#include +#include +#include + +#include "int_lib.h" +#include "int_util.h" + +/* Default is not to use posix_memalign, so systems like Android + * can use thread local data without heavier POSIX memory allocators. 
+ */ +#ifndef EMUTLS_USE_POSIX_MEMALIGN +#define EMUTLS_USE_POSIX_MEMALIGN 0 +#endif + +/* For every TLS variable xyz, + * there is one __emutls_control variable named __emutls_v.xyz. + * If xyz has non-zero initial value, __emutls_v.xyz's "value" + * will point to __emutls_t.xyz, which has the initial value. + */ +typedef struct __emutls_control { + size_t size; /* size of the object in bytes */ + size_t align; /* alignment of the object in bytes */ + union { + uintptr_t index; /* data[index-1] is the object address */ + void* address; /* object address, when in single thread env */ + } object; + void* value; /* null or non-zero initial value for the object */ +} __emutls_control; + +static __inline void *emutls_memalign_alloc(size_t align, size_t size) { + void *base; +#if EMUTLS_USE_POSIX_MEMALIGN + if (posix_memalign(&base, align, size) != 0) + abort(); +#else + #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*)) + char* object; + if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) + abort(); + base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) + & ~(uintptr_t)(align - 1)); + + ((void**)base)[-1] = object; +#endif + return base; +} + +static __inline void emutls_memalign_free(void *base) { +#if EMUTLS_USE_POSIX_MEMALIGN + free(base); +#else + /* The mallocated address is in ((void**)base)[-1] */ + free(((void**)base)[-1]); +#endif +} + +/* Emulated TLS objects are always allocated at run-time. */ +static __inline void *emutls_allocate_object(__emutls_control *control) { + /* Use standard C types, check with gcc's emutls.o. 
*/ + typedef unsigned int gcc_word __attribute__((mode(word))); + typedef unsigned int gcc_pointer __attribute__((mode(pointer))); + COMPILE_TIME_ASSERT(sizeof(size_t) == sizeof(gcc_word)); + COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); + COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*)); + + size_t size = control->size; + size_t align = control->align; + if (align < sizeof(void*)) + align = sizeof(void*); + /* Make sure that align is power of 2. */ + if ((align & (align - 1)) != 0) + abort(); + + void* base = emutls_memalign_alloc(align, size); + if (control->value) + memcpy(base, control->value, size); + else + memset(base, 0, size); + return base; +} + +static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; + +static size_t emutls_num_object = 0; /* number of allocated TLS objects */ + +typedef struct emutls_address_array { + uintptr_t size; /* number of elements in the 'data' array */ + void* data[]; +} emutls_address_array; + +static pthread_key_t emutls_pthread_key; + +static void emutls_key_destructor(void* ptr) { + emutls_address_array* array = (emutls_address_array*)ptr; + uintptr_t i; + for (i = 0; i < array->size; ++i) { + if (array->data[i]) + emutls_memalign_free(array->data[i]); + } + free(ptr); +} + +static void emutls_init(void) { + if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) + abort(); +} + +/* Returns control->object.index; set index if not allocated yet. 
*/ +static __inline uintptr_t emutls_get_index(__emutls_control *control) { + uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); + if (!index) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once(&once, emutls_init); + pthread_mutex_lock(&emutls_mutex); + index = control->object.index; + if (!index) { + index = ++emutls_num_object; + __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); + } + pthread_mutex_unlock(&emutls_mutex); + } + return index; +} + +/* Updates newly allocated thread local emutls_address_array. */ +static __inline void emutls_check_array_set_size(emutls_address_array *array, + uintptr_t size) { + if (array == NULL) + abort(); + array->size = size; + pthread_setspecific(emutls_pthread_key, (void*)array); +} + +/* Returns the new 'data' array size, number of elements, + * which must be no smaller than the given index. + */ +static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { + /* Need to allocate emutls_address_array with one extra slot + * to store the data array size. + * Round up the emutls_address_array size to multiple of 16. + */ + return ((index + 1 + 15) & ~((uintptr_t)15)) - 1; +} + +/* Returns the thread local emutls_address_array. + * Extends its size if necessary to hold address at index. 
+ */ +static __inline emutls_address_array * +emutls_get_address_array(uintptr_t index) { + emutls_address_array* array = pthread_getspecific(emutls_pthread_key); + if (array == NULL) { + uintptr_t new_size = emutls_new_data_array_size(index); + array = calloc(new_size + 1, sizeof(void*)); + emutls_check_array_set_size(array, new_size); + } else if (index > array->size) { + uintptr_t orig_size = array->size; + uintptr_t new_size = emutls_new_data_array_size(index); + array = realloc(array, (new_size + 1) * sizeof(void*)); + if (array) + memset(array->data + orig_size, 0, + (new_size - orig_size) * sizeof(void*)); + emutls_check_array_set_size(array, new_size); + } + return array; +} + +void* __emutls_get_address(__emutls_control* control) { + uintptr_t index = emutls_get_index(control); + emutls_address_array* array = emutls_get_address_array(index); + if (array->data[index - 1] == NULL) + array->data[index - 1] = emutls_allocate_object(control); + return array->data[index - 1]; +} diff --git a/contrib/libs/cxxsupp/builtins/enable_execute_stack.c b/contrib/libs/cxxsupp/builtins/enable_execute_stack.c index eb25a333305..0dc3482c446 100644 --- a/contrib/libs/cxxsupp/builtins/enable_execute_stack.c +++ b/contrib/libs/cxxsupp/builtins/enable_execute_stack.c @@ -1,72 +1,72 @@ -/* ===-- enable_execute_stack.c - Implement __enable_execute_stack ---------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifndef _WIN32 -#include -#endif - -/* #include "config.h" - * FIXME: CMake - include when cmake system is ready. - * Remove #define HAVE_SYSCONF 1 line. 
- */ -#define HAVE_SYSCONF 1 - -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#include -#else -#ifndef __APPLE__ -#include -#endif /* __APPLE__ */ -#endif /* _WIN32 */ - -#if __LP64__ - #define TRAMPOLINE_SIZE 48 -#else - #define TRAMPOLINE_SIZE 40 -#endif - -/* - * The compiler generates calls to __enable_execute_stack() when creating - * trampoline functions on the stack for use with nested functions. - * It is expected to mark the page(s) containing the address - * and the next 48 bytes as executable. Since the stack is normally rw- - * that means changing the protection on those page(s) to rwx. - */ - -COMPILER_RT_ABI void -__enable_execute_stack(void* addr) -{ - -#if _WIN32 - MEMORY_BASIC_INFORMATION mbi; - if (!VirtualQuery (addr, &mbi, sizeof(mbi))) - return; /* We should probably assert here because there is no return value */ - VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect); -#else -#if __APPLE__ - /* On Darwin, pagesize is always 4096 bytes */ - const uintptr_t pageSize = 4096; -#elif !defined(HAVE_SYSCONF) -#error "HAVE_SYSCONF not defined! See enable_execute_stack.c" -#else - const uintptr_t pageSize = sysconf(_SC_PAGESIZE); -#endif /* __APPLE__ */ - - const uintptr_t pageAlignMask = ~(pageSize-1); - uintptr_t p = (uintptr_t)addr; - unsigned char* startPage = (unsigned char*)(p & pageAlignMask); - unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask); - size_t length = endPage - startPage; - (void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC); -#endif -} +/* ===-- enable_execute_stack.c - Implement __enable_execute_stack ---------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifndef _WIN32 +#include +#endif + +/* #include "config.h" + * FIXME: CMake - include when cmake system is ready. + * Remove #define HAVE_SYSCONF 1 line. + */ +#define HAVE_SYSCONF 1 + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#else +#ifndef __APPLE__ +#include +#endif /* __APPLE__ */ +#endif /* _WIN32 */ + +#if __LP64__ + #define TRAMPOLINE_SIZE 48 +#else + #define TRAMPOLINE_SIZE 40 +#endif + +/* + * The compiler generates calls to __enable_execute_stack() when creating + * trampoline functions on the stack for use with nested functions. + * It is expected to mark the page(s) containing the address + * and the next 48 bytes as executable. Since the stack is normally rw- + * that means changing the protection on those page(s) to rwx. + */ + +COMPILER_RT_ABI void +__enable_execute_stack(void* addr) +{ + +#if _WIN32 + MEMORY_BASIC_INFORMATION mbi; + if (!VirtualQuery (addr, &mbi, sizeof(mbi))) + return; /* We should probably assert here because there is no return value */ + VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect); +#else +#if __APPLE__ + /* On Darwin, pagesize is always 4096 bytes */ + const uintptr_t pageSize = 4096; +#elif !defined(HAVE_SYSCONF) +#error "HAVE_SYSCONF not defined! 
See enable_execute_stack.c" +#else + const uintptr_t pageSize = sysconf(_SC_PAGESIZE); +#endif /* __APPLE__ */ + + const uintptr_t pageAlignMask = ~(pageSize-1); + uintptr_t p = (uintptr_t)addr; + unsigned char* startPage = (unsigned char*)(p & pageAlignMask); + unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask); + size_t length = endPage - startPage; + (void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC); +#endif +} diff --git a/contrib/libs/cxxsupp/builtins/eprintf.c b/contrib/libs/cxxsupp/builtins/eprintf.c index 8ae0fdf8150..89f34b15457 100644 --- a/contrib/libs/cxxsupp/builtins/eprintf.c +++ b/contrib/libs/cxxsupp/builtins/eprintf.c @@ -1,35 +1,35 @@ -/* ===---------- eprintf.c - Implements __eprintf --------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - - - -#include "int_lib.h" -#include - - -/* - * __eprintf() was used in an old version of . - * It can eventually go away, but it is needed when linking - * .o files built with the old . - * - * It should never be exported from a dylib, so it is marked - * visibility hidden. - */ -#ifndef _WIN32 -__attribute__((visibility("hidden"))) -#endif -COMPILER_RT_ABI void -__eprintf(const char* format, const char* assertion_expression, - const char* line, const char* file) -{ - fprintf(stderr, format, assertion_expression, line, file); - fflush(stderr); - compilerrt_abort(); -} +/* ===---------- eprintf.c - Implements __eprintf --------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + + + +#include "int_lib.h" +#include + + +/* + * __eprintf() was used in an old version of . + * It can eventually go away, but it is needed when linking + * .o files built with the old . + * + * It should never be exported from a dylib, so it is marked + * visibility hidden. + */ +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +COMPILER_RT_ABI void +__eprintf(const char* format, const char* assertion_expression, + const char* line, const char* file) +{ + fprintf(stderr, format, assertion_expression, line, file); + fflush(stderr); + compilerrt_abort(); +} diff --git a/contrib/libs/cxxsupp/builtins/extenddftf2.c b/contrib/libs/cxxsupp/builtins/extenddftf2.c index c55e2e8a256..86dab8f03a8 100644 --- a/contrib/libs/cxxsupp/builtins/extenddftf2.c +++ b/contrib/libs/cxxsupp/builtins/extenddftf2.c @@ -1,23 +1,23 @@ -//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -#define SRC_DOUBLE -#define DST_QUAD -#include "fp_extend_impl.inc" - -COMPILER_RT_ABI long double __extenddftf2(double a) { - return __extendXfYf2__(a); -} - -#endif +//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_DOUBLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI long double __extenddftf2(double a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/extendhfsf2.c b/contrib/libs/cxxsupp/builtins/extendhfsf2.c index aee4fd8787f..27115a48c18 100644 --- a/contrib/libs/cxxsupp/builtins/extendhfsf2.c +++ b/contrib/libs/cxxsupp/builtins/extendhfsf2.c @@ -1,25 +1,25 @@ -//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// - -#define SRC_HALF -#define DST_SINGLE -#include "fp_extend_impl.inc" - -ARM_EABI_FNALIAS(h2f, extendhfsf2) - -// Use a forwarding definition and noinline to implement a poor man's alias, -// as there isn't a good cross-platform way of defining one. -COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { - return __extendXfYf2__(a); -} - -COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { - return __extendhfsf2(a); -} +//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// + +#define SRC_HALF +#define DST_SINGLE +#include "fp_extend_impl.inc" + +ARM_EABI_FNALIAS(h2f, extendhfsf2) + +// Use a forwarding definition and noinline to implement a poor man's alias, +// as there isn't a good cross-platform way of defining one. 
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { + return __extendXfYf2__(a); +} + +COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { + return __extendhfsf2(a); +} diff --git a/contrib/libs/cxxsupp/builtins/extendsfdf2.c b/contrib/libs/cxxsupp/builtins/extendsfdf2.c index 595264f3a23..7a267c2f47a 100644 --- a/contrib/libs/cxxsupp/builtins/extendsfdf2.c +++ b/contrib/libs/cxxsupp/builtins/extendsfdf2.c @@ -1,19 +1,19 @@ -//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// - -#define SRC_SINGLE -#define DST_DOUBLE -#include "fp_extend_impl.inc" - -ARM_EABI_FNALIAS(f2d, extendsfdf2) - -COMPILER_RT_ABI double __extendsfdf2(float a) { - return __extendXfYf2__(a); -} +//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// + +#define SRC_SINGLE +#define DST_DOUBLE +#include "fp_extend_impl.inc" + +ARM_EABI_FNALIAS(f2d, extendsfdf2) + +COMPILER_RT_ABI double __extendsfdf2(float a) { + return __extendXfYf2__(a); +} diff --git a/contrib/libs/cxxsupp/builtins/extendsftf2.c b/contrib/libs/cxxsupp/builtins/extendsftf2.c index 90120a2ba0c..2eeeba28484 100644 --- a/contrib/libs/cxxsupp/builtins/extendsftf2.c +++ b/contrib/libs/cxxsupp/builtins/extendsftf2.c @@ -1,23 +1,23 @@ -//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -#define SRC_SINGLE -#define DST_QUAD -#include "fp_extend_impl.inc" - -COMPILER_RT_ABI long double __extendsftf2(float a) { - return __extendXfYf2__(a); -} - -#endif +//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_SINGLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI long double __extendsftf2(float a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/ffsdi2.c b/contrib/libs/cxxsupp/builtins/ffsdi2.c index a1473d4a3c4..a5ac9900ff1 100644 --- a/contrib/libs/cxxsupp/builtins/ffsdi2.c +++ b/contrib/libs/cxxsupp/builtins/ffsdi2.c @@ -1,33 +1,33 @@ -/* ===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ffsdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: the index of the least significant 1-bit in a, or - * the value zero if a is zero. The least significant bit is index one. - */ - -COMPILER_RT_ABI si_int -__ffsdi2(di_int a) -{ - dwords x; - x.all = a; - if (x.s.low == 0) - { - if (x.s.high == 0) - return 0; - return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); - } - return __builtin_ctz(x.s.low) + 1; -} +/* ===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ffsdi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the index of the least significant 1-bit in a, or + * the value zero if a is zero. The least significant bit is index one. + */ + +COMPILER_RT_ABI si_int +__ffsdi2(di_int a) +{ + dwords x; + x.all = a; + if (x.s.low == 0) + { + if (x.s.high == 0) + return 0; + return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); + } + return __builtin_ctz(x.s.low) + 1; +} diff --git a/contrib/libs/cxxsupp/builtins/ffsti2.c b/contrib/libs/cxxsupp/builtins/ffsti2.c index 6689881101c..dcdb3bd7f80 100644 --- a/contrib/libs/cxxsupp/builtins/ffsti2.c +++ b/contrib/libs/cxxsupp/builtins/ffsti2.c @@ -1,37 +1,37 @@ -/* ===-- ffsti2.c - Implement __ffsti2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ffsti2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: the index of the least significant 1-bit in a, or - * the value zero if a is zero. The least significant bit is index one. - */ - -COMPILER_RT_ABI si_int -__ffsti2(ti_int a) -{ - twords x; - x.all = a; - if (x.s.low == 0) - { - if (x.s.high == 0) - return 0; - return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT); - } - return __builtin_ctzll(x.s.low) + 1; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- ffsti2.c - Implement __ffsti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __ffsti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: the index of the least significant 1-bit in a, or + * the value zero if a is zero. The least significant bit is index one. + */ + +COMPILER_RT_ABI si_int +__ffsti2(ti_int a) +{ + twords x; + x.all = a; + if (x.s.low == 0) + { + if (x.s.high == 0) + return 0; + return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT); + } + return __builtin_ctzll(x.s.low) + 1; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/fixdfdi.c b/contrib/libs/cxxsupp/builtins/fixdfdi.c index e37029b34ea..14283ef42e6 100644 --- a/contrib/libs/cxxsupp/builtins/fixdfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixdfdi.c @@ -1,46 +1,46 @@ -/* ===-- fixdfdi.c - Implement __fixdfdi -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define DOUBLE_PRECISION -#include "fp_lib.h" -ARM_EABI_FNALIAS(d2lz, fixdfdi) - -#ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; can set the invalid - * flag as a side-effect of computation. - */ - -COMPILER_RT_ABI du_int __fixunsdfdi(double a); - -COMPILER_RT_ABI di_int -__fixdfdi(double a) -{ - if (a < 0.0) { - return -__fixunsdfdi(-a); - } - return __fixunsdfdi(a); -} - -#else -/* Support for systems that don't have hardware floating-point; there are no - * flags to set, and we don't want to code-gen to an unknown soft-float - * implementation. 
- */ - -typedef di_int fixint_t; -typedef du_int fixuint_t; -#include "fp_fixint_impl.inc" - -COMPILER_RT_ABI di_int -__fixdfdi(fp_t a) { - return __fixint(a); -} - -#endif +/* ===-- fixdfdi.c - Implement __fixdfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" +ARM_EABI_FNALIAS(d2lz, fixdfdi) + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int __fixunsdfdi(double a); + +COMPILER_RT_ABI di_int +__fixdfdi(double a) +{ + if (a < 0.0) { + return -__fixunsdfdi(-a); + } + return __fixunsdfdi(a); +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. + */ + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int +__fixdfdi(fp_t a) { + return __fixint(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixdfsi.c b/contrib/libs/cxxsupp/builtins/fixdfsi.c index 952c0b07b95..704e65bc43a 100644 --- a/contrib/libs/cxxsupp/builtins/fixdfsi.c +++ b/contrib/libs/cxxsupp/builtins/fixdfsi.c @@ -1,22 +1,22 @@ -/* ===-- fixdfsi.c - Implement __fixdfsi -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - */ - -#define DOUBLE_PRECISION -#include "fp_lib.h" -typedef si_int fixint_t; -typedef su_int fixuint_t; -#include "fp_fixint_impl.inc" - -ARM_EABI_FNALIAS(d2iz, fixdfsi) - -COMPILER_RT_ABI si_int -__fixdfsi(fp_t a) { - return __fixint(a); -} +/* ===-- fixdfsi.c - Implement __fixdfsi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +ARM_EABI_FNALIAS(d2iz, fixdfsi) + +COMPILER_RT_ABI si_int +__fixdfsi(fp_t a) { + return __fixint(a); +} diff --git a/contrib/libs/cxxsupp/builtins/fixdfti.c b/contrib/libs/cxxsupp/builtins/fixdfti.c index ce207e4cce0..aaf225e74f8 100644 --- a/contrib/libs/cxxsupp/builtins/fixdfti.c +++ b/contrib/libs/cxxsupp/builtins/fixdfti.c @@ -1,26 +1,26 @@ -/* ===-- fixdfti.c - Implement __fixdfti -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT -#define DOUBLE_PRECISION -#include "fp_lib.h" - -typedef ti_int fixint_t; -typedef tu_int fixuint_t; -#include "fp_fixint_impl.inc" - -COMPILER_RT_ABI ti_int -__fixdfti(fp_t a) { - return __fixint(a); -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- fixdfti.c - Implement __fixdfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT +#define DOUBLE_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int +__fixdfti(fp_t a) { + return __fixint(a); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/fixsfdi.c b/contrib/libs/cxxsupp/builtins/fixsfdi.c index ca1ffcdfe8a..fab47e272a2 100644 --- a/contrib/libs/cxxsupp/builtins/fixsfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixsfdi.c @@ -1,47 +1,47 @@ -/* ===-- fixsfdi.c - Implement __fixsfdi -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define SINGLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(f2lz, fixsfdi) - -#ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; can set the invalid - * flag as a side-effect of computation. 
- */ - -COMPILER_RT_ABI du_int __fixunssfdi(float a); - -COMPILER_RT_ABI di_int -__fixsfdi(float a) -{ - if (a < 0.0f) { - return -__fixunssfdi(-a); - } - return __fixunssfdi(a); -} - -#else -/* Support for systems that don't have hardware floating-point; there are no - * flags to set, and we don't want to code-gen to an unknown soft-float - * implementation. - */ - -typedef di_int fixint_t; -typedef du_int fixuint_t; -#include "fp_fixint_impl.inc" - -COMPILER_RT_ABI di_int -__fixsfdi(fp_t a) { - return __fixint(a); -} - -#endif +/* ===-- fixsfdi.c - Implement __fixsfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(f2lz, fixsfdi) + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int __fixunssfdi(float a); + +COMPILER_RT_ABI di_int +__fixsfdi(float a) +{ + if (a < 0.0f) { + return -__fixunssfdi(-a); + } + return __fixunssfdi(a); +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. 
+ */ + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int +__fixsfdi(fp_t a) { + return __fixint(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixsfsi.c b/contrib/libs/cxxsupp/builtins/fixsfsi.c index 29079704247..f045536d685 100644 --- a/contrib/libs/cxxsupp/builtins/fixsfsi.c +++ b/contrib/libs/cxxsupp/builtins/fixsfsi.c @@ -1,22 +1,22 @@ -/* ===-- fixsfsi.c - Implement __fixsfsi -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define SINGLE_PRECISION -#include "fp_lib.h" -typedef si_int fixint_t; -typedef su_int fixuint_t; -#include "fp_fixint_impl.inc" - -ARM_EABI_FNALIAS(f2iz, fixsfsi) - -COMPILER_RT_ABI si_int -__fixsfsi(fp_t a) { - return __fixint(a); -} +/* ===-- fixsfsi.c - Implement __fixsfsi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +ARM_EABI_FNALIAS(f2iz, fixsfsi) + +COMPILER_RT_ABI si_int +__fixsfsi(fp_t a) { + return __fixint(a); +} diff --git a/contrib/libs/cxxsupp/builtins/fixsfti.c b/contrib/libs/cxxsupp/builtins/fixsfti.c index 060f3faf3df..3a159b3e18e 100644 --- a/contrib/libs/cxxsupp/builtins/fixsfti.c +++ b/contrib/libs/cxxsupp/builtins/fixsfti.c @@ -1,26 +1,26 @@ -/* ===-- fixsfti.c - Implement __fixsfti -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT -#define SINGLE_PRECISION -#include "fp_lib.h" - -typedef ti_int fixint_t; -typedef tu_int fixuint_t; -#include "fp_fixint_impl.inc" - -COMPILER_RT_ABI ti_int -__fixsfti(fp_t a) { - return __fixint(a); -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- fixsfti.c - Implement __fixsfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT +#define SINGLE_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int +__fixsfti(fp_t a) { + return __fixint(a); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/fixtfdi.c b/contrib/libs/cxxsupp/builtins/fixtfdi.c index a036f062929..bc9dea1f4f8 100644 --- a/contrib/libs/cxxsupp/builtins/fixtfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixtfdi.c @@ -1,23 +1,23 @@ -/* ===-- fixtfdi.c - Implement __fixtfdi -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -typedef di_int fixint_t; -typedef du_int fixuint_t; -#include "fp_fixint_impl.inc" - -COMPILER_RT_ABI di_int -__fixtfdi(fp_t a) { - return __fixint(a); -} -#endif +/* ===-- fixtfdi.c - Implement __fixtfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int +__fixtfdi(fp_t a) { + return __fixint(a); +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixtfsi.c b/contrib/libs/cxxsupp/builtins/fixtfsi.c index 326454dc523..feb3de88509 100644 --- a/contrib/libs/cxxsupp/builtins/fixtfsi.c +++ b/contrib/libs/cxxsupp/builtins/fixtfsi.c @@ -1,23 +1,23 @@ -/* ===-- fixtfsi.c - Implement __fixtfsi -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -typedef si_int fixint_t; -typedef su_int fixuint_t; -#include "fp_fixint_impl.inc" - -COMPILER_RT_ABI si_int -__fixtfsi(fp_t a) { - return __fixint(a); -} -#endif +/* ===-- fixtfsi.c - Implement __fixtfsi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int +__fixtfsi(fp_t a) { + return __fixint(a); +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixtfti.c b/contrib/libs/cxxsupp/builtins/fixtfti.c index 2776c29fa2e..ee4ada85cb4 100644 --- a/contrib/libs/cxxsupp/builtins/fixtfti.c +++ b/contrib/libs/cxxsupp/builtins/fixtfti.c @@ -1,23 +1,23 @@ -/* ===-- fixtfti.c - Implement __fixtfti -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -typedef ti_int fixint_t; -typedef tu_int fixuint_t; -#include "fp_fixint_impl.inc" - -COMPILER_RT_ABI ti_int -__fixtfti(fp_t a) { - return __fixint(a); -} -#endif +/* ===-- fixtfti.c - Implement __fixtfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int +__fixtfti(fp_t a) { + return __fixint(a); +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunsdfdi.c b/contrib/libs/cxxsupp/builtins/fixunsdfdi.c index d708e6c3805..4b0bc9e1d05 100644 --- a/contrib/libs/cxxsupp/builtins/fixunsdfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixunsdfdi.c @@ -1,44 +1,44 @@ -/* ===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define DOUBLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(d2ulz, fixunsdfdi) - -#ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; can set the invalid - * flag as a side-effect of computation. - */ - -COMPILER_RT_ABI du_int -__fixunsdfdi(double a) -{ - if (a <= 0.0) return 0; - su_int high = a / 4294967296.f; /* a / 0x1p32f; */ - su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */ - return ((du_int)high << 32) | low; -} - -#else -/* Support for systems that don't have hardware floating-point; there are no - * flags to set, and we don't want to code-gen to an unknown soft-float - * implementation. 
- */ - -typedef du_int fixuint_t; -#include "fp_fixuint_impl.inc" - -COMPILER_RT_ABI du_int -__fixunsdfdi(fp_t a) { - return __fixuint(a); -} - -#endif +/* ===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(d2ulz, fixunsdfdi) + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int +__fixunsdfdi(double a) +{ + if (a <= 0.0) return 0; + su_int high = a / 4294967296.f; /* a / 0x1p32f; */ + su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */ + return ((du_int)high << 32) | low; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. + */ + +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int +__fixunsdfdi(fp_t a) { + return __fixuint(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunsdfsi.c b/contrib/libs/cxxsupp/builtins/fixunsdfsi.c index 1e1216188a8..232d342d77d 100644 --- a/contrib/libs/cxxsupp/builtins/fixunsdfsi.c +++ b/contrib/libs/cxxsupp/builtins/fixunsdfsi.c @@ -1,21 +1,21 @@ -/* ===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - */ - -#define DOUBLE_PRECISION -#include "fp_lib.h" -typedef su_int fixuint_t; -#include "fp_fixuint_impl.inc" - -ARM_EABI_FNALIAS(d2uiz, fixunsdfsi) - -COMPILER_RT_ABI su_int -__fixunsdfsi(fp_t a) { - return __fixuint(a); -} +/* ===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +ARM_EABI_FNALIAS(d2uiz, fixunsdfsi) + +COMPILER_RT_ABI su_int +__fixunsdfsi(fp_t a) { + return __fixuint(a); +} diff --git a/contrib/libs/cxxsupp/builtins/fixunsdfti.c b/contrib/libs/cxxsupp/builtins/fixunsdfti.c index 699dd464ea2..f8046a02632 100644 --- a/contrib/libs/cxxsupp/builtins/fixunsdfti.c +++ b/contrib/libs/cxxsupp/builtins/fixunsdfti.c @@ -1,23 +1,23 @@ -/* ===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT -#define DOUBLE_PRECISION -#include "fp_lib.h" -typedef tu_int fixuint_t; -#include "fp_fixuint_impl.inc" - -COMPILER_RT_ABI tu_int -__fixunsdfti(fp_t a) { - return __fixuint(a); -} -#endif /* CRT_HAS_128BIT */ +/* ===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. 
See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int +__fixunsdfti(fp_t a) { + return __fixuint(a); +} +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/fixunssfdi.c b/contrib/libs/cxxsupp/builtins/fixunssfdi.c index ab0513bc076..f8ebab854f9 100644 --- a/contrib/libs/cxxsupp/builtins/fixunssfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixunssfdi.c @@ -1,45 +1,45 @@ -/* ===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#define SINGLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(f2ulz, fixunssfdi) - -#ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; can set the invalid - * flag as a side-effect of computation. - */ - -COMPILER_RT_ABI du_int -__fixunssfdi(float a) -{ - if (a <= 0.0f) return 0; - double da = a; - su_int high = da / 4294967296.f; /* da / 0x1p32f; */ - su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */ - return ((du_int)high << 32) | low; -} - -#else -/* Support for systems that don't have hardware floating-point; there are no - * flags to set, and we don't want to code-gen to an unknown soft-float - * implementation. 
- */ - -typedef du_int fixuint_t; -#include "fp_fixuint_impl.inc" - -COMPILER_RT_ABI du_int -__fixunssfdi(fp_t a) { - return __fixuint(a); -} - -#endif +/* ===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(f2ulz, fixunssfdi) + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int +__fixunssfdi(float a) +{ + if (a <= 0.0f) return 0; + double da = a; + su_int high = da / 4294967296.f; /* da / 0x1p32f; */ + su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */ + return ((du_int)high << 32) | low; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. + */ + +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int +__fixunssfdi(fp_t a) { + return __fixuint(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunssfsi.c b/contrib/libs/cxxsupp/builtins/fixunssfsi.c index a7be73f802c..cc2b05bd84f 100644 --- a/contrib/libs/cxxsupp/builtins/fixunssfsi.c +++ b/contrib/libs/cxxsupp/builtins/fixunssfsi.c @@ -1,25 +1,25 @@ -/* ===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __fixunssfsi for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#define SINGLE_PRECISION -#include "fp_lib.h" -typedef su_int fixuint_t; -#include "fp_fixuint_impl.inc" - -ARM_EABI_FNALIAS(f2uiz, fixunssfsi) - -COMPILER_RT_ABI su_int -__fixunssfsi(fp_t a) { - return __fixuint(a); -} +/* ===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunssfsi for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +ARM_EABI_FNALIAS(f2uiz, fixunssfsi) + +COMPILER_RT_ABI su_int +__fixunssfsi(fp_t a) { + return __fixuint(a); +} diff --git a/contrib/libs/cxxsupp/builtins/fixunssfti.c b/contrib/libs/cxxsupp/builtins/fixunssfti.c index e18617293eb..862d7bd6c7a 100644 --- a/contrib/libs/cxxsupp/builtins/fixunssfti.c +++ b/contrib/libs/cxxsupp/builtins/fixunssfti.c @@ -1,26 +1,26 @@ -/* ===-- fixunssfti.c - Implement __fixunssfti -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __fixunssfti for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#define SINGLE_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) -typedef tu_int fixuint_t; -#include "fp_fixuint_impl.inc" - -COMPILER_RT_ABI tu_int -__fixunssfti(fp_t a) { - return __fixuint(a); -} -#endif +/* ===-- fixunssfti.c - Implement __fixunssfti -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunssfti for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int +__fixunssfti(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunstfdi.c b/contrib/libs/cxxsupp/builtins/fixunstfdi.c index 08d158f52ea..b2995f65834 100644 --- a/contrib/libs/cxxsupp/builtins/fixunstfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixunstfdi.c @@ -1,22 +1,22 @@ -/* ===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - */ - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -typedef du_int fixuint_t; -#include "fp_fixuint_impl.inc" - -COMPILER_RT_ABI du_int -__fixunstfdi(fp_t a) { - return __fixuint(a); -} -#endif +/* ===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int +__fixunstfdi(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunstfsi.c b/contrib/libs/cxxsupp/builtins/fixunstfsi.c index 4468bcdf6c2..b5d3f6a7d38 100644 --- a/contrib/libs/cxxsupp/builtins/fixunstfsi.c +++ b/contrib/libs/cxxsupp/builtins/fixunstfsi.c @@ -1,22 +1,22 @@ -/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - */ - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -typedef su_int fixuint_t; -#include "fp_fixuint_impl.inc" - -COMPILER_RT_ABI su_int -__fixunstfsi(fp_t a) { - return __fixuint(a); -} -#endif +/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI su_int +__fixunstfsi(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunstfti.c b/contrib/libs/cxxsupp/builtins/fixunstfti.c index 5828998cbf7..22ff9dfc033 100644 --- a/contrib/libs/cxxsupp/builtins/fixunstfti.c +++ b/contrib/libs/cxxsupp/builtins/fixunstfti.c @@ -1,22 +1,22 @@ -/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - */ - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -typedef tu_int fixuint_t; -#include "fp_fixuint_impl.inc" - -COMPILER_RT_ABI tu_int -__fixunstfti(fp_t a) { - return __fixuint(a); -} -#endif +/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int +__fixunstfti(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunsxfdi.c b/contrib/libs/cxxsupp/builtins/fixunsxfdi.c index 485f5b293aa..075304e78dc 100644 --- a/contrib/libs/cxxsupp/builtins/fixunsxfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixunsxfdi.c @@ -1,46 +1,46 @@ -/* ===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __fixunsxfdi for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#if !_ARCH_PPC - -#include "int_lib.h" - -/* Returns: convert a to a unsigned long long, rounding toward zero. - * Negative values all become zero. 
- */ - -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * du_int is a 64 bit integral type - * value in long double is representable in du_int or is negative - * (no range checking performed) - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI du_int -__fixunsxfdi(long double a) -{ - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0 || (fb.u.high.s.low & 0x00008000)) - return 0; - if ((unsigned)e > sizeof(du_int) * CHAR_BIT) - return ~(du_int)0; - return fb.u.low.all >> (63 - e); -} - -#endif +/* ===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunsxfdi for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a unsigned long long, rounding toward zero. + * Negative values all become zero. 
+ */ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * du_int is a 64 bit integral type + * value in long double is representable in du_int or is negative + * (no range checking performed) + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI du_int +__fixunsxfdi(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(du_int) * CHAR_BIT) + return ~(du_int)0; + return fb.u.low.all >> (63 - e); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/fixunsxfsi.c b/contrib/libs/cxxsupp/builtins/fixunsxfsi.c index 0e0faf53757..c3c70f743de 100644 --- a/contrib/libs/cxxsupp/builtins/fixunsxfsi.c +++ b/contrib/libs/cxxsupp/builtins/fixunsxfsi.c @@ -1,45 +1,45 @@ -/* ===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __fixunsxfsi for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#if !_ARCH_PPC - -#include "int_lib.h" - -/* Returns: convert a to a unsigned int, rounding toward zero. - * Negative values all become zero. 
- */ - -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * su_int is a 32 bit integral type - * value in long double is representable in su_int or is negative - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI su_int -__fixunsxfsi(long double a) -{ - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0 || (fb.u.high.s.low & 0x00008000)) - return 0; - if ((unsigned)e > sizeof(su_int) * CHAR_BIT) - return ~(su_int)0; - return fb.u.low.s.high >> (31 - e); -} - -#endif /* !_ARCH_PPC */ +/* ===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunsxfsi for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a unsigned int, rounding toward zero. + * Negative values all become zero. 
+ */ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * su_int is a 32 bit integral type + * value in long double is representable in su_int or is negative + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI su_int +__fixunsxfsi(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(su_int) * CHAR_BIT) + return ~(su_int)0; + return fb.u.low.s.high >> (31 - e); +} + +#endif /* !_ARCH_PPC */ diff --git a/contrib/libs/cxxsupp/builtins/fixunsxfti.c b/contrib/libs/cxxsupp/builtins/fixunsxfti.c index 9ed2988424d..fb39d00ff5b 100644 --- a/contrib/libs/cxxsupp/builtins/fixunsxfti.c +++ b/contrib/libs/cxxsupp/builtins/fixunsxfti.c @@ -1,50 +1,50 @@ -/* ===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __fixunsxfti for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a unsigned long long, rounding toward zero. - * Negative values all become zero. 
- */ - -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * tu_int is a 128 bit integral type - * value in long double is representable in tu_int or is negative - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI tu_int -__fixunsxfti(long double a) -{ - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0 || (fb.u.high.s.low & 0x00008000)) - return 0; - if ((unsigned)e > sizeof(tu_int) * CHAR_BIT) - return ~(tu_int)0; - tu_int r = fb.u.low.all; - if (e > 63) - r <<= (e - 63); - else - r >>= (63 - e); - return r; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunsxfti for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a unsigned long long, rounding toward zero. + * Negative values all become zero. 
+ */ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * tu_int is a 128 bit integral type + * value in long double is representable in tu_int or is negative + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI tu_int +__fixunsxfti(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(tu_int) * CHAR_BIT) + return ~(tu_int)0; + tu_int r = fb.u.low.all; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return r; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/fixxfdi.c b/contrib/libs/cxxsupp/builtins/fixxfdi.c index 65ba7ce89dc..011787f9e4b 100644 --- a/contrib/libs/cxxsupp/builtins/fixxfdi.c +++ b/contrib/libs/cxxsupp/builtins/fixxfdi.c @@ -1,48 +1,48 @@ -/* ===-- fixxfdi.c - Implement __fixxfdi -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __fixxfdi for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#if !_ARCH_PPC - -#include "int_lib.h" - -/* Returns: convert a to a signed long long, rounding toward zero. 
*/ - -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * di_int is a 64 bit integral type - * value in long double is representable in di_int (no range checking performed) - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI di_int -__fixxfdi(long double a) -{ - const di_int di_max = (di_int)((~(du_int)0) / 2); - const di_int di_min = -di_max - 1; - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0) - return 0; - if ((unsigned)e >= sizeof(di_int) * CHAR_BIT) - return a > 0 ? di_max : di_min; - di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); - di_int r = fb.u.low.all; - r = (du_int)r >> (63 - e); - return (r ^ s) - s; -} - -#endif /* !_ARCH_PPC */ +/* ===-- fixxfdi.c - Implement __fixxfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixxfdi for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a signed long long, rounding toward zero. 
*/ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * di_int is a 64 bit integral type + * value in long double is representable in di_int (no range checking performed) + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI di_int +__fixxfdi(long double a) +{ + const di_int di_max = (di_int)((~(du_int)0) / 2); + const di_int di_min = -di_max - 1; + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + if ((unsigned)e >= sizeof(di_int) * CHAR_BIT) + return a > 0 ? di_max : di_min; + di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); + di_int r = fb.u.low.all; + r = (du_int)r >> (63 - e); + return (r ^ s) - s; +} + +#endif /* !_ARCH_PPC */ diff --git a/contrib/libs/cxxsupp/builtins/fixxfti.c b/contrib/libs/cxxsupp/builtins/fixxfti.c index 4dc36dd0b7c..968a4f0d5ee 100644 --- a/contrib/libs/cxxsupp/builtins/fixxfti.c +++ b/contrib/libs/cxxsupp/builtins/fixxfti.c @@ -1,51 +1,51 @@ -/* ===-- fixxfti.c - Implement __fixxfti -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __fixxfti for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a signed long long, rounding toward zero. 
*/ - -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * ti_int is a 128 bit integral type - * value in long double is representable in ti_int - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI ti_int -__fixxfti(long double a) -{ - const ti_int ti_max = (ti_int)((~(tu_int)0) / 2); - const ti_int ti_min = -ti_max - 1; - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0) - return 0; - ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); - ti_int r = fb.u.low.all; - if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT) - return a > 0 ? ti_max : ti_min; - if (e > 63) - r <<= (e - 63); - else - r >>= (63 - e); - return (r ^ s) - s; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- fixxfti.c - Implement __fixxfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixxfti for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a signed long long, rounding toward zero. 
*/ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * ti_int is a 128 bit integral type + * value in long double is representable in ti_int + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI ti_int +__fixxfti(long double a) +{ + const ti_int ti_max = (ti_int)((~(tu_int)0) / 2); + const ti_int ti_min = -ti_max - 1; + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); + ti_int r = fb.u.low.all; + if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT) + return a > 0 ? ti_max : ti_min; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return (r ^ s) - s; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/floatdidf.c b/contrib/libs/cxxsupp/builtins/floatdidf.c index 00aa9138adc..a300c9f312d 100644 --- a/contrib/libs/cxxsupp/builtins/floatdidf.c +++ b/contrib/libs/cxxsupp/builtins/floatdidf.c @@ -1,107 +1,107 @@ -/*===-- floatdidf.c - Implement __floatdidf -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------=== - * - * This file implements __floatdidf for the compiler_rt library. - * - *===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: convert a to a double, rounding toward even. 
*/ - -/* Assumption: double is a IEEE 64 bit floating point type - * di_int is a 64 bit integral type - */ - -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ - -ARM_EABI_FNALIAS(l2d, floatdidf) - -#ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; we'll set the inexact flag - * as a side-effect of this computation. - */ - -COMPILER_RT_ABI double -__floatdidf(di_int a) -{ - static const double twop52 = 4503599627370496.0; // 0x1.0p52 - static const double twop32 = 4294967296.0; // 0x1.0p32 - - union { int64_t x; double d; } low = { .d = twop52 }; - - const double high = (int32_t)(a >> 32) * twop32; - low.x |= a & INT64_C(0x00000000ffffffff); - - const double result = (high - twop52) + low.d; - return result; -} - -#else -/* Support for systems that don't have hardware floating-point; there are no flags to - * set, and we don't want to code-gen to an unknown soft-float implementation. - */ - -COMPILER_RT_ABI double -__floatdidf(di_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(di_int) * CHAR_BIT; - const di_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 - * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a 
is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((du_int)1 << DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ - } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ - } - double_bits fb; - fb.u.high = ((su_int)s & 0x80000000) | /* sign */ - ((e + 1023) << 20) | /* exponent */ - ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.low = (su_int)a; /* mantissa-low */ - return fb.f; -} -#endif +/*===-- floatdidf.c - Implement __floatdidf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * This file implements __floatdidf for the compiler_rt library. + * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: convert a to a double, rounding toward even. */ + +/* Assumption: double is a IEEE 64 bit floating point type + * di_int is a 64 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +ARM_EABI_FNALIAS(l2d, floatdidf) + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; we'll set the inexact flag + * as a side-effect of this computation. 
+ */ + +COMPILER_RT_ABI double +__floatdidf(di_int a) +{ + static const double twop52 = 4503599627370496.0; // 0x1.0p52 + static const double twop32 = 4294967296.0; // 0x1.0p32 + + union { int64_t x; double d; } low = { .d = twop52 }; + + const double high = (int32_t)(a >> 32) * twop32; + low.x |= a & INT64_C(0x00000000ffffffff); + + const double result = (high - twop52) + low.d; + return result; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no flags to + * set, and we don't want to code-gen to an unknown soft-float implementation. + */ + +COMPILER_RT_ABI double +__floatdidf(di_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((du_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.high = ((su_int)s & 0x80000000) | /* sign */ + ((e + 1023) << 20) | /* exponent */ + ((su_int)(a 
>> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.low = (su_int)a; /* mantissa-low */ + return fb.f; +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/floatdisf.c b/contrib/libs/cxxsupp/builtins/floatdisf.c index 31fcb8ff17d..3e47580ef57 100644 --- a/contrib/libs/cxxsupp/builtins/floatdisf.c +++ b/contrib/libs/cxxsupp/builtins/floatdisf.c @@ -1,80 +1,80 @@ -/*===-- floatdisf.c - Implement __floatdisf -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------=== - * - * This file implements __floatdisf for the compiler_rt library. - * - *===----------------------------------------------------------------------=== +/*===-- floatdisf.c - Implement __floatdisf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * This file implements __floatdisf for the compiler_rt library. 
+ * + *===----------------------------------------------------------------------=== + */ + +/* Returns: convert a to a float, rounding toward even.*/ + +/* Assumption: float is a IEEE 32 bit floating point type + * di_int is a 64 bit integral type */ - -/* Returns: convert a to a float, rounding toward even.*/ - -/* Assumption: float is a IEEE 32 bit floating point type - * di_int is a 64 bit integral type - */ - -/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ - -#include "int_lib.h" - -ARM_EABI_FNALIAS(l2f, floatdisf) - -COMPILER_RT_ABI float -__floatdisf(di_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(di_int) * CHAR_BIT; - const di_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - if (a & ((du_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ - } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ - } - float_bits fb; - fb.u = ((su_int)s & 0x80000000) | /* sign */ - ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; -} + +/* seee eeee emmm 
mmmm mmmm mmmm mmmm mmmm */ + +#include "int_lib.h" + +ARM_EABI_FNALIAS(l2f, floatdisf) + +COMPILER_RT_ABI float +__floatdisf(di_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((du_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | /* sign */ + ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} diff --git a/contrib/libs/cxxsupp/builtins/floatditf.c b/contrib/libs/cxxsupp/builtins/floatditf.c index 6effa55f5ec..cd51dd8aade 100644 --- a/contrib/libs/cxxsupp/builtins/floatditf.c +++ b/contrib/libs/cxxsupp/builtins/floatditf.c @@ -1,50 +1,50 @@ -//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois 
Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements di_int to quad-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __floatditf(di_int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - du_int aAbs = (du_int)a; - if (a < 0) { - sign = signBit; - aAbs = ~(du_int)a + 1U; - } - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clzll(aAbs); - rep_t result; - - // Shift a into the significand field, rounding if it is a right-shift - const int shift = significandBits - exponent; - result = (rep_t)aAbs << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); -} - -#endif +//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements di_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatditf(di_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + du_int aAbs = (du_int)a; + if (a < 0) { + sign = signBit; + aAbs = ~(du_int)a + 1U; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clzll(aAbs); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/floatdixf.c b/contrib/libs/cxxsupp/builtins/floatdixf.c index 64a46e11b30..d39e81d7ca7 100644 --- a/contrib/libs/cxxsupp/builtins/floatdixf.c +++ b/contrib/libs/cxxsupp/builtins/floatdixf.c @@ -1,46 +1,46 @@ -/* ===-- floatdixf.c - Implement __floatdixf -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floatdixf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#if !_ARCH_PPC - -#include "int_lib.h" - -/* Returns: convert a to a long double, rounding toward even. 
*/ - -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * di_int is a 64 bit integral type +/* ===-- floatdixf.c - Implement __floatdixf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatdixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI long double -__floatdixf(di_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(di_int) * CHAR_BIT; - const di_int s = a >> (N-1); - a = (a ^ s) - s; - int clz = __builtin_clzll(a); - int e = (N - 1) - clz ; /* exponent */ - long_double_bits fb; - fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */ - (e + 16383); /* exponent */ - fb.u.low.all = a << clz; /* mantissa */ - return fb.f; -} - -#endif /* !_ARCH_PPC */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a long double, rounding toward even. 
*/ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * di_int is a 64 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI long double +__floatdixf(di_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz ; /* exponent */ + long_double_bits fb; + fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */ + (e + 16383); /* exponent */ + fb.u.low.all = a << clz; /* mantissa */ + return fb.f; +} + +#endif /* !_ARCH_PPC */ diff --git a/contrib/libs/cxxsupp/builtins/floatsidf.c b/contrib/libs/cxxsupp/builtins/floatsidf.c index 84958598037..1cf99b782a6 100644 --- a/contrib/libs/cxxsupp/builtins/floatsidf.c +++ b/contrib/libs/cxxsupp/builtins/floatsidf.c @@ -1,53 +1,53 @@ -//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements integer to double-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. 
-// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_lib.h" - -#include "int_lib.h" - -ARM_EABI_FNALIAS(i2d, floatsidf) - -COMPILER_RT_ABI fp_t -__floatsidf(int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - if (a < 0) { - sign = signBit; - a = -a; - } - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. Extra - // cast to unsigned int is necessary to get the correct behavior for - // the input INT_MIN. - const int shift = significandBits - exponent; - result = (rep_t)(unsigned int)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); -} +//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +ARM_EABI_FNALIAS(i2d, floatsidf) + +COMPILER_RT_ABI fp_t +__floatsidf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. Extra + // cast to unsigned int is necessary to get the correct behavior for + // the input INT_MIN. + const int shift = significandBits - exponent; + result = (rep_t)(unsigned int)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} diff --git a/contrib/libs/cxxsupp/builtins/floatsisf.c b/contrib/libs/cxxsupp/builtins/floatsisf.c index 3047aed6db5..467dd1d1eaf 100644 --- a/contrib/libs/cxxsupp/builtins/floatsisf.c +++ b/contrib/libs/cxxsupp/builtins/floatsisf.c @@ -1,59 +1,59 @@ -//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements integer to single-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. 
-// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_lib.h" - -#include "int_lib.h" - -ARM_EABI_FNALIAS(i2f, floatsisf) - -COMPILER_RT_ABI fp_t -__floatsisf(int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - if (a < 0) { - sign = signBit; - a = -a; - } - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field, rounding if it is a right-shift - if (exponent <= significandBits) { - const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - } else { - const int shift = exponent - significandBits; - result = (rep_t)a >> shift ^ implicitBit; - rep_t round = (rep_t)a << (typeWidth - shift); - if (round > signBit) result++; - if (round == signBit) result += result & 1; - } - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); -} +//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to single-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
+// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +ARM_EABI_FNALIAS(i2f, floatsisf) + +COMPILER_RT_ABI fp_t +__floatsisf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) result++; + if (round == signBit) result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} diff --git a/contrib/libs/cxxsupp/builtins/floatsitf.c b/contrib/libs/cxxsupp/builtins/floatsitf.c index 751a6ad8ca5..f0abca363b5 100644 --- a/contrib/libs/cxxsupp/builtins/floatsitf.c +++ b/contrib/libs/cxxsupp/builtins/floatsitf.c @@ -1,50 +1,50 @@ -//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements integer to quad-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. 
-// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __floatsitf(int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - unsigned aAbs = (unsigned)a; - if (a < 0) { - sign = signBit; - aAbs = ~(unsigned)a + 1U; - } - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(aAbs); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. - const int shift = significandBits - exponent; - result = (rep_t)aAbs << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); -} - -#endif +//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatsitf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + unsigned aAbs = (unsigned)a; + if (a < 0) { + sign = signBit; + aAbs = ~(unsigned)a + 1U; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(aAbs); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/floattidf.c b/contrib/libs/cxxsupp/builtins/floattidf.c index bf3af1d51e5..6331ba57376 100644 --- a/contrib/libs/cxxsupp/builtins/floattidf.c +++ b/contrib/libs/cxxsupp/builtins/floattidf.c @@ -1,83 +1,83 @@ -/* ===-- floattidf.c - Implement __floattidf -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floattidf for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a double, rounding toward even.*/ - -/* Assumption: double is a IEEE 64 bit floating point type - * ti_int is a 128 bit integral type +/* ===-- floattidf.c - Implement __floattidf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floattidf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== */ - -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ - -COMPILER_RT_ABI double -__floattidf(ti_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(ti_int) * CHAR_BIT; - const ti_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 - * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << 
DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ - } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ - } - double_bits fb; - fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ - ((e + 1023) << 20) | /* exponent */ - ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.s.low = (su_int)a; /* mantissa-low */ - return fb.f; -} - -#endif /* CRT_HAS_128BIT */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a double, rounding toward even.*/ + +/* Assumption: double is a IEEE 64 bit floating point type + * ti_int is a 128 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI double +__floattidf(ti_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now 
rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ + ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/floattisf.c b/contrib/libs/cxxsupp/builtins/floattisf.c index aa57609aa5b..f1b585f2c32 100644 --- a/contrib/libs/cxxsupp/builtins/floattisf.c +++ b/contrib/libs/cxxsupp/builtins/floattisf.c @@ -1,82 +1,82 @@ -/* ===-- floattisf.c - Implement __floattisf -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floattisf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a float, rounding toward even. 
*/ - -/* Assumption: float is a IEEE 32 bit floating point type - * ti_int is a 128 bit integral type - */ - -/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ - -COMPILER_RT_ABI float -__floattisf(ti_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(ti_int) * CHAR_BIT; - const ti_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ - } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ - } - float_bits fb; - fb.u = ((su_int)s & 0x80000000) | /* sign */ - ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- floattisf.c - Implement __floattisf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __floattisf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a float, rounding toward even. */ + +/* Assumption: float is a IEEE 32 bit floating point type + * ti_int is a 128 bit integral type + */ + +/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI float +__floattisf(ti_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | /* sign */ + ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git 
a/contrib/libs/cxxsupp/builtins/floattixf.c b/contrib/libs/cxxsupp/builtins/floattixf.c index a63e2389404..1203b3a96e7 100644 --- a/contrib/libs/cxxsupp/builtins/floattixf.c +++ b/contrib/libs/cxxsupp/builtins/floattixf.c @@ -1,84 +1,84 @@ -/* ===-- floattixf.c - Implement __floattixf -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floattixf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a long double, rounding toward even. */ - -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * ti_int is a 128 bit integral type - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI long double -__floattixf(ti_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(ti_int) * CHAR_BIT; - const ti_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > LDBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit LDBL_MANT_DIG-1 bits to the right of 1 - * Q = bit LDBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case LDBL_MANT_DIG + 1: - a <<= 1; - break; - case LDBL_MANT_DIG + 2: - break; - default: - a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | - ((a & 
((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << LDBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to LDBL_MANT_DIG bits */ - } - else - { - a <<= (LDBL_MANT_DIG - sd); - /* a is now rounded to LDBL_MANT_DIG bits */ - } - long_double_bits fb; - fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */ - (e + 16383); /* exponent */ - fb.u.low.all = (du_int)a; /* mantissa */ - return fb.f; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- floattixf.c - Implement __floattixf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floattixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a long double, rounding toward even. 
*/ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * ti_int is a 128 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI long double +__floattixf(ti_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } + else + { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + long_double_bits fb; + fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */ + (e + 16383); /* exponent */ + fb.u.low.all = (du_int)a; /* mantissa */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/floatundidf.c b/contrib/libs/cxxsupp/builtins/floatundidf.c index 4136d4719f5..67aa86e5e5b 100644 --- 
a/contrib/libs/cxxsupp/builtins/floatundidf.c +++ b/contrib/libs/cxxsupp/builtins/floatundidf.c @@ -1,106 +1,106 @@ -/* ===-- floatundidf.c - Implement __floatundidf ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floatundidf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== +/* ===-- floatundidf.c - Implement __floatundidf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatundidf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +/* Returns: convert a to a double, rounding toward even. */ + +/* Assumption: double is a IEEE 64 bit floating point type + * du_int is a 64 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +#include "int_lib.h" + +ARM_EABI_FNALIAS(ul2d, floatundidf) + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; we'll set the inexact flag + * as a side-effect of this computation. 
+ */ + +COMPILER_RT_ABI double +__floatundidf(du_int a) +{ + static const double twop52 = 4503599627370496.0; // 0x1.0p52 + static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84 + static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84 + + union { uint64_t x; double d; } high = { .d = twop84 }; + union { uint64_t x; double d; } low = { .d = twop52 }; + + high.x |= a >> 32; + low.x |= a & UINT64_C(0x00000000ffffffff); + + const double result = (high.d - twop84_plus_twop52) + low.d; + return result; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no flags to + * set, and we don't want to code-gen to an unknown soft-float implementation. */ - -/* Returns: convert a to a double, rounding toward even. */ - -/* Assumption: double is a IEEE 64 bit floating point type - * du_int is a 64 bit integral type - */ - -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ - -#include "int_lib.h" - -ARM_EABI_FNALIAS(ul2d, floatundidf) - -#ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; we'll set the inexact flag - * as a side-effect of this computation. - */ - -COMPILER_RT_ABI double -__floatundidf(du_int a) -{ - static const double twop52 = 4503599627370496.0; // 0x1.0p52 - static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84 - static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84 - - union { uint64_t x; double d; } high = { .d = twop84 }; - union { uint64_t x; double d; } low = { .d = twop52 }; - - high.x |= a >> 32; - low.x |= a & UINT64_C(0x00000000ffffffff); - - const double result = (high.d - twop84_plus_twop52) + low.d; - return result; -} - -#else -/* Support for systems that don't have hardware floating-point; there are no flags to - * set, and we don't want to code-gen to an unknown soft-float implementation. 
- */ - -COMPILER_RT_ABI double -__floatundidf(du_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(du_int) * CHAR_BIT; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 - * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((du_int)1 << DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ - } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ - } - double_bits fb; - fb.u.high = ((e + 1023) << 20) | /* exponent */ - ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.low = (su_int)a; /* mantissa-low */ - return fb.f; -} -#endif + +COMPILER_RT_ABI double +__floatundidf(du_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + 
* R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((du_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.high = ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.low = (su_int)a; /* mantissa-low */ + return fb.f; +} +#endif diff --git a/contrib/libs/cxxsupp/builtins/floatundisf.c b/contrib/libs/cxxsupp/builtins/floatundisf.c index fbf2a9ec562..713a44abc8b 100644 --- a/contrib/libs/cxxsupp/builtins/floatundisf.c +++ b/contrib/libs/cxxsupp/builtins/floatundisf.c @@ -1,77 +1,77 @@ -/*===-- floatundisf.c - Implement __floatundisf ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floatundisf for the compiler_rt library. - * - *===----------------------------------------------------------------------=== - */ - -/* Returns: convert a to a float, rounding toward even. 
*/ - -/* Assumption: float is a IEEE 32 bit floating point type - * du_int is a 64 bit integral type - */ - -/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ - -#include "int_lib.h" - -ARM_EABI_FNALIAS(ul2f, floatundisf) - -COMPILER_RT_ABI float -__floatundisf(du_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(du_int) * CHAR_BIT; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* 8 exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - if (a & ((du_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ - } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ - } - float_bits fb; - fb.u = ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; -} +/*===-- floatundisf.c - Implement __floatundisf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatundisf for the compiler_rt library. 
+ * + *===----------------------------------------------------------------------=== + */ + +/* Returns: convert a to a float, rounding toward even. */ + +/* Assumption: float is a IEEE 32 bit floating point type + * du_int is a 64 bit integral type + */ + +/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ + +#include "int_lib.h" + +ARM_EABI_FNALIAS(ul2f, floatundisf) + +COMPILER_RT_ABI float +__floatundisf(du_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* 8 exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((du_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} diff --git a/contrib/libs/cxxsupp/builtins/floatunditf.c b/contrib/libs/cxxsupp/builtins/floatunditf.c index f6706c860e8..8098e95e82b 100644 --- a/contrib/libs/cxxsupp/builtins/floatunditf.c +++ b/contrib/libs/cxxsupp/builtins/floatunditf.c @@ -1,40 +1,40 @@ -//===-- 
lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements du_int to quad-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __floatunditf(du_int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clzll(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. - const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); -} - -#endif +//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements du_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatunditf(du_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clzll(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/floatundixf.c b/contrib/libs/cxxsupp/builtins/floatundixf.c index d8eaf5b80d8..ca5e06d64dc 100644 --- a/contrib/libs/cxxsupp/builtins/floatundixf.c +++ b/contrib/libs/cxxsupp/builtins/floatundixf.c @@ -1,42 +1,42 @@ -/* ===-- floatundixf.c - Implement __floatundixf ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floatundixf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#if !_ARCH_PPC - -#include "int_lib.h" - -/* Returns: convert a to a long double, rounding toward even. 
*/ - -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * du_int is a 64 bit integral type - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ -COMPILER_RT_ABI long double -__floatundixf(du_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(du_int) * CHAR_BIT; - int clz = __builtin_clzll(a); - int e = (N - 1) - clz ; /* exponent */ - long_double_bits fb; - fb.u.high.s.low = (e + 16383); /* exponent */ - fb.u.low.all = a << clz; /* mantissa */ - return fb.f; -} - -#endif /* _ARCH_PPC */ +/* ===-- floatundixf.c - Implement __floatundixf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatundixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a long double, rounding toward even. 
*/ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * du_int is a 64 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ +COMPILER_RT_ABI long double +__floatundixf(du_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz ; /* exponent */ + long_double_bits fb; + fb.u.high.s.low = (e + 16383); /* exponent */ + fb.u.low.all = a << clz; /* mantissa */ + return fb.f; +} + +#endif /* _ARCH_PPC */ diff --git a/contrib/libs/cxxsupp/builtins/floatunsidf.c b/contrib/libs/cxxsupp/builtins/floatunsidf.c index ed45a1658fa..445e18041c4 100644 --- a/contrib/libs/cxxsupp/builtins/floatunsidf.c +++ b/contrib/libs/cxxsupp/builtins/floatunsidf.c @@ -1,42 +1,42 @@ -//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements unsigned integer to double-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. -// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_lib.h" - -#include "int_lib.h" - -ARM_EABI_FNALIAS(ui2d, floatunsidf) - -COMPILER_RT_ABI fp_t -__floatunsidf(unsigned int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). 
- const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. - const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); -} +//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +ARM_EABI_FNALIAS(ui2d, floatunsidf) + +COMPILER_RT_ABI fp_t +__floatunsidf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. 
+ const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} diff --git a/contrib/libs/cxxsupp/builtins/floatunsisf.c b/contrib/libs/cxxsupp/builtins/floatunsisf.c index 54073a29dac..ea6f161adc0 100644 --- a/contrib/libs/cxxsupp/builtins/floatunsisf.c +++ b/contrib/libs/cxxsupp/builtins/floatunsisf.c @@ -1,50 +1,50 @@ -//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements unsigned integer to single-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. -// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_lib.h" - -#include "int_lib.h" - -ARM_EABI_FNALIAS(ui2f, floatunsisf) - -COMPILER_RT_ABI fp_t -__floatunsisf(unsigned int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). 
- const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field, rounding if it is a right-shift - if (exponent <= significandBits) { - const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - } else { - const int shift = exponent - significandBits; - result = (rep_t)a >> shift ^ implicitBit; - rep_t round = (rep_t)a << (typeWidth - shift); - if (round > signBit) result++; - if (round == signBit) result += result & 1; - } - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); -} +//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to single-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +ARM_EABI_FNALIAS(ui2f, floatunsisf) + +COMPILER_RT_ABI fp_t +__floatunsisf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). 
+ const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) result++; + if (round == signBit) result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} diff --git a/contrib/libs/cxxsupp/builtins/floatunsitf.c b/contrib/libs/cxxsupp/builtins/floatunsitf.c index 502c3bf7caa..1cd1842e709 100644 --- a/contrib/libs/cxxsupp/builtins/floatunsitf.c +++ b/contrib/libs/cxxsupp/builtins/floatunsitf.c @@ -1,40 +1,40 @@ -//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements unsigned integer to quad-precision conversion for the -// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even -// mode. -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. 
- const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); -} - -#endif +//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. 
+ const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/floatuntidf.c b/contrib/libs/cxxsupp/builtins/floatuntidf.c index 53f48a3598e..06202d9679e 100644 --- a/contrib/libs/cxxsupp/builtins/floatuntidf.c +++ b/contrib/libs/cxxsupp/builtins/floatuntidf.c @@ -1,80 +1,80 @@ -/* ===-- floatuntidf.c - Implement __floatuntidf ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floatuntidf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a double, rounding toward even. 
*/ - -/* Assumption: double is a IEEE 64 bit floating point type - * tu_int is a 128 bit integral type - */ - -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ - -COMPILER_RT_ABI double -__floatuntidf(tu_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(tu_int) * CHAR_BIT; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 - * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ - } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ - } - double_bits fb; - fb.u.s.high = ((e + 1023) << 20) | /* exponent */ - ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.s.low = (su_int)a; /* mantissa-low */ - return fb.f; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- floatuntidf.c - Implement __floatuntidf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatuntidf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a double, rounding toward even. */ + +/* Assumption: double is a IEEE 64 bit floating point type + * tu_int is a 128 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI double +__floatuntidf(tu_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; +} + +#endif /* 
CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/floatuntisf.c b/contrib/libs/cxxsupp/builtins/floatuntisf.c index 9e0c8e14890..c0dd0275ddb 100644 --- a/contrib/libs/cxxsupp/builtins/floatuntisf.c +++ b/contrib/libs/cxxsupp/builtins/floatuntisf.c @@ -1,79 +1,79 @@ -/* ===-- floatuntisf.c - Implement __floatuntisf ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floatuntisf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a float, rounding toward even. */ - -/* Assumption: float is a IEEE 32 bit floating point type - * tu_int is a 128 bit integral type - */ - -/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ - -COMPILER_RT_ABI float -__floatuntisf(tu_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(tu_int) * CHAR_BIT; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and 
R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ - } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ - } - float_bits fb; - fb.u = ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- floatuntisf.c - Implement __floatuntisf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatuntisf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a float, rounding toward even. 
*/ + +/* Assumption: float is a IEEE 32 bit floating point type + * tu_int is a 128 bit integral type + */ + +/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI float +__floatuntisf(tu_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/floatuntixf.c b/contrib/libs/cxxsupp/builtins/floatuntixf.c index c18a1127a27..ea81cb1bcda 100644 --- a/contrib/libs/cxxsupp/builtins/floatuntixf.c +++ b/contrib/libs/cxxsupp/builtins/floatuntixf.c @@ -1,81 +1,81 @@ -/* ===-- floatuntixf.c - Implement __floatuntixf ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois 
Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __floatuntixf for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: convert a to a long double, rounding toward even. */ - -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * tu_int is a 128 bit integral type - */ - -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm - */ - -COMPILER_RT_ABI long double -__floatuntixf(tu_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(tu_int) * CHAR_BIT; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > LDBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit LDBL_MANT_DIG-1 bits to the right of 1 - * Q = bit LDBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case LDBL_MANT_DIG + 1: - a <<= 1; - break; - case LDBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (LDBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << LDBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to LDBL_MANT_DIG bits */ - } - else - { - a <<= (LDBL_MANT_DIG - sd); - /* a is now rounded to LDBL_MANT_DIG bits */ - } - long_double_bits fb; 
- fb.u.high.s.low = (e + 16383); /* exponent */ - fb.u.low.all = (du_int)a; /* mantissa */ - return fb.f; -} - -#endif +/* ===-- floatuntixf.c - Implement __floatuntixf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatuntixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a long double, rounding toward even. */ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * tu_int is a 128 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI long double +__floatuntixf(tu_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 
2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } + else + { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + long_double_bits fb; + fb.u.high.s.low = (e + 16383); /* exponent */ + fb.u.low.all = (du_int)a; /* mantissa */ + return fb.f; +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc index 57d079e9d3d..b47be1b648e 100644 --- a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc +++ b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc @@ -1,144 +1,144 @@ -//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements soft-float addition with the IEEE-754 default rounding -// (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#include "fp_lib.h" - -static __inline fp_t __addXf3__(fp_t a, fp_t b) { - rep_t aRep = toRep(a); - rep_t bRep = toRep(b); - const rep_t aAbs = aRep & absMask; - const rep_t bAbs = bRep & absMask; - - // Detect if a or b is zero, infinity, or NaN. 
- if (aAbs - REP_C(1) >= infRep - REP_C(1) || - bAbs - REP_C(1) >= infRep - REP_C(1)) { - // NaN + anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything + NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // +/-infinity + -/+infinity = qNaN - if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep); - // +/-infinity + anything remaining = +/- infinity - else return a; - } - - // anything remaining + +/-infinity = +/-infinity - if (bAbs == infRep) return b; - - // zero + anything = anything - if (!aAbs) { - // but we need to get the sign right for zero + zero - if (!bAbs) return fromRep(toRep(a) & toRep(b)); - else return b; - } - - // anything + zero = anything - if (!bAbs) return a; - } - - // Swap a and b if necessary so that a has the larger absolute value. - if (bAbs > aAbs) { - const rep_t temp = aRep; - aRep = bRep; - bRep = temp; - } - - // Extract the exponent and significand from the (possibly swapped) a and b. - int aExponent = aRep >> significandBits & maxExponent; - int bExponent = bRep >> significandBits & maxExponent; - rep_t aSignificand = aRep & significandMask; - rep_t bSignificand = bRep & significandMask; - - // Normalize any denormals, and adjust the exponent accordingly. - if (aExponent == 0) aExponent = normalize(&aSignificand); - if (bExponent == 0) bExponent = normalize(&bSignificand); - - // The sign of the result is the sign of the larger operand, a. If they - // have opposite signs, we are performing a subtraction; otherwise addition. - const rep_t resultSign = aRep & signBit; - const bool subtraction = (aRep ^ bRep) & signBit; - - // Shift the significands to give us round, guard and sticky, and or in the - // implicit significand bit. (If we fell through from the denormal path it - // was already set by normalize( ), but setting it twice won't hurt - // anything.) 
- aSignificand = (aSignificand | implicitBit) << 3; - bSignificand = (bSignificand | implicitBit) << 3; - - // Shift the significand of b by the difference in exponents, with a sticky - // bottom bit to get rounding correct. - const unsigned int align = aExponent - bExponent; - if (align) { - if (align < typeWidth) { - const bool sticky = bSignificand << (typeWidth - align); - bSignificand = bSignificand >> align | sticky; - } else { - bSignificand = 1; // sticky; b is known to be non-zero. - } - } - if (subtraction) { - aSignificand -= bSignificand; - // If a == -b, return +zero. - if (aSignificand == 0) return fromRep(0); - - // If partial cancellation occured, we need to left-shift the result - // and adjust the exponent: - if (aSignificand < implicitBit << 3) { - const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); - aSignificand <<= shift; - aExponent -= shift; - } - } - else /* addition */ { - aSignificand += bSignificand; - - // If the addition carried up, we need to right-shift the result and - // adjust the exponent: - if (aSignificand & implicitBit << 4) { - const bool sticky = aSignificand & 1; - aSignificand = aSignificand >> 1 | sticky; - aExponent += 1; - } - } - - // If we have overflowed the type, return +/- infinity: - if (aExponent >= maxExponent) return fromRep(infRep | resultSign); - - if (aExponent <= 0) { - // Result is denormal before rounding; the exponent is zero and we - // need to shift the significand. - const int shift = 1 - aExponent; - const bool sticky = aSignificand << (typeWidth - shift); - aSignificand = aSignificand >> shift | sticky; - aExponent = 0; - } - - // Low three bits are round, guard, and sticky. - const int roundGuardSticky = aSignificand & 0x7; - - // Shift the significand into place, and mask off the implicit bit. - rep_t result = aSignificand >> 3 & significandMask; - - // Insert the exponent and sign. - result |= (rep_t)aExponent << significandBits; - result |= resultSign; - - // Final rounding. 
The result may overflow to infinity, but that is the - // correct result in that case. - if (roundGuardSticky > 0x4) result++; - if (roundGuardSticky == 0x4) result += result & 1; - return fromRep(result); -} +//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float addition with the IEEE-754 default rounding +// (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fp_t __addXf3__(fp_t a, fp_t b) { + rep_t aRep = toRep(a); + rep_t bRep = toRep(b); + const rep_t aAbs = aRep & absMask; + const rep_t bAbs = bRep & absMask; + + // Detect if a or b is zero, infinity, or NaN. + if (aAbs - REP_C(1) >= infRep - REP_C(1) || + bAbs - REP_C(1) >= infRep - REP_C(1)) { + // NaN + anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything + NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // +/-infinity + -/+infinity = qNaN + if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep); + // +/-infinity + anything remaining = +/- infinity + else return a; + } + + // anything remaining + +/-infinity = +/-infinity + if (bAbs == infRep) return b; + + // zero + anything = anything + if (!aAbs) { + // but we need to get the sign right for zero + zero + if (!bAbs) return fromRep(toRep(a) & toRep(b)); + else return b; + } + + // anything + zero = anything + if (!bAbs) return a; + } + + // Swap a and b if necessary so that a has the larger absolute value. 
+ if (bAbs > aAbs) { + const rep_t temp = aRep; + aRep = bRep; + bRep = temp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + int aExponent = aRep >> significandBits & maxExponent; + int bExponent = bRep >> significandBits & maxExponent; + rep_t aSignificand = aRep & significandMask; + rep_t bSignificand = bRep & significandMask; + + // Normalize any denormals, and adjust the exponent accordingly. + if (aExponent == 0) aExponent = normalize(&aSignificand); + if (bExponent == 0) bExponent = normalize(&bSignificand); + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction; otherwise addition. + const rep_t resultSign = aRep & signBit; + const bool subtraction = (aRep ^ bRep) & signBit; + + // Shift the significands to give us round, guard and sticky, and or in the + // implicit significand bit. (If we fell through from the denormal path it + // was already set by normalize( ), but setting it twice won't hurt + // anything.) + aSignificand = (aSignificand | implicitBit) << 3; + bSignificand = (bSignificand | implicitBit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + const unsigned int align = aExponent - bExponent; + if (align) { + if (align < typeWidth) { + const bool sticky = bSignificand << (typeWidth - align); + bSignificand = bSignificand >> align | sticky; + } else { + bSignificand = 1; // sticky; b is known to be non-zero. + } + } + if (subtraction) { + aSignificand -= bSignificand; + // If a == -b, return +zero. 
+ if (aSignificand == 0) return fromRep(0); + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + if (aSignificand < implicitBit << 3) { + const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + aSignificand <<= shift; + aExponent -= shift; + } + } + else /* addition */ { + aSignificand += bSignificand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent: + if (aSignificand & implicitBit << 4) { + const bool sticky = aSignificand & 1; + aSignificand = aSignificand >> 1 | sticky; + aExponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity: + if (aExponent >= maxExponent) return fromRep(infRep | resultSign); + + if (aExponent <= 0) { + // Result is denormal before rounding; the exponent is zero and we + // need to shift the significand. + const int shift = 1 - aExponent; + const bool sticky = aSignificand << (typeWidth - shift); + aSignificand = aSignificand >> shift | sticky; + aExponent = 0; + } + + // Low three bits are round, guard, and sticky. + const int roundGuardSticky = aSignificand & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + rep_t result = aSignificand >> 3 & significandMask; + + // Insert the exponent and sign. + result |= (rep_t)aExponent << significandBits; + result |= resultSign; + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. 
+ if (roundGuardSticky > 0x4) result++; + if (roundGuardSticky == 0x4) result += result & 1; + return fromRep(result); +} diff --git a/contrib/libs/cxxsupp/builtins/fp_extend.h b/contrib/libs/cxxsupp/builtins/fp_extend.h index 65692dc0e55..6d95a068070 100644 --- a/contrib/libs/cxxsupp/builtins/fp_extend.h +++ b/contrib/libs/cxxsupp/builtins/fp_extend.h @@ -1,89 +1,89 @@ -//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Set source and destination setting -// -//===----------------------------------------------------------------------===// - -#ifndef FP_EXTEND_HEADER -#define FP_EXTEND_HEADER - -#include "int_lib.h" - -#if defined SRC_SINGLE -typedef float src_t; -typedef uint32_t src_rep_t; -#define SRC_REP_C UINT32_C -static const int srcSigBits = 23; -#define src_rep_t_clz __builtin_clz - -#elif defined SRC_DOUBLE -typedef double src_t; -typedef uint64_t src_rep_t; -#define SRC_REP_C UINT64_C -static const int srcSigBits = 52; -static __inline int src_rep_t_clz(src_rep_t a) { -#if defined __LP64__ - return __builtin_clzl(a); -#else - if (a & REP_C(0xffffffff00000000)) - return __builtin_clz(a >> 32); - else - return 32 + __builtin_clz(a & REP_C(0xffffffff)); -#endif -} - -#elif defined SRC_HALF -typedef uint16_t src_t; -typedef uint16_t src_rep_t; -#define SRC_REP_C UINT16_C -static const int srcSigBits = 10; -#define src_rep_t_clz __builtin_clz - -#else -#error Source should be half, single, or double precision! 
-#endif //end source precision - -#if defined DST_SINGLE -typedef float dst_t; -typedef uint32_t dst_rep_t; -#define DST_REP_C UINT32_C -static const int dstSigBits = 23; - -#elif defined DST_DOUBLE -typedef double dst_t; -typedef uint64_t dst_rep_t; -#define DST_REP_C UINT64_C -static const int dstSigBits = 52; - -#elif defined DST_QUAD -typedef long double dst_t; -typedef __uint128_t dst_rep_t; -#define DST_REP_C (__uint128_t) -static const int dstSigBits = 112; - -#else -#error Destination should be single, double, or quad precision! -#endif //end destination precision - -// End of specialization parameters. Two helper routines for conversion to and -// from the representation of floating-point data as integer values follow. - -static __inline src_rep_t srcToRep(src_t x) { - const union { src_t f; src_rep_t i; } rep = {.f = x}; - return rep.i; -} - -static __inline dst_t dstFromRep(dst_rep_t x) { - const union { dst_t f; dst_rep_t i; } rep = {.i = x}; - return rep.f; -} -// End helper routines. Conversion implementation follows. - -#endif //FP_EXTEND_HEADER +//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Set source and destination setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_EXTEND_HEADER +#define FP_EXTEND_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcSigBits = 23; +#define src_rep_t_clz __builtin_clz + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcSigBits = 52; +static __inline int src_rep_t_clz(src_rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return __builtin_clz(a >> 32); + else + return 32 + __builtin_clz(a & REP_C(0xffffffff)); +#endif +} + +#elif defined SRC_HALF +typedef uint16_t src_t; +typedef uint16_t src_rep_t; +#define SRC_REP_C UINT16_C +static const int srcSigBits = 10; +#define src_rep_t_clz __builtin_clz + +#else +#error Source should be half, single, or double precision! +#endif //end source precision + +#if defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstSigBits = 23; + +#elif defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#elif defined DST_QUAD +typedef long double dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstSigBits = 112; + +#else +#error Destination should be single, double, or quad precision! +#endif //end destination precision + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. 
+ +static __inline src_rep_t srcToRep(src_t x) { + const union { src_t f; src_rep_t i; } rep = {.f = x}; + return rep.i; +} + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { dst_t f; dst_rep_t i; } rep = {.i = x}; + return rep.f; +} +// End helper routines. Conversion implementation follows. + +#endif //FP_EXTEND_HEADER diff --git a/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc b/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc index 6ef3338385c..b785cc7687a 100644 --- a/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc +++ b/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc @@ -1,108 +1,108 @@ -//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a fairly generic conversion from a narrower to a wider -// IEEE-754 floating-point type. The constants and types defined following the -// includes below parameterize the conversion. -// -// It does not support types that don't use the usual IEEE-754 interchange -// formats; specifically, some work would be needed to adapt it to -// (for example) the Intel 80-bit format or PowerPC double-double format. -// -// Note please, however, that this implementation is only intended to support -// *widening* operations; if you need to convert to a *narrower* floating-point -// type (e.g. double -> float), then this routine will not do what you want it -// to. -// -// It also requires that integer types at least as large as both formats -// are available on the target platform; this may pose a problem when trying -// to add support for quad on some 32-bit systems, for example. 
You also may -// run into trouble finding an appropriate CLZ function for wide source types; -// you will likely need to roll your own on some platforms. -// -// Finally, the following assumptions are made: -// -// 1. floating-point types and integer types have the same endianness on the -// target platform -// -// 2. quiet NaNs, if supported, are indicated by the leading bit of the -// significand field being set -// -//===----------------------------------------------------------------------===// - -#include "fp_extend.h" - -static __inline dst_t __extendXfYf2__(src_t a) { - // Various constants whose values follow from the type parameters. - // Any reasonable optimizer will fold and propagate all of these. - const int srcBits = sizeof(src_t)*CHAR_BIT; - const int srcExpBits = srcBits - srcSigBits - 1; - const int srcInfExp = (1 << srcExpBits) - 1; - const int srcExpBias = srcInfExp >> 1; - - const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; - const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; - const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); - const src_rep_t srcAbsMask = srcSignMask - 1; - const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); - const src_rep_t srcNaNCode = srcQNaN - 1; - - const int dstBits = sizeof(dst_t)*CHAR_BIT; - const int dstExpBits = dstBits - dstSigBits - 1; - const int dstInfExp = (1 << dstExpBits) - 1; - const int dstExpBias = dstInfExp >> 1; - - const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; - - // Break a into a sign and representation of the absolute value - const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; - const src_rep_t sign = aRep & srcSignMask; - dst_rep_t absResult; - - // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted - // to (signed) int. To avoid that, explicitly cast to src_rep_t. - if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { - // a is a normal number. 
- // Extend to the destination type by shifting the significand and - // exponent into the proper position and rebiasing the exponent. - absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); - absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; - } - - else if (aAbs >= srcInfinity) { - // a is NaN or infinity. - // Conjure the result by beginning with infinity, then setting the qNaN - // bit (if needed) and right-aligning the rest of the trailing NaN - // payload field. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); - absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); - } - - else if (aAbs) { - // a is denormal. - // renormalize the significand and clear the leading bit, then insert - // the correct adjusted exponent in the destination type. - const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); - absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); - absResult ^= dstMinNormal; - const int resultExponent = dstExpBias - srcExpBias - scale + 1; - absResult |= (dst_rep_t)resultExponent << dstSigBits; - } - - else { - // a is zero. - absResult = 0; - } - - // Apply the signbit to (dst_t)abs(a). - const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); - return dstFromRep(result); -} +//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a narrower to a wider +// IEEE-754 floating-point type. The constants and types defined following the +// includes below parameterize the conversion. 
+// +// It does not support types that don't use the usual IEEE-754 interchange +// formats; specifically, some work would be needed to adapt it to +// (for example) the Intel 80-bit format or PowerPC double-double format. +// +// Note please, however, that this implementation is only intended to support +// *widening* operations; if you need to convert to a *narrower* floating-point +// type (e.g. double -> float), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. You also may +// run into trouble finding an appropriate CLZ function for wide source types; +// you will likely need to roll your own on some platforms. +// +// Finally, the following assumptions are made: +// +// 1. floating-point types and integer types have the same endianness on the +// target platform +// +// 2. quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set +// +//===----------------------------------------------------------------------===// + +#include "fp_extend.h" + +static __inline dst_t __extendXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. 
+ const int srcBits = sizeof(src_t)*CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t)*CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; + + // Break a into a sign and representation of the absolute value + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted + // to (signed) int. To avoid that, explicitly cast to src_rep_t. + if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); + absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; + } + + else if (aAbs >= srcInfinity) { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); + absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + } + + else if (aAbs) { + // a is denormal. 
+ // renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); + absResult ^= dstMinNormal; + const int resultExponent = dstExpBias - srcExpBias - scale + 1; + absResult |= (dst_rep_t)resultExponent << dstSigBits; + } + + else { + // a is zero. + absResult = 0; + } + + // Apply the signbit to (dst_t)abs(a). + const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); + return dstFromRep(result); +} diff --git a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc index 433887a5031..da70d4d3930 100644 --- a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc +++ b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc @@ -1,41 +1,41 @@ -//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements float to integer conversion for the -// compiler-rt library. -// -//===----------------------------------------------------------------------===// - -#include "fp_lib.h" - -static __inline fixint_t __fixint(fp_t a) { - const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); - const fixint_t fixint_min = -fixint_max - 1; - // Break a into sign, exponent, significand - const rep_t aRep = toRep(a); - const rep_t aAbs = aRep & absMask; - const fixint_t sign = aRep & signBit ? -1 : 1; - const int exponent = (aAbs >> significandBits) - exponentBias; - const rep_t significand = (aAbs & significandMask) | implicitBit; - - // If exponent is negative, the result is zero. 
- if (exponent < 0) - return 0; - - // If the value is too large for the integer type, saturate. - if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) - return sign == 1 ? fixint_max : fixint_min; - - // If 0 <= exponent < significandBits, right shift to get the result. - // Otherwise, shift left. - if (exponent < significandBits) - return sign * (significand >> (significandBits - exponent)); - else - return sign * ((fixint_t)significand << (exponent - significandBits)); -} +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float to integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixint_t __fixint(fp_t a) { + const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); + const fixint_t fixint_min = -fixint_max - 1; + // Break a into sign, exponent, significand + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const fixint_t sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If exponent is negative, the result is zero. + if (exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) + return sign == 1 ? fixint_max : fixint_min; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. 
+ if (exponent < significandBits) + return sign * (significand >> (significandBits - exponent)); + else + return sign * ((fixint_t)significand << (exponent - significandBits)); +} diff --git a/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc b/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc index 1588ebf8f39..d68ccf27a79 100644 --- a/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc +++ b/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc @@ -1,39 +1,39 @@ -//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements float to unsigned integer conversion for the -// compiler-rt library. -// -//===----------------------------------------------------------------------===// - -#include "fp_lib.h" - -static __inline fixuint_t __fixuint(fp_t a) { - // Break a into sign, exponent, significand - const rep_t aRep = toRep(a); - const rep_t aAbs = aRep & absMask; - const int sign = aRep & signBit ? -1 : 1; - const int exponent = (aAbs >> significandBits) - exponentBias; - const rep_t significand = (aAbs & significandMask) | implicitBit; - - // If either the value or the exponent is negative, the result is zero. - if (sign == -1 || exponent < 0) - return 0; - - // If the value is too large for the integer type, saturate. - if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT) - return ~(fixuint_t)0; - - // If 0 <= exponent < significandBits, right shift to get the result. - // Otherwise, shift left. 
- if (exponent < significandBits) - return significand >> (significandBits - exponent); - else - return (fixuint_t)significand << (exponent - significandBits); -} +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float to unsigned integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixuint_t __fixuint(fp_t a) { + // Break a into sign, exponent, significand + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const int sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If either the value or the exponent is negative, the result is zero. + if (sign == -1 || exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT) + return ~(fixuint_t)0; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. 
+ if (exponent < significandBits) + return significand >> (significandBits - exponent); + else + return (fixuint_t)significand << (exponent - significandBits); +} diff --git a/contrib/libs/cxxsupp/builtins/fp_lib.h b/contrib/libs/cxxsupp/builtins/fp_lib.h index 08928458421..223fb980aae 100644 --- a/contrib/libs/cxxsupp/builtins/fp_lib.h +++ b/contrib/libs/cxxsupp/builtins/fp_lib.h @@ -1,270 +1,270 @@ -//===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is a configuration header for soft-float routines in compiler-rt. -// This file does not provide any part of the compiler-rt interface, but defines -// many useful constants and utility routines that are used in the -// implementation of the soft-float routines in compiler-rt. -// -// Assumes that float, double and long double correspond to the IEEE-754 -// binary32, binary64 and binary 128 types, respectively, and that integer -// endianness matches floating point endianness on the target platform. -// -//===----------------------------------------------------------------------===// - -#ifndef FP_LIB_HEADER -#define FP_LIB_HEADER - -#include -#include -#include -#include "int_lib.h" - -// x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in -// 32-bit mode. 
-#if defined(__FreeBSD__) && defined(__i386__) -# include -# if __FreeBSD_version < 903000 // v9.3 -# define uint64_t unsigned long long -# define int64_t long long -# undef UINT64_C -# define UINT64_C(c) (c ## ULL) -# endif -#endif - -#if defined SINGLE_PRECISION - -typedef uint32_t rep_t; -typedef int32_t srep_t; -typedef float fp_t; -#define REP_C UINT32_C -#define significandBits 23 - -static __inline int rep_clz(rep_t a) { - return __builtin_clz(a); -} - -// 32x32 --> 64 bit multiply -static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { - const uint64_t product = (uint64_t)a*b; - *hi = product >> 32; - *lo = product; -} -COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); - -#elif defined DOUBLE_PRECISION - -typedef uint64_t rep_t; -typedef int64_t srep_t; -typedef double fp_t; -#define REP_C UINT64_C -#define significandBits 52 - -static __inline int rep_clz(rep_t a) { -#if defined __LP64__ - return __builtin_clzl(a); -#else - if (a & REP_C(0xffffffff00000000)) - return __builtin_clz(a >> 32); - else - return 32 + __builtin_clz(a & REP_C(0xffffffff)); -#endif -} - -#define loWord(a) (a & 0xffffffffU) -#define hiWord(a) (a >> 32) - -// 64x64 -> 128 wide multiply for platforms that don't have such an operation; -// many 64-bit platforms have this operation, but they tend to have hardware -// floating-point, so we don't bother with a special case for them here. 
-static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { - // Each of the component 32x32 -> 64 products - const uint64_t plolo = loWord(a) * loWord(b); - const uint64_t plohi = loWord(a) * hiWord(b); - const uint64_t philo = hiWord(a) * loWord(b); - const uint64_t phihi = hiWord(a) * hiWord(b); - // Sum terms that contribute to lo in a way that allows us to get the carry - const uint64_t r0 = loWord(plolo); - const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); - *lo = r0 + (r1 << 32); - // Sum terms contributing to hi with the carry from lo - *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; -} -#undef loWord -#undef hiWord - -COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); - -#elif defined QUAD_PRECISION -#if __LDBL_MANT_DIG__ == 113 -#define CRT_LDBL_128BIT -typedef __uint128_t rep_t; -typedef __int128_t srep_t; -typedef long double fp_t; -#define REP_C (__uint128_t) -// Note: Since there is no explicit way to tell compiler the constant is a -// 128-bit integer, we let the constant be casted to 128-bit integer -#define significandBits 112 - -static __inline int rep_clz(rep_t a) { - const union - { - __uint128_t ll; -#if _YUGA_BIG_ENDIAN - struct { uint64_t high, low; } s; -#else - struct { uint64_t low, high; } s; -#endif - } uu = { .ll = a }; - - uint64_t word; - uint64_t add; - - if (uu.s.high){ - word = uu.s.high; - add = 0; - } - else{ - word = uu.s.low; - add = 64; - } - return __builtin_clzll(word) + add; -} - -#define Word_LoMask UINT64_C(0x00000000ffffffff) -#define Word_HiMask UINT64_C(0xffffffff00000000) -#define Word_FullMask UINT64_C(0xffffffffffffffff) -#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask) -#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask) -#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask) -#define Word_4(a) (uint64_t)(a & Word_LoMask) - -// 128x128 -> 256 wide multiply for platforms that don't have such an operation; -// many 64-bit platforms have this operation, but they tend to 
have hardware -// floating-point, so we don't bother with a special case for them here. -static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { - - const uint64_t product11 = Word_1(a) * Word_1(b); - const uint64_t product12 = Word_1(a) * Word_2(b); - const uint64_t product13 = Word_1(a) * Word_3(b); - const uint64_t product14 = Word_1(a) * Word_4(b); - const uint64_t product21 = Word_2(a) * Word_1(b); - const uint64_t product22 = Word_2(a) * Word_2(b); - const uint64_t product23 = Word_2(a) * Word_3(b); - const uint64_t product24 = Word_2(a) * Word_4(b); - const uint64_t product31 = Word_3(a) * Word_1(b); - const uint64_t product32 = Word_3(a) * Word_2(b); - const uint64_t product33 = Word_3(a) * Word_3(b); - const uint64_t product34 = Word_3(a) * Word_4(b); - const uint64_t product41 = Word_4(a) * Word_1(b); - const uint64_t product42 = Word_4(a) * Word_2(b); - const uint64_t product43 = Word_4(a) * Word_3(b); - const uint64_t product44 = Word_4(a) * Word_4(b); - - const __uint128_t sum0 = (__uint128_t)product44; - const __uint128_t sum1 = (__uint128_t)product34 + - (__uint128_t)product43; - const __uint128_t sum2 = (__uint128_t)product24 + - (__uint128_t)product33 + - (__uint128_t)product42; - const __uint128_t sum3 = (__uint128_t)product14 + - (__uint128_t)product23 + - (__uint128_t)product32 + - (__uint128_t)product41; - const __uint128_t sum4 = (__uint128_t)product13 + - (__uint128_t)product22 + - (__uint128_t)product31; - const __uint128_t sum5 = (__uint128_t)product12 + - (__uint128_t)product21; - const __uint128_t sum6 = (__uint128_t)product11; - - const __uint128_t r0 = (sum0 & Word_FullMask) + - ((sum1 & Word_LoMask) << 32); - const __uint128_t r1 = (sum0 >> 64) + - ((sum1 >> 32) & Word_FullMask) + - (sum2 & Word_FullMask) + - ((sum3 << 32) & Word_HiMask); - - *lo = r0 + (r1 << 64); - *hi = (r1 >> 64) + - (sum1 >> 96) + - (sum2 >> 64) + - (sum3 >> 32) + - sum4 + - (sum5 << 32) + - (sum6 << 64); -} -#undef Word_1 -#undef Word_2 
-#undef Word_3 -#undef Word_4 -#undef Word_HiMask -#undef Word_LoMask -#undef Word_FullMask -#endif // __LDBL_MANT_DIG__ == 113 -#else -#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. -#endif - -#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT) -#define typeWidth (sizeof(rep_t)*CHAR_BIT) -#define exponentBits (typeWidth - significandBits - 1) -#define maxExponent ((1 << exponentBits) - 1) -#define exponentBias (maxExponent >> 1) - -#define implicitBit (REP_C(1) << significandBits) -#define significandMask (implicitBit - 1U) -#define signBit (REP_C(1) << (significandBits + exponentBits)) -#define absMask (signBit - 1U) -#define exponentMask (absMask ^ significandMask) -#define oneRep ((rep_t)exponentBias << significandBits) -#define infRep exponentMask -#define quietBit (implicitBit >> 1) -#define qnanRep (exponentMask | quietBit) - -static __inline rep_t toRep(fp_t x) { - const union { fp_t f; rep_t i; } rep = {.f = x}; - return rep.i; -} - -static __inline fp_t fromRep(rep_t x) { - const union { fp_t f; rep_t i; } rep = {.i = x}; - return rep.f; -} - -static __inline int normalize(rep_t *significand) { - const int shift = rep_clz(*significand) - rep_clz(implicitBit); - *significand <<= shift; - return 1 - shift; -} - -static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { - *hi = *hi << count | *lo >> (typeWidth - count); - *lo = *lo << count; -} - -static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) { - if (count < typeWidth) { - const bool sticky = *lo << (typeWidth - count); - *lo = *hi << (typeWidth - count) | *lo >> count | sticky; - *hi = *hi >> count; - } - else if (count < 2*typeWidth) { - const bool sticky = *hi << (2*typeWidth - count) | *lo; - *lo = *hi >> (count - typeWidth) | sticky; - *hi = 0; - } else { - const bool sticky = *hi | *lo; - *lo = sticky; - *hi = 0; - } -} -#endif - -#endif // FP_LIB_HEADER +//===-- lib/fp_lib.h - 
Floating-point utilities -------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a configuration header for soft-float routines in compiler-rt. +// This file does not provide any part of the compiler-rt interface, but defines +// many useful constants and utility routines that are used in the +// implementation of the soft-float routines in compiler-rt. +// +// Assumes that float, double and long double correspond to the IEEE-754 +// binary32, binary64 and binary 128 types, respectively, and that integer +// endianness matches floating point endianness on the target platform. +// +//===----------------------------------------------------------------------===// + +#ifndef FP_LIB_HEADER +#define FP_LIB_HEADER + +#include +#include +#include +#include "int_lib.h" + +// x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in +// 32-bit mode. 
+#if defined(__FreeBSD__) && defined(__i386__) +# include +# if __FreeBSD_version < 903000 // v9.3 +# define uint64_t unsigned long long +# define int64_t long long +# undef UINT64_C +# define UINT64_C(c) (c ## ULL) +# endif +#endif + +#if defined SINGLE_PRECISION + +typedef uint32_t rep_t; +typedef int32_t srep_t; +typedef float fp_t; +#define REP_C UINT32_C +#define significandBits 23 + +static __inline int rep_clz(rep_t a) { + return __builtin_clz(a); +} + +// 32x32 --> 64 bit multiply +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t product = (uint64_t)a*b; + *hi = product >> 32; + *lo = product; +} +COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); + +#elif defined DOUBLE_PRECISION + +typedef uint64_t rep_t; +typedef int64_t srep_t; +typedef double fp_t; +#define REP_C UINT64_C +#define significandBits 52 + +static __inline int rep_clz(rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return __builtin_clz(a >> 32); + else + return 32 + __builtin_clz(a & REP_C(0xffffffff)); +#endif +} + +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +// 64x64 -> 128 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to have hardware +// floating-point, so we don't bother with a special case for them here. 
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} +#undef loWord +#undef hiWord + +COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); + +#elif defined QUAD_PRECISION +#if __LDBL_MANT_DIG__ == 113 +#define CRT_LDBL_128BIT +typedef __uint128_t rep_t; +typedef __int128_t srep_t; +typedef long double fp_t; +#define REP_C (__uint128_t) +// Note: Since there is no explicit way to tell compiler the constant is a +// 128-bit integer, we let the constant be casted to 128-bit integer +#define significandBits 112 + +static __inline int rep_clz(rep_t a) { + const union + { + __uint128_t ll; +#if _YUGA_BIG_ENDIAN + struct { uint64_t high, low; } s; +#else + struct { uint64_t low, high; } s; +#endif + } uu = { .ll = a }; + + uint64_t word; + uint64_t add; + + if (uu.s.high){ + word = uu.s.high; + add = 0; + } + else{ + word = uu.s.low; + add = 64; + } + return __builtin_clzll(word) + add; +} + +#define Word_LoMask UINT64_C(0x00000000ffffffff) +#define Word_HiMask UINT64_C(0xffffffff00000000) +#define Word_FullMask UINT64_C(0xffffffffffffffff) +#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask) +#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask) +#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask) +#define Word_4(a) (uint64_t)(a & Word_LoMask) + +// 128x128 -> 256 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to 
have hardware +// floating-point, so we don't bother with a special case for them here. +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + + const uint64_t product11 = Word_1(a) * Word_1(b); + const uint64_t product12 = Word_1(a) * Word_2(b); + const uint64_t product13 = Word_1(a) * Word_3(b); + const uint64_t product14 = Word_1(a) * Word_4(b); + const uint64_t product21 = Word_2(a) * Word_1(b); + const uint64_t product22 = Word_2(a) * Word_2(b); + const uint64_t product23 = Word_2(a) * Word_3(b); + const uint64_t product24 = Word_2(a) * Word_4(b); + const uint64_t product31 = Word_3(a) * Word_1(b); + const uint64_t product32 = Word_3(a) * Word_2(b); + const uint64_t product33 = Word_3(a) * Word_3(b); + const uint64_t product34 = Word_3(a) * Word_4(b); + const uint64_t product41 = Word_4(a) * Word_1(b); + const uint64_t product42 = Word_4(a) * Word_2(b); + const uint64_t product43 = Word_4(a) * Word_3(b); + const uint64_t product44 = Word_4(a) * Word_4(b); + + const __uint128_t sum0 = (__uint128_t)product44; + const __uint128_t sum1 = (__uint128_t)product34 + + (__uint128_t)product43; + const __uint128_t sum2 = (__uint128_t)product24 + + (__uint128_t)product33 + + (__uint128_t)product42; + const __uint128_t sum3 = (__uint128_t)product14 + + (__uint128_t)product23 + + (__uint128_t)product32 + + (__uint128_t)product41; + const __uint128_t sum4 = (__uint128_t)product13 + + (__uint128_t)product22 + + (__uint128_t)product31; + const __uint128_t sum5 = (__uint128_t)product12 + + (__uint128_t)product21; + const __uint128_t sum6 = (__uint128_t)product11; + + const __uint128_t r0 = (sum0 & Word_FullMask) + + ((sum1 & Word_LoMask) << 32); + const __uint128_t r1 = (sum0 >> 64) + + ((sum1 >> 32) & Word_FullMask) + + (sum2 & Word_FullMask) + + ((sum3 << 32) & Word_HiMask); + + *lo = r0 + (r1 << 64); + *hi = (r1 >> 64) + + (sum1 >> 96) + + (sum2 >> 64) + + (sum3 >> 32) + + sum4 + + (sum5 << 32) + + (sum6 << 64); +} +#undef Word_1 +#undef Word_2 
+#undef Word_3 +#undef Word_4 +#undef Word_HiMask +#undef Word_LoMask +#undef Word_FullMask +#endif // __LDBL_MANT_DIG__ == 113 +#else +#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. +#endif + +#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT) +#define typeWidth (sizeof(rep_t)*CHAR_BIT) +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) + +#define implicitBit (REP_C(1) << significandBits) +#define significandMask (implicitBit - 1U) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) + +static __inline rep_t toRep(fp_t x) { + const union { fp_t f; rep_t i; } rep = {.f = x}; + return rep.i; +} + +static __inline fp_t fromRep(rep_t x) { + const union { fp_t f; rep_t i; } rep = {.i = x}; + return rep.f; +} + +static __inline int normalize(rep_t *significand) { + const int shift = rep_clz(*significand) - rep_clz(implicitBit); + *significand <<= shift; + return 1 - shift; +} + +static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { + *hi = *hi << count | *lo >> (typeWidth - count); + *lo = *lo << count; +} + +static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) { + if (count < typeWidth) { + const bool sticky = *lo << (typeWidth - count); + *lo = *hi << (typeWidth - count) | *lo >> count | sticky; + *hi = *hi >> count; + } + else if (count < 2*typeWidth) { + const bool sticky = *hi << (2*typeWidth - count) | *lo; + *lo = *hi >> (count - typeWidth) | sticky; + *hi = 0; + } else { + const bool sticky = *hi | *lo; + *lo = sticky; + *hi = 0; + } +} +#endif + +#endif // FP_LIB_HEADER diff --git 
a/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc b/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc index 72d1736d1d7..b34aa1b8f54 100644 --- a/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc +++ b/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc @@ -1,116 +1,116 @@ -//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements soft-float multiplication with the IEEE-754 default -// rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#include "fp_lib.h" - -static __inline fp_t __mulXf3__(fp_t a, fp_t b) { - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. - if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN * anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything * NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity * non-zero = +/- infinity - if (bAbs) return fromRep(aAbs | productSign); - // infinity * zero = NaN - else return fromRep(qnanRep); - } - - if (bAbs == infRep) { - //? 
non-zero * infinity = +/- infinity - if (aAbs) return fromRep(bAbs | productSign); - // zero * infinity = NaN - else return fromRep(qnanRep); - } - - // zero * anything = +/- zero - if (!aAbs) return fromRep(productSign); - // anything * zero = +/- zero - if (!bAbs) return fromRep(productSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale += normalize(&bSignificand); - } - - // Or in the implicit significand bit. (If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) - aSignificand |= implicitBit; - bSignificand |= implicitBit; - - // Get the significand of a*b. Before multiplying the significands, shift - // one of them left to left-align it in the field. Thus, the product will - // have (exponentBits + 2) integral digits, all but two of which must be - // zero. Normalizing this result is just a conditional left-shift by one - // and bumping the exponent accordingly. - rep_t productHi, productLo; - wideMultiply(aSignificand, bSignificand << exponentBits, - &productHi, &productLo); - - int productExponent = aExponent + bExponent - exponentBias + scale; - - // Normalize the significand, adjust exponent if needed. - if (productHi & implicitBit) productExponent++; - else wideLeftShift(&productHi, &productLo, 1); - - // If we have overflowed the type, return +/- infinity. - if (productExponent >= maxExponent) return fromRep(infRep | productSign); - - if (productExponent <= 0) { - // Result is denormal before rounding - // - // If the result is so small that it just underflows to zero, return - // a zero of the appropriate sign. 
Mathematically there is no need to - // handle this case separately, but we make it a special case to - // simplify the shift logic. - const unsigned int shift = REP_C(1) - (unsigned int)productExponent; - if (shift >= typeWidth) return fromRep(productSign); - - // Otherwise, shift the significand of the result so that the round - // bit is the high bit of productLo. - wideRightShiftWithSticky(&productHi, &productLo, shift); - } - else { - // Result is normal before rounding; insert the exponent. - productHi &= significandMask; - productHi |= (rep_t)productExponent << significandBits; - } - - // Insert the sign of the result: - productHi |= productSign; - - // Final rounding. The final result may overflow to infinity, or underflow - // to zero, but those are the correct results in those cases. We use the - // default IEEE-754 round-to-nearest, ties-to-even rounding mode. - if (productLo > signBit) productHi++; - if (productLo == signBit) productHi += productHi & 1; - return fromRep(productHi); -} +//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float multiplication with the IEEE-754 default +// rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fp_t __mulXf3__(fp_t a, fp_t b) { + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN * anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything * NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity * non-zero = +/- infinity + if (bAbs) return fromRep(aAbs | productSign); + // infinity * zero = NaN + else return fromRep(qnanRep); + } + + if (bAbs == infRep) { + //? non-zero * infinity = +/- infinity + if (aAbs) return fromRep(bAbs | productSign); + // zero * infinity = NaN + else return fromRep(qnanRep); + } + + // zero * anything = +/- zero + if (!aAbs) return fromRep(productSign); + // anything * zero = +/- zero + if (!bAbs) return fromRep(productSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale += normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + // Get the significand of a*b. 
Before multiplying the significands, shift + // one of them left to left-align it in the field. Thus, the product will + // have (exponentBits + 2) integral digits, all but two of which must be + // zero. Normalizing this result is just a conditional left-shift by one + // and bumping the exponent accordingly. + rep_t productHi, productLo; + wideMultiply(aSignificand, bSignificand << exponentBits, + &productHi, &productLo); + + int productExponent = aExponent + bExponent - exponentBias + scale; + + // Normalize the significand, adjust exponent if needed. + if (productHi & implicitBit) productExponent++; + else wideLeftShift(&productHi, &productLo, 1); + + // If we have overflowed the type, return +/- infinity. + if (productExponent >= maxExponent) return fromRep(infRep | productSign); + + if (productExponent <= 0) { + // Result is denormal before rounding + // + // If the result is so small that it just underflows to zero, return + // a zero of the appropriate sign. Mathematically there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + const unsigned int shift = REP_C(1) - (unsigned int)productExponent; + if (shift >= typeWidth) return fromRep(productSign); + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. + wideRightShiftWithSticky(&productHi, &productLo, shift); + } + else { + // Result is normal before rounding; insert the exponent. + productHi &= significandMask; + productHi |= (rep_t)productExponent << significandBits; + } + + // Insert the sign of the result: + productHi |= productSign; + + // Final rounding. The final result may overflow to infinity, or underflow + // to zero, but those are the correct results in those cases. We use the + // default IEEE-754 round-to-nearest, ties-to-even rounding mode. 
+ if (productLo > signBit) productHi++; + if (productLo == signBit) productHi += productHi & 1; + return fromRep(productHi); +} diff --git a/contrib/libs/cxxsupp/builtins/fp_trunc.h b/contrib/libs/cxxsupp/builtins/fp_trunc.h index 2d7c5b422a6..d5e79bb5b86 100644 --- a/contrib/libs/cxxsupp/builtins/fp_trunc.h +++ b/contrib/libs/cxxsupp/builtins/fp_trunc.h @@ -1,76 +1,76 @@ -//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Set source and destination precision setting -// -//===----------------------------------------------------------------------===// - -#ifndef FP_TRUNC_HEADER -#define FP_TRUNC_HEADER - -#include "int_lib.h" - -#if defined SRC_SINGLE -typedef float src_t; -typedef uint32_t src_rep_t; -#define SRC_REP_C UINT32_C -static const int srcSigBits = 23; - -#elif defined SRC_DOUBLE -typedef double src_t; -typedef uint64_t src_rep_t; -#define SRC_REP_C UINT64_C -static const int srcSigBits = 52; - -#elif defined SRC_QUAD -typedef long double src_t; -typedef __uint128_t src_rep_t; -#define SRC_REP_C (__uint128_t) -static const int srcSigBits = 112; - -#else -#error Source should be double precision or quad precision! -#endif //end source precision - -#if defined DST_DOUBLE -typedef double dst_t; -typedef uint64_t dst_rep_t; -#define DST_REP_C UINT64_C -static const int dstSigBits = 52; - -#elif defined DST_SINGLE -typedef float dst_t; -typedef uint32_t dst_rep_t; -#define DST_REP_C UINT32_C -static const int dstSigBits = 23; - -#elif defined DST_HALF -typedef uint16_t dst_t; -typedef uint16_t dst_rep_t; -#define DST_REP_C UINT16_C -static const int dstSigBits = 10; - -#else -#error Destination should be single precision or double precision! 
-#endif //end destination precision - -// End of specialization parameters. Two helper routines for conversion to and -// from the representation of floating-point data as integer values follow. - -static __inline src_rep_t srcToRep(src_t x) { - const union { src_t f; src_rep_t i; } rep = {.f = x}; - return rep.i; -} - -static __inline dst_t dstFromRep(dst_rep_t x) { - const union { dst_t f; dst_rep_t i; } rep = {.i = x}; - return rep.f; -} - -#endif // FP_TRUNC_HEADER +//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Set source and destination precision setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_TRUNC_HEADER +#define FP_TRUNC_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcSigBits = 23; + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcSigBits = 52; + +#elif defined SRC_QUAD +typedef long double src_t; +typedef __uint128_t src_rep_t; +#define SRC_REP_C (__uint128_t) +static const int srcSigBits = 112; + +#else +#error Source should be double precision or quad precision! 
+#endif //end source precision + +#if defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#elif defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstSigBits = 23; + +#elif defined DST_HALF +typedef uint16_t dst_t; +typedef uint16_t dst_rep_t; +#define DST_REP_C UINT16_C +static const int dstSigBits = 10; + +#else +#error Destination should be single precision or double precision! +#endif //end destination precision + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. + +static __inline src_rep_t srcToRep(src_t x) { + const union { src_t f; src_rep_t i; } rep = {.f = x}; + return rep.i; +} + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { dst_t f; dst_rep_t i; } rep = {.i = x}; + return rep.f; +} + +#endif // FP_TRUNC_HEADER diff --git a/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc b/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc index 2b5199dc970..d88ae060913 100644 --- a/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc +++ b/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc @@ -1,135 +1,135 @@ -//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a fairly generic conversion from a wider to a narrower -// IEEE-754 floating-point type in the default (round to nearest, ties to even) -// rounding mode. The constants and types defined following the includes below -// parameterize the conversion. 
-// -// This routine can be trivially adapted to support conversions to -// half-precision or from quad-precision. It does not support types that don't -// use the usual IEEE-754 interchange formats; specifically, some work would be -// needed to adapt it to (for example) the Intel 80-bit format or PowerPC -// double-double format. -// -// Note please, however, that this implementation is only intended to support -// *narrowing* operations; if you need to convert to a *wider* floating-point -// type (e.g. float -> double), then this routine will not do what you want it -// to. -// -// It also requires that integer types at least as large as both formats -// are available on the target platform; this may pose a problem when trying -// to add support for quad on some 32-bit systems, for example. -// -// Finally, the following assumptions are made: -// -// 1. floating-point types and integer types have the same endianness on the -// target platform -// -// 2. quiet NaNs, if supported, are indicated by the leading bit of the -// significand field being set -// -//===----------------------------------------------------------------------===// - -#include "fp_trunc.h" - -static __inline dst_t __truncXfYf2__(src_t a) { - // Various constants whose values follow from the type parameters. - // Any reasonable optimizer will fold and propagate all of these. 
- const int srcBits = sizeof(src_t)*CHAR_BIT; - const int srcExpBits = srcBits - srcSigBits - 1; - const int srcInfExp = (1 << srcExpBits) - 1; - const int srcExpBias = srcInfExp >> 1; - - const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; - const src_rep_t srcSignificandMask = srcMinNormal - 1; - const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; - const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); - const src_rep_t srcAbsMask = srcSignMask - 1; - const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; - const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); - const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); - const src_rep_t srcNaNCode = srcQNaN - 1; - - const int dstBits = sizeof(dst_t)*CHAR_BIT; - const int dstExpBits = dstBits - dstSigBits - 1; - const int dstInfExp = (1 << dstExpBits) - 1; - const int dstExpBias = dstInfExp >> 1; - - const int underflowExponent = srcExpBias + 1 - dstExpBias; - const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; - const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; - const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; - - const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); - const dst_rep_t dstNaNCode = dstQNaN - 1; - - // Break a into a sign and representation of the absolute value - const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; - const src_rep_t sign = aRep & srcSignMask; - dst_rep_t absResult; - - if (aAbs - underflow < aAbs - overflow) { - // The exponent of a is within the range of normal numbers in the - // destination format. We can convert by simply right-shifting with - // rounding and adjusting the exponent. 
- absResult = aAbs >> (srcSigBits - dstSigBits); - absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; - - const src_rep_t roundBits = aAbs & roundMask; - // Round to nearest - if (roundBits > halfway) - absResult++; - // Ties to even - else if (roundBits == halfway) - absResult += absResult & 1; - } - else if (aAbs > srcInfinity) { - // a is NaN. - // Conjure the result by beginning with infinity, setting the qNaN - // bit and inserting the (truncated) trailing NaN field. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= dstQNaN; - absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; - } - else if (aAbs >= overflow) { - // a overflows to infinity. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - } - else { - // a underflows on conversion to the destination type or is an exact - // zero. The result may be a denormal or zero. Extract the exponent - // to get the shift amount for the denormalization. - const int aExp = aAbs >> srcSigBits; - const int shift = srcExpBias - dstExpBias - aExp + 1; - - const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; - - // Right shift by the denormalization amount with sticky. - if (shift > srcSigBits) { - absResult = 0; - } else { - const bool sticky = significand << (srcBits - shift); - src_rep_t denormalizedSignificand = significand >> shift | sticky; - absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); - const src_rep_t roundBits = denormalizedSignificand & roundMask; - // Round to nearest - if (roundBits > halfway) - absResult++; - // Ties to even - else if (roundBits == halfway) - absResult += absResult & 1; - } - } - - // Apply the signbit to (dst_t)abs(a). 
- const dst_rep_t result = absResult | sign >> (srcBits - dstBits); - return dstFromRep(result); -} +//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a wider to a narrower +// IEEE-754 floating-point type in the default (round to nearest, ties to even) +// rounding mode. The constants and types defined following the includes below +// parameterize the conversion. +// +// This routine can be trivially adapted to support conversions to +// half-precision or from quad-precision. It does not support types that don't +// use the usual IEEE-754 interchange formats; specifically, some work would be +// needed to adapt it to (for example) the Intel 80-bit format or PowerPC +// double-double format. +// +// Note please, however, that this implementation is only intended to support +// *narrowing* operations; if you need to convert to a *wider* floating-point +// type (e.g. float -> double), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. +// +// Finally, the following assumptions are made: +// +// 1. floating-point types and integer types have the same endianness on the +// target platform +// +// 2. 
quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set +// +//===----------------------------------------------------------------------===// + +#include "fp_trunc.h" + +static __inline dst_t __truncXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const int srcBits = sizeof(src_t)*CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcSignificandMask = srcMinNormal - 1; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; + const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t)*CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const int underflowExponent = srcExpBias + 1 - dstExpBias; + const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; + const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; + const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; + + const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); + const dst_rep_t dstNaNCode = dstQNaN - 1; + + // Break a into a sign and representation of the absolute value + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + if (aAbs - underflow < aAbs - overflow) { + // The 
exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + absResult = aAbs >> (srcSigBits - dstSigBits); + absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; + + const src_rep_t roundBits = aAbs & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } + else if (aAbs > srcInfinity) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= dstQNaN; + absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; + } + else if (aAbs >= overflow) { + // a overflows to infinity. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + } + else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + const int aExp = aAbs >> srcSigBits; + const int shift = srcExpBias - dstExpBias - aExp + 1; + + const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; + + // Right shift by the denormalization amount with sticky. + if (shift > srcSigBits) { + absResult = 0; + } else { + const bool sticky = significand << (srcBits - shift); + src_rep_t denormalizedSignificand = significand >> shift | sticky; + absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); + const src_rep_t roundBits = denormalizedSignificand & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } + } + + // Apply the signbit to (dst_t)abs(a). 
+ const dst_rep_t result = absResult | sign >> (srcBits - dstBits); + return dstFromRep(result); +} diff --git a/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c b/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c index db4a716f92b..331dc2bea2e 100644 --- a/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c +++ b/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c @@ -1,209 +1,209 @@ -/* ===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - */ - -#include "int_lib.h" - -#include - -/* - * Pointer encodings documented at: - * http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html - */ - -#define DW_EH_PE_omit 0xff /* no data follows */ - -#define DW_EH_PE_absptr 0x00 -#define DW_EH_PE_uleb128 0x01 -#define DW_EH_PE_udata2 0x02 -#define DW_EH_PE_udata4 0x03 -#define DW_EH_PE_udata8 0x04 -#define DW_EH_PE_sleb128 0x09 -#define DW_EH_PE_sdata2 0x0A -#define DW_EH_PE_sdata4 0x0B -#define DW_EH_PE_sdata8 0x0C - -#define DW_EH_PE_pcrel 0x10 -#define DW_EH_PE_textrel 0x20 -#define DW_EH_PE_datarel 0x30 -#define DW_EH_PE_funcrel 0x40 -#define DW_EH_PE_aligned 0x50 -#define DW_EH_PE_indirect 0x80 /* gcc extension */ - - - -/* read a uleb128 encoded value and advance pointer */ -static uintptr_t readULEB128(const uint8_t** data) -{ - uintptr_t result = 0; - uintptr_t shift = 0; - unsigned char byte; - const uint8_t* p = *data; - do { - byte = *p++; - result |= (byte & 0x7f) << shift; - shift += 7; - } while (byte & 0x80); - *data = p; - return result; -} - -/* read a pointer encoded value and advance pointer */ -static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) -{ - const uint8_t* p = *data; - uintptr_t result = 0; - - if ( encoding == DW_EH_PE_omit 
) - return 0; - - /* first get value */ - switch (encoding & 0x0F) { - case DW_EH_PE_absptr: - result = *((const uintptr_t*)p); - p += sizeof(uintptr_t); - break; - case DW_EH_PE_uleb128: - result = readULEB128(&p); - break; - case DW_EH_PE_udata2: - result = *((const uint16_t*)p); - p += sizeof(uint16_t); - break; - case DW_EH_PE_udata4: - result = *((const uint32_t*)p); - p += sizeof(uint32_t); - break; - case DW_EH_PE_udata8: - result = *((const uint64_t*)p); - p += sizeof(uint64_t); - break; - case DW_EH_PE_sdata2: - result = *((const int16_t*)p); - p += sizeof(int16_t); - break; - case DW_EH_PE_sdata4: - result = *((const int32_t*)p); - p += sizeof(int32_t); - break; - case DW_EH_PE_sdata8: - result = *((const int64_t*)p); - p += sizeof(int64_t); - break; - case DW_EH_PE_sleb128: - default: - /* not supported */ - compilerrt_abort(); - break; - } - - /* then add relative offset */ - switch ( encoding & 0x70 ) { - case DW_EH_PE_absptr: - /* do nothing */ - break; - case DW_EH_PE_pcrel: - result += (uintptr_t)(*data); - break; - case DW_EH_PE_textrel: - case DW_EH_PE_datarel: - case DW_EH_PE_funcrel: - case DW_EH_PE_aligned: - default: - /* not supported */ - compilerrt_abort(); - break; - } - - /* then apply indirection */ - if (encoding & DW_EH_PE_indirect) { - result = *((const uintptr_t*)result); - } - - *data = p; - return result; -} - - -/* - * The C compiler makes references to __gcc_personality_v0 in - * the dwarf unwind information for translation units that use - * __attribute__((cleanup(xx))) on local variables. - * This personality routine is called by the system unwinder - * on each frame as the stack is unwound during a C++ exception - * throw through a C function compiled with -fexceptions. 
- */ -#if __USING_SJLJ_EXCEPTIONS__ -// the setjump-longjump based exceptions personality routine has a different name -COMPILER_RT_ABI _Unwind_Reason_Code -__gcc_personality_sj0(int version, _Unwind_Action actions, - uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, - struct _Unwind_Context *context) -#else -COMPILER_RT_ABI _Unwind_Reason_Code -__gcc_personality_v0(int version, _Unwind_Action actions, - uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, - struct _Unwind_Context *context) -#endif -{ - /* Since C does not have catch clauses, there is nothing to do during */ - /* phase 1 (the search phase). */ - if ( actions & _UA_SEARCH_PHASE ) - return _URC_CONTINUE_UNWIND; - - /* There is nothing to do if there is no LSDA for this frame. */ - const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context); - if ( lsda == (uint8_t*) 0 ) - return _URC_CONTINUE_UNWIND; - - uintptr_t pc = _Unwind_GetIP(context)-1; - uintptr_t funcStart = _Unwind_GetRegionStart(context); - uintptr_t pcOffset = pc - funcStart; - - /* Parse LSDA header. */ - uint8_t lpStartEncoding = *lsda++; - if (lpStartEncoding != DW_EH_PE_omit) { - readEncodedPointer(&lsda, lpStartEncoding); - } - uint8_t ttypeEncoding = *lsda++; - if (ttypeEncoding != DW_EH_PE_omit) { - readULEB128(&lsda); - } - /* Walk call-site table looking for range that includes current PC. 
*/ - uint8_t callSiteEncoding = *lsda++; - uint32_t callSiteTableLength = readULEB128(&lsda); - const uint8_t* callSiteTableStart = lsda; - const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength; - const uint8_t* p=callSiteTableStart; - while (p < callSiteTableEnd) { - uintptr_t start = readEncodedPointer(&p, callSiteEncoding); - uintptr_t length = readEncodedPointer(&p, callSiteEncoding); - uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding); - readULEB128(&p); /* action value not used for C code */ - if ( landingPad == 0 ) - continue; /* no landing pad for this entry */ - if ( (start <= pcOffset) && (pcOffset < (start+length)) ) { - /* Found landing pad for the PC. - * Set Instruction Pointer to so we re-enter function - * at landing pad. The landing pad is created by the compiler - * to take two parameters in registers. - */ - _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), - (uintptr_t)exceptionObject); - _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); - _Unwind_SetIP(context, (funcStart + landingPad)); - return _URC_INSTALL_CONTEXT; - } - } - - /* No landing pad found, continue unwinding. */ - return _URC_CONTINUE_UNWIND; -} - +/* ===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + */ + +#include "int_lib.h" + +#include + +/* + * Pointer encodings documented at: + * http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html + */ + +#define DW_EH_PE_omit 0xff /* no data follows */ + +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 /* gcc extension */ + + + +/* read a uleb128 encoded value and advance pointer */ +static uintptr_t readULEB128(const uint8_t** data) +{ + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t* p = *data; + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + *data = p; + return result; +} + +/* read a pointer encoded value and advance pointer */ +static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) +{ + const uint8_t* p = *data; + uintptr_t result = 0; + + if ( encoding == DW_EH_PE_omit ) + return 0; + + /* first get value */ + switch (encoding & 0x0F) { + case DW_EH_PE_absptr: + result = *((const uintptr_t*)p); + p += sizeof(uintptr_t); + break; + case DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + case DW_EH_PE_udata2: + result = *((const uint16_t*)p); + p += sizeof(uint16_t); + break; + case DW_EH_PE_udata4: + result = *((const uint32_t*)p); + p += sizeof(uint32_t); + break; + case DW_EH_PE_udata8: + result = *((const uint64_t*)p); + p += sizeof(uint64_t); + break; + case DW_EH_PE_sdata2: + result = *((const int16_t*)p); + p += sizeof(int16_t); + break; + case DW_EH_PE_sdata4: + result = *((const 
int32_t*)p); + p += sizeof(int32_t); + break; + case DW_EH_PE_sdata8: + result = *((const int64_t*)p); + p += sizeof(int64_t); + break; + case DW_EH_PE_sleb128: + default: + /* not supported */ + compilerrt_abort(); + break; + } + + /* then add relative offset */ + switch ( encoding & 0x70 ) { + case DW_EH_PE_absptr: + /* do nothing */ + break; + case DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case DW_EH_PE_textrel: + case DW_EH_PE_datarel: + case DW_EH_PE_funcrel: + case DW_EH_PE_aligned: + default: + /* not supported */ + compilerrt_abort(); + break; + } + + /* then apply indirection */ + if (encoding & DW_EH_PE_indirect) { + result = *((const uintptr_t*)result); + } + + *data = p; + return result; +} + + +/* + * The C compiler makes references to __gcc_personality_v0 in + * the dwarf unwind information for translation units that use + * __attribute__((cleanup(xx))) on local variables. + * This personality routine is called by the system unwinder + * on each frame as the stack is unwound during a C++ exception + * throw through a C function compiled with -fexceptions. + */ +#if __USING_SJLJ_EXCEPTIONS__ +// the setjump-longjump based exceptions personality routine has a different name +COMPILER_RT_ABI _Unwind_Reason_Code +__gcc_personality_sj0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + struct _Unwind_Context *context) +#else +COMPILER_RT_ABI _Unwind_Reason_Code +__gcc_personality_v0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + struct _Unwind_Context *context) +#endif +{ + /* Since C does not have catch clauses, there is nothing to do during */ + /* phase 1 (the search phase). */ + if ( actions & _UA_SEARCH_PHASE ) + return _URC_CONTINUE_UNWIND; + + /* There is nothing to do if there is no LSDA for this frame. 
*/ + const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context); + if ( lsda == (uint8_t*) 0 ) + return _URC_CONTINUE_UNWIND; + + uintptr_t pc = _Unwind_GetIP(context)-1; + uintptr_t funcStart = _Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + + /* Parse LSDA header. */ + uint8_t lpStartEncoding = *lsda++; + if (lpStartEncoding != DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + uint8_t ttypeEncoding = *lsda++; + if (ttypeEncoding != DW_EH_PE_omit) { + readULEB128(&lsda); + } + /* Walk call-site table looking for range that includes current PC. */ + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t* callSiteTableStart = lsda; + const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength; + const uint8_t* p=callSiteTableStart; + while (p < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&p, callSiteEncoding); + uintptr_t length = readEncodedPointer(&p, callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding); + readULEB128(&p); /* action value not used for C code */ + if ( landingPad == 0 ) + continue; /* no landing pad for this entry */ + if ( (start <= pcOffset) && (pcOffset < (start+length)) ) { + /* Found landing pad for the PC. + * Set Instruction Pointer to so we re-enter function + * at landing pad. The landing pad is created by the compiler + * to take two parameters in registers. + */ + _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); + _Unwind_SetIP(context, (funcStart + landingPad)); + return _URC_INSTALL_CONTEXT; + } + } + + /* No landing pad found, continue unwinding. 
*/ + return _URC_CONTINUE_UNWIND; +} + diff --git a/contrib/libs/cxxsupp/builtins/i386/Makefile.mk b/contrib/libs/cxxsupp/builtins/i386/Makefile.mk index 3d116afe95f..f3776a02c0d 100644 --- a/contrib/libs/cxxsupp/builtins/i386/Makefile.mk +++ b/contrib/libs/cxxsupp/builtins/i386/Makefile.mk @@ -1,20 +1,20 @@ -#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := i386 - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) +#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +ModuleName := builtins +SubDirs := +OnlyArchs := i386 + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) +Implementation := Optimized + +# FIXME: use automatic dependencies? 
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/contrib/libs/cxxsupp/builtins/i386/ashldi3.S b/contrib/libs/cxxsupp/builtins/i386/ashldi3.S index bc1cf1e6315..3fbd7390384 100644 --- a/contrib/libs/cxxsupp/builtins/i386/ashldi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/ashldi3.S @@ -1,58 +1,58 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// di_int __ashldi3(di_int input, int count); - -// This routine has some extra memory traffic, loading the 64-bit input via two -// 32-bit loads, then immediately storing it back to the stack via a single 64-bit -// store. This is to avoid a write-small, read-large stall. -// However, if callers of this routine can be safely assumed to store the argument -// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. -// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. - -#ifdef __i386__ -#ifdef __SSE2__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__ashldi3) - movd 12(%esp), %xmm2 // Load count -#ifndef TRUST_CALLERS_USE_64_BIT_STORES - movd 4(%esp), %xmm0 - movd 8(%esp), %xmm1 - punpckldq %xmm1, %xmm0 // Load input -#else - movq 4(%esp), %xmm0 // Load input -#endif - psllq %xmm2, %xmm0 // shift input by count - movd %xmm0, %eax - psrlq $32, %xmm0 - movd %xmm0, %edx - ret -END_COMPILERRT_FUNCTION(__ashldi3) - -#else // Use GPRs instead of SSE2 instructions, if they aren't available. 
- -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__ashldi3) - movl 12(%esp), %ecx // Load count - movl 8(%esp), %edx // Load high - movl 4(%esp), %eax // Load low - - testl $0x20, %ecx // If count >= 32 - jnz 1f // goto 1 - shldl %cl, %eax, %edx // left shift high by count - shll %cl, %eax // left shift low by count - ret - -1: movl %eax, %edx // Move low to high - xorl %eax, %eax // clear low - shll %cl, %edx // shift high by count - 32 - ret -END_COMPILERRT_FUNCTION(__ashldi3) - -#endif // __SSE2__ -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __ashldi3(di_int input, int count); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. + +#ifdef __i386__ +#ifdef __SSE2__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__ashldi3) + movd 12(%esp), %xmm2 // Load count +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 // Load input +#else + movq 4(%esp), %xmm0 // Load input +#endif + psllq %xmm2, %xmm0 // shift input by count + movd %xmm0, %eax + psrlq $32, %xmm0 + movd %xmm0, %edx + ret +END_COMPILERRT_FUNCTION(__ashldi3) + +#else // Use GPRs instead of SSE2 instructions, if they aren't available. 
+ +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__ashldi3) + movl 12(%esp), %ecx // Load count + movl 8(%esp), %edx // Load high + movl 4(%esp), %eax // Load low + + testl $0x20, %ecx // If count >= 32 + jnz 1f // goto 1 + shldl %cl, %eax, %edx // left shift high by count + shll %cl, %eax // left shift low by count + ret + +1: movl %eax, %edx // Move low to high + xorl %eax, %eax // clear low + shll %cl, %edx // shift high by count - 32 + ret +END_COMPILERRT_FUNCTION(__ashldi3) + +#endif // __SSE2__ +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S b/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S index b4549a198d4..8f4742481b4 100644 --- a/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S @@ -1,69 +1,69 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// di_int __ashrdi3(di_int input, int count); - -#ifdef __i386__ -#ifdef __SSE2__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__ashrdi3) - movd 12(%esp), %xmm2 // Load count - movl 8(%esp), %eax -#ifndef TRUST_CALLERS_USE_64_BIT_STORES - movd 4(%esp), %xmm0 - movd 8(%esp), %xmm1 - punpckldq %xmm1, %xmm0 // Load input -#else - movq 4(%esp), %xmm0 // Load input -#endif - - psrlq %xmm2, %xmm0 // unsigned shift input by count - - testl %eax, %eax // check the sign-bit of the input - jns 1f // early out for positive inputs - - // If the input is negative, we need to construct the shifted sign bit - // to or into the result, as xmm does not have a signed right shift. 
- pcmpeqb %xmm1, %xmm1 // -1ULL - psrlq $58, %xmm1 // 0x3f - pandn %xmm1, %xmm2 // 63 - count - pcmpeqb %xmm1, %xmm1 // -1ULL - psubq %xmm1, %xmm2 // 64 - count - psllq %xmm2, %xmm1 // -1 << (64 - count) = leading sign bits - por %xmm1, %xmm0 - - // Move the result back to the general purpose registers and return -1: movd %xmm0, %eax - psrlq $32, %xmm0 - movd %xmm0, %edx - ret -END_COMPILERRT_FUNCTION(__ashrdi3) - -#else // Use GPRs instead of SSE2 instructions, if they aren't available. - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__ashrdi3) - movl 12(%esp), %ecx // Load count - movl 8(%esp), %edx // Load high - movl 4(%esp), %eax // Load low - - testl $0x20, %ecx // If count >= 32 - jnz 1f // goto 1 - - shrdl %cl, %edx, %eax // right shift low by count - sarl %cl, %edx // right shift high by count - ret - -1: movl %edx, %eax // Move high to low - sarl $31, %edx // clear high - sarl %cl, %eax // shift low by count - 32 - ret -END_COMPILERRT_FUNCTION(__ashrdi3) - -#endif // __SSE2__ -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __ashrdi3(di_int input, int count); + +#ifdef __i386__ +#ifdef __SSE2__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__ashrdi3) + movd 12(%esp), %xmm2 // Load count + movl 8(%esp), %eax +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 // Load input +#else + movq 4(%esp), %xmm0 // Load input +#endif + + psrlq %xmm2, %xmm0 // unsigned shift input by count + + testl %eax, %eax // check the sign-bit of the input + jns 1f // early out for positive inputs + + // If the input is negative, we need to construct the shifted sign bit + // to or into the result, as xmm does not have a signed right shift. 
+ pcmpeqb %xmm1, %xmm1 // -1ULL + psrlq $58, %xmm1 // 0x3f + pandn %xmm1, %xmm2 // 63 - count + pcmpeqb %xmm1, %xmm1 // -1ULL + psubq %xmm1, %xmm2 // 64 - count + psllq %xmm2, %xmm1 // -1 << (64 - count) = leading sign bits + por %xmm1, %xmm0 + + // Move the result back to the general purpose registers and return +1: movd %xmm0, %eax + psrlq $32, %xmm0 + movd %xmm0, %edx + ret +END_COMPILERRT_FUNCTION(__ashrdi3) + +#else // Use GPRs instead of SSE2 instructions, if they aren't available. + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__ashrdi3) + movl 12(%esp), %ecx // Load count + movl 8(%esp), %edx // Load high + movl 4(%esp), %eax // Load low + + testl $0x20, %ecx // If count >= 32 + jnz 1f // goto 1 + + shrdl %cl, %edx, %eax // right shift low by count + sarl %cl, %edx // right shift high by count + ret + +1: movl %edx, %eax // Move high to low + sarl $31, %edx // clear high + sarl %cl, %eax // shift low by count - 32 + ret +END_COMPILERRT_FUNCTION(__ashrdi3) + +#endif // __SSE2__ +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/chkstk.S b/contrib/libs/cxxsupp/builtins/i386/chkstk.S index ee5daaef1ed..b59974868f2 100644 --- a/contrib/libs/cxxsupp/builtins/i386/chkstk.S +++ b/contrib/libs/cxxsupp/builtins/i386/chkstk.S @@ -1,34 +1,34 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
- -#include "../assembly.h" - -// _chkstk routine -// This routine is windows specific -// http://msdn.microsoft.com/en-us/library/ms648426.aspx - -#ifdef __i386__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__chkstk_ms) - push %ecx - push %eax - cmp $0x1000,%eax - lea 12(%esp),%ecx - jb 1f -2: - sub $0x1000,%ecx - test %ecx,(%ecx) - sub $0x1000,%eax - cmp $0x1000,%eax - ja 2b -1: - sub %eax,%ecx - test %ecx,(%ecx) - pop %eax - pop %ecx - ret -END_COMPILERRT_FUNCTION(__chkstk_ms) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// _chkstk routine +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__chkstk_ms) + push %ecx + push %eax + cmp $0x1000,%eax + lea 12(%esp),%ecx + jb 1f +2: + sub $0x1000,%ecx + test %ecx,(%ecx) + sub $0x1000,%eax + cmp $0x1000,%eax + ja 2b +1: + sub %eax,%ecx + test %ecx,(%ecx) + pop %eax + pop %ecx + ret +END_COMPILERRT_FUNCTION(__chkstk_ms) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/chkstk2.S b/contrib/libs/cxxsupp/builtins/i386/chkstk2.S index e4dbf586220..7d65bb08892 100644 --- a/contrib/libs/cxxsupp/builtins/i386/chkstk2.S +++ b/contrib/libs/cxxsupp/builtins/i386/chkstk2.S @@ -1,40 +1,40 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -#ifdef __i386__ - -// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments, -// then decrement %esp by %eax. Preserves all registers except %esp and flags. 
-// This routine is windows specific -// http://msdn.microsoft.com/en-us/library/ms648426.aspx - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function -DEFINE_COMPILERRT_FUNCTION(__chkstk) - push %ecx - cmp $0x1000,%eax - lea 8(%esp),%ecx // esp before calling this routine -> ecx - jb 1f -2: - sub $0x1000,%ecx - test %ecx,(%ecx) - sub $0x1000,%eax - cmp $0x1000,%eax - ja 2b -1: - sub %eax,%ecx - test %ecx,(%ecx) - - lea 4(%esp),%eax // load pointer to the return address into eax - mov %ecx,%esp // install the new top of stack pointer into esp - mov -4(%eax),%ecx // restore ecx - push (%eax) // push return address onto the stack - sub %esp,%eax // restore the original value in eax - ret -END_COMPILERRT_FUNCTION(__chkstk) -END_COMPILERRT_FUNCTION(_alloca) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +#ifdef __i386__ + +// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments, +// then decrement %esp by %eax. Preserves all registers except %esp and flags. 
+// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function +DEFINE_COMPILERRT_FUNCTION(__chkstk) + push %ecx + cmp $0x1000,%eax + lea 8(%esp),%ecx // esp before calling this routine -> ecx + jb 1f +2: + sub $0x1000,%ecx + test %ecx,(%ecx) + sub $0x1000,%eax + cmp $0x1000,%eax + ja 2b +1: + sub %eax,%ecx + test %ecx,(%ecx) + + lea 4(%esp),%eax // load pointer to the return address into eax + mov %ecx,%esp // install the new top of stack pointer into esp + mov -4(%eax),%ecx // restore ecx + push (%eax) // push return address onto the stack + sub %esp,%eax // restore the original value in eax + ret +END_COMPILERRT_FUNCTION(__chkstk) +END_COMPILERRT_FUNCTION(_alloca) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/divdi3.S b/contrib/libs/cxxsupp/builtins/i386/divdi3.S index ceb0e802706..2cb0ddd4c29 100644 --- a/contrib/libs/cxxsupp/builtins/i386/divdi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/divdi3.S @@ -1,162 +1,162 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// di_int __divdi3(di_int a, di_int b); - -// result = a / b. -// both inputs and the output are 64-bit signed integers. -// This will do whatever the underlying hardware is set to do on division by zero. -// No other exceptions are generated, as the divide cannot overflow. -// -// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware -// on x86_64. The performance goal is ~40 cycles per divide, which is faster than -// currently possible via simulation of integer divides on the x87 unit. 
-// -// Stephen Canon, December 2008 - -#ifdef __i386__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__divdi3) - -/* This is currently implemented by wrapping the unsigned divide up in an absolute - value, then restoring the correct sign at the end of the computation. This could - certainly be improved upon. */ - - pushl %esi - movl 20(%esp), %edx // high word of b - movl 16(%esp), %eax // low word of b - movl %edx, %ecx - sarl $31, %ecx // (b < 0) ? -1 : 0 - xorl %ecx, %eax - xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b - subl %ecx, %eax - sbbl %ecx, %edx // EDX:EAX = abs(b) - movl %edx, 20(%esp) - movl %eax, 16(%esp) // store abs(b) back to stack - movl %ecx, %esi // set aside sign of b +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __divdi3(di_int a, di_int b); + +// result = a / b. +// both inputs and the output are 64-bit signed integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__divdi3) + +/* This is currently implemented by wrapping the unsigned divide up in an absolute + value, then restoring the correct sign at the end of the computation. This could + certainly be improved upon. */ + + pushl %esi + movl 20(%esp), %edx // high word of b + movl 16(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (b < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (b < 0) ? 
not(b) : b + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(b) + movl %edx, 20(%esp) + movl %eax, 16(%esp) // store abs(b) back to stack + movl %ecx, %esi // set aside sign of b + + movl 12(%esp), %edx // high word of b + movl 8(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (a < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(a) + movl %edx, 12(%esp) + movl %eax, 8(%esp) // store abs(a) back to stack + xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) + + pushl %ebx + movl 24(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 16(%esp), %edx // Load the high and low words of a, and jump + movl 12(%esp), %eax // to [1] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
+ jae 1f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + + +1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - movl 12(%esp), %edx // high word of b - movl 8(%esp), %eax // low word of b - movl %edx, %ecx - sarl $31, %ecx // (a < 0) ? -1 : 0 - xorl %ecx, %eax - xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a - subl %ecx, %eax - sbbl %ecx, %edx // EDX:EAX = abs(a) - movl %edx, 12(%esp) - movl %eax, 8(%esp) // store abs(a) back to stack - xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) - - pushl %ebx - movl 24(%esp), %ebx // Find the index i of the leading bit in b. - bsrl %ebx, %ecx // If the high word of b is zero, jump to - jz 9f // the code to handle that special case [9]. - - /* High word of b is known to be non-zero on this branch */ - - movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b - - shrl %cl, %eax // Practically, this means that bhi is given by: - shrl %eax // - notl %ecx // bhi = (high word of b) << (31 - i) | - shll %cl, %ebx // (low word of b) >> (1 + i) - orl %eax, %ebx // - movl 16(%esp), %edx // Load the high and low words of a, and jump - movl 12(%esp), %eax // to [1] if the high word is larger than bhi - cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
- jae 1f - - /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r - - pushl %edi - notl %ecx - shrl %eax - shrl %cl, %eax // q = qs >> (1 + i) - movl %eax, %edi - mull 24(%esp) // q*blo - movl 16(%esp), %ebx - movl 20(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 28(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - sbbl $0, %edi // decrement q if remainder is negative - xorl %edx, %edx - movl %edi, %eax - - addl %esi, %eax // Restore correct sign to result - adcl %esi, %edx - xorl %esi, %eax - xorl %esi, %edx - popl %edi // Restore callee-save registers - popl %ebx - popl %esi - retl // Return - - -1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - subl %ebx, %edx // subtract bhi from ahi so that divide will not - divl %ebx // overflow, and find q and r such that - // - // ahi:alo = (1:q)*bhi + r - // - // Note that q is a number in (31-i).(1+i) - // fix point. 
- - pushl %edi - notl %ecx - shrl %eax - orl $0x80000000, %eax - shrl %cl, %eax // q = (1:qs) >> (1 + i) - movl %eax, %edi - mull 24(%esp) // q*blo - movl 16(%esp), %ebx - movl 20(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 28(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - sbbl $0, %edi // decrement q if remainder is negative - xorl %edx, %edx - movl %edi, %eax - - addl %esi, %eax // Restore correct sign to result - adcl %esi, %edx - xorl %esi, %eax - xorl %esi, %edx - popl %edi // Restore callee-save registers - popl %ebx - popl %esi - retl // Return - - -9: /* High word of b is zero on this branch */ - - movl 16(%esp), %eax // Find qhi and rhi such that - movl 20(%esp), %ecx // - xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b - divl %ecx // - movl %eax, %ebx // - movl 12(%esp), %eax // Find qlo such that - divl %ecx // - movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b - - addl %esi, %eax // Restore correct sign to result - adcl %esi, %edx - xorl %esi, %eax - xorl %esi, %edx - popl %ebx // Restore callee-save registers - popl %esi - retl // Return -END_COMPILERRT_FUNCTION(__divdi3) - -#endif // __i386__ + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. 
+ + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + + +9: /* High word of b is zero on this branch */ + + movl 16(%esp), %eax // Find qhi and rhi such that + movl 20(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 12(%esp), %eax // Find qlo such that + divl %ecx // + movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %ebx // Restore callee-save registers + popl %esi + retl // Return +END_COMPILERRT_FUNCTION(__divdi3) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/floatdidf.S b/contrib/libs/cxxsupp/builtins/i386/floatdidf.S index 21d6154a766..dcc32f8ed85 100644 --- a/contrib/libs/cxxsupp/builtins/i386/floatdidf.S +++ b/contrib/libs/cxxsupp/builtins/i386/floatdidf.S @@ -1,39 +1,39 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
- -#include "../assembly.h" - -// double __floatundidf(du_int a); - -#ifdef __i386__ - -CONST_SECTION - - .balign 16 -twop52: - .quad 0x4330000000000000 - - .balign 16 -twop32: - .quad 0x41f0000000000000 - -#define REL_ADDR(_a) (_a)-0b(%eax) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatdidf) - cvtsi2sd 8(%esp), %xmm1 - movss 4(%esp), %xmm0 // low 32 bits of a - calll 0f -0: popl %eax - mulsd REL_ADDR(twop32), %xmm1 // a_hi as a double (without rounding) - movsd REL_ADDR(twop52), %xmm2 // 0x1.0p52 - subsd %xmm2, %xmm1 // a_hi - 0x1p52 (no rounding occurs) - orpd %xmm2, %xmm0 // 0x1p52 + a_lo (no rounding occurs) - addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) - movsd %xmm0, 4(%esp) - fldl 4(%esp) - ret -END_COMPILERRT_FUNCTION(__floatdidf) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// double __floatundidf(du_int a); + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop32: + .quad 0x41f0000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatdidf) + cvtsi2sd 8(%esp), %xmm1 + movss 4(%esp), %xmm0 // low 32 bits of a + calll 0f +0: popl %eax + mulsd REL_ADDR(twop32), %xmm1 // a_hi as a double (without rounding) + movsd REL_ADDR(twop52), %xmm2 // 0x1.0p52 + subsd %xmm2, %xmm1 // a_hi - 0x1p52 (no rounding occurs) + orpd %xmm2, %xmm0 // 0x1p52 + a_lo (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + movsd %xmm0, 4(%esp) + fldl 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatdidf) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/floatdisf.S b/contrib/libs/cxxsupp/builtins/i386/floatdisf.S index ee6f07c8609..f6427670360 100644 --- a/contrib/libs/cxxsupp/builtins/i386/floatdisf.S +++ b/contrib/libs/cxxsupp/builtins/i386/floatdisf.S @@ -1,32 +1,32 @@ -// This file is 
dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// float __floatdisf(di_int a); - -// This routine has some extra memory traffic, loading the 64-bit input via two -// 32-bit loads, then immediately storing it back to the stack via a single 64-bit -// store. This is to avoid a write-small, read-large stall. -// However, if callers of this routine can be safely assumed to store the argument -// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. -// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. - -#ifdef __i386__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatdisf) -#ifndef TRUST_CALLERS_USE_64_BIT_STORES - movd 4(%esp), %xmm0 - movd 8(%esp), %xmm1 - punpckldq %xmm1, %xmm0 - movq %xmm0, 4(%esp) -#endif - fildll 4(%esp) - fstps 4(%esp) - flds 4(%esp) - ret -END_COMPILERRT_FUNCTION(__floatdisf) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatdisf(di_int a); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. 
+ +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatdisf) +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 + movq %xmm0, 4(%esp) +#endif + fildll 4(%esp) + fstps 4(%esp) + flds 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatdisf) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/floatdixf.S b/contrib/libs/cxxsupp/builtins/i386/floatdixf.S index 2e9ee2ce611..839b0434c0c 100644 --- a/contrib/libs/cxxsupp/builtins/i386/floatdixf.S +++ b/contrib/libs/cxxsupp/builtins/i386/floatdixf.S @@ -1,30 +1,30 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// float __floatdixf(di_int a); - -#ifdef __i386__ - -// This routine has some extra memory traffic, loading the 64-bit input via two -// 32-bit loads, then immediately storing it back to the stack via a single 64-bit -// store. This is to avoid a write-small, read-large stall. -// However, if callers of this routine can be safely assumed to store the argument -// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. -// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatdixf) -#ifndef TRUST_CALLERS_USE_64_BIT_STORES - movd 4(%esp), %xmm0 - movd 8(%esp), %xmm1 - punpckldq %xmm1, %xmm0 - movq %xmm0, 4(%esp) -#endif - fildll 4(%esp) - ret -END_COMPILERRT_FUNCTION(__floatdixf) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatdixf(di_int a); + +#ifdef __i386__ + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. 
This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatdixf) +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 + movq %xmm0, 4(%esp) +#endif + fildll 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatdixf) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/floatundidf.S b/contrib/libs/cxxsupp/builtins/i386/floatundidf.S index 104ee8dc81b..8058c2ac0ae 100644 --- a/contrib/libs/cxxsupp/builtins/i386/floatundidf.S +++ b/contrib/libs/cxxsupp/builtins/i386/floatundidf.S @@ -1,52 +1,52 @@ -//===-- floatundidf.S - Implement __floatundidf for i386 ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements __floatundidf for the compiler_rt library. 
-// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// double __floatundidf(du_int a); - -#ifdef __i386__ - -CONST_SECTION - - .balign 16 -twop52: - .quad 0x4330000000000000 - - .balign 16 -twop84_plus_twop52: - .quad 0x4530000000100000 - - .balign 16 -twop84: - .quad 0x4530000000000000 - -#define REL_ADDR(_a) (_a)-0b(%eax) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundidf) - movss 8(%esp), %xmm1 // high 32 bits of a - movss 4(%esp), %xmm0 // low 32 bits of a - calll 0f -0: popl %eax - orpd REL_ADDR(twop84), %xmm1 // 0x1p84 + a_hi (no rounding occurs) - subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs) - orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs) - addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) - movsd %xmm0, 4(%esp) - fldl 4(%esp) - ret -END_COMPILERRT_FUNCTION(__floatundidf) - -#endif // __i386__ +//===-- floatundidf.S - Implement __floatundidf for i386 ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundidf for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// double __floatundidf(du_int a); + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop84_plus_twop52: + .quad 0x4530000000100000 + + .balign 16 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundidf) + movss 8(%esp), %xmm1 // high 32 bits of a + movss 4(%esp), %xmm0 // low 32 bits of a + calll 0f +0: popl %eax + orpd REL_ADDR(twop84), %xmm1 // 0x1p84 + a_hi (no rounding occurs) + subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs) + orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + movsd %xmm0, 4(%esp) + fldl 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundidf) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/floatundisf.S b/contrib/libs/cxxsupp/builtins/i386/floatundisf.S index b7db9582834..94c97e25aa8 100644 --- a/contrib/libs/cxxsupp/builtins/i386/floatundisf.S +++ b/contrib/libs/cxxsupp/builtins/i386/floatundisf.S @@ -1,105 +1,105 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// float __floatundisf(du_int a); - -// Note that there is a hardware instruction, fildll, that does most of what -// this function needs to do. However, because of our ia32 ABI, it will take -// a write-small read-large stall, so the software implementation here is -// actually several cycles faster. - -// This is a branch-free implementation. A branchy implementation might be -// faster for the common case if you know something a priori about the input -// distribution. - -/* branch-free x87 implementation - one cycle slower than without x87. 
- -#ifdef __i386__ - -CONST_SECTION -.balign 3 - - .quad 0x43f0000000000000 -twop64: .quad 0x0000000000000000 - -#define TWOp64 twop64-0b(%ecx,%eax,8) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundisf) - movl 8(%esp), %eax - movd 8(%esp), %xmm1 - movd 4(%esp), %xmm0 - punpckldq %xmm1, %xmm0 - calll 0f -0: popl %ecx - sarl $31, %eax - movq %xmm0, 4(%esp) - fildll 4(%esp) - faddl TWOp64 - fstps 4(%esp) - flds 4(%esp) - ret -END_COMPILERRT_FUNCTION(__floatundisf) - -#endif // __i386__ - -*/ - -/* branch-free, x87-free implementation - faster at the expense of code size */ - -#ifdef __i386__ - -CONST_SECTION - - .balign 16 -twop52: - .quad 0x4330000000000000 - .quad 0x0000000000000fff - - .balign 16 -sticky: - .quad 0x0000000000000000 - .long 0x00000012 - - .balign 16 -twelve: - .long 0x00000000 - -#define TWOp52 twop52-0b(%ecx) -#define STICKY sticky-0b(%ecx,%eax,8) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundisf) - movl 8(%esp), %eax - movd 8(%esp), %xmm1 - movd 4(%esp), %xmm0 - punpckldq %xmm1, %xmm0 - - calll 0f -0: popl %ecx - shrl %eax // high 31 bits of input as sint32 - addl $0x7ff80000, %eax - sarl $31, %eax // (big input) ? -1 : 0 - movsd STICKY, %xmm1 // (big input) ? 0xfff : 0 - movl $12, %edx - andl %eax, %edx // (big input) ? 12 : 0 - movd %edx, %xmm3 - andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0 - movsd TWOp52, %xmm2 // 0x1.0p52 - psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input - orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input) - orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input) - subsd %xmm2, %xmm0 // (double)((big input) ? (input >> 12 | input & 0xfff) : input) - cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? 
(input >> 12 | input & 0xfff) : input) - pslld $23, %xmm3 - paddd %xmm3, %xmm0 // (float)input - movd %xmm0, 4(%esp) - flds 4(%esp) - ret -END_COMPILERRT_FUNCTION(__floatundisf) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatundisf(du_int a); + +// Note that there is a hardware instruction, fildll, that does most of what +// this function needs to do. However, because of our ia32 ABI, it will take +// a write-small read-large stall, so the software implementation here is +// actually several cycles faster. + +// This is a branch-free implementation. A branchy implementation might be +// faster for the common case if you know something a priori about the input +// distribution. + +/* branch-free x87 implementation - one cycle slower than without x87. + +#ifdef __i386__ + +CONST_SECTION +.balign 3 + + .quad 0x43f0000000000000 +twop64: .quad 0x0000000000000000 + +#define TWOp64 twop64-0b(%ecx,%eax,8) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundisf) + movl 8(%esp), %eax + movd 8(%esp), %xmm1 + movd 4(%esp), %xmm0 + punpckldq %xmm1, %xmm0 + calll 0f +0: popl %ecx + sarl $31, %eax + movq %xmm0, 4(%esp) + fildll 4(%esp) + faddl TWOp64 + fstps 4(%esp) + flds 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundisf) + +#endif // __i386__ + +*/ + +/* branch-free, x87-free implementation - faster at the expense of code size */ + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + .quad 0x0000000000000fff + + .balign 16 +sticky: + .quad 0x0000000000000000 + .long 0x00000012 + + .balign 16 +twelve: + .long 0x00000000 + +#define TWOp52 twop52-0b(%ecx) +#define STICKY sticky-0b(%ecx,%eax,8) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundisf) + movl 8(%esp), %eax + movd 8(%esp), %xmm1 + movd 4(%esp), %xmm0 + punpckldq %xmm1, %xmm0 + + calll 0f +0: popl %ecx + shrl %eax // 
high 31 bits of input as sint32 + addl $0x7ff80000, %eax + sarl $31, %eax // (big input) ? -1 : 0 + movsd STICKY, %xmm1 // (big input) ? 0xfff : 0 + movl $12, %edx + andl %eax, %edx // (big input) ? 12 : 0 + movd %edx, %xmm3 + andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0 + movsd TWOp52, %xmm2 // 0x1.0p52 + psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input + orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input) + orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input) + subsd %xmm2, %xmm0 // (double)((big input) ? (input >> 12 | input & 0xfff) : input) + cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? (input >> 12 | input & 0xfff) : input) + pslld $23, %xmm3 + paddd %xmm3, %xmm0 // (float)input + movd %xmm0, 4(%esp) + flds 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundisf) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/floatundixf.S b/contrib/libs/cxxsupp/builtins/i386/floatundixf.S index c6c29e67f19..814b52f941d 100644 --- a/contrib/libs/cxxsupp/builtins/i386/floatundixf.S +++ b/contrib/libs/cxxsupp/builtins/i386/floatundixf.S @@ -1,43 +1,43 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
- -#include "../assembly.h" - -// long double __floatundixf(du_int a);16 - -#ifdef __i386__ - -CONST_SECTION - - .balign 16 -twop52: - .quad 0x4330000000000000 - - .balign 16 -twop84_plus_twop52_neg: - .quad 0xc530000000100000 - - .balign 16 -twop84: - .quad 0x4530000000000000 - -#define REL_ADDR(_a) (_a)-0b(%eax) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundixf) - calll 0f -0: popl %eax - movss 8(%esp), %xmm0 // hi 32 bits of input - movss 4(%esp), %xmm1 // lo 32 bits of input - orpd REL_ADDR(twop84), %xmm0 // 2^84 + hi (as a double) - orpd REL_ADDR(twop52), %xmm1 // 2^52 + lo (as a double) - addsd REL_ADDR(twop84_plus_twop52_neg), %xmm0 // hi - 2^52 (no rounding occurs) - movsd %xmm1, 4(%esp) - fldl 4(%esp) - movsd %xmm0, 4(%esp) - faddl 4(%esp) - ret -END_COMPILERRT_FUNCTION(__floatundixf) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// long double __floatundixf(du_int a);16 + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop84_plus_twop52_neg: + .quad 0xc530000000100000 + + .balign 16 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundixf) + calll 0f +0: popl %eax + movss 8(%esp), %xmm0 // hi 32 bits of input + movss 4(%esp), %xmm1 // lo 32 bits of input + orpd REL_ADDR(twop84), %xmm0 // 2^84 + hi (as a double) + orpd REL_ADDR(twop52), %xmm1 // 2^52 + lo (as a double) + addsd REL_ADDR(twop84_plus_twop52_neg), %xmm0 // hi - 2^52 (no rounding occurs) + movsd %xmm1, 4(%esp) + fldl 4(%esp) + movsd %xmm0, 4(%esp) + faddl 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundixf) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S b/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S index 74673c03dec..b80f11a3806 100644 --- 
a/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S @@ -1,59 +1,59 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// di_int __lshrdi3(di_int input, int count); - -// This routine has some extra memory traffic, loading the 64-bit input via two -// 32-bit loads, then immediately storing it back to the stack via a single 64-bit -// store. This is to avoid a write-small, read-large stall. -// However, if callers of this routine can be safely assumed to store the argument -// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. -// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. - -#ifdef __i386__ -#ifdef __SSE2__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__lshrdi3) - movd 12(%esp), %xmm2 // Load count -#ifndef TRUST_CALLERS_USE_64_BIT_STORES - movd 4(%esp), %xmm0 - movd 8(%esp), %xmm1 - punpckldq %xmm1, %xmm0 // Load input -#else - movq 4(%esp), %xmm0 // Load input -#endif - psrlq %xmm2, %xmm0 // shift input by count - movd %xmm0, %eax - psrlq $32, %xmm0 - movd %xmm0, %edx - ret -END_COMPILERRT_FUNCTION(__lshrdi3) - -#else // Use GPRs instead of SSE2 instructions, if they aren't available. - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__lshrdi3) - movl 12(%esp), %ecx // Load count - movl 8(%esp), %edx // Load high - movl 4(%esp), %eax // Load low - - testl $0x20, %ecx // If count >= 32 - jnz 1f // goto 1 - - shrdl %cl, %edx, %eax // right shift low by count - shrl %cl, %edx // right shift high by count - ret - -1: movl %edx, %eax // Move high to low - xorl %edx, %edx // clear high - shrl %cl, %eax // shift low by count - 32 - ret -END_COMPILERRT_FUNCTION(__lshrdi3) - -#endif // __SSE2__ -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+ +#include "../assembly.h" + +// di_int __lshrdi3(di_int input, int count); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. + +#ifdef __i386__ +#ifdef __SSE2__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__lshrdi3) + movd 12(%esp), %xmm2 // Load count +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 // Load input +#else + movq 4(%esp), %xmm0 // Load input +#endif + psrlq %xmm2, %xmm0 // shift input by count + movd %xmm0, %eax + psrlq $32, %xmm0 + movd %xmm0, %edx + ret +END_COMPILERRT_FUNCTION(__lshrdi3) + +#else // Use GPRs instead of SSE2 instructions, if they aren't available. + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__lshrdi3) + movl 12(%esp), %ecx // Load count + movl 8(%esp), %edx // Load high + movl 4(%esp), %eax // Load low + + testl $0x20, %ecx // If count >= 32 + jnz 1f // goto 1 + + shrdl %cl, %edx, %eax // right shift low by count + shrl %cl, %edx // right shift high by count + ret + +1: movl %edx, %eax // Move high to low + xorl %edx, %edx // clear high + shrl %cl, %eax // shift low by count - 32 + ret +END_COMPILERRT_FUNCTION(__lshrdi3) + +#endif // __SSE2__ +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/moddi3.S b/contrib/libs/cxxsupp/builtins/i386/moddi3.S index 94f922d447c..b9cee9d7aa7 100644 --- a/contrib/libs/cxxsupp/builtins/i386/moddi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/moddi3.S @@ -1,166 +1,166 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
- -#include "../assembly.h" - -// di_int __moddi3(di_int a, di_int b); - -// result = remainder of a / b. -// both inputs and the output are 64-bit signed integers. -// This will do whatever the underlying hardware is set to do on division by zero. -// No other exceptions are generated, as the divide cannot overflow. -// -// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware -// on x86_64. The performance goal is ~40 cycles per divide, which is faster than -// currently possible via simulation of integer divides on the x87 unit. -// - -// Stephen Canon, December 2008 - -#ifdef __i386__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__moddi3) - -/* This is currently implemented by wrapping the unsigned modulus up in an absolute - value. This could certainly be improved upon. */ - - pushl %esi - movl 20(%esp), %edx // high word of b - movl 16(%esp), %eax // low word of b - movl %edx, %ecx - sarl $31, %ecx // (b < 0) ? -1 : 0 - xorl %ecx, %eax - xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b - subl %ecx, %eax - sbbl %ecx, %edx // EDX:EAX = abs(b) - movl %edx, 20(%esp) - movl %eax, 16(%esp) // store abs(b) back to stack +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __moddi3(di_int a, di_int b); + +// result = remainder of a / b. +// both inputs and the output are 64-bit signed integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. 
+// + +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__moddi3) + +/* This is currently implemented by wrapping the unsigned modulus up in an absolute + value. This could certainly be improved upon. */ + + pushl %esi + movl 20(%esp), %edx // high word of b + movl 16(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (b < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(b) + movl %edx, 20(%esp) + movl %eax, 16(%esp) // store abs(b) back to stack + + movl 12(%esp), %edx // high word of b + movl 8(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (a < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(a) + movl %edx, 12(%esp) + movl %eax, 8(%esp) // store abs(a) back to stack + movl %ecx, %esi // set aside sign of a + + pushl %ebx + movl 24(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 16(%esp), %edx // Load the high and low words of a, and jump + movl 12(%esp), %eax // to [2] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
+ jae 2f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 1f // if positive, this is the result. + addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +1: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - movl 12(%esp), %edx // high word of b - movl 8(%esp), %eax // low word of b - movl %edx, %ecx - sarl $31, %ecx // (a < 0) ? -1 : 0 - xorl %ecx, %eax - xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a - subl %ecx, %eax - sbbl %ecx, %edx // EDX:EAX = abs(a) - movl %edx, 12(%esp) - movl %eax, 8(%esp) // store abs(a) back to stack - movl %ecx, %esi // set aside sign of a - - pushl %ebx - movl 24(%esp), %ebx // Find the index i of the leading bit in b. - bsrl %ebx, %ecx // If the high word of b is zero, jump to - jz 9f // the code to handle that special case [9]. 
- - /* High word of b is known to be non-zero on this branch */ - - movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b - - shrl %cl, %eax // Practically, this means that bhi is given by: - shrl %eax // - notl %ecx // bhi = (high word of b) << (31 - i) | - shll %cl, %ebx // (low word of b) >> (1 + i) - orl %eax, %ebx // - movl 16(%esp), %edx // Load the high and low words of a, and jump - movl 12(%esp), %eax // to [2] if the high word is larger than bhi - cmpl %ebx, %edx // to avoid overflowing the upcoming divide. - jae 2f - - /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r - - pushl %edi - notl %ecx - shrl %eax - shrl %cl, %eax // q = qs >> (1 + i) - movl %eax, %edi - mull 24(%esp) // q*blo - movl 16(%esp), %ebx - movl 20(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 28(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - - jnc 1f // if positive, this is the result. - addl 24(%esp), %ebx // otherwise - adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result -1: movl %ebx, %eax - movl %ecx, %edx - - addl %esi, %eax // Restore correct sign to result - adcl %esi, %edx - xorl %esi, %eax - xorl %esi, %edx - popl %edi // Restore callee-save registers - popl %ebx - popl %esi - retl // Return - -2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - subl %ebx, %edx // subtract bhi from ahi so that divide will not - divl %ebx // overflow, and find q and r such that - // - // ahi:alo = (1:q)*bhi + r - // - // Note that q is a number in (31-i).(1+i) - // fix point. 
- - pushl %edi - notl %ecx - shrl %eax - orl $0x80000000, %eax - shrl %cl, %eax // q = (1:qs) >> (1 + i) - movl %eax, %edi - mull 24(%esp) // q*blo - movl 16(%esp), %ebx - movl 20(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 28(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - - jnc 3f // if positive, this is the result. - addl 24(%esp), %ebx // otherwise - adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result -3: movl %ebx, %eax - movl %ecx, %edx - - addl %esi, %eax // Restore correct sign to result - adcl %esi, %edx - xorl %esi, %eax - xorl %esi, %edx - popl %edi // Restore callee-save registers - popl %ebx - popl %esi - retl // Return - -9: /* High word of b is zero on this branch */ - - movl 16(%esp), %eax // Find qhi and rhi such that - movl 20(%esp), %ecx // - xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b - divl %ecx // - movl %eax, %ebx // - movl 12(%esp), %eax // Find rlo such that - divl %ecx // - movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b - popl %ebx // - xorl %edx, %edx // and return 0:rlo - - addl %esi, %eax // Restore correct sign to result - adcl %esi, %edx - xorl %esi, %eax - xorl %esi, %edx - popl %esi - retl // Return -END_COMPILERRT_FUNCTION(__moddi3) - -#endif // __i386__ + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 3f // if positive, this is the result. 
+ addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +3: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +9: /* High word of b is zero on this branch */ + + movl 16(%esp), %eax // Find qhi and rhi such that + movl 20(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 12(%esp), %eax // Find rlo such that + divl %ecx // + movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + xorl %edx, %edx // and return 0:rlo + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %esi + retl // Return +END_COMPILERRT_FUNCTION(__moddi3) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/muldi3.S b/contrib/libs/cxxsupp/builtins/i386/muldi3.S index d19eb13d96b..15b6b499845 100644 --- a/contrib/libs/cxxsupp/builtins/i386/muldi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/muldi3.S @@ -1,30 +1,30 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// di_int __muldi3(di_int a, di_int b); - -#ifdef __i386__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__muldi3) - pushl %ebx - movl 16(%esp), %eax // b.lo - movl 12(%esp), %ecx // a.hi - imull %eax, %ecx // b.lo * a.hi - - movl 8(%esp), %edx // a.lo - movl 20(%esp), %ebx // b.hi - imull %edx, %ebx // a.lo * b.hi - - mull %edx // EDX:EAX = a.lo * b.lo - addl %ecx, %ebx // EBX = (a.lo*b.hi + a.hi*b.lo) - addl %ebx, %edx - - popl %ebx - retl -END_COMPILERRT_FUNCTION(__muldi3) - -#endif // __i386__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+ +#include "../assembly.h" + +// di_int __muldi3(di_int a, di_int b); + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__muldi3) + pushl %ebx + movl 16(%esp), %eax // b.lo + movl 12(%esp), %ecx // a.hi + imull %eax, %ecx // b.lo * a.hi + + movl 8(%esp), %edx // a.lo + movl 20(%esp), %ebx // b.hi + imull %edx, %ebx // a.lo * b.hi + + mull %edx // EDX:EAX = a.lo * b.lo + addl %ecx, %ebx // EBX = (a.lo*b.hi + a.hi*b.lo) + addl %ebx, %edx + + popl %ebx + retl +END_COMPILERRT_FUNCTION(__muldi3) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/udivdi3.S b/contrib/libs/cxxsupp/builtins/i386/udivdi3.S index 6c369193c0b..41b2edf03e3 100644 --- a/contrib/libs/cxxsupp/builtins/i386/udivdi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/udivdi3.S @@ -1,115 +1,115 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// du_int __udivdi3(du_int a, du_int b); - -// result = a / b. -// both inputs and the output are 64-bit unsigned integers. -// This will do whatever the underlying hardware is set to do on division by zero. -// No other exceptions are generated, as the divide cannot overflow. -// -// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware -// on x86_64. The performance goal is ~40 cycles per divide, which is faster than -// currently possible via simulation of integer divides on the x87 unit. -// -// Stephen Canon, December 2008 - -#ifdef __i386__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__udivdi3) - - pushl %ebx - movl 20(%esp), %ebx // Find the index i of the leading bit in b. - bsrl %ebx, %ecx // If the high word of b is zero, jump to - jz 9f // the code to handle that special case [9]. +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+ +#include "../assembly.h" + +// du_int __udivdi3(du_int a, du_int b); + +// result = a / b. +// both inputs and the output are 64-bit unsigned integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__udivdi3) + + pushl %ebx + movl 20(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 12(%esp), %edx // Load the high and low words of a, and jump + movl 8(%esp), %eax // to [1] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
+ jae 1f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + popl %edi + popl %ebx + retl + + +1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - /* High word of b is known to be non-zero on this branch */ - - movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b - - shrl %cl, %eax // Practically, this means that bhi is given by: - shrl %eax // - notl %ecx // bhi = (high word of b) << (31 - i) | - shll %cl, %ebx // (low word of b) >> (1 + i) - orl %eax, %ebx // - movl 12(%esp), %edx // Load the high and low words of a, and jump - movl 8(%esp), %eax // to [1] if the high word is larger than bhi - cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
- jae 1f - - /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r - - pushl %edi - notl %ecx - shrl %eax - shrl %cl, %eax // q = qs >> (1 + i) - movl %eax, %edi - mull 20(%esp) // q*blo - movl 12(%esp), %ebx - movl 16(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 24(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - sbbl $0, %edi // decrement q if remainder is negative - xorl %edx, %edx - movl %edi, %eax - popl %edi - popl %ebx - retl - - -1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - subl %ebx, %edx // subtract bhi from ahi so that divide will not - divl %ebx // overflow, and find q and r such that - // - // ahi:alo = (1:q)*bhi + r - // - // Note that q is a number in (31-i).(1+i) - // fix point. - - pushl %edi - notl %ecx - shrl %eax - orl $0x80000000, %eax - shrl %cl, %eax // q = (1:qs) >> (1 + i) - movl %eax, %edi - mull 20(%esp) // q*blo - movl 12(%esp), %ebx - movl 16(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 24(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - sbbl $0, %edi // decrement q if remainder is negative - xorl %edx, %edx - movl %edi, %eax - popl %edi - popl %ebx - retl - - -9: /* High word of b is zero on this branch */ - - movl 12(%esp), %eax // Find qhi and rhi such that - movl 16(%esp), %ecx // - xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b - divl %ecx // - movl %eax, %ebx // - movl 8(%esp), %eax // Find qlo such that - divl %ecx // - movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b - popl %ebx // - retl // and return qhi:qlo -END_COMPILERRT_FUNCTION(__udivdi3) - -#endif // __i386__ + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + 
// Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + popl %edi + popl %ebx + retl + + +9: /* High word of b is zero on this branch */ + + movl 12(%esp), %eax // Find qhi and rhi such that + movl 16(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 8(%esp), %eax // Find qlo such that + divl %ecx // + movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + retl // and return qhi:qlo +END_COMPILERRT_FUNCTION(__udivdi3) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/i386/umoddi3.S b/contrib/libs/cxxsupp/builtins/i386/umoddi3.S index c88ce8c0dc2..a190a7d397d 100644 --- a/contrib/libs/cxxsupp/builtins/i386/umoddi3.S +++ b/contrib/libs/cxxsupp/builtins/i386/umoddi3.S @@ -1,126 +1,126 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// du_int __umoddi3(du_int a, du_int b); - -// result = remainder of a / b. -// both inputs and the output are 64-bit unsigned integers. -// This will do whatever the underlying hardware is set to do on division by zero. -// No other exceptions are generated, as the divide cannot overflow. -// -// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware -// on x86_64. The performance goal is ~40 cycles per divide, which is faster than -// currently possible via simulation of integer divides on the x87 unit. 
-// - -// Stephen Canon, December 2008 - -#ifdef __i386__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__umoddi3) - - pushl %ebx - movl 20(%esp), %ebx // Find the index i of the leading bit in b. - bsrl %ebx, %ecx // If the high word of b is zero, jump to - jz 9f // the code to handle that special case [9]. +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// du_int __umoddi3(du_int a, du_int b); + +// result = remainder of a / b. +// both inputs and the output are 64-bit unsigned integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// + +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__umoddi3) + + pushl %ebx + movl 20(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 12(%esp), %edx // Load the high and low words of a, and jump + movl 8(%esp), %eax // to [2] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
+ jae 2f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 1f // if positive, this is the result. + addl 20(%esp), %ebx // otherwise + adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +1: movl %ebx, %eax + movl %ecx, %edx + + popl %edi + popl %ebx + retl + + +2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - /* High word of b is known to be non-zero on this branch */ - - movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b - - shrl %cl, %eax // Practically, this means that bhi is given by: - shrl %eax // - notl %ecx // bhi = (high word of b) << (31 - i) | - shll %cl, %ebx // (low word of b) >> (1 + i) - orl %eax, %ebx // - movl 12(%esp), %edx // Load the high and low words of a, and jump - movl 8(%esp), %eax // to [2] if the high word is larger than bhi - cmpl %ebx, %edx // to avoid overflowing the upcoming divide. - jae 2f - - /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r - - pushl %edi - notl %ecx - shrl %eax - shrl %cl, %eax // q = qs >> (1 + i) - movl %eax, %edi - mull 20(%esp) // q*blo - movl 12(%esp), %ebx - movl 16(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 24(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - - jnc 1f // if positive, this is the result. 
- addl 20(%esp), %ebx // otherwise - adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result -1: movl %ebx, %eax - movl %ecx, %edx - - popl %edi - popl %ebx - retl - - -2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ - - subl %ebx, %edx // subtract bhi from ahi so that divide will not - divl %ebx // overflow, and find q and r such that - // - // ahi:alo = (1:q)*bhi + r - // - // Note that q is a number in (31-i).(1+i) - // fix point. - - pushl %edi - notl %ecx - shrl %eax - orl $0x80000000, %eax - shrl %cl, %eax // q = (1:qs) >> (1 + i) - movl %eax, %edi - mull 20(%esp) // q*blo - movl 12(%esp), %ebx - movl 16(%esp), %ecx // ECX:EBX = a - subl %eax, %ebx - sbbl %edx, %ecx // ECX:EBX = a - q*blo - movl 24(%esp), %eax - imull %edi, %eax // q*bhi - subl %eax, %ecx // ECX:EBX = a - q*b - - jnc 3f // if positive, this is the result. - addl 20(%esp), %ebx // otherwise - adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result -3: movl %ebx, %eax - movl %ecx, %edx - - popl %edi - popl %ebx - retl - - - -9: /* High word of b is zero on this branch */ - - movl 12(%esp), %eax // Find qhi and rhi such that - movl 16(%esp), %ecx // - xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b - divl %ecx // - movl %eax, %ebx // - movl 8(%esp), %eax // Find rlo such that - divl %ecx // - movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b - popl %ebx // - xorl %edx, %edx // and return 0:rlo - retl // -END_COMPILERRT_FUNCTION(__umoddi3) - -#endif // __i386__ + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. 
+ + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 3f // if positive, this is the result. + addl 20(%esp), %ebx // otherwise + adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +3: movl %ebx, %eax + movl %ecx, %edx + + popl %edi + popl %ebx + retl + + + +9: /* High word of b is zero on this branch */ + + movl 12(%esp), %eax // Find qhi and rhi such that + movl 16(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 8(%esp), %eax // Find rlo such that + divl %ecx // + movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + xorl %edx, %edx // and return 0:rlo + retl // +END_COMPILERRT_FUNCTION(__umoddi3) + +#endif // __i386__ diff --git a/contrib/libs/cxxsupp/builtins/int_endianness.h b/contrib/libs/cxxsupp/builtins/int_endianness.h index 2a813489dc9..7995ddbb953 100644 --- a/contrib/libs/cxxsupp/builtins/int_endianness.h +++ b/contrib/libs/cxxsupp/builtins/int_endianness.h @@ -1,116 +1,116 @@ -/* ===-- int_endianness.h - configuration header for compiler-rt ------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file is a configuration header for compiler-rt. - * This file is not part of the interface of this library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#ifndef INT_ENDIANNESS_H -#define INT_ENDIANNESS_H - -#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ - defined(__ORDER_LITTLE_ENDIAN__) - -/* Clang and GCC provide built-in endianness definitions. */ -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 -#endif /* __BYTE_ORDER__ */ - -#else /* Compilers other than Clang or GCC. */ - -#if defined(__SVR4) && defined(__sun) -#include - -#if defined(_BIG_ENDIAN) -#define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 -#elif defined(_LITTLE_ENDIAN) -#define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 -#else /* !_LITTLE_ENDIAN */ -#error "unknown endianness" -#endif /* !_LITTLE_ENDIAN */ - -#endif /* Solaris and AuroraUX. */ - -/* .. */ - -#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ - defined(__minix) -#include - -#if _BYTE_ORDER == _BIG_ENDIAN -#define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 -#elif _BYTE_ORDER == _LITTLE_ENDIAN -#define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 -#endif /* _BYTE_ORDER */ - -#endif /* *BSD */ - -#if defined(__OpenBSD__) || defined(__Bitrig__) -#include - -#if _BYTE_ORDER == _BIG_ENDIAN -#define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 -#elif _BYTE_ORDER == _LITTLE_ENDIAN -#define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 -#endif /* _BYTE_ORDER */ - -#endif /* OpenBSD and Bitrig. */ - -/* .. 
*/ - -/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the - * compiler (at least with GCC) */ -#if defined(__APPLE__) || defined(__ellcc__ ) - -#ifdef __BIG_ENDIAN__ -#if __BIG_ENDIAN__ -#define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 -#endif -#endif /* __BIG_ENDIAN__ */ - -#ifdef __LITTLE_ENDIAN__ -#if __LITTLE_ENDIAN__ -#define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 -#endif -#endif /* __LITTLE_ENDIAN__ */ - -#endif /* Mac OSX */ - -/* .. */ - -#if defined(_WIN32) - -#define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 - -#endif /* Windows */ - -#endif /* Clang or GCC. */ - -/* . */ - -#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN) -#error Unable to determine endian -#endif /* Check we found an endianness correctly. */ - -#endif /* INT_ENDIANNESS_H */ +/* ===-- int_endianness.h - configuration header for compiler-rt ------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file is a configuration header for compiler-rt. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_ENDIANNESS_H +#define INT_ENDIANNESS_H + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + defined(__ORDER_LITTLE_ENDIAN__) + +/* Clang and GCC provide built-in endianness definitions. */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* __BYTE_ORDER__ */ + +#else /* Compilers other than Clang or GCC. 
*/ + +#if defined(__SVR4) && defined(__sun) +#include + +#if defined(_BIG_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif defined(_LITTLE_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#else /* !_LITTLE_ENDIAN */ +#error "unknown endianness" +#endif /* !_LITTLE_ENDIAN */ + +#endif /* Solaris and AuroraUX. */ + +/* .. */ + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ + defined(__minix) +#include + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* *BSD */ + +#if defined(__OpenBSD__) || defined(__Bitrig__) +#include + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* OpenBSD and Bitrig. */ + +/* .. */ + +/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the + * compiler (at least with GCC) */ +#if defined(__APPLE__) || defined(__ellcc__ ) + +#ifdef __BIG_ENDIAN__ +#if __BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#endif +#endif /* __BIG_ENDIAN__ */ + +#ifdef __LITTLE_ENDIAN__ +#if __LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif +#endif /* __LITTLE_ENDIAN__ */ + +#endif /* Mac OSX */ + +/* .. */ + +#if defined(_WIN32) + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif /* Windows */ + +#endif /* Clang or GCC. */ + +/* . */ + +#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN) +#error Unable to determine endian +#endif /* Check we found an endianness correctly. 
*/ + +#endif /* INT_ENDIANNESS_H */ diff --git a/contrib/libs/cxxsupp/builtins/int_lib.h b/contrib/libs/cxxsupp/builtins/int_lib.h index c231bcbcdfb..272f9d9dad7 100644 --- a/contrib/libs/cxxsupp/builtins/int_lib.h +++ b/contrib/libs/cxxsupp/builtins/int_lib.h @@ -1,133 +1,133 @@ -/* ===-- int_lib.h - configuration header for compiler-rt -----------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file is a configuration header for compiler-rt. - * This file is not part of the interface of this library. - * - * ===----------------------------------------------------------------------=== - */ - -#ifndef INT_LIB_H -#define INT_LIB_H - -/* Assumption: Signed integral is 2's complement. */ -/* Assumption: Right shift of signed negative is arithmetic shift. */ -/* Assumption: Endianness is little or big (not mixed). 
*/ - -#if defined(__ELF__) -#define FNALIAS(alias_name, original_name) \ - void alias_name() __attribute__((alias(#original_name))) -#else -#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")") -#endif - -/* ABI macro definitions */ - -#if __ARM_EABI__ -# define ARM_EABI_FNALIAS(aeabi_name, name) \ - void __aeabi_##aeabi_name() __attribute__((alias("__" #name))); -# define COMPILER_RT_ABI __attribute__((pcs("aapcs"))) -#else -# define ARM_EABI_FNALIAS(aeabi_name, name) -# if defined(__arm__) && defined(_WIN32) && (!defined(_MSC_VER) || defined(__clang__)) -# define COMPILER_RT_ABI __attribute__((pcs("aapcs"))) -# else -# define COMPILER_RT_ABI -# endif -#endif - -#ifdef _MSC_VER -#define ALWAYS_INLINE __forceinline -#define NOINLINE __declspec(noinline) -#define NORETURN __declspec(noreturn) -#define UNUSED -#else -#define ALWAYS_INLINE __attribute__((always_inline)) -#define NOINLINE __attribute__((noinline)) -#define NORETURN __attribute__((noreturn)) -#define UNUSED __attribute__((unused)) -#endif - -#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE)) -/* - * Kernel and boot environment can't use normal headers, - * so use the equivalent system headers. - */ -# include -# include -# include -#else -/* Include the standard compiler builtin headers we use functionality from. */ -# include -# include -# include -# include -#endif - -/* Include the commonly used internal type definitions. */ -#include "int_types.h" - -/* Include internal utility function declarations. 
*/ -#include "int_util.h" - -COMPILER_RT_ABI si_int __paritysi2(si_int a); -COMPILER_RT_ABI si_int __paritydi2(di_int a); - -COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); -COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); -COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); - -COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem); -COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem); -#ifdef CRT_HAS_128BIT -COMPILER_RT_ABI si_int __clzti2(ti_int a); -COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); -#endif - -/* Definitions for builtins unavailable on MSVC */ -#if defined(_MSC_VER) && !defined(__clang__) -#include - -uint32_t __inline __builtin_ctz(uint32_t value) { - uint32_t trailing_zero = 0; - if (_BitScanForward(&trailing_zero, value)) - return trailing_zero; - return 32; -} - -uint32_t __inline __builtin_clz(uint32_t value) { - uint32_t leading_zero = 0; - if (_BitScanReverse(&leading_zero, value)) - return 31 - leading_zero; - return 32; -} - -#if defined(_M_ARM) || defined(_M_X64) -uint32_t __inline __builtin_clzll(uint64_t value) { - uint32_t leading_zero = 0; - if (_BitScanReverse64(&leading_zero, value)) - return 63 - leading_zero; - return 64; -} -#else -uint32_t __inline __builtin_clzll(uint64_t value) { - if (value == 0) - return 64; - uint32_t msh = (uint32_t)(value >> 32); - uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF); - if (msh != 0) - return __builtin_clz(msh); - return 32 + __builtin_clz(lsh); -} -#endif - -#define __builtin_clzl __builtin_clzll -#endif // defined(_MSC_VER) && !defined(__clang__) - -#endif /* INT_LIB_H */ +/* ===-- int_lib.h - configuration header for compiler-rt -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file is a configuration header for compiler-rt. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_LIB_H +#define INT_LIB_H + +/* Assumption: Signed integral is 2's complement. */ +/* Assumption: Right shift of signed negative is arithmetic shift. */ +/* Assumption: Endianness is little or big (not mixed). */ + +#if defined(__ELF__) +#define FNALIAS(alias_name, original_name) \ + void alias_name() __attribute__((alias(#original_name))) +#else +#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")") +#endif + +/* ABI macro definitions */ + +#if __ARM_EABI__ +# define ARM_EABI_FNALIAS(aeabi_name, name) \ + void __aeabi_##aeabi_name() __attribute__((alias("__" #name))); +# define COMPILER_RT_ABI __attribute__((pcs("aapcs"))) +#else +# define ARM_EABI_FNALIAS(aeabi_name, name) +# if defined(__arm__) && defined(_WIN32) && (!defined(_MSC_VER) || defined(__clang__)) +# define COMPILER_RT_ABI __attribute__((pcs("aapcs"))) +# else +# define COMPILER_RT_ABI +# endif +#endif + +#ifdef _MSC_VER +#define ALWAYS_INLINE __forceinline +#define NOINLINE __declspec(noinline) +#define NORETURN __declspec(noreturn) +#define UNUSED +#else +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#endif + +#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE)) +/* + * Kernel and boot environment can't use normal headers, + * so use the equivalent system headers. + */ +# include +# include +# include +#else +/* Include the standard compiler builtin headers we use functionality from. */ +# include +# include +# include +# include +#endif + +/* Include the commonly used internal type definitions. 
*/ +#include "int_types.h" + +/* Include internal utility function declarations. */ +#include "int_util.h" + +COMPILER_RT_ABI si_int __paritysi2(si_int a); +COMPILER_RT_ABI si_int __paritydi2(di_int a); + +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); + +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem); +#ifdef CRT_HAS_128BIT +COMPILER_RT_ABI si_int __clzti2(ti_int a); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); +#endif + +/* Definitions for builtins unavailable on MSVC */ +#if defined(_MSC_VER) && !defined(__clang__) +#include + +uint32_t __inline __builtin_ctz(uint32_t value) { + uint32_t trailing_zero = 0; + if (_BitScanForward(&trailing_zero, value)) + return trailing_zero; + return 32; +} + +uint32_t __inline __builtin_clz(uint32_t value) { + uint32_t leading_zero = 0; + if (_BitScanReverse(&leading_zero, value)) + return 31 - leading_zero; + return 32; +} + +#if defined(_M_ARM) || defined(_M_X64) +uint32_t __inline __builtin_clzll(uint64_t value) { + uint32_t leading_zero = 0; + if (_BitScanReverse64(&leading_zero, value)) + return 63 - leading_zero; + return 64; +} +#else +uint32_t __inline __builtin_clzll(uint64_t value) { + if (value == 0) + return 64; + uint32_t msh = (uint32_t)(value >> 32); + uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF); + if (msh != 0) + return __builtin_clz(msh); + return 32 + __builtin_clz(lsh); +} +#endif + +#define __builtin_clzl __builtin_clzll +#endif // defined(_MSC_VER) && !defined(__clang__) + +#endif /* INT_LIB_H */ diff --git a/contrib/libs/cxxsupp/builtins/int_math.h b/contrib/libs/cxxsupp/builtins/int_math.h index 9c718df92cb..fc81fb7f022 100644 --- a/contrib/libs/cxxsupp/builtins/int_math.h +++ b/contrib/libs/cxxsupp/builtins/int_math.h @@ -1,114 +1,114 @@ -/* ===-- int_math.h - internal 
math inlines ---------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===-----------------------------------------------------------------------=== - * - * This file is not part of the interface of this library. - * - * This file defines substitutes for the libm functions used in some of the - * compiler-rt implementations, defined in such a way that there is not a direct - * dependency on libm or math.h. Instead, we use the compiler builtin versions - * where available. This reduces our dependencies on the system SDK by foisting - * the responsibility onto the compiler. - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef INT_MATH_H -#define INT_MATH_H - -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#include -#include -#include -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#define CRT_INFINITY INFINITY -#else -#define CRT_INFINITY __builtin_huge_valf() -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#define crt_isfinite(x) _finite((x)) -#define crt_isinf(x) !_finite((x)) -#define crt_isnan(x) _isnan((x)) -#else -/* Define crt_isfinite in terms of the builtin if available, otherwise provide - * an alternate version in terms of our other functions. This supports some - * versions of GCC which didn't have __builtin_isfinite. 
- */ -#if __has_builtin(__builtin_isfinite) -# define crt_isfinite(x) __builtin_isfinite((x)) -#elif defined(__GNUC__) -# define crt_isfinite(x) \ - __extension__(({ \ - __typeof((x)) x_ = (x); \ - !crt_isinf(x_) && !crt_isnan(x_); \ - })) -#else -# error "Do not know how to check for infinity" -#endif /* __has_builtin(__builtin_isfinite) */ -#define crt_isinf(x) __builtin_isinf((x)) -#define crt_isnan(x) __builtin_isnan((x)) -#endif /* _MSC_VER */ - -#if defined(_MSC_VER) && !defined(__clang__) -#define crt_copysign(x, y) copysign((x), (y)) -#define crt_copysignf(x, y) copysignf((x), (y)) -#define crt_copysignl(x, y) copysignl((x), (y)) -#else -#define crt_copysign(x, y) __builtin_copysign((x), (y)) -#define crt_copysignf(x, y) __builtin_copysignf((x), (y)) -#define crt_copysignl(x, y) __builtin_copysignl((x), (y)) -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#define crt_fabs(x) fabs((x)) -#define crt_fabsf(x) fabsf((x)) -#define crt_fabsl(x) fabs((x)) -#else -#define crt_fabs(x) __builtin_fabs((x)) -#define crt_fabsf(x) __builtin_fabsf((x)) -#define crt_fabsl(x) __builtin_fabsl((x)) -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#define crt_fmax(x, y) __max((x), (y)) -#define crt_fmaxf(x, y) __max((x), (y)) -#define crt_fmaxl(x, y) __max((x), (y)) -#else -#define crt_fmax(x, y) __builtin_fmax((x), (y)) -#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y)) -#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y)) -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#define crt_logb(x) logb((x)) -#define crt_logbf(x) logbf((x)) -#define crt_logbl(x) logbl((x)) -#else -#define crt_logb(x) __builtin_logb((x)) -#define crt_logbf(x) __builtin_logbf((x)) -#define crt_logbl(x) __builtin_logbl((x)) -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#define crt_scalbn(x, y) scalbn((x), (y)) -#define crt_scalbnf(x, y) scalbnf((x), (y)) -#define crt_scalbnl(x, y) scalbnl((x), (y)) -#else -#define crt_scalbn(x, y) __builtin_scalbn((x), (y)) -#define 
crt_scalbnf(x, y) __builtin_scalbnf((x), (y)) -#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y)) -#endif - -#endif /* INT_MATH_H */ +/* ===-- int_math.h - internal math inlines ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===-----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. + * + * This file defines substitutes for the libm functions used in some of the + * compiler-rt implementations, defined in such a way that there is not a direct + * dependency on libm or math.h. Instead, we use the compiler builtin versions + * where available. This reduces our dependencies on the system SDK by foisting + * the responsibility onto the compiler. + * + * ===-----------------------------------------------------------------------=== + */ + +#ifndef INT_MATH_H +#define INT_MATH_H + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#include +#include +#include +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define CRT_INFINITY INFINITY +#else +#define CRT_INFINITY __builtin_huge_valf() +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_isfinite(x) _finite((x)) +#define crt_isinf(x) !_finite((x)) +#define crt_isnan(x) _isnan((x)) +#else +/* Define crt_isfinite in terms of the builtin if available, otherwise provide + * an alternate version in terms of our other functions. This supports some + * versions of GCC which didn't have __builtin_isfinite. 
+ */ +#if __has_builtin(__builtin_isfinite) +# define crt_isfinite(x) __builtin_isfinite((x)) +#elif defined(__GNUC__) +# define crt_isfinite(x) \ + __extension__(({ \ + __typeof((x)) x_ = (x); \ + !crt_isinf(x_) && !crt_isnan(x_); \ + })) +#else +# error "Do not know how to check for infinity" +#endif /* __has_builtin(__builtin_isfinite) */ +#define crt_isinf(x) __builtin_isinf((x)) +#define crt_isnan(x) __builtin_isnan((x)) +#endif /* _MSC_VER */ + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_copysign(x, y) copysign((x), (y)) +#define crt_copysignf(x, y) copysignf((x), (y)) +#define crt_copysignl(x, y) copysignl((x), (y)) +#else +#define crt_copysign(x, y) __builtin_copysign((x), (y)) +#define crt_copysignf(x, y) __builtin_copysignf((x), (y)) +#define crt_copysignl(x, y) __builtin_copysignl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fabs(x) fabs((x)) +#define crt_fabsf(x) fabsf((x)) +#define crt_fabsl(x) fabs((x)) +#else +#define crt_fabs(x) __builtin_fabs((x)) +#define crt_fabsf(x) __builtin_fabsf((x)) +#define crt_fabsl(x) __builtin_fabsl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fmax(x, y) __max((x), (y)) +#define crt_fmaxf(x, y) __max((x), (y)) +#define crt_fmaxl(x, y) __max((x), (y)) +#else +#define crt_fmax(x, y) __builtin_fmax((x), (y)) +#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y)) +#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_logb(x) logb((x)) +#define crt_logbf(x) logbf((x)) +#define crt_logbl(x) logbl((x)) +#else +#define crt_logb(x) __builtin_logb((x)) +#define crt_logbf(x) __builtin_logbf((x)) +#define crt_logbl(x) __builtin_logbl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_scalbn(x, y) scalbn((x), (y)) +#define crt_scalbnf(x, y) scalbnf((x), (y)) +#define crt_scalbnl(x, y) scalbnl((x), (y)) +#else +#define crt_scalbn(x, y) __builtin_scalbn((x), (y)) +#define 
crt_scalbnf(x, y) __builtin_scalbnf((x), (y)) +#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y)) +#endif + +#endif /* INT_MATH_H */ diff --git a/contrib/libs/cxxsupp/builtins/int_types.h b/contrib/libs/cxxsupp/builtins/int_types.h index 2784a9c54d9..23065b89bd8 100644 --- a/contrib/libs/cxxsupp/builtins/int_types.h +++ b/contrib/libs/cxxsupp/builtins/int_types.h @@ -1,165 +1,165 @@ -/* ===-- int_lib.h - configuration header for compiler-rt -----------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file is not part of the interface of this library. - * - * This file defines various standard types, most importantly a number of unions - * used to access parts of larger types. - * - * ===----------------------------------------------------------------------=== - */ - -#ifndef INT_TYPES_H -#define INT_TYPES_H - -#include "int_endianness.h" - -/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */ -#ifdef si_int -#undef si_int -#endif -typedef int si_int; -typedef unsigned su_int; - -typedef long long di_int; -typedef unsigned long long du_int; - -typedef union -{ - di_int all; - struct - { -#if _YUGA_LITTLE_ENDIAN - su_int low; - si_int high; -#else - si_int high; - su_int low; -#endif /* _YUGA_LITTLE_ENDIAN */ - }s; -} dwords; - -typedef union -{ - du_int all; - struct - { -#if _YUGA_LITTLE_ENDIAN - su_int low; - su_int high; -#else - su_int high; - su_int low; -#endif /* _YUGA_LITTLE_ENDIAN */ - }s; -} udwords; - -/* MIPS64 issue: PR 20098 */ +/* ===-- int_lib.h - configuration header for compiler-rt -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. + * + * This file defines various standard types, most importantly a number of unions + * used to access parts of larger types. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_TYPES_H +#define INT_TYPES_H + +#include "int_endianness.h" + +/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */ +#ifdef si_int +#undef si_int +#endif +typedef int si_int; +typedef unsigned su_int; + +typedef long long di_int; +typedef unsigned long long du_int; + +typedef union +{ + di_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + si_int high; +#else + si_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} dwords; + +typedef union +{ + du_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + su_int high; +#else + su_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} udwords; + +/* MIPS64 issue: PR 20098 */ #if defined(__LP64__) && !(defined(__mips__) && defined(__clang__)) || (defined(_MSC_VER) && defined(__clang__)) -#define CRT_HAS_128BIT -#endif - -#ifdef CRT_HAS_128BIT -typedef int ti_int __attribute__ ((mode (TI))); -typedef unsigned tu_int __attribute__ ((mode (TI))); - -typedef union -{ - ti_int all; - struct - { -#if _YUGA_LITTLE_ENDIAN - du_int low; - di_int high; -#else - di_int high; - du_int low; -#endif /* _YUGA_LITTLE_ENDIAN */ - }s; -} twords; - -typedef union -{ - tu_int all; - struct - { -#if _YUGA_LITTLE_ENDIAN - du_int low; - du_int high; -#else - du_int high; - du_int low; -#endif /* _YUGA_LITTLE_ENDIAN */ - }s; -} utwords; - -static __inline ti_int make_ti(di_int h, di_int l) { - twords r; - r.s.high = h; - r.s.low = l; - return r.all; -} - -static __inline tu_int make_tu(du_int h, du_int l) { - utwords r; - r.s.high = h; - r.s.low = l; - return r.all; -} - -#endif /* CRT_HAS_128BIT */ - -typedef 
union -{ - su_int u; - float f; -} float_bits; - -typedef union -{ - udwords u; - double f; -} double_bits; - -typedef struct -{ -#if _YUGA_LITTLE_ENDIAN - udwords low; - udwords high; -#else - udwords high; - udwords low; -#endif /* _YUGA_LITTLE_ENDIAN */ -} uqwords; - -typedef union -{ - uqwords u; - long double f; -} long_double_bits; - -#if __STDC_VERSION__ >= 199901L -typedef float _Complex Fcomplex; -typedef double _Complex Dcomplex; -typedef long double _Complex Lcomplex; - -#define COMPLEX_REAL(x) __real__(x) -#define COMPLEX_IMAGINARY(x) __imag__(x) -#else -typedef struct { float real, imaginary; } Fcomplex; - -typedef struct { double real, imaginary; } Dcomplex; - -typedef struct { long double real, imaginary; } Lcomplex; - -#define COMPLEX_REAL(x) (x).real -#define COMPLEX_IMAGINARY(x) (x).imaginary -#endif -#endif /* INT_TYPES_H */ - +#define CRT_HAS_128BIT +#endif + +#ifdef CRT_HAS_128BIT +typedef int ti_int __attribute__ ((mode (TI))); +typedef unsigned tu_int __attribute__ ((mode (TI))); + +typedef union +{ + ti_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + di_int high; +#else + di_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} twords; + +typedef union +{ + tu_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + du_int high; +#else + du_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} utwords; + +static __inline ti_int make_ti(di_int h, di_int l) { + twords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +static __inline tu_int make_tu(du_int h, du_int l) { + utwords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +#endif /* CRT_HAS_128BIT */ + +typedef union +{ + su_int u; + float f; +} float_bits; + +typedef union +{ + udwords u; + double f; +} double_bits; + +typedef struct +{ +#if _YUGA_LITTLE_ENDIAN + udwords low; + udwords high; +#else + udwords high; + udwords low; +#endif /* _YUGA_LITTLE_ENDIAN */ +} uqwords; + +typedef union +{ + uqwords u; + long double f; +} 
long_double_bits; + +#if __STDC_VERSION__ >= 199901L +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; + +#define COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) +#else +typedef struct { float real, imaginary; } Fcomplex; + +typedef struct { double real, imaginary; } Dcomplex; + +typedef struct { long double real, imaginary; } Lcomplex; + +#define COMPLEX_REAL(x) (x).real +#define COMPLEX_IMAGINARY(x) (x).imaginary +#endif +#endif /* INT_TYPES_H */ + diff --git a/contrib/libs/cxxsupp/builtins/int_util.c b/contrib/libs/cxxsupp/builtins/int_util.c index 550c7234870..420d1e237aa 100644 --- a/contrib/libs/cxxsupp/builtins/int_util.c +++ b/contrib/libs/cxxsupp/builtins/int_util.c @@ -1,61 +1,61 @@ -/* ===-- int_util.c - Implement internal utilities --------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include "int_util.h" - -/* NOTE: The definitions in this file are declared weak because we clients to be - * able to arbitrarily package individual functions into separate .a files. If - * we did not declare these weak, some link situations might end up seeing - * duplicate strong definitions of the same symbol. - * - * We can't use this solution for kernel use (which may not support weak), but - * currently expect that when built for kernel use all the functionality is - * packaged into a single library. 
- */ - -#ifdef KERNEL_USE - -NORETURN extern void panic(const char *, ...); -#ifndef _WIN32 -__attribute__((visibility("hidden"))) -#endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { - panic("%s:%d: abort in %s", file, line, function); -} - -#elif __APPLE__ - -/* from libSystem.dylib */ -NORETURN extern void __assert_rtn(const char *func, const char *file, int line, - const char *message); - -#ifndef _WIN32 -__attribute__((weak)) -__attribute__((visibility("hidden"))) -#endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { - __assert_rtn(function, file, line, "libcompiler_rt abort"); -} - -#else - -/* Get the system definition of abort() */ -#include - -#ifndef _WIN32 -__attribute__((weak)) -__attribute__((visibility("hidden"))) -#endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { - abort(); -} - -#endif +/* ===-- int_util.c - Implement internal utilities --------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_util.h" + +/* NOTE: The definitions in this file are declared weak because we clients to be + * able to arbitrarily package individual functions into separate .a files. If + * we did not declare these weak, some link situations might end up seeing + * duplicate strong definitions of the same symbol. + * + * We can't use this solution for kernel use (which may not support weak), but + * currently expect that when built for kernel use all the functionality is + * packaged into a single library. 
+ */ + +#ifdef KERNEL_USE + +NORETURN extern void panic(const char *, ...); +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +void compilerrt_abort_impl(const char *file, int line, const char *function) { + panic("%s:%d: abort in %s", file, line, function); +} + +#elif __APPLE__ + +/* from libSystem.dylib */ +NORETURN extern void __assert_rtn(const char *func, const char *file, int line, + const char *message); + +#ifndef _WIN32 +__attribute__((weak)) +__attribute__((visibility("hidden"))) +#endif +void compilerrt_abort_impl(const char *file, int line, const char *function) { + __assert_rtn(function, file, line, "libcompiler_rt abort"); +} + +#else + +/* Get the system definition of abort() */ +#include + +#ifndef _WIN32 +__attribute__((weak)) +__attribute__((visibility("hidden"))) +#endif +void compilerrt_abort_impl(const char *file, int line, const char *function) { + abort(); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/int_util.h b/contrib/libs/cxxsupp/builtins/int_util.h index 649a4a0d910..a7b20ed6624 100644 --- a/contrib/libs/cxxsupp/builtins/int_util.h +++ b/contrib/libs/cxxsupp/builtins/int_util.h @@ -1,33 +1,33 @@ -/* ===-- int_util.h - internal utility functions ----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===-----------------------------------------------------------------------=== - * - * This file is not part of the interface of this library. - * - * This file defines non-inline utilities which are available for use in the - * library. The function definitions themselves are all contained in int_util.c - * which will always be compiled into any compiler-rt library. - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef INT_UTIL_H -#define INT_UTIL_H - -/** \brief Trigger a program abort (or panic for kernel code). 
*/ -#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__) - -NORETURN void compilerrt_abort_impl(const char *file, int line, - const char *function); - -#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) -#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) -#define COMPILE_TIME_ASSERT2(expr, cnt) \ - typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED - -#endif /* INT_UTIL_H */ +/* ===-- int_util.h - internal utility functions ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===-----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. + * + * This file defines non-inline utilities which are available for use in the + * library. The function definitions themselves are all contained in int_util.c + * which will always be compiled into any compiler-rt library. + * + * ===-----------------------------------------------------------------------=== + */ + +#ifndef INT_UTIL_H +#define INT_UTIL_H + +/** \brief Trigger a program abort (or panic for kernel code). */ +#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__) + +NORETURN void compilerrt_abort_impl(const char *file, int line, + const char *function); + +#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) +#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) +#define COMPILE_TIME_ASSERT2(expr, cnt) \ + typedef char ct_assert_##cnt[(expr) ? 
1 : -1] UNUSED + +#endif /* INT_UTIL_H */ diff --git a/contrib/libs/cxxsupp/builtins/lshrdi3.c b/contrib/libs/cxxsupp/builtins/lshrdi3.c index 981b316005b..6b1ea923b77 100644 --- a/contrib/libs/cxxsupp/builtins/lshrdi3.c +++ b/contrib/libs/cxxsupp/builtins/lshrdi3.c @@ -1,43 +1,43 @@ -/* ===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __lshrdi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: logical a >> b */ - -/* Precondition: 0 <= b < bits_in_dword */ - -ARM_EABI_FNALIAS(llsr, lshrdi3) - -COMPILER_RT_ABI di_int -__lshrdi3(di_int a, si_int b) -{ - const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); - udwords input; - udwords result; - input.all = a; - if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ - { - result.s.high = 0; - result.s.low = input.s.high >> (b - bits_in_word); - } - else /* 0 <= b < bits_in_word */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); - } - return result.all; -} +/* ===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __lshrdi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: logical a >> b */ + +/* Precondition: 0 <= b < bits_in_dword */ + +ARM_EABI_FNALIAS(llsr, lshrdi3) + +COMPILER_RT_ABI di_int +__lshrdi3(di_int a, si_int b) +{ + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + udwords input; + udwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_word); + } + else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; +} diff --git a/contrib/libs/cxxsupp/builtins/lshrti3.c b/contrib/libs/cxxsupp/builtins/lshrti3.c index 9060165bb59..e4170ff84a5 100644 --- a/contrib/libs/cxxsupp/builtins/lshrti3.c +++ b/contrib/libs/cxxsupp/builtins/lshrti3.c @@ -1,45 +1,45 @@ -/* ===-- lshrti3.c - Implement __lshrti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __lshrti3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: logical a >> b */ - -/* Precondition: 0 <= b < bits_in_tword */ - -COMPILER_RT_ABI ti_int -__lshrti3(ti_int a, si_int b) -{ - const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); - utwords input; - utwords result; - input.all = a; - if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ - { - result.s.high = 0; - result.s.low = input.s.high >> (b - bits_in_dword); - } - else /* 0 <= b < bits_in_dword */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); - } - return result.all; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- lshrti3.c - Implement __lshrti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __lshrti3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: logical a >> b */ + +/* Precondition: 0 <= b < bits_in_tword */ + +COMPILER_RT_ABI ti_int +__lshrti3(ti_int a, si_int b) +{ + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + utwords input; + utwords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_dword); + } + else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt index bc6fcefc207..266e4221524 100644 --- a/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt +++ b/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt @@ -1,4 +1,4 @@ -file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt) -foreach(filter_file ${filter_files}) - set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file}) -endforeach() +file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt) +foreach(filter_file ${filter_files}) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file}) +endforeach() diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt index 55d61934a11..4b1683a6bae 100644 --- a/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt +++ b/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt @@ -1,16 +1,16 @@ -aeabi_cdcmpeq -aeabi_cdrcmple -aeabi_cfcmpeq -aeabi_cfrcmple -aeabi_dcmpeq -aeabi_dcmpge -aeabi_dcmpgt -aeabi_dcmple -aeabi_dcmplt -aeabi_drsub -aeabi_fcmpeq -aeabi_fcmpge -aeabi_fcmpgt -aeabi_fcmple -aeabi_fcmplt 
-aeabi_frsub +aeabi_cdcmpeq +aeabi_cdrcmple +aeabi_cfcmpeq +aeabi_cfrcmple +aeabi_dcmpeq +aeabi_dcmpge +aeabi_dcmpgt +aeabi_dcmple +aeabi_dcmplt +aeabi_drsub +aeabi_fcmpeq +aeabi_fcmpge +aeabi_fcmpgt +aeabi_fcmple +aeabi_fcmplt +aeabi_frsub diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt index aa208376122..6ac85a771fc 100644 --- a/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt +++ b/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt @@ -1,92 +1,92 @@ -absvdi2 -absvsi2 -addvdi3 -addvsi3 -ashldi3 -ashrdi3 -clzdi2 -clzsi2 -cmpdi2 -ctzdi2 -ctzsi2 -divdc3 -divdi3 -divsc3 -divmodsi4 -udivmodsi4 -do_global_dtors -ffsdi2 -fixdfdi -fixsfdi -fixunsdfdi -fixunsdfsi -fixunssfdi -fixunssfsi -floatdidf -floatdisf -floatundidf -floatundisf -gcc_bcmp -lshrdi3 -moddi3 -muldc3 -muldi3 -mulsc3 -mulvdi3 -mulvsi3 -negdi2 -negvdi2 -negvsi2 -paritydi2 -paritysi2 -popcountdi2 -popcountsi2 -powidf2 -powisf2 -subvdi3 -subvsi3 -ucmpdi2 -udiv_w_sdiv -udivdi3 -udivmoddi4 -umoddi3 -adddf3 -addsf3 -cmpdf2 -cmpsf2 -div0 -divdf3 -divsf3 -divsi3 -extendsfdf2 -extendhfsf2 -ffssi2 -fixdfsi -fixsfsi -floatsidf -floatsisf -floatunsidf -floatunsisf -comparedf2 -comparesf2 -modsi3 -muldf3 -mulsf3 -negdf2 -negsf2 -subdf3 -subsf3 -truncdfhf2 -truncdfsf2 -truncsfhf2 -udivsi3 -umodsi3 -unorddf2 -unordsf2 -atomic_flag_clear -atomic_flag_clear_explicit -atomic_flag_test_and_set -atomic_flag_test_and_set_explicit -atomic_signal_fence -atomic_thread_fence -int_util +absvdi2 +absvsi2 +addvdi3 +addvsi3 +ashldi3 +ashrdi3 +clzdi2 +clzsi2 +cmpdi2 +ctzdi2 +ctzsi2 +divdc3 +divdi3 +divsc3 +divmodsi4 +udivmodsi4 +do_global_dtors +ffsdi2 +fixdfdi +fixsfdi +fixunsdfdi +fixunsdfsi +fixunssfdi +fixunssfsi +floatdidf +floatdisf +floatundidf +floatundisf +gcc_bcmp +lshrdi3 +moddi3 +muldc3 +muldi3 +mulsc3 +mulvdi3 +mulvsi3 +negdi2 +negvdi2 +negvsi2 +paritydi2 +paritysi2 +popcountdi2 +popcountsi2 +powidf2 +powisf2 +subvdi3 +subvsi3 
+ucmpdi2 +udiv_w_sdiv +udivdi3 +udivmoddi4 +umoddi3 +adddf3 +addsf3 +cmpdf2 +cmpsf2 +div0 +divdf3 +divsf3 +divsi3 +extendsfdf2 +extendhfsf2 +ffssi2 +fixdfsi +fixsfsi +floatsidf +floatsisf +floatunsidf +floatunsisf +comparedf2 +comparesf2 +modsi3 +muldf3 +mulsf3 +negdf2 +negsf2 +subdf3 +subsf3 +truncdfhf2 +truncdfsf2 +truncsfhf2 +udivsi3 +umodsi3 +unorddf2 +unordsf2 +atomic_flag_clear +atomic_flag_clear_explicit +atomic_flag_test_and_set +atomic_flag_test_and_set_explicit +atomic_signal_fence +atomic_thread_fence +int_util diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt index 71f0f6993cd..b92e44bb35a 100644 --- a/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt +++ b/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt @@ -1,7 +1,7 @@ -i686.get_pc_thunk.eax -i686.get_pc_thunk.ebp -i686.get_pc_thunk.ebx -i686.get_pc_thunk.ecx -i686.get_pc_thunk.edi -i686.get_pc_thunk.edx -i686.get_pc_thunk.esi +i686.get_pc_thunk.eax +i686.get_pc_thunk.ebp +i686.get_pc_thunk.ebx +i686.get_pc_thunk.ecx +i686.get_pc_thunk.edi +i686.get_pc_thunk.edx +i686.get_pc_thunk.esi diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt index 6f4b75888b7..1c72fb1c3c6 100644 --- a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt +++ b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt @@ -1,10 +1,10 @@ -sync_fetch_and_add_8 -sync_fetch_and_sub_8 -sync_fetch_and_and_8 -sync_fetch_and_or_8 -sync_fetch_and_xor_8 -sync_fetch_and_nand_8 -sync_fetch_and_max_8 -sync_fetch_and_umax_8 -sync_fetch_and_min_8 -sync_fetch_and_umin_8 +sync_fetch_and_add_8 +sync_fetch_and_sub_8 +sync_fetch_and_and_8 +sync_fetch_and_or_8 +sync_fetch_and_xor_8 +sync_fetch_and_nand_8 +sync_fetch_and_max_8 +sync_fetch_and_umax_8 +sync_fetch_and_min_8 +sync_fetch_and_umin_8 diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt 
b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt index 2783f516b9a..6add5ecd2dc 100644 --- a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt +++ b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt @@ -1,14 +1,14 @@ -switch16 -switch32 -switch8 -switchu8 -sync_fetch_and_add_4 -sync_fetch_and_sub_4 -sync_fetch_and_and_4 -sync_fetch_and_or_4 -sync_fetch_and_xor_4 -sync_fetch_and_nand_4 -sync_fetch_and_max_4 -sync_fetch_and_umax_4 -sync_fetch_and_min_4 -sync_fetch_and_umin_4 +switch16 +switch32 +switch8 +switchu8 +sync_fetch_and_add_4 +sync_fetch_and_sub_4 +sync_fetch_and_and_4 +sync_fetch_and_or_4 +sync_fetch_and_xor_4 +sync_fetch_and_nand_4 +sync_fetch_and_max_4 +sync_fetch_and_umax_4 +sync_fetch_and_min_4 +sync_fetch_and_umin_4 diff --git a/contrib/libs/cxxsupp/builtins/moddi3.c b/contrib/libs/cxxsupp/builtins/moddi3.c index 003966075a0..a04279e3875 100644 --- a/contrib/libs/cxxsupp/builtins/moddi3.c +++ b/contrib/libs/cxxsupp/builtins/moddi3.c @@ -1,30 +1,30 @@ -/*===-- moddi3.c - Implement __moddi3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __moddi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a % b */ - -COMPILER_RT_ABI di_int -__moddi3(di_int a, di_int b) -{ - const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; - di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */ - b = (b ^ s) - s; /* negate if s == -1 */ - s = a >> bits_in_dword_m1; /* s = a < 0 ? 
-1 : 0 */ - a = (a ^ s) - s; /* negate if s == -1 */ - du_int r; - __udivmoddi4(a, b, &r); - return ((di_int)r ^ s) - s; /* negate if s == -1 */ -} +/*===-- moddi3.c - Implement __moddi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __moddi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI di_int +__moddi3(di_int a, di_int b) +{ + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */ + b = (b ^ s) - s; /* negate if s == -1 */ + s = a >> bits_in_dword_m1; /* s = a < 0 ? -1 : 0 */ + a = (a ^ s) - s; /* negate if s == -1 */ + du_int r; + __udivmoddi4(a, b, &r); + return ((di_int)r ^ s) - s; /* negate if s == -1 */ +} diff --git a/contrib/libs/cxxsupp/builtins/modsi3.c b/contrib/libs/cxxsupp/builtins/modsi3.c index e876f0487c8..86c73ce1377 100644 --- a/contrib/libs/cxxsupp/builtins/modsi3.c +++ b/contrib/libs/cxxsupp/builtins/modsi3.c @@ -1,23 +1,23 @@ -/* ===-- modsi3.c - Implement __modsi3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __modsi3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a % b */ - -COMPILER_RT_ABI si_int -__modsi3(si_int a, si_int b) -{ - return a - __divsi3(a, b) * b; -} +/* ===-- modsi3.c - Implement __modsi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __modsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI si_int +__modsi3(si_int a, si_int b) +{ + return a - __divsi3(a, b) * b; +} diff --git a/contrib/libs/cxxsupp/builtins/modti3.c b/contrib/libs/cxxsupp/builtins/modti3.c index f6edbdfb344..d505c07ac16 100644 --- a/contrib/libs/cxxsupp/builtins/modti3.c +++ b/contrib/libs/cxxsupp/builtins/modti3.c @@ -1,34 +1,34 @@ -/* ===-- modti3.c - Implement __modti3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __modti3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/*Returns: a % b */ - -COMPILER_RT_ABI ti_int -__modti3(ti_int a, ti_int b) -{ - const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; - ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */ - b = (b ^ s) - s; /* negate if s == -1 */ - s = a >> bits_in_tword_m1; /* s = a < 0 ? 
-1 : 0 */ - a = (a ^ s) - s; /* negate if s == -1 */ - tu_int r; - __udivmodti4(a, b, &r); - return ((ti_int)r ^ s) - s; /* negate if s == -1 */ -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- modti3.c - Implement __modti3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __modti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/*Returns: a % b */ + +COMPILER_RT_ABI ti_int +__modti3(ti_int a, ti_int b) +{ + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */ + b = (b ^ s) - s; /* negate if s == -1 */ + s = a >> bits_in_tword_m1; /* s = a < 0 ? -1 : 0 */ + a = (a ^ s) - s; /* negate if s == -1 */ + tu_int r; + __udivmodti4(a, b, &r); + return ((ti_int)r ^ s) - s; /* negate if s == -1 */ +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/muldc3.c b/contrib/libs/cxxsupp/builtins/muldc3.c index e40debca03a..16d8e98390a 100644 --- a/contrib/libs/cxxsupp/builtins/muldc3.c +++ b/contrib/libs/cxxsupp/builtins/muldc3.c @@ -1,73 +1,73 @@ -/* ===-- muldc3.c - Implement __muldc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __muldc3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the product of a + ib and c + id */ - -COMPILER_RT_ABI Dcomplex -__muldc3(double __a, double __b, double __c, double __d) -{ - double __ac = __a * __c; - double __bd = __b * __d; - double __ad = __a * __d; - double __bc = __b * __c; - Dcomplex z; - COMPLEX_REAL(z) = __ac - __bd; - COMPLEX_IMAGINARY(z) = __ad + __bc; - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - int __recalc = 0; - if (crt_isinf(__a) || crt_isinf(__b)) - { - __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b); - if (crt_isnan(__c)) - __c = crt_copysign(0, __c); - if (crt_isnan(__d)) - __d = crt_copysign(0, __d); - __recalc = 1; - } - if (crt_isinf(__c) || crt_isinf(__d)) - { - __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysign(crt_isinf(__d) ? 1 : 0, __d); - if (crt_isnan(__a)) - __a = crt_copysign(0, __a); - if (crt_isnan(__b)) - __b = crt_copysign(0, __b); - __recalc = 1; - } - if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || - crt_isinf(__ad) || crt_isinf(__bc))) - { - if (crt_isnan(__a)) - __a = crt_copysign(0, __a); - if (crt_isnan(__b)) - __b = crt_copysign(0, __b); - if (crt_isnan(__c)) - __c = crt_copysign(0, __c); - if (crt_isnan(__d)) - __d = crt_copysign(0, __d); - __recalc = 1; - } - if (__recalc) - { - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); - } - } - return z; -} +/* ===-- muldc3.c - Implement __muldc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __muldc3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI Dcomplex +__muldc3(double __a, double __b, double __c, double __d) +{ + double __ac = __a * __c; + double __bd = __b * __d; + double __ad = __a * __d; + double __bc = __b * __c; + Dcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) + { + __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysign(0, __c); + if (crt_isnan(__d)) + __d = crt_copysign(0, __d); + __recalc = 1; + } + if (crt_isinf(__c) || crt_isinf(__d)) + { + __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysign(crt_isinf(__d) ? 
1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysign(0, __a); + if (crt_isnan(__b)) + __b = crt_copysign(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || + crt_isinf(__ad) || crt_isinf(__bc))) + { + if (crt_isnan(__a)) + __a = crt_copysign(0, __a); + if (crt_isnan(__b)) + __b = crt_copysign(0, __b); + if (crt_isnan(__c)) + __c = crt_copysign(0, __c); + if (crt_isnan(__d)) + __d = crt_copysign(0, __d); + __recalc = 1; + } + if (__recalc) + { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/muldf3.c b/contrib/libs/cxxsupp/builtins/muldf3.c index 59827d8ce42..1eb733849e5 100644 --- a/contrib/libs/cxxsupp/builtins/muldf3.c +++ b/contrib/libs/cxxsupp/builtins/muldf3.c @@ -1,22 +1,22 @@ -//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements double-precision soft-float multiplication -// with the IEEE-754 default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_mul_impl.inc" - -ARM_EABI_FNALIAS(dmul, muldf3) - -COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { - return __mulXf3__(a, b); -} +//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_mul_impl.inc" + +ARM_EABI_FNALIAS(dmul, muldf3) + +COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { + return __mulXf3__(a, b); +} diff --git a/contrib/libs/cxxsupp/builtins/muldi3.c b/contrib/libs/cxxsupp/builtins/muldi3.c index 9beacb33b20..2dae44c11b9 100644 --- a/contrib/libs/cxxsupp/builtins/muldi3.c +++ b/contrib/libs/cxxsupp/builtins/muldi3.c @@ -1,56 +1,56 @@ -/* ===-- muldi3.c - Implement __muldi3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __muldi3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a * b */ - -static -di_int -__muldsi3(su_int a, su_int b) -{ - dwords r; - const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2; - const su_int lower_mask = (su_int)~0 >> bits_in_word_2; - r.s.low = (a & lower_mask) * (b & lower_mask); - su_int t = r.s.low >> bits_in_word_2; - r.s.low &= lower_mask; - t += (a >> bits_in_word_2) * (b & lower_mask); - r.s.low += (t & lower_mask) << bits_in_word_2; - r.s.high = t >> bits_in_word_2; - t = r.s.low >> bits_in_word_2; - r.s.low &= lower_mask; - t += (b >> bits_in_word_2) * (a & lower_mask); - r.s.low += (t & lower_mask) << bits_in_word_2; - r.s.high += t >> bits_in_word_2; - r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2); - return r.all; -} - -/* Returns: a * b */ - -ARM_EABI_FNALIAS(lmul, muldi3) - -COMPILER_RT_ABI di_int -__muldi3(di_int a, di_int b) -{ - dwords x; - x.all = a; - dwords y; - y.all = b; - dwords r; - r.all = __muldsi3(x.s.low, y.s.low); - r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; - return r.all; -} +/* ===-- muldi3.c - Implement __muldi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __muldi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +static +di_int +__muldsi3(su_int a, su_int b) +{ + dwords r; + const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2; + const su_int lower_mask = (su_int)~0 >> bits_in_word_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + su_int t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (a >> bits_in_word_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high = t >> bits_in_word_2; + t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (b >> bits_in_word_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high += t >> bits_in_word_2; + r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2); + return r.all; +} + +/* Returns: a * b */ + +ARM_EABI_FNALIAS(lmul, muldi3) + +COMPILER_RT_ABI di_int +__muldi3(di_int a, di_int b) +{ + dwords x; + x.all = a; + dwords y; + y.all = b; + dwords r; + r.all = __muldsi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; +} diff --git a/contrib/libs/cxxsupp/builtins/mulodi4.c b/contrib/libs/cxxsupp/builtins/mulodi4.c index b97d99ce0f7..d2fd7db2bcd 100644 --- a/contrib/libs/cxxsupp/builtins/mulodi4.c +++ b/contrib/libs/cxxsupp/builtins/mulodi4.c @@ -1,58 +1,58 @@ -/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __mulodi4 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a * b */ - -/* Effects: sets *overflow to 1 if a * b overflows */ - -COMPILER_RT_ABI di_int -__mulodi4(di_int a, di_int b, int* overflow) -{ - const int N = (int)(sizeof(di_int) * CHAR_BIT); - const di_int MIN = (di_int)1 << (N-1); - const di_int MAX = ~MIN; - *overflow = 0; - di_int result = a * b; - if (a == MIN) - { - if (b != 0 && b != 1) - *overflow = 1; - return result; - } - if (b == MIN) - { - if (a != 0 && a != 1) - *overflow = 1; - return result; - } - di_int sa = a >> (N - 1); - di_int abs_a = (a ^ sa) - sa; - di_int sb = b >> (N - 1); - di_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return result; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - *overflow = 1; - } - else - { - if (abs_a > MIN / -abs_b) - *overflow = 1; - } - return result; -} +/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulodi4 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: sets *overflow to 1 if a * b overflows */ + +COMPILER_RT_ABI di_int +__mulodi4(di_int a, di_int b, int* overflow) +{ + const int N = (int)(sizeof(di_int) * CHAR_BIT); + const di_int MIN = (di_int)1 << (N-1); + const di_int MAX = ~MIN; + *overflow = 0; + di_int result = a * b; + if (a == MIN) + { + if (b != 0 && b != 1) + *overflow = 1; + return result; + } + if (b == MIN) + { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + di_int sa = a >> (N - 1); + di_int abs_a = (a ^ sa) - sa; + di_int sb = b >> (N - 1); + di_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + *overflow = 1; + } + else + { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; +} diff --git a/contrib/libs/cxxsupp/builtins/mulosi4.c b/contrib/libs/cxxsupp/builtins/mulosi4.c index 69ab084f9d3..422528085c4 100644 --- a/contrib/libs/cxxsupp/builtins/mulosi4.c +++ b/contrib/libs/cxxsupp/builtins/mulosi4.c @@ -1,58 +1,58 @@ -/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __mulosi4 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a * b */ - -/* Effects: sets *overflow to 1 if a * b overflows */ - -COMPILER_RT_ABI si_int -__mulosi4(si_int a, si_int b, int* overflow) -{ - const int N = (int)(sizeof(si_int) * CHAR_BIT); - const si_int MIN = (si_int)1 << (N-1); - const si_int MAX = ~MIN; - *overflow = 0; - si_int result = a * b; - if (a == MIN) - { - if (b != 0 && b != 1) - *overflow = 1; - return result; - } - if (b == MIN) - { - if (a != 0 && a != 1) - *overflow = 1; - return result; - } - si_int sa = a >> (N - 1); - si_int abs_a = (a ^ sa) - sa; - si_int sb = b >> (N - 1); - si_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return result; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - *overflow = 1; - } - else - { - if (abs_a > MIN / -abs_b) - *overflow = 1; - } - return result; -} +/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulosi4 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: sets *overflow to 1 if a * b overflows */ + +COMPILER_RT_ABI si_int +__mulosi4(si_int a, si_int b, int* overflow) +{ + const int N = (int)(sizeof(si_int) * CHAR_BIT); + const si_int MIN = (si_int)1 << (N-1); + const si_int MAX = ~MIN; + *overflow = 0; + si_int result = a * b; + if (a == MIN) + { + if (b != 0 && b != 1) + *overflow = 1; + return result; + } + if (b == MIN) + { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + si_int sa = a >> (N - 1); + si_int abs_a = (a ^ sa) - sa; + si_int sb = b >> (N - 1); + si_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + *overflow = 1; + } + else + { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; +} diff --git a/contrib/libs/cxxsupp/builtins/muloti4.c b/contrib/libs/cxxsupp/builtins/muloti4.c index 67fcbcc6f7a..aef8207aaa8 100644 --- a/contrib/libs/cxxsupp/builtins/muloti4.c +++ b/contrib/libs/cxxsupp/builtins/muloti4.c @@ -1,63 +1,63 @@ -/*===-- muloti4.c - Implement __muloti4 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __muloti4 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a * b */ - -/* Effects: sets *overflow to 1 if a * b overflows */ - +/*===-- muloti4.c - Implement __muloti4 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __muloti4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a * b */ + +/* Effects: sets *overflow to 1 if a * b overflows */ + __attribute__((no_sanitize("undefined"))) -COMPILER_RT_ABI ti_int -__muloti4(ti_int a, ti_int b, int* overflow) -{ - const int N = (int)(sizeof(ti_int) * CHAR_BIT); - const ti_int MIN = (ti_int)1 << (N-1); - const ti_int MAX = ~MIN; - *overflow = 0; - ti_int result = a * b; - if (a == MIN) - { - if (b != 0 && b != 1) - *overflow = 1; - return result; - } - if (b == MIN) - { - if (a != 0 && a != 1) - *overflow = 1; - return result; - } - ti_int sa = a >> (N - 1); - ti_int abs_a = (a ^ sa) - sa; - ti_int sb = b >> (N - 1); - ti_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return result; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - *overflow = 1; - } - else - { - if (abs_a > MIN / -abs_b) - *overflow = 1; - } - return result; -} - -#endif /* CRT_HAS_128BIT */ +COMPILER_RT_ABI ti_int +__muloti4(ti_int a, ti_int b, int* overflow) +{ + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + const ti_int MIN = (ti_int)1 << (N-1); + const ti_int MAX = ~MIN; + *overflow = 0; + ti_int result = a * b; + if (a == MIN) + { + if (b != 0 && b != 1) + *overflow = 1; + return result; + } + if (b == MIN) + { + if (a != 0 && a != 1) + 
*overflow = 1; + return result; + } + ti_int sa = a >> (N - 1); + ti_int abs_a = (a ^ sa) - sa; + ti_int sb = b >> (N - 1); + ti_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + *overflow = 1; + } + else + { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/mulsc3.c b/contrib/libs/cxxsupp/builtins/mulsc3.c index 270c6de8207..c89cfd247a1 100644 --- a/contrib/libs/cxxsupp/builtins/mulsc3.c +++ b/contrib/libs/cxxsupp/builtins/mulsc3.c @@ -1,73 +1,73 @@ -/* ===-- mulsc3.c - Implement __mulsc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __mulsc3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the product of a + ib and c + id */ - -COMPILER_RT_ABI Fcomplex -__mulsc3(float __a, float __b, float __c, float __d) -{ - float __ac = __a * __c; - float __bd = __b * __d; - float __ad = __a * __d; - float __bc = __b * __c; - Fcomplex z; - COMPLEX_REAL(z) = __ac - __bd; - COMPLEX_IMAGINARY(z) = __ad + __bc; - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - int __recalc = 0; - if (crt_isinf(__a) || crt_isinf(__b)) - { - __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); - if (crt_isnan(__c)) - __c = crt_copysignf(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignf(0, __d); - __recalc = 1; - } - if (crt_isinf(__c) || crt_isinf(__d)) - { - __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignf(crt_isinf(__d) ? 
1 : 0, __d); - if (crt_isnan(__a)) - __a = crt_copysignf(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignf(0, __b); - __recalc = 1; - } - if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || - crt_isinf(__ad) || crt_isinf(__bc))) - { - if (crt_isnan(__a)) - __a = crt_copysignf(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignf(0, __b); - if (crt_isnan(__c)) - __c = crt_copysignf(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignf(0, __d); - __recalc = 1; - } - if (__recalc) - { - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); - } - } - return z; -} +/* ===-- mulsc3.c - Implement __mulsc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulsc3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI Fcomplex +__mulsc3(float __a, float __b, float __c, float __d) +{ + float __ac = __a * __c; + float __bd = __b * __d; + float __ad = __a * __d; + float __bc = __b * __c; + Fcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) + { + __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysignf(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignf(0, __d); + __recalc = 1; + } + if (crt_isinf(__c) || crt_isinf(__d)) + { + __c = crt_copysignf(crt_isinf(__c) ? 
1 : 0, __c); + __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysignf(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignf(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || + crt_isinf(__ad) || crt_isinf(__bc))) + { + if (crt_isnan(__a)) + __a = crt_copysignf(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignf(0, __b); + if (crt_isnan(__c)) + __c = crt_copysignf(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignf(0, __d); + __recalc = 1; + } + if (__recalc) + { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/mulsf3.c b/contrib/libs/cxxsupp/builtins/mulsf3.c index 7c7f94531ef..478b3bc0e0e 100644 --- a/contrib/libs/cxxsupp/builtins/mulsf3.c +++ b/contrib/libs/cxxsupp/builtins/mulsf3.c @@ -1,22 +1,22 @@ -//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float multiplication -// with the IEEE-754 default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_mul_impl.inc" - -ARM_EABI_FNALIAS(fmul, mulsf3) - -COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { - return __mulXf3__(a, b); -} +//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_mul_impl.inc" + +ARM_EABI_FNALIAS(fmul, mulsf3) + +COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { + return __mulXf3__(a, b); +} diff --git a/contrib/libs/cxxsupp/builtins/multc3.c b/contrib/libs/cxxsupp/builtins/multc3.c index aabe0634b38..0518bc2569f 100644 --- a/contrib/libs/cxxsupp/builtins/multc3.c +++ b/contrib/libs/cxxsupp/builtins/multc3.c @@ -1,68 +1,68 @@ -/* ===-- multc3.c - Implement __multc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __multc3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the product of a + ib and c + id */ - -COMPILER_RT_ABI long double _Complex -__multc3(long double a, long double b, long double c, long double d) -{ - long double ac = a * c; - long double bd = b * d; - long double ad = a * d; - long double bc = b * c; - long double _Complex z; - __real__ z = ac - bd; - __imag__ z = ad + bc; - if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) { - int recalc = 0; - if (crt_isinf(a) || crt_isinf(b)) { - a = crt_copysignl(crt_isinf(a) ? 1 : 0, a); - b = crt_copysignl(crt_isinf(b) ? 1 : 0, b); - if (crt_isnan(c)) - c = crt_copysignl(0, c); - if (crt_isnan(d)) - d = crt_copysignl(0, d); - recalc = 1; - } - if (crt_isinf(c) || crt_isinf(d)) { - c = crt_copysignl(crt_isinf(c) ? 
1 : 0, c); - d = crt_copysignl(crt_isinf(d) ? 1 : 0, d); - if (crt_isnan(a)) - a = crt_copysignl(0, a); - if (crt_isnan(b)) - b = crt_copysignl(0, b); - recalc = 1; - } - if (!recalc && (crt_isinf(ac) || crt_isinf(bd) || - crt_isinf(ad) || crt_isinf(bc))) { - if (crt_isnan(a)) - a = crt_copysignl(0, a); - if (crt_isnan(b)) - b = crt_copysignl(0, b); - if (crt_isnan(c)) - c = crt_copysignl(0, c); - if (crt_isnan(d)) - d = crt_copysignl(0, d); - recalc = 1; - } - if (recalc) { - __real__ z = CRT_INFINITY * (a * c - b * d); - __imag__ z = CRT_INFINITY * (a * d + b * c); - } - } - return z; -} +/* ===-- multc3.c - Implement __multc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __multc3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI long double _Complex +__multc3(long double a, long double b, long double c, long double d) +{ + long double ac = a * c; + long double bd = b * d; + long double ad = a * d; + long double bc = b * c; + long double _Complex z; + __real__ z = ac - bd; + __imag__ z = ad + bc; + if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) { + int recalc = 0; + if (crt_isinf(a) || crt_isinf(b)) { + a = crt_copysignl(crt_isinf(a) ? 1 : 0, a); + b = crt_copysignl(crt_isinf(b) ? 1 : 0, b); + if (crt_isnan(c)) + c = crt_copysignl(0, c); + if (crt_isnan(d)) + d = crt_copysignl(0, d); + recalc = 1; + } + if (crt_isinf(c) || crt_isinf(d)) { + c = crt_copysignl(crt_isinf(c) ? 1 : 0, c); + d = crt_copysignl(crt_isinf(d) ? 
1 : 0, d); + if (crt_isnan(a)) + a = crt_copysignl(0, a); + if (crt_isnan(b)) + b = crt_copysignl(0, b); + recalc = 1; + } + if (!recalc && (crt_isinf(ac) || crt_isinf(bd) || + crt_isinf(ad) || crt_isinf(bc))) { + if (crt_isnan(a)) + a = crt_copysignl(0, a); + if (crt_isnan(b)) + b = crt_copysignl(0, b); + if (crt_isnan(c)) + c = crt_copysignl(0, c); + if (crt_isnan(d)) + d = crt_copysignl(0, d); + recalc = 1; + } + if (recalc) { + __real__ z = CRT_INFINITY * (a * c - b * d); + __imag__ z = CRT_INFINITY * (a * d + b * c); + } + } + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/multf3.c b/contrib/libs/cxxsupp/builtins/multf3.c index 3df42e3c2ac..0b915923ea0 100644 --- a/contrib/libs/cxxsupp/builtins/multf3.c +++ b/contrib/libs/cxxsupp/builtins/multf3.c @@ -1,25 +1,25 @@ -//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements quad-precision soft-float multiplication -// with the IEEE-754 default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -#include "fp_mul_impl.inc" - -COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { - return __mulXf3__(a, b); -} - -#endif +//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { + return __mulXf3__(a, b); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/multi3.c b/contrib/libs/cxxsupp/builtins/multi3.c index 5b3f558713d..e0d52d430b6 100644 --- a/contrib/libs/cxxsupp/builtins/multi3.c +++ b/contrib/libs/cxxsupp/builtins/multi3.c @@ -1,58 +1,58 @@ -/* ===-- multi3.c - Implement __multi3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - - * This file implements __multi3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a * b */ - -static -ti_int -__mulddi3(du_int a, du_int b) -{ - twords r; - const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2; - const du_int lower_mask = (du_int)~0 >> bits_in_dword_2; - r.s.low = (a & lower_mask) * (b & lower_mask); - du_int t = r.s.low >> bits_in_dword_2; - r.s.low &= lower_mask; - t += (a >> bits_in_dword_2) * (b & lower_mask); - r.s.low += (t & lower_mask) << bits_in_dword_2; - r.s.high = t >> bits_in_dword_2; - t = r.s.low >> bits_in_dword_2; - r.s.low &= lower_mask; - t += (b >> bits_in_dword_2) * (a & lower_mask); - r.s.low += (t & lower_mask) << bits_in_dword_2; - r.s.high += t >> bits_in_dword_2; - r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); - return r.all; -} - -/* Returns: a * b */ - -COMPILER_RT_ABI ti_int -__multi3(ti_int a, ti_int b) -{ - twords x; - x.all = a; - twords y; - y.all = b; - twords r; - r.all = __mulddi3(x.s.low, y.s.low); - r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; - return r.all; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- multi3.c - Implement __multi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + + * This file implements __multi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a * b */ + +static +ti_int +__mulddi3(du_int a, du_int b) +{ + twords r; + const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2; + const du_int lower_mask = (du_int)~0 >> bits_in_dword_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + du_int t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (a >> bits_in_dword_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high = t >> bits_in_dword_2; + t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (b >> bits_in_dword_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high += t >> bits_in_dword_2; + r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); + return r.all; +} + +/* Returns: a * b */ + +COMPILER_RT_ABI ti_int +__multi3(ti_int a, ti_int b) +{ + twords x; + x.all = a; + twords y; + y.all = b; + twords r; + r.all = __mulddi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/mulvdi3.c b/contrib/libs/cxxsupp/builtins/mulvdi3.c index 69095e02415..e63249e0a04 100644 --- a/contrib/libs/cxxsupp/builtins/mulvdi3.c +++ b/contrib/libs/cxxsupp/builtins/mulvdi3.c @@ -1,56 +1,56 @@ -/*===-- mulvdi3.c - Implement __mulvdi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __mulvdi3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a * b */ - -/* Effects: aborts if a * b overflows */ - -COMPILER_RT_ABI di_int -__mulvdi3(di_int a, di_int b) -{ - const int N = (int)(sizeof(di_int) * CHAR_BIT); - const di_int MIN = (di_int)1 << (N-1); - const di_int MAX = ~MIN; - if (a == MIN) - { - if (b == 0 || b == 1) - return a * b; - compilerrt_abort(); - } - if (b == MIN) - { - if (a == 0 || a == 1) - return a * b; - compilerrt_abort(); - } - di_int sa = a >> (N - 1); - di_int abs_a = (a ^ sa) - sa; - di_int sb = b >> (N - 1); - di_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return a * b; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - compilerrt_abort(); - } - else - { - if (abs_a > MIN / -abs_b) - compilerrt_abort(); - } - return a * b; -} +/*===-- mulvdi3.c - Implement __mulvdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulvdi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: aborts if a * b overflows */ + +COMPILER_RT_ABI di_int +__mulvdi3(di_int a, di_int b) +{ + const int N = (int)(sizeof(di_int) * CHAR_BIT); + const di_int MIN = (di_int)1 << (N-1); + const di_int MAX = ~MIN; + if (a == MIN) + { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) + { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + di_int sa = a >> (N - 1); + di_int abs_a = (a ^ sa) - sa; + di_int sb = b >> (N - 1); + di_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return a * b; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } + else + { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; +} diff --git a/contrib/libs/cxxsupp/builtins/mulvsi3.c b/contrib/libs/cxxsupp/builtins/mulvsi3.c index 210a20138ef..74ea4f2da22 100644 --- a/contrib/libs/cxxsupp/builtins/mulvsi3.c +++ b/contrib/libs/cxxsupp/builtins/mulvsi3.c @@ -1,56 +1,56 @@ -/* ===-- mulvsi3.c - Implement __mulvsi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __mulvsi3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a * b */ - -/* Effects: aborts if a * b overflows */ - -COMPILER_RT_ABI si_int -__mulvsi3(si_int a, si_int b) -{ - const int N = (int)(sizeof(si_int) * CHAR_BIT); - const si_int MIN = (si_int)1 << (N-1); - const si_int MAX = ~MIN; - if (a == MIN) - { - if (b == 0 || b == 1) - return a * b; - compilerrt_abort(); - } - if (b == MIN) - { - if (a == 0 || a == 1) - return a * b; - compilerrt_abort(); - } - si_int sa = a >> (N - 1); - si_int abs_a = (a ^ sa) - sa; - si_int sb = b >> (N - 1); - si_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return a * b; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - compilerrt_abort(); - } - else - { - if (abs_a > MIN / -abs_b) - compilerrt_abort(); - } - return a * b; -} +/* ===-- mulvsi3.c - Implement __mulvsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulvsi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: aborts if a * b overflows */ + +COMPILER_RT_ABI si_int +__mulvsi3(si_int a, si_int b) +{ + const int N = (int)(sizeof(si_int) * CHAR_BIT); + const si_int MIN = (si_int)1 << (N-1); + const si_int MAX = ~MIN; + if (a == MIN) + { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) + { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + si_int sa = a >> (N - 1); + si_int abs_a = (a ^ sa) - sa; + si_int sb = b >> (N - 1); + si_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return a * b; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } + else + { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; +} diff --git a/contrib/libs/cxxsupp/builtins/mulvti3.c b/contrib/libs/cxxsupp/builtins/mulvti3.c index e8e817a2df5..f4c7d1612ba 100644 --- a/contrib/libs/cxxsupp/builtins/mulvti3.c +++ b/contrib/libs/cxxsupp/builtins/mulvti3.c @@ -1,60 +1,60 @@ -/* ===-- mulvti3.c - Implement __mulvti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __mulvti3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a * b */ - -/* Effects: aborts if a * b overflows */ - -COMPILER_RT_ABI ti_int -__mulvti3(ti_int a, ti_int b) -{ - const int N = (int)(sizeof(ti_int) * CHAR_BIT); - const ti_int MIN = (ti_int)1 << (N-1); - const ti_int MAX = ~MIN; - if (a == MIN) - { - if (b == 0 || b == 1) - return a * b; - compilerrt_abort(); - } - if (b == MIN) - { - if (a == 0 || a == 1) - return a * b; - compilerrt_abort(); - } - ti_int sa = a >> (N - 1); - ti_int abs_a = (a ^ sa) - sa; - ti_int sb = b >> (N - 1); - ti_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return a * b; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - compilerrt_abort(); - } - else - { - if (abs_a > MIN / -abs_b) - compilerrt_abort(); - } - return a * b; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- mulvti3.c - Implement __mulvti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulvti3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a * b */ + +/* Effects: aborts if a * b overflows */ + +COMPILER_RT_ABI ti_int +__mulvti3(ti_int a, ti_int b) +{ + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + const ti_int MIN = (ti_int)1 << (N-1); + const ti_int MAX = ~MIN; + if (a == MIN) + { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) + { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + ti_int sa = a >> (N - 1); + ti_int abs_a = (a ^ sa) - sa; + ti_int sb = b >> (N - 1); + ti_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return a * b; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } + else + { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/mulxc3.c b/contrib/libs/cxxsupp/builtins/mulxc3.c index ba615ddf5fe..ba322169182 100644 --- a/contrib/libs/cxxsupp/builtins/mulxc3.c +++ b/contrib/libs/cxxsupp/builtins/mulxc3.c @@ -1,77 +1,77 @@ -/* ===-- mulxc3.c - Implement __mulxc3 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __mulxc3 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#if !_ARCH_PPC - -#include "int_lib.h" -#include "int_math.h" - -/* Returns: the product of a + ib and c + id */ - -COMPILER_RT_ABI Lcomplex -__mulxc3(long double __a, long double __b, long double __c, long double __d) -{ - long double __ac = __a * __c; - long double __bd = __b * __d; - long double __ad = __a * __d; - long double __bc = __b * __c; - Lcomplex z; - COMPLEX_REAL(z) = __ac - __bd; - COMPLEX_IMAGINARY(z) = __ad + __bc; - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - int __recalc = 0; - if (crt_isinf(__a) || crt_isinf(__b)) - { - __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); - if (crt_isnan(__c)) - __c = crt_copysignl(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignl(0, __d); - __recalc = 1; - } - if (crt_isinf(__c) || crt_isinf(__d)) - { - __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d); - if (crt_isnan(__a)) - __a = crt_copysignl(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignl(0, __b); - __recalc = 1; - } - if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || - crt_isinf(__ad) || crt_isinf(__bc))) - { - if (crt_isnan(__a)) - __a = crt_copysignl(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignl(0, __b); - if (crt_isnan(__c)) - __c = crt_copysignl(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignl(0, __d); - __recalc = 1; - } - if (__recalc) - { - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); - } - } - return z; -} - -#endif +/* ===-- mulxc3.c - Implement __mulxc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulxc3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI Lcomplex +__mulxc3(long double __a, long double __b, long double __c, long double __d) +{ + long double __ac = __a * __c; + long double __bd = __b * __d; + long double __ad = __a * __d; + long double __bc = __b * __c; + Lcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) + { + __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysignl(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignl(0, __d); + __recalc = 1; + } + if (crt_isinf(__c) || crt_isinf(__d)) + { + __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 
1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysignl(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignl(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || + crt_isinf(__ad) || crt_isinf(__bc))) + { + if (crt_isnan(__a)) + __a = crt_copysignl(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignl(0, __b); + if (crt_isnan(__c)) + __c = crt_copysignl(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignl(0, __d); + __recalc = 1; + } + if (__recalc) + { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/negdf2.c b/contrib/libs/cxxsupp/builtins/negdf2.c index 39a6de087ee..d634b421cb7 100644 --- a/contrib/libs/cxxsupp/builtins/negdf2.c +++ b/contrib/libs/cxxsupp/builtins/negdf2.c @@ -1,22 +1,22 @@ -//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements double-precision soft-float negation. -// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(dneg, negdf2) - -COMPILER_RT_ABI fp_t -__negdf2(fp_t a) { - return fromRep(toRep(a) ^ signBit); -} +//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float negation. 
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(dneg, negdf2) + +COMPILER_RT_ABI fp_t +__negdf2(fp_t a) { + return fromRep(toRep(a) ^ signBit); +} diff --git a/contrib/libs/cxxsupp/builtins/negdi2.c b/contrib/libs/cxxsupp/builtins/negdi2.c index d8e579e049f..3d49ba2899d 100644 --- a/contrib/libs/cxxsupp/builtins/negdi2.c +++ b/contrib/libs/cxxsupp/builtins/negdi2.c @@ -1,26 +1,26 @@ -/* ===-- negdi2.c - Implement __negdi2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __negdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: -a */ - -COMPILER_RT_ABI di_int -__negdi2(di_int a) -{ - /* Note: this routine is here for API compatibility; any sane compiler - * should expand it inline. - */ - return -a; -} +/* ===-- negdi2.c - Implement __negdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: -a */ + +COMPILER_RT_ABI di_int +__negdi2(di_int a) +{ + /* Note: this routine is here for API compatibility; any sane compiler + * should expand it inline. 
+ */ + return -a; +} diff --git a/contrib/libs/cxxsupp/builtins/negsf2.c b/contrib/libs/cxxsupp/builtins/negsf2.c index 2d67569329c..29c17be4145 100644 --- a/contrib/libs/cxxsupp/builtins/negsf2.c +++ b/contrib/libs/cxxsupp/builtins/negsf2.c @@ -1,22 +1,22 @@ -//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float negation. -// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(fneg, negsf2) - -COMPILER_RT_ABI fp_t -__negsf2(fp_t a) { - return fromRep(toRep(a) ^ signBit); -} +//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float negation. 
+// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(fneg, negsf2) + +COMPILER_RT_ABI fp_t +__negsf2(fp_t a) { + return fromRep(toRep(a) ^ signBit); +} diff --git a/contrib/libs/cxxsupp/builtins/negti2.c b/contrib/libs/cxxsupp/builtins/negti2.c index 084abca4971..9b00b303f85 100644 --- a/contrib/libs/cxxsupp/builtins/negti2.c +++ b/contrib/libs/cxxsupp/builtins/negti2.c @@ -1,30 +1,30 @@ -/* ===-- negti2.c - Implement __negti2 -------------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __negti2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: -a */ - -COMPILER_RT_ABI ti_int -__negti2(ti_int a) -{ - /* Note: this routine is here for API compatibility; any sane compiler - * should expand it inline. - */ - return -a; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- negti2.c - Implement __negti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: -a */ + +COMPILER_RT_ABI ti_int +__negti2(ti_int a) +{ + /* Note: this routine is here for API compatibility; any sane compiler + * should expand it inline. 
+ */ + return -a; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/negvdi2.c b/contrib/libs/cxxsupp/builtins/negvdi2.c index 9ea5988d2d6..e336ecf28f0 100644 --- a/contrib/libs/cxxsupp/builtins/negvdi2.c +++ b/contrib/libs/cxxsupp/builtins/negvdi2.c @@ -1,28 +1,28 @@ -/* ===-- negvdi2.c - Implement __negvdi2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __negvdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: -a */ - -/* Effects: aborts if -a overflows */ - -COMPILER_RT_ABI di_int -__negvdi2(di_int a) -{ - const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1); - if (a == MIN) - compilerrt_abort(); - return -a; -} +/* ===-- negvdi2.c - Implement __negvdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negvdi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: -a */ + +/* Effects: aborts if -a overflows */ + +COMPILER_RT_ABI di_int +__negvdi2(di_int a) +{ + const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1); + if (a == MIN) + compilerrt_abort(); + return -a; +} diff --git a/contrib/libs/cxxsupp/builtins/negvsi2.c b/contrib/libs/cxxsupp/builtins/negvsi2.c index 065c487ad6b..b9e93fef06c 100644 --- a/contrib/libs/cxxsupp/builtins/negvsi2.c +++ b/contrib/libs/cxxsupp/builtins/negvsi2.c @@ -1,28 +1,28 @@ -/* ===-- negvsi2.c - Implement __negvsi2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __negvsi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: -a */ - -/* Effects: aborts if -a overflows */ - -COMPILER_RT_ABI si_int -__negvsi2(si_int a) -{ - const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1); - if (a == MIN) - compilerrt_abort(); - return -a; -} +/* ===-- negvsi2.c - Implement __negvsi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negvsi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: -a */ + +/* Effects: aborts if -a overflows */ + +COMPILER_RT_ABI si_int +__negvsi2(si_int a) +{ + const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1); + if (a == MIN) + compilerrt_abort(); + return -a; +} diff --git a/contrib/libs/cxxsupp/builtins/negvti2.c b/contrib/libs/cxxsupp/builtins/negvti2.c index c5bf2aea2be..85f9f7d19d9 100644 --- a/contrib/libs/cxxsupp/builtins/negvti2.c +++ b/contrib/libs/cxxsupp/builtins/negvti2.c @@ -1,32 +1,32 @@ -/*===-- negvti2.c - Implement __negvti2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------=== - * - *This file implements __negvti2 for the compiler_rt library. - * - *===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: -a */ - -/* Effects: aborts if -a overflows */ - -COMPILER_RT_ABI ti_int -__negvti2(ti_int a) -{ - const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1); - if (a == MIN) - compilerrt_abort(); - return -a; -} - -#endif /* CRT_HAS_128BIT */ +/*===-- negvti2.c - Implement __negvti2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + *This file implements __negvti2 for the compiler_rt library. 
+ * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: -a */ + +/* Effects: aborts if -a overflows */ + +COMPILER_RT_ABI ti_int +__negvti2(ti_int a) +{ + const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1); + if (a == MIN) + compilerrt_abort(); + return -a; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/paritydi2.c b/contrib/libs/cxxsupp/builtins/paritydi2.c index f8a248290fb..8ea5ab4214e 100644 --- a/contrib/libs/cxxsupp/builtins/paritydi2.c +++ b/contrib/libs/cxxsupp/builtins/paritydi2.c @@ -1,25 +1,25 @@ -/* ===-- paritydi2.c - Implement __paritydi2 -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __paritydi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: 1 if number of bits is odd else returns 0 */ - -COMPILER_RT_ABI si_int -__paritydi2(di_int a) -{ - dwords x; - x.all = a; - return __paritysi2(x.s.high ^ x.s.low); -} +/* ===-- paritydi2.c - Implement __paritydi2 -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __paritydi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: 1 if number of bits is odd else returns 0 */ + +COMPILER_RT_ABI si_int +__paritydi2(di_int a) +{ + dwords x; + x.all = a; + return __paritysi2(x.s.high ^ x.s.low); +} diff --git a/contrib/libs/cxxsupp/builtins/paritysi2.c b/contrib/libs/cxxsupp/builtins/paritysi2.c index 5ea59fd2b62..59998466384 100644 --- a/contrib/libs/cxxsupp/builtins/paritysi2.c +++ b/contrib/libs/cxxsupp/builtins/paritysi2.c @@ -1,27 +1,27 @@ -/* ===-- paritysi2.c - Implement __paritysi2 -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __paritysi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: 1 if number of bits is odd else returns 0 */ - -COMPILER_RT_ABI si_int -__paritysi2(si_int a) -{ - su_int x = (su_int)a; - x ^= x >> 16; - x ^= x >> 8; - x ^= x >> 4; - return (0x6996 >> (x & 0xF)) & 1; -} +/* ===-- paritysi2.c - Implement __paritysi2 -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __paritysi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: 1 if number of bits is odd else returns 0 */ + +COMPILER_RT_ABI si_int +__paritysi2(si_int a) +{ + su_int x = (su_int)a; + x ^= x >> 16; + x ^= x >> 8; + x ^= x >> 4; + return (0x6996 >> (x & 0xF)) & 1; +} diff --git a/contrib/libs/cxxsupp/builtins/parityti2.c b/contrib/libs/cxxsupp/builtins/parityti2.c index 385eab0e695..5a4fe492486 100644 --- a/contrib/libs/cxxsupp/builtins/parityti2.c +++ b/contrib/libs/cxxsupp/builtins/parityti2.c @@ -1,29 +1,29 @@ -/* ===-- parityti2.c - Implement __parityti2 -------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __parityti2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: 1 if number of bits is odd else returns 0 */ - -COMPILER_RT_ABI si_int -__parityti2(ti_int a) -{ - twords x; - x.all = a; - return __paritydi2(x.s.high ^ x.s.low); -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- parityti2.c - Implement __parityti2 -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __parityti2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: 1 if number of bits is odd else returns 0 */ + +COMPILER_RT_ABI si_int +__parityti2(ti_int a) +{ + twords x; + x.all = a; + return __paritydi2(x.s.high ^ x.s.low); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/popcountdi2.c b/contrib/libs/cxxsupp/builtins/popcountdi2.c index ee88c79f10d..5e8a62f075e 100644 --- a/contrib/libs/cxxsupp/builtins/popcountdi2.c +++ b/contrib/libs/cxxsupp/builtins/popcountdi2.c @@ -1,36 +1,36 @@ -/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __popcountdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: count of 1 bits */ - -COMPILER_RT_ABI si_int -__popcountdi2(di_int a) -{ - du_int x2 = (du_int)a; - x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); - /* Every 2 bits holds the sum of every pair of bits (32) */ - x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); - /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */ - x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; - /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */ - su_int x = (su_int)(x2 + (x2 >> 32)); - /* The lower 32 bits hold four 16 bit sums (5 significant bits). */ - /* Upper 32 bits are garbage */ - x = x + (x >> 16); - /* The lower 16 bits hold two 32 bit sums (6 significant bits). 
*/ - /* Upper 16 bits are garbage */ - return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */ -} +/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountdi2(di_int a) +{ + du_int x2 = (du_int)a; + x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); + /* Every 2 bits holds the sum of every pair of bits (32) */ + x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */ + x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */ + su_int x = (su_int)(x2 + (x2 >> 32)); + /* The lower 32 bits hold four 16 bit sums (5 significant bits). */ + /* Upper 32 bits are garbage */ + x = x + (x >> 16); + /* The lower 16 bits hold two 32 bit sums (6 significant bits). */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */ +} diff --git a/contrib/libs/cxxsupp/builtins/popcountsi2.c b/contrib/libs/cxxsupp/builtins/popcountsi2.c index 7ef6d899ee0..44544ff4989 100644 --- a/contrib/libs/cxxsupp/builtins/popcountsi2.c +++ b/contrib/libs/cxxsupp/builtins/popcountsi2.c @@ -1,33 +1,33 @@ -/* ===-- popcountsi2.c - Implement __popcountsi2 ---------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. 
See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __popcountsi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: count of 1 bits */ - -COMPILER_RT_ABI si_int -__popcountsi2(si_int a) -{ - su_int x = (su_int)a; - x = x - ((x >> 1) & 0x55555555); - /* Every 2 bits holds the sum of every pair of bits */ - x = ((x >> 2) & 0x33333333) + (x & 0x33333333); - /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */ - x = (x + (x >> 4)) & 0x0F0F0F0F; - /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */ - x = (x + (x >> 16)); - /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/ - /* Upper 16 bits are garbage */ - return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */ -} +/* ===-- popcountsi2.c - Implement __popcountsi2 ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountsi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountsi2(si_int a) +{ + su_int x = (su_int)a; + x = x - ((x >> 1) & 0x55555555); + /* Every 2 bits holds the sum of every pair of bits */ + x = ((x >> 2) & 0x33333333) + (x & 0x33333333); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */ + x = (x + (x >> 4)) & 0x0F0F0F0F; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */ + x = (x + (x >> 16)); + /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */ +} diff --git a/contrib/libs/cxxsupp/builtins/popcountti2.c b/contrib/libs/cxxsupp/builtins/popcountti2.c index 0f3ba706daa..7451bbb286b 100644 --- a/contrib/libs/cxxsupp/builtins/popcountti2.c +++ b/contrib/libs/cxxsupp/builtins/popcountti2.c @@ -1,44 +1,44 @@ -/* ===-- popcountti2.c - Implement __popcountti2 ----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __popcountti2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: count of 1 bits */ - -COMPILER_RT_ABI si_int -__popcountti2(ti_int a) -{ - tu_int x3 = (tu_int)a; - x3 = x3 - ((x3 >> 1) & (((tu_int)0x5555555555555555uLL << 64) | - 0x5555555555555555uLL)); - /* Every 2 bits holds the sum of every pair of bits (64) */ - x3 = ((x3 >> 2) & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)) - + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)); - /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) */ - x3 = (x3 + (x3 >> 4)) - & (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL); - /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) */ - du_int x2 = (du_int)(x3 + (x3 >> 64)); - /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) */ - su_int x = (su_int)(x2 + (x2 >> 32)); - /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) */ - x = x + (x >> 16); - /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) */ - /* Upper 16 bits are garbage */ - return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */ -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- popcountti2.c - Implement __popcountti2 ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountti2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountti2(ti_int a) +{ + tu_int x3 = (tu_int)a; + x3 = x3 - ((x3 >> 1) & (((tu_int)0x5555555555555555uLL << 64) | + 0x5555555555555555uLL)); + /* Every 2 bits holds the sum of every pair of bits (64) */ + x3 = ((x3 >> 2) & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)) + + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) */ + x3 = (x3 + (x3 >> 4)) + & (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL); + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) */ + du_int x2 = (du_int)(x3 + (x3 >> 64)); + /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) */ + su_int x = (su_int)(x2 + (x2 >> 32)); + /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) */ + x = x + (x >> 16); + /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */ +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/powidf2.c b/contrib/libs/cxxsupp/builtins/powidf2.c index ec0791358c1..ac13b172b04 100644 --- a/contrib/libs/cxxsupp/builtins/powidf2.c +++ b/contrib/libs/cxxsupp/builtins/powidf2.c @@ -1,34 +1,34 @@ -/* ===-- powidf2.cpp - Implement __powidf2 ---------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __powidf2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a ^ b */ - -COMPILER_RT_ABI double -__powidf2(double a, si_int b) -{ - const int recip = b < 0; - double r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 1/r : r; -} +/* ===-- powidf2.cpp - Implement __powidf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powidf2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a ^ b */ + +COMPILER_RT_ABI double +__powidf2(double a, si_int b) +{ + const int recip = b < 0; + double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1/r : r; +} diff --git a/contrib/libs/cxxsupp/builtins/powisf2.c b/contrib/libs/cxxsupp/builtins/powisf2.c index 945618e653e..0c400ec6dd6 100644 --- a/contrib/libs/cxxsupp/builtins/powisf2.c +++ b/contrib/libs/cxxsupp/builtins/powisf2.c @@ -1,34 +1,34 @@ -/*===-- powisf2.cpp - Implement __powisf2 ---------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __powisf2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a ^ b */ - -COMPILER_RT_ABI float -__powisf2(float a, si_int b) -{ - const int recip = b < 0; - float r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 1/r : r; -} +/*===-- powisf2.cpp - Implement __powisf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powisf2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a ^ b */ + +COMPILER_RT_ABI float +__powisf2(float a, si_int b) +{ + const int recip = b < 0; + float r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1/r : r; +} diff --git a/contrib/libs/cxxsupp/builtins/powitf2.c b/contrib/libs/cxxsupp/builtins/powitf2.c index f17988f8685..172f29f58f2 100644 --- a/contrib/libs/cxxsupp/builtins/powitf2.c +++ b/contrib/libs/cxxsupp/builtins/powitf2.c @@ -1,38 +1,38 @@ -/* ===-- powitf2.cpp - Implement __powitf2 ---------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __powitf2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#if _ARCH_PPC - -/* Returns: a ^ b */ - -COMPILER_RT_ABI long double -__powitf2(long double a, si_int b) -{ - const int recip = b < 0; - long double r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 1/r : r; -} - -#endif +/* ===-- powitf2.cpp - Implement __powitf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powitf2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#if _ARCH_PPC + +/* Returns: a ^ b */ + +COMPILER_RT_ABI long double +__powitf2(long double a, si_int b) +{ + const int recip = b < 0; + long double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1/r : r; +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/powixf2.c b/contrib/libs/cxxsupp/builtins/powixf2.c index c2d54db9df9..0fd96e503e7 100644 --- a/contrib/libs/cxxsupp/builtins/powixf2.c +++ b/contrib/libs/cxxsupp/builtins/powixf2.c @@ -1,38 +1,38 @@ -/* ===-- powixf2.cpp - Implement __powixf2 ---------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __powixf2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#if !_ARCH_PPC - -#include "int_lib.h" - -/* Returns: a ^ b */ - -COMPILER_RT_ABI long double -__powixf2(long double a, si_int b) -{ - const int recip = b < 0; - long double r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 1/r : r; -} - -#endif +/* ===-- powixf2.cpp - Implement __powixf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powixf2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: a ^ b */ + +COMPILER_RT_ABI long double +__powixf2(long double a, si_int b) +{ + const int recip = b < 0; + long double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 
1/r : r; +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/ppc/DD.h b/contrib/libs/cxxsupp/builtins/ppc/DD.h index 45bbc846ea3..3e5f9e58c13 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/DD.h +++ b/contrib/libs/cxxsupp/builtins/ppc/DD.h @@ -1,45 +1,45 @@ -#ifndef COMPILERRT_DD_HEADER -#define COMPILERRT_DD_HEADER - -#include "../int_lib.h" - +#ifndef COMPILERRT_DD_HEADER +#define COMPILERRT_DD_HEADER + +#include "../int_lib.h" + +typedef union { + long double ld; + struct { + double hi; + double lo; + }s; +} DD; + typedef union { - long double ld; - struct { - double hi; - double lo; - }s; -} DD; - -typedef union { - double d; - uint64_t x; -} doublebits; - -#define LOWORDER(xy,xHi,xLo,yHi,yLo) \ - (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo)) - -static __inline ALWAYS_INLINE double local_fabs(double x) { - doublebits result = {.d = x}; - result.x &= UINT64_C(0x7fffffffffffffff); - return result.d; -} - -static __inline ALWAYS_INLINE double high26bits(double x) { - doublebits result = {.d = x}; - result.x &= UINT64_C(0xfffffffff8000000); - return result.d; -} - -static __inline ALWAYS_INLINE int different_sign(double x, double y) { - doublebits xsignbit = {.d = x}, ysignbit = {.d = y}; - int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63); - return result; -} - -long double __gcc_qadd(long double, long double); -long double __gcc_qsub(long double, long double); -long double __gcc_qmul(long double, long double); -long double __gcc_qdiv(long double, long double); - -#endif /* COMPILERRT_DD_HEADER */ + double d; + uint64_t x; +} doublebits; + +#define LOWORDER(xy,xHi,xLo,yHi,yLo) \ + (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo)) + +static __inline ALWAYS_INLINE double local_fabs(double x) { + doublebits result = {.d = x}; + result.x &= UINT64_C(0x7fffffffffffffff); + return result.d; +} + +static __inline ALWAYS_INLINE double high26bits(double x) { + doublebits result = {.d = x}; + result.x &= 
UINT64_C(0xfffffffff8000000); + return result.d; +} + +static __inline ALWAYS_INLINE int different_sign(double x, double y) { + doublebits xsignbit = {.d = x}, ysignbit = {.d = y}; + int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63); + return result; +} + +long double __gcc_qadd(long double, long double); +long double __gcc_qsub(long double, long double); +long double __gcc_qmul(long double, long double); +long double __gcc_qdiv(long double, long double); + +#endif /* COMPILERRT_DD_HEADER */ diff --git a/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk b/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk index 6d5af81fafc..0adc623aa04 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk +++ b/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk @@ -1,20 +1,20 @@ -#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := ppc - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) +#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +#===------------------------------------------------------------------------===# + +ModuleName := builtins +SubDirs := +OnlyArchs := ppc + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) +Implementation := Optimized + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/contrib/libs/cxxsupp/builtins/ppc/divtc3.c b/contrib/libs/cxxsupp/builtins/ppc/divtc3.c index 5d55ab4b1a2..8ec41c528ab 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/divtc3.c +++ b/contrib/libs/cxxsupp/builtins/ppc/divtc3.c @@ -1,91 +1,91 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -#include "DD.h" -#include "../int_math.h" - -#if !defined(CRT_INFINITY) && defined(HUGE_VAL) -#define CRT_INFINITY HUGE_VAL -#endif /* CRT_INFINITY */ - -#define makeFinite(x) { \ - (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 
1.0 : 0.0, (x).s.hi); \ - (x).s.lo = 0.0; \ - } - -long double _Complex -__divtc3(long double a, long double b, long double c, long double d) -{ - DD cDD = { .ld = c }; - DD dDD = { .ld = d }; - - int ilogbw = 0; - const double logbw = crt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi) )); - - if (crt_isfinite(logbw)) - { - ilogbw = (int)logbw; - - cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw); - cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw); - dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw); - dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw); - } - - const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld)); - const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld)); - const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld)); - - DD real = { .ld = __gcc_qdiv(realNumerator, denom) }; - DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) }; - - real.s.hi = crt_scalbn(real.s.hi, -ilogbw); - real.s.lo = crt_scalbn(real.s.lo, -ilogbw); - imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw); - imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw); - - if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) - { - DD aDD = { .ld = a }; - DD bDD = { .ld = b }; - DD rDD = { .ld = denom }; - - if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) || - !crt_isnan(bDD.s.hi))) - { - real.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi; - real.s.lo = 0.0; - imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi; - imag.s.lo = 0.0; - } - - else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) && - crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi)) - { - makeFinite(aDD); - makeFinite(bDD); - real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi); - real.s.lo = 0.0; - imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi); - imag.s.lo = 0.0; - } - - else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) && - crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi)) - { - makeFinite(cDD); - makeFinite(dDD); - 
real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi)); - real.s.lo = 0.0; - imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi)); - imag.s.lo = 0.0; - } - } - - long double _Complex z; - __real__ z = real.ld; - __imag__ z = imag.ld; - - return z; -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +#include "DD.h" +#include "../int_math.h" + +#if !defined(CRT_INFINITY) && defined(HUGE_VAL) +#define CRT_INFINITY HUGE_VAL +#endif /* CRT_INFINITY */ + +#define makeFinite(x) { \ + (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } + +long double _Complex +__divtc3(long double a, long double b, long double c, long double d) +{ + DD cDD = { .ld = c }; + DD dDD = { .ld = d }; + + int ilogbw = 0; + const double logbw = crt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi) )); + + if (crt_isfinite(logbw)) + { + ilogbw = (int)logbw; + + cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw); + cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw); + dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw); + dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw); + } + + const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld)); + const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld)); + const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld)); + + DD real = { .ld = __gcc_qdiv(realNumerator, denom) }; + DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) }; + + real.s.hi = crt_scalbn(real.s.hi, -ilogbw); + real.s.lo = crt_scalbn(real.s.lo, -ilogbw); + imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw); + imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw); + + if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) + { + DD aDD = { .ld = a }; + DD bDD = { .ld = b }; + DD rDD = { .ld = denom }; + + if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) || + !crt_isnan(bDD.s.hi))) + { + real.s.hi = 
crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi; + real.s.lo = 0.0; + imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi; + imag.s.lo = 0.0; + } + + else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) && + crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi)) + { + makeFinite(aDD); + makeFinite(bDD); + real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi); + real.s.lo = 0.0; + imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi); + imag.s.lo = 0.0; + } + + else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) && + crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi)) + { + makeFinite(cDD); + makeFinite(dDD); + real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi)); + real.s.lo = 0.0; + imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi)); + imag.s.lo = 0.0; + } + } + + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c b/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c index 77112905de4..2c7c0f8e279 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c +++ b/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c @@ -1,104 +1,104 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* int64_t __fixunstfdi(long double x); - * This file implements the PowerPC 128-bit double-double -> int64_t conversion - */ - -#include "DD.h" -#include "../int_math.h" - -uint64_t __fixtfdi(long double input) -{ - const DD x = { .ld = input }; - const doublebits hibits = { .d = x.s.hi }; - - const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff); - const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000); - - /* If (1.0 - tiny) <= input < 0x1.0p63: */ - if (UINT32_C(0x03f00000) > absHighWordMinusOne) - { - /* Do an unsigned conversion of the absolute value, then restore the sign. 
*/ - const int unbiasedHeadExponent = absHighWordMinusOne >> 20; - - int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */ - result |= INT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ - result <<= 10; /* mantissa(hi) with one zero preceding bit. */ - - const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63; - - /* If the tail is non-zero, we need to patch in the tail bits. */ - if (0.0 != x.s.lo) - { - const doublebits lobits = { .d = x.s.lo }; - int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); - tailMantissa |= INT64_C(0x0010000000000000); - - /* At this point we have the mantissa of |tail| */ - /* We need to negate it if head and tail have different signs. */ - const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63; - const int64_t negationMask = loNegationMask ^ hiNegationMask; - tailMantissa = (tailMantissa ^ negationMask) - negationMask; - - /* Now we have the mantissa of tail as a signed 2s-complement integer */ - - const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; - - /* Shift the tail mantissa into the right position, accounting for the - * bias of 10 that we shifted the head mantissa by. - */ - tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10))); - - result += tailMantissa; - } - - result >>= (62 - unbiasedHeadExponent); - - /* Restore the sign of the result and return */ - result = (result ^ hiNegationMask) - hiNegationMask; - return result; - - } - - /* Edge cases handled here: */ - - /* |x| < 1, result is zero. */ - if (1.0 > crt_fabs(x.s.hi)) - return INT64_C(0); - - /* x very close to INT64_MIN, care must be taken to see which side we are on. */ - if (x.s.hi == -0x1.0p63) { - - int64_t result = INT64_MIN; - - if (0.0 < x.s.lo) - { - /* If the tail is positive, the correct result is something other than INT64_MIN. - * we'll need to figure out what it is. +/* This file is distributed under the University of Illinois Open Source + * License. 
See LICENSE.TXT for details. + */ + +/* int64_t __fixunstfdi(long double x); + * This file implements the PowerPC 128-bit double-double -> int64_t conversion + */ + +#include "DD.h" +#include "../int_math.h" + +uint64_t __fixtfdi(long double input) +{ + const DD x = { .ld = input }; + const doublebits hibits = { .d = x.s.hi }; + + const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff); + const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000); + + /* If (1.0 - tiny) <= input < 0x1.0p63: */ + if (UINT32_C(0x03f00000) > absHighWordMinusOne) + { + /* Do an unsigned conversion of the absolute value, then restore the sign. */ + const int unbiasedHeadExponent = absHighWordMinusOne >> 20; + + int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */ + result |= INT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ + result <<= 10; /* mantissa(hi) with one zero preceding bit. */ + + const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63; + + /* If the tail is non-zero, we need to patch in the tail bits. */ + if (0.0 != x.s.lo) + { + const doublebits lobits = { .d = x.s.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* At this point we have the mantissa of |tail| */ + /* We need to negate it if head and tail have different signs. */ + const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63; + const int64_t negationMask = loNegationMask ^ hiNegationMask; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + /* Now we have the mantissa of tail as a signed 2s-complement integer */ + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + /* Shift the tail mantissa into the right position, accounting for the + * bias of 10 that we shifted the head mantissa by. 
*/ - - const doublebits lobits = { .d = x.s.lo }; - int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); - tailMantissa |= INT64_C(0x0010000000000000); - - /* Now we negate the tailMantissa */ - tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1); - - /* And shift it by the appropriate amount */ - const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; - tailMantissa >>= 1075 - biasedTailExponent; - - result -= tailMantissa; - } - - return result; - } - - /* Signed overflows, infinities, and NaNs */ - if (x.s.hi > 0.0) - return INT64_MAX; - else - return INT64_MIN; -} + tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10))); + + result += tailMantissa; + } + + result >>= (62 - unbiasedHeadExponent); + + /* Restore the sign of the result and return */ + result = (result ^ hiNegationMask) - hiNegationMask; + return result; + + } + + /* Edge cases handled here: */ + + /* |x| < 1, result is zero. */ + if (1.0 > crt_fabs(x.s.hi)) + return INT64_C(0); + + /* x very close to INT64_MIN, care must be taken to see which side we are on. */ + if (x.s.hi == -0x1.0p63) { + + int64_t result = INT64_MIN; + + if (0.0 < x.s.lo) + { + /* If the tail is positive, the correct result is something other than INT64_MIN. + * we'll need to figure out what it is. 
+ */ + + const doublebits lobits = { .d = x.s.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* Now we negate the tailMantissa */ + tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1); + + /* And shift it by the appropriate amount */ + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + tailMantissa >>= 1075 - biasedTailExponent; + + result -= tailMantissa; + } + + return result; + } + + /* Signed overflows, infinities, and NaNs */ + if (x.s.hi > 0.0) + return INT64_MAX; + else + return INT64_MIN; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c b/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c index 277f30ecca4..5e6e2cedf6a 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c +++ b/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c @@ -1,59 +1,59 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* uint64_t __fixunstfdi(long double x); */ -/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */ - -#include "DD.h" - -uint64_t __fixunstfdi(long double input) -{ - const DD x = { .ld = input }; - const doublebits hibits = { .d = x.s.hi }; - - const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000); - - /* If (1.0 - tiny) <= input < 0x1.0p64: */ - if (UINT32_C(0x04000000) > highWordMinusOne) - { - const int unbiasedHeadExponent = highWordMinusOne >> 20; - - uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */ - result |= UINT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ - result <<= 11; /* mantissa(hi) left aligned in the int64 field. */ - - /* If the tail is non-zero, we need to patch in the tail bits. 
*/ - if (0.0 != x.s.lo) - { - const doublebits lobits = { .d = x.s.lo }; - int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); - tailMantissa |= INT64_C(0x0010000000000000); - - /* At this point we have the mantissa of |tail| */ - - const int64_t negationMask = ((int64_t)(lobits.x)) >> 63; - tailMantissa = (tailMantissa ^ negationMask) - negationMask; - - /* Now we have the mantissa of tail as a signed 2s-complement integer */ - - const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; - - /* Shift the tail mantissa into the right position, accounting for the - * bias of 11 that we shifted the head mantissa by. - */ - tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11))); - - result += tailMantissa; - } - - result >>= (63 - unbiasedHeadExponent); - return result; - } - - /* Edge cases are handled here, with saturation. */ - if (1.0 > x.s.hi) - return UINT64_C(0); - else - return UINT64_MAX; -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* uint64_t __fixunstfdi(long double x); */ +/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */ + +#include "DD.h" + +uint64_t __fixunstfdi(long double input) +{ + const DD x = { .ld = input }; + const doublebits hibits = { .d = x.s.hi }; + + const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000); + + /* If (1.0 - tiny) <= input < 0x1.0p64: */ + if (UINT32_C(0x04000000) > highWordMinusOne) + { + const int unbiasedHeadExponent = highWordMinusOne >> 20; + + uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */ + result |= UINT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ + result <<= 11; /* mantissa(hi) left aligned in the int64 field. */ + + /* If the tail is non-zero, we need to patch in the tail bits. 
*/ + if (0.0 != x.s.lo) + { + const doublebits lobits = { .d = x.s.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* At this point we have the mantissa of |tail| */ + + const int64_t negationMask = ((int64_t)(lobits.x)) >> 63; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + /* Now we have the mantissa of tail as a signed 2s-complement integer */ + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + /* Shift the tail mantissa into the right position, accounting for the + * bias of 11 that we shifted the head mantissa by. + */ + tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11))); + + result += tailMantissa; + } + + result >>= (63 - unbiasedHeadExponent); + return result; + } + + /* Edge cases are handled here, with saturation. */ + if (1.0 > x.s.hi) + return UINT64_C(0); + else + return UINT64_MAX; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/floatditf.c b/contrib/libs/cxxsupp/builtins/ppc/floatditf.c index 0b86d8b4d87..beabdd01742 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/floatditf.c +++ b/contrib/libs/cxxsupp/builtins/ppc/floatditf.c @@ -1,36 +1,36 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* long double __floatditf(long long x); */ -/* This file implements the PowerPC long long -> long double conversion */ - -#include "DD.h" - -long double __floatditf(int64_t a) { - - static const double twop32 = 0x1.0p32; - static const double twop52 = 0x1.0p52; - - doublebits low = { .d = twop52 }; - low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. 
*/ - - const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52; - - /* At this point, we have two double precision numbers - * high_addend and low.d, and we wish to return their sum - * as a canonicalized long double: - */ - - /* This implementation sets the inexact flag spuriously. - * This could be avoided, but at some substantial cost. - */ - - DD result; - - result.s.hi = high_addend + low.d; - result.s.lo = (high_addend - result.s.hi) + low.d; - - return result.ld; - -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __floatditf(long long x); */ +/* This file implements the PowerPC long long -> long double conversion */ + +#include "DD.h" + +long double __floatditf(int64_t a) { + + static const double twop32 = 0x1.0p32; + static const double twop52 = 0x1.0p52; + + doublebits low = { .d = twop52 }; + low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. */ + + const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52; + + /* At this point, we have two double precision numbers + * high_addend and low.d, and we wish to return their sum + * as a canonicalized long double: + */ + + /* This implementation sets the inexact flag spuriously. + * This could be avoided, but at some substantial cost. + */ + + DD result; + + result.s.hi = high_addend + low.d; + result.s.lo = (high_addend - result.s.hi) + low.d; + + return result.ld; + +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c b/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c index e76a4e52db4..b12e1e738fd 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c +++ b/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c @@ -1,41 +1,41 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. 
- */ - -/* long double __floatunditf(unsigned long long x); */ -/* This file implements the PowerPC unsigned long long -> long double conversion */ - -#include "DD.h" - -long double __floatunditf(uint64_t a) { - - /* Begins with an exact copy of the code from __floatundidf */ - - static const double twop52 = 0x1.0p52; - static const double twop84 = 0x1.0p84; - static const double twop84_plus_twop52 = 0x1.00000001p84; - - doublebits high = { .d = twop84 }; - doublebits low = { .d = twop52 }; - - high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */ - low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */ - - const double high_addend = high.d - twop84_plus_twop52; - - /* At this point, we have two double precision numbers - * high_addend and low.d, and we wish to return their sum - * as a canonicalized long double: - */ - - /* This implementation sets the inexact flag spuriously. */ - /* This could be avoided, but at some substantial cost. */ - - DD result; - - result.s.hi = high_addend + low.d; - result.s.lo = (high_addend - result.s.hi) + low.d; - - return result.ld; - -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ */ + +/* long double __floatunditf(unsigned long long x); */ +/* This file implements the PowerPC unsigned long long -> long double conversion */ + +#include "DD.h" + +long double __floatunditf(uint64_t a) { + + /* Begins with an exact copy of the code from __floatundidf */ + + static const double twop52 = 0x1.0p52; + static const double twop84 = 0x1.0p84; + static const double twop84_plus_twop52 = 0x1.00000001p84; + + doublebits high = { .d = twop84 }; + doublebits low = { .d = twop52 }; + + high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */ + low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */ + + const double high_addend = high.d - twop84_plus_twop52; + + /* At this point, we have two double precision numbers + * high_addend and low.d, and we wish to return their sum + * as a canonicalized long double: + */ + + /* This implementation sets the inexact flag spuriously. */ + /* This could be avoided, but at some substantial cost. */ + + DD result; + + result.s.hi = high_addend + low.d; + result.s.lo = (high_addend - result.s.hi) + low.d; + + return result.ld; + +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c index 0284e28f9c4..32e16e9d1d1 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c +++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c @@ -1,76 +1,76 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* long double __gcc_qadd(long double x, long double y); - * This file implements the PowerPC 128-bit double-double add operation. - * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) 
- */ - -#include "DD.h" - -long double __gcc_qadd(long double x, long double y) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - - DD dst = { .ld = x }, src = { .ld = y }; - - register double A = dst.s.hi, a = dst.s.lo, - B = src.s.hi, b = src.s.lo; - - /* If both operands are zero: */ - if ((A == 0.0) && (B == 0.0)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return dst.ld; - } - - /* If either operand is NaN or infinity: */ - const doublebits abits = { .d = A }; - const doublebits bbits = { .d = B }; - if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || - (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return dst.ld; - } - - /* If the computation overflows: */ - /* This may be playing things a little bit fast and loose, but it will do for a start. */ - const double testForOverflow = A + (B + (a + b)); - const doublebits testbits = { .d = testForOverflow }; - if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = testForOverflow; - dst.s.lo = 0.0; - return dst.ld; - } - - double H, h; - double T, t; - double W, w; - double Y; - - H = B + (A - (A + B)); - T = b + (a - (a + b)); - h = A + (B - (A + B)); - t = a + (b - (a + b)); - - if (local_fabs(A) <= local_fabs(B)) - w = (a + b) + h; - else - w = (a + b) + H; - - W = (A + B) + w; - Y = (A + B) - W; - Y += w; - - if (local_fabs(a) <= local_fabs(b)) - w = t + Y; - else - w = T + Y; - - dst.s.hi = Y = W + w; - dst.s.lo = (W - Y) + w; - - return dst.ld; -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qadd(long double x, long double y); + * This file implements the PowerPC 128-bit double-double add operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) 
+ */ + +#include "DD.h" + +long double __gcc_qadd(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.s.hi, a = dst.s.lo, + B = src.s.hi, b = src.s.lo; + + /* If both operands are zero: */ + if ((A == 0.0) && (B == 0.0)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If either operand is NaN or infinity: */ + const doublebits abits = { .d = A }; + const doublebits bbits = { .d = B }; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If the computation overflows: */ + /* This may be playing things a little bit fast and loose, but it will do for a start. */ + const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = { .d = testForOverflow }; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = testForOverflow; + dst.s.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (local_fabs(A) <= local_fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (local_fabs(a) <= local_fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.s.hi = Y = W + w; + dst.s.lo = (W - Y) + w; + + return dst.ld; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c index 0e010679148..70aa00b6440 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c +++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c @@ -1,55 +1,55 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. 
- */ - -/* long double __gcc_qdiv(long double x, long double y); - * This file implements the PowerPC 128-bit double-double division operation. - * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) - */ - -#include "DD.h" - -long double __gcc_qdiv(long double a, long double b) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - DD dst = { .ld = a }, src = { .ld = b }; - - register double x = dst.s.hi, x1 = dst.s.lo, - y = src.s.hi, y1 = src.s.lo; - - double yHi, yLo, qHi, qLo; - double yq, tmp, q; - - q = x / y; - - /* Detect special cases */ - if (q == 0.0) { - dst.s.hi = q; - dst.s.lo = 0.0; - return dst.ld; - } - - const doublebits qBits = { .d = q }; - if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = q; - dst.s.lo = 0.0; - return dst.ld; - } - - yHi = high26bits(y); - qHi = high26bits(q); - - yq = y * q; - yLo = y - yHi; - qLo = q - qHi; - - tmp = LOWORDER(yq, yHi, yLo, qHi, qLo); - tmp = (x - yq) - tmp; - tmp = ((tmp + x1) - y1 * q) / y; - x = q + tmp; - - dst.s.lo = (q - x) + tmp; - dst.s.hi = x; - - return dst.ld; -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qdiv(long double x, long double y); + * This file implements the PowerPC 128-bit double-double division operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) 
+ */ + +#include "DD.h" + +long double __gcc_qdiv(long double a, long double b) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = { .ld = a }, src = { .ld = b }; + + register double x = dst.s.hi, x1 = dst.s.lo, + y = src.s.hi, y1 = src.s.lo; + + double yHi, yLo, qHi, qLo; + double yq, tmp, q; + + q = x / y; + + /* Detect special cases */ + if (q == 0.0) { + dst.s.hi = q; + dst.s.lo = 0.0; + return dst.ld; + } + + const doublebits qBits = { .d = q }; + if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = q; + dst.s.lo = 0.0; + return dst.ld; + } + + yHi = high26bits(y); + qHi = high26bits(q); + + yq = y * q; + yLo = y - yHi; + qLo = q - qHi; + + tmp = LOWORDER(yq, yHi, yLo, qHi, qLo); + tmp = (x - yq) - tmp; + tmp = ((tmp + x1) - y1 * q) / y; + x = q + tmp; + + dst.s.lo = (q - x) + tmp; + dst.s.hi = x; + + return dst.ld; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c index 1bdac9337aa..fb4c5164ccb 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c +++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c @@ -1,53 +1,53 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* long double __gcc_qmul(long double x, long double y); - * This file implements the PowerPC 128-bit double-double multiply operation. - * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) 
- */ - -#include "DD.h" - -long double __gcc_qmul(long double x, long double y) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - DD dst = { .ld = x }, src = { .ld = y }; - - register double A = dst.s.hi, a = dst.s.lo, - B = src.s.hi, b = src.s.lo; - - double aHi, aLo, bHi, bLo; - double ab, tmp, tau; - - ab = A * B; - - /* Detect special cases */ - if (ab == 0.0) { - dst.s.hi = ab; - dst.s.lo = 0.0; - return dst.ld; - } - - const doublebits abBits = { .d = ab }; - if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = ab; - dst.s.lo = 0.0; - return dst.ld; - } - - /* Generic cases handled here. */ - aHi = high26bits(A); - bHi = high26bits(B); - aLo = A - aHi; - bLo = B - bHi; - - tmp = LOWORDER(ab, aHi, aLo, bHi, bLo); - tmp += (A * b + a * B); - tau = ab + tmp; - - dst.s.lo = (ab - tau) + tmp; - dst.s.hi = tau; - - return dst.ld; -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qmul(long double x, long double y); + * This file implements the PowerPC 128-bit double-double multiply operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + */ + +#include "DD.h" + +long double __gcc_qmul(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.s.hi, a = dst.s.lo, + B = src.s.hi, b = src.s.lo; + + double aHi, aLo, bHi, bLo; + double ab, tmp, tau; + + ab = A * B; + + /* Detect special cases */ + if (ab == 0.0) { + dst.s.hi = ab; + dst.s.lo = 0.0; + return dst.ld; + } + + const doublebits abBits = { .d = ab }; + if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = ab; + dst.s.lo = 0.0; + return dst.ld; + } + + /* Generic cases handled here. 
*/ + aHi = high26bits(A); + bHi = high26bits(B); + aLo = A - aHi; + bLo = B - bHi; + + tmp = LOWORDER(ab, aHi, aLo, bHi, bLo); + tmp += (A * b + a * B); + tau = ab + tmp; + + dst.s.lo = (ab - tau) + tmp; + dst.s.hi = tau; + + return dst.ld; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c index d45fc4d14af..c092e24dbda 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c +++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c @@ -1,76 +1,76 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* long double __gcc_qsub(long double x, long double y); - * This file implements the PowerPC 128-bit double-double add operation. - * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) - */ - -#include "DD.h" - -long double __gcc_qsub(long double x, long double y) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - - DD dst = { .ld = x }, src = { .ld = y }; - - register double A = dst.s.hi, a = dst.s.lo, - B = -src.s.hi, b = -src.s.lo; - - /* If both operands are zero: */ - if ((A == 0.0) && (B == 0.0)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return dst.ld; - } - - /* If either operand is NaN or infinity: */ - const doublebits abits = { .d = A }; - const doublebits bbits = { .d = B }; - if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || - (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return dst.ld; - } - - /* If the computation overflows: */ - /* This may be playing things a little bit fast and loose, but it will do for a start. 
*/ - const double testForOverflow = A + (B + (a + b)); - const doublebits testbits = { .d = testForOverflow }; - if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = testForOverflow; - dst.s.lo = 0.0; - return dst.ld; - } - - double H, h; - double T, t; - double W, w; - double Y; - - H = B + (A - (A + B)); - T = b + (a - (a + b)); - h = A + (B - (A + B)); - t = a + (b - (a + b)); - - if (local_fabs(A) <= local_fabs(B)) - w = (a + b) + h; - else - w = (a + b) + H; - - W = (A + B) + w; - Y = (A + B) - W; - Y += w; - - if (local_fabs(a) <= local_fabs(b)) - w = t + Y; - else - w = T + Y; - - dst.s.hi = Y = W + w; - dst.s.lo = (W - Y) + w; - - return dst.ld; -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qsub(long double x, long double y); + * This file implements the PowerPC 128-bit double-double add operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + */ + +#include "DD.h" + +long double __gcc_qsub(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.s.hi, a = dst.s.lo, + B = -src.s.hi, b = -src.s.lo; + + /* If both operands are zero: */ + if ((A == 0.0) && (B == 0.0)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If either operand is NaN or infinity: */ + const doublebits abits = { .d = A }; + const doublebits bbits = { .d = B }; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If the computation overflows: */ + /* This may be playing things a little bit fast and loose, but it will do for a start. 
*/ + const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = { .d = testForOverflow }; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = testForOverflow; + dst.s.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (local_fabs(A) <= local_fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (local_fabs(a) <= local_fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.s.hi = Y = W + w; + dst.s.lo = (W - Y) + w; + + return dst.ld; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/multc3.c b/contrib/libs/cxxsupp/builtins/ppc/multc3.c index 327d625dc70..9dd79c975dd 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/multc3.c +++ b/contrib/libs/cxxsupp/builtins/ppc/multc3.c @@ -1,90 +1,90 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -#include "DD.h" -#include "../int_math.h" - -#define makeFinite(x) { \ - (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 
1.0 : 0.0, (x).s.hi); \ - (x).s.lo = 0.0; \ - } - -#define zeroNaN(x) { \ - if (crt_isnan((x).s.hi)) { \ - (x).s.hi = crt_copysign(0.0, (x).s.hi); \ - (x).s.lo = 0.0; \ - } \ - } - -long double _Complex -__multc3(long double a, long double b, long double c, long double d) -{ - long double ac = __gcc_qmul(a,c); - long double bd = __gcc_qmul(b,d); - long double ad = __gcc_qmul(a,d); - long double bc = __gcc_qmul(b,c); - - DD real = { .ld = __gcc_qsub(ac,bd) }; - DD imag = { .ld = __gcc_qadd(ad,bc) }; - - if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) - { - int recalc = 0; - - DD aDD = { .ld = a }; - DD bDD = { .ld = b }; - DD cDD = { .ld = c }; - DD dDD = { .ld = d }; - - if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) - { - makeFinite(aDD); - makeFinite(bDD); - zeroNaN(cDD); - zeroNaN(dDD); - recalc = 1; - } - - if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) - { - makeFinite(cDD); - makeFinite(dDD); - zeroNaN(aDD); - zeroNaN(bDD); - recalc = 1; - } - - if (!recalc) - { - DD acDD = { .ld = ac }; - DD bdDD = { .ld = bd }; - DD adDD = { .ld = ad }; - DD bcDD = { .ld = bc }; - - if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) || - crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi)) - { - zeroNaN(aDD); - zeroNaN(bDD); - zeroNaN(cDD); - zeroNaN(dDD); - recalc = 1; - } - } - - if (recalc) - { - real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi); - real.s.lo = 0.0; - imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi); - imag.s.lo = 0.0; - } - } - - long double _Complex z; - __real__ z = real.ld; - __imag__ z = imag.ld; - - return z; -} +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +#include "DD.h" +#include "../int_math.h" + +#define makeFinite(x) { \ + (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 
1.0 : 0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } + +#define zeroNaN(x) { \ + if (crt_isnan((x).s.hi)) { \ + (x).s.hi = crt_copysign(0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } \ + } + +long double _Complex +__multc3(long double a, long double b, long double c, long double d) +{ + long double ac = __gcc_qmul(a,c); + long double bd = __gcc_qmul(b,d); + long double ad = __gcc_qmul(a,d); + long double bc = __gcc_qmul(b,c); + + DD real = { .ld = __gcc_qsub(ac,bd) }; + DD imag = { .ld = __gcc_qadd(ad,bc) }; + + if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) + { + int recalc = 0; + + DD aDD = { .ld = a }; + DD bDD = { .ld = b }; + DD cDD = { .ld = c }; + DD dDD = { .ld = d }; + + if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) + { + makeFinite(aDD); + makeFinite(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + + if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) + { + makeFinite(cDD); + makeFinite(dDD); + zeroNaN(aDD); + zeroNaN(bDD); + recalc = 1; + } + + if (!recalc) + { + DD acDD = { .ld = ac }; + DD bdDD = { .ld = bd }; + DD adDD = { .ld = ad }; + DD bcDD = { .ld = bc }; + + if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) || + crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi)) + { + zeroNaN(aDD); + zeroNaN(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + } + + if (recalc) + { + real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi); + real.s.lo = 0.0; + imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi); + imag.s.lo = 0.0; + } + } + + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; +} diff --git a/contrib/libs/cxxsupp/builtins/ppc/restFP.S b/contrib/libs/cxxsupp/builtins/ppc/restFP.S index 23d5e142b16..95032897c0d 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/restFP.S +++ b/contrib/libs/cxxsupp/builtins/ppc/restFP.S @@ -1,43 +1,43 @@ -//===-- restFP.S - Implement restFP ---------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed 
under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// Helper function used by compiler to restore ppc floating point registers at -// the end of the function epilog. This function returns to the address -// in the LR slot. So a function epilog must branch (b) not branch and link -// (bl) to this function. -// If the compiler wants to restore f27..f31, it does a "b restFP+52" -// -// This function should never be exported by a shared library. Each linkage -// unit carries its own copy of this function. -// -DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(restFP) - lfd f14,-144(r1) - lfd f15,-136(r1) - lfd f16,-128(r1) - lfd f17,-120(r1) - lfd f18,-112(r1) - lfd f19,-104(r1) - lfd f20,-96(r1) - lfd f21,-88(r1) - lfd f22,-80(r1) - lfd f23,-72(r1) - lfd f24,-64(r1) - lfd f25,-56(r1) - lfd f26,-48(r1) - lfd f27,-40(r1) - lfd f28,-32(r1) - lfd f29,-24(r1) - lfd f30,-16(r1) - lfd f31,-8(r1) - lwz r0,8(r1) - mtlr r0 - blr +//===-- restFP.S - Implement restFP ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// Helper function used by compiler to restore ppc floating point registers at +// the end of the function epilog. This function returns to the address +// in the LR slot. So a function epilog must branch (b) not branch and link +// (bl) to this function. +// If the compiler wants to restore f27..f31, it does a "b restFP+52" +// +// This function should never be exported by a shared library. Each linkage +// unit carries its own copy of this function. 
+// +DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(restFP) + lfd f14,-144(r1) + lfd f15,-136(r1) + lfd f16,-128(r1) + lfd f17,-120(r1) + lfd f18,-112(r1) + lfd f19,-104(r1) + lfd f20,-96(r1) + lfd f21,-88(r1) + lfd f22,-80(r1) + lfd f23,-72(r1) + lfd f24,-64(r1) + lfd f25,-56(r1) + lfd f26,-48(r1) + lfd f27,-40(r1) + lfd f28,-32(r1) + lfd f29,-24(r1) + lfd f30,-16(r1) + lfd f31,-8(r1) + lwz r0,8(r1) + mtlr r0 + blr diff --git a/contrib/libs/cxxsupp/builtins/ppc/saveFP.S b/contrib/libs/cxxsupp/builtins/ppc/saveFP.S index c8dcfed616b..72bd459f4cc 100644 --- a/contrib/libs/cxxsupp/builtins/ppc/saveFP.S +++ b/contrib/libs/cxxsupp/builtins/ppc/saveFP.S @@ -1,40 +1,40 @@ -//===-- saveFP.S - Implement saveFP ---------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// -// Helper function used by compiler to save ppc floating point registers in -// function prologs. This routines also saves r0 in the LR slot. -// If the compiler wants to save f27..f31, it does a "bl saveFP+52" -// -// This function should never be exported by a shared library. Each linkage -// unit carries its own copy of this function. 
-// -DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(saveFP) - stfd f14,-144(r1) - stfd f15,-136(r1) - stfd f16,-128(r1) - stfd f17,-120(r1) - stfd f18,-112(r1) - stfd f19,-104(r1) - stfd f20,-96(r1) - stfd f21,-88(r1) - stfd f22,-80(r1) - stfd f23,-72(r1) - stfd f24,-64(r1) - stfd f25,-56(r1) - stfd f26,-48(r1) - stfd f27,-40(r1) - stfd f28,-32(r1) - stfd f29,-24(r1) - stfd f30,-16(r1) - stfd f31,-8(r1) - stw r0,8(r1) - blr +//===-- saveFP.S - Implement saveFP ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// Helper function used by compiler to save ppc floating point registers in +// function prologs. This routines also saves r0 in the LR slot. +// If the compiler wants to save f27..f31, it does a "bl saveFP+52" +// +// This function should never be exported by a shared library. Each linkage +// unit carries its own copy of this function. 
+// +DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(saveFP) + stfd f14,-144(r1) + stfd f15,-136(r1) + stfd f16,-128(r1) + stfd f17,-120(r1) + stfd f18,-112(r1) + stfd f19,-104(r1) + stfd f20,-96(r1) + stfd f21,-88(r1) + stfd f22,-80(r1) + stfd f23,-72(r1) + stfd f24,-64(r1) + stfd f25,-56(r1) + stfd f26,-48(r1) + stfd f27,-40(r1) + stfd f28,-32(r1) + stfd f29,-24(r1) + stfd f30,-16(r1) + stfd f31,-8(r1) + stw r0,8(r1) + blr diff --git a/contrib/libs/cxxsupp/builtins/subdf3.c b/contrib/libs/cxxsupp/builtins/subdf3.c index 33264bf5611..7a79e5e7765 100644 --- a/contrib/libs/cxxsupp/builtins/subdf3.c +++ b/contrib/libs/cxxsupp/builtins/subdf3.c @@ -1,25 +1,25 @@ -//===-- lib/adddf3.c - Double-precision subtraction ---------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements double-precision soft-float subtraction with the -// IEEE-754 default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define DOUBLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(dsub, subdf3) - -// Subtraction; flip the sign bit of b and add. -COMPILER_RT_ABI fp_t -__subdf3(fp_t a, fp_t b) { - return __adddf3(a, fromRep(toRep(b) ^ signBit)); -} - +//===-- lib/adddf3.c - Double-precision subtraction ---------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float subtraction with the +// IEEE-754 default rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(dsub, subdf3) + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t +__subdf3(fp_t a, fp_t b) { + return __adddf3(a, fromRep(toRep(b) ^ signBit)); +} + diff --git a/contrib/libs/cxxsupp/builtins/subsf3.c b/contrib/libs/cxxsupp/builtins/subsf3.c index 99677d259da..c3b85144af4 100644 --- a/contrib/libs/cxxsupp/builtins/subsf3.c +++ b/contrib/libs/cxxsupp/builtins/subsf3.c @@ -1,25 +1,25 @@ -//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float subtraction with the -// IEEE-754 default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define SINGLE_PRECISION -#include "fp_lib.h" - -ARM_EABI_FNALIAS(fsub, subsf3) - -// Subtraction; flip the sign bit of b and add. -COMPILER_RT_ABI fp_t -__subsf3(fp_t a, fp_t b) { - return __addsf3(a, fromRep(toRep(b) ^ signBit)); -} - +//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float subtraction with the +// IEEE-754 default rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +ARM_EABI_FNALIAS(fsub, subsf3) + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t +__subsf3(fp_t a, fp_t b) { + return __addsf3(a, fromRep(toRep(b) ^ signBit)); +} + diff --git a/contrib/libs/cxxsupp/builtins/subtf3.c b/contrib/libs/cxxsupp/builtins/subtf3.c index 02e90608a8b..609b816f41e 100644 --- a/contrib/libs/cxxsupp/builtins/subtf3.c +++ b/contrib/libs/cxxsupp/builtins/subtf3.c @@ -1,27 +1,27 @@ -//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements quad-precision soft-float subtraction with the -// IEEE-754 default rounding (to nearest, ties to even). -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b); - -// Subtraction; flip the sign bit of b and add. -COMPILER_RT_ABI fp_t -__subtf3(fp_t a, fp_t b) { - return __addtf3(a, fromRep(toRep(b) ^ signBit)); -} - -#endif +//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float subtraction with the +// IEEE-754 default rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b); + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t +__subtf3(fp_t a, fp_t b) { + return __addtf3(a, fromRep(toRep(b) ^ signBit)); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/subvdi3.c b/contrib/libs/cxxsupp/builtins/subvdi3.c index b4a680a50fc..71fc70ffa92 100644 --- a/contrib/libs/cxxsupp/builtins/subvdi3.c +++ b/contrib/libs/cxxsupp/builtins/subvdi3.c @@ -1,36 +1,36 @@ -/* ===-- subvdi3.c - Implement __subvdi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __subvdi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a - b */ - -/* Effects: aborts if a - b overflows */ - -COMPILER_RT_ABI di_int -__subvdi3(di_int a, di_int b) -{ - di_int s = (du_int) a - (du_int) b; - if (b >= 0) - { - if (s > a) - compilerrt_abort(); - } - else - { - if (s <= a) - compilerrt_abort(); - } - return s; -} +/* ===-- subvdi3.c - Implement __subvdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __subvdi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a - b */ + +/* Effects: aborts if a - b overflows */ + +COMPILER_RT_ABI di_int +__subvdi3(di_int a, di_int b) +{ + di_int s = (du_int) a - (du_int) b; + if (b >= 0) + { + if (s > a) + compilerrt_abort(); + } + else + { + if (s <= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/libs/cxxsupp/builtins/subvsi3.c b/contrib/libs/cxxsupp/builtins/subvsi3.c index 2e4b732866f..e6c0fb688c9 100644 --- a/contrib/libs/cxxsupp/builtins/subvsi3.c +++ b/contrib/libs/cxxsupp/builtins/subvsi3.c @@ -1,36 +1,36 @@ -/* ===-- subvsi3.c - Implement __subvsi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __subvsi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a - b */ - -/* Effects: aborts if a - b overflows */ - -COMPILER_RT_ABI si_int -__subvsi3(si_int a, si_int b) -{ - si_int s = (su_int) a - (su_int) b; - if (b >= 0) - { - if (s > a) - compilerrt_abort(); - } - else - { - if (s <= a) - compilerrt_abort(); - } - return s; -} +/* ===-- subvsi3.c - Implement __subvsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __subvsi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a - b */ + +/* Effects: aborts if a - b overflows */ + +COMPILER_RT_ABI si_int +__subvsi3(si_int a, si_int b) +{ + si_int s = (su_int) a - (su_int) b; + if (b >= 0) + { + if (s > a) + compilerrt_abort(); + } + else + { + if (s <= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/libs/cxxsupp/builtins/subvti3.c b/contrib/libs/cxxsupp/builtins/subvti3.c index 23b504e1faa..a6804d2d7b9 100644 --- a/contrib/libs/cxxsupp/builtins/subvti3.c +++ b/contrib/libs/cxxsupp/builtins/subvti3.c @@ -1,40 +1,40 @@ -/* ===-- subvti3.c - Implement __subvti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __subvti3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a - b */ - -/* Effects: aborts if a - b overflows */ - -COMPILER_RT_ABI ti_int -__subvti3(ti_int a, ti_int b) -{ - ti_int s = (tu_int) a - (tu_int) b; - if (b >= 0) - { - if (s > a) - compilerrt_abort(); - } - else - { - if (s <= a) - compilerrt_abort(); - } - return s; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- subvti3.c - Implement __subvti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __subvti3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a - b */ + +/* Effects: aborts if a - b overflows */ + +COMPILER_RT_ABI ti_int +__subvti3(ti_int a, ti_int b) +{ + ti_int s = (tu_int) a - (tu_int) b; + if (b >= 0) + { + if (s > a) + compilerrt_abort(); + } + else + { + if (s <= a) + compilerrt_abort(); + } + return s; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/trampoline_setup.c b/contrib/libs/cxxsupp/builtins/trampoline_setup.c index d24d3657356..25b627ab765 100644 --- a/contrib/libs/cxxsupp/builtins/trampoline_setup.c +++ b/contrib/libs/cxxsupp/builtins/trampoline_setup.c @@ -1,48 +1,48 @@ -/* ===----- trampoline_setup.c - Implement __trampoline_setup -------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -extern void __clear_cache(void* start, void* end); - -/* - * The ppc compiler generates calls to __trampoline_setup() when creating - * trampoline functions on the stack for use with nested functions. - * This function creates a custom 40-byte trampoline function on the stack - * which loads r11 with a pointer to the outer function's locals - * and then jumps to the target nested function. 
- */ - -#if __ppc__ && !defined(__powerpc64__) -COMPILER_RT_ABI void -__trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, - const void* realFunc, void* localsPtr) -{ - /* should never happen, but if compiler did not allocate */ - /* enough space on stack for the trampoline, abort */ - if ( trampSizeAllocated < 40 ) - compilerrt_abort(); - - /* create trampoline */ - trampOnStack[0] = 0x7c0802a6; /* mflr r0 */ - trampOnStack[1] = 0x4800000d; /* bl Lbase */ - trampOnStack[2] = (uint32_t)realFunc; - trampOnStack[3] = (uint32_t)localsPtr; - trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */ - trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */ - trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */ - trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */ - trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */ - trampOnStack[9] = 0x4e800420; /* bctr */ - - /* clear instruction cache */ - __clear_cache(trampOnStack, &trampOnStack[10]); -} -#endif /* __ppc__ && !defined(__powerpc64__) */ +/* ===----- trampoline_setup.c - Implement __trampoline_setup -------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +extern void __clear_cache(void* start, void* end); + +/* + * The ppc compiler generates calls to __trampoline_setup() when creating + * trampoline functions on the stack for use with nested functions. + * This function creates a custom 40-byte trampoline function on the stack + * which loads r11 with a pointer to the outer function's locals + * and then jumps to the target nested function. 
+ */ + +#if __ppc__ && !defined(__powerpc64__) +COMPILER_RT_ABI void +__trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr) +{ + /* should never happen, but if compiler did not allocate */ + /* enough space on stack for the trampoline, abort */ + if ( trampSizeAllocated < 40 ) + compilerrt_abort(); + + /* create trampoline */ + trampOnStack[0] = 0x7c0802a6; /* mflr r0 */ + trampOnStack[1] = 0x4800000d; /* bl Lbase */ + trampOnStack[2] = (uint32_t)realFunc; + trampOnStack[3] = (uint32_t)localsPtr; + trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */ + trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */ + trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */ + trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */ + trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */ + trampOnStack[9] = 0x4e800420; /* bctr */ + + /* clear instruction cache */ + __clear_cache(trampOnStack, &trampOnStack[10]); +} +#endif /* __ppc__ && !defined(__powerpc64__) */ diff --git a/contrib/libs/cxxsupp/builtins/truncdfhf2.c b/contrib/libs/cxxsupp/builtins/truncdfhf2.c index c81e272a8af..17195cd9e79 100644 --- a/contrib/libs/cxxsupp/builtins/truncdfhf2.c +++ b/contrib/libs/cxxsupp/builtins/truncdfhf2.c @@ -1,18 +1,18 @@ -//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define SRC_DOUBLE -#define DST_HALF -#include "fp_trunc_impl.inc" - -ARM_EABI_FNALIAS(d2h, truncdfhf2) - -COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { - return __truncXfYf2__(a); -} +//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define SRC_DOUBLE +#define DST_HALF +#include "fp_trunc_impl.inc" + +ARM_EABI_FNALIAS(d2h, truncdfhf2) + +COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { + return __truncXfYf2__(a); +} diff --git a/contrib/libs/cxxsupp/builtins/truncdfsf2.c b/contrib/libs/cxxsupp/builtins/truncdfsf2.c index 6b07b621110..46ec11dccd7 100644 --- a/contrib/libs/cxxsupp/builtins/truncdfsf2.c +++ b/contrib/libs/cxxsupp/builtins/truncdfsf2.c @@ -1,18 +1,18 @@ -//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define SRC_DOUBLE -#define DST_SINGLE -#include "fp_trunc_impl.inc" - -ARM_EABI_FNALIAS(d2f, truncdfsf2) - -COMPILER_RT_ABI float __truncdfsf2(double a) { - return __truncXfYf2__(a); -} +//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define SRC_DOUBLE +#define DST_SINGLE +#include "fp_trunc_impl.inc" + +ARM_EABI_FNALIAS(d2f, truncdfsf2) + +COMPILER_RT_ABI float __truncdfsf2(double a) { + return __truncXfYf2__(a); +} diff --git a/contrib/libs/cxxsupp/builtins/truncsfhf2.c b/contrib/libs/cxxsupp/builtins/truncsfhf2.c index edc71420b47..9d61895bfd8 100644 --- a/contrib/libs/cxxsupp/builtins/truncsfhf2.c +++ b/contrib/libs/cxxsupp/builtins/truncsfhf2.c @@ -1,24 +1,24 @@ -//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define SRC_SINGLE -#define DST_HALF -#include "fp_trunc_impl.inc" - -ARM_EABI_FNALIAS(f2h, truncsfhf2) - -// Use a forwarding definition and noinline to implement a poor man's alias, -// as there isn't a good cross-platform way of defining one. -COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { - return __truncXfYf2__(a); -} - -COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { - return __truncsfhf2(a); -} +//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define SRC_SINGLE +#define DST_HALF +#include "fp_trunc_impl.inc" + +ARM_EABI_FNALIAS(f2h, truncsfhf2) + +// Use a forwarding definition and noinline to implement a poor man's alias, +// as there isn't a good cross-platform way of defining one. 
+COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { + return __truncXfYf2__(a); +} + +COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { + return __truncsfhf2(a); +} diff --git a/contrib/libs/cxxsupp/builtins/trunctfdf2.c b/contrib/libs/cxxsupp/builtins/trunctfdf2.c index 3547234eff0..741a71b33c5 100644 --- a/contrib/libs/cxxsupp/builtins/trunctfdf2.c +++ b/contrib/libs/cxxsupp/builtins/trunctfdf2.c @@ -1,22 +1,22 @@ -//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -#define SRC_QUAD -#define DST_DOUBLE -#include "fp_trunc_impl.inc" - -COMPILER_RT_ABI double __trunctfdf2(long double a) { - return __truncXfYf2__(a); -} - -#endif +//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_QUAD +#define DST_DOUBLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI double __trunctfdf2(long double a) { + return __truncXfYf2__(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/trunctfsf2.c b/contrib/libs/cxxsupp/builtins/trunctfsf2.c index 3f6636f19af..de96c1decf6 100644 --- a/contrib/libs/cxxsupp/builtins/trunctfsf2.c +++ b/contrib/libs/cxxsupp/builtins/trunctfsf2.c @@ -1,22 +1,22 @@ -//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define QUAD_PRECISION -#include "fp_lib.h" - -#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -#define SRC_QUAD -#define DST_SINGLE -#include "fp_trunc_impl.inc" - -COMPILER_RT_ABI float __trunctfsf2(long double a) { - return __truncXfYf2__(a); -} - -#endif +//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_QUAD +#define DST_SINGLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI float __trunctfsf2(long double a) { + return __truncXfYf2__(a); +} + +#endif diff --git a/contrib/libs/cxxsupp/builtins/ucmpdi2.c b/contrib/libs/cxxsupp/builtins/ucmpdi2.c index 5a57adb98cd..40af23613b1 100644 --- a/contrib/libs/cxxsupp/builtins/ucmpdi2.c +++ b/contrib/libs/cxxsupp/builtins/ucmpdi2.c @@ -1,51 +1,51 @@ -/* ===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ucmpdi2 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: if (a < b) returns 0 - * if (a == b) returns 1 - * if (a > b) returns 2 - */ - -COMPILER_RT_ABI si_int -__ucmpdi2(du_int a, du_int b) -{ - udwords x; - x.all = a; - udwords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - return 1; -} - -#ifdef __ARM_EABI__ -/* Returns: if (a < b) returns -1 -* if (a == b) returns 0 -* if (a > b) returns 1 -*/ -COMPILER_RT_ABI si_int -__aeabi_ulcmp(di_int a, di_int b) -{ - return __ucmpdi2(a, b) - 1; -} -#endif - +/* ===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __ucmpdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: if (a < b) returns 0 + * if (a == b) returns 1 + * if (a > b) returns 2 + */ + +COMPILER_RT_ABI si_int +__ucmpdi2(du_int a, du_int b) +{ + udwords x; + x.all = a; + udwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#ifdef __ARM_EABI__ +/* Returns: if (a < b) returns -1 +* if (a == b) returns 0 +* if (a > b) returns 1 +*/ +COMPILER_RT_ABI si_int +__aeabi_ulcmp(di_int a, di_int b) +{ + return __ucmpdi2(a, b) - 1; +} +#endif + diff --git a/contrib/libs/cxxsupp/builtins/ucmpti2.c b/contrib/libs/cxxsupp/builtins/ucmpti2.c index 797c62a8a7e..bda8083bb2a 100644 --- a/contrib/libs/cxxsupp/builtins/ucmpti2.c +++ b/contrib/libs/cxxsupp/builtins/ucmpti2.c @@ -1,42 +1,42 @@ -/* ===-- ucmpti2.c - Implement __ucmpti2 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __ucmpti2 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: if (a < b) returns 0 - * if (a == b) returns 1 - * if (a > b) returns 2 - */ - -COMPILER_RT_ABI si_int -__ucmpti2(tu_int a, tu_int b) -{ - utwords x; - x.all = a; - utwords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - return 1; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- ucmpti2.c - Implement __ucmpti2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ucmpti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: if (a < b) returns 0 + * if (a == b) returns 1 + * if (a > b) returns 2 + */ + +COMPILER_RT_ABI si_int +__ucmpti2(tu_int a, tu_int b) +{ + utwords x; + x.all = a; + utwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/udivdi3.c b/contrib/libs/cxxsupp/builtins/udivdi3.c index 1a1524479ea..dc68e154b10 100644 --- a/contrib/libs/cxxsupp/builtins/udivdi3.c +++ b/contrib/libs/cxxsupp/builtins/udivdi3.c @@ -1,23 +1,23 @@ -/* ===-- udivdi3.c - Implement __udivdi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. 
- * - * ===----------------------------------------------------------------------=== - * - * This file implements __udivdi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a / b */ - -COMPILER_RT_ABI du_int -__udivdi3(du_int a, du_int b) -{ - return __udivmoddi4(a, b, 0); -} +/* ===-- udivdi3.c - Implement __udivdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivdi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +COMPILER_RT_ABI du_int +__udivdi3(du_int a, du_int b) +{ + return __udivmoddi4(a, b, 0); +} diff --git a/contrib/libs/cxxsupp/builtins/udivmoddi4.c b/contrib/libs/cxxsupp/builtins/udivmoddi4.c index 606c43e5098..0c8b4ff4647 100644 --- a/contrib/libs/cxxsupp/builtins/udivmoddi4.c +++ b/contrib/libs/cxxsupp/builtins/udivmoddi4.c @@ -1,231 +1,231 @@ -/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __udivmoddi4 for the compiler_rt library. 
- * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Effects: if rem != 0, *rem = a % b - * Returns: a / b - */ - -/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ - -COMPILER_RT_ABI du_int -__udivmoddi4(du_int a, du_int b, du_int* rem) -{ - const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; - const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; - udwords n; - n.all = a; - udwords d; - d.all = b; - udwords q; - udwords r; - unsigned sr; - /* special cases, X is unknown, K != 0 */ - if (n.s.high == 0) - { - if (d.s.high == 0) - { - /* 0 X - * --- - * 0 X +/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmoddi4 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Effects: if rem != 0, *rem = a % b + * Returns: a / b + */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +COMPILER_RT_ABI du_int +__udivmoddi4(du_int a, du_int b, du_int* rem) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) + { + if (d.s.high == 0) + { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 0 */ - if (rem) - *rem = n.s.low % d.s.low; - return n.s.low / d.s.low; - } - /* 0 X - * --- - * K X - */ - if (rem) - *rem = n.s.low; - return 0; - } - /* n.s.high != 0 */ - if (d.s.low == 0) - { - if (d.s.high == 0) - { - /* K X - * --- - * 0 0 - */ - if (rem) - *rem = n.s.high % d.s.low; - return n.s.high / d.s.low; - } - /* d.s.high != 0 */ - if (n.s.low == 0) - { - /* K 0 - * --- - * K 0 - */ - if (rem) - { - r.s.high = n.s.high % d.s.high; - r.s.low = 0; - *rem = r.all; - } - return n.s.high / d.s.high; - } - /* K K - * --- - * K 0 - */ - if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ - { - if (rem) - { - r.s.low = n.s.low; - r.s.high = n.s.high & (d.s.high - 1); - *rem = r.all; - } - return n.s.high >> __builtin_ctz(d.s.high); - } - /* K K - * --- - * K 0 - */ - sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); - /* 0 <= sr <= n_uword_bits - 2 or sr large */ - if (sr > n_uword_bits - 2) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_uword_bits - 1 */ - /* q.all = n.all << (n_udword_bits - sr); */ - q.s.low = 0; - 
q.s.high = n.s.low << (n_uword_bits - sr); - /* r.all = n.all >> sr; */ - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); - } - else /* d.s.low != 0 */ - { - if (d.s.high == 0) - { - /* K X - * --- - * 0 K - */ - if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ - { - if (rem) - *rem = n.s.low & (d.s.low - 1); - if (d.s.low == 1) - return n.all; - sr = __builtin_ctz(d.s.low); - q.s.high = n.s.high >> sr; - q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); - return q.all; - } - /* K X - * --- - * 0 K - */ - sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); - /* 2 <= sr <= n_udword_bits - 1 - * q.all = n.all << (n_udword_bits - sr); - * r.all = n.all >> sr; - */ - if (sr == n_uword_bits) - { - q.s.low = 0; - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 - { - q.s.low = 0; - q.s.high = n.s.low << (n_uword_bits - sr); - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); - } - else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 - { - q.s.low = n.s.low << (n_udword_bits - sr); - q.s.high = (n.s.high << (n_udword_bits - sr)) | - (n.s.low >> (sr - n_uword_bits)); - r.s.high = 0; - r.s.low = n.s.high >> (sr - n_uword_bits); - } - } - else - { - /* K X - * --- - * K K - */ - sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); - /* 0 <= sr <= n_uword_bits - 1 or sr large */ - if (sr > n_uword_bits - 1) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_uword_bits */ - /* q.all = n.all << (n_udword_bits - sr); */ - q.s.low = 0; - if (sr == n_uword_bits) - { - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else - { - q.s.high = n.s.low << (n_uword_bits - sr); - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); - } - } - } - /* Not a special case - * q and r are initialized with: - * 
q.all = n.all << (n_udword_bits - sr); - * r.all = n.all >> sr; - * 1 <= sr <= n_udword_bits - 1 - */ - su_int carry = 0; - for (; sr > 0; --sr) - { - /* r:q = ((r:q) << 1) | carry */ - r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); - r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); - q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); - q.s.low = (q.s.low << 1) | carry; - /* carry = 0; - * if (r.all >= d.all) - * { - * r.all -= d.all; - * carry = 1; - * } - */ - const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); - carry = s & 1; - r.all -= d.all & s; - } - q.all = (q.all << 1) | carry; - if (rem) - *rem = r.all; - return q.all; -} + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) + { + /* K 0 + * --- + * K 0 + */ + if (rem) + { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctz(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 2 or sr large */ + if (sr > n_uword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else /* d.s.low != 0 */ + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctz(d.s.low); + q.s.high = n.s.high >> sr; + 
q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); + /* 2 <= sr <= n_udword_bits - 1 + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_uword_bits) + { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 + { + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 + { + q.s.low = n.s.low << (n_udword_bits - sr); + q.s.high = (n.s.high << (n_udword_bits - sr)) | + (n.s.low >> (sr - n_uword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_uword_bits); + } + } + else + { + /* K X + * --- + * K K + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + if (sr == n_uword_bits) + { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else + { + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + } + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_udword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= 
d.all; + * carry = 1; + * } + */ + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} diff --git a/contrib/libs/cxxsupp/builtins/udivmodsi4.c b/contrib/libs/cxxsupp/builtins/udivmodsi4.c index 67fab7f36f9..789c4b5061e 100644 --- a/contrib/libs/cxxsupp/builtins/udivmodsi4.c +++ b/contrib/libs/cxxsupp/builtins/udivmodsi4.c @@ -1,27 +1,27 @@ -/*===-- udivmodsi4.c - Implement __udivmodsi4 ------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __udivmodsi4 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a / b, *rem = a % b */ - -COMPILER_RT_ABI su_int -__udivmodsi4(su_int a, su_int b, su_int* rem) -{ - si_int d = __udivsi3(a,b); - *rem = a - (d*b); - return d; -} - - +/*===-- udivmodsi4.c - Implement __udivmodsi4 ------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmodsi4 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b, *rem = a % b */ + +COMPILER_RT_ABI su_int +__udivmodsi4(su_int a, su_int b, su_int* rem) +{ + si_int d = __udivsi3(a,b); + *rem = a - (d*b); + return d; +} + + diff --git a/contrib/libs/cxxsupp/builtins/udivmodti4.c b/contrib/libs/cxxsupp/builtins/udivmodti4.c index 2d221f8581f..803168849c6 100644 --- a/contrib/libs/cxxsupp/builtins/udivmodti4.c +++ b/contrib/libs/cxxsupp/builtins/udivmodti4.c @@ -1,238 +1,238 @@ -/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __udivmodti4 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Effects: if rem != 0, *rem = a % b - * Returns: a / b +/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmodti4 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== */ - -/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ - -COMPILER_RT_ABI tu_int -__udivmodti4(tu_int a, tu_int b, tu_int* rem) -{ - const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; - const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; - utwords n; - n.all = a; - utwords d; - d.all = b; - utwords q; - utwords r; - unsigned sr; - /* special cases, X is unknown, K != 0 */ - if (n.s.high == 0) - { - if (d.s.high == 0) - { - /* 0 X - * --- - * 0 X - */ - if (rem) - *rem = n.s.low % d.s.low; - return n.s.low / d.s.low; - } - /* 0 X - * --- - * K X - */ - if (rem) - *rem = n.s.low; - return 0; - } - /* n.s.high != 0 */ - if (d.s.low == 0) - { - if (d.s.high == 0) - { - /* K X - * --- - * 0 0 - */ - if (rem) - *rem = n.s.high % d.s.low; - return n.s.high / d.s.low; - } - /* d.s.high != 0 */ - if (n.s.low == 0) - { - /* K 0 - * --- - * K 0 - */ - if (rem) - { - r.s.high = n.s.high % d.s.high; - r.s.low = 0; - *rem = r.all; - } - return n.s.high / d.s.high; - } - /* K K - * --- - * K 0 - */ - if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ - { - if (rem) - { - r.s.low = n.s.low; - r.s.high = n.s.high & (d.s.high - 1); - *rem = r.all; - } - return n.s.high >> __builtin_ctzll(d.s.high); - } - /* K K - * --- - * K 0 - */ - sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); - /* 0 <= sr <= n_udword_bits - 2 or sr large */ - if (sr > n_udword_bits - 2) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_udword_bits - 1 */ - /* q.all = n.all << (n_utword_bits - sr); */ - q.s.low = 0; - q.s.high = n.s.low << (n_udword_bits - sr); - /* r.all = n.all >> sr; */ - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); - } - else /* d.s.low != 0 */ - { - if (d.s.high == 0) - { - /* K X - * --- - * 0 K - */ - if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ - { - if 
(rem) - *rem = n.s.low & (d.s.low - 1); - if (d.s.low == 1) - return n.all; - sr = __builtin_ctzll(d.s.low); - q.s.high = n.s.high >> sr; - q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); - return q.all; - } - /* K X - * --- - * 0 K - */ - sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) - - __builtin_clzll(n.s.high); - /* 2 <= sr <= n_utword_bits - 1 - * q.all = n.all << (n_utword_bits - sr); - * r.all = n.all >> sr; - */ - if (sr == n_udword_bits) - { - q.s.low = 0; - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 - { - q.s.low = 0; - q.s.high = n.s.low << (n_udword_bits - sr); - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); - } - else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 - { - q.s.low = n.s.low << (n_utword_bits - sr); - q.s.high = (n.s.high << (n_utword_bits - sr)) | - (n.s.low >> (sr - n_udword_bits)); - r.s.high = 0; - r.s.low = n.s.high >> (sr - n_udword_bits); - } - } - else - { - /* K X - * --- - * K K - */ - sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); - /*0 <= sr <= n_udword_bits - 1 or sr large */ - if (sr > n_udword_bits - 1) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_udword_bits - * q.all = n.all << (n_utword_bits - sr); - * r.all = n.all >> sr; - */ - q.s.low = 0; - if (sr == n_udword_bits) - { - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else - { - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); - q.s.high = n.s.low << (n_udword_bits - sr); - } - } - } - /* Not a special case - * q and r are initialized with: - * q.all = n.all << (n_utword_bits - sr); - * r.all = n.all >> sr; - * 1 <= sr <= n_utword_bits - 1 - */ - su_int carry = 0; - for (; sr > 0; --sr) - { - /* r:q = ((r:q) << 1) | carry */ - r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); - r.s.low = (r.s.low << 1) | 
(q.s.high >> (n_udword_bits - 1)); - q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); - q.s.low = (q.s.low << 1) | carry; - /* carry = 0; - * if (r.all >= d.all) - * { - * r.all -= d.all; - * carry = 1; - * } - */ - const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); - carry = s & 1; - r.all -= d.all & s; - } - q.all = (q.all << 1) | carry; - if (rem) - *rem = r.all; - return q.all; -} - -#endif /* CRT_HAS_128BIT */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Effects: if rem != 0, *rem = a % b + * Returns: a / b + */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +COMPILER_RT_ABI tu_int +__udivmodti4(tu_int a, tu_int b, tu_int* rem) +{ + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; + utwords n; + n.all = a; + utwords d; + d.all = b; + utwords q; + utwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) + { + if (d.s.high == 0) + { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) + { + /* K 0 + * --- + * K 0 + */ + if (rem) + { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctzll(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); + /* 0 <= sr <= n_udword_bits - 2 or sr large */ + if (sr > n_udword_bits - 2) + { + if 
(rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_udword_bits - 1 */ + /* q.all = n.all << (n_utword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_udword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + } + else /* d.s.low != 0 */ + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctzll(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) + - __builtin_clzll(n.s.high); + /* 2 <= sr <= n_utword_bits - 1 + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_udword_bits) + { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 + { + q.s.low = 0; + q.s.high = n.s.low << (n_udword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + } + else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 + { + q.s.low = n.s.low << (n_utword_bits - sr); + q.s.high = (n.s.high << (n_utword_bits - sr)) | + (n.s.low >> (sr - n_udword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_udword_bits); + } + } + else + { + /* K X + * --- + * K K + */ + sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); + /*0 <= sr <= n_udword_bits - 1 or sr large */ + if (sr > n_udword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_udword_bits + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + */ + q.s.low = 0; + if (sr == n_udword_bits) + { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else + { + r.s.high = 
n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + q.s.high = n.s.low << (n_udword_bits - sr); + } + } + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_utword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/udivsi3.c b/contrib/libs/cxxsupp/builtins/udivsi3.c index 24c806c8066..5d0140cc3e7 100644 --- a/contrib/libs/cxxsupp/builtins/udivsi3.c +++ b/contrib/libs/cxxsupp/builtins/udivsi3.c @@ -1,66 +1,66 @@ -/* ===-- udivsi3.c - Implement __udivsi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __udivsi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a / b */ - -/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ - -ARM_EABI_FNALIAS(uidiv, udivsi3) - -/* This function should not call __divsi3! 
*/ -COMPILER_RT_ABI su_int -__udivsi3(su_int n, su_int d) -{ - const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; - su_int q; - su_int r; - unsigned sr; - /* special cases */ - if (d == 0) - return 0; /* ?! */ - if (n == 0) - return 0; - sr = __builtin_clz(d) - __builtin_clz(n); - /* 0 <= sr <= n_uword_bits - 1 or sr large */ - if (sr > n_uword_bits - 1) /* d > r */ - return 0; - if (sr == n_uword_bits - 1) /* d == 1 */ - return n; - ++sr; - /* 1 <= sr <= n_uword_bits - 1 */ - /* Not a special case */ - q = n << (n_uword_bits - sr); - r = n >> sr; - su_int carry = 0; - for (; sr > 0; --sr) - { - /* r:q = ((r:q) << 1) | carry */ - r = (r << 1) | (q >> (n_uword_bits - 1)); - q = (q << 1) | carry; - /* carry = 0; - * if (r.all >= d.all) - * { - * r.all -= d.all; - * carry = 1; - * } - */ - const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1); - carry = s & 1; - r -= d & s; - } - q = (q << 1) | carry; - return q; -} +/* ===-- udivsi3.c - Implement __udivsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +ARM_EABI_FNALIAS(uidiv, udivsi3) + +/* This function should not call __divsi3! */ +COMPILER_RT_ABI su_int +__udivsi3(su_int n, su_int d) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + su_int q; + su_int r; + unsigned sr; + /* special cases */ + if (d == 0) + return 0; /* ?! 
*/ + if (n == 0) + return 0; + sr = __builtin_clz(d) - __builtin_clz(n); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) /* d > r */ + return 0; + if (sr == n_uword_bits - 1) /* d == 1 */ + return n; + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* Not a special case */ + q = n << (n_uword_bits - sr); + r = n >> sr; + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r = (r << 1) | (q >> (n_uword_bits - 1)); + q = (q << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1); + carry = s & 1; + r -= d & s; + } + q = (q << 1) | carry; + return q; +} diff --git a/contrib/libs/cxxsupp/builtins/udivti3.c b/contrib/libs/cxxsupp/builtins/udivti3.c index fcc96af0495..ec94673e25b 100644 --- a/contrib/libs/cxxsupp/builtins/udivti3.c +++ b/contrib/libs/cxxsupp/builtins/udivti3.c @@ -1,27 +1,27 @@ -/* ===-- udivti3.c - Implement __udivti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __udivti3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a / b */ - -COMPILER_RT_ABI tu_int -__udivti3(tu_int a, tu_int b) -{ - return __udivmodti4(a, b, 0); -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- udivti3.c - Implement __udivti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a / b */ + +COMPILER_RT_ABI tu_int +__udivti3(tu_int a, tu_int b) +{ + return __udivmodti4(a, b, 0); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/umoddi3.c b/contrib/libs/cxxsupp/builtins/umoddi3.c index 20b1c4ef7b5..d513f080a1e 100644 --- a/contrib/libs/cxxsupp/builtins/umoddi3.c +++ b/contrib/libs/cxxsupp/builtins/umoddi3.c @@ -1,25 +1,25 @@ -/* ===-- umoddi3.c - Implement __umoddi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __umoddi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a % b */ - -COMPILER_RT_ABI du_int -__umoddi3(du_int a, du_int b) -{ - du_int r; - __udivmoddi4(a, b, &r); - return r; -} +/* ===-- umoddi3.c - Implement __umoddi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __umoddi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI du_int +__umoddi3(du_int a, du_int b) +{ + du_int r; + __udivmoddi4(a, b, &r); + return r; +} diff --git a/contrib/libs/cxxsupp/builtins/umodsi3.c b/contrib/libs/cxxsupp/builtins/umodsi3.c index 52185098288..d5fda4a6af1 100644 --- a/contrib/libs/cxxsupp/builtins/umodsi3.c +++ b/contrib/libs/cxxsupp/builtins/umodsi3.c @@ -1,23 +1,23 @@ -/* ===-- umodsi3.c - Implement __umodsi3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __umodsi3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -/* Returns: a % b */ - -COMPILER_RT_ABI su_int -__umodsi3(su_int a, su_int b) -{ - return a - __udivsi3(a, b) * b; -} +/* ===-- umodsi3.c - Implement __umodsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __umodsi3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI su_int +__umodsi3(su_int a, su_int b) +{ + return a - __udivsi3(a, b) * b; +} diff --git a/contrib/libs/cxxsupp/builtins/umodti3.c b/contrib/libs/cxxsupp/builtins/umodti3.c index 166fdf53948..6d1ca7a8cf6 100644 --- a/contrib/libs/cxxsupp/builtins/umodti3.c +++ b/contrib/libs/cxxsupp/builtins/umodti3.c @@ -1,29 +1,29 @@ -/* ===-- umodti3.c - Implement __umodti3 -----------------------------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. - * - * ===----------------------------------------------------------------------=== - * - * This file implements __umodti3 for the compiler_rt library. - * - * ===----------------------------------------------------------------------=== - */ - -#include "int_lib.h" - -#ifdef CRT_HAS_128BIT - -/* Returns: a % b */ - -COMPILER_RT_ABI tu_int -__umodti3(tu_int a, tu_int b) -{ - tu_int r; - __udivmodti4(a, b, &r); - return r; -} - -#endif /* CRT_HAS_128BIT */ +/* ===-- umodti3.c - Implement __umodti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __umodti3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a % b */ + +COMPILER_RT_ABI tu_int +__umodti3(tu_int a, tu_int b) +{ + tu_int r; + __udivmodti4(a, b, &r); + return r; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk b/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk index 60219c0bc16..83848dddd96 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk +++ b/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk @@ -1,20 +1,20 @@ -#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := x86_64 x86_64h - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) +#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +ModuleName := builtins +SubDirs := +OnlyArchs := x86_64 x86_64h + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) +Implementation := Optimized + +# FIXME: use automatic dependencies? 
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S b/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S index de315176c15..4149ac63d9d 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S +++ b/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S @@ -1,39 +1,39 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -// _chkstk routine -// This routine is windows specific -// http://msdn.microsoft.com/en-us/library/ms648426.aspx - -// Notes from r227519 -// MSVC x64s __chkstk and cygmings ___chkstk_ms do not adjust %rsp -// themselves. It also does not clobber %rax so we can reuse it when -// adjusting %rsp. - -#ifdef __x86_64__ - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(___chkstk_ms) - push %rcx - push %rax - cmp $0x1000,%rax - lea 24(%rsp),%rcx - jb 1f -2: - sub $0x1000,%rcx - test %rcx,(%rcx) - sub $0x1000,%rax - cmp $0x1000,%rax - ja 2b -1: - sub %rax,%rcx - test %rcx,(%rcx) - pop %rax - pop %rcx - ret -END_COMPILERRT_FUNCTION(___chkstk_ms) - -#endif // __x86_64__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// _chkstk routine +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +// Notes from r227519 +// MSVC x64s __chkstk and cygmings ___chkstk_ms do not adjust %rsp +// themselves. It also does not clobber %rax so we can reuse it when +// adjusting %rsp. 
+ +#ifdef __x86_64__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(___chkstk_ms) + push %rcx + push %rax + cmp $0x1000,%rax + lea 24(%rsp),%rcx + jb 1f +2: + sub $0x1000,%rcx + test %rcx,(%rcx) + sub $0x1000,%rax + cmp $0x1000,%rax + ja 2b +1: + sub %rax,%rcx + test %rcx,(%rcx) + pop %rax + pop %rcx + ret +END_COMPILERRT_FUNCTION(___chkstk_ms) + +#endif // __x86_64__ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S b/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S index 24f4ab1727a..ac1eb920e0e 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S +++ b/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S @@ -1,42 +1,42 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. - -#include "../assembly.h" - -#ifdef __x86_64__ - -// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments, -// then decrement %rsp by %rax. Preserves all registers except %rsp and flags. -// This routine is windows specific -// http://msdn.microsoft.com/en-us/library/ms648426.aspx - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__alloca) - mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx - // fallthrough -DEFINE_COMPILERRT_FUNCTION(___chkstk) - push %rcx - cmp $0x1000,%rax - lea 16(%rsp),%rcx // rsp before calling this routine -> rcx - jb 1f -2: - sub $0x1000,%rcx - test %rcx,(%rcx) - sub $0x1000,%rax - cmp $0x1000,%rax - ja 2b -1: - sub %rax,%rcx - test %rcx,(%rcx) - - lea 8(%rsp),%rax // load pointer to the return address into rax - mov %rcx,%rsp // install the new top of stack pointer into rsp - mov -8(%rax),%rcx // restore rcx - push (%rax) // push return address onto the stack - sub %rsp,%rax // restore the original value in rax - ret -END_COMPILERRT_FUNCTION(___chkstk) -END_COMPILERRT_FUNCTION(__alloca) - -#endif // __x86_64__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. 
See LICENSE.TXT for details. + +#include "../assembly.h" + +#ifdef __x86_64__ + +// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments, +// then decrement %rsp by %rax. Preserves all registers except %rsp and flags. +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__alloca) + mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx + // fallthrough +DEFINE_COMPILERRT_FUNCTION(___chkstk) + push %rcx + cmp $0x1000,%rax + lea 16(%rsp),%rcx // rsp before calling this routine -> rcx + jb 1f +2: + sub $0x1000,%rcx + test %rcx,(%rcx) + sub $0x1000,%rax + cmp $0x1000,%rax + ja 2b +1: + sub %rax,%rcx + test %rcx,(%rcx) + + lea 8(%rsp),%rax // load pointer to the return address into rax + mov %rcx,%rsp // install the new top of stack pointer into rsp + mov -8(%rax),%rcx // restore rcx + push (%rax) // push return address onto the stack + sub %rsp,%rax // restore the original value in rax + ret +END_COMPILERRT_FUNCTION(___chkstk) +END_COMPILERRT_FUNCTION(__alloca) + +#endif // __x86_64__ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c b/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c index 6bf8e90b1c9..388404e5e08 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c +++ b/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c @@ -1,16 +1,16 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* double __floatdidf(di_int a); */ - -#ifdef __x86_64__ - -#include "../int_lib.h" - -double __floatdidf(int64_t a) -{ - return (double)a; -} - -#endif /* __x86_64__ */ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ */ + +/* double __floatdidf(di_int a); */ + +#ifdef __x86_64__ + +#include "../int_lib.h" + +double __floatdidf(int64_t a) +{ + return (double)a; +} + +#endif /* __x86_64__ */ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c b/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c index 92fc82d2cb3..96c3728e92c 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c +++ b/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c @@ -1,14 +1,14 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -#ifdef __x86_64__ - -#include "../int_lib.h" - -float __floatdisf(int64_t a) -{ - return (float)a; -} - -#endif /* __x86_64__ */ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +#ifdef __x86_64__ + +#include "../int_lib.h" + +float __floatdisf(int64_t a) +{ + return (float)a; +} + +#endif /* __x86_64__ */ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c b/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c index 8d308e6fb08..c01193a82b5 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c +++ b/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c @@ -1,16 +1,16 @@ -/* This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - */ - -/* long double __floatdixf(di_int a); */ - -#ifdef __x86_64__ - -#include "../int_lib.h" - -long double __floatdixf(int64_t a) -{ - return (long double)a; -} - -#endif /* __i386__ */ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ */ + +/* long double __floatdixf(di_int a); */ + +#ifdef __x86_64__ + +#include "../int_lib.h" + +long double __floatdixf(int64_t a) +{ + return (long double)a; +} + +#endif /* __i386__ */ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S b/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S index 6b2f0613916..3cd5d02a743 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S +++ b/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S @@ -1,49 +1,49 @@ -//===-- floatundidf.S - Implement __floatundidf for x86_64 ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements __floatundidf for the compiler_rt library. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - -// double __floatundidf(du_int a); - -#ifdef __x86_64__ - -CONST_SECTION - - .balign 16 -twop52: - .quad 0x4330000000000000 - - .balign 16 -twop84_plus_twop52: - .quad 0x4530000000100000 - - .balign 16 -twop84: - .quad 0x4530000000000000 - -#define REL_ADDR(_a) (_a)(%rip) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundidf) - movd %edi, %xmm0 // low 32 bits of a - shrq $32, %rdi // high 32 bits of a - orq REL_ADDR(twop84), %rdi // 0x1p84 + a_hi (no rounding occurs) - orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs) - movd %rdi, %xmm1 - subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs) - addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) - ret -END_COMPILERRT_FUNCTION(__floatundidf) - -#endif // __x86_64__ +//===-- floatundidf.S - Implement __floatundidf for x86_64 ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// 
Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// double __floatundidf(du_int a); + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop84_plus_twop52: + .quad 0x4530000000100000 + + .balign 16 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)(%rip) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundidf) + movd %edi, %xmm0 // low 32 bits of a + shrq $32, %rdi // high 32 bits of a + orq REL_ADDR(twop84), %rdi // 0x1p84 + a_hi (no rounding occurs) + orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs) + movd %rdi, %xmm1 + subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + ret +END_COMPILERRT_FUNCTION(__floatundidf) + +#endif // __x86_64__ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S b/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S index ad453271145..61952f40470 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S +++ b/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S @@ -1,35 +1,35 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
- -#include "../assembly.h" - -// float __floatundisf(du_int a); - -#ifdef __x86_64__ - -CONST_SECTION - - .balign 16 -two: - .single 2.0 - -#define REL_ADDR(_a) (_a)(%rip) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundisf) - movq $1, %rsi - testq %rdi, %rdi - js 1f - cvtsi2ssq %rdi, %xmm0 - ret - -1: andq %rdi, %rsi - shrq %rdi - orq %rsi, %rdi - cvtsi2ssq %rdi, %xmm0 - mulss REL_ADDR(two), %xmm0 - ret -END_COMPILERRT_FUNCTION(__floatundisf) - -#endif // __x86_64__ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatundisf(du_int a); + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 16 +two: + .single 2.0 + +#define REL_ADDR(_a) (_a)(%rip) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundisf) + movq $1, %rsi + testq %rdi, %rdi + js 1f + cvtsi2ssq %rdi, %xmm0 + ret + +1: andq %rdi, %rsi + shrq %rdi + orq %rsi, %rdi + cvtsi2ssq %rdi, %xmm0 + mulss REL_ADDR(two), %xmm0 + ret +END_COMPILERRT_FUNCTION(__floatundisf) + +#endif // __x86_64__ diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S b/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S index b3bac15b9ef..92961c89115 100644 --- a/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S +++ b/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S @@ -1,68 +1,68 @@ -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
- -#include "../assembly.h" - -// long double __floatundixf(du_int a); - -#ifdef __x86_64__ - -CONST_SECTION - - .balign 16 -twop64: - .quad 0x43f0000000000000 - -#define REL_ADDR(_a) (_a)(%rip) - - .text - - .balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundixf) - movq %rdi, -8(%rsp) - fildq -8(%rsp) - test %rdi, %rdi - js 1f - ret -1: faddl REL_ADDR(twop64) - ret -END_COMPILERRT_FUNCTION(__floatundixf) - -#endif // __x86_64__ - - -/* Branch-free implementation is ever so slightly slower, but more beautiful. - It is likely superior for inlining, so I kept it around for future reference. - -#ifdef __x86_64__ - -CONST_SECTION - - .balign 4 -twop52: - .quad 0x4330000000000000 -twop84_plus_twop52_neg: - .quad 0xc530000000100000 -twop84: - .quad 0x4530000000000000 - -#define REL_ADDR(_a) (_a)(%rip) - -.text -.balign 4 -DEFINE_COMPILERRT_FUNCTION(__floatundixf) - movl %edi, %esi // low 32 bits of input - shrq $32, %rdi // hi 32 bits of input - orq REL_ADDR(twop84), %rdi // 2^84 + hi (as a double) - orq REL_ADDR(twop52), %rsi // 2^52 + lo (as a double) - movq %rdi, -8(%rsp) - movq %rsi, -16(%rsp) - fldl REL_ADDR(twop84_plus_twop52_neg) - faddl -8(%rsp) // hi - 2^52 (as double extended, no rounding occurs) - faddl -16(%rsp) // hi + lo (as double extended) - ret -END_COMPILERRT_FUNCTION(__floatundixf) - -#endif // __x86_64__ - -*/ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// long double __floatundixf(du_int a); + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 16 +twop64: + .quad 0x43f0000000000000 + +#define REL_ADDR(_a) (_a)(%rip) + + .text + + .balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundixf) + movq %rdi, -8(%rsp) + fildq -8(%rsp) + test %rdi, %rdi + js 1f + ret +1: faddl REL_ADDR(twop64) + ret +END_COMPILERRT_FUNCTION(__floatundixf) + +#endif // __x86_64__ + + +/* Branch-free implementation is ever so slightly slower, but more beautiful. 
+ It is likely superior for inlining, so I kept it around for future reference. + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 4 +twop52: + .quad 0x4330000000000000 +twop84_plus_twop52_neg: + .quad 0xc530000000100000 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)(%rip) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundixf) + movl %edi, %esi // low 32 bits of input + shrq $32, %rdi // hi 32 bits of input + orq REL_ADDR(twop84), %rdi // 2^84 + hi (as a double) + orq REL_ADDR(twop52), %rsi // 2^52 + lo (as a double) + movq %rdi, -8(%rsp) + movq %rsi, -16(%rsp) + fldl REL_ADDR(twop84_plus_twop52_neg) + faddl -8(%rsp) // hi - 2^52 (as double extended, no rounding occurs) + faddl -16(%rsp) // hi + lo (as double extended) + ret +END_COMPILERRT_FUNCTION(__floatundixf) + +#endif // __x86_64__ + +*/ diff --git a/contrib/libs/cxxsupp/builtins/ya.make b/contrib/libs/cxxsupp/builtins/ya.make index bc4a815c9b8..d2c319c927c 100644 --- a/contrib/libs/cxxsupp/builtins/ya.make +++ b/contrib/libs/cxxsupp/builtins/ya.make @@ -1,5 +1,5 @@ -LIBRARY() - +LIBRARY() + # Part of compiler-rt LLVM subproject # git repository: https://github.com/llvm/llvm-project.git @@ -28,7 +28,7 @@ OWNER( g:contrib g:cpp-contrib ) - + # Check MUSL before NO_PLATFORM() disables it. IF (MUSL) # We use C headers despite NO_PLATFORM, but we do not propagate @@ -43,14 +43,14 @@ IF (MUSL) ) ENDIF() -NO_UTIL() +NO_UTIL() + +NO_RUNTIME() -NO_RUNTIME() +NO_PLATFORM() -NO_PLATFORM() +NO_COMPILER_WARNINGS() -NO_COMPILER_WARNINGS() - IF (GCC OR CLANG) # Clang (maybe GCC too) LTO code generator leaves the builtin calls unresolved # even if they are available. After the code generation pass is done @@ -61,7 +61,7 @@ IF (GCC OR CLANG) # Just generate native code from the beginning. 
DISABLE(USE_LTO) ENDIF() - + SRCS( addtf3.c ashlti3.c @@ -109,7 +109,7 @@ SRCS( udivti3.c umodti3.c ) - + IF (OS_DARWIN OR OS_IOS) SRCS( os_version_check.c @@ -123,4 +123,4 @@ IF (ARCH_ARM) ) ENDIF() -END() +END() diff --git a/contrib/libs/cxxsupp/libcxx/include/deque b/contrib/libs/cxxsupp/libcxx/include/deque index da489aa4575..67cd6654f13 100644 --- a/contrib/libs/cxxsupp/libcxx/include/deque +++ b/contrib/libs/cxxsupp/libcxx/include/deque @@ -280,9 +280,9 @@ move_backward(__deque_iterator<_V1, _P1, _R1, _M1, _D1, _B1> __f, template struct __deque_block_size { - static const _DiffType __buf_size = 64 * sizeof(void*); - static const _DiffType value = (__buf_size / sizeof(_ValueType)) > 2 ? (__buf_size / sizeof(_ValueType)) : 2; - //static const _DiffType value = sizeof(_ValueType) < 256 ? 4096 / sizeof(_ValueType) : 16; + static const _DiffType __buf_size = 64 * sizeof(void*); + static const _DiffType value = (__buf_size / sizeof(_ValueType)) > 2 ? (__buf_size / sizeof(_ValueType)) : 2; + //static const _DiffType value = sizeof(_ValueType) < 256 ? 
4096 / sizeof(_ValueType) : 16; }; template (__cxa_current_exception_type), &myinfo); - void *ip = reinterpret_cast(_Unwind_GetIP(context)); - Dl_info info; - if (dladdr(ip, &info) != 0) - { - if (mylookup == 0 || strcmp(info.dli_fname, myinfo.dli_fname) != 0) - { - printf("%p:%s() in %s\n", ip, info.dli_sname, info.dli_fname); - } - } - return _URC_CONTINUE_UNWIND; -} - -static void bt_terminate_handler() { - __cxa_eh_globals* globals = __cxa_get_globals(); - __cxa_exception* thrown_exception = globals->caughtExceptions; - - if (!thrown_exception) { - abort(); - } - - fprintf(stderr, "uncaught exception:\n address -> %p\n", (void*)thrown_exception); - thrown_exception = realExceptionFromException(thrown_exception); - - const __class_type_info *e_ti = static_cast(&typeid(std::exception)); - const __class_type_info *throw_ti = dynamic_cast(thrown_exception->exceptionType); - - if (throw_ti) { - void* ptr = thrown_exception + 1; - - if (throw_ti->__do_upcast(e_ti, &ptr)) { - std::exception* e = static_cast(ptr); - - if (e) { - fprintf(stderr, " what() -> \"%s\"\n", e->what()); - } - } - } - - size_t bufferSize = 128; - char *demangled = static_cast(malloc(bufferSize)); - const char *mangled = thrown_exception->exceptionType->name(); - int status; - demangled = __cxa_demangle(mangled, demangled, &bufferSize, &status); - fprintf(stderr, " type -> %s\n", status == 0 ? demangled : mangled); - if (status == 0) { free(demangled); } - abort(); -} - +/** + * Callback function used with _Unwind_Backtrace(). + * + * Prints a stack trace. Used only for debugging help. + * + * Note: As of FreeBSD 8.1, dladd() still doesn't work properly, so this only + * correctly prints function names from public, relocatable, symbols. 
+ */ +static _Unwind_Reason_Code trace(struct _Unwind_Context *context, void *c) +{ + Dl_info myinfo; + int mylookup = + dladdr(reinterpret_cast(__cxa_current_exception_type), &myinfo); + void *ip = reinterpret_cast(_Unwind_GetIP(context)); + Dl_info info; + if (dladdr(ip, &info) != 0) + { + if (mylookup == 0 || strcmp(info.dli_fname, myinfo.dli_fname) != 0) + { + printf("%p:%s() in %s\n", ip, info.dli_sname, info.dli_fname); + } + } + return _URC_CONTINUE_UNWIND; +} + +static void bt_terminate_handler() { + __cxa_eh_globals* globals = __cxa_get_globals(); + __cxa_exception* thrown_exception = globals->caughtExceptions; + + if (!thrown_exception) { + abort(); + } + + fprintf(stderr, "uncaught exception:\n address -> %p\n", (void*)thrown_exception); + thrown_exception = realExceptionFromException(thrown_exception); + + const __class_type_info *e_ti = static_cast(&typeid(std::exception)); + const __class_type_info *throw_ti = dynamic_cast(thrown_exception->exceptionType); + + if (throw_ti) { + void* ptr = thrown_exception + 1; + + if (throw_ti->__do_upcast(e_ti, &ptr)) { + std::exception* e = static_cast(ptr); + + if (e) { + fprintf(stderr, " what() -> \"%s\"\n", e->what()); + } + } + } + + size_t bufferSize = 128; + char *demangled = static_cast(malloc(bufferSize)); + const char *mangled = thrown_exception->exceptionType->name(); + int status; + demangled = __cxa_demangle(mangled, demangled, &bufferSize, &status); + fprintf(stderr, " type -> %s\n", status == 0 ? demangled : mangled); + if (status == 0) { free(demangled); } + abort(); +} + /** The global termination handler. */ -static terminate_handler terminateHandler = bt_terminate_handler; +static terminate_handler terminateHandler = bt_terminate_handler; /** The global unexpected exception handler. 
*/ static unexpected_handler unexpectedHandler = std::terminate; @@ -377,44 +377,44 @@ static void free_exception_list(__cxa_exception *ex) __cxa_free_exception(ex+1); } -#define fast_ti_size 100 - -static long fast_ti_index; -static __cxa_thread_info fast_ti[fast_ti_size]; - -static inline __cxa_thread_info* alloc_thread_info() { - { - long cur_index; - - __atomic_load(&fast_ti_index, &cur_index, __ATOMIC_SEQ_CST); - - // exausted long time ago - if (cur_index >= fast_ti_size) { - return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info))); - } - } - - auto my_index = __sync_fetch_and_add(&fast_ti_index, 1); - - // exausted - if (my_index >= fast_ti_size) { - return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info))); - } - - // fast path - auto& ret = fast_ti[my_index]; - - memset(&ret, 0, sizeof(ret)); - - return &ret; -} - -static inline void free_thread_info(__cxa_thread_info* ti) { - if ((ti < fast_ti) || (ti >= (fast_ti + fast_ti_size))) { - free(ti); - } -} - +#define fast_ti_size 100 + +static long fast_ti_index; +static __cxa_thread_info fast_ti[fast_ti_size]; + +static inline __cxa_thread_info* alloc_thread_info() { + { + long cur_index; + + __atomic_load(&fast_ti_index, &cur_index, __ATOMIC_SEQ_CST); + + // exausted long time ago + if (cur_index >= fast_ti_size) { + return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info))); + } + } + + auto my_index = __sync_fetch_and_add(&fast_ti_index, 1); + + // exausted + if (my_index >= fast_ti_size) { + return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info))); + } + + // fast path + auto& ret = fast_ti[my_index]; + + memset(&ret, 0, sizeof(ret)); + + return &ret; +} + +static inline void free_thread_info(__cxa_thread_info* ti) { + if ((ti < fast_ti) || (ti >= (fast_ti + fast_ti_size))) { + free(ti); + } +} + /** * Cleanup function called when a thread exists to make certain that all of the * per-thread data is deleted. 
@@ -436,7 +436,7 @@ static void thread_cleanup(void* thread_info) free_exception_list(info->globals.caughtExceptions); } } - free_thread_info(info); + free_thread_info(info); } /** @@ -457,8 +457,8 @@ static void init_key(void) pthread_setspecific(eh_key, 0); } -static __thread __cxa_thread_info* THR_INFO = nullptr; - +static __thread __cxa_thread_info* THR_INFO = nullptr; + /** * Returns the thread info structure, creating it if it is not already created. */ @@ -477,14 +477,14 @@ static __cxa_thread_info *thread_info() THR_INFO = info; return info; } - -// ensure main thread will allocate preallocated tls -static struct InitMainTls { - inline InitMainTls() { - thread_info(); - } -} init_main_tls; - + +// ensure main thread will allocate preallocated tls +static struct InitMainTls { + inline InitMainTls() { + thread_info(); + } +} init_main_tls; + /** * Fast version of thread_info(). May fail if thread_info() is not called on * this thread at least once already. diff --git a/contrib/libs/cxxsupp/libcxxrt/ya.make b/contrib/libs/cxxsupp/libcxxrt/ya.make index feab5bda041..12dccbd505a 100644 --- a/contrib/libs/cxxsupp/libcxxrt/ya.make +++ b/contrib/libs/cxxsupp/libcxxrt/ya.make @@ -1,7 +1,7 @@ # Generated by devtools/yamaker from nixpkgs 9ee8bd188933750be0584f285daf9a295d0c8930. 
LIBRARY() - + LICENSE( BSD-2-Clause AND BSD-2-Clause-Views AND @@ -42,11 +42,11 @@ ELSE() ENDIF() IF (SANITIZER_TYPE == undefined OR FUZZING) - NO_SANITIZE() + NO_SANITIZE() NO_SANITIZE_COVERAGE() ENDIF() - -SRCS( + +SRCS( auxhelper.cc dynamic_cast.cc exception.cc diff --git a/contrib/libs/cxxsupp/libsan/ya.make b/contrib/libs/cxxsupp/libsan/ya.make index 8c54354b934..2fb16630be8 100644 --- a/contrib/libs/cxxsupp/libsan/ya.make +++ b/contrib/libs/cxxsupp/libsan/ya.make @@ -3,7 +3,7 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() LICENSE(YandexOpen) - + NO_PLATFORM() NO_SANITIZE() diff --git a/contrib/libs/cxxsupp/openmp/asm.S b/contrib/libs/cxxsupp/openmp/asm.S index 426373bee37..1c869244eff 100644 --- a/contrib/libs/cxxsupp/openmp/asm.S +++ b/contrib/libs/cxxsupp/openmp/asm.S @@ -1 +1 @@ -#include "z_Linux_asm.s" +#include "z_Linux_asm.s" diff --git a/contrib/libs/cxxsupp/openmp/extractExternal.cpp b/contrib/libs/cxxsupp/openmp/extractExternal.cpp index 83c61869e8d..7a6fdb7e297 100644 --- a/contrib/libs/cxxsupp/openmp/extractExternal.cpp +++ b/contrib/libs/cxxsupp/openmp/extractExternal.cpp @@ -1,497 +1,497 @@ -/* - * extractExternal.cpp - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include -#include -#include -#include -#include -#include -#include - -/* Given a set of n object files h ('external' object files) and a set of m - object files o ('internal' object files), - 1. Determines r, the subset of h that o depends on, directly or indirectly - 2. Removes the files in h - r from the file system - 3. 
For each external symbol defined in some file in r, rename it in r U o - by prefixing it with "__kmp_external_" - Usage: - hide.exe - - Thus, the prefixed symbols become hidden in the sense that they now have a special - prefix. -*/ - -using namespace std; - -void stop(char* errorMsg) { - printf("%s\n", errorMsg); - exit(1); -} - -// an entry in the symbol table of a .OBJ file -class Symbol { -public: - __int64 name; - unsigned value; - unsigned short sectionNum, type; - char storageClass, nAux; -}; - -class _rstream : public istrstream { -private: - const char *buf; -protected: - _rstream(pair p):istrstream(p.first,p.second),buf(p.first){} - ~_rstream() { - delete[]buf; - } -}; - -/* A stream encapuslating the content of a file or the content of a string, overriding the - >> operator to read various integer types in binary form, as well as a symbol table - entry. -*/ -class rstream : public _rstream { -private: - template - inline rstream& doRead(T &x) { - read((char*)&x, sizeof(T)); - return *this; - } - static pair getBuf(const char *fileName) { - ifstream raw(fileName,ios::binary | ios::in); - if(!raw.is_open()) - stop("rstream.getBuf: Error opening file"); - raw.seekg(0,ios::end); - streampos fileSize = raw.tellg(); - if(fileSize < 0) - stop("rstream.getBuf: Error reading file"); - char *buf = new char[fileSize]; - raw.seekg(0,ios::beg); - raw.read(buf, fileSize); - return pair(buf,fileSize); - } -public: - // construct from a string - rstream(const char *buf,streamsize size):_rstream(pair(buf, size)){} - /* construct from a file whole content is fully read once to initialize the content of - this stream - */ - rstream(const char *fileName):_rstream(getBuf(fileName)){} - rstream& operator>>(int &x) { - return doRead(x); - } - rstream& operator>>(unsigned &x) { - return doRead(x); - } - rstream& operator>>(short &x) { - return doRead(x); - } - rstream& operator>>(unsigned short &x) { - return doRead(x); - } - rstream& operator>>(Symbol &e) { - read((char*)&e, 
18); - return *this; - } -}; - -// string table in a .OBJ file -class StringTable { -private: - map directory; - size_t length; - char *data; - - // make from bytes in - void makeDirectory(void) { - unsigned i = 4; - while(i < length) { - string s = string(data + i); - directory.insert(make_pair(s, i)); - i += s.size() + 1; - } - } - // initialize and with contents specified by the arguments - void init(const char *_data) { - unsigned _length = *(unsigned*)_data; - - if(_length < sizeof(unsigned) || _length != *(unsigned*)_data) - stop("StringTable.init: Invalid symbol table"); - if(_data[_length - 1]) { - // to prevent runaway strings, make sure the data ends with a zero - data = new char[length = _length + 1]; - data[_length] = 0; - } else { - data = new char[length = _length]; - } - *(unsigned*)data = length; - KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned), - length - sizeof(unsigned)); - makeDirectory(); - } -public: - StringTable(rstream &f) { - /* Construct string table by reading from f. - */ - streampos s; - unsigned strSize; - char *strData; - - s = f.tellg(); - f>>strSize; - if(strSize < sizeof(unsigned)) - stop("StringTable: Invalid string table"); - strData = new char[strSize]; - *(unsigned*)strData = strSize; - // read the raw data into - f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned)); - s = f.tellg() - s; - if(s < strSize) - stop("StringTable: Unexpected EOF"); - init(strData); - delete[]strData; - } - StringTable(const set &strings) { - /* Construct string table from given strings. 
- */ - char *p; - set::const_iterator it; - size_t s; - - // count required size for data - for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { - size_t l = (*it).size(); - - if(l > (unsigned) 0xFFFFFFFF) - stop("StringTable: String too long"); - if(l > 8) { - length += l + 1; - if(length > (unsigned) 0xFFFFFFFF) - stop("StringTable: Symbol table too long"); - } - } - data = new char[length]; - *(unsigned*)data = length; - // populate data and directory - for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { - const string &str = *it; - size_t l = str.size(); - if(l > 8) { - directory.insert(make_pair(str, p - data)); - KMP_MEMCPY(p, str.c_str(), l); - p[l] = 0; - p += l + 1; - } - } - } - ~StringTable() { - delete[] data; - } - /* Returns encoding for given string based on this string table. - Error if string length is greater than 8 but string is not in - the string table--returns 0. - */ - __int64 encode(const string &str) { - __int64 r; - - if(str.size() <= 8) { - // encoded directly - ((char*)&r)[7] = 0; - KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8); - return r; - } else { - // represented as index into table - map::const_iterator it = directory.find(str); - if(it == directory.end()) - stop("StringTable::encode: String now found in string table"); - ((unsigned*)&r)[0] = 0; - ((unsigned*)&r)[1] = (*it).second; - return r; - } - } - /* Returns string represented by x based on this string table. - Error if x references an invalid position in the table--returns - the empty string. 
- */ - string decode(__int64 x) const { - if(*(unsigned*)&x == 0) { - // represented as index into table - unsigned &p = ((unsigned*)&x)[1]; - if(p >= length) - stop("StringTable::decode: Invalid string table lookup"); - return string(data + p); - } else { - // encoded directly - char *p = (char*)&x; - int i; - - for(i = 0; i < 8 && p[i]; ++i); - return string(p, i); - } - } - void write(ostream &os) { - os.write(data, length); - } -}; - -/* for the named object file, determines the set of defined symbols and the set of undefined external symbols - and writes them to and respectively -*/ -void computeExternalSymbols(const char *fileName, set *defined, set *undefined){ - streampos fileSize; - size_t strTabStart; - unsigned symTabStart, symNEntries; - rstream f(fileName); - - f.seekg(0,ios::end); - fileSize = f.tellg(); - - f.seekg(8); - f >> symTabStart >> symNEntries; - // seek to the string table - f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); - if(f.eof()) { - printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n", - fileName, (unsigned long) fileSize, symTabStart, symNEntries); - stop("computeExternalSymbols: Unexpected EOF 1"); - } - StringTable stringTable(f); // read the string table - if(f.tellg() != fileSize) - stop("computeExternalSymbols: Unexpected data after string table"); - - f.clear(); - f.seekg(symTabStart); // seek to the symbol table - - defined->clear(); undefined->clear(); - for(int i = 0; i < symNEntries; ++i) { - // process each entry - Symbol e; - - if(f.eof()) - stop("computeExternalSymbols: Unexpected EOF 2"); - f>>e; - if(f.fail()) - stop("computeExternalSymbols: File read error"); - if(e.nAux) { // auxiliary entry: skip - f.seekg(e.nAux * 18, ios::cur); - i += e.nAux; - } - // if symbol is extern and defined in the current file, insert it - if(e.storageClass == 2) - if(e.sectionNum) - defined->insert(stringTable.decode(e.name)); - else - 
undefined->insert(stringTable.decode(e.name)); - } -} - -/* For each occurrence of an external symbol in the object file named by - by that is a member of , renames it by prefixing - with "__kmp_external_", writing back the file in-place -*/ -void hideSymbols(char *fileName, const set &hide) { - static const string prefix("__kmp_external_"); - set strings; // set of all occurring symbols, appropriately prefixed - streampos fileSize; - size_t strTabStart; - unsigned symTabStart, symNEntries; - int i; - rstream in(fileName); - - in.seekg(0,ios::end); - fileSize = in.tellg(); - - in.seekg(8); - in >> symTabStart >> symNEntries; - in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); - if(in.eof()) - stop("hideSymbols: Unexpected EOF"); - StringTable stringTableOld(in); // read original string table - - if(in.tellg() != fileSize) - stop("hideSymbols: Unexpected data after string table"); - - // compute set of occurring strings with prefix added - for(i = 0; i < symNEntries; ++i) { - Symbol e; - - in.seekg(symTabStart + i * 18); - if(in.eof()) - stop("hideSymbols: Unexpected EOF"); - in >> e; - if(in.fail()) - stop("hideSymbols: File read error"); - if(e.nAux) - i += e.nAux; - const string &s = stringTableOld.decode(e.name); - // if symbol is extern and found in , prefix and insert into strings, - // otherwise, just insert into strings without prefix - strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ? 
- prefix + s : s); - } - - ofstream out(fileName, ios::trunc | ios::out | ios::binary); - if(!out.is_open()) - stop("hideSymbols: Error opening output file"); - - // make new string table from string set - StringTable stringTableNew = StringTable(strings); - - // copy input file to output file up to just before the symbol table - in.seekg(0); - char *buf = new char[symTabStart]; - in.read(buf, symTabStart); - out.write(buf, symTabStart); - delete []buf; - - // copy input symbol table to output symbol table with name translation - for(i = 0; i < symNEntries; ++i) { - Symbol e; - - in.seekg(symTabStart + i*18); - if(in.eof()) - stop("hideSymbols: Unexpected EOF"); - in >> e; - if(in.fail()) - stop("hideSymbols: File read error"); - const string &s = stringTableOld.decode(e.name); - out.seekp(symTabStart + i*18); - e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ? - prefix + s : s); - out.write((char*)&e, 18); - if(out.fail()) - stop("hideSymbols: File write error"); - if(e.nAux) { - // copy auxiliary symbol table entries - int nAux = e.nAux; - for(int j = 1; j <= nAux; ++j) { - in >> e; - out.seekp(symTabStart + (i + j) * 18); - out.write((char*)&e, 18); - } - i += nAux; - } - } - // output string table - stringTableNew.write(out); -} - -// returns true iff and have no common element -template -bool isDisjoint(const set &a, const set &b) { - set::const_iterator ita, itb; - - for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { - const T &ta = *ita, &tb = *itb; - if(ta < tb) - ++ita; - else if (tb < ta) - ++itb; - else - return false; - } - return true; -} - -/* precondition: and are arrays with elements where - >= . The first elements correspond to the external object - files and the rest correspond to the internal object files. - postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not - disjoint. 
Returns the transitive closure of the set of internal object files, as a set of - file indexes, under the 'depends on' relation, minus the set of internal object files. -*/ -set *findRequiredExternal(int nExternal, int nTotal, set *defined, set *undefined) { - set *required = new set; - set fresh[2]; - int i, cur = 0; - bool changed; - - for(i = nTotal - 1; i >= nExternal; --i) - fresh[cur].insert(i); - do { - changed = false; - for(set::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) { - set &s = undefined[*it]; - - for(i = 0; i < nExternal; ++i) { - if(required->find(i) == required->end()) { - if(!isDisjoint(defined[i], s)) { - // found a new qualifying element - required->insert(i); - fresh[1 - cur].insert(i); - changed = true; - } - } - } - } - fresh[cur].clear(); - cur = 1 - cur; - } while(changed); - return required; -} - -int main(int argc, char **argv) { - int nExternal, nInternal, i; - set *defined, *undefined; - set::iterator it; - - if(argc < 3) - stop("Please specify a positive integer followed by a list of object filenames"); - nExternal = atoi(argv[1]); - if(nExternal <= 0) - stop("Please specify a positive integer followed by a list of object filenames"); - if(nExternal + 2 > argc) - stop("Too few external objects"); - nInternal = argc - nExternal - 2; - defined = new set[argc - 2]; - undefined = new set[argc - 2]; - - // determine the set of defined and undefined external symbols - for(i = 2; i < argc; ++i) - computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); - - // determine the set of required external files - set *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined); - set hide; - - /* determine the set of symbols to hide--namely defined external symbols of the - required external files - */ - for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { - int idx = *it; - set::iterator it2; - /* We have to insert one element at a time instead of inserting a range because 
- the insert member function taking a range doesn't exist on Windows* OS, at least - at the time of this writing. - */ - for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) - hide.insert(*it2); - } - - /* process the external files--removing those that are not required and hiding - the appropriate symbols in the others - */ - for(i = 0; i < nExternal; ++i) - if(requiredExternal->find(i) != requiredExternal->end()) - hideSymbols(argv[2 + i], hide); - else - remove(argv[2 + i]); - // hide the appropriate symbols in the internal files - for(i = nExternal + 2; i < argc; ++i) - hideSymbols(argv[i], hide); - return 0; -} +/* + * extractExternal.cpp + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include +#include +#include +#include +#include +#include +#include + +/* Given a set of n object files h ('external' object files) and a set of m + object files o ('internal' object files), + 1. Determines r, the subset of h that o depends on, directly or indirectly + 2. Removes the files in h - r from the file system + 3. For each external symbol defined in some file in r, rename it in r U o + by prefixing it with "__kmp_external_" + Usage: + hide.exe + + Thus, the prefixed symbols become hidden in the sense that they now have a special + prefix. 
+*/ + +using namespace std; + +void stop(char* errorMsg) { + printf("%s\n", errorMsg); + exit(1); +} + +// an entry in the symbol table of a .OBJ file +class Symbol { +public: + __int64 name; + unsigned value; + unsigned short sectionNum, type; + char storageClass, nAux; +}; + +class _rstream : public istrstream { +private: + const char *buf; +protected: + _rstream(pair p):istrstream(p.first,p.second),buf(p.first){} + ~_rstream() { + delete[]buf; + } +}; + +/* A stream encapuslating the content of a file or the content of a string, overriding the + >> operator to read various integer types in binary form, as well as a symbol table + entry. +*/ +class rstream : public _rstream { +private: + template + inline rstream& doRead(T &x) { + read((char*)&x, sizeof(T)); + return *this; + } + static pair getBuf(const char *fileName) { + ifstream raw(fileName,ios::binary | ios::in); + if(!raw.is_open()) + stop("rstream.getBuf: Error opening file"); + raw.seekg(0,ios::end); + streampos fileSize = raw.tellg(); + if(fileSize < 0) + stop("rstream.getBuf: Error reading file"); + char *buf = new char[fileSize]; + raw.seekg(0,ios::beg); + raw.read(buf, fileSize); + return pair(buf,fileSize); + } +public: + // construct from a string + rstream(const char *buf,streamsize size):_rstream(pair(buf, size)){} + /* construct from a file whole content is fully read once to initialize the content of + this stream + */ + rstream(const char *fileName):_rstream(getBuf(fileName)){} + rstream& operator>>(int &x) { + return doRead(x); + } + rstream& operator>>(unsigned &x) { + return doRead(x); + } + rstream& operator>>(short &x) { + return doRead(x); + } + rstream& operator>>(unsigned short &x) { + return doRead(x); + } + rstream& operator>>(Symbol &e) { + read((char*)&e, 18); + return *this; + } +}; + +// string table in a .OBJ file +class StringTable { +private: + map directory; + size_t length; + char *data; + + // make from bytes in + void makeDirectory(void) { + unsigned i = 4; + while(i < 
length) { + string s = string(data + i); + directory.insert(make_pair(s, i)); + i += s.size() + 1; + } + } + // initialize and with contents specified by the arguments + void init(const char *_data) { + unsigned _length = *(unsigned*)_data; + + if(_length < sizeof(unsigned) || _length != *(unsigned*)_data) + stop("StringTable.init: Invalid symbol table"); + if(_data[_length - 1]) { + // to prevent runaway strings, make sure the data ends with a zero + data = new char[length = _length + 1]; + data[_length] = 0; + } else { + data = new char[length = _length]; + } + *(unsigned*)data = length; + KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned), + length - sizeof(unsigned)); + makeDirectory(); + } +public: + StringTable(rstream &f) { + /* Construct string table by reading from f. + */ + streampos s; + unsigned strSize; + char *strData; + + s = f.tellg(); + f>>strSize; + if(strSize < sizeof(unsigned)) + stop("StringTable: Invalid string table"); + strData = new char[strSize]; + *(unsigned*)strData = strSize; + // read the raw data into + f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned)); + s = f.tellg() - s; + if(s < strSize) + stop("StringTable: Unexpected EOF"); + init(strData); + delete[]strData; + } + StringTable(const set &strings) { + /* Construct string table from given strings. 
+ */ + char *p; + set::const_iterator it; + size_t s; + + // count required size for data + for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { + size_t l = (*it).size(); + + if(l > (unsigned) 0xFFFFFFFF) + stop("StringTable: String too long"); + if(l > 8) { + length += l + 1; + if(length > (unsigned) 0xFFFFFFFF) + stop("StringTable: Symbol table too long"); + } + } + data = new char[length]; + *(unsigned*)data = length; + // populate data and directory + for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { + const string &str = *it; + size_t l = str.size(); + if(l > 8) { + directory.insert(make_pair(str, p - data)); + KMP_MEMCPY(p, str.c_str(), l); + p[l] = 0; + p += l + 1; + } + } + } + ~StringTable() { + delete[] data; + } + /* Returns encoding for given string based on this string table. + Error if string length is greater than 8 but string is not in + the string table--returns 0. + */ + __int64 encode(const string &str) { + __int64 r; + + if(str.size() <= 8) { + // encoded directly + ((char*)&r)[7] = 0; + KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8); + return r; + } else { + // represented as index into table + map::const_iterator it = directory.find(str); + if(it == directory.end()) + stop("StringTable::encode: String now found in string table"); + ((unsigned*)&r)[0] = 0; + ((unsigned*)&r)[1] = (*it).second; + return r; + } + } + /* Returns string represented by x based on this string table. + Error if x references an invalid position in the table--returns + the empty string. 
+ */ + string decode(__int64 x) const { + if(*(unsigned*)&x == 0) { + // represented as index into table + unsigned &p = ((unsigned*)&x)[1]; + if(p >= length) + stop("StringTable::decode: Invalid string table lookup"); + return string(data + p); + } else { + // encoded directly + char *p = (char*)&x; + int i; + + for(i = 0; i < 8 && p[i]; ++i); + return string(p, i); + } + } + void write(ostream &os) { + os.write(data, length); + } +}; + +/* for the named object file, determines the set of defined symbols and the set of undefined external symbols + and writes them to and respectively +*/ +void computeExternalSymbols(const char *fileName, set *defined, set *undefined){ + streampos fileSize; + size_t strTabStart; + unsigned symTabStart, symNEntries; + rstream f(fileName); + + f.seekg(0,ios::end); + fileSize = f.tellg(); + + f.seekg(8); + f >> symTabStart >> symNEntries; + // seek to the string table + f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); + if(f.eof()) { + printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n", + fileName, (unsigned long) fileSize, symTabStart, symNEntries); + stop("computeExternalSymbols: Unexpected EOF 1"); + } + StringTable stringTable(f); // read the string table + if(f.tellg() != fileSize) + stop("computeExternalSymbols: Unexpected data after string table"); + + f.clear(); + f.seekg(symTabStart); // seek to the symbol table + + defined->clear(); undefined->clear(); + for(int i = 0; i < symNEntries; ++i) { + // process each entry + Symbol e; + + if(f.eof()) + stop("computeExternalSymbols: Unexpected EOF 2"); + f>>e; + if(f.fail()) + stop("computeExternalSymbols: File read error"); + if(e.nAux) { // auxiliary entry: skip + f.seekg(e.nAux * 18, ios::cur); + i += e.nAux; + } + // if symbol is extern and defined in the current file, insert it + if(e.storageClass == 2) + if(e.sectionNum) + defined->insert(stringTable.decode(e.name)); + else + 
undefined->insert(stringTable.decode(e.name)); + } +} + +/* For each occurrence of an external symbol in the object file named by + by that is a member of , renames it by prefixing + with "__kmp_external_", writing back the file in-place +*/ +void hideSymbols(char *fileName, const set &hide) { + static const string prefix("__kmp_external_"); + set strings; // set of all occurring symbols, appropriately prefixed + streampos fileSize; + size_t strTabStart; + unsigned symTabStart, symNEntries; + int i; + rstream in(fileName); + + in.seekg(0,ios::end); + fileSize = in.tellg(); + + in.seekg(8); + in >> symTabStart >> symNEntries; + in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); + if(in.eof()) + stop("hideSymbols: Unexpected EOF"); + StringTable stringTableOld(in); // read original string table + + if(in.tellg() != fileSize) + stop("hideSymbols: Unexpected data after string table"); + + // compute set of occurring strings with prefix added + for(i = 0; i < symNEntries; ++i) { + Symbol e; + + in.seekg(symTabStart + i * 18); + if(in.eof()) + stop("hideSymbols: Unexpected EOF"); + in >> e; + if(in.fail()) + stop("hideSymbols: File read error"); + if(e.nAux) + i += e.nAux; + const string &s = stringTableOld.decode(e.name); + // if symbol is extern and found in , prefix and insert into strings, + // otherwise, just insert into strings without prefix + strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ? 
+ prefix + s : s); + } + + ofstream out(fileName, ios::trunc | ios::out | ios::binary); + if(!out.is_open()) + stop("hideSymbols: Error opening output file"); + + // make new string table from string set + StringTable stringTableNew = StringTable(strings); + + // copy input file to output file up to just before the symbol table + in.seekg(0); + char *buf = new char[symTabStart]; + in.read(buf, symTabStart); + out.write(buf, symTabStart); + delete []buf; + + // copy input symbol table to output symbol table with name translation + for(i = 0; i < symNEntries; ++i) { + Symbol e; + + in.seekg(symTabStart + i*18); + if(in.eof()) + stop("hideSymbols: Unexpected EOF"); + in >> e; + if(in.fail()) + stop("hideSymbols: File read error"); + const string &s = stringTableOld.decode(e.name); + out.seekp(symTabStart + i*18); + e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ? + prefix + s : s); + out.write((char*)&e, 18); + if(out.fail()) + stop("hideSymbols: File write error"); + if(e.nAux) { + // copy auxiliary symbol table entries + int nAux = e.nAux; + for(int j = 1; j <= nAux; ++j) { + in >> e; + out.seekp(symTabStart + (i + j) * 18); + out.write((char*)&e, 18); + } + i += nAux; + } + } + // output string table + stringTableNew.write(out); +} + +// returns true iff and have no common element +template +bool isDisjoint(const set &a, const set &b) { + set::const_iterator ita, itb; + + for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { + const T &ta = *ita, &tb = *itb; + if(ta < tb) + ++ita; + else if (tb < ta) + ++itb; + else + return false; + } + return true; +} + +/* precondition: and are arrays with elements where + >= . The first elements correspond to the external object + files and the rest correspond to the internal object files. + postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not + disjoint. 
Returns the transitive closure of the set of internal object files, as a set of + file indexes, under the 'depends on' relation, minus the set of internal object files. +*/ +set *findRequiredExternal(int nExternal, int nTotal, set *defined, set *undefined) { + set *required = new set; + set fresh[2]; + int i, cur = 0; + bool changed; + + for(i = nTotal - 1; i >= nExternal; --i) + fresh[cur].insert(i); + do { + changed = false; + for(set::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) { + set &s = undefined[*it]; + + for(i = 0; i < nExternal; ++i) { + if(required->find(i) == required->end()) { + if(!isDisjoint(defined[i], s)) { + // found a new qualifying element + required->insert(i); + fresh[1 - cur].insert(i); + changed = true; + } + } + } + } + fresh[cur].clear(); + cur = 1 - cur; + } while(changed); + return required; +} + +int main(int argc, char **argv) { + int nExternal, nInternal, i; + set *defined, *undefined; + set::iterator it; + + if(argc < 3) + stop("Please specify a positive integer followed by a list of object filenames"); + nExternal = atoi(argv[1]); + if(nExternal <= 0) + stop("Please specify a positive integer followed by a list of object filenames"); + if(nExternal + 2 > argc) + stop("Too few external objects"); + nInternal = argc - nExternal - 2; + defined = new set[argc - 2]; + undefined = new set[argc - 2]; + + // determine the set of defined and undefined external symbols + for(i = 2; i < argc; ++i) + computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); + + // determine the set of required external files + set *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined); + set hide; + + /* determine the set of symbols to hide--namely defined external symbols of the + required external files + */ + for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { + int idx = *it; + set::iterator it2; + /* We have to insert one element at a time instead of inserting a range because 
+ the insert member function taking a range doesn't exist on Windows* OS, at least + at the time of this writing. + */ + for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) + hide.insert(*it2); + } + + /* process the external files--removing those that are not required and hiding + the appropriate symbols in the others + */ + for(i = 0; i < nExternal; ++i) + if(requiredExternal->find(i) != requiredExternal->end()) + hideSymbols(argv[2 + i], hide); + else + remove(argv[2 + i]); + // hide the appropriate symbols in the internal files + for(i = nExternal + 2; i < argc; ++i) + hideSymbols(argv[i], hide); + return 0; +} diff --git a/contrib/libs/cxxsupp/openmp/i18n/en_US.txt b/contrib/libs/cxxsupp/openmp/i18n/en_US.txt index 840d29ea481..11d57eb798c 100644 --- a/contrib/libs/cxxsupp/openmp/i18n/en_US.txt +++ b/contrib/libs/cxxsupp/openmp/i18n/en_US.txt @@ -1,475 +1,475 @@ -# en_US.txt # - -# -#//===----------------------------------------------------------------------===// -#// -#// The LLVM Compiler Infrastructure -#// -#// This file is dual licensed under the MIT and the University of Illinois Open -#// Source Licenses. See LICENSE.txt for details. -#// -#//===----------------------------------------------------------------------===// -# - -# Default messages, embedded into the OpenMP RTL, and source for English catalog. - - -# Compatible changes (which does not require version bumping): -# * Editing message (number and type of placeholders must remain, relative order of -# placeholders may be changed, e.g. "File %1$s line %2$d" may be safely edited to -# "Line %2$d file %1$s"). -# * Adding new message to the end of section. -# Incompatible changes (version must be bumbed by 1): -# * Introducing new placeholders to existing messages. -# * Changing type of placeholders (e.g. "line %1$d" -> "line %1$s"). -# * Rearranging order of messages. -# * Deleting messages. 
-# Use special "OBSOLETE" pseudoidentifier for obsolete entries, which is kept only for backward -# compatibility. When version is bumped, do not forget to delete all obsolete entries. - - -# -------------------------------------------------------------------------------------------------- --*- META -*- -# -------------------------------------------------------------------------------------------------- - -# Meta information about message catalog. - -Language "English" -Country "USA" -LangId "1033" -Version "2" -Revision "20140827" - - - -# -------------------------------------------------------------------------------------------------- --*- STRINGS -*- -# -------------------------------------------------------------------------------------------------- - -# Strings are not complete messages, just fragments. We need to work on it and reduce number of -# strings (to zero?). - -Error "Error" -UnknownFile "(unknown file)" -NotANumber "not a number" -BadUnit "bad unit" -IllegalCharacters "illegal characters" -ValueTooLarge "value too large" -ValueTooSmall "value too small" -NotMultiple4K "value is not a multiple of 4k" -UnknownTopology "Unknown processor topology" -CantOpenCpuinfo "Cannot open /proc/cpuinfo" -ProcCpuinfo "/proc/cpuinfo" -NoProcRecords "cpuinfo file invalid (No processor records)" -TooManyProcRecords "cpuinfo file invalid (Too many processor records)" -CantRewindCpuinfo "Cannot rewind cpuinfo file" -LongLineCpuinfo "cpuinfo file invalid (long line)" -TooManyEntries "cpuinfo file contains too many entries" -MissingProcField "cpuinfo file missing processor field" -MissingPhysicalIDField "cpuinfo file missing physical id field" -MissingValCpuinfo "cpuinfo file invalid (missing val)" -DuplicateFieldCpuinfo "cpuinfo file invalid (duplicate field)" -PhysicalIDsNotUnique "Physical node/pkg/core/thread ids not unique" -ApicNotPresent "APIC not present" -InvalidCpuidInfo "Invalid cpuid info" -OBSOLETE "APIC ids not unique" -InconsistentCpuidInfo "Inconsistent 
cpuid info" -OutOfHeapMemory "Out of heap memory" -MemoryAllocFailed "Memory allocation failed" -Core "core" -Thread "thread" -Package "package" -Node "node" -OBSOLETE "" -DecodingLegacyAPIC "decoding legacy APIC ids" -OBSOLETE "parsing /proc/cpuinfo" -NotDefined "value is not defined" -EffectiveSettings "Effective settings:" -UserSettings "User settings:" -StorageMapWarning "warning: pointers or size don't make sense" -OBSOLETE "CPU" -OBSOLETE "TPU" -OBSOLETE "TPUs per package" -OBSOLETE "HT enabled" -OBSOLETE "HT disabled" -Decodingx2APIC "decoding x2APIC ids" -NoLeaf11Support "cpuid leaf 11 not supported" -NoLeaf4Support "cpuid leaf 4 not supported" -ThreadIDsNotUnique "thread ids not unique" -UsingPthread "using pthread info" -LegacyApicIDsNotUnique "legacy APIC ids not unique" -x2ApicIDsNotUnique "x2APIC ids not unique" -DisplayEnvBegin "OPENMP DISPLAY ENVIRONMENT BEGIN" -DisplayEnvEnd "OPENMP DISPLAY ENVIRONMENT END" -Device "[device]" -Host "[host]" - - - -# -------------------------------------------------------------------------------------------------- --*- FORMATS -*- -# -------------------------------------------------------------------------------------------------- - -Info "OMP: Info #%1$d: %2$s\n" -Warning "OMP: Warning #%1$d: %2$s\n" -Fatal "OMP: Error #%1$d: %2$s\n" -SysErr "OMP: System error #%1$d: %2$s\n" -Hint "OMP: Hint: %2$s\n" - -Pragma "%1$s pragma (at %2$s:%3$s():%4$s)" - # %1 is pragma name (like "parallel" or "master", - # %2 is file name, - # %3 is function (routine) name, - # %4 is the line number (as string, so "s" type specifier should be used). - - - -# -------------------------------------------------------------------------------------------------- --*- MESSAGES -*- -# -------------------------------------------------------------------------------------------------- - -# Messages of any severity: informational, warning, or fatal. -# To maintain message numbers (they are visible to customers), add new messages to the end. 
- -# Use following prefixes for messages and hints when appropriate: -# Aff -- Affinity messages. -# Cns -- Consistency check failures (KMP_CONSISTENCY_CHECK). -# Itt -- ITT Notify-related messages. - -LibraryIsSerial "Library is \"serial\"." -CantOpenMessageCatalog "Cannot open message catalog \"%1$s\":" -WillUseDefaultMessages "Default messages will be used." -LockIsUninitialized "%1$s: Lock is uninitialized" -LockSimpleUsedAsNestable "%1$s: Lock was initialized as simple, but used as nestable" -LockNestableUsedAsSimple "%1$s: Lock was initialized as nestable, but used as simple" -LockIsAlreadyOwned "%1$s: Lock is already owned by requesting thread" -LockStillOwned "%1$s: Lock is still owned by a thread" -LockUnsettingFree "%1$s: Attempt to release a lock not owned by any thread" -LockUnsettingSetByAnother "%1$s: Attempt to release a lock owned by another thread" -StackOverflow "Stack overflow detected for OpenMP thread #%1$d" -StackOverlap "Stack overlap detected. " -AssertionFailure "Assertion failure at %1$s(%2$d)." -CantRegisterNewThread "Unable to register a new user thread." -DuplicateLibrary "Initializing %1$s, but found %2$s already initialized." -CantOpenFileForReading "Cannot open file \"%1$s\" for reading:" -CantGetEnvVar "Getting environment variable \"%1$s\" failed:" -CantSetEnvVar "Setting environment variable \"%1$s\" failed:" -CantGetEnvironment "Getting environment failed:" -BadBoolValue "%1$s=\"%2$s\": Wrong value, boolean expected." -SSPNotBuiltIn "No Helper Thread support built in this OMP library." -SPPSotfTerminateFailed "Helper thread failed to soft terminate." -BufferOverflow "Buffer overflow detected." -RealTimeSchedNotSupported "Real-time scheduling policy is not supported." -RunningAtMaxPriority "OMP application is running at maximum priority with real-time scheduling policy. " -CantChangeMonitorPriority "Changing priority of the monitor thread failed:" -MonitorWillStarve "Deadlocks are highly possible due to monitor thread starvation." 
-CantSetMonitorStackSize "Unable to set monitor thread stack size to %1$lu bytes:" -CantSetWorkerStackSize "Unable to set OMP thread stack size to %1$lu bytes:" -CantInitThreadAttrs "Thread attribute initialization failed:" -CantDestroyThreadAttrs "Thread attribute destroying failed:" -CantSetWorkerState "OMP thread joinable state setting failed:" -CantSetMonitorState "Monitor thread joinable state setting failed:" -NoResourcesForWorkerThread "System unable to allocate necessary resources for OMP thread:" -NoResourcesForMonitorThread "System unable to allocate necessary resources for the monitor thread:" -CantTerminateWorkerThread "Unable to terminate OMP thread:" -ScheduleKindOutOfRange "Wrong schedule type %1$d, see or file for the list of values supported." -UnknownSchedulingType "Unknown scheduling type \"%1$d\"." -InvalidValue "%1$s value \"%2$s\" is invalid." -SmallValue "%1$s value \"%2$s\" is too small." -LargeValue "%1$s value \"%2$s\" is too large." -StgInvalidValue "%1$s: \"%2$s\" is an invalid value; ignored." -BarrReleaseValueInvalid "%1$s release value \"%2$s\" is invalid." -BarrGatherValueInvalid "%1$s gather value \"%2$s\" is invalid." -OBSOLETE "%1$s supported only on debug builds; ignored." -ParRangeSyntax "Syntax error: Usage: %1$s=[ routine= | filename= | range=: " - "| excl_range=: ],..." -UnbalancedQuotes "Unbalanced quotes in %1$s." -EmptyString "Empty string specified for %1$s; ignored." -LongValue "%1$s value is too long; ignored." -InvalidClause "%1$s: Invalid clause in \"%2$s\"." -EmptyClause "Empty clause in %1$s." -InvalidChunk "%1$s value \"%2$s\" is invalid chunk size." -LargeChunk "%1$s value \"%2$s\" is to large chunk size." -IgnoreChunk "%1$s value \"%2$s\" is ignored." -CantGetProcFreq "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY." -EnvParallelWarn "%1$s must be set prior to first parallel region; ignored." -AffParamDefined "%1$s: parameter has been specified already, ignoring \"%2$s\"." 
-AffInvalidParam "%1$s: parameter invalid, ignoring \"%2$s\"." -AffManyParams "%1$s: too many integer parameters specified, ignoring \"%2$s\"." -AffManyParamsForLogic "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\"." -AffNoParam "%1$s: '%2$s' type does not take any integer parameters, ignoring them." -AffNoProcList "%1$s: proclist not specified with explicit affinity type, using \"none\"." -AffProcListNoType "%1$s: proclist specified, setting affinity type to \"explicit\"." -AffProcListNotExplicit "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored." -AffSyntaxError "%1$s: syntax error, not using affinity." -AffZeroStride "%1$s: range error (zero stride), not using affinity." -AffStartGreaterEnd "%1$s: range error (%2$d > %3$d), not using affinity." -AffStrideLessZero "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity." -AffRangeTooBig "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity." -OBSOLETE "%1$s: %2$s is defined. %3$s will be ignored." -AffNotSupported "%1$s: affinity not supported, using \"disabled\"." -OBSOLETE "%1$s: affinity only supported for Intel(R) processors." -GetAffSysCallNotSupported "%1$s: getaffinity system call not supported." -SetAffSysCallNotSupported "%1$s: setaffinity system call not supported." -OBSOLETE "%1$s: pthread_aff_set_np call not found." -OBSOLETE "%1$s: pthread_get_num_resources_np call not found." -OBSOLETE "%1$s: the OS kernel does not support affinity." -OBSOLETE "%1$s: pthread_get_num_resources_np returned %2$d." -AffCantGetMaskSize "%1$s: cannot determine proper affinity mask size." -ParseSizeIntWarn "%1$s=\"%2$s\": %3$s." -ParseExtraCharsWarn "%1$s: extra trailing characters ignored: \"%2$s\"." -UnknownForceReduction "%1$s: unknown method \"%2$s\"." -TimerUseGettimeofday "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday." -TimerNeedMoreParam "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 
'clock_gettime,2'. Using gettimeofday." -TimerInvalidParam "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday." -TimerGettimeFailed "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday." -TimerUnknownFunction "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\")." -UnknownSchedTypeDetected "Unknown scheduling type detected." -DispatchManyThreads "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling." -IttLookupFailed "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed." -IttLoadLibFailed "ittnotify: Loading \"%1$s\" library failed." -IttAllNotifDisabled "ittnotify: All itt notifications disabled." -IttObjNotifDisabled "ittnotify: Object state itt notifications disabled." -IttMarkNotifDisabled "ittnotify: Mark itt notifications disabled." -IttUnloadLibFailed "ittnotify: Unloading \"%1$s\" library failed." -CantFormThrTeam "Cannot form a team with %1$d threads, using %2$d instead." -ActiveLevelsNegative "Requested number of active parallel levels \"%1$d\" is negative; ignored." -ActiveLevelsExceedLimit "Requested number of active parallel levels \"%1$d\" exceeds supported limit; " - "the following limit value will be used: \"%1$d\"." -SetLibraryIncorrectCall "kmp_set_library must only be called from the top level serial thread; ignored." -FatalSysError "Fatal system error detected." -OutOfHeapMemory "Out of heap memory." -OBSOLETE "Clearing __KMP_REGISTERED_LIB env var failed." -OBSOLETE "Registering library with env var failed." -Using_int_Value "%1$s value \"%2$d\" will be used." -Using_uint_Value "%1$s value \"%2$u\" will be used." -Using_uint64_Value "%1$s value \"%2$s\" will be used." -Using_str_Value "%1$s value \"%2$s\" will be used." -MaxValueUsing "%1$s maximum value \"%2$d\" will be used." -MinValueUsing "%1$s minimum value \"%2$d\" will be used." -MemoryAllocFailed "Memory allocation failed." -FileNameTooLong "File name too long." 
-OBSOLETE "Lock table overflow." -ManyThreadsForTPDirective "Too many threads to use threadprivate directive." -AffinityInvalidMask "%1$s: invalid mask." -WrongDefinition "Wrong definition." -TLSSetValueFailed "Windows* OS: TLS Set Value failed." -TLSOutOfIndexes "Windows* OS: TLS out of indexes." -OBSOLETE "PDONE directive must be nested within a DO directive." -CantGetNumAvailCPU "Cannot get number of available CPUs." -AssumedNumCPU "Assumed number of CPUs is 2." -ErrorInitializeAffinity "Error initializing affinity - not using affinity." -AffThreadsMayMigrate "Threads may migrate across all available OS procs (granularity setting too coarse)." -AffIgnoreInvalidProcID "Ignoring invalid OS proc ID %1$d." -AffNoValidProcID "No valid OS proc IDs specified - not using affinity." -UsingFlatOS "%1$s - using \"flat\" OS <-> physical proc mapping." -UsingFlatOSFile "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping." -UsingFlatOSFileLine "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping." -FileMsgExiting "%1$s: %2$s - exiting." -FileLineMsgExiting "%1$s, line %2$d: %3$s - exiting." -ConstructIdentInvalid "Construct identifier invalid." -ThreadIdentInvalid "Thread identifier invalid." -RTLNotInitialized "runtime library not initialized." -TPCommonBlocksInconsist "Inconsistent THREADPRIVATE common block declarations are non-conforming " - "and are unsupported. Either all threadprivate common blocks must be declared " - "identically, or the largest instance of each threadprivate common block " - "must be referenced first during the run." -CantSetThreadAffMask "Cannot set thread affinity mask." -CantSetThreadPriority "Cannot set thread priority." -CantCreateThread "Cannot create thread." -CantCreateEvent "Cannot create event." -CantSetEvent "Cannot set event." -CantCloseHandle "Cannot close handle." -UnknownLibraryType "Unknown library type: %1$d." -ReapMonitorError "Monitor did not reap properly." -ReapWorkerError "Worker thread failed to join." 
-ChangeThreadAffMaskError "Cannot change thread affinity mask." -ThreadsMigrate "%1$s: Threads may migrate across %2$d innermost levels of machine" -DecreaseToThreads "%1$s: decrease to %2$d threads" -IncreaseToThreads "%1$s: increase to %2$d threads" -OBSOLETE "%1$s: Internal thread %2$d bound to OS proc set %3$s" -AffCapableUseCpuinfo "%1$s: Affinity capable, using cpuinfo file" -AffUseGlobCpuid "%1$s: Affinity capable, using global cpuid info" -AffCapableUseFlat "%1$s: Affinity capable, using default \"flat\" topology" -AffNotCapableUseLocCpuid "%1$s: Affinity not capable, using local cpuid info" -AffNotCapableUseCpuinfo "%1$s: Affinity not capable, using cpuinfo file" -AffFlatTopology "%1$s: Affinity not capable, assumming \"flat\" topology" -InitOSProcSetRespect "%1$s: Initial OS proc set respected: %2$s" -InitOSProcSetNotRespect "%1$s: Initial OS proc set not respected: %2$s" -AvailableOSProc "%1$s: %2$d available OS procs" -Uniform "%1$s: Uniform topology" -NonUniform "%1$s: Nonuniform topology" -Topology "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)" -OBSOLETE "%1$s: OS proc to physical thread map ([] => level not in map):" -OSProcToPackage "%1$s: OS proc maps to th package core 0" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d" -OSProcMapToPack "%1$s: OS proc %2$d maps to %3$s" -OBSOLETE 
"%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d" -OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d" -OBSOLETE "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package" -OBSOLETE "%1$s: HT disabled; %2$d packages" -BarriersInDifferentOrder "Threads encountered barriers in different order. " -FunctionError "Function %1$s failed:" -TopologyExtra "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)" -WrongMessageCatalog "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected." -StgIgnored "%1$s: ignored because %2$s has been defined" - # %1, -- name of ignored variable, %2 -- name of variable with higher priority. -OBSOLETE "%1$s: overrides %3$s specified before" - # %1, %2 -- name and value of the overriding variable, %3 -- name of overriden variable. - -# --- OpenMP errors detected at runtime --- -# -# %1 is the name of OpenMP construct (formatted with "Pragma" format). -# -CnsBoundToWorksharing "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause" -CnsDetectedEnd "Detected end of %1$s without first executing a corresponding beginning." -CnsIterationRangeTooLarge "Iteration range too large in %1$s." -CnsLoopIncrZeroProhibited "%1$s must not have a loop increment that evaluates to zero." -# -# %1 is the name of the first OpenMP construct, %2 -- the name of the second one (both formatted with "Pragma" format). -# -CnsExpectedEnd "Expected end of %1$s; %2$s, however, has most recently begun execution." 
-CnsInvalidNesting "%1$s is incorrectly nested within %2$s" -CnsMultipleNesting "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s" -CnsNestingSameName "%1$s is incorrectly nested within %2$s of the same name" -CnsNoOrderedClause "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause" -CnsNotInTaskConstruct "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs" -CnsThreadsAtBarrier "One thread at %1$s while another thread is at %2$s." - -# New errors -CantConnect "Cannot connect to %1$s" -CantConnectUsing "Cannot connect to %1$s - Using %2$s" -LibNotSupport "%1$s does not support %2$s. Continuing without using %2$s." -LibNotSupportFor "%1$s does not support %2$s for %3$s. Continuing without using %2$s." -StaticLibNotSupport "Static %1$s does not support %2$s. Continuing without using %2$s." -OBSOLETE "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0" -IttUnknownGroup "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\"." -IttEnvVarTooLong "ittnotify: Environment variable \"%1$s\" too long: Actual lengths is %2$lu, max allowed length is %3$lu." -AffUseGlobCpuidL11 "%1$s: Affinity capable, using global cpuid leaf 11 info" -AffNotCapableUseLocCpuidL11 "%1$s: Affinity not capable, using local cpuid leaf 11 info" -AffInfoStr "%1$s: %2$s." -AffInfoStrStr "%1$s: %2$s - %3$s." -OSProcToPhysicalThreadMap "%1$s: OS proc to physical thread map:" -AffUsingFlatOS "%1$s: using \"flat\" OS <-> physical proc mapping." -AffParseFilename "%1$s: parsing %2$s." -MsgExiting "%1$s - exiting." -IncompatibleLibrary "Incompatible %1$s library with version %2$s found." -IttFunctionError "ittnotify: Function %1$s failed:" -IttUnknownError "ittnofify: Error #%1$d." -EnvMiddleWarn "%1$s must be set prior to first parallel region or certain API calls; ignored." 
-CnsLockNotDestroyed "Lock initialized at %1$s(%2$d) was not destroyed" - # %1, %2, %3, %4 -- file, line, func, col -CantLoadBalUsing "Cannot determine machine load balance - Using %1$s" -AffNotCapableUsePthread "%1$s: Affinity not capable, using pthread info" -AffUsePthread "%1$s: Affinity capable, using pthread info" -OBSOLETE "Loading \"%1$s\" library failed:" -OBSOLETE "Lookup of \"%1$s\" function failed:" -OBSOLETE "Buffer too small." -OBSOLETE "Error #%1$d." -NthSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"." -NthSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"." -AffStrParseFilename "%1$s: %2$s - parsing %3$s." -OBSOLETE "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group." -AffTypeCantUseMultGroups "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"." -AffGranCantUseMultGroups "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"." -AffWindowsProcGroupMap "%1$s: Mapping Windows* OS processor group proc to OS proc 64*+." -AffOSProcToGroup "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d" -AffBalancedNotAvail "%1$s: Affinity balanced is not available." -OBSOLETE "%1$s: granularity=core will be used." -EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored." -FutexNotSupported "futex system call not supported; %1$s=%2$s ignored." -AffGranUsing "%1$s: granularity=%2$s will be used." -AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt " - "(nSockets@offset, nCores@offset, nTthreads per core)\"." -AffThrPlaceUnsupported "KMP_PLACE_THREADS ignored: unsupported architecture." -AffThrPlaceManyCores "KMP_PLACE_THREADS ignored: too many cores requested." -SyntaxErrorUsing "%1$s: syntax error, using %2$s." -AdaptiveNotSupported "%1$s: Adaptive locks are not supported; using queuing." 
-EnvSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"." -EnvSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"." -BoundToOSProcSet "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s" -CnsLoopIncrIllegal "%1$s error: parallel loop increment and condition are inconsistent." -NoGompCancellation "libgomp cancellation is not currently supported." -AffThrPlaceNonUniform "KMP_PLACE_THREADS ignored: non-uniform topology." -AffThrPlaceNonThreeLevel "KMP_PLACE_THREADS ignored: only three-level topology is supported." -AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"." -AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"." -AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested." -AffThrPlaceDeprecated "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value." -AffUsingHwloc "%1$s: Affinity capable, using hwloc." -AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism." -AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms." - - -# -------------------------------------------------------------------------------------------------- --*- HINTS -*- -# -------------------------------------------------------------------------------------------------- - -# Hints. Hint may be printed after a message. Usually it is longer explanation text or suggestion. -# To maintain hint numbers (they are visible to customers), add new hints to the end. - -SubmitBugReport "Please submit a bug report with this message, compile and run " - "commands used, and machine configuration info including native " - "compiler and operating system versions. Faster response will be " - "obtained by including all program sources. For information on " - "submitting this issue, please see " - "http://www.intel.com/software/products/support/." 
-OBSOLETE "Check NLSPATH environment variable, its value is \"%1$s\"." -ChangeStackLimit "Please try changing the shell stack limit or adjusting the " - "OMP_STACKSIZE environment variable." -Unset_ALL_THREADS "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set)." -Set_ALL_THREADPRIVATE "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d." -PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads." -DuplicateLibrary "This means that multiple copies of the OpenMP runtime have been " - "linked into the program. That is dangerous, since it can degrade " - "performance or cause incorrect results. " - "The best thing to do is to ensure that only a single OpenMP runtime is " - "linked into the process, e.g. by avoiding static linking of the OpenMP " - "runtime in any library. As an unsafe, unsupported, undocumented workaround " - "you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow " - "the program to continue to execute, but that may cause crashes or " - "silently produce incorrect results. " - "For more information, please see http://www.intel.com/software/products/support/." -NameComesFrom_CPUINFO_FILE "This name is specified in environment variable KMP_CPUINFO_FILE." -NotEnoughMemory "Seems application required too much memory." -ValidBoolValues "Use \"0\", \"FALSE\". \".F.\", \"off\", \"no\" as false values, " - "\"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values." -BufferOverflow "Perhaps too many threads." -RunningAtMaxPriority "Decrease priority of application. " - "This will allow the monitor thread run at higher priority than other threads." -ChangeMonitorStackSize "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit." -ChangeWorkerStackSize "Try changing OMP_STACKSIZE and/or the shell stack limit." -IncreaseWorkerStackSize "Try increasing OMP_STACKSIZE or the shell stack limit." -DecreaseWorkerStackSize "Try decreasing OMP_STACKSIZE." 
-Decrease_NUM_THREADS "Try decreasing the value of OMP_NUM_THREADS." -IncreaseMonitorStackSize "Try increasing KMP_MONITOR_STACKSIZE." -DecreaseMonitorStackSize "Try decreasing KMP_MONITOR_STACKSIZE." -DecreaseNumberOfThreadsInUse "Try decreasing the number of threads in use simultaneously." -DefaultScheduleKindUsed "Will use default schedule type (%1$s)." -GetNewerLibrary "It could be a result of using an older OMP library with a newer " - "compiler or memory corruption. You may check the proper OMP library " - "is linked to the application." -CheckEnvVar "Check %1$s environment variable, its value is \"%2$s\"." -OBSOLETE "You may want to use an %1$s library that supports %2$s interface with version %3$s." -OBSOLETE "You may want to use an %1$s library with version %2$s." -BadExeFormat "System error #193 is \"Bad format of EXE or DLL file\". " - "Usually it means the file is found, but it is corrupted or " - "a file for another architecture. " - "Check whether \"%1$s\" is a file for %2$s architecture." -SystemLimitOnThreads "System-related limit on the number of threads." - - - -# -------------------------------------------------------------------------------------------------- -# end of file # -# -------------------------------------------------------------------------------------------------- - +# en_US.txt # + +# +#//===----------------------------------------------------------------------===// +#// +#// The LLVM Compiler Infrastructure +#// +#// This file is dual licensed under the MIT and the University of Illinois Open +#// Source Licenses. See LICENSE.txt for details. +#// +#//===----------------------------------------------------------------------===// +# + +# Default messages, embedded into the OpenMP RTL, and source for English catalog. + + +# Compatible changes (which does not require version bumping): +# * Editing message (number and type of placeholders must remain, relative order of +# placeholders may be changed, e.g. 
"File %1$s line %2$d" may be safely edited to +# "Line %2$d file %1$s"). +# * Adding new message to the end of section. +# Incompatible changes (version must be bumbed by 1): +# * Introducing new placeholders to existing messages. +# * Changing type of placeholders (e.g. "line %1$d" -> "line %1$s"). +# * Rearranging order of messages. +# * Deleting messages. +# Use special "OBSOLETE" pseudoidentifier for obsolete entries, which is kept only for backward +# compatibility. When version is bumped, do not forget to delete all obsolete entries. + + +# -------------------------------------------------------------------------------------------------- +-*- META -*- +# -------------------------------------------------------------------------------------------------- + +# Meta information about message catalog. + +Language "English" +Country "USA" +LangId "1033" +Version "2" +Revision "20140827" + + + +# -------------------------------------------------------------------------------------------------- +-*- STRINGS -*- +# -------------------------------------------------------------------------------------------------- + +# Strings are not complete messages, just fragments. We need to work on it and reduce number of +# strings (to zero?). 
+ +Error "Error" +UnknownFile "(unknown file)" +NotANumber "not a number" +BadUnit "bad unit" +IllegalCharacters "illegal characters" +ValueTooLarge "value too large" +ValueTooSmall "value too small" +NotMultiple4K "value is not a multiple of 4k" +UnknownTopology "Unknown processor topology" +CantOpenCpuinfo "Cannot open /proc/cpuinfo" +ProcCpuinfo "/proc/cpuinfo" +NoProcRecords "cpuinfo file invalid (No processor records)" +TooManyProcRecords "cpuinfo file invalid (Too many processor records)" +CantRewindCpuinfo "Cannot rewind cpuinfo file" +LongLineCpuinfo "cpuinfo file invalid (long line)" +TooManyEntries "cpuinfo file contains too many entries" +MissingProcField "cpuinfo file missing processor field" +MissingPhysicalIDField "cpuinfo file missing physical id field" +MissingValCpuinfo "cpuinfo file invalid (missing val)" +DuplicateFieldCpuinfo "cpuinfo file invalid (duplicate field)" +PhysicalIDsNotUnique "Physical node/pkg/core/thread ids not unique" +ApicNotPresent "APIC not present" +InvalidCpuidInfo "Invalid cpuid info" +OBSOLETE "APIC ids not unique" +InconsistentCpuidInfo "Inconsistent cpuid info" +OutOfHeapMemory "Out of heap memory" +MemoryAllocFailed "Memory allocation failed" +Core "core" +Thread "thread" +Package "package" +Node "node" +OBSOLETE "" +DecodingLegacyAPIC "decoding legacy APIC ids" +OBSOLETE "parsing /proc/cpuinfo" +NotDefined "value is not defined" +EffectiveSettings "Effective settings:" +UserSettings "User settings:" +StorageMapWarning "warning: pointers or size don't make sense" +OBSOLETE "CPU" +OBSOLETE "TPU" +OBSOLETE "TPUs per package" +OBSOLETE "HT enabled" +OBSOLETE "HT disabled" +Decodingx2APIC "decoding x2APIC ids" +NoLeaf11Support "cpuid leaf 11 not supported" +NoLeaf4Support "cpuid leaf 4 not supported" +ThreadIDsNotUnique "thread ids not unique" +UsingPthread "using pthread info" +LegacyApicIDsNotUnique "legacy APIC ids not unique" +x2ApicIDsNotUnique "x2APIC ids not unique" +DisplayEnvBegin "OPENMP DISPLAY ENVIRONMENT BEGIN" 
+DisplayEnvEnd "OPENMP DISPLAY ENVIRONMENT END" +Device "[device]" +Host "[host]" + + + +# -------------------------------------------------------------------------------------------------- +-*- FORMATS -*- +# -------------------------------------------------------------------------------------------------- + +Info "OMP: Info #%1$d: %2$s\n" +Warning "OMP: Warning #%1$d: %2$s\n" +Fatal "OMP: Error #%1$d: %2$s\n" +SysErr "OMP: System error #%1$d: %2$s\n" +Hint "OMP: Hint: %2$s\n" + +Pragma "%1$s pragma (at %2$s:%3$s():%4$s)" + # %1 is pragma name (like "parallel" or "master", + # %2 is file name, + # %3 is function (routine) name, + # %4 is the line number (as string, so "s" type specifier should be used). + + + +# -------------------------------------------------------------------------------------------------- +-*- MESSAGES -*- +# -------------------------------------------------------------------------------------------------- + +# Messages of any severity: informational, warning, or fatal. +# To maintain message numbers (they are visible to customers), add new messages to the end. + +# Use following prefixes for messages and hints when appropriate: +# Aff -- Affinity messages. +# Cns -- Consistency check failures (KMP_CONSISTENCY_CHECK). +# Itt -- ITT Notify-related messages. + +LibraryIsSerial "Library is \"serial\"." +CantOpenMessageCatalog "Cannot open message catalog \"%1$s\":" +WillUseDefaultMessages "Default messages will be used." 
+LockIsUninitialized "%1$s: Lock is uninitialized" +LockSimpleUsedAsNestable "%1$s: Lock was initialized as simple, but used as nestable" +LockNestableUsedAsSimple "%1$s: Lock was initialized as nestable, but used as simple" +LockIsAlreadyOwned "%1$s: Lock is already owned by requesting thread" +LockStillOwned "%1$s: Lock is still owned by a thread" +LockUnsettingFree "%1$s: Attempt to release a lock not owned by any thread" +LockUnsettingSetByAnother "%1$s: Attempt to release a lock owned by another thread" +StackOverflow "Stack overflow detected for OpenMP thread #%1$d" +StackOverlap "Stack overlap detected. " +AssertionFailure "Assertion failure at %1$s(%2$d)." +CantRegisterNewThread "Unable to register a new user thread." +DuplicateLibrary "Initializing %1$s, but found %2$s already initialized." +CantOpenFileForReading "Cannot open file \"%1$s\" for reading:" +CantGetEnvVar "Getting environment variable \"%1$s\" failed:" +CantSetEnvVar "Setting environment variable \"%1$s\" failed:" +CantGetEnvironment "Getting environment failed:" +BadBoolValue "%1$s=\"%2$s\": Wrong value, boolean expected." +SSPNotBuiltIn "No Helper Thread support built in this OMP library." +SPPSotfTerminateFailed "Helper thread failed to soft terminate." +BufferOverflow "Buffer overflow detected." +RealTimeSchedNotSupported "Real-time scheduling policy is not supported." +RunningAtMaxPriority "OMP application is running at maximum priority with real-time scheduling policy. " +CantChangeMonitorPriority "Changing priority of the monitor thread failed:" +MonitorWillStarve "Deadlocks are highly possible due to monitor thread starvation." 
+CantSetMonitorStackSize "Unable to set monitor thread stack size to %1$lu bytes:" +CantSetWorkerStackSize "Unable to set OMP thread stack size to %1$lu bytes:" +CantInitThreadAttrs "Thread attribute initialization failed:" +CantDestroyThreadAttrs "Thread attribute destroying failed:" +CantSetWorkerState "OMP thread joinable state setting failed:" +CantSetMonitorState "Monitor thread joinable state setting failed:" +NoResourcesForWorkerThread "System unable to allocate necessary resources for OMP thread:" +NoResourcesForMonitorThread "System unable to allocate necessary resources for the monitor thread:" +CantTerminateWorkerThread "Unable to terminate OMP thread:" +ScheduleKindOutOfRange "Wrong schedule type %1$d, see or file for the list of values supported." +UnknownSchedulingType "Unknown scheduling type \"%1$d\"." +InvalidValue "%1$s value \"%2$s\" is invalid." +SmallValue "%1$s value \"%2$s\" is too small." +LargeValue "%1$s value \"%2$s\" is too large." +StgInvalidValue "%1$s: \"%2$s\" is an invalid value; ignored." +BarrReleaseValueInvalid "%1$s release value \"%2$s\" is invalid." +BarrGatherValueInvalid "%1$s gather value \"%2$s\" is invalid." +OBSOLETE "%1$s supported only on debug builds; ignored." +ParRangeSyntax "Syntax error: Usage: %1$s=[ routine= | filename= | range=: " + "| excl_range=: ],..." +UnbalancedQuotes "Unbalanced quotes in %1$s." +EmptyString "Empty string specified for %1$s; ignored." +LongValue "%1$s value is too long; ignored." +InvalidClause "%1$s: Invalid clause in \"%2$s\"." +EmptyClause "Empty clause in %1$s." +InvalidChunk "%1$s value \"%2$s\" is invalid chunk size." +LargeChunk "%1$s value \"%2$s\" is to large chunk size." +IgnoreChunk "%1$s value \"%2$s\" is ignored." +CantGetProcFreq "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY." +EnvParallelWarn "%1$s must be set prior to first parallel region; ignored." +AffParamDefined "%1$s: parameter has been specified already, ignoring \"%2$s\"." 
+AffInvalidParam "%1$s: parameter invalid, ignoring \"%2$s\"." +AffManyParams "%1$s: too many integer parameters specified, ignoring \"%2$s\"." +AffManyParamsForLogic "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\"." +AffNoParam "%1$s: '%2$s' type does not take any integer parameters, ignoring them." +AffNoProcList "%1$s: proclist not specified with explicit affinity type, using \"none\"." +AffProcListNoType "%1$s: proclist specified, setting affinity type to \"explicit\"." +AffProcListNotExplicit "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored." +AffSyntaxError "%1$s: syntax error, not using affinity." +AffZeroStride "%1$s: range error (zero stride), not using affinity." +AffStartGreaterEnd "%1$s: range error (%2$d > %3$d), not using affinity." +AffStrideLessZero "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity." +AffRangeTooBig "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity." +OBSOLETE "%1$s: %2$s is defined. %3$s will be ignored." +AffNotSupported "%1$s: affinity not supported, using \"disabled\"." +OBSOLETE "%1$s: affinity only supported for Intel(R) processors." +GetAffSysCallNotSupported "%1$s: getaffinity system call not supported." +SetAffSysCallNotSupported "%1$s: setaffinity system call not supported." +OBSOLETE "%1$s: pthread_aff_set_np call not found." +OBSOLETE "%1$s: pthread_get_num_resources_np call not found." +OBSOLETE "%1$s: the OS kernel does not support affinity." +OBSOLETE "%1$s: pthread_get_num_resources_np returned %2$d." +AffCantGetMaskSize "%1$s: cannot determine proper affinity mask size." +ParseSizeIntWarn "%1$s=\"%2$s\": %3$s." +ParseExtraCharsWarn "%1$s: extra trailing characters ignored: \"%2$s\"." +UnknownForceReduction "%1$s: unknown method \"%2$s\"." +TimerUseGettimeofday "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday." +TimerNeedMoreParam "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 
'clock_gettime,2'. Using gettimeofday." +TimerInvalidParam "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday." +TimerGettimeFailed "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday." +TimerUnknownFunction "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\")." +UnknownSchedTypeDetected "Unknown scheduling type detected." +DispatchManyThreads "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling." +IttLookupFailed "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed." +IttLoadLibFailed "ittnotify: Loading \"%1$s\" library failed." +IttAllNotifDisabled "ittnotify: All itt notifications disabled." +IttObjNotifDisabled "ittnotify: Object state itt notifications disabled." +IttMarkNotifDisabled "ittnotify: Mark itt notifications disabled." +IttUnloadLibFailed "ittnotify: Unloading \"%1$s\" library failed." +CantFormThrTeam "Cannot form a team with %1$d threads, using %2$d instead." +ActiveLevelsNegative "Requested number of active parallel levels \"%1$d\" is negative; ignored." +ActiveLevelsExceedLimit "Requested number of active parallel levels \"%1$d\" exceeds supported limit; " + "the following limit value will be used: \"%1$d\"." +SetLibraryIncorrectCall "kmp_set_library must only be called from the top level serial thread; ignored." +FatalSysError "Fatal system error detected." +OutOfHeapMemory "Out of heap memory." +OBSOLETE "Clearing __KMP_REGISTERED_LIB env var failed." +OBSOLETE "Registering library with env var failed." +Using_int_Value "%1$s value \"%2$d\" will be used." +Using_uint_Value "%1$s value \"%2$u\" will be used." +Using_uint64_Value "%1$s value \"%2$s\" will be used." +Using_str_Value "%1$s value \"%2$s\" will be used." +MaxValueUsing "%1$s maximum value \"%2$d\" will be used." +MinValueUsing "%1$s minimum value \"%2$d\" will be used." +MemoryAllocFailed "Memory allocation failed." +FileNameTooLong "File name too long." 
+OBSOLETE "Lock table overflow." +ManyThreadsForTPDirective "Too many threads to use threadprivate directive." +AffinityInvalidMask "%1$s: invalid mask." +WrongDefinition "Wrong definition." +TLSSetValueFailed "Windows* OS: TLS Set Value failed." +TLSOutOfIndexes "Windows* OS: TLS out of indexes." +OBSOLETE "PDONE directive must be nested within a DO directive." +CantGetNumAvailCPU "Cannot get number of available CPUs." +AssumedNumCPU "Assumed number of CPUs is 2." +ErrorInitializeAffinity "Error initializing affinity - not using affinity." +AffThreadsMayMigrate "Threads may migrate across all available OS procs (granularity setting too coarse)." +AffIgnoreInvalidProcID "Ignoring invalid OS proc ID %1$d." +AffNoValidProcID "No valid OS proc IDs specified - not using affinity." +UsingFlatOS "%1$s - using \"flat\" OS <-> physical proc mapping." +UsingFlatOSFile "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping." +UsingFlatOSFileLine "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping." +FileMsgExiting "%1$s: %2$s - exiting." +FileLineMsgExiting "%1$s, line %2$d: %3$s - exiting." +ConstructIdentInvalid "Construct identifier invalid." +ThreadIdentInvalid "Thread identifier invalid." +RTLNotInitialized "runtime library not initialized." +TPCommonBlocksInconsist "Inconsistent THREADPRIVATE common block declarations are non-conforming " + "and are unsupported. Either all threadprivate common blocks must be declared " + "identically, or the largest instance of each threadprivate common block " + "must be referenced first during the run." +CantSetThreadAffMask "Cannot set thread affinity mask." +CantSetThreadPriority "Cannot set thread priority." +CantCreateThread "Cannot create thread." +CantCreateEvent "Cannot create event." +CantSetEvent "Cannot set event." +CantCloseHandle "Cannot close handle." +UnknownLibraryType "Unknown library type: %1$d." +ReapMonitorError "Monitor did not reap properly." +ReapWorkerError "Worker thread failed to join." 
+ChangeThreadAffMaskError "Cannot change thread affinity mask." +ThreadsMigrate "%1$s: Threads may migrate across %2$d innermost levels of machine" +DecreaseToThreads "%1$s: decrease to %2$d threads" +IncreaseToThreads "%1$s: increase to %2$d threads" +OBSOLETE "%1$s: Internal thread %2$d bound to OS proc set %3$s" +AffCapableUseCpuinfo "%1$s: Affinity capable, using cpuinfo file" +AffUseGlobCpuid "%1$s: Affinity capable, using global cpuid info" +AffCapableUseFlat "%1$s: Affinity capable, using default \"flat\" topology" +AffNotCapableUseLocCpuid "%1$s: Affinity not capable, using local cpuid info" +AffNotCapableUseCpuinfo "%1$s: Affinity not capable, using cpuinfo file" +AffFlatTopology "%1$s: Affinity not capable, assumming \"flat\" topology" +InitOSProcSetRespect "%1$s: Initial OS proc set respected: %2$s" +InitOSProcSetNotRespect "%1$s: Initial OS proc set not respected: %2$s" +AvailableOSProc "%1$s: %2$d available OS procs" +Uniform "%1$s: Uniform topology" +NonUniform "%1$s: Nonuniform topology" +Topology "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)" +OBSOLETE "%1$s: OS proc to physical thread map ([] => level not in map):" +OSProcToPackage "%1$s: OS proc maps to th package core 0" +OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]" +OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]" +OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d" +OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]" +OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]" +OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d" +OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]" +OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d" +OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d" +OSProcMapToPack "%1$s: OS proc %2$d maps to %3$s" +OBSOLETE 
"%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s" +OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d" +OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d" +OBSOLETE "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package" +OBSOLETE "%1$s: HT disabled; %2$d packages" +BarriersInDifferentOrder "Threads encountered barriers in different order. " +FunctionError "Function %1$s failed:" +TopologyExtra "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)" +WrongMessageCatalog "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected." +StgIgnored "%1$s: ignored because %2$s has been defined" + # %1, -- name of ignored variable, %2 -- name of variable with higher priority. +OBSOLETE "%1$s: overrides %3$s specified before" + # %1, %2 -- name and value of the overriding variable, %3 -- name of overriden variable. + +# --- OpenMP errors detected at runtime --- +# +# %1 is the name of OpenMP construct (formatted with "Pragma" format). +# +CnsBoundToWorksharing "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause" +CnsDetectedEnd "Detected end of %1$s without first executing a corresponding beginning." +CnsIterationRangeTooLarge "Iteration range too large in %1$s." +CnsLoopIncrZeroProhibited "%1$s must not have a loop increment that evaluates to zero." +# +# %1 is the name of the first OpenMP construct, %2 -- the name of the second one (both formatted with "Pragma" format). +# +CnsExpectedEnd "Expected end of %1$s; %2$s, however, has most recently begun execution." 
+CnsInvalidNesting "%1$s is incorrectly nested within %2$s" +CnsMultipleNesting "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s" +CnsNestingSameName "%1$s is incorrectly nested within %2$s of the same name" +CnsNoOrderedClause "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause" +CnsNotInTaskConstruct "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs" +CnsThreadsAtBarrier "One thread at %1$s while another thread is at %2$s." + +# New errors +CantConnect "Cannot connect to %1$s" +CantConnectUsing "Cannot connect to %1$s - Using %2$s" +LibNotSupport "%1$s does not support %2$s. Continuing without using %2$s." +LibNotSupportFor "%1$s does not support %2$s for %3$s. Continuing without using %2$s." +StaticLibNotSupport "Static %1$s does not support %2$s. Continuing without using %2$s." +OBSOLETE "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0" +IttUnknownGroup "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\"." +IttEnvVarTooLong "ittnotify: Environment variable \"%1$s\" too long: Actual lengths is %2$lu, max allowed length is %3$lu." +AffUseGlobCpuidL11 "%1$s: Affinity capable, using global cpuid leaf 11 info" +AffNotCapableUseLocCpuidL11 "%1$s: Affinity not capable, using local cpuid leaf 11 info" +AffInfoStr "%1$s: %2$s." +AffInfoStrStr "%1$s: %2$s - %3$s." +OSProcToPhysicalThreadMap "%1$s: OS proc to physical thread map:" +AffUsingFlatOS "%1$s: using \"flat\" OS <-> physical proc mapping." +AffParseFilename "%1$s: parsing %2$s." +MsgExiting "%1$s - exiting." +IncompatibleLibrary "Incompatible %1$s library with version %2$s found." +IttFunctionError "ittnotify: Function %1$s failed:" +IttUnknownError "ittnofify: Error #%1$d." +EnvMiddleWarn "%1$s must be set prior to first parallel region or certain API calls; ignored." 
+CnsLockNotDestroyed "Lock initialized at %1$s(%2$d) was not destroyed" + # %1, %2, %3, %4 -- file, line, func, col +CantLoadBalUsing "Cannot determine machine load balance - Using %1$s" +AffNotCapableUsePthread "%1$s: Affinity not capable, using pthread info" +AffUsePthread "%1$s: Affinity capable, using pthread info" +OBSOLETE "Loading \"%1$s\" library failed:" +OBSOLETE "Lookup of \"%1$s\" function failed:" +OBSOLETE "Buffer too small." +OBSOLETE "Error #%1$d." +NthSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"." +NthSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"." +AffStrParseFilename "%1$s: %2$s - parsing %3$s." +OBSOLETE "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group." +AffTypeCantUseMultGroups "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"." +AffGranCantUseMultGroups "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"." +AffWindowsProcGroupMap "%1$s: Mapping Windows* OS processor group proc to OS proc 64*+." +AffOSProcToGroup "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d" +AffBalancedNotAvail "%1$s: Affinity balanced is not available." +OBSOLETE "%1$s: granularity=core will be used." +EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored." +FutexNotSupported "futex system call not supported; %1$s=%2$s ignored." +AffGranUsing "%1$s: granularity=%2$s will be used." +AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt " + "(nSockets@offset, nCores@offset, nTthreads per core)\"." +AffThrPlaceUnsupported "KMP_PLACE_THREADS ignored: unsupported architecture." +AffThrPlaceManyCores "KMP_PLACE_THREADS ignored: too many cores requested." +SyntaxErrorUsing "%1$s: syntax error, using %2$s." +AdaptiveNotSupported "%1$s: Adaptive locks are not supported; using queuing." 
+EnvSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"." +EnvSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"." +BoundToOSProcSet "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s" +CnsLoopIncrIllegal "%1$s error: parallel loop increment and condition are inconsistent." +NoGompCancellation "libgomp cancellation is not currently supported." +AffThrPlaceNonUniform "KMP_PLACE_THREADS ignored: non-uniform topology." +AffThrPlaceNonThreeLevel "KMP_PLACE_THREADS ignored: only three-level topology is supported." +AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"." +AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"." +AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested." +AffThrPlaceDeprecated "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value." +AffUsingHwloc "%1$s: Affinity capable, using hwloc." +AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism." +AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms." + + +# -------------------------------------------------------------------------------------------------- +-*- HINTS -*- +# -------------------------------------------------------------------------------------------------- + +# Hints. Hint may be printed after a message. Usually it is longer explanation text or suggestion. +# To maintain hint numbers (they are visible to customers), add new hints to the end. + +SubmitBugReport "Please submit a bug report with this message, compile and run " + "commands used, and machine configuration info including native " + "compiler and operating system versions. Faster response will be " + "obtained by including all program sources. For information on " + "submitting this issue, please see " + "http://www.intel.com/software/products/support/." 
+OBSOLETE "Check NLSPATH environment variable, its value is \"%1$s\"." +ChangeStackLimit "Please try changing the shell stack limit or adjusting the " + "OMP_STACKSIZE environment variable." +Unset_ALL_THREADS "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set)." +Set_ALL_THREADPRIVATE "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d." +PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads." +DuplicateLibrary "This means that multiple copies of the OpenMP runtime have been " + "linked into the program. That is dangerous, since it can degrade " + "performance or cause incorrect results. " + "The best thing to do is to ensure that only a single OpenMP runtime is " + "linked into the process, e.g. by avoiding static linking of the OpenMP " + "runtime in any library. As an unsafe, unsupported, undocumented workaround " + "you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow " + "the program to continue to execute, but that may cause crashes or " + "silently produce incorrect results. " + "For more information, please see http://www.intel.com/software/products/support/." +NameComesFrom_CPUINFO_FILE "This name is specified in environment variable KMP_CPUINFO_FILE." +NotEnoughMemory "Seems application required too much memory." +ValidBoolValues "Use \"0\", \"FALSE\". \".F.\", \"off\", \"no\" as false values, " + "\"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values." +BufferOverflow "Perhaps too many threads." +RunningAtMaxPriority "Decrease priority of application. " + "This will allow the monitor thread run at higher priority than other threads." +ChangeMonitorStackSize "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit." +ChangeWorkerStackSize "Try changing OMP_STACKSIZE and/or the shell stack limit." +IncreaseWorkerStackSize "Try increasing OMP_STACKSIZE or the shell stack limit." +DecreaseWorkerStackSize "Try decreasing OMP_STACKSIZE." 
+Decrease_NUM_THREADS "Try decreasing the value of OMP_NUM_THREADS." +IncreaseMonitorStackSize "Try increasing KMP_MONITOR_STACKSIZE." +DecreaseMonitorStackSize "Try decreasing KMP_MONITOR_STACKSIZE." +DecreaseNumberOfThreadsInUse "Try decreasing the number of threads in use simultaneously." +DefaultScheduleKindUsed "Will use default schedule type (%1$s)." +GetNewerLibrary "It could be a result of using an older OMP library with a newer " + "compiler or memory corruption. You may check the proper OMP library " + "is linked to the application." +CheckEnvVar "Check %1$s environment variable, its value is \"%2$s\"." +OBSOLETE "You may want to use an %1$s library that supports %2$s interface with version %3$s." +OBSOLETE "You may want to use an %1$s library with version %2$s." +BadExeFormat "System error #193 is \"Bad format of EXE or DLL file\". " + "Usually it means the file is found, but it is corrupted or " + "a file for another architecture. " + "Check whether \"%1$s\" is a file for %2$s architecture." +SystemLimitOnThreads "System-related limit on the number of threads." + + + +# -------------------------------------------------------------------------------------------------- +# end of file # +# -------------------------------------------------------------------------------------------------- + diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp.h.var b/contrib/libs/cxxsupp/openmp/include/30/omp.h.var index 212c8c180db..9ffcfb297bd 100644 --- a/contrib/libs/cxxsupp/openmp/include/30/omp.h.var +++ b/contrib/libs/cxxsupp/openmp/include/30/omp.h.var @@ -1,164 +1,164 @@ -/* - * include/30/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ -# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" - -# ifdef __cplusplus - extern "C" { -# endif - -# define omp_set_num_threads ompc_set_num_threads -# define omp_set_dynamic ompc_set_dynamic -# define omp_set_nested ompc_set_nested -# define omp_set_max_active_levels ompc_set_max_active_levels -# define omp_set_schedule ompc_set_schedule -# define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num -# define omp_get_team_size ompc_get_team_size - - -# define kmp_set_stacksize kmpc_set_stacksize -# define kmp_set_stacksize_s kmpc_set_stacksize_s -# define kmp_set_blocktime kmpc_set_blocktime -# define kmp_set_library kmpc_set_library -# define kmp_set_defaults kmpc_set_defaults -# define kmp_set_affinity_mask_proc kmpc_set_affinity_mask_proc -# define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc -# define kmp_get_affinity_mask_proc kmpc_get_affinity_mask_proc - -# define kmp_malloc kmpc_malloc -# define kmp_calloc kmpc_calloc -# define kmp_realloc kmpc_realloc -# define kmp_free kmpc_free - - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# else -# define __KAI_KMPC_CONVENTION -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int 
__KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - -# 
include - /* kmp API functions */ - extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - - /* affinity API functions */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - -# undef __KAI_KMPC_CONVENTION - - /* Warning: - The following 
typedefs are not standard, deprecated and will be removed in a future release. - */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ - +/* + * include/30/omp.h.var + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef __OMP_H +# define __OMP_H + +# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ +# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ +# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ +# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" + +# ifdef __cplusplus + extern "C" { +# endif + +# define omp_set_num_threads ompc_set_num_threads +# define omp_set_dynamic ompc_set_dynamic +# define omp_set_nested ompc_set_nested +# define omp_set_max_active_levels ompc_set_max_active_levels +# define omp_set_schedule ompc_set_schedule +# define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num +# define omp_get_team_size ompc_get_team_size + + +# define kmp_set_stacksize kmpc_set_stacksize +# define kmp_set_stacksize_s kmpc_set_stacksize_s +# define kmp_set_blocktime kmpc_set_blocktime +# define kmp_set_library kmpc_set_library +# define kmp_set_defaults kmpc_set_defaults +# define kmp_set_affinity_mask_proc kmpc_set_affinity_mask_proc +# define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc +# define kmp_get_affinity_mask_proc kmpc_get_affinity_mask_proc + +# define kmp_malloc kmpc_malloc +# define kmp_calloc kmpc_calloc +# define kmp_realloc kmpc_realloc +# define kmp_free kmpc_free + + +# if defined(_WIN32) +# define __KAI_KMPC_CONVENTION __cdecl +# else +# define __KAI_KMPC_CONVENTION +# endif + + /* schedule kind constants */ + typedef enum omp_sched_t { + omp_sched_static = 1, 
+ omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4 + } omp_sched_t; + + /* set API functions */ + extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); + extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); + extern void __KAI_KMPC_CONVENTION omp_set_nested (int); + extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); + extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); + + /* query API functions */ + extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); + extern int __KAI_KMPC_CONVENTION omp_get_nested (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); + extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); + extern int __KAI_KMPC_CONVENTION omp_in_final (void); + extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); + extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); + extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); + extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); + + /* lock API functions */ + typedef struct omp_lock_t { + void * _lk; + } omp_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); + + /* nested lock API functions */ + typedef struct omp_nest_lock_t { + void * _lk; + } omp_nest_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_nest_lock 
(omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); + + /* time API functions */ + extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); + extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); + +# include + /* kmp API functions */ + extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); + extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); + extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); + extern int __KAI_KMPC_CONVENTION kmp_get_library (void); + extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); + extern void __KAI_KMPC_CONVENTION kmp_set_library (int); + extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); + extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); + + /* affinity API functions */ + typedef void * kmp_affinity_mask_t; + + extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); + extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); + extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION 
kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); + + extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); + extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); + extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); + extern void __KAI_KMPC_CONVENTION kmp_free (void *); + + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); + +# undef __KAI_KMPC_CONVENTION + + /* Warning: + The following typedefs are not standard, deprecated and will be removed in a future release. + */ + typedef int omp_int_t; + typedef double omp_wtime_t; + +# ifdef __cplusplus + } +# endif + +#endif /* __OMP_H */ + diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var index f46b5224acd..99122067af0 100644 --- a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var +++ b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var @@ -1,633 +1,633 @@ -! include/30/omp_lib.f.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. 
-!*** - -!dec$ fixedformlinesize:132 - - module omp_lib_kinds - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(nthreads) - use omp_lib_kinds - integer (kind=omp_integer_kind) nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_nested - - function omp_get_num_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_get_dynamic() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - use omp_lib_kinds - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function 
omp_get_active_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - subroutine omp_init_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) - use omp_lib_kinds - integer 
(kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) - use omp_lib_kinds - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - use omp_lib_kinds - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - use omp_lib_kinds - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - use omp_lib_kinds - integer (kind=omp_integer_kind) 
kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_calloc(nelem, elsize) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer 
(kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. 
-!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick - -!dec$ attributes alias:'omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes 
alias:'omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. 
-!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick - -!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock 
-!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
-!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes 
alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'kmp_free_'::kmp_free - -!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ endif - -!dec$ if defined(__APPLE__) - -!*** -!*** The Mac entry points are in lowercase, with an both an underscore -!*** appended and an underscore prepended. 
-!*** - -!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'_omp_get_level_'::omp_get_level -!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick - -!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes 
alias:'_omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ endif - - end module omp_lib - +! include/30/omp_lib.f.var + +! 
+!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + +!*** +!*** Some of the directives for the following routine extend past column 72, +!*** so process this file in 132-column mode. +!*** + +!dec$ fixedformlinesize:132 + + module omp_lib_kinds + + integer, parameter :: omp_integer_kind = 4 + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = 4 + integer, parameter :: omp_lock_kind = int_ptr_kind() + integer, parameter :: omp_nest_lock_kind = int_ptr_kind() + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = int_ptr_kind() + integer, parameter :: kmp_size_t_kind = int_ptr_kind() + integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + + end module omp_lib_kinds + + module omp_lib + + use omp_lib_kinds + + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + interface + +! *** +! *** omp_* entry points +! 
*** + + subroutine omp_set_num_threads(nthreads) + use omp_lib_kinds + integer (kind=omp_integer_kind) nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) + use omp_lib_kinds + logical (kind=omp_logical_kind) enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) + use omp_lib_kinds + logical (kind=omp_logical_kind) enable + end subroutine omp_set_nested + + function omp_get_num_threads() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_get_dynamic() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) + use omp_lib_kinds + integer (kind=omp_integer_kind) max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function 
omp_get_active_level() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) + use omp_lib_kinds + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) + use omp_lib_kinds + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_team_size + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) + use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) + use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_wtime() + double precision omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick () + double precision omp_get_wtick + end function omp_get_wtick + + subroutine omp_init_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) + use omp_lib_kinds + integer 
(kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! *** + + subroutine kmp_set_stacksize(size) + use omp_lib_kinds + integer (kind=omp_integer_kind) size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) + use omp_lib_kinds + integer (kind=kmp_size_t_kind) size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) + use omp_lib_kinds + integer (kind=omp_integer_kind) msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) + use omp_lib_kinds + integer (kind=omp_integer_kind) libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) + character*(*) string + end subroutine kmp_set_defaults + + function kmp_get_stacksize() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() + use omp_lib_kinds + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() + use omp_lib_kinds + integer (kind=omp_integer_kind) 
kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity + + function kmp_get_affinity_max_proc() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind) size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind) nelem + integer 
(kind=kmp_size_t_kind) elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind) ptr + integer (kind=kmp_size_t_kind) size + end function kmp_realloc + + subroutine kmp_free(ptr) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() + end subroutine kmp_set_warnings_off + + end interface + +!dec$ if defined(_WIN32) +!dec$ if defined(_WIN64) .or. defined(_M_AMD64) + +!*** +!*** The Fortran entry points must be in uppercase, even if the /Qlowercase +!*** option is specified. The alias attribute ensures that the specified +!*** string is used as the entry point. +!*** +!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an +!*** underscore prepended. On the Windows* OS Intel(R) 64 +!*** architecture, no underscore is prepended. 
+!*** + +!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads +!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic +!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested +!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads +!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads +!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num +!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs +!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel +!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic +!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested +!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit +!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels +!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels +!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level +!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level +!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num +!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size +!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule +!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule +!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime +!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick + +!dec$ attributes alias:'omp_init_lock' :: omp_init_lock +!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock +!dec$ attributes alias:'omp_set_lock' :: omp_set_lock +!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock +!dec$ attributes alias:'omp_test_lock' :: omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock +!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock +!dec$ attributes 
alias:'omp_test_nest_lock' :: omp_test_nest_lock + +!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc +!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'KMP_FREE'::kmp_free + +!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ else + +!*** +!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. 
+!*** + +!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads +!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic +!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested +!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads +!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads +!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num +!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs +!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel +!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic +!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested +!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit +!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels +!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels +!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level +!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level +!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num +!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size +!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule +!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule +!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime +!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick + +!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock +!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock +!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock +!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock +!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock 
+!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock + +!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity +!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc +!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'_KMP_FREE'::kmp_free + +!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ endif +!dec$ endif + +!dec$ if defined(__linux) + +!*** +!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
+!*** + +!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'omp_get_level_'::omp_get_level +!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size +!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule +!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick + +!dec$ attributes alias:'omp_init_lock_'::omp_init_lock +!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes 
alias:'omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime +!dec$ attributes alias:'kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'kmp_malloc_'::kmp_malloc +!dec$ attributes alias:'kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'kmp_free_'::kmp_free + +!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off + +!dec$ endif + +!dec$ if defined(__APPLE__) + +!*** +!*** The Mac entry points are in lowercase, with both an underscore +!*** appended and an underscore prepended.
+!*** + +!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'_omp_get_level_'::omp_get_level +!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size +!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule +!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick + +!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock +!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes 
alias:'_omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime +!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc +!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'_kmp_free_'::kmp_free + +!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off + +!dec$ endif + + end module omp_lib + diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var index 328e2cfa8d1..3325486d26a 100644 --- 
a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var +++ b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var @@ -1,358 +1,358 @@ -! include/30/omp_lib.f90.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - - module omp_lib_kinds - - use, intrinsic :: iso_c_binding - - integer, parameter :: omp_integer_kind = c_int - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = c_float - integer, parameter :: kmp_double_kind = c_double - integer, parameter :: omp_lock_kind = c_intptr_t - integer, parameter :: omp_nest_lock_kind = c_intptr_t - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = c_intptr_t - integer, parameter :: kmp_size_t_kind = c_size_t - integer, parameter :: kmp_affinity_mask_kind = c_intptr_t - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(nthreads) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - 
use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) :: omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) :: omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind) :: kind - integer (kind=omp_integer_kind) :: modifier - end subroutine omp_get_schedule - - function omp_get_wtime() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtick - end function omp_get_wtick - - subroutine omp_init_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) bind(c) - use omp_lib_kinds - 
integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function 
kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_calloc(nelem, elsize) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - 
subroutine kmp_free(ptr) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - end interface - - end module omp_lib +! include/30/omp_lib.f90.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + + module omp_lib_kinds + + use, intrinsic :: iso_c_binding + + integer, parameter :: omp_integer_kind = c_int + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = c_float + integer, parameter :: kmp_double_kind = c_double + integer, parameter :: omp_lock_kind = c_intptr_t + integer, parameter :: omp_nest_lock_kind = c_intptr_t + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = c_intptr_t + integer, parameter :: kmp_size_t_kind = c_size_t + integer, parameter :: kmp_affinity_mask_kind = c_intptr_t + + end module omp_lib_kinds + + module omp_lib + + use omp_lib_kinds + + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*) kmp_build_date + parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), 
parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + interface + +! *** +! *** omp_* entry points +! *** + + subroutine omp_set_num_threads(nthreads) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_nested + + function omp_get_num_threads() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_in_final() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_final + end function omp_in_final + + function omp_get_dynamic() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) bind(c) + use omp_lib_kinds 
+ integer (kind=omp_integer_kind), value :: max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) :: omp_get_level + end function omp_get_level + + function omp_get_active_level() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) :: omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + integer (kind=omp_integer_kind), value :: level + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_team_size + integer (kind=omp_integer_kind), value :: level + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) bind(c) + use omp_lib_kinds + integer (kind=omp_sched_kind), value :: kind + integer (kind=omp_integer_kind), value :: modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) bind(c) + use omp_lib_kinds + integer (kind=omp_sched_kind) :: kind + integer (kind=omp_integer_kind) :: modifier + end subroutine omp_get_schedule + + function omp_get_wtime() bind(c) + use omp_lib_kinds + real (kind=kmp_double_kind) omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick() bind(c) + use omp_lib_kinds + real (kind=kmp_double_kind) omp_get_wtick + end function omp_get_wtick + + subroutine omp_init_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) bind(c) + use 
omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! 
*** + + subroutine kmp_set_stacksize(size) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) bind(c) + use omp_lib_kinds + integer (kind=kmp_size_t_kind), value :: size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() bind(c) + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() bind(c) + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() bind(c) + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) bind(c) + use, intrinsic :: iso_c_binding + character (kind=c_char) :: string(*) + end subroutine kmp_set_defaults + + function kmp_get_stacksize() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() bind(c) + use omp_lib_kinds + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function 
kmp_get_affinity + + function kmp_get_affinity_max_proc() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) bind(c) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) bind(c) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind), value :: size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind), value :: nelem + integer (kind=kmp_size_t_kind), value :: elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind), value :: ptr + integer (kind=kmp_size_t_kind), value :: size + end function kmp_realloc + + 
subroutine kmp_free(ptr) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind), value :: ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() bind(c) + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() bind(c) + end subroutine kmp_set_warnings_off + + end interface + + end module omp_lib diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var index c442f073b22..84ed39b321c 100644 --- a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var +++ b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var @@ -1,638 +1,638 @@ -! include/30/omp_lib.h.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. 
-!*** - -!dec$ fixedformlinesize:132 - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(nthreads) - import - integer (kind=omp_integer_kind) nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) - import - logical (kind=omp_logical_kind) enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) - import - logical (kind=omp_logical_kind) enable - end subroutine omp_set_nested - - function omp_get_num_threads() - import - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - import - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - import - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - import - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - import - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() - import - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() - import - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - import - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - import - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - import - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - import - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - import - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() - import - integer 
(kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - import - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - import - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - subroutine omp_init_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) - import - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine 
omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) - import - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) - import - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - import - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - import - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - import - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - import - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - import - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - import - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - import - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) - import - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - import - integer (kind=omp_integer_kind) kmp_get_affinity - 
integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - import - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - import - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - import - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - import - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - import - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_calloc(nelem, elsize) - import - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - import - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - import - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine 
kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. -!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS'::omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC'::omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED'::omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS'::omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS'::omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM'::omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS'::omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL'::omp_in_parallel -!dec$ attributes alias:'OMP_IN_FINAL'::omp_in_final -!dec$ attributes alias:'OMP_GET_DYNAMIC'::omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED'::omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT'::omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL'::omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL'::omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE'::omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE'::omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE'::omp_get_schedule -!dec$ attributes alias:'OMP_GET_WTIME'::omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK'::omp_get_wtick - -!dec$ attributes 
alias:'omp_init_lock'::omp_init_lock -!dec$ attributes alias:'omp_destroy_lock'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock'::omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_SET_DEFAULTS'::kmp_set_defaults -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc 
-!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. -!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS'::omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC'::omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED'::omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS'::omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS'::omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM'::omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS'::omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL'::omp_in_parallel -!dec$ attributes alias:'_OMP_IN_FINAL'::omp_in_final -!dec$ attributes alias:'_OMP_GET_DYNAMIC'::omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED'::omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT'::omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL'::omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL'::omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE'::omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE'::omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE'::omp_get_schedule -!dec$ attributes alias:'_OMP_GET_WTIME'::omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK'::omp_get_wtick - -!dec$ attributes alias:'_omp_init_lock'::omp_init_lock -!dec$ attributes 
alias:'_omp_destroy_lock'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock'::omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock'::omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'_KMP_SET_DEFAULTS'::kmp_set_defaults -!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes 
alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. -!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_in_final_'::omp_in_final -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ attributes 
alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_set_defaults_'::kmp_set_defaults -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes 
alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'kmp_free_'::kmp_free - -!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ endif - -!dec$ if defined(__APPLE__) - -!*** -!*** The Mac entry points are in lowercase, with an both an underscore -!*** appended and an underscore prepended. -!*** - -!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'_omp_in_final_'::omp_in_final -!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'_omp_get_level_'::omp_get_level -!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick - -!dec$ attributes 
alias:'_omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_set_defaults_'::kmp_set_defaults -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes 
alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ endif - - +! include/30/omp_lib.h.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + +!*** +!*** Some of the directives for the following routine extend past column 72, +!*** so process this file in 132-column mode. +!*** + +!dec$ fixedformlinesize:132 + + integer, parameter :: omp_integer_kind = 4 + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = 4 + integer, parameter :: omp_lock_kind = int_ptr_kind() + integer, parameter :: omp_nest_lock_kind = int_ptr_kind() + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = int_ptr_kind() + integer, parameter :: kmp_size_t_kind = int_ptr_kind() + integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = 
@LIBOMP_VERSION_BUILD@ + character(*) kmp_build_date + parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + + interface + +! *** +! *** omp_* entry points +! *** + + subroutine omp_set_num_threads(nthreads) + import + integer (kind=omp_integer_kind) nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) + import + logical (kind=omp_logical_kind) enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) + import + logical (kind=omp_logical_kind) enable + end subroutine omp_set_nested + + function omp_get_num_threads() + import + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() + import + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() + import + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() + import + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() + import + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_in_final() + import + logical (kind=omp_logical_kind) omp_in_final + end function omp_in_final + + function omp_get_dynamic() + import + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() + import + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() + import + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) + import + integer (kind=omp_integer_kind) max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() + import + integer (kind=omp_integer_kind) 
omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() + import + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function omp_get_active_level() + import + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) + import + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) + import + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_team_size + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) + import + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) + import + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_wtime() + double precision omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick () + double precision omp_get_wtick + end function omp_get_wtick + + subroutine omp_init_lock(lockvar) + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) + import + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine 
omp_destroy_nest_lock(lockvar) + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) + import + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! *** + + subroutine kmp_set_stacksize(size) + import + integer (kind=omp_integer_kind) size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) + import + integer (kind=kmp_size_t_kind) size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) + import + integer (kind=omp_integer_kind) msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) + import + integer (kind=omp_integer_kind) libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) + character*(*) string + end subroutine kmp_set_defaults + + function kmp_get_stacksize() + import + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() + import + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() + import + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() + import + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) + 
import + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) + import + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity + + function kmp_get_affinity_max_proc() + import + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) + import + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) + import + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) + import + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) + import + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) + import + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) + import + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind) size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) + import + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind) nelem + integer (kind=kmp_size_t_kind) elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) + import + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind) ptr + integer 
(kind=kmp_size_t_kind) size + end function kmp_realloc + + subroutine kmp_free(ptr) + import + integer (kind=kmp_pointer_kind) ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() + end subroutine kmp_set_warnings_off + + end interface + +!dec$ if defined(_WIN32) +!dec$ if defined(_WIN64) .or. defined(_M_AMD64) + +!*** +!*** The Fortran entry points must be in uppercase, even if the /Qlowercase +!*** option is specified. The alias attribute ensures that the specified +!*** string is used as the entry point. +!*** +!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an +!*** underscore prepended. On the Windows* OS Intel(R) 64 +!*** architecture, no underscore is prepended. +!*** + +!dec$ attributes alias:'OMP_SET_NUM_THREADS'::omp_set_num_threads +!dec$ attributes alias:'OMP_SET_DYNAMIC'::omp_set_dynamic +!dec$ attributes alias:'OMP_SET_NESTED'::omp_set_nested +!dec$ attributes alias:'OMP_GET_NUM_THREADS'::omp_get_num_threads +!dec$ attributes alias:'OMP_GET_MAX_THREADS'::omp_get_max_threads +!dec$ attributes alias:'OMP_GET_THREAD_NUM'::omp_get_thread_num +!dec$ attributes alias:'OMP_GET_NUM_PROCS'::omp_get_num_procs +!dec$ attributes alias:'OMP_IN_PARALLEL'::omp_in_parallel +!dec$ attributes alias:'OMP_IN_FINAL'::omp_in_final +!dec$ attributes alias:'OMP_GET_DYNAMIC'::omp_get_dynamic +!dec$ attributes alias:'OMP_GET_NESTED'::omp_get_nested +!dec$ attributes alias:'OMP_GET_THREAD_LIMIT'::omp_get_thread_limit +!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels +!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels +!dec$ attributes alias:'OMP_GET_LEVEL'::omp_get_level +!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL'::omp_get_active_level +!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num +!dec$ attributes alias:'OMP_GET_TEAM_SIZE'::omp_get_team_size +!dec$ attributes 
alias:'OMP_SET_SCHEDULE'::omp_set_schedule +!dec$ attributes alias:'OMP_GET_SCHEDULE'::omp_get_schedule +!dec$ attributes alias:'OMP_GET_WTIME'::omp_get_wtime +!dec$ attributes alias:'OMP_GET_WTICK'::omp_get_wtick + +!dec$ attributes alias:'omp_init_lock'::omp_init_lock +!dec$ attributes alias:'omp_destroy_lock'::omp_destroy_lock +!dec$ attributes alias:'omp_set_lock'::omp_set_lock +!dec$ attributes alias:'omp_unset_lock'::omp_unset_lock +!dec$ attributes alias:'omp_test_lock'::omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock'::omp_init_nest_lock +!dec$ attributes alias:'omp_destroy_nest_lock'::omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock'::omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock'::omp_unset_nest_lock +!dec$ attributes alias:'omp_test_nest_lock'::omp_test_nest_lock + +!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes alias:'KMP_SET_DEFAULTS'::kmp_set_defaults +!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ 
attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc +!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'KMP_FREE'::kmp_free + +!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ else + +!*** +!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. +!*** + +!dec$ attributes alias:'_OMP_SET_NUM_THREADS'::omp_set_num_threads +!dec$ attributes alias:'_OMP_SET_DYNAMIC'::omp_set_dynamic +!dec$ attributes alias:'_OMP_SET_NESTED'::omp_set_nested +!dec$ attributes alias:'_OMP_GET_NUM_THREADS'::omp_get_num_threads +!dec$ attributes alias:'_OMP_GET_MAX_THREADS'::omp_get_max_threads +!dec$ attributes alias:'_OMP_GET_THREAD_NUM'::omp_get_thread_num +!dec$ attributes alias:'_OMP_GET_NUM_PROCS'::omp_get_num_procs +!dec$ attributes alias:'_OMP_IN_PARALLEL'::omp_in_parallel +!dec$ attributes alias:'_OMP_IN_FINAL'::omp_in_final +!dec$ attributes alias:'_OMP_GET_DYNAMIC'::omp_get_dynamic +!dec$ attributes alias:'_OMP_GET_NESTED'::omp_get_nested +!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT'::omp_get_thread_limit +!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels +!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels +!dec$ attributes alias:'_OMP_GET_LEVEL'::omp_get_level +!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL'::omp_get_active_level +!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num +!dec$ attributes alias:'_OMP_GET_TEAM_SIZE'::omp_get_team_size +!dec$ attributes alias:'_OMP_SET_SCHEDULE'::omp_set_schedule +!dec$ attributes 
alias:'_OMP_GET_SCHEDULE'::omp_get_schedule +!dec$ attributes alias:'_OMP_GET_WTIME'::omp_get_wtime +!dec$ attributes alias:'_OMP_GET_WTICK'::omp_get_wtick + +!dec$ attributes alias:'_omp_init_lock'::omp_init_lock +!dec$ attributes alias:'_omp_destroy_lock'::omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock'::omp_set_lock +!dec$ attributes alias:'_omp_unset_lock'::omp_unset_lock +!dec$ attributes alias:'_omp_test_lock'::omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock'::omp_init_nest_lock +!dec$ attributes alias:'_omp_destroy_nest_lock'::omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock'::omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock'::omp_unset_nest_lock +!dec$ attributes alias:'_omp_test_nest_lock'::omp_test_nest_lock + +!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes alias:'_KMP_SET_DEFAULTS'::kmp_set_defaults +!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity +!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ attributes 
alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc +!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'_KMP_FREE'::kmp_free + +!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ endif +!dec$ endif + +!dec$ if defined(__linux) + +!*** +!*** The Linux* OS entry points are in lowercase, with an underscore appended. +!*** + +!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'omp_in_final_'::omp_in_final +!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'omp_get_level_'::omp_get_level +!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size +!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule +!dec$ attributes 
alias:'omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick + +!dec$ attributes alias:'omp_init_lock_'::omp_init_lock +!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'kmp_set_defaults_'::kmp_set_defaults +!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime +!dec$ attributes alias:'kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes 
alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'kmp_malloc_'::kmp_malloc +!dec$ attributes alias:'kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'kmp_free_'::kmp_free + +!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off + +!dec$ endif + +!dec$ if defined(__APPLE__) + +!*** +!*** The Mac entry points are in lowercase, with both an underscore +!*** appended and an underscore prepended. +!*** + +!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'_omp_in_final_'::omp_in_final +!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'_omp_get_level_'::omp_get_level +!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size +!dec$ attributes
alias:'_omp_set_schedule_'::omp_set_schedule +!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick + +!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock +!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'_kmp_set_defaults_'::kmp_set_defaults +!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime +!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes 
alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc +!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'_kmp_free_'::kmp_free + +!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off + +!dec$ endif + + diff --git a/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var b/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var index 773174a1f5c..83b4f1e3dfc 100644 --- a/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var +++ b/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var @@ -1,487 +1,487 @@ -/* - * include/30/ompt.h.var - */ - -#ifndef __OMPT__ -#define __OMPT__ - -/***************************************************************************** - * system include files - *****************************************************************************/ - -#include - - - -/***************************************************************************** - * iteration macros - *****************************************************************************/ - -#define FOREACH_OMPT_INQUIRY_FN(macro) \ - macro (ompt_enumerate_state) \ - \ - macro (ompt_set_callback) \ - macro (ompt_get_callback) \ - \ - macro (ompt_get_idle_frame) \ - macro (ompt_get_task_frame) \ - \ - macro (ompt_get_state) \ - \ - macro (ompt_get_parallel_id) \ - macro (ompt_get_parallel_team_size) \ - macro (ompt_get_task_id) \ - macro (ompt_get_thread_id) - -#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ - macro (ompt_idle) \ - macro (ompt_overhead) \ - macro (ompt_barrier_wait) \ - macro (ompt_task_wait) \ - macro (ompt_mutex_wait) - -#define 
FOREACH_OMPT_STATE(macro) \ - \ - /* first */ \ - macro (ompt_state_first, 0x71) /* initial enumeration state */ \ - \ - /* work states (0..15) */ \ - macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ - macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ - macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ - \ - /* idle (16..31) */ \ - macro (ompt_state_idle, 0x10) /* waiting for work */ \ - \ - /* overhead states (32..63) */ \ - macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ - \ - /* barrier wait states (64..79) */ \ - macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ - macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ - macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ - \ - /* task wait states (80..95) */ \ - macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ - macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ - \ - /* mutex wait states (96..111) */ \ - macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ - macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ - macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ - macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ - macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ - macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) 
*/ \ - \ - /* misc (112..127) */ \ - macro (ompt_state_undefined, 0x70) /* undefined thread state */ - - -#define FOREACH_OMPT_EVENT(macro) \ - \ - /*--- Mandatory Events ---*/ \ - macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ - macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ - \ - macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ - macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ - \ - macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ - macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ - \ - macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ - \ - macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ - \ - /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ - macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ - macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ - \ - macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ - macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ - \ - macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ - macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ - \ - macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ - macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ - \ - macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ - macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ - macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ 
\ - \ - macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ - \ - macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ - \ - /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ - macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ - macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ - \ - macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ - macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ - \ - macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ - \ - macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ - macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ - \ - macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ - macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ - \ - macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ - macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ - \ - macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ - macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ - \ - macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ - macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ - \ - macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ - macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ - \ - macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at 
barrier begin */ \ - macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ - \ - macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ - macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ - \ - macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ - macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ - \ - macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ - \ - macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ - macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ - macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ - macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ - macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ - \ - macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ - macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ - macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ - macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ - macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ - macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ - \ - macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ - macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ - \ - macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ - macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ - \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush 
*/ - - - -/***************************************************************************** - * data types - *****************************************************************************/ - -/*--------------------- - * identifiers - *---------------------*/ - -typedef uint64_t ompt_thread_id_t; -#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_task_id_t; -#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_parallel_id_t; -#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_wait_id_t; -#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ - - -/*--------------------- - * ompt_frame_t - *---------------------*/ - -typedef struct ompt_frame_s { - void *exit_runtime_frame; /* next frame is user code */ - void *reenter_runtime_frame; /* previous frame is user code */ -} ompt_frame_t; - - -/***************************************************************************** - * enumerations for thread states and runtime events - *****************************************************************************/ - -/*--------------------- - * runtime states - *---------------------*/ - -typedef enum { -#define ompt_state_macro(state, code) state = code, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -} ompt_state_t; - - -/*--------------------- - * runtime events - *---------------------*/ - -typedef enum { -#define ompt_event_macro(event, callback, eventid) event = eventid, - FOREACH_OMPT_EVENT(ompt_event_macro) -#undef ompt_event_macro -} ompt_event_t; - - -/*--------------------- - * set callback results - *---------------------*/ -typedef enum { - ompt_set_result_registration_error = 0, - ompt_set_result_event_may_occur_no_callback = 1, - ompt_set_result_event_never_occurs = 2, - ompt_set_result_event_may_occur_callback_some = 3, - ompt_set_result_event_may_occur_callback_always = 4, -} ompt_set_result_t; - - - 
-/***************************************************************************** - * callback signatures - *****************************************************************************/ - -/* initialization */ -typedef void (*ompt_interface_fn_t)(void); - -typedef ompt_interface_fn_t (*ompt_function_lookup_t)( - const char * /* entry point to look up */ -); - -/* threads */ -typedef void (*ompt_thread_callback_t) ( - ompt_thread_id_t thread_id /* ID of thread */ -); - -typedef enum { - ompt_thread_initial = 1, // start the enumeration at 1 - ompt_thread_worker = 2, - ompt_thread_other = 3 -} ompt_thread_type_t; - -typedef enum { - ompt_invoker_program = 0, /* program invokes master task */ - ompt_invoker_runtime = 1 /* runtime invokes master task */ -} ompt_invoker_t; - -typedef void (*ompt_thread_type_callback_t) ( - ompt_thread_type_t thread_type, /* type of thread */ - ompt_thread_id_t thread_id /* ID of thread */ -); - -typedef void (*ompt_wait_callback_t) ( - ompt_wait_id_t wait_id /* wait id */ -); - -/* parallel and workshares */ -typedef void (*ompt_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id /* id of task */ -); - -typedef void (*ompt_new_workshare_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t parent_task_id, /* id of parent task */ - void *workshare_function /* pointer to outlined function */ -); - -typedef void (*ompt_new_parallel_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data of parent task */ - ompt_parallel_id_t parallel_id, /* id of parallel region */ - uint32_t requested_team_size, /* number of threads in team */ - void *parallel_function, /* pointer to outlined function */ - ompt_invoker_t invoker /* who invokes master task? 
*/ -); - -typedef void (*ompt_end_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id, /* id of task */ - ompt_invoker_t invoker /* who invokes master task? */ -); - -/* tasks */ -typedef void (*ompt_task_callback_t) ( - ompt_task_id_t task_id /* id of task */ -); - -typedef void (*ompt_task_pair_callback_t) ( - ompt_task_id_t first_task_id, - ompt_task_id_t second_task_id -); - -typedef void (*ompt_new_task_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data for parent task */ - ompt_task_id_t new_task_id, /* id of created task */ - void *task_function /* pointer to outlined function */ -); - -/* program */ -typedef void (*ompt_control_callback_t) ( - uint64_t command, /* command of control call */ - uint64_t modifier /* modifier of control call */ -); - -typedef void (*ompt_callback_t)(void); - - -/**************************************************************************** - * ompt API - ***************************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -#define OMPT_API_FNTYPE(fn) fn##_t - -#define OMPT_API_FUNCTION(return_type, fn, args) \ - typedef return_type (*OMPT_API_FNTYPE(fn)) args - - - -/**************************************************************************** - * INQUIRY FUNCTIONS - ***************************************************************************/ - -/* state */ -OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( - ompt_wait_id_t *ompt_wait_id -)); - -/* thread */ -OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); - -OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); - -/* parallel region */ -OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( - int ancestor_level -)); - -OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( - int ancestor_level -)); - -/* task */ -OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( - int depth 
-)); - -OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( - int depth -)); - - - -/**************************************************************************** - * PLACEHOLDERS FOR PERFORMANCE REPORTING - ***************************************************************************/ - -/* idle */ -OMPT_API_FUNCTION(void, ompt_idle, ( - void -)); - -/* overhead */ -OMPT_API_FUNCTION(void, ompt_overhead, ( - void -)); - -/* barrier wait */ -OMPT_API_FUNCTION(void, ompt_barrier_wait, ( - void -)); - -/* task wait */ -OMPT_API_FUNCTION(void, ompt_task_wait, ( - void -)); - -/* mutex wait */ -OMPT_API_FUNCTION(void, ompt_mutex_wait, ( - void -)); - - - -/**************************************************************************** - * INITIALIZATION FUNCTIONS - ***************************************************************************/ - -OMPT_API_FUNCTION(void, ompt_initialize, ( - ompt_function_lookup_t ompt_fn_lookup, - const char *runtime_version, - unsigned int ompt_version -)); - - -/* initialization interface to be defined by tool */ -ompt_initialize_t ompt_tool(void); - -typedef enum opt_init_mode_e { - ompt_init_mode_never = 0, - ompt_init_mode_false = 1, - ompt_init_mode_true = 2, - ompt_init_mode_always = 3 -} ompt_init_mode_t; - -OMPT_API_FUNCTION(int, ompt_set_callback, ( - ompt_event_t event, - ompt_callback_t callback -)); - -typedef enum ompt_set_callback_rc_e { /* non-standard */ - ompt_set_callback_error = 0, - ompt_has_event_no_callback = 1, - ompt_no_event_no_callback = 2, - ompt_has_event_may_callback = 3, - ompt_has_event_must_callback = 4, -} ompt_set_callback_rc_t; - - -OMPT_API_FUNCTION(int, ompt_get_callback, ( - ompt_event_t event, - ompt_callback_t *callback -)); - - - -/**************************************************************************** - * MISCELLANEOUS FUNCTIONS - ***************************************************************************/ - -/* control */ -#if defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp declare 
target -#endif -void ompt_control( - uint64_t command, - uint64_t modifier -); -#if defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp end declare target -#endif - -/* state enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_state, ( - int current_state, - int *next_state, - const char **next_state_name -)); - -#ifdef __cplusplus -}; -#endif - -#endif - +/* + * include/30/ompt.h.var + */ + +#ifndef __OMPT__ +#define __OMPT__ + +/***************************************************************************** + * system include files + *****************************************************************************/ + +#include + + + +/***************************************************************************** + * iteration macros + *****************************************************************************/ + +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_state) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_idle_frame) \ + macro (ompt_get_task_frame) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_id) \ + macro (ompt_get_parallel_team_size) \ + macro (ompt_get_task_id) \ + macro (ompt_get_thread_id) + +#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ + macro (ompt_idle) \ + macro (ompt_overhead) \ + macro (ompt_barrier_wait) \ + macro (ompt_task_wait) \ + macro (ompt_mutex_wait) + +#define FOREACH_OMPT_STATE(macro) \ + \ + /* first */ \ + macro (ompt_state_first, 0x71) /* initial enumeration state */ \ + \ + /* work states (0..15) */ \ + macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ + macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ + macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ + \ + /* idle (16..31) */ \ + macro (ompt_state_idle, 0x10) /* waiting for work */ \ + \ + /* overhead states (32..63) */ \ + macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ + \ + /* barrier wait states (64..79) */ \ + 
macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ + macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ + macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ + \ + /* task wait states (80..95) */ \ + macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ + macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ + \ + /* mutex wait states (96..111) */ \ + macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ + macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ + macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ + macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ + macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ + macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \ + \ + /* misc (112..127) */ \ + macro (ompt_state_undefined, 0x70) /* undefined thread state */ + + +#define FOREACH_OMPT_EVENT(macro) \ + \ + /*--- Mandatory Events ---*/ \ + macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ + macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ + \ + macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ + macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ + \ + macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ + macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ + \ + macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ + \ + macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ + \ + /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ + macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ + macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ + \ + macro 
(ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ + macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ + \ + macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ + macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ + \ + macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ + macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ + \ + macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ + macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ + macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \ + \ + macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ + \ + macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ + \ + /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ + macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ + macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ + \ + macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ + macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ + \ + macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ + \ + macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ + macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ + \ + macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ + macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ + \ 
+ macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ + macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ + \ + macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ + macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ + \ + macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ + macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ + \ + macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ + macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ + \ + macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \ + macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ + \ + macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ + macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ + \ + macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ + macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ + \ + macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ + \ + macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ + macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ + macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ + macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ + macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ + \ + macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ + macro 
(ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ + macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ + macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ + macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ + macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ + \ + macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ + macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ + \ + macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ + macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ + \ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + + + +/***************************************************************************** + * data types + *****************************************************************************/ + +/*--------------------- + * identifiers + *---------------------*/ + +typedef uint64_t ompt_thread_id_t; +#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_task_id_t; +#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_parallel_id_t; +#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_wait_id_t; +#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ + + +/*--------------------- + * ompt_frame_t + *---------------------*/ + +typedef struct ompt_frame_s { + void *exit_runtime_frame; /* next frame is user code */ + void *reenter_runtime_frame; /* previous frame is user code */ +} ompt_frame_t; + + +/***************************************************************************** + * enumerations for thread states and runtime events + 
*****************************************************************************/ + +/*--------------------- + * runtime states + *---------------------*/ + +typedef enum { +#define ompt_state_macro(state, code) state = code, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +} ompt_state_t; + + +/*--------------------- + * runtime events + *---------------------*/ + +typedef enum { +#define ompt_event_macro(event, callback, eventid) event = eventid, + FOREACH_OMPT_EVENT(ompt_event_macro) +#undef ompt_event_macro +} ompt_event_t; + + +/*--------------------- + * set callback results + *---------------------*/ +typedef enum { + ompt_set_result_registration_error = 0, + ompt_set_result_event_may_occur_no_callback = 1, + ompt_set_result_event_never_occurs = 2, + ompt_set_result_event_may_occur_callback_some = 3, + ompt_set_result_event_may_occur_callback_always = 4, +} ompt_set_result_t; + + + +/***************************************************************************** + * callback signatures + *****************************************************************************/ + +/* initialization */ +typedef void (*ompt_interface_fn_t)(void); + +typedef ompt_interface_fn_t (*ompt_function_lookup_t)( + const char * /* entry point to look up */ +); + +/* threads */ +typedef void (*ompt_thread_callback_t) ( + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef enum { + ompt_thread_initial = 1, // start the enumeration at 1 + ompt_thread_worker = 2, + ompt_thread_other = 3 +} ompt_thread_type_t; + +typedef enum { + ompt_invoker_program = 0, /* program invokes master task */ + ompt_invoker_runtime = 1 /* runtime invokes master task */ +} ompt_invoker_t; + +typedef void (*ompt_thread_type_callback_t) ( + ompt_thread_type_t thread_type, /* type of thread */ + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef void (*ompt_wait_callback_t) ( + ompt_wait_id_t wait_id /* wait id */ +); + +/* parallel and workshares */ +typedef void 
(*ompt_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_new_workshare_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t parent_task_id, /* id of parent task */ + void *workshare_function /* pointer to outlined function */ +); + +typedef void (*ompt_new_parallel_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data of parent task */ + ompt_parallel_id_t parallel_id, /* id of parallel region */ + uint32_t requested_team_size, /* number of threads in team */ + void *parallel_function, /* pointer to outlined function */ + ompt_invoker_t invoker /* who invokes master task? */ +); + +typedef void (*ompt_end_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id, /* id of task */ + ompt_invoker_t invoker /* who invokes master task? 
*/ +); + +/* tasks */ +typedef void (*ompt_task_callback_t) ( + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_task_pair_callback_t) ( + ompt_task_id_t first_task_id, + ompt_task_id_t second_task_id +); + +typedef void (*ompt_new_task_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data for parent task */ + ompt_task_id_t new_task_id, /* id of created task */ + void *task_function /* pointer to outlined function */ +); + +/* program */ +typedef void (*ompt_control_callback_t) ( + uint64_t command, /* command of control call */ + uint64_t modifier /* modifier of control call */ +); + +typedef void (*ompt_callback_t)(void); + + +/**************************************************************************** + * ompt API + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define OMPT_API_FNTYPE(fn) fn##_t + +#define OMPT_API_FUNCTION(return_type, fn, args) \ + typedef return_type (*OMPT_API_FNTYPE(fn)) args + + + +/**************************************************************************** + * INQUIRY FUNCTIONS + ***************************************************************************/ + +/* state */ +OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( + ompt_wait_id_t *ompt_wait_id +)); + +/* thread */ +OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); + +OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); + +/* parallel region */ +OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( + int ancestor_level +)); + +OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( + int ancestor_level +)); + +/* task */ +OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( + int depth +)); + +OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( + int depth +)); + + + +/**************************************************************************** + * PLACEHOLDERS FOR PERFORMANCE REPORTING + 
***************************************************************************/ + +/* idle */ +OMPT_API_FUNCTION(void, ompt_idle, ( + void +)); + +/* overhead */ +OMPT_API_FUNCTION(void, ompt_overhead, ( + void +)); + +/* barrier wait */ +OMPT_API_FUNCTION(void, ompt_barrier_wait, ( + void +)); + +/* task wait */ +OMPT_API_FUNCTION(void, ompt_task_wait, ( + void +)); + +/* mutex wait */ +OMPT_API_FUNCTION(void, ompt_mutex_wait, ( + void +)); + + + +/**************************************************************************** + * INITIALIZATION FUNCTIONS + ***************************************************************************/ + +OMPT_API_FUNCTION(void, ompt_initialize, ( + ompt_function_lookup_t ompt_fn_lookup, + const char *runtime_version, + unsigned int ompt_version +)); + + +/* initialization interface to be defined by tool */ +ompt_initialize_t ompt_tool(void); + +typedef enum opt_init_mode_e { + ompt_init_mode_never = 0, + ompt_init_mode_false = 1, + ompt_init_mode_true = 2, + ompt_init_mode_always = 3 +} ompt_init_mode_t; + +OMPT_API_FUNCTION(int, ompt_set_callback, ( + ompt_event_t event, + ompt_callback_t callback +)); + +typedef enum ompt_set_callback_rc_e { /* non-standard */ + ompt_set_callback_error = 0, + ompt_has_event_no_callback = 1, + ompt_no_event_no_callback = 2, + ompt_has_event_may_callback = 3, + ompt_has_event_must_callback = 4, +} ompt_set_callback_rc_t; + + +OMPT_API_FUNCTION(int, ompt_get_callback, ( + ompt_event_t event, + ompt_callback_t *callback +)); + + + +/**************************************************************************** + * MISCELLANEOUS FUNCTIONS + ***************************************************************************/ + +/* control */ +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp declare target +#endif +void ompt_control( + uint64_t command, + uint64_t modifier +); +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp end declare target +#endif + +/* state enumeration */ +OMPT_API_FUNCTION(int, 
ompt_enumerate_state, ( + int current_state, + int *next_state, + const char **next_state_name +)); + +#ifdef __cplusplus +}; +#endif + +#endif + diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp.h.var b/contrib/libs/cxxsupp/openmp/include/40/omp.h.var index 99083072bf1..4c518e77bc0 100644 --- a/contrib/libs/cxxsupp/openmp/include/40/omp.h.var +++ b/contrib/libs/cxxsupp/openmp/include/40/omp.h.var @@ -1,160 +1,160 @@ -/* - * include/40/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ -# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" - -# ifdef __cplusplus - extern "C" { -# endif - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# else -# define __KAI_KMPC_CONVENTION -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads 
(void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - - /* OpenMP 4.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); - extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); - extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); - extern int 
__KAI_KMPC_CONVENTION omp_get_num_devices (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); - extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); - -# include - /* kmp API functions */ - extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - - /* Intel affinity API */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - /* OpenMP 4.0 affinity API */ - typedef enum omp_proc_bind_t { - omp_proc_bind_false = 0, - omp_proc_bind_true = 1, - omp_proc_bind_master = 2, - omp_proc_bind_close = 3, - omp_proc_bind_spread = 4 - } 
omp_proc_bind_t; - - extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - -# undef __KAI_KMPC_CONVENTION - - /* Warning: - The following typedefs are not standard, deprecated and will be removed in a future release. - */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ - +/* + * include/40/omp.h.var + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#ifndef __OMP_H +# define __OMP_H + +# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ +# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ +# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ +# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" + +# ifdef __cplusplus + extern "C" { +# endif + +# if defined(_WIN32) +# define __KAI_KMPC_CONVENTION __cdecl +# else +# define __KAI_KMPC_CONVENTION +# endif + + /* schedule kind constants */ + typedef enum omp_sched_t { + omp_sched_static = 1, + omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4 + } omp_sched_t; + + /* set API functions */ + extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); + extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); + extern void __KAI_KMPC_CONVENTION omp_set_nested (int); + extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); + extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); + + /* query API functions */ + extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); + extern int __KAI_KMPC_CONVENTION omp_get_nested (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); + extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); + extern int __KAI_KMPC_CONVENTION omp_in_final (void); + extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); + extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); + extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); + extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); + + /* lock API 
functions */ + typedef struct omp_lock_t { + void * _lk; + } omp_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); + + /* nested lock API functions */ + typedef struct omp_nest_lock_t { + void * _lk; + } omp_nest_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); + + /* time API functions */ + extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); + extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); + + /* OpenMP 4.0 */ + extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); + extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); + extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); + extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); + +# include + /* kmp API functions */ + extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); + extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); + extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); + extern int __KAI_KMPC_CONVENTION kmp_get_library (void); + extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); + extern void 
__KAI_KMPC_CONVENTION kmp_set_library (int); + extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); + extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); + + /* Intel affinity API */ + typedef void * kmp_affinity_mask_t; + + extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); + extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); + extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); + + /* OpenMP 4.0 affinity API */ + typedef enum omp_proc_bind_t { + omp_proc_bind_false = 0, + omp_proc_bind_true = 1, + omp_proc_bind_master = 2, + omp_proc_bind_close = 3, + omp_proc_bind_spread = 4 + } omp_proc_bind_t; + + extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); + + extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); + extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); + extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); + extern void __KAI_KMPC_CONVENTION kmp_free (void *); + + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); + +# undef __KAI_KMPC_CONVENTION + + /* Warning: + The following typedefs are not standard, deprecated and will be removed in a future release. 
+ */ + typedef int omp_int_t; + typedef double omp_wtime_t; + +# ifdef __cplusplus + } +# endif + +#endif /* __OMP_H */ + diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var index 7f0276de9b6..3a59162b4bb 100644 --- a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var +++ b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var @@ -1,758 +1,758 @@ -! include/40/omp_lib.f.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. -!*** - -!dec$ fixedformlinesize:132 - - module omp_lib_kinds - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - integer, parameter :: kmp_cancel_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*), parameter :: kmp_build_date = 
'@LIBOMP_BUILD_DATE@' - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(nthreads) - use omp_lib_kinds - integer (kind=omp_integer_kind) nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_nested - - function omp_get_num_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_get_dynamic() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - use omp_lib_kinds - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function 
omp_get_active_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_proc_bind() - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(dflt_device) - use omp_lib_kinds - integer (kind=omp_integer_kind) dflt_device - end subroutine omp_set_default_device - - function omp_get_num_devices() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() - use omp_lib_kinds 
- integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) -!DIR$ 
IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) - use omp_lib_kinds - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - use omp_lib_kinds - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - use omp_lib_kinds - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - use 
omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function 
kmp_malloc - - function kmp_calloc(nelem, elsize) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) - use omp_lib_kinds - integer (kind=kmp_cancel_kind) cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. 
-!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device - -!dec$ attributes alias:'omp_init_lock' :: 
omp_init_lock -!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ 
attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. -!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes 
alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device - -!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity 
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
-!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device -!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device -!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices -!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ 
attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes 
alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'kmp_free_'::kmp_free - -!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off -!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - -!dec$ if defined(__APPLE__) - -!*** -!*** The Mac entry points are in lowercase, with an both an underscore -!*** appended and an underscore prepended. -!*** - -!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'_omp_get_level_'::omp_get_level -!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick -!dec$ 
attributes alias:'_omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device - -!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes 
alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - - end module omp_lib - +! include/40/omp_lib.f.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + +!*** +!*** Some of the directives for the following routine extend past column 72, +!*** so process this file in 132-column mode. 
+!*** + +!dec$ fixedformlinesize:132 + + module omp_lib_kinds + + integer, parameter :: omp_integer_kind = 4 + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = 4 + integer, parameter :: omp_lock_kind = int_ptr_kind() + integer, parameter :: omp_nest_lock_kind = int_ptr_kind() + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: omp_proc_bind_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = int_ptr_kind() + integer, parameter :: kmp_size_t_kind = int_ptr_kind() + integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + integer, parameter :: kmp_cancel_kind = omp_integer_kind + + end module omp_lib_kinds + + module omp_lib + + use omp_lib_kinds + + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 + integer (kind=kmp_cancel_kind), parameter :: 
kmp_cancel_sections = 3 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + + interface + +! *** +! *** omp_* entry points +! *** + + subroutine omp_set_num_threads(nthreads) + use omp_lib_kinds + integer (kind=omp_integer_kind) nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) + use omp_lib_kinds + logical (kind=omp_logical_kind) enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) + use omp_lib_kinds + logical (kind=omp_logical_kind) enable + end subroutine omp_set_nested + + function omp_get_num_threads() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_get_dynamic() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) + use omp_lib_kinds + integer (kind=omp_integer_kind) max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + 
function omp_get_level() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function omp_get_active_level() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) + use omp_lib_kinds + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) + use omp_lib_kinds + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_team_size + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) + use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) + use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_proc_bind() + use omp_lib_kinds + integer (kind=omp_proc_bind_kind) omp_get_proc_bind + end function omp_get_proc_bind + + function omp_get_wtime() + double precision omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick () + double precision omp_get_wtick + end function omp_get_wtick + + function omp_get_default_device() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_default_device + end function omp_get_default_device + + subroutine omp_set_default_device(dflt_device) + use omp_lib_kinds + integer (kind=omp_integer_kind) dflt_device + end subroutine omp_set_default_device + + function omp_get_num_devices() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_devices + end function omp_get_num_devices + + function omp_get_num_teams() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_teams + end function omp_get_num_teams + + function omp_get_team_num() + use omp_lib_kinds + 
integer (kind=omp_integer_kind) omp_get_team_num + end function omp_get_team_num + + function omp_get_cancellation() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_cancellation + end function omp_get_cancellation + + function omp_is_initial_device() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_is_initial_device + end function omp_is_initial_device + + subroutine omp_init_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_lock +!DIR$ ENDIF + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + 
integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! *** + + subroutine kmp_set_stacksize(size) + use omp_lib_kinds + integer (kind=omp_integer_kind) size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) + use omp_lib_kinds + integer (kind=kmp_size_t_kind) size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) + use omp_lib_kinds + integer (kind=omp_integer_kind) msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) + use omp_lib_kinds + integer (kind=omp_integer_kind) libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) + character*(*) string + end subroutine kmp_set_defaults + + function kmp_get_stacksize() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() + use omp_lib_kinds + 
integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity + + function kmp_get_affinity_max_proc() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function 
kmp_malloc(size) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind) size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind) nelem + integer (kind=kmp_size_t_kind) elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind) ptr + integer (kind=kmp_size_t_kind) size + end function kmp_realloc + + subroutine kmp_free(ptr) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() + end subroutine kmp_set_warnings_off + + function kmp_get_cancellation_status(cancelkind) + use omp_lib_kinds + integer (kind=kmp_cancel_kind) cancelkind + logical (kind=omp_logical_kind) kmp_get_cancellation_status + end function kmp_get_cancellation_status + + end interface + +!dec$ if defined(_WIN32) +!dec$ if defined(_WIN64) .or. defined(_M_AMD64) + +!*** +!*** The Fortran entry points must be in uppercase, even if the /Qlowercase +!*** option is specified. The alias attribute ensures that the specified +!*** string is used as the entry point. +!*** +!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an +!*** underscore prepended. On the Windows* OS Intel(R) 64 +!*** architecture, no underscore is prepended. 
+!*** + +!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads +!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic +!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested +!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads +!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads +!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num +!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs +!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel +!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic +!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested +!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit +!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels +!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels +!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level +!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level +!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num +!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size +!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule +!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule +!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind +!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime +!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick +!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device +!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device +!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices +!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams +!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num +!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation +!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device + +!dec$ attributes alias:'omp_init_lock' :: 
omp_init_lock +!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock +!dec$ attributes alias:'omp_set_lock' :: omp_set_lock +!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock +!dec$ attributes alias:'omp_test_lock' :: omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock +!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock +!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock + +!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc +!dec$ 
attributes alias:'KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'KMP_FREE'::kmp_free + +!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status + +!dec$ else + +!*** +!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. +!*** + +!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads +!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic +!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested +!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads +!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads +!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num +!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs +!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel +!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic +!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested +!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit +!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels +!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels +!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level +!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level +!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num +!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size +!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule +!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule +!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind +!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime +!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick +!dec$ attributes 
alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device +!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device +!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices +!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams +!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num +!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation +!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device + +!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock +!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock +!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock +!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock +!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock +!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock + +!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity 
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc +!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'_KMP_FREE'::kmp_free + +!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status + +!dec$ endif +!dec$ endif + +!dec$ if defined(__linux) + +!*** +!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
+!*** + +!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'omp_get_level_'::omp_get_level +!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size +!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule +!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind +!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick +!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device +!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device +!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices +!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams +!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num +!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation +!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device + +!dec$ attributes alias:'omp_init_lock_'::omp_init_lock +!dec$ 
attributes alias:'omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime +!dec$ attributes alias:'kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'kmp_malloc_'::kmp_malloc +!dec$ attributes 
alias:'kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'kmp_free_'::kmp_free + +!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off +!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status + +!dec$ endif + +!dec$ if defined(__APPLE__) + +!*** +!*** The Mac entry points are in lowercase, with both an underscore +!*** appended and an underscore prepended. +!*** + +!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'_omp_get_level_'::omp_get_level +!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size +!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule +!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind +!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick +!dec$
attributes alias:'_omp_get_num_teams_'::omp_get_num_teams +!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num +!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation +!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device + +!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock +!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime +!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes 
alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc +!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'_kmp_free_'::kmp_free + +!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off + +!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status + +!dec$ endif + + end module omp_lib + diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var index be4bcaf257e..5be80266035 100644 --- a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var +++ b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var @@ -1,448 +1,448 @@ -! include/40/omp_lib.f90.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
- - module omp_lib_kinds - - use, intrinsic :: iso_c_binding - - integer, parameter :: omp_integer_kind = c_int - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = c_float - integer, parameter :: kmp_double_kind = c_double - integer, parameter :: omp_lock_kind = c_intptr_t - integer, parameter :: omp_nest_lock_kind = c_intptr_t - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = c_intptr_t - integer, parameter :: kmp_size_t_kind = c_size_t - integer, parameter :: kmp_affinity_mask_kind = c_intptr_t - integer, parameter :: kmp_cancel_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: 
kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(nthreads) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine 
omp_set_max_active_levels(max_levels) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end 
function omp_get_default_device - - subroutine omp_set_default_device(dflt_device) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: dflt_device - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) 
-!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function 
kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_calloc(nelem, elsize) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - 
subroutine kmp_free(ptr) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) bind(c) - use omp_lib_kinds - integer (kind=kmp_cancel_kind), value :: cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - end interface - - end module omp_lib +! include/40/omp_lib.f90.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + + module omp_lib_kinds + + use, intrinsic :: iso_c_binding + + integer, parameter :: omp_integer_kind = c_int + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = c_float + integer, parameter :: kmp_double_kind = c_double + integer, parameter :: omp_lock_kind = c_intptr_t + integer, parameter :: omp_nest_lock_kind = c_intptr_t + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: omp_proc_bind_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = c_intptr_t + integer, parameter :: kmp_size_t_kind = c_size_t + integer, parameter :: kmp_affinity_mask_kind = c_intptr_t + integer, parameter :: kmp_cancel_kind = omp_integer_kind + + end module omp_lib_kinds + + module omp_lib + + use omp_lib_kinds + + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = 
@LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*) kmp_build_date + parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + + interface + +! *** +! *** omp_* entry points +! 
*** + + subroutine omp_set_num_threads(nthreads) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_nested + + function omp_get_num_threads() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_in_final() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_final + end function omp_in_final + + function omp_get_dynamic() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() bind(c) + 
use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function omp_get_active_level() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + integer (kind=omp_integer_kind), value :: level + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_team_size + integer (kind=omp_integer_kind), value :: level + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) bind(c) + use omp_lib_kinds + integer (kind=omp_sched_kind), value :: kind + integer (kind=omp_integer_kind), value :: modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) bind(c) + use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_proc_bind() bind(c) + use omp_lib_kinds + integer (kind=omp_proc_bind_kind) omp_get_proc_bind + end function omp_get_proc_bind + + function omp_get_wtime() bind(c) + use omp_lib_kinds + real (kind=kmp_double_kind) omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick() bind(c) + use omp_lib_kinds + real (kind=kmp_double_kind) omp_get_wtick + end function omp_get_wtick + + function omp_get_default_device() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_default_device + end function omp_get_default_device + + subroutine omp_set_default_device(dflt_device) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: dflt_device + end subroutine omp_set_default_device + + function 
omp_get_num_devices() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_devices + end function omp_get_num_devices + + function omp_get_num_teams() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_teams + end function omp_get_num_teams + + function omp_get_team_num() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_team_num + end function omp_get_team_num + + function omp_get_cancellation() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_cancellation + end function omp_get_cancellation + + function omp_is_initial_device() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_is_initial_device + end function omp_is_initial_device + + subroutine omp_init_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_lock +!DIR$ ENDIF + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine 
omp_init_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! 
*** + + subroutine kmp_set_stacksize(size) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) bind(c) + use omp_lib_kinds + integer (kind=kmp_size_t_kind), value :: size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() bind(c) + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() bind(c) + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() bind(c) + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) bind(c) + use, intrinsic :: iso_c_binding + character (kind=c_char) :: string(*) + end subroutine kmp_set_defaults + + function kmp_get_stacksize() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() bind(c) + use omp_lib_kinds + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function 
kmp_get_affinity + + function kmp_get_affinity_max_proc() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) bind(c) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) bind(c) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind), value :: size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind), value :: nelem + integer (kind=kmp_size_t_kind), value :: elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind), value :: ptr + integer (kind=kmp_size_t_kind), value :: size + end function kmp_realloc + + 
subroutine kmp_free(ptr) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind), value :: ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() bind(c) + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() bind(c) + end subroutine kmp_set_warnings_off + + function kmp_get_cancellation_status(cancelkind) bind(c) + use omp_lib_kinds + integer (kind=kmp_cancel_kind), value :: cancelkind + logical (kind=omp_logical_kind) kmp_get_cancellation_status + end function kmp_get_cancellation_status + + end interface + + end module omp_lib diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var index 4c933cda00f..cc134fd3527 100644 --- a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var +++ b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var @@ -1,558 +1,558 @@ -! include/40/omp_lib.h.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. 
-!*** - -!DIR$ fixedformlinesize:132 - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - interface - -! *** -! *** omp_* entry points -! 
*** - - subroutine omp_set_num_threads(nthreads) bind(c) - import - integer (kind=omp_integer_kind), value :: nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) bind(c) - import - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) bind(c) - import - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - import - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - import - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - import - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - import - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - import - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() 
bind(c) - import - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) bind(c) - import - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) bind(c) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - import - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() bind(c) - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - import - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(dflt_device) bind(c) - import - integer (kind=omp_integer_kind), value :: dflt_device - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - import 
- integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_is_initial_device() bind(c) - import - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - import - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ 
attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) bind(c) - import - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - import - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - import - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - import - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - character string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - import - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - 
import - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end 
function kmp_malloc - - function kmp_calloc(nelem, elsize) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - import - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - end interface - -!DIR$ IF DEFINED (__INTEL_OFFLOAD) -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick -!DIR$ 
ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc 
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off - -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!$omp declare target(omp_set_num_threads ) -!$omp declare target(omp_set_dynamic ) -!$omp declare target(omp_set_nested ) -!$omp declare target(omp_get_num_threads ) -!$omp declare target(omp_get_max_threads ) -!$omp declare target(omp_get_thread_num ) -!$omp declare target(omp_get_num_procs ) -!$omp declare target(omp_in_parallel ) -!$omp declare target(omp_in_final ) -!$omp declare target(omp_get_dynamic ) -!$omp declare target(omp_get_nested ) -!$omp declare target(omp_get_thread_limit ) -!$omp declare target(omp_set_max_active_levels ) -!$omp declare target(omp_get_max_active_levels ) -!$omp declare target(omp_get_level ) -!$omp declare target(omp_get_active_level ) -!$omp declare target(omp_get_ancestor_thread_num ) -!$omp declare target(omp_get_team_size ) -!$omp declare target(omp_set_schedule ) -!$omp declare target(omp_get_schedule ) -!$omp declare target(omp_get_proc_bind ) -!$omp declare target(omp_get_wtime ) -!$omp declare target(omp_get_wtick ) -!$omp declare target(omp_get_default_device ) -!$omp declare target(omp_set_default_device ) -!$omp declare target(omp_is_initial_device ) -!$omp declare target(omp_get_num_devices ) -!$omp declare target(omp_get_num_teams ) -!$omp declare target(omp_get_team_num ) -!$omp declare target(omp_init_lock ) -!$omp declare target(omp_destroy_lock ) -!$omp declare target(omp_set_lock ) -!$omp declare target(omp_unset_lock ) -!$omp declare target(omp_test_lock ) -!$omp declare target(omp_init_nest_lock ) -!$omp declare target(omp_destroy_nest_lock ) -!$omp declare target(omp_set_nest_lock ) -!$omp declare target(omp_unset_nest_lock ) -!$omp declare target(omp_test_nest_lock ) -!$omp declare target(kmp_set_stacksize ) -!$omp declare target(kmp_set_stacksize_s ) -!$omp declare 
target(kmp_set_blocktime ) -!$omp declare target(kmp_set_library_serial ) -!$omp declare target(kmp_set_library_turnaround ) -!$omp declare target(kmp_set_library_throughput ) -!$omp declare target(kmp_set_library ) -!$omp declare target(kmp_set_defaults ) -!$omp declare target(kmp_get_stacksize ) -!$omp declare target(kmp_get_stacksize_s ) -!$omp declare target(kmp_get_blocktime ) -!$omp declare target(kmp_get_library ) -!$omp declare target(kmp_set_affinity ) -!$omp declare target(kmp_get_affinity ) -!$omp declare target(kmp_get_affinity_max_proc ) -!$omp declare target(kmp_create_affinity_mask ) -!$omp declare target(kmp_destroy_affinity_mask ) -!$omp declare target(kmp_set_affinity_mask_proc ) -!$omp declare target(kmp_unset_affinity_mask_proc ) -!$omp declare target(kmp_get_affinity_mask_proc ) -!$omp declare target(kmp_malloc ) -!$omp declare target(kmp_calloc ) -!$omp declare target(kmp_realloc ) -!$omp declare target(kmp_free ) -!$omp declare target(kmp_set_warnings_on ) -!$omp declare target(kmp_set_warnings_off ) -!DIR$ ENDIF -!DIR$ ENDIF - +! include/40/omp_lib.h.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + +!*** +!*** Some of the directives for the following routine extend past column 72, +!*** so process this file in 132-column mode. 
+!*** + +!DIR$ fixedformlinesize:132 + + integer, parameter :: omp_integer_kind = 4 + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = 4 + integer, parameter :: omp_lock_kind = int_ptr_kind() + integer, parameter :: omp_nest_lock_kind = int_ptr_kind() + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: omp_proc_bind_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = int_ptr_kind() + integer, parameter :: kmp_size_t_kind = int_ptr_kind() + integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*) kmp_build_date + parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + + interface + +! *** +! *** omp_* entry points +! 
*** + + subroutine omp_set_num_threads(nthreads) bind(c) + import + integer (kind=omp_integer_kind), value :: nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) bind(c) + import + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) bind(c) + import + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_nested + + function omp_get_num_threads() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() bind(c) + import + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() bind(c) + import + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() bind(c) + import + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_in_final() bind(c) + import + logical (kind=omp_logical_kind) omp_in_final + end function omp_in_final + + function omp_get_dynamic() bind(c) + import + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() bind(c) + import + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() bind(c) + import + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) bind(c) + import + integer (kind=omp_integer_kind), value :: max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() bind(c) + import + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() 
bind(c) + import + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function omp_get_active_level() bind(c) + import + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) bind(c) + import + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + integer (kind=omp_integer_kind), value :: level + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) bind(c) + import + integer (kind=omp_integer_kind) omp_get_team_size + integer (kind=omp_integer_kind), value :: level + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) bind(c) + import + integer (kind=omp_sched_kind), value :: kind + integer (kind=omp_integer_kind), value :: modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) bind(c) + import + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_proc_bind() bind(c) + import + integer (kind=omp_proc_bind_kind) omp_get_proc_bind + end function omp_get_proc_bind + + function omp_get_wtime() bind(c) + double precision omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick() bind(c) + double precision omp_get_wtick + end function omp_get_wtick + + function omp_get_default_device() bind(c) + import + integer (kind=omp_integer_kind) omp_get_default_device + end function omp_get_default_device + + subroutine omp_set_default_device(dflt_device) bind(c) + import + integer (kind=omp_integer_kind), value :: dflt_device + end subroutine omp_set_default_device + + function omp_get_num_devices() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_devices + end function omp_get_num_devices + + function omp_get_num_teams() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_teams + end function omp_get_num_teams + + function omp_get_team_num() bind(c) + import 
+ integer (kind=omp_integer_kind) omp_get_team_num + end function omp_get_team_num + + function omp_is_initial_device() bind(c) + import + logical (kind=omp_logical_kind) omp_is_initial_device + end function omp_is_initial_device + + subroutine omp_init_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_lock +!DIR$ ENDIF + import + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ 
attributes known_intrinsic :: omp_set_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! *** + + subroutine kmp_set_stacksize(size) bind(c) + import + integer (kind=omp_integer_kind), value :: size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) bind(c) + import + integer (kind=kmp_size_t_kind), value :: size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) bind(c) + import + integer (kind=omp_integer_kind), value :: msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() bind(c) + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() bind(c) + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() bind(c) + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) bind(c) + import + integer (kind=omp_integer_kind), value :: libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) bind(c) + character string(*) + end subroutine kmp_set_defaults + + function kmp_get_stacksize() bind(c) + import + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() bind(c) + import + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() bind(c) + 
import + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() bind(c) + import + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity + + function kmp_get_affinity_max_proc() bind(c) + import + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) bind(c) + import + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) bind(c) + import + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) bind(c) + import + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind), value :: size + end 
function kmp_malloc + + function kmp_calloc(nelem, elsize) bind(c) + import + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind), value :: nelem + integer (kind=kmp_size_t_kind), value :: elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) bind(c) + import + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind), value :: ptr + integer (kind=kmp_size_t_kind), value :: size + end function kmp_realloc + + subroutine kmp_free(ptr) bind(c) + import + integer (kind=kmp_pointer_kind), value :: ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() bind(c) + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() bind(c) + end subroutine kmp_set_warnings_off + + end interface + +!DIR$ IF DEFINED (__INTEL_OFFLOAD) +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick +!DIR$ 
ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc 
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off + +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!$omp declare target(omp_set_num_threads ) +!$omp declare target(omp_set_dynamic ) +!$omp declare target(omp_set_nested ) +!$omp declare target(omp_get_num_threads ) +!$omp declare target(omp_get_max_threads ) +!$omp declare target(omp_get_thread_num ) +!$omp declare target(omp_get_num_procs ) +!$omp declare target(omp_in_parallel ) +!$omp declare target(omp_in_final ) +!$omp declare target(omp_get_dynamic ) +!$omp declare target(omp_get_nested ) +!$omp declare target(omp_get_thread_limit ) +!$omp declare target(omp_set_max_active_levels ) +!$omp declare target(omp_get_max_active_levels ) +!$omp declare target(omp_get_level ) +!$omp declare target(omp_get_active_level ) +!$omp declare target(omp_get_ancestor_thread_num ) +!$omp declare target(omp_get_team_size ) +!$omp declare target(omp_set_schedule ) +!$omp declare target(omp_get_schedule ) +!$omp declare target(omp_get_proc_bind ) +!$omp declare target(omp_get_wtime ) +!$omp declare target(omp_get_wtick ) +!$omp declare target(omp_get_default_device ) +!$omp declare target(omp_set_default_device ) +!$omp declare target(omp_is_initial_device ) +!$omp declare target(omp_get_num_devices ) +!$omp declare target(omp_get_num_teams ) +!$omp declare target(omp_get_team_num ) +!$omp declare target(omp_init_lock ) +!$omp declare target(omp_destroy_lock ) +!$omp declare target(omp_set_lock ) +!$omp declare target(omp_unset_lock ) +!$omp declare target(omp_test_lock ) +!$omp declare target(omp_init_nest_lock ) +!$omp declare target(omp_destroy_nest_lock ) +!$omp declare target(omp_set_nest_lock ) +!$omp declare target(omp_unset_nest_lock ) +!$omp declare target(omp_test_nest_lock ) +!$omp declare target(kmp_set_stacksize ) +!$omp declare target(kmp_set_stacksize_s ) +!$omp declare 
target(kmp_set_blocktime ) +!$omp declare target(kmp_set_library_serial ) +!$omp declare target(kmp_set_library_turnaround ) +!$omp declare target(kmp_set_library_throughput ) +!$omp declare target(kmp_set_library ) +!$omp declare target(kmp_set_defaults ) +!$omp declare target(kmp_get_stacksize ) +!$omp declare target(kmp_get_stacksize_s ) +!$omp declare target(kmp_get_blocktime ) +!$omp declare target(kmp_get_library ) +!$omp declare target(kmp_set_affinity ) +!$omp declare target(kmp_get_affinity ) +!$omp declare target(kmp_get_affinity_max_proc ) +!$omp declare target(kmp_create_affinity_mask ) +!$omp declare target(kmp_destroy_affinity_mask ) +!$omp declare target(kmp_set_affinity_mask_proc ) +!$omp declare target(kmp_unset_affinity_mask_proc ) +!$omp declare target(kmp_get_affinity_mask_proc ) +!$omp declare target(kmp_malloc ) +!$omp declare target(kmp_calloc ) +!$omp declare target(kmp_realloc ) +!$omp declare target(kmp_free ) +!$omp declare target(kmp_set_warnings_on ) +!$omp declare target(kmp_set_warnings_off ) +!DIR$ ENDIF +!DIR$ ENDIF + diff --git a/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var b/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var index c99ec8e6770..3a8c30c1657 100644 --- a/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var +++ b/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var @@ -1,487 +1,487 @@ -/* - * include/40/ompt.h.var - */ - -#ifndef __OMPT__ -#define __OMPT__ - -/***************************************************************************** - * system include files - *****************************************************************************/ - -#include - - - -/***************************************************************************** - * iteration macros - *****************************************************************************/ - -#define FOREACH_OMPT_INQUIRY_FN(macro) \ - macro (ompt_enumerate_state) \ - \ - macro (ompt_set_callback) \ - macro (ompt_get_callback) \ - \ - macro (ompt_get_idle_frame) \ - macro 
(ompt_get_task_frame) \ - \ - macro (ompt_get_state) \ - \ - macro (ompt_get_parallel_id) \ - macro (ompt_get_parallel_team_size) \ - macro (ompt_get_task_id) \ - macro (ompt_get_thread_id) - -#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ - macro (ompt_idle) \ - macro (ompt_overhead) \ - macro (ompt_barrier_wait) \ - macro (ompt_task_wait) \ - macro (ompt_mutex_wait) - -#define FOREACH_OMPT_STATE(macro) \ - \ - /* first */ \ - macro (ompt_state_first, 0x71) /* initial enumeration state */ \ - \ - /* work states (0..15) */ \ - macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ - macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ - macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ - \ - /* idle (16..31) */ \ - macro (ompt_state_idle, 0x10) /* waiting for work */ \ - \ - /* overhead states (32..63) */ \ - macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ - \ - /* barrier wait states (64..79) */ \ - macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ - macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ - macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ - \ - /* task wait states (80..95) */ \ - macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ - macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ - \ - /* mutex wait states (96..111) */ \ - macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ - macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ - macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ - macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ - macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ - macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) 
*/ \ - \ - /* misc (112..127) */ \ - macro (ompt_state_undefined, 0x70) /* undefined thread state */ - - -#define FOREACH_OMPT_EVENT(macro) \ - \ - /*--- Mandatory Events ---*/ \ - macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ - macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ - \ - macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ - macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ - \ - macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ - macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ - \ - macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ - \ - macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ - \ - /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ - macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ - macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ - \ - macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ - macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ - \ - macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ - macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ - \ - macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ - macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ - \ - macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ - macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ - macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ 
\ - \ - macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ - \ - macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ - \ - /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ - macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ - macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ - \ - macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ - macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ - \ - macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ - \ - macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ - macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ - \ - macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ - macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ - \ - macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ - macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ - \ - macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ - macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ - \ - macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ - macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ - \ - macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ - macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ - \ - macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at 
barrier begin */ \ - macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ - \ - macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ - macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ - \ - macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ - macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ - \ - macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ - \ - macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ - macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ - macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ - macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ - macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ - \ - macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ - macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ - macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ - macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ - macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ - macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ - \ - macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ - macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ - \ - macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ - macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ - \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush 
*/ - - - -/***************************************************************************** - * data types - *****************************************************************************/ - -/*--------------------- - * identifiers - *---------------------*/ - -typedef uint64_t ompt_thread_id_t; -#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_task_id_t; -#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_parallel_id_t; -#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_wait_id_t; -#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ - - -/*--------------------- - * ompt_frame_t - *---------------------*/ - -typedef struct ompt_frame_s { - void *exit_runtime_frame; /* next frame is user code */ - void *reenter_runtime_frame; /* previous frame is user code */ -} ompt_frame_t; - - -/***************************************************************************** - * enumerations for thread states and runtime events - *****************************************************************************/ - -/*--------------------- - * runtime states - *---------------------*/ - -typedef enum { -#define ompt_state_macro(state, code) state = code, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -} ompt_state_t; - - -/*--------------------- - * runtime events - *---------------------*/ - -typedef enum { -#define ompt_event_macro(event, callback, eventid) event = eventid, - FOREACH_OMPT_EVENT(ompt_event_macro) -#undef ompt_event_macro -} ompt_event_t; - - -/*--------------------- - * set callback results - *---------------------*/ -typedef enum { - ompt_set_result_registration_error = 0, - ompt_set_result_event_may_occur_no_callback = 1, - ompt_set_result_event_never_occurs = 2, - ompt_set_result_event_may_occur_callback_some = 3, - ompt_set_result_event_may_occur_callback_always = 4, -} ompt_set_result_t; - - - 
-/***************************************************************************** - * callback signatures - *****************************************************************************/ - -/* initialization */ -typedef void (*ompt_interface_fn_t)(void); - -typedef ompt_interface_fn_t (*ompt_function_lookup_t)( - const char * /* entry point to look up */ -); - -/* threads */ -typedef void (*ompt_thread_callback_t) ( - ompt_thread_id_t thread_id /* ID of thread */ -); - -typedef enum { - ompt_thread_initial = 1, // start the enumeration at 1 - ompt_thread_worker = 2, - ompt_thread_other = 3 -} ompt_thread_type_t; - -typedef enum { - ompt_invoker_program = 0, /* program invokes master task */ - ompt_invoker_runtime = 1 /* runtime invokes master task */ -} ompt_invoker_t; - -typedef void (*ompt_thread_type_callback_t) ( - ompt_thread_type_t thread_type, /* type of thread */ - ompt_thread_id_t thread_id /* ID of thread */ -); - -typedef void (*ompt_wait_callback_t) ( - ompt_wait_id_t wait_id /* wait id */ -); - -/* parallel and workshares */ -typedef void (*ompt_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id /* id of task */ -); - -typedef void (*ompt_new_workshare_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t parent_task_id, /* id of parent task */ - void *workshare_function /* pointer to outlined function */ -); - -typedef void (*ompt_new_parallel_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data of parent task */ - ompt_parallel_id_t parallel_id, /* id of parallel region */ - uint32_t requested_team_size, /* number of threads in team */ - void *parallel_function, /* pointer to outlined function */ - ompt_invoker_t invoker /* who invokes master task? 
*/ -); - -typedef void (*ompt_end_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id, /* id of task */ - ompt_invoker_t invoker /* who invokes master task? */ -); - -/* tasks */ -typedef void (*ompt_task_callback_t) ( - ompt_task_id_t task_id /* id of task */ -); - -typedef void (*ompt_task_pair_callback_t) ( - ompt_task_id_t first_task_id, - ompt_task_id_t second_task_id -); - -typedef void (*ompt_new_task_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data for parent task */ - ompt_task_id_t new_task_id, /* id of created task */ - void *task_function /* pointer to outlined function */ -); - -/* program */ -typedef void (*ompt_control_callback_t) ( - uint64_t command, /* command of control call */ - uint64_t modifier /* modifier of control call */ -); - -typedef void (*ompt_callback_t)(void); - - -/**************************************************************************** - * ompt API - ***************************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -#define OMPT_API_FNTYPE(fn) fn##_t - -#define OMPT_API_FUNCTION(return_type, fn, args) \ - typedef return_type (*OMPT_API_FNTYPE(fn)) args - - - -/**************************************************************************** - * INQUIRY FUNCTIONS - ***************************************************************************/ - -/* state */ -OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( - ompt_wait_id_t *ompt_wait_id -)); - -/* thread */ -OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); - -OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); - -/* parallel region */ -OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( - int ancestor_level -)); - -OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( - int ancestor_level -)); - -/* task */ -OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( - int depth 
-)); - -OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( - int depth -)); - - - -/**************************************************************************** - * PLACEHOLDERS FOR PERFORMANCE REPORTING - ***************************************************************************/ - -/* idle */ -OMPT_API_FUNCTION(void, ompt_idle, ( - void -)); - -/* overhead */ -OMPT_API_FUNCTION(void, ompt_overhead, ( - void -)); - -/* barrier wait */ -OMPT_API_FUNCTION(void, ompt_barrier_wait, ( - void -)); - -/* task wait */ -OMPT_API_FUNCTION(void, ompt_task_wait, ( - void -)); - -/* mutex wait */ -OMPT_API_FUNCTION(void, ompt_mutex_wait, ( - void -)); - - - -/**************************************************************************** - * INITIALIZATION FUNCTIONS - ***************************************************************************/ - -OMPT_API_FUNCTION(void, ompt_initialize, ( - ompt_function_lookup_t ompt_fn_lookup, - const char *runtime_version, - unsigned int ompt_version -)); - - -/* initialization interface to be defined by tool */ -ompt_initialize_t ompt_tool(void); - -typedef enum opt_init_mode_e { - ompt_init_mode_never = 0, - ompt_init_mode_false = 1, - ompt_init_mode_true = 2, - ompt_init_mode_always = 3 -} ompt_init_mode_t; - -OMPT_API_FUNCTION(int, ompt_set_callback, ( - ompt_event_t event, - ompt_callback_t callback -)); - -typedef enum ompt_set_callback_rc_e { /* non-standard */ - ompt_set_callback_error = 0, - ompt_has_event_no_callback = 1, - ompt_no_event_no_callback = 2, - ompt_has_event_may_callback = 3, - ompt_has_event_must_callback = 4, -} ompt_set_callback_rc_t; - - -OMPT_API_FUNCTION(int, ompt_get_callback, ( - ompt_event_t event, - ompt_callback_t *callback -)); - - - -/**************************************************************************** - * MISCELLANEOUS FUNCTIONS - ***************************************************************************/ - -/* control */ -#if defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp declare 
target -#endif -void ompt_control( - uint64_t command, - uint64_t modifier -); -#if defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp end declare target -#endif - -/* state enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_state, ( - int current_state, - int *next_state, - const char **next_state_name -)); - -#ifdef __cplusplus -}; -#endif - -#endif - +/* + * include/40/ompt.h.var + */ + +#ifndef __OMPT__ +#define __OMPT__ + +/***************************************************************************** + * system include files + *****************************************************************************/ + +#include + + + +/***************************************************************************** + * iteration macros + *****************************************************************************/ + +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_state) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_idle_frame) \ + macro (ompt_get_task_frame) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_id) \ + macro (ompt_get_parallel_team_size) \ + macro (ompt_get_task_id) \ + macro (ompt_get_thread_id) + +#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ + macro (ompt_idle) \ + macro (ompt_overhead) \ + macro (ompt_barrier_wait) \ + macro (ompt_task_wait) \ + macro (ompt_mutex_wait) + +#define FOREACH_OMPT_STATE(macro) \ + \ + /* first */ \ + macro (ompt_state_first, 0x71) /* initial enumeration state */ \ + \ + /* work states (0..15) */ \ + macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ + macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ + macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ + \ + /* idle (16..31) */ \ + macro (ompt_state_idle, 0x10) /* waiting for work */ \ + \ + /* overhead states (32..63) */ \ + macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ + \ + /* barrier wait states (64..79) */ \ + 
macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ + macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ + macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ + \ + /* task wait states (80..95) */ \ + macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ + macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ + \ + /* mutex wait states (96..111) */ \ + macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ + macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ + macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ + macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ + macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ + macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \ + \ + /* misc (112..127) */ \ + macro (ompt_state_undefined, 0x70) /* undefined thread state */ + + +#define FOREACH_OMPT_EVENT(macro) \ + \ + /*--- Mandatory Events ---*/ \ + macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ + macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ + \ + macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ + macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ + \ + macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ + macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ + \ + macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ + \ + macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ + \ + /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ + macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ + macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ + \ + macro 
(ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ + macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ + \ + macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ + macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ + \ + macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ + macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ + \ + macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ + macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ + macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \ + \ + macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ + \ + macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ + \ + /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ + macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ + macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ + \ + macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ + macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ + \ + macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ + \ + macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ + macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ + \ + macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ + macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ + \ 
+ macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ + macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ + \ + macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ + macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ + \ + macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ + macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ + \ + macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ + macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ + \ + macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \ + macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ + \ + macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ + macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ + \ + macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ + macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ + \ + macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ + \ + macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ + macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ + macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ + macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ + macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ + \ + macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ + macro 
(ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ + macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ + macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ + macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ + macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ + \ + macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ + macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ + \ + macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ + macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ + \ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + + + +/***************************************************************************** + * data types + *****************************************************************************/ + +/*--------------------- + * identifiers + *---------------------*/ + +typedef uint64_t ompt_thread_id_t; +#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_task_id_t; +#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_parallel_id_t; +#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_wait_id_t; +#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ + + +/*--------------------- + * ompt_frame_t + *---------------------*/ + +typedef struct ompt_frame_s { + void *exit_runtime_frame; /* next frame is user code */ + void *reenter_runtime_frame; /* previous frame is user code */ +} ompt_frame_t; + + +/***************************************************************************** + * enumerations for thread states and runtime events + 
*****************************************************************************/ + +/*--------------------- + * runtime states + *---------------------*/ + +typedef enum { +#define ompt_state_macro(state, code) state = code, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +} ompt_state_t; + + +/*--------------------- + * runtime events + *---------------------*/ + +typedef enum { +#define ompt_event_macro(event, callback, eventid) event = eventid, + FOREACH_OMPT_EVENT(ompt_event_macro) +#undef ompt_event_macro +} ompt_event_t; + + +/*--------------------- + * set callback results + *---------------------*/ +typedef enum { + ompt_set_result_registration_error = 0, + ompt_set_result_event_may_occur_no_callback = 1, + ompt_set_result_event_never_occurs = 2, + ompt_set_result_event_may_occur_callback_some = 3, + ompt_set_result_event_may_occur_callback_always = 4, +} ompt_set_result_t; + + + +/***************************************************************************** + * callback signatures + *****************************************************************************/ + +/* initialization */ +typedef void (*ompt_interface_fn_t)(void); + +typedef ompt_interface_fn_t (*ompt_function_lookup_t)( + const char * /* entry point to look up */ +); + +/* threads */ +typedef void (*ompt_thread_callback_t) ( + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef enum { + ompt_thread_initial = 1, // start the enumeration at 1 + ompt_thread_worker = 2, + ompt_thread_other = 3 +} ompt_thread_type_t; + +typedef enum { + ompt_invoker_program = 0, /* program invokes master task */ + ompt_invoker_runtime = 1 /* runtime invokes master task */ +} ompt_invoker_t; + +typedef void (*ompt_thread_type_callback_t) ( + ompt_thread_type_t thread_type, /* type of thread */ + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef void (*ompt_wait_callback_t) ( + ompt_wait_id_t wait_id /* wait id */ +); + +/* parallel and workshares */ +typedef void 
(*ompt_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_new_workshare_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t parent_task_id, /* id of parent task */ + void *workshare_function /* pointer to outlined function */ +); + +typedef void (*ompt_new_parallel_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data of parent task */ + ompt_parallel_id_t parallel_id, /* id of parallel region */ + uint32_t requested_team_size, /* number of threads in team */ + void *parallel_function, /* pointer to outlined function */ + ompt_invoker_t invoker /* who invokes master task? */ +); + +typedef void (*ompt_end_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id, /* id of task */ + ompt_invoker_t invoker /* who invokes master task? 
*/ +); + +/* tasks */ +typedef void (*ompt_task_callback_t) ( + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_task_pair_callback_t) ( + ompt_task_id_t first_task_id, + ompt_task_id_t second_task_id +); + +typedef void (*ompt_new_task_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data for parent task */ + ompt_task_id_t new_task_id, /* id of created task */ + void *task_function /* pointer to outlined function */ +); + +/* program */ +typedef void (*ompt_control_callback_t) ( + uint64_t command, /* command of control call */ + uint64_t modifier /* modifier of control call */ +); + +typedef void (*ompt_callback_t)(void); + + +/**************************************************************************** + * ompt API + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define OMPT_API_FNTYPE(fn) fn##_t + +#define OMPT_API_FUNCTION(return_type, fn, args) \ + typedef return_type (*OMPT_API_FNTYPE(fn)) args + + + +/**************************************************************************** + * INQUIRY FUNCTIONS + ***************************************************************************/ + +/* state */ +OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( + ompt_wait_id_t *ompt_wait_id +)); + +/* thread */ +OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); + +OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); + +/* parallel region */ +OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( + int ancestor_level +)); + +OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( + int ancestor_level +)); + +/* task */ +OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( + int depth +)); + +OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( + int depth +)); + + + +/**************************************************************************** + * PLACEHOLDERS FOR PERFORMANCE REPORTING + 
***************************************************************************/ + +/* idle */ +OMPT_API_FUNCTION(void, ompt_idle, ( + void +)); + +/* overhead */ +OMPT_API_FUNCTION(void, ompt_overhead, ( + void +)); + +/* barrier wait */ +OMPT_API_FUNCTION(void, ompt_barrier_wait, ( + void +)); + +/* task wait */ +OMPT_API_FUNCTION(void, ompt_task_wait, ( + void +)); + +/* mutex wait */ +OMPT_API_FUNCTION(void, ompt_mutex_wait, ( + void +)); + + + +/**************************************************************************** + * INITIALIZATION FUNCTIONS + ***************************************************************************/ + +OMPT_API_FUNCTION(void, ompt_initialize, ( + ompt_function_lookup_t ompt_fn_lookup, + const char *runtime_version, + unsigned int ompt_version +)); + + +/* initialization interface to be defined by tool */ +ompt_initialize_t ompt_tool(void); + +typedef enum opt_init_mode_e { + ompt_init_mode_never = 0, + ompt_init_mode_false = 1, + ompt_init_mode_true = 2, + ompt_init_mode_always = 3 +} ompt_init_mode_t; + +OMPT_API_FUNCTION(int, ompt_set_callback, ( + ompt_event_t event, + ompt_callback_t callback +)); + +typedef enum ompt_set_callback_rc_e { /* non-standard */ + ompt_set_callback_error = 0, + ompt_has_event_no_callback = 1, + ompt_no_event_no_callback = 2, + ompt_has_event_may_callback = 3, + ompt_has_event_must_callback = 4, +} ompt_set_callback_rc_t; + + +OMPT_API_FUNCTION(int, ompt_get_callback, ( + ompt_event_t event, + ompt_callback_t *callback +)); + + + +/**************************************************************************** + * MISCELLANEOUS FUNCTIONS + ***************************************************************************/ + +/* control */ +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp declare target +#endif +void ompt_control( + uint64_t command, + uint64_t modifier +); +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp end declare target +#endif + +/* state enumeration */ +OMPT_API_FUNCTION(int, 
ompt_enumerate_state, ( + int current_state, + int *next_state, + const char **next_state_name +)); + +#ifdef __cplusplus +}; +#endif + +#endif + diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp.h.var b/contrib/libs/cxxsupp/openmp/include/41/omp.h.var index 9e7c871a782..6d9fa43810f 100644 --- a/contrib/libs/cxxsupp/openmp/include/41/omp.h.var +++ b/contrib/libs/cxxsupp/openmp/include/41/omp.h.var @@ -1,176 +1,176 @@ -/* - * include/41/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ -# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ -# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ -# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" - -# ifdef __cplusplus - extern "C" { -# endif - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# else -# define __KAI_KMPC_CONVENTION -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads 
(void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* lock hint type for dynamic user lock */ - typedef enum omp_lock_hint_t { - omp_lock_hint_none = 0, - omp_lock_hint_uncontended = 1, - omp_lock_hint_contended = (1<<1 ), - omp_lock_hint_nonspeculative = (1<<2 ), - omp_lock_hint_speculative = (1<<3 ), - kmp_lock_hint_hle = (1<<16), - kmp_lock_hint_rtm = (1<<17), - kmp_lock_hint_adaptive = (1<<18) - } omp_lock_hint_t; - - 
/* hinted lock initializers */ - extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t); - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - - /* OpenMP 4.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); - extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); - extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); - extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); - -# include - /* kmp API functions */ - extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - - /* Intel affinity API */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION 
kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - /* OpenMP 4.0 affinity API */ - typedef enum omp_proc_bind_t { - omp_proc_bind_false = 0, - omp_proc_bind_true = 1, - omp_proc_bind_master = 2, - omp_proc_bind_close = 3, - omp_proc_bind_spread = 4 - } omp_proc_bind_t; - - extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - -# undef __KAI_KMPC_CONVENTION - - /* Warning: - The following typedefs are not standard, deprecated and will be removed in a future release. - */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ - +/* + * include/41/omp.h.var + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#ifndef __OMP_H +# define __OMP_H + +# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ +# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ +# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ +# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@" + +# ifdef __cplusplus + extern "C" { +# endif + +# if defined(_WIN32) +# define __KAI_KMPC_CONVENTION __cdecl +# else +# define __KAI_KMPC_CONVENTION +# endif + + /* schedule kind constants */ + typedef enum omp_sched_t { + omp_sched_static = 1, + omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4 + } omp_sched_t; + + /* set API functions */ + extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); + extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); + extern void __KAI_KMPC_CONVENTION omp_set_nested (int); + extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); + extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); + + /* query API functions */ + extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); + extern int __KAI_KMPC_CONVENTION omp_get_nested (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); + extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); + extern int __KAI_KMPC_CONVENTION omp_in_final (void); + extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); + extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); + extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); + extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); + + /* lock API 
functions */ + typedef struct omp_lock_t { + void * _lk; + } omp_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); + + /* nested lock API functions */ + typedef struct omp_nest_lock_t { + void * _lk; + } omp_nest_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); + + /* lock hint type for dynamic user lock */ + typedef enum omp_lock_hint_t { + omp_lock_hint_none = 0, + omp_lock_hint_uncontended = 1, + omp_lock_hint_contended = (1<<1 ), + omp_lock_hint_nonspeculative = (1<<2 ), + omp_lock_hint_speculative = (1<<3 ), + kmp_lock_hint_hle = (1<<16), + kmp_lock_hint_rtm = (1<<17), + kmp_lock_hint_adaptive = (1<<18) + } omp_lock_hint_t; + + /* hinted lock initializers */ + extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t); + extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t); + + /* time API functions */ + extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); + extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); + + /* OpenMP 4.0 */ + extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); + extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); + extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); + 
extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); + +# include + /* kmp API functions */ + extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); + extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); + extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); + extern int __KAI_KMPC_CONVENTION kmp_get_library (void); + extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); + extern void __KAI_KMPC_CONVENTION kmp_set_library (int); + extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); + extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); + + /* Intel affinity API */ + typedef void * kmp_affinity_mask_t; + + extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); + extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); + extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); + + /* OpenMP 4.0 affinity API */ + typedef enum omp_proc_bind_t { + omp_proc_bind_false = 0, + omp_proc_bind_true = 1, + omp_proc_bind_master = 2, + omp_proc_bind_close = 3, + omp_proc_bind_spread = 4 + } omp_proc_bind_t; + + extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); + + extern void * 
__KAI_KMPC_CONVENTION kmp_malloc (size_t); + extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); + extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); + extern void __KAI_KMPC_CONVENTION kmp_free (void *); + + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); + +# undef __KAI_KMPC_CONVENTION + + /* Warning: + The following typedefs are not standard, deprecated and will be removed in a future release. + */ + typedef int omp_int_t; + typedef double omp_wtime_t; + +# ifdef __cplusplus + } +# endif + +#endif /* __OMP_H */ + diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var index 49eb401df49..c801908cd41 100644 --- a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var +++ b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var @@ -1,788 +1,788 @@ -! include/41/omp_lib.f.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! - -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. 
-!*** - -!dec$ fixedformlinesize:132 - - module omp_lib_kinds - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - integer, parameter :: kmp_cancel_kind = omp_integer_kind - integer, parameter :: omp_lock_hint_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 - integer (kind=kmp_cancel_kind), parameter :: 
kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(nthreads) - use omp_lib_kinds - integer (kind=omp_integer_kind) nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) - use omp_lib_kinds - logical (kind=omp_logical_kind) enable - end subroutine omp_set_nested - - function omp_get_num_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_get_dynamic() - use 
omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) - use omp_lib_kinds - integer (kind=omp_integer_kind) max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) - use omp_lib_kinds - integer (kind=omp_integer_kind) level - integer (kind=omp_integer_kind) omp_get_team_size - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) - use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_proc_bind() - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick () - double 
precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(dflt_device) - use omp_lib_kinds - integer (kind=omp_integer_kind) dflt_device - end subroutine omp_set_default_device - - function omp_get_num_devices() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - 
- function omp_test_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) - use omp_lib_kinds - integer (kind=omp_integer_kind) size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) - use omp_lib_kinds - integer (kind=omp_integer_kind) msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) - use omp_lib_kinds - integer (kind=omp_integer_kind) libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) - character*(*) string - end subroutine kmp_set_defaults - - function kmp_get_stacksize() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine 
kmp_create_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind) proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind) size - end function kmp_malloc - - function kmp_calloc(nelem, elsize) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind) nelem - integer (kind=kmp_size_t_kind) elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind) ptr - integer (kind=kmp_size_t_kind) size - end function kmp_realloc - - subroutine kmp_free(ptr) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) - use omp_lib_kinds - integer 
(kind=kmp_cancel_kind) cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - subroutine kmp_init_lock_with_hint(lockvar, lockhint) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - integer (kind=omp_lock_hint_kind) lockhint - end subroutine kmp_init_lock_with_hint - - subroutine kmp_init_nest_lock_with_hint(lockvar, lockhint) - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - integer (kind=omp_lock_hint_kind) lockhint - end subroutine kmp_init_nest_lock_with_hint - - end interface - -!dec$ if defined(_WIN32) -!dec$ if defined(_WIN64) .or. defined(_M_AMD64) - -!*** -!*** The Fortran entry points must be in uppercase, even if the /Qlowercase -!*** option is specified. The alias attribute ensures that the specified -!*** string is used as the entry point. -!*** -!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an -!*** underscore prepended. On the Windows* OS Intel(R) 64 -!*** architecture, no underscore is prepended. 
-!*** - -!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device - -!dec$ attributes alias:'omp_init_lock' :: 
omp_init_lock -!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint -!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint -!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes 
alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'KMP_FREE'::kmp_free - -!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ else - -!*** -!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. -!*** - -!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads -!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic -!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested -!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads -!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads -!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num -!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs -!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel -!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic -!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested -!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit -!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels -!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels -!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level -!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level -!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num -!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size -!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule -!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule -!dec$ 
attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind -!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime -!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick -!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device -!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device -!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices -!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams -!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num -!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation -!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device - -!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock -!dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint -!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock -!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock -!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock -!dec$ attributes alias:'_omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint -!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock - -!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize -!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s -!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime -!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial -!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround -!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput -!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library -!dec$ attributes 
alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize -!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s -!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime -!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library -!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity -!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask -!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc -!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc -!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc -!dec$ attributes alias:'_KMP_FREE'::kmp_free - -!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on -!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off - -!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status - -!dec$ endif -!dec$ endif - -!dec$ if defined(__linux) - -!*** -!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
-!*** - -!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'omp_get_level_'::omp_get_level -!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule -!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device -!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device -!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices -!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device - -!dec$ attributes alias:'omp_init_lock_'::omp_init_lock -!dec$ 
attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint -!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint -!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime -!dec$ attributes alias:'kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes 
alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'kmp_free_'::kmp_free - -!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off -!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - -!dec$ if defined(__APPLE__) - -!*** -!*** The Mac entry points are in lowercase, with an both an underscore -!*** appended and an underscore prepended. -!*** - -!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads -!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic -!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested -!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads -!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads -!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num -!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs -!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel -!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic -!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested -!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit -!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels -!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels -!dec$ attributes alias:'_omp_get_level_'::omp_get_level -!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level -!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num -!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size -!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule -!dec$ attributes 
alias:'_omp_get_schedule_'::omp_get_schedule -!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind -!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime -!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick -!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams -!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num -!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation -!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device - -!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock -!dec$ attributes alias:'_omp_init_lock_with_hint_'::omp_init_lock_with_hint -!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock -!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock -!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock -!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock -!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock -!dec$ attributes alias:'_omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint -!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock -!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock -!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock -!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock - -!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize -!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s -!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime -!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial -!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround -!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput -!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library -!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize -!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s -!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime 
-!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library -!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity -!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity -!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc -!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask -!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask -!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc -!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc -!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc -!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc -!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc -!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc -!dec$ attributes alias:'_kmp_free_'::kmp_free - -!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on -!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off - -!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status - -!dec$ endif - - end module omp_lib - +! include/41/omp_lib.f.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + +!*** +!*** Some of the directives for the following routine extend past column 72, +!*** so process this file in 132-column mode. 
+!*** + +!dec$ fixedformlinesize:132 + + module omp_lib_kinds + + integer, parameter :: omp_integer_kind = 4 + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = 4 + integer, parameter :: omp_lock_kind = int_ptr_kind() + integer, parameter :: omp_nest_lock_kind = int_ptr_kind() + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: omp_proc_bind_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = int_ptr_kind() + integer, parameter :: kmp_size_t_kind = int_ptr_kind() + integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + integer, parameter :: kmp_cancel_kind = omp_integer_kind + integer, parameter :: omp_lock_hint_kind = omp_integer_kind + + end module omp_lib_kinds + + module omp_lib + + use omp_lib_kinds + + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@' + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 + integer (kind=kmp_cancel_kind), parameter :: 
kmp_cancel_loop = 2 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + + interface + +! *** +! *** omp_* entry points +! *** + + subroutine omp_set_num_threads(nthreads) + use omp_lib_kinds + integer (kind=omp_integer_kind) nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) + use omp_lib_kinds + logical (kind=omp_logical_kind) enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) + use omp_lib_kinds + logical (kind=omp_logical_kind) enable + end subroutine omp_set_nested + + function omp_get_num_threads() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_get_dynamic() + use 
omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) + use omp_lib_kinds + integer (kind=omp_integer_kind) max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function omp_get_active_level() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) + use omp_lib_kinds + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) + use omp_lib_kinds + integer (kind=omp_integer_kind) level + integer (kind=omp_integer_kind) omp_get_team_size + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) + use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) + use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_proc_bind() + use omp_lib_kinds + integer (kind=omp_proc_bind_kind) omp_get_proc_bind + end function omp_get_proc_bind + + function omp_get_wtime() + double precision omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick () + double 
precision omp_get_wtick + end function omp_get_wtick + + function omp_get_default_device() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_default_device + end function omp_get_default_device + + subroutine omp_set_default_device(dflt_device) + use omp_lib_kinds + integer (kind=omp_integer_kind) dflt_device + end subroutine omp_set_default_device + + function omp_get_num_devices() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_devices + end function omp_get_num_devices + + function omp_get_num_teams() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_teams + end function omp_get_num_teams + + function omp_get_team_num() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_team_num + end function omp_get_team_num + + function omp_get_cancellation() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_cancellation + end function omp_get_cancellation + + function omp_is_initial_device() + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_is_initial_device + end function omp_is_initial_device + + subroutine omp_init_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + 
+ function omp_test_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_lock +!DIR$ ENDIF + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! 
*** + + subroutine kmp_set_stacksize(size) + use omp_lib_kinds + integer (kind=omp_integer_kind) size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) + use omp_lib_kinds + integer (kind=kmp_size_t_kind) size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) + use omp_lib_kinds + integer (kind=omp_integer_kind) msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) + use omp_lib_kinds + integer (kind=omp_integer_kind) libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) + character*(*) string + end subroutine kmp_set_defaults + + function kmp_get_stacksize() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() + use omp_lib_kinds + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity + + function kmp_get_affinity_max_proc() + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine 
kmp_create_affinity_mask(mask) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind) proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind) size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind) nelem + integer (kind=kmp_size_t_kind) elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind) ptr + integer (kind=kmp_size_t_kind) size + end function kmp_realloc + + subroutine kmp_free(ptr) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() + end subroutine kmp_set_warnings_off + + function kmp_get_cancellation_status(cancelkind) + use omp_lib_kinds + integer 
(kind=kmp_cancel_kind) cancelkind + logical (kind=omp_logical_kind) kmp_get_cancellation_status + end function kmp_get_cancellation_status + + subroutine kmp_init_lock_with_hint(lockvar, lockhint) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + integer (kind=omp_lock_hint_kind) lockhint + end subroutine kmp_init_lock_with_hint + + subroutine kmp_init_nest_lock_with_hint(lockvar, lockhint) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + integer (kind=omp_lock_hint_kind) lockhint + end subroutine kmp_init_nest_lock_with_hint + + end interface + +!dec$ if defined(_WIN32) +!dec$ if defined(_WIN64) .or. defined(_M_AMD64) + +!*** +!*** The Fortran entry points must be in uppercase, even if the /Qlowercase +!*** option is specified. The alias attribute ensures that the specified +!*** string is used as the entry point. +!*** +!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an +!*** underscore prepended. On the Windows* OS Intel(R) 64 +!*** architecture, no underscore is prepended. 
+!*** + +!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads +!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic +!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested +!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads +!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads +!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num +!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs +!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel +!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic +!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested +!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit +!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels +!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels +!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level +!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level +!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num +!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size +!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule +!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule +!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind +!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime +!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick +!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device +!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device +!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices +!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams +!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num +!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation +!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device + +!dec$ attributes alias:'omp_init_lock' :: 
omp_init_lock +!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint +!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock +!dec$ attributes alias:'omp_set_lock' :: omp_set_lock +!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock +!dec$ attributes alias:'omp_test_lock' :: omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock +!dec$ attributes alias:'omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint +!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock +!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock + +!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity +!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes 
alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc +!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'KMP_FREE'::kmp_free + +!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status + +!dec$ else + +!*** +!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended. +!*** + +!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads +!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic +!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested +!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads +!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads +!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num +!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs +!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel +!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic +!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested +!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit +!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels +!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels +!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level +!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level +!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num +!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size +!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule +!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule +!dec$ 
attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind +!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime +!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick +!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device +!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device +!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices +!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams +!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num +!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation +!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device + +!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock +!dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint +!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock +!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock +!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock +!dec$ attributes alias:'_omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint +!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock +!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock + +!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize +!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s +!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime +!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial +!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround +!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput +!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library +!dec$ attributes 
alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize +!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s +!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime +!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library +!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity +!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity +!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask +!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask +!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc +!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc +!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc +!dec$ attributes alias:'_KMP_FREE'::kmp_free + +!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on +!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off + +!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status + +!dec$ endif +!dec$ endif + +!dec$ if defined(__linux) + +!*** +!*** The Linux* OS entry points are in lowercase, with an underscore appended. 
+!*** + +!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'omp_get_level_'::omp_get_level +!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size +!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule +!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind +!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick +!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device +!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device +!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices +!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams +!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num +!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation +!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device + +!dec$ attributes alias:'omp_init_lock_'::omp_init_lock +!dec$ 
attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint +!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint +!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime +!dec$ attributes alias:'kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes 
alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'kmp_malloc_'::kmp_malloc +!dec$ attributes alias:'kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'kmp_free_'::kmp_free + +!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off +!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status + +!dec$ endif + +!dec$ if defined(__APPLE__) + +!*** +!*** The Mac entry points are in lowercase, with both an underscore +!*** appended and an underscore prepended. +!*** + +!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads +!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic +!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested +!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads +!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads +!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num +!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs +!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel +!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic +!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested +!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit +!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels +!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels +!dec$ attributes alias:'_omp_get_level_'::omp_get_level +!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level +!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num +!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size +!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule +!dec$ attributes 
alias:'_omp_get_schedule_'::omp_get_schedule +!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind +!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime +!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick +!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams +!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num +!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation +!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device + +!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock +!dec$ attributes alias:'_omp_init_lock_with_hint_'::omp_init_lock_with_hint +!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock +!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock +!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock +!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock +!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock +!dec$ attributes alias:'_omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint +!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock +!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock +!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock +!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock + +!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize +!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s +!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime +!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial +!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround +!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput +!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library +!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize +!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s +!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime 
+!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library +!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity +!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity +!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc +!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask +!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask +!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc +!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc +!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc +!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc +!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc +!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc +!dec$ attributes alias:'_kmp_free_'::kmp_free + +!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on +!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off + +!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status + +!dec$ endif + + end module omp_lib + diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var index 7066ee41c57..2d23667b108 100644 --- a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var +++ b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var @@ -1,470 +1,470 @@ -! include/41/omp_lib.f90.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
- - module omp_lib_kinds - - use, intrinsic :: iso_c_binding - - integer, parameter :: omp_integer_kind = c_int - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = c_float - integer, parameter :: kmp_double_kind = c_double - integer, parameter :: omp_lock_kind = c_intptr_t - integer, parameter :: omp_nest_lock_kind = c_intptr_t - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = c_intptr_t - integer, parameter :: kmp_size_t_kind = c_size_t - integer, parameter :: kmp_affinity_mask_kind = c_intptr_t - integer, parameter :: kmp_cancel_kind = omp_integer_kind - integer, parameter :: omp_lock_hint_kind = omp_integer_kind - - end module omp_lib_kinds - - module omp_lib - - use omp_lib_kinds - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 - integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 - - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(nthreads) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - 
use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) bind(c) - use omp_lib_kinds - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) bind(c) - 
use omp_lib_kinds - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - use omp_lib_kinds - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - use omp_lib_kinds - real (kind=kmp_double_kind) omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(dflt_device) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: dflt_device - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_get_cancellation() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_get_cancellation - end function omp_get_cancellation - - function omp_is_initial_device() bind(c) - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: 
omp_destroy_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - use omp_lib_kinds - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine omp_init_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ 
attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - use omp_lib_kinds - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! *** - - subroutine kmp_set_stacksize(size) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - use omp_lib_kinds - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity - 
integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - use omp_lib_kinds - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_calloc(nelem, elsize) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end 
function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - use omp_lib_kinds - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end subroutine kmp_set_warnings_off - - function kmp_get_cancellation_status(cancelkind) bind(c) - use omp_lib_kinds - integer (kind=kmp_cancel_kind), value :: cancelkind - logical (kind=omp_logical_kind) kmp_get_cancellation_status - end function kmp_get_cancellation_status - - subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - integer (kind=omp_lock_hint_kind), value :: lockhint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c) - use omp_lib_kinds - integer (kind=omp_lock_kind) lockvar - integer (kind=omp_lock_hint_kind), value :: lockhint - end subroutine omp_init_nest_lock_with_hint - - end interface - - end module omp_lib +! include/41/omp_lib.f90.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! 
+ + module omp_lib_kinds + + use, intrinsic :: iso_c_binding + + integer, parameter :: omp_integer_kind = c_int + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = c_float + integer, parameter :: kmp_double_kind = c_double + integer, parameter :: omp_lock_kind = c_intptr_t + integer, parameter :: omp_nest_lock_kind = c_intptr_t + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: omp_proc_bind_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = c_intptr_t + integer, parameter :: kmp_size_t_kind = c_size_t + integer, parameter :: kmp_affinity_mask_kind = c_intptr_t + integer, parameter :: kmp_cancel_kind = omp_integer_kind + integer, parameter :: omp_lock_hint_kind = omp_integer_kind + + end module omp_lib_kinds + + module omp_lib + + use omp_lib_kinds + + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*) kmp_build_date + parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + + interface + +! *** +! *** omp_* entry points +! *** + + subroutine omp_set_num_threads(nthreads) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_nested + + function omp_get_num_threads() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() bind(c) + 
use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_in_final() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_in_final + end function omp_in_final + + function omp_get_dynamic() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function omp_get_active_level() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + integer (kind=omp_integer_kind), value :: level + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_team_size + integer (kind=omp_integer_kind), value :: level + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) bind(c) + use omp_lib_kinds + integer (kind=omp_sched_kind), value :: kind + integer (kind=omp_integer_kind), value :: modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) bind(c) + 
use omp_lib_kinds + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_proc_bind() bind(c) + use omp_lib_kinds + integer (kind=omp_proc_bind_kind) omp_get_proc_bind + end function omp_get_proc_bind + + function omp_get_wtime() bind(c) + use omp_lib_kinds + real (kind=kmp_double_kind) omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick() bind(c) + use omp_lib_kinds + real (kind=kmp_double_kind) omp_get_wtick + end function omp_get_wtick + + function omp_get_default_device() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_default_device + end function omp_get_default_device + + subroutine omp_set_default_device(dflt_device) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: dflt_device + end subroutine omp_set_default_device + + function omp_get_num_devices() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_devices + end function omp_get_num_devices + + function omp_get_num_teams() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_num_teams + end function omp_get_num_teams + + function omp_get_team_num() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_team_num + end function omp_get_team_num + + function omp_get_cancellation() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_cancellation + end function omp_get_cancellation + + function omp_is_initial_device() bind(c) + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_is_initial_device + end function omp_is_initial_device + + subroutine omp_init_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: 
omp_destroy_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_lock +!DIR$ ENDIF + use omp_lib_kinds + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ 
attributes known_intrinsic :: omp_test_nest_lock +!DIR$ ENDIF + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! *** + + subroutine kmp_set_stacksize(size) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) bind(c) + use omp_lib_kinds + integer (kind=kmp_size_t_kind), value :: size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() bind(c) + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() bind(c) + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() bind(c) + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind), value :: libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) bind(c) + use, intrinsic :: iso_c_binding + character (kind=c_char) :: string(*) + end subroutine kmp_set_defaults + + function kmp_get_stacksize() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() bind(c) + use omp_lib_kinds + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity + 
integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity + + function kmp_get_affinity_max_proc() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) bind(c) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) bind(c) + use omp_lib_kinds + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind), value :: size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind), value :: nelem + integer (kind=kmp_size_t_kind), value :: elsize + end 
function kmp_calloc + + function kmp_realloc(ptr, size) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind), value :: ptr + integer (kind=kmp_size_t_kind), value :: size + end function kmp_realloc + + subroutine kmp_free(ptr) bind(c) + use omp_lib_kinds + integer (kind=kmp_pointer_kind), value :: ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() bind(c) + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() bind(c) + end subroutine kmp_set_warnings_off + + function kmp_get_cancellation_status(cancelkind) bind(c) + use omp_lib_kinds + integer (kind=kmp_cancel_kind), value :: cancelkind + logical (kind=omp_logical_kind) kmp_get_cancellation_status + end function kmp_get_cancellation_status + + subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + integer (kind=omp_lock_hint_kind), value :: lockhint + end subroutine omp_init_lock_with_hint + + subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + integer (kind=omp_lock_hint_kind), value :: lockhint + end subroutine omp_init_nest_lock_with_hint + + end interface + + end module omp_lib diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var index 7d9a32db4ca..867bcd97b09 100644 --- a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var +++ b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var @@ -1,584 +1,584 @@ -! include/41/omp_lib.h.var - -! -!//===----------------------------------------------------------------------===// -!// -!// The LLVM Compiler Infrastructure -!// -!// This file is dual licensed under the MIT and the University of Illinois Open -!// Source Licenses. See LICENSE.txt for details. -!// -!//===----------------------------------------------------------------------===// -! 
- -!*** -!*** Some of the directives for the following routine extend past column 72, -!*** so process this file in 132-column mode. -!*** - -!DIR$ fixedformlinesize:132 - - integer, parameter :: omp_integer_kind = 4 - integer, parameter :: omp_logical_kind = 4 - integer, parameter :: omp_real_kind = 4 - integer, parameter :: omp_lock_kind = int_ptr_kind() - integer, parameter :: omp_nest_lock_kind = int_ptr_kind() - integer, parameter :: omp_sched_kind = omp_integer_kind - integer, parameter :: omp_proc_bind_kind = omp_integer_kind - integer, parameter :: kmp_pointer_kind = int_ptr_kind() - integer, parameter :: kmp_size_t_kind = int_ptr_kind() - integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() - integer, parameter :: omp_lock_hint_kind = omp_integer_kind - - integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ - integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ - integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ - character(*) kmp_build_date - parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) - - integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 - integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 - integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 - - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 - - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 - integer (kind=omp_lock_hint_kind), parameter :: 
omp_lock_hint_uncontended = 1 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 - integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 - integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - - interface - -! *** -! *** omp_* entry points -! *** - - subroutine omp_set_num_threads(nthreads) bind(c) - import - integer (kind=omp_integer_kind), value :: nthreads - end subroutine omp_set_num_threads - - subroutine omp_set_dynamic(enable) bind(c) - import - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_dynamic - - subroutine omp_set_nested(enable) bind(c) - import - logical (kind=omp_logical_kind), value :: enable - end subroutine omp_set_nested - - function omp_get_num_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_threads - end function omp_get_num_threads - - function omp_get_max_threads() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_threads - end function omp_get_max_threads - - function omp_get_thread_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_num - end function omp_get_thread_num - - function omp_get_num_procs() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_procs - end function omp_get_num_procs - - function omp_in_parallel() bind(c) - import - logical (kind=omp_logical_kind) omp_in_parallel - end function omp_in_parallel - - function omp_in_final() bind(c) - import - logical (kind=omp_logical_kind) omp_in_final - end function omp_in_final - - function omp_get_dynamic() bind(c) - import - logical (kind=omp_logical_kind) omp_get_dynamic - end function omp_get_dynamic - - function omp_get_nested() bind(c) - import - logical 
(kind=omp_logical_kind) omp_get_nested - end function omp_get_nested - - function omp_get_thread_limit() bind(c) - import - integer (kind=omp_integer_kind) omp_get_thread_limit - end function omp_get_thread_limit - - subroutine omp_set_max_active_levels(max_levels) bind(c) - import - integer (kind=omp_integer_kind), value :: max_levels - end subroutine omp_set_max_active_levels - - function omp_get_max_active_levels() bind(c) - import - integer (kind=omp_integer_kind) omp_get_max_active_levels - end function omp_get_max_active_levels - - function omp_get_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_level - end function omp_get_level - - function omp_get_active_level() bind(c) - import - integer (kind=omp_integer_kind) omp_get_active_level - end function omp_get_active_level - - function omp_get_ancestor_thread_num(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_ancestor_thread_num - integer (kind=omp_integer_kind), value :: level - end function omp_get_ancestor_thread_num - - function omp_get_team_size(level) bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_size - integer (kind=omp_integer_kind), value :: level - end function omp_get_team_size - - subroutine omp_set_schedule(kind, modifier) bind(c) - import - integer (kind=omp_sched_kind), value :: kind - integer (kind=omp_integer_kind), value :: modifier - end subroutine omp_set_schedule - - subroutine omp_get_schedule(kind, modifier) bind(c) - import - integer (kind=omp_sched_kind) kind - integer (kind=omp_integer_kind) modifier - end subroutine omp_get_schedule - - function omp_get_proc_bind() bind(c) - import - integer (kind=omp_proc_bind_kind) omp_get_proc_bind - end function omp_get_proc_bind - - function omp_get_wtime() bind(c) - double precision omp_get_wtime - end function omp_get_wtime - - function omp_get_wtick() bind(c) - double precision omp_get_wtick - end function omp_get_wtick - - function omp_get_default_device() bind(c) - import - integer 
(kind=omp_integer_kind) omp_get_default_device - end function omp_get_default_device - - subroutine omp_set_default_device(dflt_device) bind(c) - import - integer (kind=omp_integer_kind), value :: dflt_device - end subroutine omp_set_default_device - - function omp_get_num_devices() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_devices - end function omp_get_num_devices - - function omp_get_num_teams() bind(c) - import - integer (kind=omp_integer_kind) omp_get_num_teams - end function omp_get_num_teams - - function omp_get_team_num() bind(c) - import - integer (kind=omp_integer_kind) omp_get_team_num - end function omp_get_team_num - - function omp_is_initial_device() bind(c) - import - logical (kind=omp_logical_kind) omp_is_initial_device - end function omp_is_initial_device - - subroutine omp_init_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_init_lock - - subroutine omp_destroy_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_destroy_lock - - subroutine omp_set_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_set_lock - - subroutine omp_unset_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_lock -!DIR$ ENDIF - import - integer (kind=omp_lock_kind) lockvar - end subroutine omp_unset_lock - - function omp_test_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_lock -!DIR$ ENDIF - import - logical (kind=omp_logical_kind) omp_test_lock - integer (kind=omp_lock_kind) lockvar - end function omp_test_lock - - subroutine 
omp_init_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_init_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_init_nest_lock - - subroutine omp_destroy_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_destroy_nest_lock - - subroutine omp_set_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_set_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_set_nest_lock - - subroutine omp_unset_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_unset_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_nest_lock_kind) lockvar - end subroutine omp_unset_nest_lock - - function omp_test_nest_lock(lockvar) bind(c) -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!DIR$ attributes known_intrinsic :: omp_test_nest_lock -!DIR$ ENDIF - import - integer (kind=omp_integer_kind) omp_test_nest_lock - integer (kind=omp_nest_lock_kind) lockvar - end function omp_test_nest_lock - -! *** -! *** kmp_* entry points -! 
*** - - subroutine kmp_set_stacksize(size) bind(c) - import - integer (kind=omp_integer_kind), value :: size - end subroutine kmp_set_stacksize - - subroutine kmp_set_stacksize_s(size) bind(c) - import - integer (kind=kmp_size_t_kind), value :: size - end subroutine kmp_set_stacksize_s - - subroutine kmp_set_blocktime(msec) bind(c) - import - integer (kind=omp_integer_kind), value :: msec - end subroutine kmp_set_blocktime - - subroutine kmp_set_library_serial() bind(c) - end subroutine kmp_set_library_serial - - subroutine kmp_set_library_turnaround() bind(c) - end subroutine kmp_set_library_turnaround - - subroutine kmp_set_library_throughput() bind(c) - end subroutine kmp_set_library_throughput - - subroutine kmp_set_library(libnum) bind(c) - import - integer (kind=omp_integer_kind), value :: libnum - end subroutine kmp_set_library - - subroutine kmp_set_defaults(string) bind(c) - character string(*) - end subroutine kmp_set_defaults - - function kmp_get_stacksize() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_stacksize - end function kmp_get_stacksize - - function kmp_get_stacksize_s() bind(c) - import - integer (kind=kmp_size_t_kind) kmp_get_stacksize_s - end function kmp_get_stacksize_s - - function kmp_get_blocktime() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_blocktime - end function kmp_get_blocktime - - function kmp_get_library() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_library - end function kmp_get_library - - function kmp_set_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity - - function kmp_get_affinity(mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity - - function kmp_get_affinity_max_proc() bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_max_proc - end function 
kmp_get_affinity_max_proc - - subroutine kmp_create_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_create_affinity_mask - - subroutine kmp_destroy_affinity_mask(mask) bind(c) - import - integer (kind=kmp_affinity_mask_kind) mask - end subroutine kmp_destroy_affinity_mask - - function kmp_set_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_set_affinity_mask_proc - - function kmp_unset_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_unset_affinity_mask_proc - - function kmp_get_affinity_mask_proc(proc, mask) bind(c) - import - integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc - integer (kind=omp_integer_kind), value :: proc - integer (kind=kmp_affinity_mask_kind) mask - end function kmp_get_affinity_mask_proc - - function kmp_malloc(size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_malloc - integer (kind=kmp_size_t_kind), value :: size - end function kmp_malloc - - function kmp_calloc(nelem, elsize) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_calloc - integer (kind=kmp_size_t_kind), value :: nelem - integer (kind=kmp_size_t_kind), value :: elsize - end function kmp_calloc - - function kmp_realloc(ptr, size) bind(c) - import - integer (kind=kmp_pointer_kind) kmp_realloc - integer (kind=kmp_pointer_kind), value :: ptr - integer (kind=kmp_size_t_kind), value :: size - end function kmp_realloc - - subroutine kmp_free(ptr) bind(c) - import - integer (kind=kmp_pointer_kind), value :: ptr - end subroutine kmp_free - - subroutine kmp_set_warnings_on() bind(c) - end subroutine kmp_set_warnings_on - - subroutine kmp_set_warnings_off() bind(c) - end 
subroutine kmp_set_warnings_off - - subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c) - import - integer (kind=omp_lock_kind) lockvar - integer (kind=omp_lock_hint_kind), value :: lockhint - end subroutine omp_init_lock_with_hint - - subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c) - import - integer (kind=omp_lock_kind) lockvar - integer (kind=omp_lock_hint_kind), value :: lockhint - end subroutine omp_init_nest_lock_with_hint - - end interface - -!DIR$ IF DEFINED (__INTEL_OFFLOAD) -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams -!DIR$ ATTRIBUTES OFFLOAD:MIC 
:: omp_get_team_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on -!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock_with_hint -!DIR$ ATTRIBUTES OFFLOAD:MIC :: 
omp_init_nest_lock_with_hint - -!DIR$ IF(__INTEL_COMPILER.GE.1400) -!$omp declare target(omp_set_num_threads ) -!$omp declare target(omp_set_dynamic ) -!$omp declare target(omp_set_nested ) -!$omp declare target(omp_get_num_threads ) -!$omp declare target(omp_get_max_threads ) -!$omp declare target(omp_get_thread_num ) -!$omp declare target(omp_get_num_procs ) -!$omp declare target(omp_in_parallel ) -!$omp declare target(omp_in_final ) -!$omp declare target(omp_get_dynamic ) -!$omp declare target(omp_get_nested ) -!$omp declare target(omp_get_thread_limit ) -!$omp declare target(omp_set_max_active_levels ) -!$omp declare target(omp_get_max_active_levels ) -!$omp declare target(omp_get_level ) -!$omp declare target(omp_get_active_level ) -!$omp declare target(omp_get_ancestor_thread_num ) -!$omp declare target(omp_get_team_size ) -!$omp declare target(omp_set_schedule ) -!$omp declare target(omp_get_schedule ) -!$omp declare target(omp_get_proc_bind ) -!$omp declare target(omp_get_wtime ) -!$omp declare target(omp_get_wtick ) -!$omp declare target(omp_get_default_device ) -!$omp declare target(omp_set_default_device ) -!$omp declare target(omp_is_initial_device ) -!$omp declare target(omp_get_num_devices ) -!$omp declare target(omp_get_num_teams ) -!$omp declare target(omp_get_team_num ) -!$omp declare target(omp_init_lock ) -!$omp declare target(omp_destroy_lock ) -!$omp declare target(omp_set_lock ) -!$omp declare target(omp_unset_lock ) -!$omp declare target(omp_test_lock ) -!$omp declare target(omp_init_nest_lock ) -!$omp declare target(omp_destroy_nest_lock ) -!$omp declare target(omp_set_nest_lock ) -!$omp declare target(omp_unset_nest_lock ) -!$omp declare target(omp_test_nest_lock ) -!$omp declare target(kmp_set_stacksize ) -!$omp declare target(kmp_set_stacksize_s ) -!$omp declare target(kmp_set_blocktime ) -!$omp declare target(kmp_set_library_serial ) -!$omp declare target(kmp_set_library_turnaround ) -!$omp declare target(kmp_set_library_throughput ) 
-!$omp declare target(kmp_set_library ) -!$omp declare target(kmp_set_defaults ) -!$omp declare target(kmp_get_stacksize ) -!$omp declare target(kmp_get_stacksize_s ) -!$omp declare target(kmp_get_blocktime ) -!$omp declare target(kmp_get_library ) -!$omp declare target(kmp_set_affinity ) -!$omp declare target(kmp_get_affinity ) -!$omp declare target(kmp_get_affinity_max_proc ) -!$omp declare target(kmp_create_affinity_mask ) -!$omp declare target(kmp_destroy_affinity_mask ) -!$omp declare target(kmp_set_affinity_mask_proc ) -!$omp declare target(kmp_unset_affinity_mask_proc ) -!$omp declare target(kmp_get_affinity_mask_proc ) -!$omp declare target(kmp_malloc ) -!$omp declare target(kmp_calloc ) -!$omp declare target(kmp_realloc ) -!$omp declare target(kmp_free ) -!$omp declare target(kmp_set_warnings_on ) -!$omp declare target(kmp_set_warnings_off ) -!$omp declare target(omp_init_lock_with_hint ) -!$omp declare target(omp_init_nest_lock_with_hint ) -!DIR$ ENDIF -!DIR$ ENDIF - +! include/41/omp_lib.h.var + +! +!//===----------------------------------------------------------------------===// +!// +!// The LLVM Compiler Infrastructure +!// +!// This file is dual licensed under the MIT and the University of Illinois Open +!// Source Licenses. See LICENSE.txt for details. +!// +!//===----------------------------------------------------------------------===// +! + +!*** +!*** Some of the directives for the following routine extend past column 72, +!*** so process this file in 132-column mode. 
+!*** + +!DIR$ fixedformlinesize:132 + + integer, parameter :: omp_integer_kind = 4 + integer, parameter :: omp_logical_kind = 4 + integer, parameter :: omp_real_kind = 4 + integer, parameter :: omp_lock_kind = int_ptr_kind() + integer, parameter :: omp_nest_lock_kind = int_ptr_kind() + integer, parameter :: omp_sched_kind = omp_integer_kind + integer, parameter :: omp_proc_bind_kind = omp_integer_kind + integer, parameter :: kmp_pointer_kind = int_ptr_kind() + integer, parameter :: kmp_size_t_kind = int_ptr_kind() + integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + integer, parameter :: omp_lock_hint_kind = omp_integer_kind + + integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ + integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@ + integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@ + character(*) kmp_build_date + parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' ) + + integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1 + integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 + integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2 + integer 
(kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4 + integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 + integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + + interface + +! *** +! *** omp_* entry points +! *** + + subroutine omp_set_num_threads(nthreads) bind(c) + import + integer (kind=omp_integer_kind), value :: nthreads + end subroutine omp_set_num_threads + + subroutine omp_set_dynamic(enable) bind(c) + import + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_dynamic + + subroutine omp_set_nested(enable) bind(c) + import + logical (kind=omp_logical_kind), value :: enable + end subroutine omp_set_nested + + function omp_get_num_threads() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_threads + end function omp_get_num_threads + + function omp_get_max_threads() bind(c) + import + integer (kind=omp_integer_kind) omp_get_max_threads + end function omp_get_max_threads + + function omp_get_thread_num() bind(c) + import + integer (kind=omp_integer_kind) omp_get_thread_num + end function omp_get_thread_num + + function omp_get_num_procs() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_procs + end function omp_get_num_procs + + function omp_in_parallel() bind(c) + import + logical (kind=omp_logical_kind) omp_in_parallel + end function omp_in_parallel + + function omp_in_final() bind(c) + import + logical (kind=omp_logical_kind) omp_in_final + end function omp_in_final + + function omp_get_dynamic() bind(c) + import + logical (kind=omp_logical_kind) omp_get_dynamic + end function omp_get_dynamic + + function omp_get_nested() bind(c) + import + logical (kind=omp_logical_kind) omp_get_nested + end function omp_get_nested + + function omp_get_thread_limit() bind(c) + import + integer 
(kind=omp_integer_kind) omp_get_thread_limit + end function omp_get_thread_limit + + subroutine omp_set_max_active_levels(max_levels) bind(c) + import + integer (kind=omp_integer_kind), value :: max_levels + end subroutine omp_set_max_active_levels + + function omp_get_max_active_levels() bind(c) + import + integer (kind=omp_integer_kind) omp_get_max_active_levels + end function omp_get_max_active_levels + + function omp_get_level() bind(c) + import + integer (kind=omp_integer_kind) omp_get_level + end function omp_get_level + + function omp_get_active_level() bind(c) + import + integer (kind=omp_integer_kind) omp_get_active_level + end function omp_get_active_level + + function omp_get_ancestor_thread_num(level) bind(c) + import + integer (kind=omp_integer_kind) omp_get_ancestor_thread_num + integer (kind=omp_integer_kind), value :: level + end function omp_get_ancestor_thread_num + + function omp_get_team_size(level) bind(c) + import + integer (kind=omp_integer_kind) omp_get_team_size + integer (kind=omp_integer_kind), value :: level + end function omp_get_team_size + + subroutine omp_set_schedule(kind, modifier) bind(c) + import + integer (kind=omp_sched_kind), value :: kind + integer (kind=omp_integer_kind), value :: modifier + end subroutine omp_set_schedule + + subroutine omp_get_schedule(kind, modifier) bind(c) + import + integer (kind=omp_sched_kind) kind + integer (kind=omp_integer_kind) modifier + end subroutine omp_get_schedule + + function omp_get_proc_bind() bind(c) + import + integer (kind=omp_proc_bind_kind) omp_get_proc_bind + end function omp_get_proc_bind + + function omp_get_wtime() bind(c) + double precision omp_get_wtime + end function omp_get_wtime + + function omp_get_wtick() bind(c) + double precision omp_get_wtick + end function omp_get_wtick + + function omp_get_default_device() bind(c) + import + integer (kind=omp_integer_kind) omp_get_default_device + end function omp_get_default_device + + subroutine omp_set_default_device(dflt_device) 
bind(c) + import + integer (kind=omp_integer_kind), value :: dflt_device + end subroutine omp_set_default_device + + function omp_get_num_devices() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_devices + end function omp_get_num_devices + + function omp_get_num_teams() bind(c) + import + integer (kind=omp_integer_kind) omp_get_num_teams + end function omp_get_num_teams + + function omp_get_team_num() bind(c) + import + integer (kind=omp_integer_kind) omp_get_team_num + end function omp_get_team_num + + function omp_is_initial_device() bind(c) + import + logical (kind=omp_logical_kind) omp_is_initial_device + end function omp_is_initial_device + + subroutine omp_init_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_init_lock + + subroutine omp_destroy_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_destroy_lock + + subroutine omp_set_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_set_lock + + subroutine omp_unset_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_lock +!DIR$ ENDIF + import + integer (kind=omp_lock_kind) lockvar + end subroutine omp_unset_lock + + function omp_test_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_lock +!DIR$ ENDIF + import + logical (kind=omp_logical_kind) omp_test_lock + integer (kind=omp_lock_kind) lockvar + end function omp_test_lock + + subroutine omp_init_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_init_nest_lock +!DIR$ 
ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_init_nest_lock + + subroutine omp_destroy_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_destroy_nest_lock + + subroutine omp_set_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_set_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_set_nest_lock + + subroutine omp_unset_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_unset_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_nest_lock_kind) lockvar + end subroutine omp_unset_nest_lock + + function omp_test_nest_lock(lockvar) bind(c) +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!DIR$ attributes known_intrinsic :: omp_test_nest_lock +!DIR$ ENDIF + import + integer (kind=omp_integer_kind) omp_test_nest_lock + integer (kind=omp_nest_lock_kind) lockvar + end function omp_test_nest_lock + +! *** +! *** kmp_* entry points +! 
*** + + subroutine kmp_set_stacksize(size) bind(c) + import + integer (kind=omp_integer_kind), value :: size + end subroutine kmp_set_stacksize + + subroutine kmp_set_stacksize_s(size) bind(c) + import + integer (kind=kmp_size_t_kind), value :: size + end subroutine kmp_set_stacksize_s + + subroutine kmp_set_blocktime(msec) bind(c) + import + integer (kind=omp_integer_kind), value :: msec + end subroutine kmp_set_blocktime + + subroutine kmp_set_library_serial() bind(c) + end subroutine kmp_set_library_serial + + subroutine kmp_set_library_turnaround() bind(c) + end subroutine kmp_set_library_turnaround + + subroutine kmp_set_library_throughput() bind(c) + end subroutine kmp_set_library_throughput + + subroutine kmp_set_library(libnum) bind(c) + import + integer (kind=omp_integer_kind), value :: libnum + end subroutine kmp_set_library + + subroutine kmp_set_defaults(string) bind(c) + character string(*) + end subroutine kmp_set_defaults + + function kmp_get_stacksize() bind(c) + import + integer (kind=omp_integer_kind) kmp_get_stacksize + end function kmp_get_stacksize + + function kmp_get_stacksize_s() bind(c) + import + integer (kind=kmp_size_t_kind) kmp_get_stacksize_s + end function kmp_get_stacksize_s + + function kmp_get_blocktime() bind(c) + import + integer (kind=omp_integer_kind) kmp_get_blocktime + end function kmp_get_blocktime + + function kmp_get_library() bind(c) + import + integer (kind=omp_integer_kind) kmp_get_library + end function kmp_get_library + + function kmp_set_affinity(mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_set_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity + + function kmp_get_affinity(mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_get_affinity + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity + + function kmp_get_affinity_max_proc() bind(c) + import + integer (kind=omp_integer_kind) kmp_get_affinity_max_proc + end function 
kmp_get_affinity_max_proc + + subroutine kmp_create_affinity_mask(mask) bind(c) + import + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_create_affinity_mask + + subroutine kmp_destroy_affinity_mask(mask) bind(c) + import + integer (kind=kmp_affinity_mask_kind) mask + end subroutine kmp_destroy_affinity_mask + + function kmp_set_affinity_mask_proc(proc, mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_set_affinity_mask_proc + + function kmp_unset_affinity_mask_proc(proc, mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_unset_affinity_mask_proc + + function kmp_get_affinity_mask_proc(proc, mask) bind(c) + import + integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc + integer (kind=omp_integer_kind), value :: proc + integer (kind=kmp_affinity_mask_kind) mask + end function kmp_get_affinity_mask_proc + + function kmp_malloc(size) bind(c) + import + integer (kind=kmp_pointer_kind) kmp_malloc + integer (kind=kmp_size_t_kind), value :: size + end function kmp_malloc + + function kmp_calloc(nelem, elsize) bind(c) + import + integer (kind=kmp_pointer_kind) kmp_calloc + integer (kind=kmp_size_t_kind), value :: nelem + integer (kind=kmp_size_t_kind), value :: elsize + end function kmp_calloc + + function kmp_realloc(ptr, size) bind(c) + import + integer (kind=kmp_pointer_kind) kmp_realloc + integer (kind=kmp_pointer_kind), value :: ptr + integer (kind=kmp_size_t_kind), value :: size + end function kmp_realloc + + subroutine kmp_free(ptr) bind(c) + import + integer (kind=kmp_pointer_kind), value :: ptr + end subroutine kmp_free + + subroutine kmp_set_warnings_on() bind(c) + end subroutine kmp_set_warnings_on + + subroutine kmp_set_warnings_off() bind(c) + end 
subroutine kmp_set_warnings_off + + subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c) + import + integer (kind=omp_lock_kind) lockvar + integer (kind=omp_lock_hint_kind), value :: lockhint + end subroutine omp_init_lock_with_hint + + subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c) + import + integer (kind=omp_lock_kind) lockvar + integer (kind=omp_lock_hint_kind), value :: lockhint + end subroutine omp_init_nest_lock_with_hint + + end interface + +!DIR$ IF DEFINED (__INTEL_OFFLOAD) +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams +!DIR$ ATTRIBUTES OFFLOAD:MIC 
:: omp_get_team_num +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock_with_hint +!DIR$ ATTRIBUTES OFFLOAD:MIC :: 
omp_init_nest_lock_with_hint + +!DIR$ IF(__INTEL_COMPILER.GE.1400) +!$omp declare target(omp_set_num_threads ) +!$omp declare target(omp_set_dynamic ) +!$omp declare target(omp_set_nested ) +!$omp declare target(omp_get_num_threads ) +!$omp declare target(omp_get_max_threads ) +!$omp declare target(omp_get_thread_num ) +!$omp declare target(omp_get_num_procs ) +!$omp declare target(omp_in_parallel ) +!$omp declare target(omp_in_final ) +!$omp declare target(omp_get_dynamic ) +!$omp declare target(omp_get_nested ) +!$omp declare target(omp_get_thread_limit ) +!$omp declare target(omp_set_max_active_levels ) +!$omp declare target(omp_get_max_active_levels ) +!$omp declare target(omp_get_level ) +!$omp declare target(omp_get_active_level ) +!$omp declare target(omp_get_ancestor_thread_num ) +!$omp declare target(omp_get_team_size ) +!$omp declare target(omp_set_schedule ) +!$omp declare target(omp_get_schedule ) +!$omp declare target(omp_get_proc_bind ) +!$omp declare target(omp_get_wtime ) +!$omp declare target(omp_get_wtick ) +!$omp declare target(omp_get_default_device ) +!$omp declare target(omp_set_default_device ) +!$omp declare target(omp_is_initial_device ) +!$omp declare target(omp_get_num_devices ) +!$omp declare target(omp_get_num_teams ) +!$omp declare target(omp_get_team_num ) +!$omp declare target(omp_init_lock ) +!$omp declare target(omp_destroy_lock ) +!$omp declare target(omp_set_lock ) +!$omp declare target(omp_unset_lock ) +!$omp declare target(omp_test_lock ) +!$omp declare target(omp_init_nest_lock ) +!$omp declare target(omp_destroy_nest_lock ) +!$omp declare target(omp_set_nest_lock ) +!$omp declare target(omp_unset_nest_lock ) +!$omp declare target(omp_test_nest_lock ) +!$omp declare target(kmp_set_stacksize ) +!$omp declare target(kmp_set_stacksize_s ) +!$omp declare target(kmp_set_blocktime ) +!$omp declare target(kmp_set_library_serial ) +!$omp declare target(kmp_set_library_turnaround ) +!$omp declare target(kmp_set_library_throughput ) 
+!$omp declare target(kmp_set_library ) +!$omp declare target(kmp_set_defaults ) +!$omp declare target(kmp_get_stacksize ) +!$omp declare target(kmp_get_stacksize_s ) +!$omp declare target(kmp_get_blocktime ) +!$omp declare target(kmp_get_library ) +!$omp declare target(kmp_set_affinity ) +!$omp declare target(kmp_get_affinity ) +!$omp declare target(kmp_get_affinity_max_proc ) +!$omp declare target(kmp_create_affinity_mask ) +!$omp declare target(kmp_destroy_affinity_mask ) +!$omp declare target(kmp_set_affinity_mask_proc ) +!$omp declare target(kmp_unset_affinity_mask_proc ) +!$omp declare target(kmp_get_affinity_mask_proc ) +!$omp declare target(kmp_malloc ) +!$omp declare target(kmp_calloc ) +!$omp declare target(kmp_realloc ) +!$omp declare target(kmp_free ) +!$omp declare target(kmp_set_warnings_on ) +!$omp declare target(kmp_set_warnings_off ) +!$omp declare target(omp_init_lock_with_hint ) +!$omp declare target(omp_init_nest_lock_with_hint ) +!DIR$ ENDIF +!DIR$ ENDIF + diff --git a/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var b/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var index 96818519c98..fbd95e858b6 100644 --- a/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var +++ b/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var @@ -1,487 +1,487 @@ -/* - * include/41/ompt.h.var - */ - -#ifndef __OMPT__ -#define __OMPT__ - -/***************************************************************************** - * system include files - *****************************************************************************/ - -#include - - - -/***************************************************************************** - * iteration macros - *****************************************************************************/ - -#define FOREACH_OMPT_INQUIRY_FN(macro) \ - macro (ompt_enumerate_state) \ - \ - macro (ompt_set_callback) \ - macro (ompt_get_callback) \ - \ - macro (ompt_get_idle_frame) \ - macro (ompt_get_task_frame) \ - \ - macro (ompt_get_state) \ - \ - macro 
(ompt_get_parallel_id) \ - macro (ompt_get_parallel_team_size) \ - macro (ompt_get_task_id) \ - macro (ompt_get_thread_id) - -#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ - macro (ompt_idle) \ - macro (ompt_overhead) \ - macro (ompt_barrier_wait) \ - macro (ompt_task_wait) \ - macro (ompt_mutex_wait) - -#define FOREACH_OMPT_STATE(macro) \ - \ - /* first */ \ - macro (ompt_state_first, 0x71) /* initial enumeration state */ \ - \ - /* work states (0..15) */ \ - macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ - macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ - macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ - \ - /* idle (16..31) */ \ - macro (ompt_state_idle, 0x10) /* waiting for work */ \ - \ - /* overhead states (32..63) */ \ - macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ - \ - /* barrier wait states (64..79) */ \ - macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ - macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ - macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ - \ - /* task wait states (80..95) */ \ - macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ - macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ - \ - /* mutex wait states (96..111) */ \ - macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ - macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ - macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ - macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ - macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ - macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) 
*/ \ - \ - /* misc (112..127) */ \ - macro (ompt_state_undefined, 0x70) /* undefined thread state */ - - -#define FOREACH_OMPT_EVENT(macro) \ - \ - /*--- Mandatory Events ---*/ \ - macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ - macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ - \ - macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ - macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ - \ - macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ - macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ - \ - macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ - \ - macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ - \ - /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ - macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ - macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ - \ - macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ - macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ - \ - macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ - macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ - \ - macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ - macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ - \ - macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ - macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ - macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ 
\ - \ - macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ - \ - macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ - \ - /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ - macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ - macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ - \ - macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ - macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ - \ - macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ - \ - macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ - macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ - \ - macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ - macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ - \ - macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ - macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ - \ - macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ - macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ - \ - macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ - macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ - \ - macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ - macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ - \ - macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at 
barrier begin */ \ - macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ - \ - macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ - macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ - \ - macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ - macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ - \ - macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ - \ - macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ - macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ - macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ - macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ - macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ - \ - macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ - macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ - macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ - macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ - macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ - macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ - \ - macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ - macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ - \ - macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ - macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ - \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush 
*/ - - - -/***************************************************************************** - * data types - *****************************************************************************/ - -/*--------------------- - * identifiers - *---------------------*/ - -typedef uint64_t ompt_thread_id_t; -#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_task_id_t; -#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_parallel_id_t; -#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ - -typedef uint64_t ompt_wait_id_t; -#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ - - -/*--------------------- - * ompt_frame_t - *---------------------*/ - -typedef struct ompt_frame_s { - void *exit_runtime_frame; /* next frame is user code */ - void *reenter_runtime_frame; /* previous frame is user code */ -} ompt_frame_t; - - -/***************************************************************************** - * enumerations for thread states and runtime events - *****************************************************************************/ - -/*--------------------- - * runtime states - *---------------------*/ - -typedef enum { -#define ompt_state_macro(state, code) state = code, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -} ompt_state_t; - - -/*--------------------- - * runtime events - *---------------------*/ - -typedef enum { -#define ompt_event_macro(event, callback, eventid) event = eventid, - FOREACH_OMPT_EVENT(ompt_event_macro) -#undef ompt_event_macro -} ompt_event_t; - - -/*--------------------- - * set callback results - *---------------------*/ -typedef enum { - ompt_set_result_registration_error = 0, - ompt_set_result_event_may_occur_no_callback = 1, - ompt_set_result_event_never_occurs = 2, - ompt_set_result_event_may_occur_callback_some = 3, - ompt_set_result_event_may_occur_callback_always = 4, -} ompt_set_result_t; - - - 
-/***************************************************************************** - * callback signatures - *****************************************************************************/ - -/* initialization */ -typedef void (*ompt_interface_fn_t)(void); - -typedef ompt_interface_fn_t (*ompt_function_lookup_t)( - const char * /* entry point to look up */ -); - -/* threads */ -typedef void (*ompt_thread_callback_t) ( - ompt_thread_id_t thread_id /* ID of thread */ -); - -typedef enum { - ompt_thread_initial = 1, // start the enumeration at 1 - ompt_thread_worker = 2, - ompt_thread_other = 3 -} ompt_thread_type_t; - -typedef enum { - ompt_invoker_program = 0, /* program invokes master task */ - ompt_invoker_runtime = 1 /* runtime invokes master task */ -} ompt_invoker_t; - -typedef void (*ompt_thread_type_callback_t) ( - ompt_thread_type_t thread_type, /* type of thread */ - ompt_thread_id_t thread_id /* ID of thread */ -); - -typedef void (*ompt_wait_callback_t) ( - ompt_wait_id_t wait_id /* wait id */ -); - -/* parallel and workshares */ -typedef void (*ompt_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id /* id of task */ -); - -typedef void (*ompt_new_workshare_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t parent_task_id, /* id of parent task */ - void *workshare_function /* pointer to outlined function */ -); - -typedef void (*ompt_new_parallel_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data of parent task */ - ompt_parallel_id_t parallel_id, /* id of parallel region */ - uint32_t requested_team_size, /* number of threads in team */ - void *parallel_function, /* pointer to outlined function */ - ompt_invoker_t invoker /* who invokes master task? 
*/ -); - -typedef void (*ompt_end_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id, /* id of task */ - ompt_invoker_t invoker /* who invokes master task? */ -); - -/* tasks */ -typedef void (*ompt_task_callback_t) ( - ompt_task_id_t task_id /* id of task */ -); - -typedef void (*ompt_task_pair_callback_t) ( - ompt_task_id_t first_task_id, - ompt_task_id_t second_task_id -); - -typedef void (*ompt_new_task_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data for parent task */ - ompt_task_id_t new_task_id, /* id of created task */ - void *task_function /* pointer to outlined function */ -); - -/* program */ -typedef void (*ompt_control_callback_t) ( - uint64_t command, /* command of control call */ - uint64_t modifier /* modifier of control call */ -); - -typedef void (*ompt_callback_t)(void); - - -/**************************************************************************** - * ompt API - ***************************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -#define OMPT_API_FNTYPE(fn) fn##_t - -#define OMPT_API_FUNCTION(return_type, fn, args) \ - typedef return_type (*OMPT_API_FNTYPE(fn)) args - - - -/**************************************************************************** - * INQUIRY FUNCTIONS - ***************************************************************************/ - -/* state */ -OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( - ompt_wait_id_t *ompt_wait_id -)); - -/* thread */ -OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); - -OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); - -/* parallel region */ -OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( - int ancestor_level -)); - -OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( - int ancestor_level -)); - -/* task */ -OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( - int depth 
-)); - -OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( - int depth -)); - - - -/**************************************************************************** - * PLACEHOLDERS FOR PERFORMANCE REPORTING - ***************************************************************************/ - -/* idle */ -OMPT_API_FUNCTION(void, ompt_idle, ( - void -)); - -/* overhead */ -OMPT_API_FUNCTION(void, ompt_overhead, ( - void -)); - -/* barrier wait */ -OMPT_API_FUNCTION(void, ompt_barrier_wait, ( - void -)); - -/* task wait */ -OMPT_API_FUNCTION(void, ompt_task_wait, ( - void -)); - -/* mutex wait */ -OMPT_API_FUNCTION(void, ompt_mutex_wait, ( - void -)); - - - -/**************************************************************************** - * INITIALIZATION FUNCTIONS - ***************************************************************************/ - -OMPT_API_FUNCTION(void, ompt_initialize, ( - ompt_function_lookup_t ompt_fn_lookup, - const char *runtime_version, - unsigned int ompt_version -)); - - -/* initialization interface to be defined by tool */ -ompt_initialize_t ompt_tool(void); - -typedef enum opt_init_mode_e { - ompt_init_mode_never = 0, - ompt_init_mode_false = 1, - ompt_init_mode_true = 2, - ompt_init_mode_always = 3 -} ompt_init_mode_t; - -OMPT_API_FUNCTION(int, ompt_set_callback, ( - ompt_event_t event, - ompt_callback_t callback -)); - -typedef enum ompt_set_callback_rc_e { /* non-standard */ - ompt_set_callback_error = 0, - ompt_has_event_no_callback = 1, - ompt_no_event_no_callback = 2, - ompt_has_event_may_callback = 3, - ompt_has_event_must_callback = 4, -} ompt_set_callback_rc_t; - - -OMPT_API_FUNCTION(int, ompt_get_callback, ( - ompt_event_t event, - ompt_callback_t *callback -)); - - - -/**************************************************************************** - * MISCELLANEOUS FUNCTIONS - ***************************************************************************/ - -/* control */ -#if defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp declare 
target -#endif -void ompt_control( - uint64_t command, - uint64_t modifier -); -#if defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp end declare target -#endif - -/* state enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_state, ( - int current_state, - int *next_state, - const char **next_state_name -)); - -#ifdef __cplusplus -}; -#endif - -#endif - +/* + * include/41/ompt.h.var + */ + +#ifndef __OMPT__ +#define __OMPT__ + +/***************************************************************************** + * system include files + *****************************************************************************/ + +#include + + + +/***************************************************************************** + * iteration macros + *****************************************************************************/ + +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_state) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_idle_frame) \ + macro (ompt_get_task_frame) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_id) \ + macro (ompt_get_parallel_team_size) \ + macro (ompt_get_task_id) \ + macro (ompt_get_thread_id) + +#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ + macro (ompt_idle) \ + macro (ompt_overhead) \ + macro (ompt_barrier_wait) \ + macro (ompt_task_wait) \ + macro (ompt_mutex_wait) + +#define FOREACH_OMPT_STATE(macro) \ + \ + /* first */ \ + macro (ompt_state_first, 0x71) /* initial enumeration state */ \ + \ + /* work states (0..15) */ \ + macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ + macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ + macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ + \ + /* idle (16..31) */ \ + macro (ompt_state_idle, 0x10) /* waiting for work */ \ + \ + /* overhead states (32..63) */ \ + macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ + \ + /* barrier wait states (64..79) */ \ + 
macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ + macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ + macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ + \ + /* task wait states (80..95) */ \ + macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ + macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ + \ + /* mutex wait states (96..111) */ \ + macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ + macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ + macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ + macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ + macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ + macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \ + \ + /* misc (112..127) */ \ + macro (ompt_state_undefined, 0x70) /* undefined thread state */ + + +#define FOREACH_OMPT_EVENT(macro) \ + \ + /*--- Mandatory Events ---*/ \ + macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ + macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ + \ + macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ + macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ + \ + macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ + macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ + \ + macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ + \ + macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ + \ + /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ + macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ + macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ + \ + macro 
(ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ + macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ + \ + macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ + macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ + \ + macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ + macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ + \ + macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ + macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ + macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \ + \ + macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ + \ + macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ + \ + /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ + macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ + macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ + \ + macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ + macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ + \ + macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ + \ + macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ + macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ + \ + macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ + macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ + \ 
+ macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ + macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ + \ + macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ + macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ + \ + macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ + macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ + \ + macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ + macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ + \ + macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \ + macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ + \ + macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ + macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ + \ + macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ + macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ + \ + macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ + \ + macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ + macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ + macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ + macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ + macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ + \ + macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ + macro 
(ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ + macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ + macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ + macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ + macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ + \ + macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ + macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ + \ + macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ + macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ + \ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + + + +/***************************************************************************** + * data types + *****************************************************************************/ + +/*--------------------- + * identifiers + *---------------------*/ + +typedef uint64_t ompt_thread_id_t; +#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_task_id_t; +#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_parallel_id_t; +#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_wait_id_t; +#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ + + +/*--------------------- + * ompt_frame_t + *---------------------*/ + +typedef struct ompt_frame_s { + void *exit_runtime_frame; /* next frame is user code */ + void *reenter_runtime_frame; /* previous frame is user code */ +} ompt_frame_t; + + +/***************************************************************************** + * enumerations for thread states and runtime events + 
*****************************************************************************/ + +/*--------------------- + * runtime states + *---------------------*/ + +typedef enum { +#define ompt_state_macro(state, code) state = code, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +} ompt_state_t; + + +/*--------------------- + * runtime events + *---------------------*/ + +typedef enum { +#define ompt_event_macro(event, callback, eventid) event = eventid, + FOREACH_OMPT_EVENT(ompt_event_macro) +#undef ompt_event_macro +} ompt_event_t; + + +/*--------------------- + * set callback results + *---------------------*/ +typedef enum { + ompt_set_result_registration_error = 0, + ompt_set_result_event_may_occur_no_callback = 1, + ompt_set_result_event_never_occurs = 2, + ompt_set_result_event_may_occur_callback_some = 3, + ompt_set_result_event_may_occur_callback_always = 4, +} ompt_set_result_t; + + + +/***************************************************************************** + * callback signatures + *****************************************************************************/ + +/* initialization */ +typedef void (*ompt_interface_fn_t)(void); + +typedef ompt_interface_fn_t (*ompt_function_lookup_t)( + const char * /* entry point to look up */ +); + +/* threads */ +typedef void (*ompt_thread_callback_t) ( + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef enum { + ompt_thread_initial = 1, // start the enumeration at 1 + ompt_thread_worker = 2, + ompt_thread_other = 3 +} ompt_thread_type_t; + +typedef enum { + ompt_invoker_program = 0, /* program invokes master task */ + ompt_invoker_runtime = 1 /* runtime invokes master task */ +} ompt_invoker_t; + +typedef void (*ompt_thread_type_callback_t) ( + ompt_thread_type_t thread_type, /* type of thread */ + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef void (*ompt_wait_callback_t) ( + ompt_wait_id_t wait_id /* wait id */ +); + +/* parallel and workshares */ +typedef void 
(*ompt_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_new_workshare_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t parent_task_id, /* id of parent task */ + void *workshare_function /* pointer to outlined function */ +); + +typedef void (*ompt_new_parallel_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data of parent task */ + ompt_parallel_id_t parallel_id, /* id of parallel region */ + uint32_t requested_team_size, /* number of threads in team */ + void *parallel_function, /* pointer to outlined function */ + ompt_invoker_t invoker /* who invokes master task? */ +); + +typedef void (*ompt_end_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id, /* id of task */ + ompt_invoker_t invoker /* who invokes master task? 
*/ +); + +/* tasks */ +typedef void (*ompt_task_callback_t) ( + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_task_pair_callback_t) ( + ompt_task_id_t first_task_id, + ompt_task_id_t second_task_id +); + +typedef void (*ompt_new_task_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data for parent task */ + ompt_task_id_t new_task_id, /* id of created task */ + void *task_function /* pointer to outlined function */ +); + +/* program */ +typedef void (*ompt_control_callback_t) ( + uint64_t command, /* command of control call */ + uint64_t modifier /* modifier of control call */ +); + +typedef void (*ompt_callback_t)(void); + + +/**************************************************************************** + * ompt API + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define OMPT_API_FNTYPE(fn) fn##_t + +#define OMPT_API_FUNCTION(return_type, fn, args) \ + typedef return_type (*OMPT_API_FNTYPE(fn)) args + + + +/**************************************************************************** + * INQUIRY FUNCTIONS + ***************************************************************************/ + +/* state */ +OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( + ompt_wait_id_t *ompt_wait_id +)); + +/* thread */ +OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); + +OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); + +/* parallel region */ +OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( + int ancestor_level +)); + +OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( + int ancestor_level +)); + +/* task */ +OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( + int depth +)); + +OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( + int depth +)); + + + +/**************************************************************************** + * PLACEHOLDERS FOR PERFORMANCE REPORTING + 
***************************************************************************/ + +/* idle */ +OMPT_API_FUNCTION(void, ompt_idle, ( + void +)); + +/* overhead */ +OMPT_API_FUNCTION(void, ompt_overhead, ( + void +)); + +/* barrier wait */ +OMPT_API_FUNCTION(void, ompt_barrier_wait, ( + void +)); + +/* task wait */ +OMPT_API_FUNCTION(void, ompt_task_wait, ( + void +)); + +/* mutex wait */ +OMPT_API_FUNCTION(void, ompt_mutex_wait, ( + void +)); + + + +/**************************************************************************** + * INITIALIZATION FUNCTIONS + ***************************************************************************/ + +OMPT_API_FUNCTION(void, ompt_initialize, ( + ompt_function_lookup_t ompt_fn_lookup, + const char *runtime_version, + unsigned int ompt_version +)); + + +/* initialization interface to be defined by tool */ +ompt_initialize_t ompt_tool(void); + +typedef enum opt_init_mode_e { + ompt_init_mode_never = 0, + ompt_init_mode_false = 1, + ompt_init_mode_true = 2, + ompt_init_mode_always = 3 +} ompt_init_mode_t; + +OMPT_API_FUNCTION(int, ompt_set_callback, ( + ompt_event_t event, + ompt_callback_t callback +)); + +typedef enum ompt_set_callback_rc_e { /* non-standard */ + ompt_set_callback_error = 0, + ompt_has_event_no_callback = 1, + ompt_no_event_no_callback = 2, + ompt_has_event_may_callback = 3, + ompt_has_event_must_callback = 4, +} ompt_set_callback_rc_t; + + +OMPT_API_FUNCTION(int, ompt_get_callback, ( + ompt_event_t event, + ompt_callback_t *callback +)); + + + +/**************************************************************************** + * MISCELLANEOUS FUNCTIONS + ***************************************************************************/ + +/* control */ +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp declare target +#endif +void ompt_control( + uint64_t command, + uint64_t modifier +); +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp end declare target +#endif + +/* state enumeration */ +OMPT_API_FUNCTION(int, 
ompt_enumerate_state, ( + int current_state, + int *next_state, + const char **next_state_name +)); + +#ifdef __cplusplus +}; +#endif + +#endif + diff --git a/contrib/libs/cxxsupp/openmp/kmp.h b/contrib/libs/cxxsupp/openmp/kmp.h index 4bc2611c71e..66ebf6cbdb3 100644 --- a/contrib/libs/cxxsupp/openmp/kmp.h +++ b/contrib/libs/cxxsupp/openmp/kmp.h @@ -1,3558 +1,3558 @@ -/*! \file */ -/* - * kmp.h -- KPTS runtime header file. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_H -#define KMP_H - -#include "kmp_config.h" - -/* #define BUILD_PARALLEL_ORDERED 1 */ - -/* This fix replaces gettimeofday with clock_gettime for better scalability on - the Altix. Requires user code to be linked with -lrt. -*/ -//#define FIX_SGI_CLOCK - -/* Defines for OpenMP 3.0 tasking and auto scheduling */ - -# ifndef KMP_STATIC_STEAL_ENABLED -# define KMP_STATIC_STEAL_ENABLED 1 -# endif - -#define TASK_CURRENT_NOT_QUEUED 0 -#define TASK_CURRENT_QUEUED 1 - -#define TASK_DEQUE_BITS 8 // Used solely to define TASK_DEQUE_SIZE and TASK_DEQUE_MASK. 
-#define TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS ) -#define TASK_DEQUE_MASK ( TASK_DEQUE_SIZE - 1 ) - -#ifdef BUILD_TIED_TASK_STACK -#define TASK_STACK_EMPTY 0 // entries when the stack is empty - -#define TASK_STACK_BLOCK_BITS 5 // Used to define TASK_STACK_SIZE and TASK_STACK_MASK -#define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array -#define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block -#endif // BUILD_TIED_TASK_STACK - -#define TASK_NOT_PUSHED 1 -#define TASK_SUCCESSFULLY_PUSHED 0 -#define TASK_TIED 1 -#define TASK_UNTIED 0 -#define TASK_EXPLICIT 1 -#define TASK_IMPLICIT 0 -#define TASK_PROXY 1 -#define TASK_FULL 0 - -#define KMP_CANCEL_THREADS -#define KMP_THREAD_ATTR - -#include -#include -#include -#include -#include -#include -/* include don't use; problems with /MD on Windows* OS NT due to bad Microsoft library */ -/* some macros provided below to replace some of these functions */ -#ifndef __ABSOFT_WIN -#include -#endif -#include -#include - -#include - -#include "kmp_os.h" - -#include "kmp_safe_c_api.h" - -#if KMP_STATS_ENABLED -class kmp_stats_list; -#endif - -#if KMP_USE_HWLOC -#include "hwloc.h" -extern hwloc_topology_t __kmp_hwloc_topology; -extern int __kmp_hwloc_error; -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -#include -#endif - -#include "kmp_version.h" -#include "kmp_debug.h" -#include "kmp_lock.h" -#if USE_DEBUGGER -#include "kmp_debugger.h" -#endif -#include "kmp_i18n.h" - -#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS) - -#include "kmp_wrapper_malloc.h" -#if KMP_OS_UNIX -# include -# if !defined NSIG && defined _NSIG -# define NSIG _NSIG -# endif -#endif - -#if KMP_OS_LINUX -# pragma weak clock_gettime -#endif - -#if OMPT_SUPPORT -#include "ompt-internal.h" -#endif - -/*Select data placement in NUMA memory */ -#define NO_FIRST_TOUCH 0 -#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */ - -/* If not 
specified on compile command line, assume no first touch */ -#ifndef BUILD_MEMORY -#define BUILD_MEMORY NO_FIRST_TOUCH -#endif - -// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. -// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size. -#ifndef USE_FAST_MEMORY -#define USE_FAST_MEMORY 3 -#endif - -#ifndef KMP_NESTED_HOT_TEAMS -# define KMP_NESTED_HOT_TEAMS 0 -# define USE_NESTED_HOT_ARG(x) -#else -# if KMP_NESTED_HOT_TEAMS -# if OMP_40_ENABLED -# define USE_NESTED_HOT_ARG(x) ,x -# else -// Nested hot teams feature depends on omp 4.0, disable it for earlier versions -# undef KMP_NESTED_HOT_TEAMS -# define KMP_NESTED_HOT_TEAMS 0 -# define USE_NESTED_HOT_ARG(x) -# endif -# else -# define USE_NESTED_HOT_ARG(x) -# endif -#endif - -// Assume using BGET compare_exchange instruction instead of lock by default. -#ifndef USE_CMP_XCHG_FOR_BGET -#define USE_CMP_XCHG_FOR_BGET 1 -#endif - -// Test to see if queuing lock is better than bootstrap lock for bget -// #ifndef USE_QUEUING_LOCK_FOR_BGET -// #define USE_QUEUING_LOCK_FOR_BGET -// #endif - -#define KMP_NSEC_PER_SEC 1000000000L -#define KMP_USEC_PER_SEC 1000000L - -/*! -@ingroup BASIC_TYPES -@{ -*/ - -// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...) -/*! -Values for bit flags used in the ident_t to describe the fields. -*/ -/*! Use trampoline for internal microtasks */ -#define KMP_IDENT_IMB 0x01 -/*! Use c-style ident structure */ -#define KMP_IDENT_KMPC 0x02 -/* 0x04 is no longer used */ -/*! Entry point generated by auto-parallelization */ -#define KMP_IDENT_AUTOPAR 0x08 -/*! Compiler generates atomic reduction option for kmpc_reduce* */ -#define KMP_IDENT_ATOMIC_REDUCE 0x10 -/*! To mark a 'barrier' directive in user code */ -#define KMP_IDENT_BARRIER_EXPL 0x20 -/*! To Mark implicit barriers. 
*/ -#define KMP_IDENT_BARRIER_IMPL 0x0040 -#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0 -#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040 -#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0 - -#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140 -#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0 - -/*! - * The ident structure that describes a source location. - */ -typedef struct ident { - kmp_int32 reserved_1; /**< might be used in Fortran; see above */ - kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */ - kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ -#if USE_ITT_BUILD - /* but currently used for storing region-specific ITT */ - /* contextual information. */ -#endif /* USE_ITT_BUILD */ - kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ - char const *psource; /**< String describing the source location. - The string is composed of semi-colon separated fields which describe the source file, - the function and a pair of line numbers that delimit the construct. - */ -} ident_t; -/*! -@} -*/ - -// Some forward declarations. - -typedef union kmp_team kmp_team_t; -typedef struct kmp_taskdata kmp_taskdata_t; -typedef union kmp_task_team kmp_task_team_t; -typedef union kmp_team kmp_team_p; -typedef union kmp_info kmp_info_p; -typedef union kmp_root kmp_root_p; - - -#ifdef __cplusplus -extern "C" { -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* Pack two 32-bit signed integers into a 64-bit signed integer */ -/* ToDo: Fix word ordering for big-endian machines. */ -#define KMP_PACK_64(HIGH_32,LOW_32) \ - ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) ) - - -/* - * Generic string manipulation macros. 
- * Assume that _x is of type char * - */ -#define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; } -#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; } -#define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; } - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) ) -#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - -/* Enumeration types */ - -enum kmp_state_timer { - ts_stop, - ts_start, - ts_pause, - - ts_last_state -}; - -enum dynamic_mode { - dynamic_default, -#ifdef USE_LOAD_BALANCE - dynamic_load_balance, -#endif /* USE_LOAD_BALANCE */ - dynamic_random, - dynamic_thread_limit, - dynamic_max -}; - -/* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */ -#ifndef KMP_SCHED_TYPE_DEFINED -#define KMP_SCHED_TYPE_DEFINED -typedef enum kmp_sched { - kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check - // Note: need to adjust __kmp_sch_map global array in case this enum is changed - kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) - kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) - kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) - kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) - kmp_sched_upper_std = 5, // upper bound for standard schedules - kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules - kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) -// kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44) - kmp_sched_upper = 102, - kmp_sched_default = kmp_sched_static // default scheduling -} kmp_sched_t; -#endif - -/*! 
- @ingroup WORK_SHARING - * Describes the loop schedule to be used for a parallel for loop. - */ -enum sched_type { - kmp_sch_lower = 32, /**< lower bound for unordered values */ - kmp_sch_static_chunked = 33, - kmp_sch_static = 34, /**< static unspecialized */ - kmp_sch_dynamic_chunked = 35, - kmp_sch_guided_chunked = 36, /**< guided unspecialized */ - kmp_sch_runtime = 37, - kmp_sch_auto = 38, /**< auto */ - kmp_sch_trapezoidal = 39, - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_static_greedy = 40, - kmp_sch_static_balanced = 41, - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_guided_iterative_chunked = 42, - kmp_sch_guided_analytical_chunked = 43, - - kmp_sch_static_steal = 44, /**< accessible only through KMP_SCHEDULE environment variable */ - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_upper = 45, /**< upper bound for unordered values */ - - kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ - kmp_ord_static_chunked = 65, - kmp_ord_static = 66, /**< ordered static unspecialized */ - kmp_ord_dynamic_chunked = 67, - kmp_ord_guided_chunked = 68, - kmp_ord_runtime = 69, - kmp_ord_auto = 70, /**< ordered auto */ - kmp_ord_trapezoidal = 71, - kmp_ord_upper = 72, /**< upper bound for ordered values */ - -#if OMP_40_ENABLED - /* Schedules for Distribute construct */ - kmp_distribute_static_chunked = 91, /**< distribute static chunked */ - kmp_distribute_static = 92, /**< distribute static unspecialized */ -#endif - - /* - * For the "nomerge" versions, kmp_dispatch_next*() will always return - * a single iteration/chunk, even if the loop is serialized. For the - * schedule types listed above, the entire iteration vector is returned - * if the loop is serialized. This doesn't work for gcc/gcomp sections. 
- */ - kmp_nm_lower = 160, /**< lower bound for nomerge values */ - - kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), - kmp_nm_static = 162, /**< static unspecialized */ - kmp_nm_dynamic_chunked = 163, - kmp_nm_guided_chunked = 164, /**< guided unspecialized */ - kmp_nm_runtime = 165, - kmp_nm_auto = 166, /**< auto */ - kmp_nm_trapezoidal = 167, - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_nm_static_greedy = 168, - kmp_nm_static_balanced = 169, - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_nm_guided_iterative_chunked = 170, - kmp_nm_guided_analytical_chunked = 171, - kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */ - - kmp_nm_ord_static_chunked = 193, - kmp_nm_ord_static = 194, /**< ordered static unspecialized */ - kmp_nm_ord_dynamic_chunked = 195, - kmp_nm_ord_guided_chunked = 196, - kmp_nm_ord_runtime = 197, - kmp_nm_ord_auto = 198, /**< auto */ - kmp_nm_ord_trapezoidal = 199, - kmp_nm_upper = 200, /**< upper bound for nomerge values */ - - kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ -}; - -/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */ -typedef struct kmp_r_sched { - enum sched_type r_sched_type; - int chunk; -} kmp_r_sched_t; - -extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types - -enum library_type { - library_none, - library_serial, - library_turnaround, - library_throughput -}; - -#if KMP_OS_LINUX -enum clock_function_type { - clock_function_gettimeofday, - clock_function_clock_gettime -}; -#endif /* KMP_OS_LINUX */ - -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) -enum mic_type { - non_mic, - mic1, - mic2, - mic3, - dummy -}; -#endif - -/* ------------------------------------------------------------------------ */ -/* -- fast reduction stuff ------------------------------------------------ */ - -#undef 
KMP_FAST_REDUCTION_BARRIER -#define KMP_FAST_REDUCTION_BARRIER 1 - -#undef KMP_FAST_REDUCTION_CORE_DUO -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - #define KMP_FAST_REDUCTION_CORE_DUO 1 -#endif - -enum _reduction_method { - reduction_method_not_defined = 0, - critical_reduce_block = ( 1 << 8 ), - atomic_reduce_block = ( 2 << 8 ), - tree_reduce_block = ( 3 << 8 ), - empty_reduce_block = ( 4 << 8 ) -}; - -// description of the packed_reduction_method variable -// the packed_reduction_method variable consists of two enum types variables that are packed together into 0-th byte and 1-st byte: -// 0: ( packed_reduction_method & 0x000000FF ) is a 'enum barrier_type' value of barrier that will be used in fast reduction: bs_plain_barrier or bs_reduction_barrier -// 1: ( packed_reduction_method & 0x0000FF00 ) is a reduction method that will be used in fast reduction; -// reduction method is of 'enum _reduction_method' type and it's defined the way so that the bits of 0-th byte are empty, -// so no need to execute a shift instruction while packing/unpacking - -#if KMP_FAST_REDUCTION_BARRIER - #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ - ( ( reduction_method ) | ( barrier_type ) ) - - #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ - ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) ) - - #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ - ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) ) -#else - #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ - ( reduction_method ) - - #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ - ( packed_reduction_method ) - - #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ - ( bs_plain_barrier ) -#endif - -#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \ - ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) ) - -#if KMP_FAST_REDUCTION_BARRIER 
- #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ - ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) ) - - #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ - ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) ) -#endif - -typedef int PACKED_REDUCTION_METHOD_T; - -/* -- end of fast reduction stuff ----------------------------------------- */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if KMP_OS_WINDOWS -# define USE_CBLKDATA -# pragma warning( push ) -# pragma warning( disable: 271 310 ) -# include -# pragma warning( pop ) -#endif - -#if KMP_OS_UNIX -# include -# include -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* - * Only Linux* OS and Windows* OS support thread affinity. - */ -#if KMP_AFFINITY_SUPPORTED - -extern size_t __kmp_affin_mask_size; -# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) -# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) -# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) -# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT) - -#if KMP_USE_HWLOC - -typedef hwloc_cpuset_t kmp_affin_mask_t; -# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i) -# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i) -# define KMP_CPU_CLR(i,mask) hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i) -# define KMP_CPU_ZERO(mask) hwloc_bitmap_zero((hwloc_cpuset_t)mask) -# define KMP_CPU_COPY(dest, src) hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src) -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ - { \ - unsigned i; \ - for(i=0;i<(unsigned)max_bit_number+1;i++) { \ - if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \ - 
hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \ - } else { \ - hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \ - } \ - } \ - } \ - -# define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src) -# define KMP_CPU_SET_ITERATE(i,mask) \ - for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i)) - -# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc() -# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr); -# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) -# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) -# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) -# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) - -// -// The following macro should be used to index an array of masks. -// The array should be declared as "kmp_affinity_t *" and allocated with -// size "__kmp_affinity_mask_size * len". The macro takes care of the fact -// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but -// on Linux* OS, sizeof(kmp_affin_t) is 1. 
-// -# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i])) -# define KMP_CPU_ALLOC_ARRAY(arr, n) { \ - arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - arr[i] = hwloc_bitmap_alloc(); \ - } \ - } -# define KMP_CPU_FREE_ARRAY(arr, n) { \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - hwloc_bitmap_free(arr[i]); \ - } \ - __kmp_free(arr); \ - } -# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \ - arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - arr[i] = hwloc_bitmap_alloc(); \ - } \ - } -# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - hwloc_bitmap_free(arr[i]); \ - } \ - KMP_INTERNAL_FREE(arr); \ - } - -#else /* KMP_USE_HWLOC */ -# define KMP_CPU_SET_ITERATE(i,mask) \ - for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i) - -# if KMP_OS_LINUX -// -// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size -// (in bytes). It should be allocated on a word boundary. -// -// WARNING!!! We have made the base type of the affinity mask unsigned char, -// in order to eliminate a lot of checks that the true system mask size is -// really a multiple of 4 bytes (on Linux* OS). -// -// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!! 
-// - -typedef unsigned char kmp_affin_mask_t; - -# define _KMP_CPU_SET(i,mask) (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT))) -# define KMP_CPU_SET(i,mask) _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask))) -# define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))) -# define KMP_CPU_ISSET(i,mask) _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask))) -# define _KMP_CPU_CLR(i,mask) (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT))) -# define KMP_CPU_CLR(i,mask) _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask))) - -# define KMP_CPU_ZERO(mask) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] = 0; \ - } \ - } - -# define KMP_CPU_COPY(dest, src) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - = ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] \ - = ~((kmp_affin_mask_t *)(mask))[__i]; \ - } \ - } - -# define KMP_CPU_UNION(dest, src) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - |= ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# endif /* KMP_OS_LINUX */ - -# if KMP_OS_WINDOWS -// -// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on -// Intel(R) 64 it is 8 bytes times the number of processor groups. -// - -# if KMP_GROUP_AFFINITY - -// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). 
-# if _MSC_VER < 1600 -typedef struct GROUP_AFFINITY { - KAFFINITY Mask; - WORD Group; - WORD Reserved[3]; -} GROUP_AFFINITY; -# endif - -typedef DWORD_PTR kmp_affin_mask_t; - -extern int __kmp_num_proc_groups; - -# define _KMP_CPU_SET(i,mask) \ - (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \ - (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) - -# define KMP_CPU_SET(i,mask) \ - _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask))) - -# define _KMP_CPU_ISSET(i,mask) \ - (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \ - (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))) - -# define KMP_CPU_ISSET(i,mask) \ - _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask))) - -# define _KMP_CPU_CLR(i,mask) \ - (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \ - ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) - -# define KMP_CPU_CLR(i,mask) \ - _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask))) - -# define KMP_CPU_ZERO(mask) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] = 0; \ - } \ - } - -# define KMP_CPU_COPY(dest, src) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - = ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] \ - = ~((kmp_affin_mask_t *)(mask))[__i]; \ - } \ - } - -# define KMP_CPU_UNION(dest, src) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - |= ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); -extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; - -typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); -extern kmp_GetActiveProcessorGroupCount_t 
__kmp_GetActiveProcessorGroupCount; - -typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); -extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; - -typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); -extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; - -extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask); - -# else /* KMP_GROUP_AFFINITY */ - -typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */ - -# define KMP_CPU_SET(i,mask) (*(mask) |= (((kmp_affin_mask_t)1) << (i))) -# define KMP_CPU_ISSET(i,mask) (!!(*(mask) & (((kmp_affin_mask_t)1) << (i)))) -# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i))) -# define KMP_CPU_ZERO(mask) (*(mask) = 0) -# define KMP_CPU_COPY(dest, src) (*(dest) = *(src)) -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask)) -# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src)) - -# endif /* KMP_GROUP_AFFINITY */ - -# endif /* KMP_OS_WINDOWS */ - -// -// __kmp_allocate() will return memory allocated on a 4-bytes boundary. -// after zeroing it - it takes care of those assumptions stated above. -// -# define KMP_CPU_ALLOC(ptr) \ - (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size))) -# define KMP_CPU_FREE(ptr) __kmp_free(ptr) -# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size))) -# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */ -# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size))) -# define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr) - -// -// The following macro should be used to index an array of masks. -// The array should be declared as "kmp_affinity_t *" and allocated with -// size "__kmp_affinity_mask_size * len". 
The macro takes care of the fact -// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but -// on Linux* OS, sizeof(kmp_affin_t) is 1. -// -# define KMP_CPU_INDEX(array,i) \ - ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size)) -# define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size) -# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr); -# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size) -# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr); - -#endif /* KMP_USE_HWLOC */ - -// -// Declare local char buffers with this size for printing debug and info -// messages, using __kmp_affinity_print_mask(). -// -#define KMP_AFFIN_MASK_PRINT_LEN 1024 - -enum affinity_type { - affinity_none = 0, - affinity_physical, - affinity_logical, - affinity_compact, - affinity_scatter, - affinity_explicit, - affinity_balanced, - affinity_disabled, // not used outsize the env var parser - affinity_default -}; - -enum affinity_gran { - affinity_gran_fine = 0, - affinity_gran_thread, - affinity_gran_core, - affinity_gran_package, - affinity_gran_node, -#if KMP_GROUP_AFFINITY - // - // The "group" granularity isn't necesssarily coarser than all of the - // other levels, but we put it last in the enum. 
- // - affinity_gran_group, -#endif /* KMP_GROUP_AFFINITY */ - affinity_gran_default -}; - -enum affinity_top_method { - affinity_top_method_all = 0, // try all (supported) methods, in order -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - affinity_top_method_apicid, - affinity_top_method_x2apicid, -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too -#if KMP_GROUP_AFFINITY - affinity_top_method_group, -#endif /* KMP_GROUP_AFFINITY */ - affinity_top_method_flat, -#if KMP_USE_HWLOC - affinity_top_method_hwloc, -#endif - affinity_top_method_default -}; - -#define affinity_respect_mask_default (-1) - -extern enum affinity_type __kmp_affinity_type; /* Affinity type */ -extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */ -extern int __kmp_affinity_gran_levels; /* corresponding int value */ -extern int __kmp_affinity_dups; /* Affinity duplicate masks */ -extern enum affinity_top_method __kmp_affinity_top_method; -extern int __kmp_affinity_compact; /* Affinity 'compact' value */ -extern int __kmp_affinity_offset; /* Affinity offset value */ -extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */ -extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */ -extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? 
*/ -extern char * __kmp_affinity_proclist; /* proc ID list */ -extern kmp_affin_mask_t *__kmp_affinity_masks; -extern unsigned __kmp_affinity_num_masks; -extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error); -extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error); -extern void __kmp_affinity_bind_thread(int which); - -# if KMP_OS_LINUX -extern kmp_affin_mask_t *__kmp_affinity_get_fullMask(); -# endif /* KMP_OS_LINUX */ -extern char const * __kmp_cpuinfo_file; - -#endif /* KMP_AFFINITY_SUPPORTED */ - -#if OMP_40_ENABLED - -// -// This needs to be kept in sync with the values in omp.h !!! -// -typedef enum kmp_proc_bind_t { - proc_bind_false = 0, - proc_bind_true, - proc_bind_master, - proc_bind_close, - proc_bind_spread, - proc_bind_intel, // use KMP_AFFINITY interface - proc_bind_default -} kmp_proc_bind_t; - -typedef struct kmp_nested_proc_bind_t { - kmp_proc_bind_t *bind_types; - int size; - int used; -} kmp_nested_proc_bind_t; - -extern kmp_nested_proc_bind_t __kmp_nested_proc_bind; - -#endif /* OMP_40_ENABLED */ - -# if KMP_AFFINITY_SUPPORTED -# define KMP_PLACE_ALL (-1) -# define KMP_PLACE_UNDEFINED (-2) -# endif /* KMP_AFFINITY_SUPPORTED */ - -extern int __kmp_affinity_num_places; - - -#if OMP_40_ENABLED -typedef enum kmp_cancel_kind_t { - cancel_noreq = 0, - cancel_parallel = 1, - cancel_loop = 2, - cancel_sections = 3, - cancel_taskgroup = 4 -} kmp_cancel_kind_t; -#endif // OMP_40_ENABLED - -extern int __kmp_place_num_sockets; -extern int __kmp_place_socket_offset; -extern int __kmp_place_num_cores; -extern int __kmp_place_core_offset; -extern int __kmp_place_num_threads_per_core; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) - -// -// We need to avoid using -1 as a GTID as +1 is added to the gtid -// 
when storing it in a lock, and the value 0 is reserved. -// -#define KMP_GTID_DNE (-2) /* Does not exist */ -#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ -#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ -#define KMP_GTID_UNKNOWN (-5) /* Is not known */ -#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ - -#define __kmp_get_gtid() __kmp_get_global_thread_id() -#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg() - -#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ - __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid ) - -#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) ) -#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \ - team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid ) - -#define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team ) -#define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ - __kmp_threads[ (gtid) ]-> th.th_team ) - -#define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] ) -#define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) ) - - // Returns current thread (pointer to kmp_info_t). In contrast to __kmp_get_thread(), it works - // with registered and not-yet-registered threads. -#define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \ - (thr)->th.th_info.ds.ds_gtid ) - -// AT: Which way is correct? -// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc; -// AT: 2. 
nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc; -#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc ) - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1))) - -#define KMP_MIN_NTH 1 - -#ifndef KMP_MAX_NTH -# if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX -# define KMP_MAX_NTH PTHREAD_THREADS_MAX -# else -# define KMP_MAX_NTH INT_MAX -# endif -#endif /* KMP_MAX_NTH */ - -#ifdef PTHREAD_STACK_MIN -# define KMP_MIN_STKSIZE PTHREAD_STACK_MIN -#else -# define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) -#endif - -#define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) - -#if KMP_ARCH_X86 -# define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) -#elif KMP_ARCH_X86_64 -# define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) -# define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) -#else -# define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) -#endif - -#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) - -#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024)) -#define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024)) -#define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) - -#define KMP_MIN_STKOFFSET (0) -#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE -#if KMP_OS_DARWIN -# define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET -#else -# define KMP_DEFAULT_STKOFFSET CACHE_LINE -#endif - -#define KMP_MIN_STKPADDING (0) -#define KMP_MAX_STKPADDING (2 * 1024 * 1024) - -#define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */ -#define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */ -#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */ -#define KMP_MIN_BLOCKTIME (0) -#define KMP_MAX_BLOCKTIME 
(INT_MAX) /* Must be this for "infinite" setting the work */ -#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */ -/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */ -/* Only allow increasing number of wakeups */ -#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ - ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \ - ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \ - ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \ - (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) ) - -/* Calculate number of intervals for a specific block time based on monitor_wakeups */ -#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ - ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \ - (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) ) - -#define KMP_MIN_STATSCOLS 40 -#define KMP_MAX_STATSCOLS 4096 -#define KMP_DEFAULT_STATSCOLS 80 - -#define KMP_MIN_INTERVAL 0 -#define KMP_MAX_INTERVAL (INT_MAX-1) -#define KMP_DEFAULT_INTERVAL 0 - -#define KMP_MIN_CHUNK 1 -#define KMP_MAX_CHUNK (INT_MAX-1) -#define KMP_DEFAULT_CHUNK 1 - -#define KMP_MIN_INIT_WAIT 1 -#define KMP_MAX_INIT_WAIT (INT_MAX/2) -#define KMP_DEFAULT_INIT_WAIT 2048U - -#define KMP_MIN_NEXT_WAIT 1 -#define KMP_MAX_NEXT_WAIT (INT_MAX/2) -#define KMP_DEFAULT_NEXT_WAIT 1024U - -// max possible dynamic loops in concurrent execution per team -#define KMP_MAX_DISP_BUF 7 -#define KMP_MAX_ORDERED 8 - -#define KMP_MAX_FIELDS 32 - -#define KMP_MAX_BRANCH_BITS 31 - -#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX - -/* Minimum number of threads before switch to TLS gtid (experimentally determined) */ -/* josh TODO: what about OS X* tuning? 
*/ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# define KMP_TLS_GTID_MIN 5 -#else -# define KMP_TLS_GTID_MIN INT_MAX -#endif - -#define KMP_MASTER_TID(tid) ( (tid) == 0 ) -#define KMP_WORKER_TID(tid) ( (tid) != 0 ) - -#define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 ) -#define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 ) -#define KMP_UBER_GTID(gtid) \ - ( \ - KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \ - KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \ - (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \ - (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\ - ) -#define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 ) - -#ifndef TRUE -#define FALSE 0 -#define TRUE (! FALSE) -#endif - -/* NOTE: all of the following constants must be even */ - -#if KMP_OS_WINDOWS -# define KMP_INIT_WAIT 64U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ -#elif KMP_OS_CNK -# define KMP_INIT_WAIT 16U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ -#elif KMP_OS_LINUX -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ -#elif KMP_OS_DARWIN -/* TODO: tune for KMP_OS_DARWIN */ -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ -#elif KMP_OS_FREEBSD -/* TODO: tune for KMP_OS_FREEBSD */ -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ -#elif KMP_OS_NETBSD -/* TODO: tune for KMP_OS_NETBSD */ -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -typedef struct kmp_cpuid { - kmp_uint32 eax; - kmp_uint32 ebx; - kmp_uint32 ecx; - kmp_uint32 edx; -} kmp_cpuid_t; -extern void 
__kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); -# if KMP_ARCH_X86 - extern void __kmp_x86_pause( void ); -# elif KMP_MIC - static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); }; -# else - static void __kmp_x86_pause( void ) { _mm_pause(); }; -# endif -# define KMP_CPU_PAUSE() __kmp_x86_pause() -#elif KMP_ARCH_PPC64 -# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1") -# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2") -# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory") -# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0) -#else -# define KMP_CPU_PAUSE() /* nothing to do */ -#endif - -#define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; } - -#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); } - -// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround, -// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd. - -#define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \ - if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } } -#define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -=2; \ - if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } } - -/* ------------------------------------------------------------------------ */ -/* Support datatypes for the orphaned construct nesting checks. 
*/ -/* ------------------------------------------------------------------------ */ - -enum cons_type { - ct_none, - ct_parallel, - ct_pdo, - ct_pdo_ordered, - ct_psections, - ct_psingle, - - /* the following must be left in order and not split up */ - ct_taskq, - ct_task, /* really task inside non-ordered taskq, considered a worksharing type */ - ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */ - /* the preceding must be left in order and not split up */ - - ct_critical, - ct_ordered_in_parallel, - ct_ordered_in_pdo, - ct_ordered_in_taskq, - ct_master, - ct_reduce, - ct_barrier -}; - -/* test to see if we are in a taskq construct */ -# define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) ) -# define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered) - -struct cons_data { - ident_t const *ident; - enum cons_type type; - int prev; - kmp_user_lock_p name; /* address exclusively for critical section name comparison */ -}; - -struct cons_header { - int p_top, w_top, s_top; - int stack_size, stack_top; - struct cons_data *stack_data; -}; - -struct kmp_region_info { - char *text; - int offset[KMP_MAX_FIELDS]; - int length[KMP_MAX_FIELDS]; -}; - - -/* ---------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- */ - -#if KMP_OS_WINDOWS - typedef HANDLE kmp_thread_t; - typedef DWORD kmp_key_t; -#endif /* KMP_OS_WINDOWS */ - -#if KMP_OS_UNIX - typedef pthread_t kmp_thread_t; - typedef pthread_key_t kmp_key_t; -#endif - -extern kmp_key_t __kmp_gtid_threadprivate_key; - -typedef struct kmp_sys_info { - long maxrss; /* the maximum resident set size utilized (in kilobytes) */ - long minflt; /* the number of page faults serviced without any I/O */ - long majflt; /* the number of page faults serviced that required I/O */ - long nswap; /* the number of times a process was 
"swapped" out of memory */ - long inblock; /* the number of times the file system had to perform input */ - long oublock; /* the number of times the file system had to perform output */ - long nvcsw; /* the number of times a context switch was voluntarily */ - long nivcsw; /* the number of times a context switch was forced */ -} kmp_sys_info_t; - -typedef struct kmp_cpuinfo { - int initialized; // If 0, other fields are not initialized. - int signature; // CPUID(1).EAX - int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family ) - int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model) - int stepping; // CPUID(1).EAX[3:0] ( Stepping ) - int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. - int rtm; // 0 if RTM instructions are not supported, 1 otherwise. - int cpu_stackoffset; - int apic_id; - int physical_id; - int logical_id; - kmp_uint64 frequency; // Nominal CPU frequency in Hz. -} kmp_cpuinfo_t; - - -#ifdef BUILD_TV - -struct tv_threadprivate { - /* Record type #1 */ - void *global_addr; - void *thread_addr; -}; - -struct tv_data { - struct tv_data *next; - void *type; - union tv_union { - struct tv_threadprivate tp; - } u; -}; - -extern kmp_key_t __kmp_tv_key; - -#endif /* BUILD_TV */ - -/* ------------------------------------------------------------------------ */ - -#if USE_ITT_BUILD -// We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here. -// Later we will check the type meets requirements. 
-typedef int kmp_itt_mark_t; -#define KMP_ITT_DEBUG 0 -#endif /* USE_ITT_BUILD */ - -/* ------------------------------------------------------------------------ */ - -/* - * Taskq data structures - */ - -#define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4) -#define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */ - -/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */ - -#define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */ -#define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */ -#define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */ -#define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */ -#define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */ -#define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */ -#define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */ -#define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */ - -#define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */ - -#define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */ -#define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */ -#define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */ -#define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */ -#define TQF_PARALLEL_CONTEXT 0x1000 /* internal use only: this queue encountered in a parallel context: not serialized */ -#define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */ - -#define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use 
only flags */ - -typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t { - kmp_int32 ai_data; -} kmpc_aligned_int32_t; - -typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t { - struct kmpc_thunk_t *qs_thunk; -} kmpc_aligned_queue_slot_t; - -typedef struct kmpc_task_queue_t { - /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */ - kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */ - union { - struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */ - struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */ - } tq; - volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */ - struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */ - struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */ - volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */ - /* (other than the thread executing the kmpc_end_taskq call) */ - /* locked by parent tq's tq_link_lck */ - - /* shared data for task queue */ - struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */ - /* only one array element exists for all but outermost taskq */ - - /* bookkeeping for ordered task queue */ - kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */ - volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */ - - /* thunk storage management for task queue */ - kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */ - struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */ - struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */ - - 
/* data fields for queue itself */ - kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */ - kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */ - volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task thunk, occupied if not NULL */ - kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */ - kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */ - kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */ - volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */ - kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */ - volatile kmp_int32 tq_flags; /* TQF_xxx */ - - /* bookkeeping for outstanding thunks */ - struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */ - kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */ - - /* statistics library bookkeeping */ - ident_t *tq_loc; /* source location information for taskq directive */ -} kmpc_task_queue_t; - -typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk); - -/* sizeof_shareds passed as arg to __kmpc_taskq call */ -typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */ - kmpc_task_queue_t *sv_queue; - /* (pointers to) shared vars */ -} kmpc_shared_vars_t; - -typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t { - volatile struct kmpc_shared_vars_t *ai_data; -} kmpc_aligned_shared_vars_t; - -/* sizeof_thunk passed as arg to kmpc_taskq call */ -typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */ - union { /* field used for internal freelists too */ - kmpc_shared_vars_t *th_shareds; - struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */ - } th; - kmpc_task_t th_task; /* taskq_task 
if flags & TQF_TASKQ_TASK */ - struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */ - kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */ - kmp_int32 th_status; - kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */ - /* private vars */ -} kmpc_thunk_t; - -typedef struct KMP_ALIGN_CACHE kmp_taskq { - int tq_curr_thunk_capacity; - - kmpc_task_queue_t *tq_root; - kmp_int32 tq_global_flags; - - kmp_lock_t tq_freelist_lck; - kmpc_task_queue_t *tq_freelist; - - kmpc_thunk_t **tq_curr_thunk; -} kmp_taskq_t; - -/* END Taskq data structures */ -/* --------------------------------------------------------------------------- */ - -typedef kmp_int32 kmp_critical_name[8]; - -/*! -@ingroup PARALLEL -The type for a microtask which gets passed to @ref __kmpc_fork_call(). -The arguments to the outlined function are -@param global_tid the global thread identity of the thread executing the function. -@param bound_tid the local identitiy of the thread executing the function -@param ... pointers to shared variables accessed by the function. -*/ -typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... ); -typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... ); - -/*! -@ingroup THREADPRIVATE -@{ -*/ -/* --------------------------------------------------------------------------- */ -/* Threadprivate initialization/finalization function declarations */ - -/* for non-array objects: __kmpc_threadprivate_register() */ - -/*! - Pointer to the constructor function. - The first argument is the this pointer -*/ -typedef void *(*kmpc_ctor) (void *); - -/*! - Pointer to the destructor function. - The first argument is the this pointer -*/ -typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */ -/*! - Pointer to an alternate constructor. 
- The first argument is the this pointer. -*/ -typedef void *(*kmpc_cctor) (void *, void *); - -/* for array objects: __kmpc_threadprivate_register_vec() */ - /* First arg: "this" pointer */ - /* Last arg: number of array elements */ -/*! - Array constructor. - First argument is the this pointer - Second argument the number of array elements. -*/ -typedef void *(*kmpc_ctor_vec) (void *, size_t); -/*! - Pointer to the array destructor function. - The first argument is the this pointer - Second argument the number of array elements. -*/ -typedef void (*kmpc_dtor_vec) (void *, size_t); -/*! - Array constructor. - First argument is the this pointer - Third argument the number of array elements. -*/ -typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */ - -/*! -@} -*/ - - -/* ------------------------------------------------------------------------ */ - -/* keeps tracked of threadprivate cache allocations for cleanup later */ -typedef struct kmp_cached_addr { - void **addr; /* address of allocated cache */ - struct kmp_cached_addr *next; /* pointer to next cached address */ -} kmp_cached_addr_t; - -struct private_data { - struct private_data *next; /* The next descriptor in the list */ - void *data; /* The data buffer for this descriptor */ - int more; /* The repeat count for this descriptor */ - size_t size; /* The data size for this descriptor */ -}; - -struct private_common { - struct private_common *next; - struct private_common *link; - void *gbl_addr; - void *par_addr; /* par_addr == gbl_addr for MASTER thread */ - size_t cmn_size; -}; - -struct shared_common -{ - struct shared_common *next; - struct private_data *pod_init; - void *obj_init; - void *gbl_addr; - union { - kmpc_ctor ctor; - kmpc_ctor_vec ctorv; - } ct; - union { - kmpc_cctor cctor; - kmpc_cctor_vec cctorv; - } cct; - union { - kmpc_dtor dtor; - kmpc_dtor_vec dtorv; - } dt; - size_t vec_len; - int is_vec; - size_t cmn_size; -}; - -#define KMP_HASH_TABLE_LOG2 9 /* 
log2 of the hash table size */ -#define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */ -#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */ -#define KMP_HASH(x) ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1)) - -struct common_table { - struct private_common *data[ KMP_HASH_TABLE_SIZE ]; -}; - -struct shared_table { - struct shared_common *data[ KMP_HASH_TABLE_SIZE ]; -}; -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#ifdef KMP_STATIC_STEAL_ENABLED -typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { - kmp_int32 count; - kmp_int32 ub; - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - kmp_int32 lb; - kmp_int32 st; - kmp_int32 tc; - kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */ - - // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template - kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should - kmp_int32 parm2; // make no real change at least while padding is off. - kmp_int32 parm3; - kmp_int32 parm4; - }; - - kmp_uint32 ordered_lower; - kmp_uint32 ordered_upper; -#if KMP_OS_WINDOWS - // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'. - // It would be nice to measure execution times. - // Conditional if/endif can be removed at all. 
- kmp_int32 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info32_t; - -typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { - kmp_int64 count; /* current chunk number for static and static-steal scheduling*/ - kmp_int64 ub; /* upper-bound */ - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - kmp_int64 lb; /* lower-bound */ - kmp_int64 st; /* stride */ - kmp_int64 tc; /* trip count (number of iterations) */ - kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */ - - /* parm[1-4] are used in different ways by different scheduling algorithms */ - - // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN( 32 ) { - kmp_int64 parm1; - kmp_int64 parm2; - kmp_int64 parm3; - kmp_int64 parm4; - }; - - kmp_uint64 ordered_lower; - kmp_uint64 ordered_upper; -#if KMP_OS_WINDOWS - // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'. - // It would be nice to measure execution times. - // Conditional if/endif can be removed at all. 
- kmp_int64 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info64_t; -#else /* KMP_STATIC_STEAL_ENABLED */ -typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { - kmp_int32 lb; - kmp_int32 ub; - kmp_int32 st; - kmp_int32 tc; - - kmp_int32 parm1; - kmp_int32 parm2; - kmp_int32 parm3; - kmp_int32 parm4; - - kmp_int32 count; - - kmp_uint32 ordered_lower; - kmp_uint32 ordered_upper; -#if KMP_OS_WINDOWS - kmp_int32 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info32_t; - -typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { - kmp_int64 lb; /* lower-bound */ - kmp_int64 ub; /* upper-bound */ - kmp_int64 st; /* stride */ - kmp_int64 tc; /* trip count (number of iterations) */ - - /* parm[1-4] are used in different ways by different scheduling algorithms */ - kmp_int64 parm1; - kmp_int64 parm2; - kmp_int64 parm3; - kmp_int64 parm4; - - kmp_int64 count; /* current chunk number for static scheduling */ - - kmp_uint64 ordered_lower; - kmp_uint64 ordered_upper; -#if KMP_OS_WINDOWS - kmp_int64 last_upper; -#endif /* KMP_OS_WINDOWS */ -} dispatch_private_info64_t; -#endif /* KMP_STATIC_STEAL_ENABLED */ - -typedef struct KMP_ALIGN_CACHE dispatch_private_info { - union private_info { - dispatch_private_info32_t p32; - dispatch_private_info64_t p64; - } u; - enum sched_type schedule; /* scheduling algorithm */ - kmp_int32 ordered; /* ordered clause specified */ - kmp_int32 ordered_bumped; - kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar - struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */ - kmp_int32 nomerge; /* don't merge iters if serialized */ - kmp_int32 type_size; /* the size of types in private_info */ - enum cons_type pushed_ws; -} dispatch_private_info_t; - -typedef struct dispatch_shared_info32 { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile kmp_uint32 iteration; - 
volatile kmp_uint32 num_done; - volatile kmp_uint32 ordered_iteration; - kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar -} dispatch_shared_info32_t; - -typedef struct dispatch_shared_info64 { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile kmp_uint64 iteration; - volatile kmp_uint64 num_done; - volatile kmp_uint64 ordered_iteration; - kmp_int64 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar -} dispatch_shared_info64_t; - -typedef struct dispatch_shared_info { - union shared_info { - dispatch_shared_info32_t s32; - dispatch_shared_info64_t s64; - } u; -/* volatile kmp_int32 dispatch_abort; depricated */ - volatile kmp_uint32 buffer_index; -} dispatch_shared_info_t; - -typedef struct kmp_disp { - /* Vector for ORDERED SECTION */ - void (*th_deo_fcn)( int * gtid, int * cid, ident_t *); - /* Vector for END ORDERED SECTION */ - void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *); - - dispatch_shared_info_t *th_dispatch_sh_current; - dispatch_private_info_t *th_dispatch_pr_current; - - dispatch_private_info_t *th_disp_buffer; - kmp_int32 th_disp_index; - void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64 -#if KMP_USE_INTERNODE_ALIGNMENT - char more_padding[INTERNODE_CACHE_LINE]; -#endif -} kmp_disp_t; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* Barrier stuff */ - -/* constants for barrier state update */ -#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ -#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ -#define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */ -#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ - -#define KMP_BARRIER_SLEEP_STATE 
((kmp_uint) (1 << KMP_BARRIER_SLEEP_BIT)) -#define KMP_BARRIER_UNUSED_STATE ((kmp_uint) (1 << KMP_BARRIER_UNUSED_BIT)) -#define KMP_BARRIER_STATE_BUMP ((kmp_uint) (1 << KMP_BARRIER_BUMP_BIT)) - -#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT) -# error "Barrier sleep bit must be smaller than barrier bump bit" -#endif -#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT) -# error "Barrier unused bit must be smaller than barrier bump bit" -#endif - -// Constants for release barrier wait state: currently, hierarchical only -#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep -#define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release -#define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release -#define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go -#define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up - -enum barrier_type { - bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */ - bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ - #if KMP_FAST_REDUCTION_BARRIER - bs_reduction_barrier, /* 2, All barriers that are used in reduction */ - #endif // KMP_FAST_REDUCTION_BARRIER - bs_last_barrier /* Just a placeholder to mark the end */ -}; - -// to work with reduction barriers just like with plain barriers -#if !KMP_FAST_REDUCTION_BARRIER - #define bs_reduction_barrier bs_plain_barrier -#endif // KMP_FAST_REDUCTION_BARRIER - -typedef enum kmp_bar_pat { /* Barrier communication patterns */ - bp_linear_bar = 0, /* Single level (degenerate) tree */ - bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */ - bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */ - bp_hierarchical_bar = 3, /* Machine hierarchy tree */ - bp_last_bar = 4 /* Placeholder to mark the end */ -} kmp_bar_pat_e; - -# define 
KMP_BARRIER_ICV_PUSH 1 - -/* Record for holding the values of the internal controls stack records */ -typedef struct kmp_internal_control { - int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */ - kmp_int8 nested; /* internal control for nested parallelism (per thread) */ - kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */ - kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */ - int blocktime; /* internal control for blocktime */ - int bt_intervals; /* internal control for blocktime intervals */ - int nproc; /* internal control for #threads for next parallel region (per thread) */ - int max_active_levels; /* internal control for max_active_levels */ - kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */ -#if OMP_40_ENABLED - kmp_proc_bind_t proc_bind; /* internal control for affinity */ -#endif // OMP_40_ENABLED - struct kmp_internal_control *next; -} kmp_internal_control_t; - -static inline void -copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) { - *dst = *src; -} - -/* Thread barrier needs volatile barrier fields */ -typedef struct KMP_ALIGN_CACHE kmp_bstate { - // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it). - // It is not explicitly aligned below, because we *don't* want it to be padded -- instead, - // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache lines - // stores in the hierarchical barrier. - kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread - // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store - volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical) - KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point. 
- kmp_uint32 *skip_per_level; - kmp_uint32 my_level; - kmp_int32 parent_tid; - kmp_int32 old_tid; - kmp_uint32 depth; - struct kmp_bstate *parent_bar; - kmp_team_t *team; - kmp_uint64 leaf_state; - kmp_uint32 nproc; - kmp_uint8 base_leaf_kids; - kmp_uint8 leaf_kids; - kmp_uint8 offset; - kmp_uint8 wait_flag; - kmp_uint8 use_oncore_barrier; -#if USE_DEBUGGER - // The following field is intended for the debugger solely. Only the worker thread itself accesses this - // field: the worker increases it by 1 when it arrives to a barrier. - KMP_ALIGN_CACHE kmp_uint b_worker_arrived; -#endif /* USE_DEBUGGER */ -} kmp_bstate_t; - -union KMP_ALIGN_CACHE kmp_barrier_union { - double b_align; /* use worst case alignment */ - char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ]; - kmp_bstate_t bb; -}; - -typedef union kmp_barrier_union kmp_balign_t; - -/* Team barrier needs only non-volatile arrived counter */ -union KMP_ALIGN_CACHE kmp_barrier_team_union { - double b_align; /* use worst case alignment */ - char b_pad[ CACHE_LINE ]; - struct { - kmp_uint64 b_arrived; /* STATE => task reached synch point. */ -#if USE_DEBUGGER - // The following two fields are indended for the debugger solely. Only master of the team accesses - // these fields: the first one is increased by 1 when master arrives to a barrier, the - // second one is increased by one when all the threads arrived. - kmp_uint b_master_arrived; - kmp_uint b_team_arrived; -#endif - }; -}; - -typedef union kmp_barrier_team_union kmp_balign_team_t; - -/* - * Padding for Linux* OS pthreads condition variables and mutexes used to signal - * threads when a condition changes. This is to workaround an NPTL bug - * where padding was added to pthread_cond_t which caused the initialization - * routine to write outside of the structure if compiled on pre-NPTL threads. 
- */ - -#if KMP_OS_WINDOWS -typedef struct kmp_win32_mutex -{ - /* The Lock */ - CRITICAL_SECTION cs; -} kmp_win32_mutex_t; - -typedef struct kmp_win32_cond -{ - /* Count of the number of waiters. */ - int waiters_count_; - - /* Serialize access to */ - kmp_win32_mutex_t waiters_count_lock_; - - /* Number of threads to release via a or a */ - /* */ - int release_count_; - - /* Keeps track of the current "generation" so that we don't allow */ - /* one thread to steal all the "releases" from the broadcast. */ - int wait_generation_count_; - - /* A manual-reset event that's used to block and release waiting */ - /* threads. */ - HANDLE event_; -} kmp_win32_cond_t; -#endif - -#if KMP_OS_UNIX - -union KMP_ALIGN_CACHE kmp_cond_union { - double c_align; - char c_pad[ CACHE_LINE ]; - pthread_cond_t c_cond; -}; - -typedef union kmp_cond_union kmp_cond_align_t; - -union KMP_ALIGN_CACHE kmp_mutex_union { - double m_align; - char m_pad[ CACHE_LINE ]; - pthread_mutex_t m_mutex; -}; - -typedef union kmp_mutex_union kmp_mutex_align_t; - -#endif /* KMP_OS_UNIX */ - -typedef struct kmp_desc_base { - void *ds_stackbase; - size_t ds_stacksize; - int ds_stackgrow; - kmp_thread_t ds_thread; - volatile int ds_tid; - int ds_gtid; -#if KMP_OS_WINDOWS - volatile int ds_alive; - DWORD ds_thread_id; - /* - ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. However, - debugger support (libomp_db) cannot work with handles, because they uncomparable. For - example, debugger requests info about thread with handle h. h is valid within debugger - process, and meaningless within debugee process. Even if h is duped by call to - DuplicateHandle(), so the result h' is valid within debugee process, but it is a *new* - handle which does *not* equal to any other handle in debugee... The only way to - compare handles is convert them to system-wide ids. GetThreadId() function is - available only in Longhorn and Server 2003. 
:-( In contrast, GetCurrentThreadId() is - available on all Windows* OS flavours (including Windows* 95). Thus, we have to get thread id by - call to GetCurrentThreadId() from within the thread and save it to let libomp_db - identify threads. - */ -#endif /* KMP_OS_WINDOWS */ -} kmp_desc_base_t; - -typedef union KMP_ALIGN_CACHE kmp_desc { - double ds_align; /* use worst case alignment */ - char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ]; - kmp_desc_base_t ds; -} kmp_desc_t; - - -typedef struct kmp_local { - volatile int this_construct; /* count of single's encountered by thread */ - void *reduce_data; -#if KMP_USE_BGET - void *bget_data; - void *bget_list; -#if ! USE_CMP_XCHG_FOR_BGET -#ifdef USE_QUEUING_LOCK_FOR_BGET - kmp_lock_t bget_lock; /* Lock for accessing bget free list */ -#else - kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */ - /* Must be bootstrap lock so we can use it at library shutdown */ -#endif /* USE_LOCK_FOR_BGET */ -#endif /* ! USE_CMP_XCHG_FOR_BGET */ -#endif /* KMP_USE_BGET */ - -#ifdef BUILD_TV - struct tv_data *tv_data; -#endif - - PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */ - -} kmp_local_t; - -#define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) -#define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) -#define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) - -#define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) -#define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) -#define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) -#define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) - -#define 
set__blocktime_team( xteam, xtid, xval ) \ - ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) ) - -#define set__bt_intervals_team( xteam, xtid, xval ) \ - ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) ) - -#define set__bt_set_team( xteam, xtid, xval ) \ - ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) ) - - -#define set__nested( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) ) -#define get__nested( xthread ) \ - ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) ) - -#define set__dynamic( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) ) -#define get__dynamic( xthread ) \ - ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) ) - -#define set__nproc( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) ) - -#define set__max_active_levels( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) ) - -#define set__sched( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) ) - -#if OMP_40_ENABLED - -#define set__proc_bind( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) ) -#define get__proc_bind( xthread ) \ - ( (xthread)->th.th_current_task->td_icvs.proc_bind ) - -#endif /* OMP_40_ENABLED */ - - -/* ------------------------------------------------------------------------ */ -// OpenMP tasking data structures -// - -typedef enum kmp_tasking_mode { - tskm_immediate_exec = 0, - tskm_extra_barrier = 1, - tskm_task_teams = 2, - tskm_max = 2 -} kmp_tasking_mode_t; - -extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */ -extern kmp_int32 __kmp_task_stealing_constraint; - -/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */ -#define 
KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1) -#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1) - -// The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and -// queued since the previous barrier release. -#define KMP_TASKING_ENABLED(task_team) \ - (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) -/*! -@ingroup BASIC_TYPES -@{ -*/ - -/*! - */ -typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * ); - -/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */ -/*! - */ -typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */ - void * shareds; /**< pointer to block of pointers to shared vars */ - kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ - kmp_int32 part_id; /**< part id for the task */ -#if OMP_40_ENABLED - kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */ -#endif // OMP_40_ENABLED - /* private vars */ -} kmp_task_t; - -/*! 
-@} -*/ - -#if OMP_40_ENABLED -typedef struct kmp_taskgroup { - kmp_uint32 count; // number of allocated and not yet complete tasks - kmp_int32 cancel_request; // request for cancellation of this taskgroup - struct kmp_taskgroup *parent; // parent taskgroup -} kmp_taskgroup_t; - - -// forward declarations -typedef union kmp_depnode kmp_depnode_t; -typedef struct kmp_depnode_list kmp_depnode_list_t; -typedef struct kmp_dephash_entry kmp_dephash_entry_t; - -typedef struct kmp_depend_info { - kmp_intptr_t base_addr; - size_t len; - struct { - bool in:1; - bool out:1; - } flags; -} kmp_depend_info_t; - -struct kmp_depnode_list { - kmp_depnode_t * node; - kmp_depnode_list_t * next; -}; - -typedef struct kmp_base_depnode { - kmp_depnode_list_t * successors; - kmp_task_t * task; - - kmp_lock_t lock; - -#if KMP_SUPPORT_GRAPH_OUTPUT - kmp_uint32 id; -#endif - - volatile kmp_int32 npredecessors; - volatile kmp_int32 nrefs; -} kmp_base_depnode_t; - -union KMP_ALIGN_CACHE kmp_depnode { - double dn_align; /* use worst case alignment */ - char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ]; - kmp_base_depnode_t dn; -}; - -struct kmp_dephash_entry { - kmp_intptr_t addr; - kmp_depnode_t * last_out; - kmp_depnode_list_t * last_ins; - kmp_dephash_entry_t * next_in_bucket; -}; - -typedef struct kmp_dephash { - kmp_dephash_entry_t ** buckets; -#ifdef KMP_DEBUG - kmp_uint32 nelements; - kmp_uint32 nconflicts; -#endif -} kmp_dephash_t; - -#endif - -#ifdef BUILD_TIED_TASK_STACK - -/* Tied Task stack definitions */ -typedef struct kmp_stack_block { - kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ]; - struct kmp_stack_block * sb_next; - struct kmp_stack_block * sb_prev; -} kmp_stack_block_t; - -typedef struct kmp_task_stack { - kmp_stack_block_t ts_first_block; // first block of stack entries - kmp_taskdata_t ** ts_top; // pointer to the top of stack - kmp_int32 ts_entries; // number of entries on the stack -} kmp_task_stack_t; - -#endif // BUILD_TIED_TASK_STACK - -typedef struct 
kmp_tasking_flags { /* Total struct must be exactly 32 bits */ - /* Compiler flags */ /* Total compiler flags must be 16 bits */ - unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ - unsigned final : 1; /* task is final(1) so execute immediately */ - unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */ -#if OMP_40_ENABLED - unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */ -#if OMP_41_ENABLED - unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */ - unsigned reserved : 11; /* reserved for compiler use */ -#else - unsigned reserved : 12; /* reserved for compiler use */ -#endif -#else // OMP_40_ENABLED - unsigned reserved : 13; /* reserved for compiler use */ -#endif // OMP_40_ENABLED - - /* Library flags */ /* Total library flags must be 16 bits */ - unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ - unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */ - unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */ - unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */ - /* If either team_serial or tasking_ser is set, task team may be NULL */ - /* Task State Flags: */ - unsigned started : 1; /* 1==started, 0==not started */ - unsigned executing : 1; /* 1==executing, 0==not executing */ - unsigned complete : 1; /* 1==complete, 0==not complete */ - unsigned freed : 1; /* 1==freed, 0==allocateed */ - unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ - unsigned reserved31 : 7; /* reserved for library use */ - -} kmp_tasking_flags_t; - - -struct kmp_taskdata { /* aligned during dynamic allocation */ - kmp_int32 td_task_id; /* id, assigned by debugger */ - kmp_tasking_flags_t td_flags; /* task flags */ - kmp_team_t * td_team; /* team for this task */ - 
kmp_info_p * td_alloc_thread; /* thread that allocated data structures */ - /* Currently not used except for perhaps IDB */ - kmp_taskdata_t * td_parent; /* parent task */ - kmp_int32 td_level; /* task nesting level */ - ident_t * td_ident; /* task identifier */ - // Taskwait data. - ident_t * td_taskwait_ident; - kmp_uint32 td_taskwait_counter; - kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */ - KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */ - volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */ - volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */ -#if OMP_40_ENABLED - kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup - kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here - kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies -#endif -#if OMPT_SUPPORT - ompt_task_info_t ompt_task_info; -#endif -#if KMP_HAVE_QUAD - _Quad td_dummy; // Align structure 16-byte size since allocated just before kmp_task_t -#else - kmp_uint32 td_dummy[2]; -#endif -}; // struct kmp_taskdata - -// Make sure padding above worked -KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 ); - -// Data for task team but per thread -typedef struct kmp_base_thread_data { - kmp_info_p * td_thr; // Pointer back to thread info - // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued? - kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque - kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated - kmp_uint32 td_deque_head; // Head of deque (will wrap) - kmp_uint32 td_deque_tail; // Tail of deque (will wrap) - kmp_int32 td_deque_ntasks; // Number of tasks in deque - // GEH: shouldn't this be volatile since used in while-spin? 
- kmp_int32 td_deque_last_stolen; // Thread number of last successful steal -#ifdef BUILD_TIED_TASK_STACK - kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint -#endif // BUILD_TIED_TASK_STACK -} kmp_base_thread_data_t; - -typedef union KMP_ALIGN_CACHE kmp_thread_data { - kmp_base_thread_data_t td; - double td_align; /* use worst case alignment */ - char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ]; -} kmp_thread_data_t; - - -// Data for task teams which are used when tasking is enabled for the team -typedef struct kmp_base_task_team { - kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */ - /* must be bootstrap lock since used at library shutdown*/ - kmp_task_team_t * tt_next; /* For linking the task team free list */ - kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */ - /* Data survives task team deallocation */ - kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? 
*/ - /* TRUE means tt_threads_data is set up and initialized */ - kmp_int32 tt_nproc; /* #threads in team */ - kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */ -#if OMP_41_ENABLED - kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */ -#endif - - KMP_ALIGN_CACHE - volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */ - - KMP_ALIGN_CACHE - volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */ -} kmp_base_task_team_t; - -union KMP_ALIGN_CACHE kmp_task_team { - kmp_base_task_team_t tt; - double tt_align; /* use worst case alignment */ - char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ]; -}; - -#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) -// Free lists keep same-size free memory slots for fast memory allocation routines -typedef struct kmp_free_list { - void *th_free_list_self; // Self-allocated tasks free list - void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads - void *th_free_list_other; // Non-self free list (to be returned to owner's sync list) -} kmp_free_list_t; -#endif -#if KMP_NESTED_HOT_TEAMS -// Hot teams array keeps hot teams and their sizes for given thread. -// Hot teams are not put in teams pool, and they don't put threads in threads pool. -typedef struct kmp_hot_team_ptr { - kmp_team_p *hot_team; // pointer to hot_team of given nesting level - kmp_int32 hot_team_nth; // number of threads allocated for the hot_team -} kmp_hot_team_ptr_t; -#endif -#if OMP_40_ENABLED -typedef struct kmp_teams_size { - kmp_int32 nteams; // number of teams in a league - kmp_int32 nth; // number of threads in each team of the league -} kmp_teams_size_t; -#endif - -/* ------------------------------------------------------------------------ */ -// OpenMP thread data structures -// - -typedef struct KMP_ALIGN_CACHE kmp_base_info { -/* - * Start with the readonly data which is cache aligned and padded. 
- * this is written before the thread starts working by the master. - * (uber masters may update themselves later) - * (usage does not consider serialized regions) - */ - kmp_desc_t th_info; - kmp_team_p *th_team; /* team we belong to */ - kmp_root_p *th_root; /* pointer to root of task hierarchy */ - kmp_info_p *th_next_pool; /* next available thread in the pool */ - kmp_disp_t *th_dispatch; /* thread's dispatch data */ - int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */ - - /* The following are cached from the team info structure */ - /* TODO use these in more places as determined to be needed via profiling */ - int th_team_nproc; /* number of threads in a team */ - kmp_info_p *th_team_master; /* the team's master thread */ - int th_team_serialized; /* team is serialized */ -#if OMP_40_ENABLED - microtask_t th_teams_microtask; /* save entry address for teams construct */ - int th_teams_level; /* save initial level of teams construct */ - /* it is 0 on device but may be any on host */ -#endif - - /* The blocktime info is copied from the team struct to the thread sruct */ - /* at the start of a barrier, and the values stored in the team are used */ - /* at points in the code where the team struct is no longer guaranteed */ - /* to exist (from the POV of worker threads). 
*/ - int th_team_bt_intervals; - int th_team_bt_set; - - -#if KMP_AFFINITY_SUPPORTED - kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ -#endif - -/* - * The data set by the master at reinit, then R/W by the worker - */ - KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */ -#if KMP_NESTED_HOT_TEAMS - kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ -#endif -#if OMP_40_ENABLED - kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */ - kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */ -# if KMP_AFFINITY_SUPPORTED - int th_current_place; /* place currently bound to */ - int th_new_place; /* place to bind to in par reg */ - int th_first_place; /* first place in partition */ - int th_last_place; /* last place in partition */ -# endif -#endif -#if USE_ITT_BUILD - kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ - kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ - kmp_uint64 th_frame_time; /* frame timestamp */ - kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */ -#endif /* USE_ITT_BUILD */ - kmp_local_t th_local; - struct private_common *th_pri_head; - -/* - * Now the data only used by the worker (after initial allocation) - */ - /* TODO the first serial team should actually be stored in the info_t - * structure. 
this will help reduce initial allocation overhead */ - KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/ - -#if OMPT_SUPPORT - ompt_thread_info_t ompt_thread_info; -#endif - -/* The following are also read by the master during reinit */ - struct common_table *th_pri_common; - - volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */ - /* while awaiting queuing lock acquire */ - - volatile void *th_sleep_loc; // this points at a kmp_flag - - ident_t *th_ident; - unsigned th_x; // Random number generator data - unsigned th_a; // Random number generator data - -/* - * Tasking-related data for the thread - */ - kmp_task_team_t * th_task_team; // Task team struct - kmp_taskdata_t * th_current_task; // Innermost Task being executed - kmp_uint8 th_task_state; // alternating 0/1 for task team identification - kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels - kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack - kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack - - /* - * More stuff for keeping track of active/sleeping threads - * (this part is written by the worker thread) - */ - kmp_uint8 th_active_in_pool; // included in count of - // #active threads in pool - int th_active; // ! sleeping - // 32 bits for TCR/TCW - - - struct cons_header * th_cons; // used for consistency check - -/* - * Add the syncronizing data which is cache aligned and padded. 
- */ - KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ]; - - KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */ - -#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) - #define NUM_LISTS 4 - kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines -#endif - -#if KMP_OS_WINDOWS - kmp_win32_cond_t th_suspend_cv; - kmp_win32_mutex_t th_suspend_mx; - int th_suspend_init; -#endif -#if KMP_OS_UNIX - kmp_cond_align_t th_suspend_cv; - kmp_mutex_align_t th_suspend_mx; - int th_suspend_init_count; -#endif - -#if USE_ITT_BUILD - kmp_itt_mark_t th_itt_mark_single; - // alignment ??? -#endif /* USE_ITT_BUILD */ -#if KMP_STATS_ENABLED - kmp_stats_list* th_stats; -#endif -} kmp_base_info_t; - -typedef union KMP_ALIGN_CACHE kmp_info { - double th_align; /* use worst case alignment */ - char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ]; - kmp_base_info_t th; -} kmp_info_t; - -/* ------------------------------------------------------------------------ */ -// OpenMP thread team data structures -// -typedef struct kmp_base_data { - volatile kmp_uint32 t_value; -} kmp_base_data_t; - -typedef union KMP_ALIGN_CACHE kmp_sleep_team { - double dt_align; /* use worst case alignment */ - char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; - kmp_base_data_t dt; -} kmp_sleep_team_t; - -typedef union KMP_ALIGN_CACHE kmp_ordered_team { - double dt_align; /* use worst case alignment */ - char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; - kmp_base_data_t dt; -} kmp_ordered_team_t; - -typedef int (*launch_t)( int gtid ); - -/* Minimum number of ARGV entries to malloc if necessary */ -#define KMP_MIN_MALLOC_ARGV_ENTRIES 100 - -// Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we -// have supported at least 96 bytes. 
Using a larger value for more space between the master write/worker -// read section and read/write by all section seems to buy more performance on EPCC PARALLEL. -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) ) -#else -# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) ) -#endif -#define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP ) - -typedef struct KMP_ALIGN_CACHE kmp_base_team { - // Synchronization Data --------------------------------------------------------------------------------- - KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered; - kmp_balign_team_t t_bar[ bs_last_barrier ]; - volatile int t_construct; // count of single directive encountered by team - kmp_lock_t t_single_lock; // team specific lock - - // Master only ----------------------------------------------------------------------------------------- - KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team - int t_master_this_cons; // "this_construct" single counter of master in parent team - ident_t *t_ident; // if volatile, have to change too much other crud to volatile too - kmp_team_p *t_parent; // parent team - kmp_team_p *t_next_pool; // next free team in the team pool - kmp_disp_t *t_dispatch; // thread's dispatch data - kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2 -#if OMP_40_ENABLED - kmp_proc_bind_t t_proc_bind; // bind type for par region -#endif // OMP_40_ENABLED -#if USE_ITT_BUILD - kmp_uint64 t_region_time; // region begin timestamp -#endif /* USE_ITT_BUILD */ - - // Master write, workers read -------------------------------------------------------------------------- - KMP_ALIGN_CACHE void **t_argv; - int t_argc; - int t_nproc; // number of threads in team - microtask_t t_pkfn; - launch_t t_invoke; // procedure to 
launch the microtask - -#if OMPT_SUPPORT - ompt_team_info_t ompt_team_info; - ompt_lw_taskteam_t *ompt_serialized_team_info; -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - kmp_int8 t_fp_control_saved; - kmp_int8 t_pad2b; - kmp_int16 t_x87_fpu_control_word; // FP control regs - kmp_uint32 t_mxcsr; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ]; - - KMP_ALIGN_CACHE kmp_info_t **t_threads; - int t_max_argc; - int t_max_nproc; // maximum threads this team can handle (dynamicly expandable) - int t_serialized; // levels deep of serialized teams - dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system - int t_id; // team's id, assigned by debugger. - int t_level; // nested parallel level - int t_active_level; // nested active parallel level - kmp_r_sched_t t_sched; // run-time schedule for the team -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - int t_first_place; // first & last place in parent thread's partition. - int t_last_place; // Restore these values to master after par region. -#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call - - // Read/write by workers as well ----------------------------------------------------------------------- -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel' - // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel' - // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding. - char dummy_padding[1024]; -#endif - KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task - kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. 
- // for SERIALIZED teams nested 2 or more levels deep -#if OMP_40_ENABLED - kmp_int32 t_cancel_request; // typed flag to store request state of cancellation -#endif - int t_master_active; // save on fork, restore on join - kmp_taskq_t t_taskq; // this team's task queue - void *t_copypriv_data; // team specific pointer to copyprivate data array - kmp_uint32 t_copyin_counter; -#if USE_ITT_BUILD - void *t_stack_id; // team specific stack stitching id (for ittnotify) -#endif /* USE_ITT_BUILD */ -} kmp_base_team_t; - -union KMP_ALIGN_CACHE kmp_team { - kmp_base_team_t t; - double t_align; /* use worst case alignment */ - char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ]; -}; - - -typedef union KMP_ALIGN_CACHE kmp_time_global { - double dt_align; /* use worst case alignment */ - char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; - kmp_base_data_t dt; -} kmp_time_global_t; - -typedef struct kmp_base_global { - /* cache-aligned */ - kmp_time_global_t g_time; - - /* non cache-aligned */ - volatile int g_abort; - volatile int g_done; - - int g_dynamic; - enum dynamic_mode g_dynamic_mode; -} kmp_base_global_t; - -typedef union KMP_ALIGN_CACHE kmp_global { - kmp_base_global_t g; - double g_align; /* use worst case alignment */ - char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ]; -} kmp_global_t; - - -typedef struct kmp_base_root { - // TODO: GEH - combine r_active with r_in_parallel then r_active == (r_in_parallel>= 0) - // TODO: GEH - then replace r_active with t_active_levels if we can to reduce the synch - // overhead or keeping r_active - - volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ - // GEH: This is misnamed, should be r_in_parallel - volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely. 
- int r_in_parallel; /* keeps a count of active parallel regions per root */ - // GEH: This is misnamed, should be r_active_levels - kmp_team_t *r_root_team; - kmp_team_t *r_hot_team; - kmp_info_t *r_uber_thread; - kmp_lock_t r_begin_lock; - volatile int r_begin; - int r_blocktime; /* blocktime for this root and descendants */ -} kmp_base_root_t; - -typedef union KMP_ALIGN_CACHE kmp_root { - kmp_base_root_t r; - double r_align; /* use worst case alignment */ - char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ]; -} kmp_root_t; - -struct fortran_inx_info { - kmp_int32 data; -}; - -/* ------------------------------------------------------------------------ */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -extern int __kmp_settings; -extern int __kmp_duplicate_library_ok; -#if USE_ITT_BUILD -extern int __kmp_forkjoin_frames; -extern int __kmp_forkjoin_frames_mode; -#endif -extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method; -extern int __kmp_determ_red; - -#ifdef KMP_DEBUG -extern int kmp_a_debug; -extern int kmp_b_debug; -extern int kmp_c_debug; -extern int kmp_d_debug; -extern int kmp_e_debug; -extern int kmp_f_debug; -#endif /* KMP_DEBUG */ - -/* For debug information logging using rotating buffer */ -#define KMP_DEBUG_BUF_LINES_INIT 512 -#define KMP_DEBUG_BUF_LINES_MIN 1 - -#define KMP_DEBUG_BUF_CHARS_INIT 128 -#define KMP_DEBUG_BUF_CHARS_MIN 2 - -extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */ -extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */ -extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */ -extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */ - -extern char *__kmp_debug_buffer; /* Debug buffer itself */ -extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far 
*/ -extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */ -/* end rotating debug buffer */ - -#ifdef KMP_DEBUG -extern int __kmp_par_range; /* +1 => only go par for constructs in range */ - -#define KMP_PAR_RANGE_ROUTINE_LEN 1024 -extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN]; -#define KMP_PAR_RANGE_FILENAME_LEN 1024 -extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN]; -extern int __kmp_par_range_lb; -extern int __kmp_par_range_ub; -#endif - -/* For printing out dynamic storage map for threads and teams */ -extern int __kmp_storage_map; /* True means print storage map for threads and teams */ -extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */ -extern int __kmp_storage_map_verbose_specified; - -extern kmp_cpuinfo_t __kmp_cpuinfo; - -extern volatile int __kmp_init_serial; -extern volatile int __kmp_init_gtid; -extern volatile int __kmp_init_common; -extern volatile int __kmp_init_middle; -extern volatile int __kmp_init_parallel; -extern volatile int __kmp_init_monitor; -extern volatile int __kmp_init_user_locks; -extern int __kmp_init_counter; -extern int __kmp_root_counter; -extern int __kmp_version; - -/* list of address of allocated caches for commons */ -extern kmp_cached_addr_t *__kmp_threadpriv_cache_list; - -/* Barrier algorithm types and options */ -extern kmp_uint32 __kmp_barrier_gather_bb_dflt; -extern kmp_uint32 __kmp_barrier_release_bb_dflt; -extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt; -extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt; -extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ]; -extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ]; -extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ]; -extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ]; -extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ]; -extern char const 
*__kmp_barrier_pattern_env_name [ bs_last_barrier ]; -extern char const *__kmp_barrier_type_name [ bs_last_barrier ]; -extern char const *__kmp_barrier_pattern_name [ bp_last_bar ]; - -/* Global Locks */ -extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */ -extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ -extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ -extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ -extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ - -extern kmp_lock_t __kmp_global_lock; /* control OS/global access */ -extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */ -extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */ - -/* used for yielding spin-waits */ -extern unsigned int __kmp_init_wait; /* initial number of spin-tests */ -extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */ - -extern enum library_type __kmp_library; - -extern enum sched_type __kmp_sched; /* default runtime scheduling */ -extern enum sched_type __kmp_static; /* default static scheduling method */ -extern enum sched_type __kmp_guided; /* default guided scheduling method */ -extern enum sched_type __kmp_auto; /* default auto scheduling method */ -extern int __kmp_chunk; /* default runtime chunk size */ - -extern size_t __kmp_stksize; /* stack size per thread */ -extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */ -extern size_t __kmp_stkoffset; /* stack offset per thread */ -extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */ - -extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */ -extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */ -extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? 
*/ -extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */ -extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ -extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */ -extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */ -extern int __kmp_env_checks; /* was KMP_CHECKS specified? */ -extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */ -extern int __kmp_generate_warnings; /* should we issue warnings? */ -extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */ - -#ifdef DEBUG_SUSPEND -extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */ -#endif - -extern kmp_uint32 __kmp_yield_init; -extern kmp_uint32 __kmp_yield_next; -extern kmp_uint32 __kmp_yielding_on; -extern kmp_uint32 __kmp_yield_cycle; -extern kmp_int32 __kmp_yield_on_count; -extern kmp_int32 __kmp_yield_off_count; - -/* ------------------------------------------------------------------------- */ -extern int __kmp_allThreadsSpecified; - -extern size_t __kmp_align_alloc; -/* following data protected by initialization routines */ -extern int __kmp_xproc; /* number of processors in the system */ -extern int __kmp_avail_proc; /* number of processors available to the process */ -extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */ -extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */ -extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */ -extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */ -extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */ -extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */ -extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */ -extern int __kmp_tp_cached; /* whether threadprivate 
cache has been created (__kmpc_threadprivate_cached()) */ -extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */ -extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */ -extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */ -extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */ -#ifdef KMP_ADJUST_BLOCKTIME -extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */ -#endif /* KMP_ADJUST_BLOCKTIME */ -#ifdef KMP_DFLT_NTH_CORES -extern int __kmp_ncores; /* Total number of cores for threads placement */ -#endif -extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */ - -extern int __kmp_need_register_atfork_specified; -extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */ -extern int __kmp_gtid_mode; /* Method of getting gtid, values: - 0 - not set, will be set at runtime - 1 - using stack search - 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS)) - 3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only. 
- */ -extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */ -#ifdef KMP_TDATA_GTID -#if KMP_OS_WINDOWS -extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */ -#else -extern __thread int __kmp_gtid; -#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */ -#endif -extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */ -extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */ -extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */ -extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */ -#if KMP_NESTED_HOT_TEAMS -extern int __kmp_hot_teams_mode; -extern int __kmp_hot_teams_max_level; -#endif - -# if KMP_OS_LINUX -extern enum clock_function_type __kmp_clock_function; -extern int __kmp_clock_function_param; -# endif /* KMP_OS_LINUX */ - -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) -extern enum mic_type __kmp_mic_type; -#endif - -# ifdef USE_LOAD_BALANCE -extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */ -# endif /* USE_LOAD_BALANCE */ - -// OpenMP 3.1 - Nested num threads array -typedef struct kmp_nested_nthreads_t { - int * nth; - int size; - int used; -} kmp_nested_nthreads_t; - -extern kmp_nested_nthreads_t __kmp_nested_nth; - -#if KMP_USE_ADAPTIVE_LOCKS - -// Parameters for the speculative lock backoff system. -struct kmp_adaptive_backoff_params_t { - // Number of soft retries before it counts as a hard retry. - kmp_uint32 max_soft_retries; - // Badness is a bit mask : 0,1,3,7,15,... 
on each hard failure we move one to the right - kmp_uint32 max_badness; -}; - -extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params; - -#if KMP_DEBUG_ADAPTIVE_LOCKS -extern char * __kmp_speculative_statsfile; -#endif - -#endif // KMP_USE_ADAPTIVE_LOCKS - -#if OMP_40_ENABLED -extern int __kmp_display_env; /* TRUE or FALSE */ -extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */ -extern int __kmp_omp_cancellation; /* TRUE or FALSE */ -#endif - -/* ------------------------------------------------------------------------- */ - -/* --------------------------------------------------------------------------- */ -/* the following are protected by the fork/join lock */ -/* write: lock read: anytime */ -extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */ -/* read/write: lock */ -extern volatile kmp_team_t * __kmp_team_pool; -extern volatile kmp_info_t * __kmp_thread_pool; - -/* total number of threads reachable from some root thread including all root threads*/ -extern volatile int __kmp_nth; -/* total number of threads reachable from some root thread including all root threads, - and those in the thread pool */ -extern volatile int __kmp_all_nth; -extern int __kmp_thread_pool_nth; -extern volatile int __kmp_thread_pool_active_nth; - -extern kmp_root_t **__kmp_root; /* root of thread hierarchy */ -/* end data protected by fork/join lock */ -/* --------------------------------------------------------------------------- */ - -extern kmp_global_t __kmp_global; /* global status */ - -extern kmp_info_t __kmp_monitor; -extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library. -extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library. - -#if USE_DEBUGGER - -#define _KMP_GEN_ID( counter ) \ - ( \ - __kmp_debugging \ - ? 
\ - KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \ - : \ - ~ 0 \ - ) -#else -#define _KMP_GEN_ID( counter ) \ - ( \ - ~ 0 \ - ) -#endif /* USE_DEBUGGER */ - -#define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter ) -#define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter ) - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... ); - -extern void __kmp_serial_initialize( void ); -extern void __kmp_middle_initialize( void ); -extern void __kmp_parallel_initialize( void ); - -extern void __kmp_internal_begin( void ); -extern void __kmp_internal_end_library( int gtid ); -extern void __kmp_internal_end_thread( int gtid ); -extern void __kmp_internal_end_atexit( void ); -extern void __kmp_internal_end_fini( void ); -extern void __kmp_internal_end_dtor( void ); -extern void __kmp_internal_end_dest( void* ); - -extern int __kmp_register_root( int initial_thread ); -extern void __kmp_unregister_root( int gtid ); - -extern int __kmp_ignore_mppbeg( void ); -extern int __kmp_ignore_mppend( void ); - -extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ); -extern void __kmp_exit_single( int gtid ); - -extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); -extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); - -#ifdef USE_LOAD_BALANCE -extern int __kmp_get_load_balance( int ); -#endif - -#ifdef BUILD_TV -extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr ); -#endif - -extern int __kmp_get_global_thread_id( void ); -extern int __kmp_get_global_thread_id_reg( void ); -extern void __kmp_exit_thread( int exit_status ); -extern void __kmp_abort( char const * format, ... 
); -extern void __kmp_abort_thread( void ); -extern void __kmp_abort_process( void ); -extern void __kmp_warn( char const * format, ... ); - -extern void __kmp_set_num_threads( int new_nth, int gtid ); - -// Returns current thread (pointer to kmp_info_t). Current thread *must* be registered. -static inline kmp_info_t * __kmp_entry_thread() -{ - int gtid = __kmp_entry_gtid(); - - return __kmp_threads[gtid]; -} - -extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels ); -extern int __kmp_get_max_active_levels( int gtid ); -extern int __kmp_get_ancestor_thread_num( int gtid, int level ); -extern int __kmp_get_team_size( int gtid, int level ); -extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk ); -extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk ); - -extern unsigned short __kmp_get_random( kmp_info_t * thread ); -extern void __kmp_init_random( kmp_info_t * thread ); - -extern kmp_r_sched_t __kmp_get_schedule_global( void ); -extern void __kmp_adjust_num_threads( int new_nproc ); - -extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL ); -extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL ); -extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL ); -#define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR ) -#define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR ) -#define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR ) - -#if USE_FAST_MEMORY -extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL ); -extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL ); -extern void __kmp_free_fast_memory( kmp_info_t *this_thr ); -extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr ); -#define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR ) -#define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) 
KMP_SRC_LOC_CURR ) -#endif - -extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL ); -extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL ); -extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL ); -extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL ); -#define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR ) -#define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR ) -#define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR ) -#define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR ) - -#define KMP_INTERNAL_MALLOC(sz) malloc(sz) -#define KMP_INTERNAL_FREE(p) free(p) -#define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz)) -#define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz)) - -extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads ); - -#if OMP_40_ENABLED -extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind ); -extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads ); -#endif - -extern void __kmp_yield( int cond ); - -extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, - kmp_int32 chunk ); -extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk ); -extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, - kmp_int64 chunk ); -extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, - kmp_int64 chunk ); - -extern int __kmpc_dispatch_next_4( 
ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ); -extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ); -extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ); -extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ); - -extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid ); -extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid ); -extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ); -extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ); - - -#ifdef KMP_GOMP_COMPAT - -extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ); -extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ); -extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ); -extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ); -extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid ); -extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid ); -extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid ); -extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid ); - -#endif /* KMP_GOMP_COMPAT */ - - -extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_neq_4( 
kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker ); - -extern kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker ); -extern kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker ); -extern kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker ); -extern kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker ); -extern kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker ); - -extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj ); -extern kmp_uint64 __kmp_wait_yield_8( kmp_uint64 volatile * spinner, kmp_uint64 checker, kmp_uint32 (*pred) (kmp_uint64, kmp_uint64), void * obj ); - -class kmp_flag_32; -class kmp_flag_64; -class kmp_flag_oncore; -extern void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin -#if USE_ITT_BUILD - , void * itt_sync_obj -#endif - ); -extern void __kmp_release_32(kmp_flag_32 *flag); -extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin -#if USE_ITT_BUILD - , void * itt_sync_obj -#endif - ); -extern void __kmp_release_64(kmp_flag_64 *flag); -extern void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin -#if USE_ITT_BUILD - , void * itt_sync_obj -#endif - ); -extern void __kmp_release_oncore(kmp_flag_oncore *flag); - -extern void __kmp_infinite_loop( void ); - -extern void __kmp_cleanup( void ); - -#if KMP_HANDLE_SIGNALS - extern int __kmp_handle_signals; - extern void __kmp_install_signals( int parallel_init ); - extern void __kmp_remove_signals( void ); -#endif - -extern void __kmp_clear_system_time( void ); -extern void __kmp_read_system_time( double *delta ); - -extern void __kmp_check_stack_overlap( kmp_info_t *thr ); - -extern void 
__kmp_expand_host_name( char *buffer, size_t size ); -extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern ); - -#if KMP_OS_WINDOWS -extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */ -#endif - -extern void __kmp_runtime_initialize( void ); /* machine specific initialization */ -extern void __kmp_runtime_destroy( void ); - -#if KMP_AFFINITY_SUPPORTED -extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask); -extern void __kmp_affinity_initialize(void); -extern void __kmp_affinity_uninitialize(void); -extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ -#if OMP_40_ENABLED -extern void __kmp_affinity_set_place(int gtid); -#endif -extern void __kmp_affinity_determine_capable( const char *env_var ); -extern int __kmp_aux_set_affinity(void **mask); -extern int __kmp_aux_get_affinity(void **mask); -extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask); -extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask); -extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask); -extern void __kmp_balanced_affinity( int tid, int team_size ); -#endif /* KMP_AFFINITY_SUPPORTED */ - -extern void __kmp_cleanup_hierarchy(); -extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -extern int __kmp_futex_determine_capable( void ); - -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -extern void __kmp_gtid_set_specific( int gtid ); -extern int __kmp_gtid_get_specific( void ); - -extern double __kmp_read_cpu_time( void ); - -extern int __kmp_read_system_info( struct kmp_sys_info *info ); - -extern void __kmp_create_monitor( kmp_info_t *th ); - -extern void *__kmp_launch_thread( kmp_info_t *thr ); - -extern void __kmp_create_worker( int gtid, kmp_info_t 
*th, size_t stack_size ); - -#if KMP_OS_WINDOWS -extern int __kmp_still_running(kmp_info_t *th); -extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val ); -extern void __kmp_free_handle( kmp_thread_t tHandle ); -#endif - -extern void __kmp_reap_monitor( kmp_info_t *th ); -extern void __kmp_reap_worker( kmp_info_t *th ); -extern void __kmp_terminate_thread( int gtid ); - -extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag ); -extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag ); -extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag ); -extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag ); -extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag ); -extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag ); - -extern void __kmp_elapsed( double * ); -extern void __kmp_elapsed_tick( double * ); - -extern void __kmp_enable( int old_state ); -extern void __kmp_disable( int *old_state ); - -extern void __kmp_thread_sleep( int millis ); - -extern void __kmp_common_initialize( void ); -extern void __kmp_common_destroy( void ); -extern void __kmp_common_destroy_gtid( int gtid ); - -#if KMP_OS_UNIX -extern void __kmp_register_atfork( void ); -#endif -extern void __kmp_suspend_initialize( void ); -extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th ); - -extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root, - kmp_team_t *team, int tid); -#if OMP_40_ENABLED -extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, -#endif - kmp_proc_bind_t proc_bind, - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); -#else -extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, -#endif - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); -#endif // 
OMP_40_ENABLED -extern void __kmp_free_thread( kmp_info_t * ); -extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) ); -extern kmp_team_t * __kmp_reap_team( kmp_team_t * ); - -/* ------------------------------------------------------------------------ */ - -extern void __kmp_initialize_bget( kmp_info_t *th ); -extern void __kmp_finalize_bget( kmp_info_t *th ); - -KMP_EXPORT void *kmpc_malloc( size_t size ); -KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize ); -KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size ); -KMP_EXPORT void kmpc_free( void *ptr ); - -/* ------------------------------------------------------------------------ */ -/* declarations for internal use */ - -extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split, - size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) ); -extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid ); - -/*! - * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call. - */ -enum fork_context_e -{ - fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */ - fork_context_intel, /**< Called from Intel generated code. 
*/ - fork_context_last -}; -extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context, - kmp_int32 argc, -#if OMPT_SUPPORT - void *unwrapped_task, -#endif - microtask_t microtask, launch_t invoker, -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX - va_list *ap -#else - va_list ap -#endif - ); - -extern void __kmp_join_call( ident_t *loc, int gtid -#if OMPT_SUPPORT - , enum fork_context_e fork_context -#endif -#if OMP_40_ENABLED - , int exit_teams = 0 -#endif - ); - -extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid); -extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ); -extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ); -extern int __kmp_invoke_task_func( int gtid ); -extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); -extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); - -// should never have been exported -KMP_EXPORT int __kmpc_invoke_task_func( int gtid ); -#if OMP_40_ENABLED -extern int __kmp_invoke_teams_master( int gtid ); -extern void __kmp_teams_master( int gtid ); -#endif -extern void __kmp_save_internal_controls( kmp_info_t * thread ); -extern void __kmp_user_set_library (enum library_type arg); -extern void __kmp_aux_set_library (enum library_type arg); -extern void __kmp_aux_set_stacksize( size_t arg); -extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid); -extern void __kmp_aux_set_defaults( char const * str, int len ); - -/* Functions below put here to call them from __kmp_aux_env_initialize() in kmp_settings.c */ -void kmpc_set_blocktime (int arg); -void ompc_set_nested( int flag ); -void ompc_set_dynamic( int flag ); -void ompc_set_num_threads( int arg ); - -extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr, - kmp_team_t *team, int 
tid ); -extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr ); -extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, - kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ); -extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, - kmp_team_t *team, int tid, int set_curr_task ); - -int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void * itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); -int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void * itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); -int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void * itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); - -extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ); -extern void __kmp_reap_task_teams( void ); -extern void __kmp_wait_to_unref_task_teams( void ); -extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always ); -extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team ); -extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team -#if USE_ITT_BUILD - , void * itt_sync_obj -#endif /* USE_ITT_BUILD */ - , int wait=1 -); -extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ); - -extern int __kmp_is_address_mapped( void *addr ); -extern kmp_uint64 __kmp_hardware_timestamp(void); - -#if KMP_OS_UNIX -extern int __kmp_read_from_file( char const *path, char const *format, ... 
); -#endif - -/* ------------------------------------------------------------------------ */ -// -// Assembly routines that have no compiler intrinsic replacement -// - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -extern void __kmp_query_cpuid( kmp_cpuinfo_t *p ); - -#define __kmp_load_mxcsr(p) _mm_setcsr(*(p)) -static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); } - -extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -extern void __kmp_clear_x87_fpu_status_word(); -# define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[] -#if OMPT_SUPPORT - , void **exit_frame_ptr -#endif -); - - -/* ------------------------------------------------------------------------ */ - -KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags ); -KMP_EXPORT void __kmpc_end ( ident_t * ); - -KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor, - kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length ); -KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor ); -KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size ); - -KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * ); -KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * ); -KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * ); -KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * ); - -KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * ); -KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... 
); - -KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT void __kmpc_flush ( ident_t *); -KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); -KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); - -#if OMP_41_ENABLED -KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint ); -#endif - -KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk ); - -KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid ); - -KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit ); - -extern void KMPC_SET_NUM_THREADS ( int arg ); -extern void KMPC_SET_DYNAMIC ( int flag ); -extern void KMPC_SET_NESTED ( int flag ); - -/* --------------------------------------------------------------------------- */ - -/* - * Taskq interface 
routines - */ - -KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk, - size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds); -KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); -KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); -KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status); -KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); -KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task); - -/* ------------------------------------------------------------------------ */ - -/* - * OMP 3.0 tasking interface routines - */ - -KMP_EXPORT kmp_int32 -__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); -KMP_EXPORT kmp_task_t* -__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, - size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ); -KMP_EXPORT void -__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); -KMP_EXPORT void -__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); -KMP_EXPORT kmp_int32 -__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); -KMP_EXPORT kmp_int32 -__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ); - -KMP_EXPORT kmp_int32 -__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ); - -#if TASK_UNUSED -void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); -void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); -#endif // TASK_UNUSED - -/* ------------------------------------------------------------------------ */ - -#if OMP_40_ENABLED - -KMP_EXPORT void __kmpc_taskgroup( ident_t 
* loc, int gtid ); -KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid ); - -KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, - kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ); -KMP_EXPORT void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ); -extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task ); - -extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate ); - -KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); -KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); -KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid); -KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); - -#if OMP_41_ENABLED - -KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask ); -KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ); - -#endif - -#endif - - -/* - * Lock interface routines (fast versions with gtid passed in) - */ -KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 
-KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); - -#if OMP_41_ENABLED -KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); -KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); -#endif - -/* ------------------------------------------------------------------------ */ - -/* - * Interface to fast scalable reduce methods routines - */ - -KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ); -KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); -KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ); -KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); - -/* - * internal fast reduction routines - */ - -extern PACKED_REDUCTION_METHOD_T -__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ); - -// this function is for testing set/get/determine reduce method -KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void ); - -KMP_EXPORT kmp_uint64 __kmpc_get_taskid(); -KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); - -// this function exported for testing of KMP_PLACE_THREADS functionality -KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int); - -/* ------------------------------------------------------------------------ */ -/* 
------------------------------------------------------------------------ */ - -// C++ port -// missing 'extern "C"' declarations - -KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc ); -KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads ); - -#if OMP_40_ENABLED -KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind ); -KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads ); -KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...); - -#endif - -KMP_EXPORT void* -__kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid, - void * data, size_t size, void *** cache ); - -// Symbols for MS mutual detection. -extern int _You_must_link_with_exactly_one_OpenMP_library; -extern int _You_must_link_with_Intel_OpenMP_library; -#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 ) - extern int _You_must_link_with_Microsoft_OpenMP_library; -#endif - - -// The routines below are not exported. -// Consider making them 'static' in corresponding source files. -void -kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); -struct private_common * -kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); - -// -// ompc_, kmpc_ entries moved from omp.h. 
-// -#if KMP_OS_WINDOWS -# define KMPC_CONVENTION __cdecl -#else -# define KMPC_CONVENTION -#endif - -#ifndef __OMP_H -typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 -} omp_sched_t; -typedef void * kmp_affinity_mask_t; -#endif - -KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); -KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); -KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); -KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); -KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); -KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); -KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); - -KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); -KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); -KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int); -KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *); - -#ifdef __cplusplus -} -#endif - -#endif /* KMP_H */ - +/*! \file */ +/* + * kmp.h -- KPTS runtime header file. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_H +#define KMP_H + +#include "kmp_config.h" + +/* #define BUILD_PARALLEL_ORDERED 1 */ + +/* This fix replaces gettimeofday with clock_gettime for better scalability on + the Altix. Requires user code to be linked with -lrt. 
+*/ +//#define FIX_SGI_CLOCK + +/* Defines for OpenMP 3.0 tasking and auto scheduling */ + +# ifndef KMP_STATIC_STEAL_ENABLED +# define KMP_STATIC_STEAL_ENABLED 1 +# endif + +#define TASK_CURRENT_NOT_QUEUED 0 +#define TASK_CURRENT_QUEUED 1 + +#define TASK_DEQUE_BITS 8 // Used solely to define TASK_DEQUE_SIZE and TASK_DEQUE_MASK. +#define TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS ) +#define TASK_DEQUE_MASK ( TASK_DEQUE_SIZE - 1 ) + +#ifdef BUILD_TIED_TASK_STACK +#define TASK_STACK_EMPTY 0 // entries when the stack is empty + +#define TASK_STACK_BLOCK_BITS 5 // Used to define TASK_STACK_SIZE and TASK_STACK_MASK +#define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array +#define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block +#endif // BUILD_TIED_TASK_STACK + +#define TASK_NOT_PUSHED 1 +#define TASK_SUCCESSFULLY_PUSHED 0 +#define TASK_TIED 1 +#define TASK_UNTIED 0 +#define TASK_EXPLICIT 1 +#define TASK_IMPLICIT 0 +#define TASK_PROXY 1 +#define TASK_FULL 0 + +#define KMP_CANCEL_THREADS +#define KMP_THREAD_ATTR + +#include +#include +#include +#include +#include +#include +/* include don't use; problems with /MD on Windows* OS NT due to bad Microsoft library */ +/* some macros provided below to replace some of these functions */ +#ifndef __ABSOFT_WIN +#include +#endif +#include +#include + +#include + +#include "kmp_os.h" + +#include "kmp_safe_c_api.h" + +#if KMP_STATS_ENABLED +class kmp_stats_list; +#endif + +#if KMP_USE_HWLOC +#include "hwloc.h" +extern hwloc_topology_t __kmp_hwloc_topology; +extern int __kmp_hwloc_error; +#endif + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#include +#endif + +#include "kmp_version.h" +#include "kmp_debug.h" +#include "kmp_lock.h" +#if USE_DEBUGGER +#include "kmp_debugger.h" +#endif +#include "kmp_i18n.h" + +#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS) + +#include "kmp_wrapper_malloc.h" +#if KMP_OS_UNIX +# include +# if !defined 
NSIG && defined _NSIG +# define NSIG _NSIG +# endif +#endif + +#if KMP_OS_LINUX +# pragma weak clock_gettime +#endif + +#if OMPT_SUPPORT +#include "ompt-internal.h" +#endif + +/*Select data placement in NUMA memory */ +#define NO_FIRST_TOUCH 0 +#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */ + +/* If not specified on compile command line, assume no first touch */ +#ifndef BUILD_MEMORY +#define BUILD_MEMORY NO_FIRST_TOUCH +#endif + +// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. +// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size. +#ifndef USE_FAST_MEMORY +#define USE_FAST_MEMORY 3 +#endif + +#ifndef KMP_NESTED_HOT_TEAMS +# define KMP_NESTED_HOT_TEAMS 0 +# define USE_NESTED_HOT_ARG(x) +#else +# if KMP_NESTED_HOT_TEAMS +# if OMP_40_ENABLED +# define USE_NESTED_HOT_ARG(x) ,x +# else +// Nested hot teams feature depends on omp 4.0, disable it for earlier versions +# undef KMP_NESTED_HOT_TEAMS +# define KMP_NESTED_HOT_TEAMS 0 +# define USE_NESTED_HOT_ARG(x) +# endif +# else +# define USE_NESTED_HOT_ARG(x) +# endif +#endif + +// Assume using BGET compare_exchange instruction instead of lock by default. +#ifndef USE_CMP_XCHG_FOR_BGET +#define USE_CMP_XCHG_FOR_BGET 1 +#endif + +// Test to see if queuing lock is better than bootstrap lock for bget +// #ifndef USE_QUEUING_LOCK_FOR_BGET +// #define USE_QUEUING_LOCK_FOR_BGET +// #endif + +#define KMP_NSEC_PER_SEC 1000000000L +#define KMP_USEC_PER_SEC 1000000L + +/*! +@ingroup BASIC_TYPES +@{ +*/ + +// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...) +/*! +Values for bit flags used in the ident_t to describe the fields. +*/ +/*! Use trampoline for internal microtasks */ +#define KMP_IDENT_IMB 0x01 +/*! Use c-style ident structure */ +#define KMP_IDENT_KMPC 0x02 +/* 0x04 is no longer used */ +/*! 
Entry point generated by auto-parallelization */ +#define KMP_IDENT_AUTOPAR 0x08 +/*! Compiler generates atomic reduction option for kmpc_reduce* */ +#define KMP_IDENT_ATOMIC_REDUCE 0x10 +/*! To mark a 'barrier' directive in user code */ +#define KMP_IDENT_BARRIER_EXPL 0x20 +/*! To Mark implicit barriers. */ +#define KMP_IDENT_BARRIER_IMPL 0x0040 +#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0 +#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040 +#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0 + +#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140 +#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0 + +/*! + * The ident structure that describes a source location. + */ +typedef struct ident { + kmp_int32 reserved_1; /**< might be used in Fortran; see above */ + kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */ + kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ +#if USE_ITT_BUILD + /* but currently used for storing region-specific ITT */ + /* contextual information. */ +#endif /* USE_ITT_BUILD */ + kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ + char const *psource; /**< String describing the source location. + The string is composed of semi-colon separated fields which describe the source file, + the function and a pair of line numbers that delimit the construct. + */ +} ident_t; +/*! +@} +*/ + +// Some forward declarations. + +typedef union kmp_team kmp_team_t; +typedef struct kmp_taskdata kmp_taskdata_t; +typedef union kmp_task_team kmp_task_team_t; +typedef union kmp_team kmp_team_p; +typedef union kmp_info kmp_info_p; +typedef union kmp_root kmp_root_p; + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* Pack two 32-bit signed integers into a 64-bit signed integer */ +/* ToDo: Fix word ordering for big-endian machines. 
*/ +#define KMP_PACK_64(HIGH_32,LOW_32) \ + ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) ) + + +/* + * Generic string manipulation macros. + * Assume that _x is of type char * + */ +#define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; } +#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; } +#define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; } + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) ) +#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + + +/* Enumeration types */ + +enum kmp_state_timer { + ts_stop, + ts_start, + ts_pause, + + ts_last_state +}; + +enum dynamic_mode { + dynamic_default, +#ifdef USE_LOAD_BALANCE + dynamic_load_balance, +#endif /* USE_LOAD_BALANCE */ + dynamic_random, + dynamic_thread_limit, + dynamic_max +}; + +/* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */ +#ifndef KMP_SCHED_TYPE_DEFINED +#define KMP_SCHED_TYPE_DEFINED +typedef enum kmp_sched { + kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check + // Note: need to adjust __kmp_sch_map global array in case this enum is changed + kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) + kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) + kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) + kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) + kmp_sched_upper_std = 5, // upper bound for standard schedules + kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules + kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) +// kmp_sched_static_steal = 102, // 
mapped to kmp_sch_static_steal (44) + kmp_sched_upper = 102, + kmp_sched_default = kmp_sched_static // default scheduling +} kmp_sched_t; +#endif + +/*! + @ingroup WORK_SHARING + * Describes the loop schedule to be used for a parallel for loop. + */ +enum sched_type { + kmp_sch_lower = 32, /**< lower bound for unordered values */ + kmp_sch_static_chunked = 33, + kmp_sch_static = 34, /**< static unspecialized */ + kmp_sch_dynamic_chunked = 35, + kmp_sch_guided_chunked = 36, /**< guided unspecialized */ + kmp_sch_runtime = 37, + kmp_sch_auto = 38, /**< auto */ + kmp_sch_trapezoidal = 39, + + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_static_greedy = 40, + kmp_sch_static_balanced = 41, + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_guided_iterative_chunked = 42, + kmp_sch_guided_analytical_chunked = 43, + + kmp_sch_static_steal = 44, /**< accessible only through KMP_SCHEDULE environment variable */ + + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_upper = 45, /**< upper bound for unordered values */ + + kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ + kmp_ord_static_chunked = 65, + kmp_ord_static = 66, /**< ordered static unspecialized */ + kmp_ord_dynamic_chunked = 67, + kmp_ord_guided_chunked = 68, + kmp_ord_runtime = 69, + kmp_ord_auto = 70, /**< ordered auto */ + kmp_ord_trapezoidal = 71, + kmp_ord_upper = 72, /**< upper bound for ordered values */ + +#if OMP_40_ENABLED + /* Schedules for Distribute construct */ + kmp_distribute_static_chunked = 91, /**< distribute static chunked */ + kmp_distribute_static = 92, /**< distribute static unspecialized */ +#endif + + /* + * For the "nomerge" versions, kmp_dispatch_next*() will always return + * a single iteration/chunk, even if the loop is serialized. For the + * schedule types listed above, the entire iteration vector is returned + * if the loop is serialized. 
This doesn't work for gcc/gcomp sections. + */ + kmp_nm_lower = 160, /**< lower bound for nomerge values */ + + kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), + kmp_nm_static = 162, /**< static unspecialized */ + kmp_nm_dynamic_chunked = 163, + kmp_nm_guided_chunked = 164, /**< guided unspecialized */ + kmp_nm_runtime = 165, + kmp_nm_auto = 166, /**< auto */ + kmp_nm_trapezoidal = 167, + + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_nm_static_greedy = 168, + kmp_nm_static_balanced = 169, + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_nm_guided_iterative_chunked = 170, + kmp_nm_guided_analytical_chunked = 171, + kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */ + + kmp_nm_ord_static_chunked = 193, + kmp_nm_ord_static = 194, /**< ordered static unspecialized */ + kmp_nm_ord_dynamic_chunked = 195, + kmp_nm_ord_guided_chunked = 196, + kmp_nm_ord_runtime = 197, + kmp_nm_ord_auto = 198, /**< auto */ + kmp_nm_ord_trapezoidal = 199, + kmp_nm_upper = 200, /**< upper bound for nomerge values */ + + kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ +}; + +/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */ +typedef struct kmp_r_sched { + enum sched_type r_sched_type; + int chunk; +} kmp_r_sched_t; + +extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types + +enum library_type { + library_none, + library_serial, + library_turnaround, + library_throughput +}; + +#if KMP_OS_LINUX +enum clock_function_type { + clock_function_gettimeofday, + clock_function_clock_gettime +}; +#endif /* KMP_OS_LINUX */ + +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) +enum mic_type { + non_mic, + mic1, + mic2, + mic3, + dummy +}; +#endif + +/* ------------------------------------------------------------------------ */ +/* -- fast reduction stuff 
------------------------------------------------ */ + +#undef KMP_FAST_REDUCTION_BARRIER +#define KMP_FAST_REDUCTION_BARRIER 1 + +#undef KMP_FAST_REDUCTION_CORE_DUO +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + #define KMP_FAST_REDUCTION_CORE_DUO 1 +#endif + +enum _reduction_method { + reduction_method_not_defined = 0, + critical_reduce_block = ( 1 << 8 ), + atomic_reduce_block = ( 2 << 8 ), + tree_reduce_block = ( 3 << 8 ), + empty_reduce_block = ( 4 << 8 ) +}; + +// description of the packed_reduction_method variable +// the packed_reduction_method variable consists of two enum types variables that are packed together into 0-th byte and 1-st byte: +// 0: ( packed_reduction_method & 0x000000FF ) is a 'enum barrier_type' value of barrier that will be used in fast reduction: bs_plain_barrier or bs_reduction_barrier +// 1: ( packed_reduction_method & 0x0000FF00 ) is a reduction method that will be used in fast reduction; +// reduction method is of 'enum _reduction_method' type and it's defined the way so that the bits of 0-th byte are empty, +// so no need to execute a shift instruction while packing/unpacking + +#if KMP_FAST_REDUCTION_BARRIER + #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ + ( ( reduction_method ) | ( barrier_type ) ) + + #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ + ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) ) + + #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ + ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) ) +#else + #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ + ( reduction_method ) + + #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ + ( packed_reduction_method ) + + #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ + ( bs_plain_barrier ) +#endif + +#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \ + ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == 
( which_reduction_block ) ) + +#if KMP_FAST_REDUCTION_BARRIER + #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ + ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) ) + + #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ + ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) ) +#endif + +typedef int PACKED_REDUCTION_METHOD_T; + +/* -- end of fast reduction stuff ----------------------------------------- */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if KMP_OS_WINDOWS +# define USE_CBLKDATA +# pragma warning( push ) +# pragma warning( disable: 271 310 ) +# include +# pragma warning( pop ) +#endif + +#if KMP_OS_UNIX +# include +# include +#endif + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* + * Only Linux* OS and Windows* OS support thread affinity. 
+ */ +#if KMP_AFFINITY_SUPPORTED + +extern size_t __kmp_affin_mask_size; +# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) +# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) +# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) +# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT) + +#if KMP_USE_HWLOC + +typedef hwloc_cpuset_t kmp_affin_mask_t; +# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i) +# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i) +# define KMP_CPU_CLR(i,mask) hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i) +# define KMP_CPU_ZERO(mask) hwloc_bitmap_zero((hwloc_cpuset_t)mask) +# define KMP_CPU_COPY(dest, src) hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src) +# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ + { \ + unsigned i; \ + for(i=0;i<(unsigned)max_bit_number+1;i++) { \ + if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \ + hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \ + } else { \ + hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \ + } \ + } \ + } \ + +# define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src) +# define KMP_CPU_SET_ITERATE(i,mask) \ + for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i)) + +# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc() +# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr); +# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) +# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) +# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) +# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) + +// +// The following macro should be used to index an array of masks. +// The array should be declared as "kmp_affinity_t *" and allocated with +// size "__kmp_affinity_mask_size * len". 
The macro takes care of the fact +// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but +// on Linux* OS, sizeof(kmp_affin_t) is 1. +// +# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i])) +# define KMP_CPU_ALLOC_ARRAY(arr, n) { \ + arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \ + unsigned i; \ + for(i=0;i<(unsigned)n;i++) { \ + arr[i] = hwloc_bitmap_alloc(); \ + } \ + } +# define KMP_CPU_FREE_ARRAY(arr, n) { \ + unsigned i; \ + for(i=0;i<(unsigned)n;i++) { \ + hwloc_bitmap_free(arr[i]); \ + } \ + __kmp_free(arr); \ + } +# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \ + arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \ + unsigned i; \ + for(i=0;i<(unsigned)n;i++) { \ + arr[i] = hwloc_bitmap_alloc(); \ + } \ + } +# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \ + unsigned i; \ + for(i=0;i<(unsigned)n;i++) { \ + hwloc_bitmap_free(arr[i]); \ + } \ + KMP_INTERNAL_FREE(arr); \ + } + +#else /* KMP_USE_HWLOC */ +# define KMP_CPU_SET_ITERATE(i,mask) \ + for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i) + +# if KMP_OS_LINUX +// +// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size +// (in bytes). It should be allocated on a word boundary. +// +// WARNING!!! We have made the base type of the affinity mask unsigned char, +// in order to eliminate a lot of checks that the true system mask size is +// really a multiple of 4 bytes (on Linux* OS). +// +// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!! 
+// + +typedef unsigned char kmp_affin_mask_t; + +# define _KMP_CPU_SET(i,mask) (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT))) +# define KMP_CPU_SET(i,mask) _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask))) +# define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))) +# define KMP_CPU_ISSET(i,mask) _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask))) +# define _KMP_CPU_CLR(i,mask) (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT))) +# define KMP_CPU_CLR(i,mask) _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask))) + +# define KMP_CPU_ZERO(mask) \ + { \ + size_t __i; \ + for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ + ((kmp_affin_mask_t *)(mask))[__i] = 0; \ + } \ + } + +# define KMP_CPU_COPY(dest, src) \ + { \ + size_t __i; \ + for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ + ((kmp_affin_mask_t *)(dest))[__i] \ + = ((kmp_affin_mask_t *)(src))[__i]; \ + } \ + } + +# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ + { \ + size_t __i; \ + for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ + ((kmp_affin_mask_t *)(mask))[__i] \ + = ~((kmp_affin_mask_t *)(mask))[__i]; \ + } \ + } + +# define KMP_CPU_UNION(dest, src) \ + { \ + size_t __i; \ + for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ + ((kmp_affin_mask_t *)(dest))[__i] \ + |= ((kmp_affin_mask_t *)(src))[__i]; \ + } \ + } + +# endif /* KMP_OS_LINUX */ + +# if KMP_OS_WINDOWS +// +// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on +// Intel(R) 64 it is 8 bytes times the number of processor groups. +// + +# if KMP_GROUP_AFFINITY + +// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). 
+# if _MSC_VER < 1600 +typedef struct GROUP_AFFINITY { + KAFFINITY Mask; + WORD Group; + WORD Reserved[3]; +} GROUP_AFFINITY; +# endif + +typedef DWORD_PTR kmp_affin_mask_t; + +extern int __kmp_num_proc_groups; + +# define _KMP_CPU_SET(i,mask) \ + (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \ + (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) + +# define KMP_CPU_SET(i,mask) \ + _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask))) + +# define _KMP_CPU_ISSET(i,mask) \ + (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \ + (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))) + +# define KMP_CPU_ISSET(i,mask) \ + _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask))) + +# define _KMP_CPU_CLR(i,mask) \ + (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \ + ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) + +# define KMP_CPU_CLR(i,mask) \ + _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask))) + +# define KMP_CPU_ZERO(mask) \ + { \ + int __i; \ + for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ + ((kmp_affin_mask_t *)(mask))[__i] = 0; \ + } \ + } + +# define KMP_CPU_COPY(dest, src) \ + { \ + int __i; \ + for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ + ((kmp_affin_mask_t *)(dest))[__i] \ + = ((kmp_affin_mask_t *)(src))[__i]; \ + } \ + } + +# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ + { \ + int __i; \ + for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ + ((kmp_affin_mask_t *)(mask))[__i] \ + = ~((kmp_affin_mask_t *)(mask))[__i]; \ + } \ + } + +# define KMP_CPU_UNION(dest, src) \ + { \ + int __i; \ + for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ + ((kmp_affin_mask_t *)(dest))[__i] \ + |= ((kmp_affin_mask_t *)(src))[__i]; \ + } \ + } + +typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); +extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; + +typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); +extern kmp_GetActiveProcessorGroupCount_t 
__kmp_GetActiveProcessorGroupCount; + +typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); +extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; + +typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); +extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; + +extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask); + +# else /* KMP_GROUP_AFFINITY */ + +typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */ + +# define KMP_CPU_SET(i,mask) (*(mask) |= (((kmp_affin_mask_t)1) << (i))) +# define KMP_CPU_ISSET(i,mask) (!!(*(mask) & (((kmp_affin_mask_t)1) << (i)))) +# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i))) +# define KMP_CPU_ZERO(mask) (*(mask) = 0) +# define KMP_CPU_COPY(dest, src) (*(dest) = *(src)) +# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask)) +# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src)) + +# endif /* KMP_GROUP_AFFINITY */ + +# endif /* KMP_OS_WINDOWS */ + +// +// __kmp_allocate() will return memory allocated on a 4-bytes boundary. +// after zeroing it - it takes care of those assumptions stated above. +// +# define KMP_CPU_ALLOC(ptr) \ + (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size))) +# define KMP_CPU_FREE(ptr) __kmp_free(ptr) +# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size))) +# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */ +# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size))) +# define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr) + +// +// The following macro should be used to index an array of masks. +// The array should be declared as "kmp_affinity_t *" and allocated with +// size "__kmp_affinity_mask_size * len". 
The macro takes care of the fact +// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but +// on Linux* OS, sizeof(kmp_affin_t) is 1. +// +# define KMP_CPU_INDEX(array,i) \ + ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size)) +# define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size) +# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr); +# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size) +# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr); + +#endif /* KMP_USE_HWLOC */ + +// +// Declare local char buffers with this size for printing debug and info +// messages, using __kmp_affinity_print_mask(). +// +#define KMP_AFFIN_MASK_PRINT_LEN 1024 + +enum affinity_type { + affinity_none = 0, + affinity_physical, + affinity_logical, + affinity_compact, + affinity_scatter, + affinity_explicit, + affinity_balanced, + affinity_disabled, // not used outsize the env var parser + affinity_default +}; + +enum affinity_gran { + affinity_gran_fine = 0, + affinity_gran_thread, + affinity_gran_core, + affinity_gran_package, + affinity_gran_node, +#if KMP_GROUP_AFFINITY + // + // The "group" granularity isn't necesssarily coarser than all of the + // other levels, but we put it last in the enum. 
+ // + affinity_gran_group, +#endif /* KMP_GROUP_AFFINITY */ + affinity_gran_default +}; + +enum affinity_top_method { + affinity_top_method_all = 0, // try all (supported) methods, in order +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + affinity_top_method_apicid, + affinity_top_method_x2apicid, +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too +#if KMP_GROUP_AFFINITY + affinity_top_method_group, +#endif /* KMP_GROUP_AFFINITY */ + affinity_top_method_flat, +#if KMP_USE_HWLOC + affinity_top_method_hwloc, +#endif + affinity_top_method_default +}; + +#define affinity_respect_mask_default (-1) + +extern enum affinity_type __kmp_affinity_type; /* Affinity type */ +extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */ +extern int __kmp_affinity_gran_levels; /* corresponding int value */ +extern int __kmp_affinity_dups; /* Affinity duplicate masks */ +extern enum affinity_top_method __kmp_affinity_top_method; +extern int __kmp_affinity_compact; /* Affinity 'compact' value */ +extern int __kmp_affinity_offset; /* Affinity offset value */ +extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */ +extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */ +extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? 
*/ +extern char * __kmp_affinity_proclist; /* proc ID list */ +extern kmp_affin_mask_t *__kmp_affinity_masks; +extern unsigned __kmp_affinity_num_masks; +extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error); +extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error); +extern void __kmp_affinity_bind_thread(int which); + +# if KMP_OS_LINUX +extern kmp_affin_mask_t *__kmp_affinity_get_fullMask(); +# endif /* KMP_OS_LINUX */ +extern char const * __kmp_cpuinfo_file; + +#endif /* KMP_AFFINITY_SUPPORTED */ + +#if OMP_40_ENABLED + +// +// This needs to be kept in sync with the values in omp.h !!! +// +typedef enum kmp_proc_bind_t { + proc_bind_false = 0, + proc_bind_true, + proc_bind_master, + proc_bind_close, + proc_bind_spread, + proc_bind_intel, // use KMP_AFFINITY interface + proc_bind_default +} kmp_proc_bind_t; + +typedef struct kmp_nested_proc_bind_t { + kmp_proc_bind_t *bind_types; + int size; + int used; +} kmp_nested_proc_bind_t; + +extern kmp_nested_proc_bind_t __kmp_nested_proc_bind; + +#endif /* OMP_40_ENABLED */ + +# if KMP_AFFINITY_SUPPORTED +# define KMP_PLACE_ALL (-1) +# define KMP_PLACE_UNDEFINED (-2) +# endif /* KMP_AFFINITY_SUPPORTED */ + +extern int __kmp_affinity_num_places; + + +#if OMP_40_ENABLED +typedef enum kmp_cancel_kind_t { + cancel_noreq = 0, + cancel_parallel = 1, + cancel_loop = 2, + cancel_sections = 3, + cancel_taskgroup = 4 +} kmp_cancel_kind_t; +#endif // OMP_40_ENABLED + +extern int __kmp_place_num_sockets; +extern int __kmp_place_socket_offset; +extern int __kmp_place_num_cores; +extern int __kmp_place_core_offset; +extern int __kmp_place_num_threads_per_core; + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) + +// +// We need to avoid using -1 as a GTID as +1 is added to the gtid +// 
when storing it in a lock, and the value 0 is reserved. +// +#define KMP_GTID_DNE (-2) /* Does not exist */ +#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ +#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ +#define KMP_GTID_UNKNOWN (-5) /* Is not known */ +#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ + +#define __kmp_get_gtid() __kmp_get_global_thread_id() +#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg() + +#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ + __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid ) + +#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) ) +#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \ + team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid ) + +#define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team ) +#define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ + __kmp_threads[ (gtid) ]-> th.th_team ) + +#define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] ) +#define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) ) + + // Returns current thread (pointer to kmp_info_t). In contrast to __kmp_get_thread(), it works + // with registered and not-yet-registered threads. +#define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \ + (thr)->th.th_info.ds.ds_gtid ) + +// AT: Which way is correct? +// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc; +// AT: 2. 
nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc; +#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc ) + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1))) + +#define KMP_MIN_NTH 1 + +#ifndef KMP_MAX_NTH +# if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX +# define KMP_MAX_NTH PTHREAD_THREADS_MAX +# else +# define KMP_MAX_NTH INT_MAX +# endif +#endif /* KMP_MAX_NTH */ + +#ifdef PTHREAD_STACK_MIN +# define KMP_MIN_STKSIZE PTHREAD_STACK_MIN +#else +# define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) +#endif + +#define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) + +#if KMP_ARCH_X86 +# define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) +#elif KMP_ARCH_X86_64 +# define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) +# define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) +#else +# define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) +#endif + +#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) + +#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024)) +#define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024)) +#define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) + +#define KMP_MIN_STKOFFSET (0) +#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE +#if KMP_OS_DARWIN +# define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET +#else +# define KMP_DEFAULT_STKOFFSET CACHE_LINE +#endif + +#define KMP_MIN_STKPADDING (0) +#define KMP_MAX_STKPADDING (2 * 1024 * 1024) + +#define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */ +#define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */ +#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */ +#define KMP_MIN_BLOCKTIME (0) +#define KMP_MAX_BLOCKTIME 
(INT_MAX) /* Must be this for "infinite" setting the work */ +#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */ +/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */ +/* Only allow increasing number of wakeups */ +#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ + ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \ + ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \ + ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \ + (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) ) + +/* Calculate number of intervals for a specific block time based on monitor_wakeups */ +#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ + ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \ + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) ) + +#define KMP_MIN_STATSCOLS 40 +#define KMP_MAX_STATSCOLS 4096 +#define KMP_DEFAULT_STATSCOLS 80 + +#define KMP_MIN_INTERVAL 0 +#define KMP_MAX_INTERVAL (INT_MAX-1) +#define KMP_DEFAULT_INTERVAL 0 + +#define KMP_MIN_CHUNK 1 +#define KMP_MAX_CHUNK (INT_MAX-1) +#define KMP_DEFAULT_CHUNK 1 + +#define KMP_MIN_INIT_WAIT 1 +#define KMP_MAX_INIT_WAIT (INT_MAX/2) +#define KMP_DEFAULT_INIT_WAIT 2048U + +#define KMP_MIN_NEXT_WAIT 1 +#define KMP_MAX_NEXT_WAIT (INT_MAX/2) +#define KMP_DEFAULT_NEXT_WAIT 1024U + +// max possible dynamic loops in concurrent execution per team +#define KMP_MAX_DISP_BUF 7 +#define KMP_MAX_ORDERED 8 + +#define KMP_MAX_FIELDS 32 + +#define KMP_MAX_BRANCH_BITS 31 + +#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX + +/* Minimum number of threads before switch to TLS gtid (experimentally determined) */ +/* josh TODO: what about OS X* tuning? 
*/ +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +# define KMP_TLS_GTID_MIN 5 +#else +# define KMP_TLS_GTID_MIN INT_MAX +#endif + +#define KMP_MASTER_TID(tid) ( (tid) == 0 ) +#define KMP_WORKER_TID(tid) ( (tid) != 0 ) + +#define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 ) +#define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 ) +#define KMP_UBER_GTID(gtid) \ + ( \ + KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \ + KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \ + (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \ + (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\ + ) +#define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 ) + +#ifndef TRUE +#define FALSE 0 +#define TRUE (! FALSE) +#endif + +/* NOTE: all of the following constants must be even */ + +#if KMP_OS_WINDOWS +# define KMP_INIT_WAIT 64U /* initial number of spin-tests */ +# define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ +#elif KMP_OS_CNK +# define KMP_INIT_WAIT 16U /* initial number of spin-tests */ +# define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ +#elif KMP_OS_LINUX +# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#elif KMP_OS_DARWIN +/* TODO: tune for KMP_OS_DARWIN */ +# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#elif KMP_OS_FREEBSD +/* TODO: tune for KMP_OS_FREEBSD */ +# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#elif KMP_OS_NETBSD +/* TODO: tune for KMP_OS_NETBSD */ +# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#endif + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +typedef struct kmp_cpuid { + kmp_uint32 eax; + kmp_uint32 ebx; + kmp_uint32 ecx; + kmp_uint32 edx; +} kmp_cpuid_t; +extern void 
__kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
+// __kmp_x86_pause(): declared extern when KMP_ARCH_X86 is set; otherwise defined here as a
+// one-statement wrapper around _mm_delay_32( 100 ) (KMP_MIC) or _mm_pause().
+// The function bodies are not followed by ';' -- a lone ';' at file scope is an empty
+// declaration, which ISO C does not permit.
+# if KMP_ARCH_X86
+    extern void __kmp_x86_pause( void );
+# elif KMP_MIC
+    static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); }
+# else
+    static void __kmp_x86_pause( void ) { _mm_pause(); }
+# endif
+# define KMP_CPU_PAUSE()        __kmp_x86_pause()
+#elif KMP_ARCH_PPC64
+# define KMP_PPC64_PRI_LOW()    __asm__ volatile ("or 1, 1, 1")
+# define KMP_PPC64_PRI_MED()    __asm__ volatile ("or 2, 2, 2")
+# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
+# define KMP_CPU_PAUSE()        do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)
+#else
+# define KMP_CPU_PAUSE()        /* nothing to do */
+#endif
+
+// NOTE(review): the four macros below expand to a bare { ... } block rather than
+// do { ... } while (0). Written as `if (c) KMP_YIELD(x); else ...` the trailing ';' ends the
+// if-statement and the `else` no longer parses. They are left as-is because their call sites
+// are not visible from here; always use them as a full statement inside braces.
+
+// Sets `count` to __kmp_yield_init.
+#define KMP_INIT_YIELD(count)           { (count) = __kmp_yield_init; }
+
+// Executes KMP_CPU_PAUSE() and then calls __kmp_yield( cond ).
+#define KMP_YIELD(cond)                 { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); }
+
+// Note the decrement of 2 in the following Macros.  With KMP_LIBRARY=turnaround,
+// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd.
+
+// Pause, subtract 2 from `count`, and when it reaches zero yield (conditionally on `cond`,
+// or unconditionally for the _SPIN form) and reload `count` from __kmp_yield_next.
+#define KMP_YIELD_WHEN(cond,count)      { KMP_CPU_PAUSE(); (count) -= 2; \
+                                                if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } }
+#define KMP_YIELD_SPIN(count)           { KMP_CPU_PAUSE(); (count) -=2; \
+                                                if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } }
+
+/* ------------------------------------------------------------------------ */
+/* Support datatypes for the orphaned construct nesting checks.
*/ +/* ------------------------------------------------------------------------ */ + +enum cons_type { + ct_none, + ct_parallel, + ct_pdo, + ct_pdo_ordered, + ct_psections, + ct_psingle, + + /* the following must be left in order and not split up */ + ct_taskq, + ct_task, /* really task inside non-ordered taskq, considered a worksharing type */ + ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */ + /* the preceding must be left in order and not split up */ + + ct_critical, + ct_ordered_in_parallel, + ct_ordered_in_pdo, + ct_ordered_in_taskq, + ct_master, + ct_reduce, + ct_barrier +}; + +/* test to see if we are in a taskq construct */ +# define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) ) +# define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered) + +struct cons_data { + ident_t const *ident; + enum cons_type type; + int prev; + kmp_user_lock_p name; /* address exclusively for critical section name comparison */ +}; + +struct cons_header { + int p_top, w_top, s_top; + int stack_size, stack_top; + struct cons_data *stack_data; +}; + +struct kmp_region_info { + char *text; + int offset[KMP_MAX_FIELDS]; + int length[KMP_MAX_FIELDS]; +}; + + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +#if KMP_OS_WINDOWS + typedef HANDLE kmp_thread_t; + typedef DWORD kmp_key_t; +#endif /* KMP_OS_WINDOWS */ + +#if KMP_OS_UNIX + typedef pthread_t kmp_thread_t; + typedef pthread_key_t kmp_key_t; +#endif + +extern kmp_key_t __kmp_gtid_threadprivate_key; + +typedef struct kmp_sys_info { + long maxrss; /* the maximum resident set size utilized (in kilobytes) */ + long minflt; /* the number of page faults serviced without any I/O */ + long majflt; /* the number of page faults serviced that required I/O */ + long nswap; /* the number of times a process was 
"swapped" out of memory */ + long inblock; /* the number of times the file system had to perform input */ + long oublock; /* the number of times the file system had to perform output */ + long nvcsw; /* the number of times a context switch was voluntarily */ + long nivcsw; /* the number of times a context switch was forced */ +} kmp_sys_info_t; + +typedef struct kmp_cpuinfo { + int initialized; // If 0, other fields are not initialized. + int signature; // CPUID(1).EAX + int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family ) + int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model) + int stepping; // CPUID(1).EAX[3:0] ( Stepping ) + int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. + int rtm; // 0 if RTM instructions are not supported, 1 otherwise. + int cpu_stackoffset; + int apic_id; + int physical_id; + int logical_id; + kmp_uint64 frequency; // Nominal CPU frequency in Hz. +} kmp_cpuinfo_t; + + +#ifdef BUILD_TV + +struct tv_threadprivate { + /* Record type #1 */ + void *global_addr; + void *thread_addr; +}; + +struct tv_data { + struct tv_data *next; + void *type; + union tv_union { + struct tv_threadprivate tp; + } u; +}; + +extern kmp_key_t __kmp_tv_key; + +#endif /* BUILD_TV */ + +/* ------------------------------------------------------------------------ */ + +#if USE_ITT_BUILD +// We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here. +// Later we will check the type meets requirements. 
+typedef int kmp_itt_mark_t; +#define KMP_ITT_DEBUG 0 +#endif /* USE_ITT_BUILD */ + +/* ------------------------------------------------------------------------ */ + +/* + * Taskq data structures + */ + +#define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4) +#define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */ + +/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */ + +#define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */ +#define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */ +#define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */ +#define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */ +#define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */ +#define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */ +#define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */ +#define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */ + +#define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */ + +#define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */ +#define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */ +#define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */ +#define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */ +#define TQF_PARALLEL_CONTEXT 0x1000 /* internal use only: this queue encountered in a parallel context: not serialized */ +#define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */ + +#define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use 
only flags */ + +typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t { + kmp_int32 ai_data; +} kmpc_aligned_int32_t; + +typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t { + struct kmpc_thunk_t *qs_thunk; +} kmpc_aligned_queue_slot_t; + +typedef struct kmpc_task_queue_t { + /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */ + kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */ + union { + struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */ + struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */ + } tq; + volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */ + struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */ + struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */ + volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */ + /* (other than the thread executing the kmpc_end_taskq call) */ + /* locked by parent tq's tq_link_lck */ + + /* shared data for task queue */ + struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */ + /* only one array element exists for all but outermost taskq */ + + /* bookkeeping for ordered task queue */ + kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */ + volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */ + + /* thunk storage management for task queue */ + kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */ + struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */ + struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */ + + 
/* data fields for queue itself */ + kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */ + kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */ + volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task thunk, occupied if not NULL */ + kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */ + kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */ + kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */ + volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */ + kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */ + volatile kmp_int32 tq_flags; /* TQF_xxx */ + + /* bookkeeping for outstanding thunks */ + struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */ + kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */ + + /* statistics library bookkeeping */ + ident_t *tq_loc; /* source location information for taskq directive */ +} kmpc_task_queue_t; + +typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk); + +/* sizeof_shareds passed as arg to __kmpc_taskq call */ +typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */ + kmpc_task_queue_t *sv_queue; + /* (pointers to) shared vars */ +} kmpc_shared_vars_t; + +typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t { + volatile struct kmpc_shared_vars_t *ai_data; +} kmpc_aligned_shared_vars_t; + +/* sizeof_thunk passed as arg to kmpc_taskq call */ +typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */ + union { /* field used for internal freelists too */ + kmpc_shared_vars_t *th_shareds; + struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */ + } th; + kmpc_task_t th_task; /* taskq_task 
if flags & TQF_TASKQ_TASK */ + struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */ + kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */ + kmp_int32 th_status; + kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */ + /* private vars */ +} kmpc_thunk_t; + +typedef struct KMP_ALIGN_CACHE kmp_taskq { + int tq_curr_thunk_capacity; + + kmpc_task_queue_t *tq_root; + kmp_int32 tq_global_flags; + + kmp_lock_t tq_freelist_lck; + kmpc_task_queue_t *tq_freelist; + + kmpc_thunk_t **tq_curr_thunk; +} kmp_taskq_t; + +/* END Taskq data structures */ +/* --------------------------------------------------------------------------- */ + +typedef kmp_int32 kmp_critical_name[8]; + +/*! +@ingroup PARALLEL +The type for a microtask which gets passed to @ref __kmpc_fork_call(). +The arguments to the outlined function are +@param global_tid the global thread identity of the thread executing the function. +@param bound_tid the local identitiy of the thread executing the function +@param ... pointers to shared variables accessed by the function. +*/ +typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... ); +typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... ); + +/*! +@ingroup THREADPRIVATE +@{ +*/ +/* --------------------------------------------------------------------------- */ +/* Threadprivate initialization/finalization function declarations */ + +/* for non-array objects: __kmpc_threadprivate_register() */ + +/*! + Pointer to the constructor function. + The first argument is the this pointer +*/ +typedef void *(*kmpc_ctor) (void *); + +/*! + Pointer to the destructor function. + The first argument is the this pointer +*/ +typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */ +/*! + Pointer to an alternate constructor. 
+ The first argument is the this pointer. +*/ +typedef void *(*kmpc_cctor) (void *, void *); + +/* for array objects: __kmpc_threadprivate_register_vec() */ + /* First arg: "this" pointer */ + /* Last arg: number of array elements */ +/*! + Array constructor. + First argument is the this pointer + Second argument the number of array elements. +*/ +typedef void *(*kmpc_ctor_vec) (void *, size_t); +/*! + Pointer to the array destructor function. + The first argument is the this pointer + Second argument the number of array elements. +*/ +typedef void (*kmpc_dtor_vec) (void *, size_t); +/*! + Array constructor. + First argument is the this pointer + Third argument the number of array elements. +*/ +typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */ + +/*! +@} +*/ + + +/* ------------------------------------------------------------------------ */ + +/* keeps tracked of threadprivate cache allocations for cleanup later */ +typedef struct kmp_cached_addr { + void **addr; /* address of allocated cache */ + struct kmp_cached_addr *next; /* pointer to next cached address */ +} kmp_cached_addr_t; + +struct private_data { + struct private_data *next; /* The next descriptor in the list */ + void *data; /* The data buffer for this descriptor */ + int more; /* The repeat count for this descriptor */ + size_t size; /* The data size for this descriptor */ +}; + +struct private_common { + struct private_common *next; + struct private_common *link; + void *gbl_addr; + void *par_addr; /* par_addr == gbl_addr for MASTER thread */ + size_t cmn_size; +}; + +struct shared_common +{ + struct shared_common *next; + struct private_data *pod_init; + void *obj_init; + void *gbl_addr; + union { + kmpc_ctor ctor; + kmpc_ctor_vec ctorv; + } ct; + union { + kmpc_cctor cctor; + kmpc_cctor_vec cctorv; + } cct; + union { + kmpc_dtor dtor; + kmpc_dtor_vec dtorv; + } dt; + size_t vec_len; + int is_vec; + size_t cmn_size; +}; + +#define KMP_HASH_TABLE_LOG2 9 /* 
log2 of the hash table size */
+#define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2)  /* size of the hash table */
+#define KMP_HASH_SHIFT      3                           /* throw away this many low bits from the address */
+/*
+ * KMP_HASH(x): bucket index for address x -- the address is converted to kmp_uintptr_t,
+ * shifted right by KMP_HASH_SHIFT and masked to the range [0, KMP_HASH_TABLE_SIZE).
+ * The argument is parenthesized under the cast: a cast binds tighter than binary operators,
+ * so without the parentheses KMP_HASH(p + 1) would convert p first and then add 1 to the
+ * integer instead of hashing the address p + 1.
+ */
+#define KMP_HASH(x)         ((((kmp_uintptr_t) (x)) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1))
+
+/* Hash table of private_common chains; one bucket per KMP_HASH value. */
+struct common_table {
+    struct  private_common      *data[ KMP_HASH_TABLE_SIZE ];
+};
+
+/* Hash table of shared_common chains; one bucket per KMP_HASH value. */
+struct shared_table {
+    struct  shared_common       *data[ KMP_HASH_TABLE_SIZE ];
+};
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef KMP_STATIC_STEAL_ENABLED
+typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
+    kmp_int32 count;
+    kmp_int32 ub;
+    /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
+    kmp_int32 lb;
+    kmp_int32 st;
+    kmp_int32 tc;
+    kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */
+
+    // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
+    //    a) parm3 is properly aligned and
+    //    b) all parm1-4 are in the same cache line.
+    // Because of parm1-4 are used together, performance seems to be better
+    // if they are in the same line (not measured though).
+    // (This comment previously said KMP_ALIGN( 16 ); the code below uses 32, see the AC note.)
+
+    struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template
+        kmp_int32 parm1;     //     structures in kmp_dispatch.cpp. This should
+        kmp_int32 parm2;     //     make no real change at least while padding is off.
+        kmp_int32 parm3;
+        kmp_int32 parm4;
+    };
+
+    kmp_uint32 ordered_lower;
+    kmp_uint32 ordered_upper;
+#if KMP_OS_WINDOWS
+    // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
+    // It would be nice to measure execution times.
+    // Conditional if/endif can be removed at all.
+ kmp_int32 last_upper; +#endif /* KMP_OS_WINDOWS */ +} dispatch_private_info32_t; + +typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { + kmp_int64 count; /* current chunk number for static and static-steal scheduling*/ + kmp_int64 ub; /* upper-bound */ + /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ + kmp_int64 lb; /* lower-bound */ + kmp_int64 st; /* stride */ + kmp_int64 tc; /* trip count (number of iterations) */ + kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */ + + /* parm[1-4] are used in different ways by different scheduling algorithms */ + + // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) + // a) parm3 is properly aligned and + // b) all parm1-4 are in the same cache line. + // Because of parm1-4 are used together, performance seems to be better + // if they are in the same line (not measured though). + + struct KMP_ALIGN( 32 ) { + kmp_int64 parm1; + kmp_int64 parm2; + kmp_int64 parm3; + kmp_int64 parm4; + }; + + kmp_uint64 ordered_lower; + kmp_uint64 ordered_upper; +#if KMP_OS_WINDOWS + // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'. + // It would be nice to measure execution times. + // Conditional if/endif can be removed at all. 
+ kmp_int64 last_upper; +#endif /* KMP_OS_WINDOWS */ +} dispatch_private_info64_t; +#else /* KMP_STATIC_STEAL_ENABLED */ +typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { + kmp_int32 lb; + kmp_int32 ub; + kmp_int32 st; + kmp_int32 tc; + + kmp_int32 parm1; + kmp_int32 parm2; + kmp_int32 parm3; + kmp_int32 parm4; + + kmp_int32 count; + + kmp_uint32 ordered_lower; + kmp_uint32 ordered_upper; +#if KMP_OS_WINDOWS + kmp_int32 last_upper; +#endif /* KMP_OS_WINDOWS */ +} dispatch_private_info32_t; + +typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { + kmp_int64 lb; /* lower-bound */ + kmp_int64 ub; /* upper-bound */ + kmp_int64 st; /* stride */ + kmp_int64 tc; /* trip count (number of iterations) */ + + /* parm[1-4] are used in different ways by different scheduling algorithms */ + kmp_int64 parm1; + kmp_int64 parm2; + kmp_int64 parm3; + kmp_int64 parm4; + + kmp_int64 count; /* current chunk number for static scheduling */ + + kmp_uint64 ordered_lower; + kmp_uint64 ordered_upper; +#if KMP_OS_WINDOWS + kmp_int64 last_upper; +#endif /* KMP_OS_WINDOWS */ +} dispatch_private_info64_t; +#endif /* KMP_STATIC_STEAL_ENABLED */ + +typedef struct KMP_ALIGN_CACHE dispatch_private_info { + union private_info { + dispatch_private_info32_t p32; + dispatch_private_info64_t p64; + } u; + enum sched_type schedule; /* scheduling algorithm */ + kmp_int32 ordered; /* ordered clause specified */ + kmp_int32 ordered_bumped; + kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar + struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */ + kmp_int32 nomerge; /* don't merge iters if serialized */ + kmp_int32 type_size; /* the size of types in private_info */ + enum cons_type pushed_ws; +} dispatch_private_info_t; + +typedef struct dispatch_shared_info32 { + /* chunk index under dynamic, number of idle threads under static-steal; + iteration index otherwise */ + volatile kmp_uint32 iteration; + 
volatile kmp_uint32 num_done; + volatile kmp_uint32 ordered_iteration; + kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar +} dispatch_shared_info32_t; + +typedef struct dispatch_shared_info64 { + /* chunk index under dynamic, number of idle threads under static-steal; + iteration index otherwise */ + volatile kmp_uint64 iteration; + volatile kmp_uint64 num_done; + volatile kmp_uint64 ordered_iteration; + kmp_int64 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar +} dispatch_shared_info64_t; + +typedef struct dispatch_shared_info { + union shared_info { + dispatch_shared_info32_t s32; + dispatch_shared_info64_t s64; + } u; +/* volatile kmp_int32 dispatch_abort; depricated */ + volatile kmp_uint32 buffer_index; +} dispatch_shared_info_t; + +typedef struct kmp_disp { + /* Vector for ORDERED SECTION */ + void (*th_deo_fcn)( int * gtid, int * cid, ident_t *); + /* Vector for END ORDERED SECTION */ + void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *); + + dispatch_shared_info_t *th_dispatch_sh_current; + dispatch_private_info_t *th_dispatch_pr_current; + + dispatch_private_info_t *th_disp_buffer; + kmp_int32 th_disp_index; + void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64 +#if KMP_USE_INTERNODE_ALIGNMENT + char more_padding[INTERNODE_CACHE_LINE]; +#endif +} kmp_disp_t; + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* Barrier stuff */ + +/* constants for barrier state update */ +#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ +#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ +#define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */ +#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ + +#define KMP_BARRIER_SLEEP_STATE 
((kmp_uint) (1 << KMP_BARRIER_SLEEP_BIT)) +#define KMP_BARRIER_UNUSED_STATE ((kmp_uint) (1 << KMP_BARRIER_UNUSED_BIT)) +#define KMP_BARRIER_STATE_BUMP ((kmp_uint) (1 << KMP_BARRIER_BUMP_BIT)) + +#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT) +# error "Barrier sleep bit must be smaller than barrier bump bit" +#endif +#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT) +# error "Barrier unused bit must be smaller than barrier bump bit" +#endif + +// Constants for release barrier wait state: currently, hierarchical only +#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep +#define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release +#define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release +#define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go +#define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up + +enum barrier_type { + bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */ + bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ + #if KMP_FAST_REDUCTION_BARRIER + bs_reduction_barrier, /* 2, All barriers that are used in reduction */ + #endif // KMP_FAST_REDUCTION_BARRIER + bs_last_barrier /* Just a placeholder to mark the end */ +}; + +// to work with reduction barriers just like with plain barriers +#if !KMP_FAST_REDUCTION_BARRIER + #define bs_reduction_barrier bs_plain_barrier +#endif // KMP_FAST_REDUCTION_BARRIER + +typedef enum kmp_bar_pat { /* Barrier communication patterns */ + bp_linear_bar = 0, /* Single level (degenerate) tree */ + bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */ + bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */ + bp_hierarchical_bar = 3, /* Machine hierarchy tree */ + bp_last_bar = 4 /* Placeholder to mark the end */ +} kmp_bar_pat_e; + +# define 
KMP_BARRIER_ICV_PUSH 1 + +/* Record for holding the values of the internal controls stack records */ +typedef struct kmp_internal_control { + int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */ + kmp_int8 nested; /* internal control for nested parallelism (per thread) */ + kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */ + kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */ + int blocktime; /* internal control for blocktime */ + int bt_intervals; /* internal control for blocktime intervals */ + int nproc; /* internal control for #threads for next parallel region (per thread) */ + int max_active_levels; /* internal control for max_active_levels */ + kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */ +#if OMP_40_ENABLED + kmp_proc_bind_t proc_bind; /* internal control for affinity */ +#endif // OMP_40_ENABLED + struct kmp_internal_control *next; +} kmp_internal_control_t; + +static inline void +copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) { + *dst = *src; +} + +/* Thread barrier needs volatile barrier fields */ +typedef struct KMP_ALIGN_CACHE kmp_bstate { + // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it). + // It is not explicitly aligned below, because we *don't* want it to be padded -- instead, + // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache lines + // stores in the hierarchical barrier. + kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread + // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store + volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical) + KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point. 
+ kmp_uint32 *skip_per_level; + kmp_uint32 my_level; + kmp_int32 parent_tid; + kmp_int32 old_tid; + kmp_uint32 depth; + struct kmp_bstate *parent_bar; + kmp_team_t *team; + kmp_uint64 leaf_state; + kmp_uint32 nproc; + kmp_uint8 base_leaf_kids; + kmp_uint8 leaf_kids; + kmp_uint8 offset; + kmp_uint8 wait_flag; + kmp_uint8 use_oncore_barrier; +#if USE_DEBUGGER + // The following field is intended for the debugger solely. Only the worker thread itself accesses this + // field: the worker increases it by 1 when it arrives to a barrier. + KMP_ALIGN_CACHE kmp_uint b_worker_arrived; +#endif /* USE_DEBUGGER */ +} kmp_bstate_t; + +union KMP_ALIGN_CACHE kmp_barrier_union { + double b_align; /* use worst case alignment */ + char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ]; + kmp_bstate_t bb; +}; + +typedef union kmp_barrier_union kmp_balign_t; + +/* Team barrier needs only non-volatile arrived counter */ +union KMP_ALIGN_CACHE kmp_barrier_team_union { + double b_align; /* use worst case alignment */ + char b_pad[ CACHE_LINE ]; + struct { + kmp_uint64 b_arrived; /* STATE => task reached synch point. */ +#if USE_DEBUGGER + // The following two fields are indended for the debugger solely. Only master of the team accesses + // these fields: the first one is increased by 1 when master arrives to a barrier, the + // second one is increased by one when all the threads arrived. + kmp_uint b_master_arrived; + kmp_uint b_team_arrived; +#endif + }; +}; + +typedef union kmp_barrier_team_union kmp_balign_team_t; + +/* + * Padding for Linux* OS pthreads condition variables and mutexes used to signal + * threads when a condition changes. This is to workaround an NPTL bug + * where padding was added to pthread_cond_t which caused the initialization + * routine to write outside of the structure if compiled on pre-NPTL threads. 
+ */ + +#if KMP_OS_WINDOWS +typedef struct kmp_win32_mutex +{ + /* The Lock */ + CRITICAL_SECTION cs; +} kmp_win32_mutex_t; + +typedef struct kmp_win32_cond +{ + /* Count of the number of waiters. */ + int waiters_count_; + + /* Serialize access to */ + kmp_win32_mutex_t waiters_count_lock_; + + /* Number of threads to release via a or a */ + /* */ + int release_count_; + + /* Keeps track of the current "generation" so that we don't allow */ + /* one thread to steal all the "releases" from the broadcast. */ + int wait_generation_count_; + + /* A manual-reset event that's used to block and release waiting */ + /* threads. */ + HANDLE event_; +} kmp_win32_cond_t; +#endif + +#if KMP_OS_UNIX + +union KMP_ALIGN_CACHE kmp_cond_union { + double c_align; + char c_pad[ CACHE_LINE ]; + pthread_cond_t c_cond; +}; + +typedef union kmp_cond_union kmp_cond_align_t; + +union KMP_ALIGN_CACHE kmp_mutex_union { + double m_align; + char m_pad[ CACHE_LINE ]; + pthread_mutex_t m_mutex; +}; + +typedef union kmp_mutex_union kmp_mutex_align_t; + +#endif /* KMP_OS_UNIX */ + +typedef struct kmp_desc_base { + void *ds_stackbase; + size_t ds_stacksize; + int ds_stackgrow; + kmp_thread_t ds_thread; + volatile int ds_tid; + int ds_gtid; +#if KMP_OS_WINDOWS + volatile int ds_alive; + DWORD ds_thread_id; + /* + ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. However, + debugger support (libomp_db) cannot work with handles, because they uncomparable. For + example, debugger requests info about thread with handle h. h is valid within debugger + process, and meaningless within debugee process. Even if h is duped by call to + DuplicateHandle(), so the result h' is valid within debugee process, but it is a *new* + handle which does *not* equal to any other handle in debugee... The only way to + compare handles is convert them to system-wide ids. GetThreadId() function is + available only in Longhorn and Server 2003. 
:-( In contrast, GetCurrentThreadId() is + available on all Windows* OS flavours (including Windows* 95). Thus, we have to get thread id by + call to GetCurrentThreadId() from within the thread and save it to let libomp_db + identify threads. + */ +#endif /* KMP_OS_WINDOWS */ +} kmp_desc_base_t; + +typedef union KMP_ALIGN_CACHE kmp_desc { + double ds_align; /* use worst case alignment */ + char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ]; + kmp_desc_base_t ds; +} kmp_desc_t; + + +typedef struct kmp_local { + volatile int this_construct; /* count of single's encountered by thread */ + void *reduce_data; +#if KMP_USE_BGET + void *bget_data; + void *bget_list; +#if ! USE_CMP_XCHG_FOR_BGET +#ifdef USE_QUEUING_LOCK_FOR_BGET + kmp_lock_t bget_lock; /* Lock for accessing bget free list */ +#else + kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */ + /* Must be bootstrap lock so we can use it at library shutdown */ +#endif /* USE_LOCK_FOR_BGET */ +#endif /* ! USE_CMP_XCHG_FOR_BGET */ +#endif /* KMP_USE_BGET */ + +#ifdef BUILD_TV + struct tv_data *tv_data; +#endif + + PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */ + +} kmp_local_t; + +#define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) +#define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) +#define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) + +#define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) +#define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) +#define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) +#define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) + +#define 
set__blocktime_team( xteam, xtid, xval ) \ + ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) ) + +#define set__bt_intervals_team( xteam, xtid, xval ) \ + ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) ) + +#define set__bt_set_team( xteam, xtid, xval ) \ + ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) ) + + +#define set__nested( xthread, xval ) \ + ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) ) +#define get__nested( xthread ) \ + ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) ) + +#define set__dynamic( xthread, xval ) \ + ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) ) +#define get__dynamic( xthread ) \ + ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) ) + +#define set__nproc( xthread, xval ) \ + ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) ) + +#define set__max_active_levels( xthread, xval ) \ + ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) ) + +#define set__sched( xthread, xval ) \ + ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) ) + +#if OMP_40_ENABLED + +#define set__proc_bind( xthread, xval ) \ + ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) ) +#define get__proc_bind( xthread ) \ + ( (xthread)->th.th_current_task->td_icvs.proc_bind ) + +#endif /* OMP_40_ENABLED */ + + +/* ------------------------------------------------------------------------ */ +// OpenMP tasking data structures +// + +typedef enum kmp_tasking_mode { + tskm_immediate_exec = 0, + tskm_extra_barrier = 1, + tskm_task_teams = 2, + tskm_max = 2 +} kmp_tasking_mode_t; + +extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */ +extern kmp_int32 __kmp_task_stealing_constraint; + +/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */ +#define 
KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1) +#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1) + +// The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and +// queued since the previous barrier release. +#define KMP_TASKING_ENABLED(task_team) \ + (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) +/*! +@ingroup BASIC_TYPES +@{ +*/ + +/*! + */ +typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * ); + +/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */ +/*! + */ +typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */ + void * shareds; /**< pointer to block of pointers to shared vars */ + kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ + kmp_int32 part_id; /**< part id for the task */ +#if OMP_40_ENABLED + kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */ +#endif // OMP_40_ENABLED + /* private vars */ +} kmp_task_t; + +/*! 
+@} +*/ + +#if OMP_40_ENABLED +typedef struct kmp_taskgroup { + kmp_uint32 count; // number of allocated and not yet complete tasks + kmp_int32 cancel_request; // request for cancellation of this taskgroup + struct kmp_taskgroup *parent; // parent taskgroup +} kmp_taskgroup_t; + + +// forward declarations +typedef union kmp_depnode kmp_depnode_t; +typedef struct kmp_depnode_list kmp_depnode_list_t; +typedef struct kmp_dephash_entry kmp_dephash_entry_t; + +typedef struct kmp_depend_info { + kmp_intptr_t base_addr; + size_t len; + struct { + bool in:1; + bool out:1; + } flags; +} kmp_depend_info_t; + +struct kmp_depnode_list { + kmp_depnode_t * node; + kmp_depnode_list_t * next; +}; + +typedef struct kmp_base_depnode { + kmp_depnode_list_t * successors; + kmp_task_t * task; + + kmp_lock_t lock; + +#if KMP_SUPPORT_GRAPH_OUTPUT + kmp_uint32 id; +#endif + + volatile kmp_int32 npredecessors; + volatile kmp_int32 nrefs; +} kmp_base_depnode_t; + +union KMP_ALIGN_CACHE kmp_depnode { + double dn_align; /* use worst case alignment */ + char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ]; + kmp_base_depnode_t dn; +}; + +struct kmp_dephash_entry { + kmp_intptr_t addr; + kmp_depnode_t * last_out; + kmp_depnode_list_t * last_ins; + kmp_dephash_entry_t * next_in_bucket; +}; + +typedef struct kmp_dephash { + kmp_dephash_entry_t ** buckets; +#ifdef KMP_DEBUG + kmp_uint32 nelements; + kmp_uint32 nconflicts; +#endif +} kmp_dephash_t; + +#endif + +#ifdef BUILD_TIED_TASK_STACK + +/* Tied Task stack definitions */ +typedef struct kmp_stack_block { + kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ]; + struct kmp_stack_block * sb_next; + struct kmp_stack_block * sb_prev; +} kmp_stack_block_t; + +typedef struct kmp_task_stack { + kmp_stack_block_t ts_first_block; // first block of stack entries + kmp_taskdata_t ** ts_top; // pointer to the top of stack + kmp_int32 ts_entries; // number of entries on the stack +} kmp_task_stack_t; + +#endif // BUILD_TIED_TASK_STACK + +typedef struct 
kmp_tasking_flags { /* Total struct must be exactly 32 bits */ + /* Compiler flags */ /* Total compiler flags must be 16 bits */ + unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ + unsigned final : 1; /* task is final(1) so execute immediately */ + unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */ +#if OMP_40_ENABLED + unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */ +#if OMP_41_ENABLED + unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */ + unsigned reserved : 11; /* reserved for compiler use */ +#else + unsigned reserved : 12; /* reserved for compiler use */ +#endif +#else // OMP_40_ENABLED + unsigned reserved : 13; /* reserved for compiler use */ +#endif // OMP_40_ENABLED + + /* Library flags */ /* Total library flags must be 16 bits */ + unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ + unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */ + unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */ + unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */ + /* If either team_serial or tasking_ser is set, task team may be NULL */ + /* Task State Flags: */ + unsigned started : 1; /* 1==started, 0==not started */ + unsigned executing : 1; /* 1==executing, 0==not executing */ + unsigned complete : 1; /* 1==complete, 0==not complete */ + unsigned freed : 1; /* 1==freed, 0==allocateed */ + unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ + unsigned reserved31 : 7; /* reserved for library use */ + +} kmp_tasking_flags_t; + + +struct kmp_taskdata { /* aligned during dynamic allocation */ + kmp_int32 td_task_id; /* id, assigned by debugger */ + kmp_tasking_flags_t td_flags; /* task flags */ + kmp_team_t * td_team; /* team for this task */ + 
kmp_info_p * td_alloc_thread; /* thread that allocated data structures */ + /* Currently not used except for perhaps IDB */ + kmp_taskdata_t * td_parent; /* parent task */ + kmp_int32 td_level; /* task nesting level */ + ident_t * td_ident; /* task identifier */ + // Taskwait data. + ident_t * td_taskwait_ident; + kmp_uint32 td_taskwait_counter; + kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */ + KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */ + volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */ + volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */ +#if OMP_40_ENABLED + kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup + kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here + kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies +#endif +#if OMPT_SUPPORT + ompt_task_info_t ompt_task_info; +#endif +#if KMP_HAVE_QUAD + _Quad td_dummy; // Align structure 16-byte size since allocated just before kmp_task_t +#else + kmp_uint32 td_dummy[2]; +#endif +}; // struct kmp_taskdata + +// Make sure padding above worked +KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 ); + +// Data for task team but per thread +typedef struct kmp_base_thread_data { + kmp_info_p * td_thr; // Pointer back to thread info + // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued? + kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque + kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated + kmp_uint32 td_deque_head; // Head of deque (will wrap) + kmp_uint32 td_deque_tail; // Tail of deque (will wrap) + kmp_int32 td_deque_ntasks; // Number of tasks in deque + // GEH: shouldn't this be volatile since used in while-spin? 
+ kmp_int32 td_deque_last_stolen; // Thread number of last successful steal +#ifdef BUILD_TIED_TASK_STACK + kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint +#endif // BUILD_TIED_TASK_STACK +} kmp_base_thread_data_t; + +typedef union KMP_ALIGN_CACHE kmp_thread_data { + kmp_base_thread_data_t td; + double td_align; /* use worst case alignment */ + char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ]; +} kmp_thread_data_t; + + +// Data for task teams which are used when tasking is enabled for the team +typedef struct kmp_base_task_team { + kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */ + /* must be bootstrap lock since used at library shutdown*/ + kmp_task_team_t * tt_next; /* For linking the task team free list */ + kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */ + /* Data survives task team deallocation */ + kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? 
*/ + /* TRUE means tt_threads_data is set up and initialized */ + kmp_int32 tt_nproc; /* #threads in team */ + kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */ +#if OMP_41_ENABLED + kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */ +#endif + + KMP_ALIGN_CACHE + volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */ + + KMP_ALIGN_CACHE + volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */ +} kmp_base_task_team_t; + +union KMP_ALIGN_CACHE kmp_task_team { + kmp_base_task_team_t tt; + double tt_align; /* use worst case alignment */ + char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ]; +}; + +#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) +// Free lists keep same-size free memory slots for fast memory allocation routines +typedef struct kmp_free_list { + void *th_free_list_self; // Self-allocated tasks free list + void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads + void *th_free_list_other; // Non-self free list (to be returned to owner's sync list) +} kmp_free_list_t; +#endif +#if KMP_NESTED_HOT_TEAMS +// Hot teams array keeps hot teams and their sizes for given thread. +// Hot teams are not put in teams pool, and they don't put threads in threads pool. +typedef struct kmp_hot_team_ptr { + kmp_team_p *hot_team; // pointer to hot_team of given nesting level + kmp_int32 hot_team_nth; // number of threads allocated for the hot_team +} kmp_hot_team_ptr_t; +#endif +#if OMP_40_ENABLED +typedef struct kmp_teams_size { + kmp_int32 nteams; // number of teams in a league + kmp_int32 nth; // number of threads in each team of the league +} kmp_teams_size_t; +#endif + +/* ------------------------------------------------------------------------ */ +// OpenMP thread data structures +// + +typedef struct KMP_ALIGN_CACHE kmp_base_info { +/* + * Start with the readonly data which is cache aligned and padded. 
+ * this is written before the thread starts working by the master. + * (uber masters may update themselves later) + * (usage does not consider serialized regions) + */ + kmp_desc_t th_info; + kmp_team_p *th_team; /* team we belong to */ + kmp_root_p *th_root; /* pointer to root of task hierarchy */ + kmp_info_p *th_next_pool; /* next available thread in the pool */ + kmp_disp_t *th_dispatch; /* thread's dispatch data */ + int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */ + + /* The following are cached from the team info structure */ + /* TODO use these in more places as determined to be needed via profiling */ + int th_team_nproc; /* number of threads in a team */ + kmp_info_p *th_team_master; /* the team's master thread */ + int th_team_serialized; /* team is serialized */ +#if OMP_40_ENABLED + microtask_t th_teams_microtask; /* save entry address for teams construct */ + int th_teams_level; /* save initial level of teams construct */ + /* it is 0 on device but may be any on host */ +#endif + + /* The blocktime info is copied from the team struct to the thread sruct */ + /* at the start of a barrier, and the values stored in the team are used */ + /* at points in the code where the team struct is no longer guaranteed */ + /* to exist (from the POV of worker threads). 
*/ + int th_team_bt_intervals; + int th_team_bt_set; + + +#if KMP_AFFINITY_SUPPORTED + kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ +#endif + +/* + * The data set by the master at reinit, then R/W by the worker + */ + KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */ +#if KMP_NESTED_HOT_TEAMS + kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ +#endif +#if OMP_40_ENABLED + kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */ + kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */ +# if KMP_AFFINITY_SUPPORTED + int th_current_place; /* place currently bound to */ + int th_new_place; /* place to bind to in par reg */ + int th_first_place; /* first place in partition */ + int th_last_place; /* last place in partition */ +# endif +#endif +#if USE_ITT_BUILD + kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ + kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ + kmp_uint64 th_frame_time; /* frame timestamp */ + kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */ +#endif /* USE_ITT_BUILD */ + kmp_local_t th_local; + struct private_common *th_pri_head; + +/* + * Now the data only used by the worker (after initial allocation) + */ + /* TODO the first serial team should actually be stored in the info_t + * structure. 
this will help reduce initial allocation overhead */ + KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/ + +#if OMPT_SUPPORT + ompt_thread_info_t ompt_thread_info; +#endif + +/* The following are also read by the master during reinit */ + struct common_table *th_pri_common; + + volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */ + /* while awaiting queuing lock acquire */ + + volatile void *th_sleep_loc; // this points at a kmp_flag + + ident_t *th_ident; + unsigned th_x; // Random number generator data + unsigned th_a; // Random number generator data + +/* + * Tasking-related data for the thread + */ + kmp_task_team_t * th_task_team; // Task team struct + kmp_taskdata_t * th_current_task; // Innermost Task being executed + kmp_uint8 th_task_state; // alternating 0/1 for task team identification + kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels + kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack + kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack + + /* + * More stuff for keeping track of active/sleeping threads + * (this part is written by the worker thread) + */ + kmp_uint8 th_active_in_pool; // included in count of + // #active threads in pool + int th_active; // ! sleeping + // 32 bits for TCR/TCW + + + struct cons_header * th_cons; // used for consistency check + +/* + * Add the syncronizing data which is cache aligned and padded. 
+ */ + KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ]; + + KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */ + +#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) + #define NUM_LISTS 4 + kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines +#endif + +#if KMP_OS_WINDOWS + kmp_win32_cond_t th_suspend_cv; + kmp_win32_mutex_t th_suspend_mx; + int th_suspend_init; +#endif +#if KMP_OS_UNIX + kmp_cond_align_t th_suspend_cv; + kmp_mutex_align_t th_suspend_mx; + int th_suspend_init_count; +#endif + +#if USE_ITT_BUILD + kmp_itt_mark_t th_itt_mark_single; + // alignment ??? +#endif /* USE_ITT_BUILD */ +#if KMP_STATS_ENABLED + kmp_stats_list* th_stats; +#endif +} kmp_base_info_t; + +typedef union KMP_ALIGN_CACHE kmp_info { + double th_align; /* use worst case alignment */ + char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ]; + kmp_base_info_t th; +} kmp_info_t; + +/* ------------------------------------------------------------------------ */ +// OpenMP thread team data structures +// +typedef struct kmp_base_data { + volatile kmp_uint32 t_value; +} kmp_base_data_t; + +typedef union KMP_ALIGN_CACHE kmp_sleep_team { + double dt_align; /* use worst case alignment */ + char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; + kmp_base_data_t dt; +} kmp_sleep_team_t; + +typedef union KMP_ALIGN_CACHE kmp_ordered_team { + double dt_align; /* use worst case alignment */ + char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; + kmp_base_data_t dt; +} kmp_ordered_team_t; + +typedef int (*launch_t)( int gtid ); + +/* Minimum number of ARGV entries to malloc if necessary */ +#define KMP_MIN_MALLOC_ARGV_ENTRIES 100 + +// Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we +// have supported at least 96 bytes. 
Using a larger value for more space between the master write/worker +// read section and read/write by all section seems to buy more performance on EPCC PARALLEL. +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) ) +#else +# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) ) +#endif +#define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP ) + +typedef struct KMP_ALIGN_CACHE kmp_base_team { + // Synchronization Data --------------------------------------------------------------------------------- + KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered; + kmp_balign_team_t t_bar[ bs_last_barrier ]; + volatile int t_construct; // count of single directive encountered by team + kmp_lock_t t_single_lock; // team specific lock + + // Master only ----------------------------------------------------------------------------------------- + KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team + int t_master_this_cons; // "this_construct" single counter of master in parent team + ident_t *t_ident; // if volatile, have to change too much other crud to volatile too + kmp_team_p *t_parent; // parent team + kmp_team_p *t_next_pool; // next free team in the team pool + kmp_disp_t *t_dispatch; // thread's dispatch data + kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2 +#if OMP_40_ENABLED + kmp_proc_bind_t t_proc_bind; // bind type for par region +#endif // OMP_40_ENABLED +#if USE_ITT_BUILD + kmp_uint64 t_region_time; // region begin timestamp +#endif /* USE_ITT_BUILD */ + + // Master write, workers read -------------------------------------------------------------------------- + KMP_ALIGN_CACHE void **t_argv; + int t_argc; + int t_nproc; // number of threads in team + microtask_t t_pkfn; + launch_t t_invoke; // procedure to 
launch the microtask + +#if OMPT_SUPPORT + ompt_team_info_t ompt_team_info; + ompt_lw_taskteam_t *ompt_serialized_team_info; +#endif + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + kmp_int8 t_fp_control_saved; + kmp_int8 t_pad2b; + kmp_int16 t_x87_fpu_control_word; // FP control regs + kmp_uint32 t_mxcsr; +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ]; + + KMP_ALIGN_CACHE kmp_info_t **t_threads; + int t_max_argc; + int t_max_nproc; // maximum threads this team can handle (dynamicly expandable) + int t_serialized; // levels deep of serialized teams + dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system + int t_id; // team's id, assigned by debugger. + int t_level; // nested parallel level + int t_active_level; // nested active parallel level + kmp_r_sched_t t_sched; // run-time schedule for the team +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + int t_first_place; // first & last place in parent thread's partition. + int t_last_place; // Restore these values to master after par region. +#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call + + // Read/write by workers as well ----------------------------------------------------------------------- +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel' + // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel' + // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding. + char dummy_padding[1024]; +#endif + KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task + kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. 
+ // for SERIALIZED teams nested 2 or more levels deep +#if OMP_40_ENABLED + kmp_int32 t_cancel_request; // typed flag to store request state of cancellation +#endif + int t_master_active; // save on fork, restore on join + kmp_taskq_t t_taskq; // this team's task queue + void *t_copypriv_data; // team specific pointer to copyprivate data array + kmp_uint32 t_copyin_counter; +#if USE_ITT_BUILD + void *t_stack_id; // team specific stack stitching id (for ittnotify) +#endif /* USE_ITT_BUILD */ +} kmp_base_team_t; + +union KMP_ALIGN_CACHE kmp_team { + kmp_base_team_t t; + double t_align; /* use worst case alignment */ + char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ]; +}; + + +typedef union KMP_ALIGN_CACHE kmp_time_global { + double dt_align; /* use worst case alignment */ + char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; + kmp_base_data_t dt; +} kmp_time_global_t; + +typedef struct kmp_base_global { + /* cache-aligned */ + kmp_time_global_t g_time; + + /* non cache-aligned */ + volatile int g_abort; + volatile int g_done; + + int g_dynamic; + enum dynamic_mode g_dynamic_mode; +} kmp_base_global_t; + +typedef union KMP_ALIGN_CACHE kmp_global { + kmp_base_global_t g; + double g_align; /* use worst case alignment */ + char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ]; +} kmp_global_t; + + +typedef struct kmp_base_root { + // TODO: GEH - combine r_active with r_in_parallel then r_active == (r_in_parallel>= 0) + // TODO: GEH - then replace r_active with t_active_levels if we can to reduce the synch + // overhead or keeping r_active + + volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ + // GEH: This is misnamed, should be r_in_parallel + volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely. 
+ int r_in_parallel; /* keeps a count of active parallel regions per root */ + // GEH: This is misnamed, should be r_active_levels + kmp_team_t *r_root_team; + kmp_team_t *r_hot_team; + kmp_info_t *r_uber_thread; + kmp_lock_t r_begin_lock; + volatile int r_begin; + int r_blocktime; /* blocktime for this root and descendants */ +} kmp_base_root_t; + +typedef union KMP_ALIGN_CACHE kmp_root { + kmp_base_root_t r; + double r_align; /* use worst case alignment */ + char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ]; +} kmp_root_t; + +struct fortran_inx_info { + kmp_int32 data; +}; + +/* ------------------------------------------------------------------------ */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +extern int __kmp_settings; +extern int __kmp_duplicate_library_ok; +#if USE_ITT_BUILD +extern int __kmp_forkjoin_frames; +extern int __kmp_forkjoin_frames_mode; +#endif +extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method; +extern int __kmp_determ_red; + +#ifdef KMP_DEBUG +extern int kmp_a_debug; +extern int kmp_b_debug; +extern int kmp_c_debug; +extern int kmp_d_debug; +extern int kmp_e_debug; +extern int kmp_f_debug; +#endif /* KMP_DEBUG */ + +/* For debug information logging using rotating buffer */ +#define KMP_DEBUG_BUF_LINES_INIT 512 +#define KMP_DEBUG_BUF_LINES_MIN 1 + +#define KMP_DEBUG_BUF_CHARS_INIT 128 +#define KMP_DEBUG_BUF_CHARS_MIN 2 + +extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */ +extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */ +extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */ +extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */ + +extern char *__kmp_debug_buffer; /* Debug buffer itself */ +extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far 
*/ +extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */ +/* end rotating debug buffer */ + +#ifdef KMP_DEBUG +extern int __kmp_par_range; /* +1 => only go par for constructs in range */ + +#define KMP_PAR_RANGE_ROUTINE_LEN 1024 +extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN]; +#define KMP_PAR_RANGE_FILENAME_LEN 1024 +extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN]; +extern int __kmp_par_range_lb; +extern int __kmp_par_range_ub; +#endif + +/* For printing out dynamic storage map for threads and teams */ +extern int __kmp_storage_map; /* True means print storage map for threads and teams */ +extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */ +extern int __kmp_storage_map_verbose_specified; + +extern kmp_cpuinfo_t __kmp_cpuinfo; + +extern volatile int __kmp_init_serial; +extern volatile int __kmp_init_gtid; +extern volatile int __kmp_init_common; +extern volatile int __kmp_init_middle; +extern volatile int __kmp_init_parallel; +extern volatile int __kmp_init_monitor; +extern volatile int __kmp_init_user_locks; +extern int __kmp_init_counter; +extern int __kmp_root_counter; +extern int __kmp_version; + +/* list of address of allocated caches for commons */ +extern kmp_cached_addr_t *__kmp_threadpriv_cache_list; + +/* Barrier algorithm types and options */ +extern kmp_uint32 __kmp_barrier_gather_bb_dflt; +extern kmp_uint32 __kmp_barrier_release_bb_dflt; +extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt; +extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt; +extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ]; +extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ]; +extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ]; +extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ]; +extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ]; +extern char const 
*__kmp_barrier_pattern_env_name [ bs_last_barrier ]; +extern char const *__kmp_barrier_type_name [ bs_last_barrier ]; +extern char const *__kmp_barrier_pattern_name [ bp_last_bar ]; + +/* Global Locks */ +extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */ +extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ +extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ +extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ +extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ + +extern kmp_lock_t __kmp_global_lock; /* control OS/global access */ +extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */ +extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */ + +/* used for yielding spin-waits */ +extern unsigned int __kmp_init_wait; /* initial number of spin-tests */ +extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */ + +extern enum library_type __kmp_library; + +extern enum sched_type __kmp_sched; /* default runtime scheduling */ +extern enum sched_type __kmp_static; /* default static scheduling method */ +extern enum sched_type __kmp_guided; /* default guided scheduling method */ +extern enum sched_type __kmp_auto; /* default auto scheduling method */ +extern int __kmp_chunk; /* default runtime chunk size */ + +extern size_t __kmp_stksize; /* stack size per thread */ +extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */ +extern size_t __kmp_stkoffset; /* stack offset per thread */ +extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */ + +extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */ +extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */ +extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? 
*/ +extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */ +extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ +extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */ +extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */ +extern int __kmp_env_checks; /* was KMP_CHECKS specified? */ +extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */ +extern int __kmp_generate_warnings; /* should we issue warnings? */ +extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */ + +#ifdef DEBUG_SUSPEND +extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */ +#endif + +extern kmp_uint32 __kmp_yield_init; +extern kmp_uint32 __kmp_yield_next; +extern kmp_uint32 __kmp_yielding_on; +extern kmp_uint32 __kmp_yield_cycle; +extern kmp_int32 __kmp_yield_on_count; +extern kmp_int32 __kmp_yield_off_count; + +/* ------------------------------------------------------------------------- */ +extern int __kmp_allThreadsSpecified; + +extern size_t __kmp_align_alloc; +/* following data protected by initialization routines */ +extern int __kmp_xproc; /* number of processors in the system */ +extern int __kmp_avail_proc; /* number of processors available to the process */ +extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */ +extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */ +extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */ +extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */ +extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */ +extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */ +extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */ +extern int __kmp_tp_cached; /* whether threadprivate 
cache has been created (__kmpc_threadprivate_cached()) */ +extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */ +extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */ +extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */ +extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */ +#ifdef KMP_ADJUST_BLOCKTIME +extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */ +#endif /* KMP_ADJUST_BLOCKTIME */ +#ifdef KMP_DFLT_NTH_CORES +extern int __kmp_ncores; /* Total number of cores for threads placement */ +#endif +extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */ + +extern int __kmp_need_register_atfork_specified; +extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */ +extern int __kmp_gtid_mode; /* Method of getting gtid, values: + 0 - not set, will be set at runtime + 1 - using stack search + 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS)) + 3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only. 
+ */ +extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */ +#ifdef KMP_TDATA_GTID +#if KMP_OS_WINDOWS +extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */ +#else +extern __thread int __kmp_gtid; +#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */ +#endif +extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */ +extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */ +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */ +extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */ +extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */ +#if KMP_NESTED_HOT_TEAMS +extern int __kmp_hot_teams_mode; +extern int __kmp_hot_teams_max_level; +#endif + +# if KMP_OS_LINUX +extern enum clock_function_type __kmp_clock_function; +extern int __kmp_clock_function_param; +# endif /* KMP_OS_LINUX */ + +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) +extern enum mic_type __kmp_mic_type; +#endif + +# ifdef USE_LOAD_BALANCE +extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */ +# endif /* USE_LOAD_BALANCE */ + +// OpenMP 3.1 - Nested num threads array +typedef struct kmp_nested_nthreads_t { + int * nth; + int size; + int used; +} kmp_nested_nthreads_t; + +extern kmp_nested_nthreads_t __kmp_nested_nth; + +#if KMP_USE_ADAPTIVE_LOCKS + +// Parameters for the speculative lock backoff system. +struct kmp_adaptive_backoff_params_t { + // Number of soft retries before it counts as a hard retry. + kmp_uint32 max_soft_retries; + // Badness is a bit mask : 0,1,3,7,15,... 
on each hard failure we move one to the right + kmp_uint32 max_badness; +}; + +extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params; + +#if KMP_DEBUG_ADAPTIVE_LOCKS +extern char * __kmp_speculative_statsfile; +#endif + +#endif // KMP_USE_ADAPTIVE_LOCKS + +#if OMP_40_ENABLED +extern int __kmp_display_env; /* TRUE or FALSE */ +extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */ +extern int __kmp_omp_cancellation; /* TRUE or FALSE */ +#endif + +/* ------------------------------------------------------------------------- */ + +/* --------------------------------------------------------------------------- */ +/* the following are protected by the fork/join lock */ +/* write: lock read: anytime */ +extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */ +/* read/write: lock */ +extern volatile kmp_team_t * __kmp_team_pool; +extern volatile kmp_info_t * __kmp_thread_pool; + +/* total number of threads reachable from some root thread including all root threads*/ +extern volatile int __kmp_nth; +/* total number of threads reachable from some root thread including all root threads, + and those in the thread pool */ +extern volatile int __kmp_all_nth; +extern int __kmp_thread_pool_nth; +extern volatile int __kmp_thread_pool_active_nth; + +extern kmp_root_t **__kmp_root; /* root of thread hierarchy */ +/* end data protected by fork/join lock */ +/* --------------------------------------------------------------------------- */ + +extern kmp_global_t __kmp_global; /* global status */ + +extern kmp_info_t __kmp_monitor; +extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library. +extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library. + +#if USE_DEBUGGER + +#define _KMP_GEN_ID( counter ) \ + ( \ + __kmp_debugging \ + ? 
\ + KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \ + : \ + ~ 0 \ + ) +#else +#define _KMP_GEN_ID( counter ) \ + ( \ + ~ 0 \ + ) +#endif /* USE_DEBUGGER */ + +#define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter ) +#define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter ) + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... ); + +extern void __kmp_serial_initialize( void ); +extern void __kmp_middle_initialize( void ); +extern void __kmp_parallel_initialize( void ); + +extern void __kmp_internal_begin( void ); +extern void __kmp_internal_end_library( int gtid ); +extern void __kmp_internal_end_thread( int gtid ); +extern void __kmp_internal_end_atexit( void ); +extern void __kmp_internal_end_fini( void ); +extern void __kmp_internal_end_dtor( void ); +extern void __kmp_internal_end_dest( void* ); + +extern int __kmp_register_root( int initial_thread ); +extern void __kmp_unregister_root( int gtid ); + +extern int __kmp_ignore_mppbeg( void ); +extern int __kmp_ignore_mppend( void ); + +extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ); +extern void __kmp_exit_single( int gtid ); + +extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); +extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); + +#ifdef USE_LOAD_BALANCE +extern int __kmp_get_load_balance( int ); +#endif + +#ifdef BUILD_TV +extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr ); +#endif + +extern int __kmp_get_global_thread_id( void ); +extern int __kmp_get_global_thread_id_reg( void ); +extern void __kmp_exit_thread( int exit_status ); +extern void __kmp_abort( char const * format, ... 
); +extern void __kmp_abort_thread( void ); +extern void __kmp_abort_process( void ); +extern void __kmp_warn( char const * format, ... ); + +extern void __kmp_set_num_threads( int new_nth, int gtid ); + +// Returns current thread (pointer to kmp_info_t): obtains the gtid from __kmp_entry_gtid() and returns __kmp_threads[gtid]. Current thread *must* be registered. +static inline kmp_info_t * __kmp_entry_thread() +{ + int gtid = __kmp_entry_gtid(); + + return __kmp_threads[gtid]; +} + +extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels ); +extern int __kmp_get_max_active_levels( int gtid ); +extern int __kmp_get_ancestor_thread_num( int gtid, int level ); +extern int __kmp_get_team_size( int gtid, int level ); +extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk ); +extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk ); + +extern unsigned short __kmp_get_random( kmp_info_t * thread ); +extern void __kmp_init_random( kmp_info_t * thread ); + +extern kmp_r_sched_t __kmp_get_schedule_global( void ); +extern void __kmp_adjust_num_threads( int new_nproc ); + +extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL ); +extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL ); +extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL ); +#define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR ) +#define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR ) +#define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR ) + +#if USE_FAST_MEMORY +extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL ); +extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL ); +extern void __kmp_free_fast_memory( kmp_info_t *this_thr ); +extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr ); +#define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR ) +#define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) 
KMP_SRC_LOC_CURR ) +#endif + +extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL ); +extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL ); +extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL ); +extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL ); +#define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR ) +#define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR ) +#define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR ) +#define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR ) + +#define KMP_INTERNAL_MALLOC(sz) malloc(sz) +#define KMP_INTERNAL_FREE(p) free(p) +#define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz)) +#define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz)) + +extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads ); + +#if OMP_40_ENABLED +extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind ); +extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads ); +#endif + +extern void __kmp_yield( int cond ); + +extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, + kmp_int32 chunk ); +extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, + kmp_int32 chunk ); +extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, + kmp_int64 chunk ); +extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, + kmp_int64 chunk ); + +extern int __kmpc_dispatch_next_4( 
ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ); +extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ); +extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ); +extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ); + +extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid ); +extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid ); +extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ); +extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ); + + +#ifdef KMP_GOMP_COMPAT + +extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, + kmp_int32 chunk, int push_ws ); +extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, + kmp_int32 chunk, int push_ws ); +extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, + kmp_int64 chunk, int push_ws ); +extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, + kmp_int64 chunk, int push_ws ); +extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid ); +extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid ); +extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid ); +extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid ); + +#endif /* KMP_GOMP_COMPAT */ + + +extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker ); +extern kmp_uint32 __kmp_neq_4( 
kmp_uint32 value, kmp_uint32 checker ); +extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker ); +extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker ); +extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker ); + +extern kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker ); +extern kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker ); +extern kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker ); +extern kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker ); +extern kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker ); + +extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj ); +extern kmp_uint64 __kmp_wait_yield_8( kmp_uint64 volatile * spinner, kmp_uint64 checker, kmp_uint32 (*pred) (kmp_uint64, kmp_uint64), void * obj ); + +class kmp_flag_32; +class kmp_flag_64; +class kmp_flag_oncore; +extern void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin +#if USE_ITT_BUILD + , void * itt_sync_obj +#endif + ); +extern void __kmp_release_32(kmp_flag_32 *flag); +extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin +#if USE_ITT_BUILD + , void * itt_sync_obj +#endif + ); +extern void __kmp_release_64(kmp_flag_64 *flag); +extern void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin +#if USE_ITT_BUILD + , void * itt_sync_obj +#endif + ); +extern void __kmp_release_oncore(kmp_flag_oncore *flag); + +extern void __kmp_infinite_loop( void ); + +extern void __kmp_cleanup( void ); + +#if KMP_HANDLE_SIGNALS + extern int __kmp_handle_signals; + extern void __kmp_install_signals( int parallel_init ); + extern void __kmp_remove_signals( void ); +#endif + +extern void __kmp_clear_system_time( void ); +extern void __kmp_read_system_time( double *delta ); + +extern void __kmp_check_stack_overlap( kmp_info_t *thr ); + +extern void 
__kmp_expand_host_name( char *buffer, size_t size ); +extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern ); + +#if KMP_OS_WINDOWS +extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */ +#endif + +extern void __kmp_runtime_initialize( void ); /* machine specific initialization */ +extern void __kmp_runtime_destroy( void ); + +#if KMP_AFFINITY_SUPPORTED +extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask); +extern void __kmp_affinity_initialize(void); +extern void __kmp_affinity_uninitialize(void); +extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ +#if OMP_40_ENABLED +extern void __kmp_affinity_set_place(int gtid); +#endif +extern void __kmp_affinity_determine_capable( const char *env_var ); +extern int __kmp_aux_set_affinity(void **mask); +extern int __kmp_aux_get_affinity(void **mask); +extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask); +extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask); +extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask); +extern void __kmp_balanced_affinity( int tid, int team_size ); +#endif /* KMP_AFFINITY_SUPPORTED */ + +extern void __kmp_cleanup_hierarchy(); +extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +extern int __kmp_futex_determine_capable( void ); + +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +extern void __kmp_gtid_set_specific( int gtid ); +extern int __kmp_gtid_get_specific( void ); + +extern double __kmp_read_cpu_time( void ); + +extern int __kmp_read_system_info( struct kmp_sys_info *info ); + +extern void __kmp_create_monitor( kmp_info_t *th ); + +extern void *__kmp_launch_thread( kmp_info_t *thr ); + +extern void __kmp_create_worker( int gtid, kmp_info_t 
*th, size_t stack_size ); + +#if KMP_OS_WINDOWS +extern int __kmp_still_running(kmp_info_t *th); +extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val ); +extern void __kmp_free_handle( kmp_thread_t tHandle ); +#endif + +extern void __kmp_reap_monitor( kmp_info_t *th ); +extern void __kmp_reap_worker( kmp_info_t *th ); +extern void __kmp_terminate_thread( int gtid ); + +extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag ); +extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag ); +extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag ); +extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag ); +extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag ); +extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag ); + +extern void __kmp_elapsed( double * ); +extern void __kmp_elapsed_tick( double * ); + +extern void __kmp_enable( int old_state ); +extern void __kmp_disable( int *old_state ); + +extern void __kmp_thread_sleep( int millis ); + +extern void __kmp_common_initialize( void ); +extern void __kmp_common_destroy( void ); +extern void __kmp_common_destroy_gtid( int gtid ); + +#if KMP_OS_UNIX +extern void __kmp_register_atfork( void ); +#endif +extern void __kmp_suspend_initialize( void ); +extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th ); + +extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root, + kmp_team_t *team, int tid); +#if OMP_40_ENABLED +extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, +#if OMPT_SUPPORT + ompt_parallel_id_t ompt_parallel_id, +#endif + kmp_proc_bind_t proc_bind, + kmp_internal_control_t *new_icvs, + int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); +#else +extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, +#if OMPT_SUPPORT + ompt_parallel_id_t ompt_parallel_id, +#endif + kmp_internal_control_t *new_icvs, + int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); +#endif // 
OMP_40_ENABLED +extern void __kmp_free_thread( kmp_info_t * ); +extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) ); +extern kmp_team_t * __kmp_reap_team( kmp_team_t * ); + +/* ------------------------------------------------------------------------ */ + +extern void __kmp_initialize_bget( kmp_info_t *th ); +extern void __kmp_finalize_bget( kmp_info_t *th ); + +KMP_EXPORT void *kmpc_malloc( size_t size ); +KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize ); +KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size ); +KMP_EXPORT void kmpc_free( void *ptr ); + +/* ------------------------------------------------------------------------ */ +/* declarations for internal use */ + +extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split, + size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) ); +extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid ); + +/*! + * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call. + */ +enum fork_context_e +{ + fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */ + fork_context_intel, /**< Called from Intel generated code. 
*/ + fork_context_last +}; +extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context, + kmp_int32 argc, +#if OMPT_SUPPORT + void *unwrapped_task, +#endif + microtask_t microtask, launch_t invoker, +/* TODO: revert workaround for Intel(R) 64 tracker #96 */ +#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX + va_list *ap +#else + va_list ap +#endif + ); + +extern void __kmp_join_call( ident_t *loc, int gtid +#if OMPT_SUPPORT + , enum fork_context_e fork_context +#endif +#if OMP_40_ENABLED + , int exit_teams = 0 +#endif + ); + +extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid); +extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ); +extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ); +extern int __kmp_invoke_task_func( int gtid ); +extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); +extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); + +// should never have been exported +KMP_EXPORT int __kmpc_invoke_task_func( int gtid ); +#if OMP_40_ENABLED +extern int __kmp_invoke_teams_master( int gtid ); +extern void __kmp_teams_master( int gtid ); +#endif +extern void __kmp_save_internal_controls( kmp_info_t * thread ); +extern void __kmp_user_set_library (enum library_type arg); +extern void __kmp_aux_set_library (enum library_type arg); +extern void __kmp_aux_set_stacksize( size_t arg); +extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid); +extern void __kmp_aux_set_defaults( char const * str, int len ); + +/* Functions below put here to call them from __kmp_aux_env_initialize() in kmp_settings.c */ +void kmpc_set_blocktime (int arg); +void ompc_set_nested( int flag ); +void ompc_set_dynamic( int flag ); +void ompc_set_num_threads( int arg ); + +extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr, + kmp_team_t *team, int 
tid ); +extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr ); +extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, + kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry ); +extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, + kmp_team_t *team, int tid, int set_curr_task ); + +int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, + int *thread_finished, +#if USE_ITT_BUILD + void * itt_sync_obj, +#endif /* USE_ITT_BUILD */ + kmp_int32 is_constrained); +int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, + int *thread_finished, +#if USE_ITT_BUILD + void * itt_sync_obj, +#endif /* USE_ITT_BUILD */ + kmp_int32 is_constrained); +int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, + int *thread_finished, +#if USE_ITT_BUILD + void * itt_sync_obj, +#endif /* USE_ITT_BUILD */ + kmp_int32 is_constrained); + +extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ); +extern void __kmp_reap_task_teams( void ); +extern void __kmp_wait_to_unref_task_teams( void ); +extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always ); +extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team ); +extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team +#if USE_ITT_BUILD + , void * itt_sync_obj +#endif /* USE_ITT_BUILD */ + , int wait=1 +); +extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ); + +extern int __kmp_is_address_mapped( void *addr ); +extern kmp_uint64 __kmp_hardware_timestamp(void); + +#if KMP_OS_UNIX +extern int __kmp_read_from_file( char const *path, char const *format, ... 
); +#endif + +/* ------------------------------------------------------------------------ */ +// +// Assembly routines that have no compiler intrinsic replacement +// + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +extern void __kmp_query_cpuid( kmp_cpuinfo_t *p ); + +#define __kmp_load_mxcsr(p) _mm_setcsr(*(p)) /* set the MXCSR register from the value at *(p) */ +static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); } /* save the current MXCSR register value into *p */ + +extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p ); +extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +extern void __kmp_clear_x87_fpu_status_word(); +# define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ + +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[] +#if OMPT_SUPPORT + , void **exit_frame_ptr +#endif +); + + +/* ------------------------------------------------------------------------ */ + +KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags ); +KMP_EXPORT void __kmpc_end ( ident_t * ); + +KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor, + kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length ); +KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor ); +KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size ); + +KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * ); +KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * ); +KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * ); +KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * ); + +KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * ); +KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... 
); + +KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid ); + +KMP_EXPORT void __kmpc_flush ( ident_t *); +KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); +KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); + +#if OMP_41_ENABLED +KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint ); +#endif + +KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid ); + +KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid ); + +KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid ); + +KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk ); + +KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid ); + +KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit ); + +extern void KMPC_SET_NUM_THREADS ( int arg ); +extern void KMPC_SET_DYNAMIC ( int flag ); +extern void KMPC_SET_NESTED ( int flag ); + +/* --------------------------------------------------------------------------- */ + +/* + * Taskq interface 
routines + */ + +KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk, + size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds); +KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); +KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); +KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status); +KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); +KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task); + +/* ------------------------------------------------------------------------ */ + +/* + * OMP 3.0 tasking interface routines + */ + +KMP_EXPORT kmp_int32 +__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); +KMP_EXPORT kmp_task_t* +__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry ); +KMP_EXPORT void +__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); +KMP_EXPORT void +__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); +KMP_EXPORT kmp_int32 +__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); +KMP_EXPORT kmp_int32 +__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ); + +KMP_EXPORT kmp_int32 +__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ); + +#if TASK_UNUSED +void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); +void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); +#endif // TASK_UNUSED + +/* ------------------------------------------------------------------------ */ + +#if OMP_40_ENABLED + +KMP_EXPORT void __kmpc_taskgroup( ident_t 
* loc, int gtid ); +KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid ); + +KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ); +KMP_EXPORT void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ); +extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task ); + +extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate ); + +KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); +KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); +KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid); +KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); + +#if OMP_41_ENABLED + +KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask ); +KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ); + +#endif + +#endif + + +/* + * Lock interface routines (fast versions with gtid passed in) + */ +KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 
+KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); + +#if OMP_41_ENABLED +KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); +KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); +#endif + +/* ------------------------------------------------------------------------ */ + +/* + * Interface to fast scalable reduce methods routines + */ + +KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_vars, size_t reduce_size, + void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck ); +KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); +KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_vars, size_t reduce_size, + void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck ); +KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); + +/* + * internal fast reduction routines + */ + +extern PACKED_REDUCTION_METHOD_T +__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_vars, size_t reduce_size, + void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck ); + +// this function is for testing set/get/determine reduce method +KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void ); + +KMP_EXPORT kmp_uint64 __kmpc_get_taskid(); +KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); + +// this function exported for testing of KMP_PLACE_THREADS functionality +KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int); + +/* ------------------------------------------------------------------------ */ +/* 
------------------------------------------------------------------------ */ + +// C++ port +// missing 'extern "C"' declarations + +KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc ); +KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid ); +KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads ); + +#if OMP_40_ENABLED +KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind ); +KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads ); +KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...); + +#endif + +KMP_EXPORT void* +__kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid, + void * data, size_t size, void *** cache ); + +// Symbols for MS mutual detection. +extern int _You_must_link_with_exactly_one_OpenMP_library; +extern int _You_must_link_with_Intel_OpenMP_library; +#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 ) + extern int _You_must_link_with_Microsoft_OpenMP_library; +#endif + + +// The routines below are not exported. +// Consider making them 'static' in corresponding source files. +void +kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); +struct private_common * +kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); + +// +// ompc_, kmpc_ entries moved from omp.h. 
+// +#if KMP_OS_WINDOWS +# define KMPC_CONVENTION __cdecl +#else +# define KMPC_CONVENTION +#endif + +#ifndef __OMP_H +typedef enum omp_sched_t { + omp_sched_static = 1, + omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4 +} omp_sched_t; +typedef void * kmp_affinity_mask_t; +#endif + +KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); +KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); +KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); +KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); +KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); +KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); +KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); + +KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); +KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); +KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int); +KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *); + +#ifdef __cplusplus +} +#endif + +#endif /* KMP_H */ + diff --git a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp index 4c16b2f7f35..4e6699ff214 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp @@ -1,4735 +1,4735 @@ -/* - * kmp_affinity.cpp -- affinity management - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_str.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_affinity.h" - -// Store the real or imagined machine hierarchy here -static hierarchy_info machine_hierarchy; - -void __kmp_cleanup_hierarchy() { - machine_hierarchy.fini(); -} - -void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { - kmp_uint32 depth; - // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier. - if (TCR_1(machine_hierarchy.uninitialized)) - machine_hierarchy.init(NULL, nproc); - - // Adjust the hierarchy in case num threads exceeds original - if (nproc > machine_hierarchy.base_num_threads) - machine_hierarchy.resize(nproc); - - depth = machine_hierarchy.depth; - KMP_DEBUG_ASSERT(depth > 0); - - thr_bar->depth = depth; - thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1; - thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; -} - -#if KMP_AFFINITY_SUPPORTED - -// -// Print the affinity mask to the character array in a pretty format. -// -#if KMP_USE_HWLOC -char * -__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) -{ - int num_chars_to_write, num_chars_written; - char* scan; - KMP_ASSERT(buf_len >= 40); - - // bufsize of 0 just retrieves the needed buffer size. - num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask); - - // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes - // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not - // take into account the '\0' character. 
- if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) { - KMP_SNPRINTF(buf, buf_len, "{}"); - } else if(num_chars_to_write < buf_len - 3) { - // no problem fitting the mask into buf_len number of characters - buf[0] = '{'; - // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer - num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask); - buf[num_chars_written+1] = '}'; - buf[num_chars_written+2] = '\0'; - } else { - // Need to truncate the affinity mask string and add ellipsis. - // To do this, we first write out the '{' + str(mask) - buf[0] = '{'; - hwloc_bitmap_list_snprintf(buf+1, buf_len-7, (hwloc_bitmap_t)mask); - // then, what we do here is go to the 7th to last character, then go backwards until we are NOT - // on a digit then write "...}\0". This way it is a clean ellipsis addition and we don't - // overwrite part of an affinity number. i.e., we avoid something like { 45, 67, 8...} and get - // { 45, 67,...} instead. - scan = buf + buf_len - 7; - while(*scan >= '0' && *scan <= '9' && scan >= buf) - scan--; - *(scan+1) = '.'; - *(scan+2) = '.'; - *(scan+3) = '.'; - *(scan+4) = '}'; - *(scan+5) = '\0'; - } - return buf; -} -#else -char * -__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) -{ - KMP_ASSERT(buf_len >= 40); - char *scan = buf; - char *end = buf + buf_len - 1; - - // - // Find first element / check for empty set. - // - size_t i; - for (i = 0; i < KMP_CPU_SETSIZE; i++) { - if (KMP_CPU_ISSET(i, mask)) { - break; - } - } - if (i == KMP_CPU_SETSIZE) { - KMP_SNPRINTF(scan, end-scan+1, "{}"); - while (*scan != '\0') scan++; - KMP_ASSERT(scan <= end); - return buf; - } - - KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i); - while (*scan != '\0') scan++; - i++; - for (; i < KMP_CPU_SETSIZE; i++) { - if (! KMP_CPU_ISSET(i, mask)) { - continue; - } - - // - // Check for buffer overflow. 
A string of the form "," will have - // at most 10 characters, plus we want to leave room to print ",...}" - // if the set is too large to print for a total of 15 characters. - // We already left room for '\0' in setting end. - // - if (end - scan < 15) { - break; - } - KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i); - while (*scan != '\0') scan++; - } - if (i < KMP_CPU_SETSIZE) { - KMP_SNPRINTF(scan, end-scan+1, ",..."); - while (*scan != '\0') scan++; - } - KMP_SNPRINTF(scan, end-scan+1, "}"); - while (*scan != '\0') scan++; - KMP_ASSERT(scan <= end); - return buf; -} -#endif // KMP_USE_HWLOC - - -void -__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) -{ - KMP_CPU_ZERO(mask); - -# if KMP_GROUP_AFFINITY - - if (__kmp_num_proc_groups > 1) { - int group; - KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL); - for (group = 0; group < __kmp_num_proc_groups; group++) { - int i; - int num = __kmp_GetActiveProcessorCount(group); - for (i = 0; i < num; i++) { - KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask); - } - } - } - else - -# endif /* KMP_GROUP_AFFINITY */ - - { - int proc; - for (proc = 0; proc < __kmp_xproc; proc++) { - KMP_CPU_SET(proc, mask); - } - } -} - -// -// When sorting by labels, __kmp_affinity_assign_child_nums() must first be -// called to renumber the labels from [0..n] and place them into the child_num -// vector of the address object. This is done in case the labels used for -// the children at one node of the hierarchy differ from those used for -// another node at the same level. Example: suppose the machine has 2 nodes -// with 2 packages each. The first node contains packages 601 and 602, and -// second node contains packages 603 and 604. If we try to sort the table -// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604 -// because we are paying attention to the labels themselves, not the ordinal -// child numbers. 
By using the child numbers in the sort, the result is -// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604. -// -static void -__kmp_affinity_assign_child_nums(AddrUnsPair *address2os, - int numAddrs) -{ - KMP_DEBUG_ASSERT(numAddrs > 0); - int depth = address2os->first.depth; - unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *lastLabel = (unsigned *)__kmp_allocate(depth - * sizeof(unsigned)); - int labCt; - for (labCt = 0; labCt < depth; labCt++) { - address2os[0].first.childNums[labCt] = counts[labCt] = 0; - lastLabel[labCt] = address2os[0].first.labels[labCt]; - } - int i; - for (i = 1; i < numAddrs; i++) { - for (labCt = 0; labCt < depth; labCt++) { - if (address2os[i].first.labels[labCt] != lastLabel[labCt]) { - int labCt2; - for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) { - counts[labCt2] = 0; - lastLabel[labCt2] = address2os[i].first.labels[labCt2]; - } - counts[labCt]++; - lastLabel[labCt] = address2os[i].first.labels[labCt]; - break; - } - } - for (labCt = 0; labCt < depth; labCt++) { - address2os[i].first.childNums[labCt] = counts[labCt]; - } - for (; labCt < (int)Address::maxDepth; labCt++) { - address2os[i].first.childNums[labCt] = 0; - } - } -} - - -// -// All of the __kmp_affinity_create_*_map() routines should set -// __kmp_affinity_masks to a vector of affinity mask objects of length -// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and -// return the number of levels in the machine topology tree (zero if -// __kmp_affinity_type == affinity_none). -// -// All of the __kmp_affinity_create_*_map() routines should set *fullMask -// to the affinity mask for the initialization thread. They need to save and -// restore the mask, and it could be needed later, so saving it is just an -// optimization to avoid calling kmp_get_system_affinity() again. 
-// -static kmp_affin_mask_t *fullMask = NULL; - -kmp_affin_mask_t * -__kmp_affinity_get_fullMask() { return fullMask; } - - -static int nCoresPerPkg, nPackages; -static int __kmp_nThreadsPerCore; -#ifndef KMP_DFLT_NTH_CORES -static int __kmp_ncores; -#endif - -// -// __kmp_affinity_uniform_topology() doesn't work when called from -// places which support arbitrarily many levels in the machine topology -// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map() -// __kmp_affinity_create_x2apicid_map(). -// -inline static bool -__kmp_affinity_uniform_topology() -{ - return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages); -} - - -// -// Print out the detailed machine topology map, i.e. the physical locations -// of each OS proc. -// -static void -__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth, - int pkgLevel, int coreLevel, int threadLevel) -{ - int proc; - - KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY"); - for (proc = 0; proc < len; proc++) { - int level; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - for (level = 0; level < depth; level++) { - if (level == threadLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread)); - } - else if (level == coreLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core)); - } - else if (level == pkgLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package)); - } - else if (level > pkgLevel) { - __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node), - level - pkgLevel - 1); - } - else { - __kmp_str_buf_print(&buf, "L%d ", level); - } - __kmp_str_buf_print(&buf, "%d ", - address2os[proc].first.labels[level]); - } - KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second, - buf.str); - __kmp_str_buf_free(&buf); - } -} - -#if KMP_USE_HWLOC -static int -__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Save the affinity mask 
for the current thread. - // - kmp_affin_mask_t *oldMask; - KMP_CPU_ALLOC(oldMask); - __kmp_get_system_affinity(oldMask, TRUE); - - unsigned depth = hwloc_topology_get_depth(__kmp_hwloc_topology); - int threadLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_PU); - int coreLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_CORE); - int pkgLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET); - __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 0; - - // - // This makes an assumption about the topology being four levels: - // machines -> packages -> cores -> hardware threads - // - hwloc_obj_t current_level_iterator = hwloc_get_root_obj(__kmp_hwloc_topology); - hwloc_obj_t child_iterator; - for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL); - child_iterator != NULL; - child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator)) - { - nPackages++; - } - current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, pkgLevel, 0); - for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL); - child_iterator != NULL; - child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator)) - { - nCoresPerPkg++; - } - current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, coreLevel, 0); - for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL); - child_iterator != NULL; - child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator)) - { - __kmp_nThreadsPerCore++; - } - - if (! KMP_AFFINITY_CAPABLE()) - { - // - // Hack to try and infer the machine topology using only the data - // available from cpuid on the current thread, and __kmp_xproc. 
- // - KMP_ASSERT(__kmp_affinity_type == affinity_none); - - __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; - nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - return 0; - } - - // - // Allocate the data structure to be returned. - // - AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); - - unsigned num_hardware_threads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel); - unsigned i; - hwloc_obj_t hardware_thread_iterator; - int nActiveThreads = 0; - for(i=0;iparent->parent->logical_index; - addr.labels[1] = hardware_thread_iterator->parent->logical_index % nCoresPerPkg; - addr.labels[2] = hardware_thread_iterator->logical_index % __kmp_nThreadsPerCore; - retval[nActiveThreads] = AddrUnsPair(addr, hardware_thread_iterator->os_index); - nActiveThreads++; - } - - // - // If there's only one thread context to bind to, return now. 
- // - KMP_ASSERT(nActiveThreads > 0); - if (nActiveThreads == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; - } - - // - // Form an Address object which only includes the package level. - // - Address addr(1); - addr.labels[0] = retval[0].first.labels[pkgLevel-1]; - retval[0].first = addr; - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); - } - - *address2os = retval; - KMP_CPU_FREE(oldMask); - return 1; - } - - // - // Sort the table by physical Id. - // - qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels); - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return if affinity is not enabled. 
- // - __kmp_ncores = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, coreLevel); - - // - // Check to see if the machine topology is uniform - // - unsigned npackages = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, pkgLevel); - unsigned ncores = __kmp_ncores; - unsigned nthreads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel); - unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads); - - // - // Print the machine topology summary. - // - if (__kmp_affinity_verbose) { - char mask[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - - __kmp_str_buf_print(&buf, "%d", npackages); - //for (level = 1; level <= pkgLevel; level++) { - // __kmp_str_buf_print(&buf, " x %d", maxCt[level]); - // } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - - __kmp_str_buf_free(&buf); - } - - if (__kmp_affinity_type == affinity_none) { - KMP_CPU_FREE(oldMask); - return 0; - } - - // - // Find any levels with radiix 1, and remove them from the map - // (except for the package level). - // - int new_depth = 0; - int level; - unsigned proc; - for (level = 1; level < (int)depth; level++) { - if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) { - continue; - } - new_depth++; - } - - // - // If we are removing any levels, allocate a new vector to return, - // and copy the relevant information to it. 
- // - if (new_depth != depth-1) { - AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate( - sizeof(AddrUnsPair) * nActiveThreads); - for (proc = 0; (int)proc < nActiveThreads; proc++) { - Address addr(new_depth); - new_retval[proc] = AddrUnsPair(addr, retval[proc].second); - } - int new_level = 0; - for (level = 1; level < (int)depth; level++) { - if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) { - if (level == threadLevel) { - threadLevel = -1; - } - else if ((threadLevel >= 0) && (level < threadLevel)) { - threadLevel--; - } - if (level == coreLevel) { - coreLevel = -1; - } - else if ((coreLevel >= 0) && (level < coreLevel)) { - coreLevel--; - } - if (level < pkgLevel) { - pkgLevel--; - } - continue; - } - for (proc = 0; (int)proc < nActiveThreads; proc++) { - new_retval[proc].first.labels[new_level] - = retval[proc].first.labels[level]; - } - new_level++; - } - - __kmp_free(retval); - retval = new_retval; - depth = new_depth; - } - - if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. - // - __kmp_affinity_gran_levels = 0; - if ((threadLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { - __kmp_affinity_gran_levels++; - } - if ((coreLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { - __kmp_affinity_gran_levels++; - } - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels++; - } - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, nActiveThreads, depth-1, pkgLevel-1, - coreLevel-1, threadLevel-1); - } - - KMP_CPU_FREE(oldMask); - *address2os = retval; - if(depth == 0) return 0; - else return depth-1; -} -#endif // KMP_USE_HWLOC - -// -// If we don't know how to retrieve the machine's processor topology, or -// encounter an error in doing so, this routine is called to form a "flat" -// mapping of os thread id's <-> processor id's. 
-// -static int -__kmp_affinity_create_flat_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Even if __kmp_affinity_type == affinity_none, this routine might still - // called to set __kmp_ncores, as well as - // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. - // - if (! KMP_AFFINITY_CAPABLE()) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - __kmp_ncores = nPackages = __kmp_xproc; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffFlatTopology, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - return 0; - } - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return now if affinity is not enabled. - // - __kmp_ncores = nPackages = __kmp_avail_proc; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask); - - KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - if (__kmp_affinity_type == affinity_none) { - return 0; - } - - // - // Contruct the data structure to be returned. 
- // - *address2os = (AddrUnsPair*) - __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc); - int avail_ct = 0; - unsigned int i; - KMP_CPU_SET_ITERATE(i, fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(i, fullMask)) { - continue; - } - - Address addr(1); - addr.labels[0] = i; - (*address2os)[avail_ct++] = AddrUnsPair(addr,i); - } - if (__kmp_affinity_verbose) { - KMP_INFORM(OSProcToPackage, "KMP_AFFINITY"); - } - - if (__kmp_affinity_gran_levels < 0) { - // - // Only the package level is modeled in the machine topology map, - // so the #levels of granularity is either 0 or 1. - // - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels = 1; - } - else { - __kmp_affinity_gran_levels = 0; - } - } - return 1; -} - - -# if KMP_GROUP_AFFINITY - -// -// If multiple Windows* OS processor groups exist, we can create a 2-level -// topology map with the groups at level 0 and the individual procs at -// level 1. -// -// This facilitates letting the threads float among all procs in a group, -// if granularity=group (the default when there are multiple groups). -// -static int -__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // If we don't have multiple processor groups, return now. - // The flat mapping will be used. - // - if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) { - // FIXME set *msg_id - return -1; - } - - // - // Contruct the data structure to be returned. - // - *address2os = (AddrUnsPair*) - __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc); - int avail_ct = 0; - int i; - KMP_CPU_SET_ITERATE(i, fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! 
KMP_CPU_ISSET(i, fullMask)) { - continue; - } - - Address addr(2); - addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR)); - addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR)); - (*address2os)[avail_ct++] = AddrUnsPair(addr,i); - - if (__kmp_affinity_verbose) { - KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0], - addr.labels[1]); - } - } - - if (__kmp_affinity_gran_levels < 0) { - if (__kmp_affinity_gran == affinity_gran_group) { - __kmp_affinity_gran_levels = 1; - } - else if ((__kmp_affinity_gran == affinity_gran_fine) - || (__kmp_affinity_gran == affinity_gran_thread)) { - __kmp_affinity_gran_levels = 0; - } - else { - const char *gran_str = NULL; - if (__kmp_affinity_gran == affinity_gran_core) { - gran_str = "core"; - } - else if (__kmp_affinity_gran == affinity_gran_package) { - gran_str = "package"; - } - else if (__kmp_affinity_gran == affinity_gran_node) { - gran_str = "node"; - } - else { - KMP_ASSERT(0); - } - - // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread" - __kmp_affinity_gran_levels = 0; - } - } - return 2; -} - -# endif /* KMP_GROUP_AFFINITY */ - - -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -static int -__kmp_cpuid_mask_width(int count) { - int r = 0; - - while((1<osId < bb->osId) return -1; - if (aa->osId > bb->osId) return 1; - return 0; -} - - -static int -__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b) -{ - const apicThreadInfo *aa = (const apicThreadInfo *)a; - const apicThreadInfo *bb = (const apicThreadInfo *)b; - if (aa->pkgId < bb->pkgId) return -1; - if (aa->pkgId > bb->pkgId) return 1; - if (aa->coreId < bb->coreId) return -1; - if (aa->coreId > bb->coreId) return 1; - if (aa->threadId < bb->threadId) return -1; - if (aa->threadId > bb->threadId) return 1; - return 0; -} - - -// -// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use -// an algorithm which cycles through the available os threads, setting -// the current thread's affinity 
mask to that thread, and then retrieves -// the Apic Id for each thread context using the cpuid instruction. -// -static int -__kmp_affinity_create_apicid_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - kmp_cpuid buf; - int rc; - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Check if cpuid leaf 4 is supported. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax < 4) { - *msg_id = kmp_i18n_str_NoLeaf4Support; - return -1; - } - - // - // The algorithm used starts by setting the affinity to each available - // thread and retrieving info from the cpuid instruction, so if we are - // not capable of calling __kmp_get_system_affinity() and - // _kmp_get_system_affinity(), then we need to do something else - use - // the defaults that we calculated from issuing cpuid without binding - // to each proc. - // - if (! KMP_AFFINITY_CAPABLE()) { - // - // Hack to try and infer the machine topology using only the data - // available from cpuid on the current thread, and __kmp_xproc. - // - KMP_ASSERT(__kmp_affinity_type == affinity_none); - - // - // Get an upper bound on the number of threads per package using - // cpuid(1). - // - // On some OS/chps combinations where HT is supported by the chip - // but is disabled, this value will be 2 on a single core chip. - // Usually, it will be 2 if HT is enabled and 1 if HT is disabled. - // - __kmp_x86_cpuid(1, 0, &buf); - int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; - if (maxThreadsPerPkg == 0) { - maxThreadsPerPkg = 1; - } - - // - // The num cores per pkg comes from cpuid(4). - // 1 must be added to the encoded value. - // - // The author of cpu_count.cpp treated this only an upper bound - // on the number of cores, but I haven't seen any cases where it - // was greater than the actual number of cores, so we will treat - // it as exact in this block of code. - // - // First, we need to check if cpuid(4) is supported on this chip. 
- // To see if cpuid(n) is supported, issue cpuid(0) and check if eax - // has the value n or greater. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax >= 4) { - __kmp_x86_cpuid(4, 0, &buf); - nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; - } - else { - nCoresPerPkg = 1; - } - - // - // There is no way to reliably tell if HT is enabled without issuing - // the cpuid instruction from every thread, can correlating the cpuid - // info, so if the machine is not affinity capable, we assume that HT - // is off. We have seen quite a few machines where maxThreadsPerPkg - // is 2, yet the machine does not support HT. - // - // - Older OSes are usually found on machines with older chips, which - // do not support HT. - // - // - The performance penalty for mistakenly identifying a machine as - // HT when it isn't (which results in blocktime being incorrecly set - // to 0) is greater than the penalty when for mistakenly identifying - // a machine as being 1 thread/core when it is really HT enabled - // (which results in blocktime being incorrectly set to a positive - // value). - // - __kmp_ncores = __kmp_xproc; - nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; - __kmp_nThreadsPerCore = 1; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - return 0; - } - - // - // - // From here on, we can assume that it is safe to call - // __kmp_get_system_affinity() and __kmp_set_system_affinity(), - // even if __kmp_affinity_type = affinity_none. - // - - // - // Save the affinity mask for the current thread. 
- // - kmp_affin_mask_t *oldMask; - KMP_CPU_ALLOC(oldMask); - KMP_ASSERT(oldMask != NULL); - __kmp_get_system_affinity(oldMask, TRUE); - - // - // Run through each of the available contexts, binding the current thread - // to it, and obtaining the pertinent information using the cpuid instr. - // - // The relevant information is: - // - // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context - // has a uniqie Apic Id, which is of the form pkg# : core# : thread#. - // - // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The - // value of this field determines the width of the core# + thread# - // fields in the Apic Id. It is also an upper bound on the number - // of threads per package, but it has been verified that situations - // happen were it is not exact. In particular, on certain OS/chip - // combinations where Intel(R) Hyper-Threading Technology is supported - // by the chip but has - // been disabled, the value of this field will be 2 (for a single core - // chip). On other OS/chip combinations supporting - // Intel(R) Hyper-Threading Technology, the value of - // this field will be 1 when Intel(R) Hyper-Threading Technology is - // disabled and 2 when it is enabled. - // - // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The - // value of this field (+1) determines the width of the core# field in - // the Apic Id. The comments in "cpucount.cpp" say that this value is - // an upper bound, but the IA-32 architecture manual says that it is - // exactly the number of cores per package, and I haven't seen any - // case where it wasn't. - // - // From this information, deduce the package Id, core Id, and thread Id, - // and set the corresponding fields in the apicThreadInfo struct. 
- // - unsigned i; - apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate( - __kmp_avail_proc * sizeof(apicThreadInfo)); - unsigned nApics = 0; - KMP_CPU_SET_ITERATE(i, fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(i, fullMask)) { - continue; - } - KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc); - - __kmp_affinity_bind_thread(i); - threadInfo[nApics].osId = i; - - // - // The apic id and max threads per pkg come from cpuid(1). - // - __kmp_x86_cpuid(1, 0, &buf); - if (! (buf.edx >> 9) & 1) { - __kmp_set_system_affinity(oldMask, TRUE); - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_ApicNotPresent; - return -1; - } - threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff; - threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; - if (threadInfo[nApics].maxThreadsPerPkg == 0) { - threadInfo[nApics].maxThreadsPerPkg = 1; - } - - // - // Max cores per pkg comes from cpuid(4). - // 1 must be added to the encoded value. - // - // First, we need to check if cpuid(4) is supported on this chip. - // To see if cpuid(n) is supported, issue cpuid(0) and check if eax - // has the value n or greater. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax >= 4) { - __kmp_x86_cpuid(4, 0, &buf); - threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; - } - else { - threadInfo[nApics].maxCoresPerPkg = 1; - } - - // - // Infer the pkgId / coreId / threadId using only the info - // obtained locally. - // - int widthCT = __kmp_cpuid_mask_width( - threadInfo[nApics].maxThreadsPerPkg); - threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT; - - int widthC = __kmp_cpuid_mask_width( - threadInfo[nApics].maxCoresPerPkg); - int widthT = widthCT - widthC; - if (widthT < 0) { - // - // I've never seen this one happen, but I suppose it could, if - // the cpuid instruction on a chip was really screwed up. - // Make sure to restore the affinity mask before the tail call. 
- // - __kmp_set_system_affinity(oldMask, TRUE); - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - - int maskC = (1 << widthC) - 1; - threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) - &maskC; - - int maskT = (1 << widthT) - 1; - threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT; - - nApics++; - } - - // - // We've collected all the info we need. - // Restore the old affinity mask for this thread. - // - __kmp_set_system_affinity(oldMask, TRUE); - - // - // If there's only one thread context to bind to, form an Address object - // with depth 1 and return immediately (or, if affinity is off, set - // address2os to NULL and return). - // - // If it is configured to omit the package level when there is only a - // single package, the logic at the end of this routine won't work if - // there is only a single thread - it would try to form an Address - // object with depth 0. - // - KMP_ASSERT(nApics > 0); - if (nApics == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 0; - } - - *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair)); - Address addr(1); - addr.labels[0] = threadInfo[0].pkgId; - (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId); - - if 
(__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); - } - - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 1; - } - - // - // Sort the threadInfo table by physical Id. - // - qsort(threadInfo, nApics, sizeof(*threadInfo), - __kmp_affinity_cmp_apicThreadInfo_phys_id); - - // - // The table is now sorted by pkgId / coreId / threadId, but we really - // don't know the radix of any of the fields. pkgId's may be sparsely - // assigned among the chips on a system. Although coreId's are usually - // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned - // [0..threadsPerCore-1], we don't want to make any such assumptions. - // - // For that matter, we don't know what coresPerPkg and threadsPerCore - // (or the total # packages) are at this point - we want to determine - // that now. We only have an upper bound on the first two figures. - // - // We also perform a consistency check at this point: the values returned - // by the cpuid instruction for any thread bound to a given package had - // better return the same info for maxThreadsPerPkg and maxCoresPerPkg. 
- // - nPackages = 1; - nCoresPerPkg = 1; - __kmp_nThreadsPerCore = 1; - unsigned nCores = 1; - - unsigned pkgCt = 1; // to determine radii - unsigned lastPkgId = threadInfo[0].pkgId; - unsigned coreCt = 1; - unsigned lastCoreId = threadInfo[0].coreId; - unsigned threadCt = 1; - unsigned lastThreadId = threadInfo[0].threadId; - - // intra-pkg consist checks - unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg; - unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg; - - for (i = 1; i < nApics; i++) { - if (threadInfo[i].pkgId != lastPkgId) { - nCores++; - pkgCt++; - lastPkgId = threadInfo[i].pkgId; - if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt; - coreCt = 1; - lastCoreId = threadInfo[i].coreId; - if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; - threadCt = 1; - lastThreadId = threadInfo[i].threadId; - - // - // This is a different package, so go on to the next iteration - // without doing any consistency checks. Reset the consistency - // check vars, though. - // - prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg; - prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg; - continue; - } - - if (threadInfo[i].coreId != lastCoreId) { - nCores++; - coreCt++; - lastCoreId = threadInfo[i].coreId; - if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; - threadCt = 1; - lastThreadId = threadInfo[i].threadId; - } - else if (threadInfo[i].threadId != lastThreadId) { - threadCt++; - lastThreadId = threadInfo[i].threadId; - } - else { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique; - return -1; - } - - // - // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg - // fields agree between all the threads bounds to a given package. 
- // - if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) - || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } - } - nPackages = pkgCt; - if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt; - if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return now if affinity is not enabled. - // - __kmp_ncores = nCores; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 0; - } - - // - // Now that we've determined the number of packages, the number of cores - // per package, and the number of threads per core, we can construct the - // data structure that is to be returned. - // - int pkgLevel = 0; - int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1; - int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 
2 : 1); - unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0); - - KMP_ASSERT(depth > 0); - *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics); - - for (i = 0; i < nApics; ++i) { - Address addr(depth); - unsigned os = threadInfo[i].osId; - int d = 0; - - if (pkgLevel >= 0) { - addr.labels[d++] = threadInfo[i].pkgId; - } - if (coreLevel >= 0) { - addr.labels[d++] = threadInfo[i].coreId; - } - if (threadLevel >= 0) { - addr.labels[d++] = threadInfo[i].threadId; - } - (*address2os)[i] = AddrUnsPair(addr, os); - } - - if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. - // - __kmp_affinity_gran_levels = 0; - if ((threadLevel >= 0) - && (__kmp_affinity_gran > affinity_gran_thread)) { - __kmp_affinity_gran_levels++; - } - if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { - __kmp_affinity_gran_levels++; - } - if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) { - __kmp_affinity_gran_levels++; - } - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel, - coreLevel, threadLevel); - } - - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return depth; -} - - -// -// Intel(R) microarchitecture code name Nehalem, Dunnington and later -// architectures support a newer interface for specifying the x2APIC Ids, -// based on cpuid leaf 11. -// -static int -__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - kmp_cpuid buf; - - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Check to see if cpuid leaf 11 is supported. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax < 11) { - *msg_id = kmp_i18n_str_NoLeaf11Support; - return -1; - } - __kmp_x86_cpuid(11, 0, &buf); - if (buf.ebx == 0) { - *msg_id = kmp_i18n_str_NoLeaf11Support; - return -1; - } - - // - // Find the number of levels in the machine topology. 
While we're at it, - // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will - // try to get more accurate values later by explicitly counting them, - // but get reasonable defaults now, in case we return early. - // - int level; - int threadLevel = -1; - int coreLevel = -1; - int pkgLevel = -1; - __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; - - for (level = 0;; level++) { - if (level > 31) { - // - // FIXME: Hack for DPD200163180 - // - // If level is big then something went wrong -> exiting - // - // There could actually be 32 valid levels in the machine topology, - // but so far, the only machine we have seen which does not exit - // this loop before iteration 32 has fubar x2APIC settings. - // - // For now, just reject this case based upon loop trip count. - // - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - __kmp_x86_cpuid(11, level, &buf); - if (buf.ebx == 0) { - if (pkgLevel < 0) { - // - // Will infer nPackages from __kmp_xproc - // - pkgLevel = level; - level++; - } - break; - } - int kind = (buf.ecx >> 8) & 0xff; - if (kind == 1) { - // - // SMT level - // - threadLevel = level; - coreLevel = -1; - pkgLevel = -1; - __kmp_nThreadsPerCore = buf.ebx & 0xff; - if (__kmp_nThreadsPerCore == 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - } - else if (kind == 2) { - // - // core level - // - coreLevel = level; - pkgLevel = -1; - nCoresPerPkg = buf.ebx & 0xff; - if (nCoresPerPkg == 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - } - else { - if (level <= 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - if (pkgLevel >= 0) { - continue; - } - pkgLevel = level; - nPackages = buf.ebx & 0xff; - if (nPackages == 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - } - } - int depth = level; - - // - // In the above loop, "level" was counted from the finest level (usually - // thread) to the coarsest. 
The caller expects that we will place the - // labels in (*address2os)[].first.labels[] in the inverse order, so - // we need to invert the vars saying which level means what. - // - if (threadLevel >= 0) { - threadLevel = depth - threadLevel - 1; - } - if (coreLevel >= 0) { - coreLevel = depth - coreLevel - 1; - } - KMP_DEBUG_ASSERT(pkgLevel >= 0); - pkgLevel = depth - pkgLevel - 1; - - // - // The algorithm used starts by setting the affinity to each available - // thread and retrieving info from the cpuid instruction, so if we are - // not capable of calling __kmp_get_system_affinity() and - // _kmp_get_system_affinity(), then we need to do something else - use - // the defaults that we calculated from issuing cpuid without binding - // to each proc. - // - if (! KMP_AFFINITY_CAPABLE()) - { - // - // Hack to try and infer the machine topology using only the data - // available from cpuid on the current thread, and __kmp_xproc. - // - KMP_ASSERT(__kmp_affinity_type == affinity_none); - - __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; - nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - return 0; - } - - // - // - // From here on, we can assume that it is safe to call - // __kmp_get_system_affinity() and __kmp_set_system_affinity(), - // even if __kmp_affinity_type = affinity_none. - // - - // - // Save the affinity mask for the current thread. - // - kmp_affin_mask_t *oldMask; - KMP_CPU_ALLOC(oldMask); - __kmp_get_system_affinity(oldMask, TRUE); - - // - // Allocate the data structure to be returned. 
- // - AddrUnsPair *retval = (AddrUnsPair *) - __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); - - // - // Run through each of the available contexts, binding the current thread - // to it, and obtaining the pertinent information using the cpuid instr. - // - unsigned int proc; - int nApics = 0; - KMP_CPU_SET_ITERATE(proc, fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(proc, fullMask)) { - continue; - } - KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc); - - __kmp_affinity_bind_thread(proc); - - // - // Extrach the labels for each level in the machine topology map - // from the Apic ID. - // - Address addr(depth); - int prev_shift = 0; - - for (level = 0; level < depth; level++) { - __kmp_x86_cpuid(11, level, &buf); - unsigned apicId = buf.edx; - if (buf.ebx == 0) { - if (level != depth - 1) { - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } - addr.labels[depth - level - 1] = apicId >> prev_shift; - level++; - break; - } - int shift = buf.eax & 0x1f; - int mask = (1 << shift) - 1; - addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift; - prev_shift = shift; - } - if (level != depth) { - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } - - retval[nApics] = AddrUnsPair(addr, proc); - nApics++; - } - - // - // We've collected all the info we need. - // Restore the old affinity mask for this thread. - // - __kmp_set_system_affinity(oldMask, TRUE); - - // - // If there's only one thread context to bind to, return now. 
- // - KMP_ASSERT(nApics > 0); - if (nApics == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; - } - - // - // Form an Address object which only includes the package level. - // - Address addr(1); - addr.labels[0] = retval[0].first.labels[pkgLevel]; - retval[0].first = addr; - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); - } - - *address2os = retval; - KMP_CPU_FREE(oldMask); - return 1; - } - - // - // Sort the table by physical Id. - // - qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels); - - // - // Find the radix at each of the levels. - // - unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - for (level = 0; level < depth; level++) { - totals[level] = 1; - maxCt[level] = 1; - counts[level] = 1; - last[level] = retval[0].first.labels[level]; - } - - // - // From here on, the iteration variable "level" runs from the finest - // level to the coarsest, i.e. 
we iterate forward through - // (*address2os)[].first.labels[] - in the previous loops, we iterated - // backwards. - // - for (proc = 1; (int)proc < nApics; proc++) { - int level; - for (level = 0; level < depth; level++) { - if (retval[proc].first.labels[level] != last[level]) { - int j; - for (j = level + 1; j < depth; j++) { - totals[j]++; - counts[j] = 1; - // The line below causes printing incorrect topology information - // in case the max value for some level (maxCt[level]) is encountered earlier than - // some less value while going through the array. - // For example, let pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2 - // whereas it must be 4. - // TODO!!! Check if it can be commented safely - //maxCt[j] = 1; - last[j] = retval[proc].first.labels[j]; - } - totals[level]++; - counts[level]++; - if (counts[level] > maxCt[level]) { - maxCt[level] = counts[level]; - } - last[level] = retval[proc].first.labels[level]; - break; - } - else if (level == depth - 1) { - __kmp_free(last); - __kmp_free(maxCt); - __kmp_free(counts); - __kmp_free(totals); - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_x2ApicIDsNotUnique; - return -1; - } - } - } - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return if affinity is not enabled. 
- // - if (threadLevel >= 0) { - __kmp_nThreadsPerCore = maxCt[threadLevel]; - } - else { - __kmp_nThreadsPerCore = 1; - } - nPackages = totals[pkgLevel]; - - if (coreLevel >= 0) { - __kmp_ncores = totals[coreLevel]; - nCoresPerPkg = maxCt[coreLevel]; - } - else { - __kmp_ncores = nPackages; - nCoresPerPkg = 1; - } - - // - // Check to see if the machine topology is uniform - // - unsigned prod = maxCt[0]; - for (level = 1; level < depth; level++) { - prod *= maxCt[level]; - } - bool uniform = (prod == totals[level - 1]); - - // - // Print the machine topology summary. - // - if (__kmp_affinity_verbose) { - char mask[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - - __kmp_str_buf_print(&buf, "%d", totals[0]); - for (level = 1; level <= pkgLevel; level++) { - __kmp_str_buf_print(&buf, " x %d", maxCt[level]); - } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - - __kmp_str_buf_free(&buf); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(last); - __kmp_free(maxCt); - __kmp_free(counts); - __kmp_free(totals); - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; - } - - // - // Find any levels with radiix 1, and remove them from the map - // (except for the package level). 
- // - int new_depth = 0; - for (level = 0; level < depth; level++) { - if ((maxCt[level] == 1) && (level != pkgLevel)) { - continue; - } - new_depth++; - } - - // - // If we are removing any levels, allocate a new vector to return, - // and copy the relevant information to it. - // - if (new_depth != depth) { - AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate( - sizeof(AddrUnsPair) * nApics); - for (proc = 0; (int)proc < nApics; proc++) { - Address addr(new_depth); - new_retval[proc] = AddrUnsPair(addr, retval[proc].second); - } - int new_level = 0; - int newPkgLevel = -1; - int newCoreLevel = -1; - int newThreadLevel = -1; - int i; - for (level = 0; level < depth; level++) { - if ((maxCt[level] == 1) - && (level != pkgLevel)) { - // - // Remove this level. Never remove the package level - // - continue; - } - if (level == pkgLevel) { - newPkgLevel = level; - } - if (level == coreLevel) { - newCoreLevel = level; - } - if (level == threadLevel) { - newThreadLevel = level; - } - for (proc = 0; (int)proc < nApics; proc++) { - new_retval[proc].first.labels[new_level] - = retval[proc].first.labels[level]; - } - new_level++; - } - - __kmp_free(retval); - retval = new_retval; - depth = new_depth; - pkgLevel = newPkgLevel; - coreLevel = newCoreLevel; - threadLevel = newThreadLevel; - } - - if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. 
- // - __kmp_affinity_gran_levels = 0; - if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { - __kmp_affinity_gran_levels++; - } - if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { - __kmp_affinity_gran_levels++; - } - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels++; - } - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, - coreLevel, threadLevel); - } - - __kmp_free(last); - __kmp_free(maxCt); - __kmp_free(counts); - __kmp_free(totals); - KMP_CPU_FREE(oldMask); - *address2os = retval; - return depth; -} - - -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - -#define osIdIndex 0 -#define threadIdIndex 1 -#define coreIdIndex 2 -#define pkgIdIndex 3 -#define nodeIdIndex 4 - -typedef unsigned *ProcCpuInfo; -static unsigned maxIndex = pkgIdIndex; - - -static int -__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) -{ - const unsigned *aa = (const unsigned *)a; - const unsigned *bb = (const unsigned *)b; - if (aa[osIdIndex] < bb[osIdIndex]) return -1; - if (aa[osIdIndex] > bb[osIdIndex]) return 1; - return 0; -}; - - -static int -__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b) -{ - unsigned i; - const unsigned *aa = *((const unsigned **)a); - const unsigned *bb = *((const unsigned **)b); - for (i = maxIndex; ; i--) { - if (aa[i] < bb[i]) return -1; - if (aa[i] > bb[i]) return 1; - if (i == osIdIndex) break; - } - return 0; -} - - -// -// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the -// affinity map. -// -static int -__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line, - kmp_i18n_id_t *const msg_id, FILE *f) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Scan of the file, and count the number of "processor" (osId) fields, - // and find the highest value of for a node_ field. - // - char buf[256]; - unsigned num_records = 0; - while (! 
feof(f)) { - buf[sizeof(buf) - 1] = 1; - if (! fgets(buf, sizeof(buf), f)) { - // - // Read errors presumably because of EOF - // - break; - } - - char s1[] = "processor"; - if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { - num_records++; - continue; - } - - // - // FIXME - this will match "node_ " - // - unsigned level; - if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { - if (nodeIdIndex + level >= maxIndex) { - maxIndex = nodeIdIndex + level; - } - continue; - } - } - - // - // Check for empty file / no valid processor records, or too many. - // The number of records can't exceed the number of valid bits in the - // affinity mask. - // - if (num_records == 0) { - *line = 0; - *msg_id = kmp_i18n_str_NoProcRecords; - return -1; - } - if (num_records > (unsigned)__kmp_xproc) { - *line = 0; - *msg_id = kmp_i18n_str_TooManyProcRecords; - return -1; - } - - // - // Set the file pointer back to the begginning, so that we can scan the - // file again, this time performing a full parse of the data. - // Allocate a vector of ProcCpuInfo object, where we will place the data. - // Adding an extra element at the end allows us to remove a lot of extra - // checks for termination conditions. - // - if (fseek(f, 0, SEEK_SET) != 0) { - *line = 0; - *msg_id = kmp_i18n_str_CantRewindCpuinfo; - return -1; - } - - // - // Allocate the array of records to store the proc info in. The dummy - // element at the end makes the logic in filling them out easier to code. 
- // - unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1) - * sizeof(unsigned *)); - unsigned i; - for (i = 0; i <= num_records; i++) { - threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - } - -#define CLEANUP_THREAD_INFO \ - for (i = 0; i <= num_records; i++) { \ - __kmp_free(threadInfo[i]); \ - } \ - __kmp_free(threadInfo); - - // - // A value of UINT_MAX means that we didn't find the field - // - unsigned __index; - -#define INIT_PROC_INFO(p) \ - for (__index = 0; __index <= maxIndex; __index++) { \ - (p)[__index] = UINT_MAX; \ - } - - for (i = 0; i <= num_records; i++) { - INIT_PROC_INFO(threadInfo[i]); - } - - unsigned num_avail = 0; - *line = 0; - while (! feof(f)) { - // - // Create an inner scoping level, so that all the goto targets at the - // end of the loop appear in an outer scoping level. This avoids - // warnings about jumping past an initialization to a target in the - // same block. - // - { - buf[sizeof(buf) - 1] = 1; - bool long_line = false; - if (! fgets(buf, sizeof(buf), f)) { - // - // Read errors presumably because of EOF - // - // If there is valid data in threadInfo[num_avail], then fake - // a blank line in ensure that the last address gets parsed. - // - bool valid = false; - for (i = 0; i <= maxIndex; i++) { - if (threadInfo[num_avail][i] != UINT_MAX) { - valid = true; - } - } - if (! valid) { - break; - } - buf[0] = 0; - } else if (!buf[sizeof(buf) - 1]) { - // - // The line is longer than the buffer. Set a flag and don't - // emit an error if we were going to ignore the line, anyway. 
- // - long_line = true; - -#define CHECK_LINE \ - if (long_line) { \ - CLEANUP_THREAD_INFO; \ - *msg_id = kmp_i18n_str_LongLineCpuinfo; \ - return -1; \ - } - } - (*line)++; - - char s1[] = "processor"; - if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s1) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][osIdIndex] = val; -#if KMP_OS_LINUX && USE_SYSFS_INFO - char path[256]; - KMP_SNPRINTF(path, sizeof(path), - "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", - threadInfo[num_avail][osIdIndex]); - __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]); - - KMP_SNPRINTF(path, sizeof(path), - "/sys/devices/system/cpu/cpu%u/topology/core_id", - threadInfo[num_avail][osIdIndex]); - __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]); - continue; -#else - } - char s2[] = "physical id"; - if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s2) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][pkgIdIndex] = val; - continue; - } - char s3[] = "core id"; - if (strncmp(buf, s3, sizeof(s3) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s3) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][coreIdIndex] = val; - continue; -#endif // KMP_OS_LINUX && USE_SYSFS_INFO - } - char s4[] = "thread id"; - if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s4) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if 
(threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][threadIdIndex] = val; - continue; - } - unsigned level; - if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s4) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - KMP_ASSERT(nodeIdIndex + level <= maxIndex); - if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field; - threadInfo[num_avail][nodeIdIndex + level] = val; - continue; - } - - // - // We didn't recognize the leading token on the line. - // There are lots of leading tokens that we don't recognize - - // if the line isn't empty, go on to the next line. - // - if ((*buf != 0) && (*buf != '\n')) { - // - // If the line is longer than the buffer, read characters - // until we find a newline. - // - if (long_line) { - int ch; - while (((ch = fgetc(f)) != EOF) && (ch != '\n')); - } - continue; - } - - // - // A newline has signalled the end of the processor record. - // Check that there aren't too many procs specified. - // - if ((int)num_avail == __kmp_xproc) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_TooManyEntries; - return -1; - } - - // - // Check for missing fields. The osId field must be there, and we - // currently require that the physical id field is specified, also. - // - if (threadInfo[num_avail][osIdIndex] == UINT_MAX) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingProcField; - return -1; - } - if (threadInfo[0][pkgIdIndex] == UINT_MAX) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingPhysicalIDField; - return -1; - } - - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) { - INIT_PROC_INFO(threadInfo[num_avail]); - continue; - } - - // - // We have a successful parse of this proc's info. - // Increment the counter, and prepare for the next proc. 
- // - num_avail++; - KMP_ASSERT(num_avail <= num_records); - INIT_PROC_INFO(threadInfo[num_avail]); - } - continue; - - no_val: - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingValCpuinfo; - return -1; - - dup_field: - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo; - return -1; - } - *line = 0; - -# if KMP_MIC && REDUCE_TEAM_SIZE - unsigned teamSize = 0; -# endif // KMP_MIC && REDUCE_TEAM_SIZE - - // check for num_records == __kmp_xproc ??? - - // - // If there's only one thread context to bind to, form an Address object - // with depth 1 and return immediately (or, if affinity is off, set - // address2os to NULL and return). - // - // If it is configured to omit the package level when there is only a - // single package, the logic at the end of this routine won't work if - // there is only a single thread - it would try to form an Address - // object with depth 0. - // - KMP_ASSERT(num_avail > 0); - KMP_ASSERT(num_avail <= num_records); - if (num_avail == 1) { - __kmp_ncores = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; - if (__kmp_affinity_verbose) { - if (! 
KMP_AFFINITY_CAPABLE()) { - KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } - else { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - fullMask); - KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } - int index; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "1"); - for (index = maxIndex - 1; index > pkgIdIndex; index--) { - __kmp_str_buf_print(&buf, " x 1"); - } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1); - __kmp_str_buf_free(&buf); - } - - if (__kmp_affinity_type == affinity_none) { - CLEANUP_THREAD_INFO; - return 0; - } - - *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair)); - Address addr(1); - addr.labels[0] = threadInfo[0][pkgIdIndex]; - (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]); - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); - } - - CLEANUP_THREAD_INFO; - return 1; - } - - // - // Sort the threadInfo table by physical Id. - // - qsort(threadInfo, num_avail, sizeof(*threadInfo), - __kmp_affinity_cmp_ProcCpuInfo_phys_id); - - // - // The table is now sorted by pkgId / coreId / threadId, but we really - // don't know the radix of any of the fields. pkgId's may be sparsely - // assigned among the chips on a system. Although coreId's are usually - // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned - // [0..threadsPerCore-1], we don't want to make any such assumptions. 
- // - // For that matter, we don't know what coresPerPkg and threadsPerCore - // (or the total # packages) are at this point - we want to determine - // that now. We only have an upper bound on the first two figures. - // - unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - - bool assign_thread_ids = false; - unsigned threadIdCt; - unsigned index; - - restart_radix_check: - threadIdCt = 0; - - // - // Initialize the counter arrays with data from threadInfo[0]. - // - if (assign_thread_ids) { - if (threadInfo[0][threadIdIndex] == UINT_MAX) { - threadInfo[0][threadIdIndex] = threadIdCt++; - } - else if (threadIdCt <= threadInfo[0][threadIdIndex]) { - threadIdCt = threadInfo[0][threadIdIndex] + 1; - } - } - for (index = 0; index <= maxIndex; index++) { - counts[index] = 1; - maxCt[index] = 1; - totals[index] = 1; - lastId[index] = threadInfo[0][index];; - } - - // - // Run through the rest of the OS procs. - // - for (i = 1; i < num_avail; i++) { - // - // Find the most significant index whose id differs - // from the id for the previous OS proc. - // - for (index = maxIndex; index >= threadIdIndex; index--) { - if (assign_thread_ids && (index == threadIdIndex)) { - // - // Auto-assign the thread id field if it wasn't specified. - // - if (threadInfo[i][threadIdIndex] == UINT_MAX) { - threadInfo[i][threadIdIndex] = threadIdCt++; - } - - // - // Aparrently the thread id field was specified for some - // entries and not others. Start the thread id counter - // off at the next higher thread id. 
- // - else if (threadIdCt <= threadInfo[i][threadIdIndex]) { - threadIdCt = threadInfo[i][threadIdIndex] + 1; - } - } - if (threadInfo[i][index] != lastId[index]) { - // - // Run through all indices which are less significant, - // and reset the counts to 1. - // - // At all levels up to and including index, we need to - // increment the totals and record the last id. - // - unsigned index2; - for (index2 = threadIdIndex; index2 < index; index2++) { - totals[index2]++; - if (counts[index2] > maxCt[index2]) { - maxCt[index2] = counts[index2]; - } - counts[index2] = 1; - lastId[index2] = threadInfo[i][index2]; - } - counts[index]++; - totals[index]++; - lastId[index] = threadInfo[i][index]; - - if (assign_thread_ids && (index > threadIdIndex)) { - -# if KMP_MIC && REDUCE_TEAM_SIZE - // - // The default team size is the total #threads in the machine - // minus 1 thread for every core that has 3 or more threads. - // - teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 ); -# endif // KMP_MIC && REDUCE_TEAM_SIZE - - // - // Restart the thread counter, as we are on a new core. - // - threadIdCt = 0; - - // - // Auto-assign the thread id field if it wasn't specified. - // - if (threadInfo[i][threadIdIndex] == UINT_MAX) { - threadInfo[i][threadIdIndex] = threadIdCt++; - } - - // - // Aparrently the thread id field was specified for some - // entries and not others. Start the thread id counter - // off at the next higher thread id. - // - else if (threadIdCt <= threadInfo[i][threadIdIndex]) { - threadIdCt = threadInfo[i][threadIdIndex] + 1; - } - } - break; - } - } - if (index < threadIdIndex) { - // - // If thread ids were specified, it is an error if they are not - // unique. Also, check that we waven't already restarted the - // loop (to be safe - shouldn't need to). 
- // - if ((threadInfo[i][threadIdIndex] != UINT_MAX) - || assign_thread_ids) { - __kmp_free(lastId); - __kmp_free(totals); - __kmp_free(maxCt); - __kmp_free(counts); - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_PhysicalIDsNotUnique; - return -1; - } - - // - // If the thread ids were not specified and we see entries - // entries that are duplicates, start the loop over and - // assign the thread ids manually. - // - assign_thread_ids = true; - goto restart_radix_check; - } - } - -# if KMP_MIC && REDUCE_TEAM_SIZE - // - // The default team size is the total #threads in the machine - // minus 1 thread for every core that has 3 or more threads. - // - teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 ); -# endif // KMP_MIC && REDUCE_TEAM_SIZE - - for (index = threadIdIndex; index <= maxIndex; index++) { - if (counts[index] > maxCt[index]) { - maxCt[index] = counts[index]; - } - } - - __kmp_nThreadsPerCore = maxCt[threadIdIndex]; - nCoresPerPkg = maxCt[coreIdIndex]; - nPackages = totals[pkgIdIndex]; - - // - // Check to see if the machine topology is uniform - // - unsigned prod = totals[maxIndex]; - for (index = threadIdIndex; index < maxIndex; index++) { - prod *= maxCt[index]; - } - bool uniform = (prod == totals[threadIdIndex]); - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return now if affinity is not enabled. - // - __kmp_ncores = totals[coreIdIndex]; - - if (__kmp_affinity_verbose) { - if (! 
KMP_AFFINITY_CAPABLE()) { - KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - } - else { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask); - KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - } - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - - __kmp_str_buf_print(&buf, "%d", totals[maxIndex]); - for (index = maxIndex - 1; index >= pkgIdIndex; index--) { - __kmp_str_buf_print(&buf, " x %d", maxCt[index]); - } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex], - maxCt[threadIdIndex], __kmp_ncores); - - __kmp_str_buf_free(&buf); - } - -# if KMP_MIC && REDUCE_TEAM_SIZE - // - // Set the default team size. - // - if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) { - __kmp_dflt_team_nth = teamSize; - KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n", - __kmp_dflt_team_nth)); - } -# endif // KMP_MIC && REDUCE_TEAM_SIZE - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(lastId); - __kmp_free(totals); - __kmp_free(maxCt); - __kmp_free(counts); - CLEANUP_THREAD_INFO; - return 0; - } - - // - // Count the number of levels which have more nodes at that level than - // at the parent's level (with there being an implicit root node of - // the top level). This is equivalent to saying that there is at least - // one node at this level which has a sibling. These levels are in the - // map, and the package level is always in the map. 
- // - bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool)); - int level = 0; - for (index = threadIdIndex; index < maxIndex; index++) { - KMP_ASSERT(totals[index] >= totals[index + 1]); - inMap[index] = (totals[index] > totals[index + 1]); - } - inMap[maxIndex] = (totals[maxIndex] > 1); - inMap[pkgIdIndex] = true; - - int depth = 0; - for (index = threadIdIndex; index <= maxIndex; index++) { - if (inMap[index]) { - depth++; - } - } - KMP_ASSERT(depth > 0); - - // - // Construct the data structure that is to be returned. - // - *address2os = (AddrUnsPair*) - __kmp_allocate(sizeof(AddrUnsPair) * num_avail); - int pkgLevel = -1; - int coreLevel = -1; - int threadLevel = -1; - - for (i = 0; i < num_avail; ++i) { - Address addr(depth); - unsigned os = threadInfo[i][osIdIndex]; - int src_index; - int dst_index = 0; - - for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) { - if (! inMap[src_index]) { - continue; - } - addr.labels[dst_index] = threadInfo[i][src_index]; - if (src_index == pkgIdIndex) { - pkgLevel = dst_index; - } - else if (src_index == coreIdIndex) { - coreLevel = dst_index; - } - else if (src_index == threadIdIndex) { - threadLevel = dst_index; - } - dst_index++; - } - (*address2os)[i] = AddrUnsPair(addr, os); - } - - if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. - // - unsigned src_index; - __kmp_affinity_gran_levels = 0; - for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) { - if (! 
inMap[src_index]) { - continue; - } - switch (src_index) { - case threadIdIndex: - if (__kmp_affinity_gran > affinity_gran_thread) { - __kmp_affinity_gran_levels++; - } - - break; - case coreIdIndex: - if (__kmp_affinity_gran > affinity_gran_core) { - __kmp_affinity_gran_levels++; - } - break; - - case pkgIdIndex: - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels++; - } - break; - } - } - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel, - coreLevel, threadLevel); - } - - __kmp_free(inMap); - __kmp_free(lastId); - __kmp_free(totals); - __kmp_free(maxCt); - __kmp_free(counts); - CLEANUP_THREAD_INFO; - return depth; -} - - -// -// Create and return a table of affinity masks, indexed by OS thread ID. -// This routine handles OR'ing together all the affinity masks of threads -// that are sufficiently close, if granularity > fine. -// -static kmp_affin_mask_t * -__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique, - AddrUnsPair *address2os, unsigned numAddrs) -{ - // - // First form a table of affinity masks in order of OS thread id. - // - unsigned depth; - unsigned maxOsId; - unsigned i; - - KMP_ASSERT(numAddrs > 0); - depth = address2os[0].first.depth; - - maxOsId = 0; - for (i = 0; i < numAddrs; i++) { - unsigned osId = address2os[i].second; - if (osId > maxOsId) { - maxOsId = osId; - } - } - kmp_affin_mask_t *osId2Mask; - KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1)); - - // - // Sort the address2os table according to physical order. Doing so - // will put all threads on the same core/package/node in consecutive - // locations. 
- // - qsort(address2os, numAddrs, sizeof(*address2os), - __kmp_affinity_cmp_Address_labels); - - KMP_ASSERT(__kmp_affinity_gran_levels >= 0); - if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) { - KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); - } - if (__kmp_affinity_gran_levels >= (int)depth) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffThreadsMayMigrate); - } - } - - // - // Run through the table, forming the masks for all threads on each - // core. Threads on the same core will have identical "Address" - // objects, not considering the last level, which must be the thread - // id. All threads on a core will appear consecutively. - // - unsigned unique = 0; - unsigned j = 0; // index of 1st thread on core - unsigned leader = 0; - Address *leaderAddr = &(address2os[0].first); - kmp_affin_mask_t *sum; - KMP_CPU_ALLOC_ON_STACK(sum); - KMP_CPU_ZERO(sum); - KMP_CPU_SET(address2os[0].second, sum); - for (i = 1; i < numAddrs; i++) { - // - // If this thread is sufficiently close to the leader (within the - // granularity setting), then set the bit for this os thread in the - // affinity mask for this group, and go on to the next thread. - // - if (leaderAddr->isClose(address2os[i].first, - __kmp_affinity_gran_levels)) { - KMP_CPU_SET(address2os[i].second, sum); - continue; - } - - // - // For every thread in this group, copy the mask to the thread's - // entry in the osId2Mask table. Mark the first address as a - // leader. - // - for (; j < i; j++) { - unsigned osId = address2os[j].second; - KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); - KMP_CPU_COPY(mask, sum); - address2os[j].first.leader = (j == leader); - } - unique++; - - // - // Start a new mask. 
- // - leader = i; - leaderAddr = &(address2os[i].first); - KMP_CPU_ZERO(sum); - KMP_CPU_SET(address2os[i].second, sum); - } - - // - // For every thread in last group, copy the mask to the thread's - // entry in the osId2Mask table. - // - for (; j < i; j++) { - unsigned osId = address2os[j].second; - KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); - KMP_CPU_COPY(mask, sum); - address2os[j].first.leader = (j == leader); - } - unique++; - KMP_CPU_FREE_FROM_STACK(sum); - - *maxIndex = maxOsId; - *numUnique = unique; - return osId2Mask; -} - - -// -// Stuff for the affinity proclist parsers. It's easier to declare these vars -// as file-static than to try and pass them through the calling sequence of -// the recursive-descent OMP_PLACES parser. -// -static kmp_affin_mask_t *newMasks; -static int numNewMasks; -static int nextNewMask; - -#define ADD_MASK(_mask) \ - { \ - if (nextNewMask >= numNewMasks) { \ - int i; \ - numNewMasks *= 2; \ - kmp_affin_mask_t* temp; \ - KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \ - for(i=0;i _maxOsId) || \ - (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ - if (__kmp_affinity_verbose || (__kmp_affinity_warnings \ - && (__kmp_affinity_type != affinity_none))) { \ - KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ - } \ - } \ - else { \ - ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ - } \ - } - - -// -// Re-parse the proclist (for the explicit affinity type), and form the list -// of affinity newMasks indexed by gtid. -// -static void -__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, const char *proclist, - kmp_affin_mask_t *osId2Mask, int maxOsId) -{ - int i; - const char *scan = proclist; - const char *next = proclist; - - // - // We use malloc() for the temporary mask vector, - // so that we can use realloc() to extend it. 
- // - numNewMasks = 2; - KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); - nextNewMask = 0; - kmp_affin_mask_t *sumMask; - KMP_CPU_ALLOC(sumMask); - int setSize = 0; - - for (;;) { - int start, end, stride; - - SKIP_WS(scan); - next = scan; - if (*next == '\0') { - break; - } - - if (*next == '{') { - int num; - setSize = 0; - next++; // skip '{' - SKIP_WS(next); - scan = next; - - // - // Read the first integer in the set. - // - KMP_ASSERT2((*next >= '0') && (*next <= '9'), - "bad proclist"); - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(num >= 0, "bad explicit proc list"); - - // - // Copy the mask for that osId to the sum (union) mask. - // - if ((num > maxOsId) || - (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - KMP_CPU_ZERO(sumMask); - } - else { - KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num)); - setSize = 1; - } - - for (;;) { - // - // Check for end of set. - // - SKIP_WS(next); - if (*next == '}') { - next++; // skip '}' - break; - } - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - SKIP_WS(next); - - // - // Read the next integer in the set. - // - scan = next; - KMP_ASSERT2((*next >= '0') && (*next <= '9'), - "bad explicit proc list"); - - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(num >= 0, "bad explicit proc list"); - - // - // Add the mask for that osId to the sum mask. - // - if ((num > maxOsId) || - (! 
KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - } - else { - KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num)); - setSize++; - } - } - if (setSize > 0) { - ADD_MASK(sumMask); - } - - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // - // Read the first integer. - // - KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); - SKIP_DIGITS(next); - start = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(start >= 0, "bad explicit proc list"); - SKIP_WS(next); - - // - // If this isn't a range, then add a mask to the list and go on. - // - if (*next != '-') { - ADD_MASK_OSID(start, osId2Mask, maxOsId); - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // - // This is a range. Skip over the '-' and read in the 2nd int. - // - next++; // skip '-' - SKIP_WS(next); - scan = next; - KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); - SKIP_DIGITS(next); - end = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(end >= 0, "bad explicit proc list"); - - // - // Check for a stride parameter - // - stride = 1; - SKIP_WS(next); - if (*next == ':') { - // - // A stride is specified. Skip over the ':" and read the 3rd int. - // - int sign = +1; - next++; // skip ':' - SKIP_WS(next); - scan = next; - if (*next == '-') { - sign = -1; - next++; - SKIP_WS(next); - scan = next; - } - KMP_ASSERT2((*next >= '0') && (*next <= '9'), - "bad explicit proc list"); - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(stride >= 0, "bad explicit proc list"); - stride *= sign; - } - - // - // Do some range checks. 
- // - KMP_ASSERT2(stride != 0, "bad explicit proc list"); - if (stride > 0) { - KMP_ASSERT2(start <= end, "bad explicit proc list"); - } - else { - KMP_ASSERT2(start >= end, "bad explicit proc list"); - } - KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list"); - - // - // Add the mask for each OS proc # to the list. - // - if (stride > 0) { - do { - ADD_MASK_OSID(start, osId2Mask, maxOsId); - start += stride; - } while (start <= end); - } - else { - do { - ADD_MASK_OSID(start, osId2Mask, maxOsId); - start += stride; - } while (start >= end); - } - - // - // Skip optional comma. - // - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - } - - *out_numMasks = nextNewMask; - if (nextNewMask == 0) { - *out_masks = NULL; - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); - return; - } - KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); - for(i = 0; i < nextNewMask; i++) { - kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); - kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i); - KMP_CPU_COPY(dest, src); - } - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); - KMP_CPU_FREE(sumMask); -} - - -# if OMP_40_ENABLED - -/*----------------------------------------------------------------------------- - -Re-parse the OMP_PLACES proc id list, forming the newMasks for the different -places. Again, Here is the grammar: - -place_list := place -place_list := place , place_list -place := num -place := place : num -place := place : num : signed -place := { subplacelist } -place := ! 
place // (lowest priority) -subplace_list := subplace -subplace_list := subplace , subplace_list -subplace := num -subplace := num : num -subplace := num : num : signed -signed := num -signed := + signed -signed := - signed - ------------------------------------------------------------------------------*/ - -static void -__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask, - int maxOsId, kmp_affin_mask_t *tempMask, int *setSize) -{ - const char *next; - - for (;;) { - int start, count, stride, i; - - // - // Read in the starting proc id - // - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), - "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - start = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(start >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - if ((start > maxOsId) || - (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - (*setSize)++; - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - KMP_ASSERT2(**scan == ':', "bad explicit places list"); - (*scan)++; // skip ':' - - // - // Read count parameter - // - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), - "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(count >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - for (i = 0; i < count; i++) { - if ((start > maxOsId) || - (! 
KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - break; // don't proliferate warnings for large count - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - start++; - (*setSize)++; - } - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - KMP_ASSERT2(**scan == ':', "bad explicit places list"); - (*scan)++; // skip ':' - - // - // Read stride parameter - // - int sign = +1; - for (;;) { - SKIP_WS(*scan); - if (**scan == '+') { - (*scan)++; // skip '+' - continue; - } - if (**scan == '-') { - sign *= -1; - (*scan)++; // skip '-' - continue; - } - break; - } - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), - "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(stride >= 0); - *scan = next; - stride *= sign; - - // - // valid follow sets are ',' and '}' - // - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - for (i = 0; i < count; i++) { - if ((start > maxOsId) || - (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - break; // don't proliferate warnings for large count - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - start += stride; - (*setSize)++; - } - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - - KMP_ASSERT2(0, "bad explicit places list"); - } -} - - -static void -__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, - int maxOsId, kmp_affin_mask_t *tempMask, int *setSize) -{ - const char *next; - - // - // valid follow sets are '{' '!' 
and num - // - SKIP_WS(*scan); - if (**scan == '{') { - (*scan)++; // skip '{' - __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask, - setSize); - KMP_ASSERT2(**scan == '}', "bad explicit places list"); - (*scan)++; // skip '}' - } - else if (**scan == '!') { - (*scan)++; // skip '!' - __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize); - KMP_CPU_COMPLEMENT(maxOsId, tempMask); - } - else if ((**scan >= '0') && (**scan <= '9')) { - next = *scan; - SKIP_DIGITS(next); - int num = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(num >= 0); - if ((num > maxOsId) || - (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num)); - (*setSize)++; - } - *scan = next; // skip num - } - else { - KMP_ASSERT2(0, "bad explicit places list"); - } -} - - -//static void -void -__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, const char *placelist, - kmp_affin_mask_t *osId2Mask, int maxOsId) -{ - int i,j,count,stride,sign; - const char *scan = placelist; - const char *next = placelist; - - numNewMasks = 2; - KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); - nextNewMask = 0; - - // tempMask is modified based on the previous or initial - // place to form the current place - // previousMask contains the previous place - kmp_affin_mask_t *tempMask; - kmp_affin_mask_t *previousMask; - KMP_CPU_ALLOC(tempMask); - KMP_CPU_ZERO(tempMask); - KMP_CPU_ALLOC(previousMask); - KMP_CPU_ZERO(previousMask); - int setSize = 0; - - for (;;) { - __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); - - // - // valid follow sets are ',' ':' and EOL - // - SKIP_WS(scan); - if (*scan == '\0' || *scan == ',') { - if (setSize > 0) { - ADD_MASK(tempMask); - } - KMP_CPU_ZERO(tempMask); - setSize = 0; - if 
(*scan == '\0') { - break; - } - scan++; // skip ',' - continue; - } - - KMP_ASSERT2(*scan == ':', "bad explicit places list"); - scan++; // skip ':' - - // - // Read count parameter - // - SKIP_WS(scan); - KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), - "bad explicit places list"); - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; - - // - // valid follow sets are ',' ':' and EOL - // - SKIP_WS(scan); - if (*scan == '\0' || *scan == ',') { - stride = +1; - } - else { - KMP_ASSERT2(*scan == ':', "bad explicit places list"); - scan++; // skip ':' - - // - // Read stride parameter - // - sign = +1; - for (;;) { - SKIP_WS(scan); - if (*scan == '+') { - scan++; // skip '+' - continue; - } - if (*scan == '-') { - sign *= -1; - scan++; // skip '-' - continue; - } - break; - } - SKIP_WS(scan); - KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), - "bad explicit places list"); - next = scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_DEBUG_ASSERT(stride >= 0); - scan = next; - stride *= sign; - } - - // Add places determined by initial_place : count : stride - for (i = 0; i < count; i++) { - if (setSize == 0) { - break; - } - // Add the current place, then build the next place (tempMask) from that - KMP_CPU_COPY(previousMask, tempMask); - ADD_MASK(previousMask); - KMP_CPU_ZERO(tempMask); - setSize = 0; - KMP_CPU_SET_ITERATE(j, previousMask) { - if (! KMP_CPU_ISSET(j, previousMask)) { - continue; - } - else if ((j+stride > maxOsId) || (j+stride < 0) || - (! 
KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) { - if ((__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) && i < count - 1) { - KMP_WARNING(AffIgnoreInvalidProcID, j+stride); - } - } - else { - KMP_CPU_SET(j+stride, tempMask); - setSize++; - } - } - } - KMP_CPU_ZERO(tempMask); - setSize = 0; - - // - // valid follow sets are ',' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - - KMP_ASSERT2(0, "bad explicit places list"); - } - - *out_numMasks = nextNewMask; - if (nextNewMask == 0) { - *out_masks = NULL; - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); - return; - } - KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); - KMP_CPU_FREE(tempMask); - KMP_CPU_FREE(previousMask); - for(i = 0; i < nextNewMask; i++) { - kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); - kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i); - KMP_CPU_COPY(dest, src); - } - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); -} - -# endif /* OMP_40_ENABLED */ - -#undef ADD_MASK -#undef ADD_MASK_OSID - -static void -__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) -{ - if (__kmp_place_num_sockets == 0 && - __kmp_place_num_cores == 0 && - __kmp_place_num_threads_per_core == 0 ) - return; // no topology limiting actions requested, exit - if (__kmp_place_num_sockets == 0) - __kmp_place_num_sockets = nPackages; // use all available sockets - if (__kmp_place_num_cores == 0) - __kmp_place_num_cores = nCoresPerPkg; // use all available cores - if (__kmp_place_num_threads_per_core == 0 || - __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore) - __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts - - if ( !__kmp_affinity_uniform_topology() ) { - KMP_WARNING( AffThrPlaceNonUniform ); - return; // don't support non-uniform topology - } - if ( depth != 3 ) { - KMP_WARNING( AffThrPlaceNonThreeLevel ); - return; // 
don't support not-3-level topology - } - if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) { - KMP_WARNING(AffThrPlaceManySockets); - return; - } - if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) { - KMP_WARNING( AffThrPlaceManyCores ); - return; - } - - AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) * - __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core); - - int i, j, k, n_old = 0, n_new = 0; - for (i = 0; i < nPackages; ++i) - if (i < __kmp_place_socket_offset || - i >= __kmp_place_socket_offset + __kmp_place_num_sockets) - n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket - else - for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket - if (j < __kmp_place_core_offset || - j >= __kmp_place_core_offset + __kmp_place_num_cores) - n_old += __kmp_nThreadsPerCore; // skip not-requested core - else - for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core - if (k < __kmp_place_num_threads_per_core) { - newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data - n_new++; - } - n_old++; - } - KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore); - KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores * - __kmp_place_num_threads_per_core); - - nPackages = __kmp_place_num_sockets; // correct nPackages - nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg - __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore - __kmp_avail_proc = n_new; // correct avail_proc - __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores - - __kmp_free( *pAddr ); - *pAddr = newAddr; // replace old topology with new one -} - - -static AddrUnsPair *address2os = NULL; -static int * procarr = NULL; -static int __kmp_aff_depth = 0; - -static void -__kmp_aux_affinity_initialize(void) -{ - if (__kmp_affinity_masks != NULL) { - 
KMP_ASSERT(fullMask != NULL); - return; - } - - // - // Create the "full" mask - this defines all of the processors that we - // consider to be in the machine model. If respect is set, then it is - // the initialization thread's affinity mask. Otherwise, it is all - // processors that we know about on the machine. - // - if (fullMask == NULL) { - KMP_CPU_ALLOC(fullMask); - } - if (KMP_AFFINITY_CAPABLE()) { - if (__kmp_affinity_respect_mask) { - __kmp_get_system_affinity(fullMask, TRUE); - - // - // Count the number of available processors. - // - unsigned i; - __kmp_avail_proc = 0; - KMP_CPU_SET_ITERATE(i, fullMask) { - if (! KMP_CPU_ISSET(i, fullMask)) { - continue; - } - __kmp_avail_proc++; - } - if (__kmp_avail_proc > __kmp_xproc) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(ErrorInitializeAffinity); - } - __kmp_affinity_type = affinity_none; - KMP_AFFINITY_DISABLE(); - return; - } - } - else { - __kmp_affinity_entire_machine_mask(fullMask); - __kmp_avail_proc = __kmp_xproc; - } - } - - int depth = -1; - kmp_i18n_id_t msg_id = kmp_i18n_null; - - // - // For backward compatibility, setting KMP_CPUINFO_FILE => - // KMP_TOPOLOGY_METHOD=cpuinfo - // - if ((__kmp_cpuinfo_file != NULL) && - (__kmp_affinity_top_method == affinity_top_method_all)) { - __kmp_affinity_top_method = affinity_top_method_cpuinfo; - } - - if (__kmp_affinity_top_method == affinity_top_method_all) { - // - // In the default code path, errors are not fatal - we just try using - // another method. We only emit a warning message if affinity is on, - // or the verbose flag is set, an the nowarnings flag was not set. 
- // - const char *file_name = NULL; - int line = 0; -# if KMP_USE_HWLOC - if (depth < 0) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - } - if(!__kmp_hwloc_error) { - depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } else if(depth < 0 && __kmp_affinity_verbose) { - KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); - } - } else if(__kmp_affinity_verbose) { - KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); - } - } -# endif - -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - - if (depth < 0) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC)); - } - - file_name = NULL; - depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - - if (depth < 0) { - if (__kmp_affinity_verbose) { - if (msg_id != kmp_i18n_null) { - KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), - KMP_I18N_STR(DecodingLegacyAPIC)); - } - else { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC)); - } - } - - file_name = NULL; - depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - } - } - -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -# if KMP_OS_LINUX - - if (depth < 0) { - if (__kmp_affinity_verbose) { - if (msg_id != kmp_i18n_null) { - KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"); - } - else { - KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo"); - } - } - - FILE *f = fopen("/proc/cpuinfo", "r"); - if (f == NULL) { - msg_id = kmp_i18n_str_CantOpenCpuinfo; - } - else { - file_name = "/proc/cpuinfo"; - depth = 
__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); - fclose(f); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - } - } - -# endif /* KMP_OS_LINUX */ - -# if KMP_GROUP_AFFINITY - - if ((depth < 0) && (__kmp_num_proc_groups > 1)) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); - } - - depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); - KMP_ASSERT(depth != 0); - } - -# endif /* KMP_GROUP_AFFINITY */ - - if (depth < 0) { - if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) { - if (file_name == NULL) { - KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id)); - } - else if (line == 0) { - KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id)); - } - else { - KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id)); - } - } - // FIXME - print msg if msg_id = kmp_i18n_null ??? - - file_name = ""; - depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - KMP_ASSERT(depth > 0); - KMP_ASSERT(address2os != NULL); - } - } - - // - // If the user has specified that a paricular topology discovery method - // is to be used, then we abort if that method fails. The exception is - // group affinity, which might have been implicitly set. 
- // - -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - - else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", - KMP_I18N_STR(Decodingx2APIC)); - } - - depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); - } - } - else if (__kmp_affinity_top_method == affinity_top_method_apicid) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", - KMP_I18N_STR(DecodingLegacyAPIC)); - } - - depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); - } - } - -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) { - const char *filename; - if (__kmp_cpuinfo_file != NULL) { - filename = __kmp_cpuinfo_file; - } - else { - filename = "/proc/cpuinfo"; - } - - if (__kmp_affinity_verbose) { - KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename); - } - - FILE *f = fopen(filename, "r"); - if (f == NULL) { - int code = errno; - if (__kmp_cpuinfo_file != NULL) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(CantOpenFileForReading, filename), - KMP_ERR(code), - KMP_HNT(NameComesFrom_CPUINFO_FILE), - __kmp_msg_null - ); - } - else { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(CantOpenFileForReading, filename), - KMP_ERR(code), - __kmp_msg_null - ); - } - } - int line = 0; - depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); - fclose(f); - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - if (line > 0) { - KMP_FATAL(FileLineMsgExiting, 
filename, line, __kmp_i18n_catgets(msg_id)); - } - else { - KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id)); - } - } - if (__kmp_affinity_type == affinity_none) { - KMP_ASSERT(depth == 0); - KMP_ASSERT(address2os == NULL); - return; - } - } - -# if KMP_GROUP_AFFINITY - - else if (__kmp_affinity_top_method == affinity_top_method_group) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); - } - - depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); - KMP_ASSERT(depth != 0); - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); - } - } - -# endif /* KMP_GROUP_AFFINITY */ - - else if (__kmp_affinity_top_method == affinity_top_method_flat) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY"); - } - - depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - // should not fail - KMP_ASSERT(depth > 0); - KMP_ASSERT(address2os != NULL); - } - -# if KMP_USE_HWLOC - else if (__kmp_affinity_top_method == affinity_top_method_hwloc) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - } - depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } -# if KMP_DEBUG - AddrUnsPair *otheraddress2os = NULL; - int otherdepth = -1; -# if KMP_MIC - otherdepth = __kmp_affinity_create_apicid_map(&otheraddress2os, &msg_id); -# else - otherdepth = __kmp_affinity_create_x2apicid_map(&otheraddress2os, &msg_id); -# endif - if(otheraddress2os != NULL && address2os != NULL) { - int i; - unsigned arent_equal_flag = 0; - for(i=0;i<__kmp_avail_proc;i++) { - if(otheraddress2os[i] != address2os[i]) arent_equal_flag = 1; - } - if(arent_equal_flag) { - KA_TRACE(10, 
("__kmp_aux_affinity_initialize: Hwloc affinity places are different from APICID\n")); - KA_TRACE(10, ("__kmp_aux_affinity_initialize: APICID Table:\n")); - for(i=0;i<__kmp_avail_proc;i++) { - otheraddress2os[i].print(); __kmp_printf("\n"); - } - KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc Table:\n")); - for(i=0;i<__kmp_avail_proc;i++) { - address2os[i].print(); __kmp_printf("\n"); - } - } - else { - KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are same as APICID\n")); - } - } -# endif // KMP_DEBUG - } -# endif // KMP_USE_HWLOC - - if (address2os == NULL) { - if (KMP_AFFINITY_CAPABLE() - && (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none)))) { - KMP_WARNING(ErrorInitializeAffinity); - } - __kmp_affinity_type = affinity_none; - KMP_AFFINITY_DISABLE(); - return; - } - - __kmp_apply_thread_places(&address2os, depth); - - // - // Create the table of masks, indexed by thread Id. - // - unsigned maxIndex; - unsigned numUnique; - kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique, - address2os, __kmp_avail_proc); - if (__kmp_affinity_gran_levels == 0) { - KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); - } - - // - // Set the childNums vector in all Address objects. This must be done - // before we can sort using __kmp_affinity_cmp_Address_child_num(), - // which takes into account the setting of __kmp_affinity_compact. 
- // - __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc); - - switch (__kmp_affinity_type) { - - case affinity_explicit: - KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL); -# if OMP_40_ENABLED - if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) -# endif - { - __kmp_affinity_process_proclist(&__kmp_affinity_masks, - &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask, - maxIndex); - } -# if OMP_40_ENABLED - else { - __kmp_affinity_process_placelist(&__kmp_affinity_masks, - &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask, - maxIndex); - } -# endif - if (__kmp_affinity_num_masks == 0) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffNoValidProcID); - } - __kmp_affinity_type = affinity_none; - return; - } - break; - - // - // The other affinity types rely on sorting the Addresses according - // to some permutation of the machine topology tree. Set - // __kmp_affinity_compact and __kmp_affinity_offset appropriately, - // then jump to a common code fragment to do the sort and create - // the array of affinity masks. 
- // - - case affinity_logical: - __kmp_affinity_compact = 0; - if (__kmp_affinity_offset) { - __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset - % __kmp_avail_proc; - } - goto sortAddresses; - - case affinity_physical: - if (__kmp_nThreadsPerCore > 1) { - __kmp_affinity_compact = 1; - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; - } - } else { - __kmp_affinity_compact = 0; - } - if (__kmp_affinity_offset) { - __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset - % __kmp_avail_proc; - } - goto sortAddresses; - - case affinity_scatter: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; - } - else { - __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact; - } - goto sortAddresses; - - case affinity_compact: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = depth - 1; - } - goto sortAddresses; - - case affinity_balanced: - // Balanced works only for the case of a single package - if( nPackages > 1 ) { - if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { - KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" ); - } - __kmp_affinity_type = affinity_none; - return; - } else if( __kmp_affinity_uniform_topology() ) { - break; - } else { // Non-uniform topology - - // Save the depth for further usage - __kmp_aff_depth = depth; - - // Number of hyper threads per core in HT machine - int nth_per_core = __kmp_nThreadsPerCore; - - int core_level; - if( nth_per_core > 1 ) { - core_level = depth - 2; - } else { - core_level = depth - 1; - } - int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1; - int nproc = nth_per_core * ncores; - - procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc ); - for( int i = 0; i < nproc; i++ ) { - procarr[ i ] = -1; - } - - for( int i = 0; i < __kmp_avail_proc; i++ ) { - int proc = address2os[ i ].second; - // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread. 
- // If there is only one thread per core then depth == 2: level 0 - package, - // level 1 - core. - int level = depth - 1; - - // __kmp_nth_per_core == 1 - int thread = 0; - int core = address2os[ i ].first.labels[ level ]; - // If the thread level exists, that is we have more than one thread context per core - if( nth_per_core > 1 ) { - thread = address2os[ i ].first.labels[ level ] % nth_per_core; - core = address2os[ i ].first.labels[ level - 1 ]; - } - procarr[ core * nth_per_core + thread ] = proc; - } - - break; - } - - sortAddresses: - // - // Allocate the gtid->affinity mask table. - // - if (__kmp_affinity_dups) { - __kmp_affinity_num_masks = __kmp_avail_proc; - } - else { - __kmp_affinity_num_masks = numUnique; - } - -# if OMP_40_ENABLED - if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel ) - && ( __kmp_affinity_num_places > 0 ) - && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) { - __kmp_affinity_num_masks = __kmp_affinity_num_places; - } -# endif - - KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - - // - // Sort the address2os table according to the current setting of - // __kmp_affinity_compact, then fill out __kmp_affinity_masks. - // - qsort(address2os, __kmp_avail_proc, sizeof(*address2os), - __kmp_affinity_cmp_Address_child_num); - { - int i; - unsigned j; - for (i = 0, j = 0; i < __kmp_avail_proc; i++) { - if ((! __kmp_affinity_dups) && (! 
address2os[i].first.leader)) { - continue; - } - unsigned osId = address2os[i].second; - kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId); - kmp_affin_mask_t *dest - = KMP_CPU_INDEX(__kmp_affinity_masks, j); - KMP_ASSERT(KMP_CPU_ISSET(osId, src)); - KMP_CPU_COPY(dest, src); - if (++j >= __kmp_affinity_num_masks) { - break; - } - } - KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks); - } - break; - - default: - KMP_ASSERT2(0, "Unexpected affinity setting"); - } - - __kmp_free(osId2Mask); - machine_hierarchy.init(address2os, __kmp_avail_proc); -} - - -void -__kmp_affinity_initialize(void) -{ - // - // Much of the code above was written assumming that if a machine was not - // affinity capable, then __kmp_affinity_type == affinity_none. We now - // explicitly represent this as __kmp_affinity_type == affinity_disabled. - // - // There are too many checks for __kmp_affinity_type == affinity_none - // in this code. Instead of trying to change them all, check if - // __kmp_affinity_type == affinity_disabled, and if so, slam it with - // affinity_none, call the real initialization routine, then restore - // __kmp_affinity_type to affinity_disabled. - // - int disabled = (__kmp_affinity_type == affinity_disabled); - if (! 
KMP_AFFINITY_CAPABLE()) { - KMP_ASSERT(disabled); - } - if (disabled) { - __kmp_affinity_type = affinity_none; - } - __kmp_aux_affinity_initialize(); - if (disabled) { - __kmp_affinity_type = affinity_disabled; - } -} - - -void -__kmp_affinity_uninitialize(void) -{ - if (__kmp_affinity_masks != NULL) { - KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - __kmp_affinity_masks = NULL; - } - if (fullMask != NULL) { - KMP_CPU_FREE(fullMask); - fullMask = NULL; - } - __kmp_affinity_num_masks = 0; -# if OMP_40_ENABLED - __kmp_affinity_num_places = 0; -# endif - if (__kmp_affinity_proclist != NULL) { - __kmp_free(__kmp_affinity_proclist); - __kmp_affinity_proclist = NULL; - } - if( address2os != NULL ) { - __kmp_free( address2os ); - address2os = NULL; - } - if( procarr != NULL ) { - __kmp_free( procarr ); - procarr = NULL; - } -} - - -void -__kmp_affinity_set_init_mask(int gtid, int isa_root) -{ - if (! KMP_AFFINITY_CAPABLE()) { - return; - } - - kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); - if (th->th.th_affin_mask == NULL) { - KMP_CPU_ALLOC(th->th.th_affin_mask); - } - else { - KMP_CPU_ZERO(th->th.th_affin_mask); - } - - // - // Copy the thread mask to the kmp_info_t strucuture. - // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one - // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask - // is set, then the full mask is the same as the mask of the initialization - // thread. 
- // - kmp_affin_mask_t *mask; - int i; - -# if OMP_40_ENABLED - if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) -# endif - { - if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced) - ) { -# if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - return; - } -# endif - KMP_ASSERT(fullMask != NULL); - i = KMP_PLACE_ALL; - mask = fullMask; - } - else { - KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 ); - i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); - } - } -# if OMP_40_ENABLED - else { - if ((! isa_root) - || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { -# if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - return; - } -# endif - KMP_ASSERT(fullMask != NULL); - i = KMP_PLACE_ALL; - mask = fullMask; - } - else { - // - // int i = some hash function or just a counter that doesn't - // always start at 0. Use gtid for now. - // - KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 ); - i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); - } - } -# endif - -# if OMP_40_ENABLED - th->th.th_current_place = i; - if (isa_root) { - th->th.th_new_place = i; - th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; - } - - if (i == KMP_PLACE_ALL) { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n", - gtid)); - } - else { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n", - gtid, i)); - } -# else - if (i == -1) { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n", - gtid)); - } - else { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n", - gtid, i)); - } -# endif /* OMP_40_ENABLED */ - - KMP_CPU_COPY(th->th.th_affin_mask, mask); - - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - 
th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid, - buf); - } - -# if KMP_OS_WINDOWS - // - // On Windows* OS, the process affinity mask might have changed. - // If the user didn't request affinity and this call fails, - // just continue silently. See CQ171393. - // - if ( __kmp_affinity_type == affinity_none ) { - __kmp_set_system_affinity(th->th.th_affin_mask, FALSE); - } - else -# endif - __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); -} - - -# if OMP_40_ENABLED - -void -__kmp_affinity_set_place(int gtid) -{ - int retval; - - if (! KMP_AFFINITY_CAPABLE()) { - return; - } - - kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); - - KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n", - gtid, th->th.th_new_place, th->th.th_current_place)); - - // - // Check that the new place is within this thread's partition. - // - KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); - KMP_ASSERT(th->th.th_new_place >= 0); - KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks); - if (th->th.th_first_place <= th->th.th_last_place) { - KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) - && (th->th.th_new_place <= th->th.th_last_place)); - } - else { - KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) - || (th->th.th_new_place >= th->th.th_last_place)); - } - - // - // Copy the thread mask to the kmp_info_t strucuture, - // and set this thread's affinity. 
- // - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, - th->th.th_new_place); - KMP_CPU_COPY(th->th.th_affin_mask, mask); - th->th.th_current_place = th->th.th_new_place; - - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(), - gtid, buf); - } - __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); -} - -# endif /* OMP_40_ENABLED */ - - -int -__kmp_aux_set_affinity(void **mask) -{ - int gtid; - kmp_info_t *th; - int retval; - - if (! KMP_AFFINITY_CAPABLE()) { - return -1; - } - - gtid = __kmp_entry_gtid(); - KA_TRACE(1000, ;{ - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n", - gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - else { - unsigned proc; - int num_procs = 0; - - KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) { - if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) { - continue; - } - num_procs++; - if (! 
KMP_CPU_ISSET(proc, fullMask)) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - break; - } - } - if (num_procs == 0) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - -# if KMP_GROUP_AFFINITY - if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } -# endif /* KMP_GROUP_AFFINITY */ - - } - } - - th = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); - retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); - if (retval == 0) { - KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask)); - } - -# if OMP_40_ENABLED - th->th.th_current_place = KMP_PLACE_UNDEFINED; - th->th.th_new_place = KMP_PLACE_UNDEFINED; - th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; - - // - // Turn off 4.0 affinity for the current tread at this parallel level. - // - th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; -# endif - - return retval; -} - - -int -__kmp_aux_get_affinity(void **mask) -{ - int gtid; - int retval; - kmp_info_t *th; - - if (! 
KMP_AFFINITY_CAPABLE()) { - return -1; - } - - gtid = __kmp_entry_gtid(); - th = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); - - KA_TRACE(1000, ;{ - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - th->th.th_affin_mask); - __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity"); - } - } - -# if !KMP_OS_WINDOWS - - retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); - KA_TRACE(1000, ;{ - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf); - }); - return retval; - -# else - - KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask); - return 0; - -# endif /* KMP_OS_WINDOWS */ - -} - -int -__kmp_aux_set_affinity_mask_proc(int proc, void **mask) -{ - int retval; - - if (! KMP_AFFINITY_CAPABLE()) { - return -1; - } - - KA_TRACE(1000, ;{ - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc"); - } - } - - if ((proc < 0) -# if !KMP_USE_HWLOC - || ((unsigned)proc >= KMP_CPU_SETSIZE) -# endif - ) { - return -1; - } - if (! KMP_CPU_ISSET(proc, fullMask)) { - return -2; - } - - KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask)); - return 0; -} - - -int -__kmp_aux_unset_affinity_mask_proc(int proc, void **mask) -{ - int retval; - - if (! 
KMP_AFFINITY_CAPABLE()) { - return -1; - } - - KA_TRACE(1000, ;{ - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc"); - } - } - - if ((proc < 0) -# if !KMP_USE_HWLOC - || ((unsigned)proc >= KMP_CPU_SETSIZE) -# endif - ) { - return -1; - } - if (! KMP_CPU_ISSET(proc, fullMask)) { - return -2; - } - - KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask)); - return 0; -} - - -int -__kmp_aux_get_affinity_mask_proc(int proc, void **mask) -{ - int retval; - - if (! KMP_AFFINITY_CAPABLE()) { - return -1; - } - - KA_TRACE(1000, ;{ - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc"); - } - } - - if ((proc < 0) -# if !KMP_USE_HWLOC - || ((unsigned)proc >= KMP_CPU_SETSIZE) -# endif - ) { - return -1; - } - if (! 
KMP_CPU_ISSET(proc, fullMask)) { - return 0; - } - - return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask)); -} - - -// Dynamic affinity settings - Affinity balanced -void __kmp_balanced_affinity( int tid, int nthreads ) -{ - if( __kmp_affinity_uniform_topology() ) { - int coreID; - int threadID; - // Number of hyper threads per core in HT machine - int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores; - // Number of cores - int ncores = __kmp_ncores; - // How many threads will be bound to each core - int chunk = nthreads / ncores; - // How many cores will have an additional thread bound to it - "big cores" - int big_cores = nthreads % ncores; - // Number of threads on the big cores - int big_nth = ( chunk + 1 ) * big_cores; - if( tid < big_nth ) { - coreID = tid / (chunk + 1 ); - threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ; - } else { //tid >= big_nth - coreID = ( tid - big_cores ) / chunk; - threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ; - } - - KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - - // Granularity == thread - if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) { - int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second; - KMP_CPU_SET( osID, mask); - } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core - for( int i = 0; i < __kmp_nth_per_core; i++ ) { - int osID; - osID = address2os[ coreID * __kmp_nth_per_core + i ].second; - KMP_CPU_SET( osID, mask); - } - } - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - tid, buf); - } - __kmp_set_system_affinity( mask, TRUE ); - KMP_CPU_FREE_FROM_STACK(mask); - } else { // Non-uniform topology - - 
kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - - // Number of hyper threads per core in HT machine - int nth_per_core = __kmp_nThreadsPerCore; - int core_level; - if( nth_per_core > 1 ) { - core_level = __kmp_aff_depth - 2; - } else { - core_level = __kmp_aff_depth - 1; - } - - // Number of cores - maximum value; it does not count trail cores with 0 processors - int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1; - - // For performance gain consider the special case nthreads == __kmp_avail_proc - if( nthreads == __kmp_avail_proc ) { - if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) { - int osID = address2os[ tid ].second; - KMP_CPU_SET( osID, mask); - } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core - int coreID = address2os[ tid ].first.labels[ core_level ]; - // We'll count found osIDs for the current core; they can be not more than nth_per_core; - // since the address2os is sortied we can break when cnt==nth_per_core - int cnt = 0; - for( int i = 0; i < __kmp_avail_proc; i++ ) { - int osID = address2os[ i ].second; - int core = address2os[ i ].first.labels[ core_level ]; - if( core == coreID ) { - KMP_CPU_SET( osID, mask); - cnt++; - if( cnt == nth_per_core ) { - break; - } - } - } - } - } else if( nthreads <= __kmp_ncores ) { - - int core = 0; - for( int i = 0; i < ncores; i++ ) { - // Check if this core from procarr[] is in the mask - int in_mask = 0; - for( int j = 0; j < nth_per_core; j++ ) { - if( procarr[ i * nth_per_core + j ] != - 1 ) { - in_mask = 1; - break; - } - } - if( in_mask ) { - if( tid == core ) { - for( int j = 0; j < nth_per_core; j++ ) { - int osID = procarr[ i * nth_per_core + j ]; - if( osID != -1 ) { - KMP_CPU_SET( osID, mask ); - // For granularity=thread it is enough to set the first available osID for this core - if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == 
affinity_gran_thread) { - break; - } - } - } - break; - } else { - core++; - } - } - } - - } else { // nthreads > __kmp_ncores - - // Array to save the number of processors at each core - int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores); - // Array to save the number of cores with "x" available processors; - int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1)); - // Array to save the number of cores with # procs from x to nth_per_core - int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1)); - - for( int i = 0; i <= nth_per_core; i++ ) { - ncores_with_x_procs[ i ] = 0; - ncores_with_x_to_max_procs[ i ] = 0; - } - - for( int i = 0; i < ncores; i++ ) { - int cnt = 0; - for( int j = 0; j < nth_per_core; j++ ) { - if( procarr[ i * nth_per_core + j ] != -1 ) { - cnt++; - } - } - nproc_at_core[ i ] = cnt; - ncores_with_x_procs[ cnt ]++; - } - - for( int i = 0; i <= nth_per_core; i++ ) { - for( int j = i; j <= nth_per_core; j++ ) { - ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ]; - } - } - - // Max number of processors - int nproc = nth_per_core * ncores; - // An array to keep number of threads per each context - int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc ); - for( int i = 0; i < nproc; i++ ) { - newarr[ i ] = 0; - } - - int nth = nthreads; - int flag = 0; - while( nth > 0 ) { - for( int j = 1; j <= nth_per_core; j++ ) { - int cnt = ncores_with_x_to_max_procs[ j ]; - for( int i = 0; i < ncores; i++ ) { - // Skip the core with 0 processors - if( nproc_at_core[ i ] == 0 ) { - continue; - } - for( int k = 0; k < nth_per_core; k++ ) { - if( procarr[ i * nth_per_core + k ] != -1 ) { - if( newarr[ i * nth_per_core + k ] == 0 ) { - newarr[ i * nth_per_core + k ] = 1; - cnt--; - nth--; - break; - } else { - if( flag != 0 ) { - newarr[ i * nth_per_core + k ] ++; - cnt--; - nth--; - break; - } - } - } - } - if( cnt == 0 || nth == 0 ) { - break; - } - } - if( nth == 0 ) { - break; - } - } - flag 
= 1; - } - int sum = 0; - for( int i = 0; i < nproc; i++ ) { - sum += newarr[ i ]; - if( sum > tid ) { - // Granularity == thread - if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) { - int osID = procarr[ i ]; - KMP_CPU_SET( osID, mask); - } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core - int coreID = i / nth_per_core; - for( int ii = 0; ii < nth_per_core; ii++ ) { - int osID = procarr[ coreID * nth_per_core + ii ]; - if( osID != -1 ) { - KMP_CPU_SET( osID, mask); - } - } - } - break; - } - } - __kmp_free( newarr ); - } - - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - tid, buf); - } - __kmp_set_system_affinity( mask, TRUE ); - KMP_CPU_FREE_FROM_STACK(mask); - } -} - -#endif // KMP_AFFINITY_SUPPORTED
+/*
+ * kmp_affinity.cpp -- affinity management
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_str.h"
+#include "kmp_wrapper_getpid.h"
+#include "kmp_affinity.h"
+
+// Store the real or imagined machine hierarchy here
+static hierarchy_info machine_hierarchy;
+
+// Tear down the file-level machine_hierarchy object.
+void __kmp_cleanup_hierarchy() {
+    machine_hierarchy.fini();
+}
+
+//
+// Copy the barrier-relevant parts of machine_hierarchy (depth, number of leaf
+// kids, per-level skip table) into thr_bar, initializing or growing the
+// hierarchy first when needed.
+//
+void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
+    kmp_uint32 depth;
+    // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
+    if (TCR_1(machine_hierarchy.uninitialized))
+        machine_hierarchy.init(NULL, nproc);
+
+    // Adjust the hierarchy in case num threads exceeds original
+    if (nproc > machine_hierarchy.base_num_threads)
+        machine_hierarchy.resize(nproc);
+
+    depth = machine_hierarchy.depth;
+    KMP_DEBUG_ASSERT(depth > 0);
+
+    thr_bar->depth = depth;
+    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
+    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
+}
+
+#if KMP_AFFINITY_SUPPORTED
+
+//
+// Print the affinity mask to the character array in a pretty format.
+// buf must hold at least 40 characters (asserted).  The result is "{}" for an
+// empty mask, "{<list>}" when the list fits, and a truncated list ending in
+// "...}" otherwise.  Returns buf.
+//
+#if KMP_USE_HWLOC
+char *
+__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
+{
+    int num_chars_to_write, num_chars_written;
+    char* scan;
+    KMP_ASSERT(buf_len >= 40);
+
+    // bufsize of 0 just retrieves the needed buffer size.
+    num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);
+
+    // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
+    // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
+    //   take into account the '\0' character.
+    if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
+        KMP_SNPRINTF(buf, buf_len, "{}");
+    } else if(num_chars_to_write < buf_len - 3) {
+        // no problem fitting the mask into buf_len number of characters
+        buf[0] = '{';
+        // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
+        num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
+        buf[num_chars_written+1] = '}';
+        buf[num_chars_written+2] = '\0';
+    } else {
+        // Need to truncate the affinity mask string and add ellipsis.
+        // To do this, we first write out the '{' + str(mask)
+        buf[0] = '{';
+        hwloc_bitmap_list_snprintf(buf+1, buf_len-7, (hwloc_bitmap_t)mask);
+        // hwloc_bitmap_list_snprintf() is snprintf-like: given a size of
+        // buf_len-7 it stores at most buf_len-8 characters plus a '\0', so in
+        // this (truncating) branch the last list character is buf[buf_len-8]
+        // and buf[buf_len-7] is the terminator.  Starting the scan on the
+        // terminator (as was done before) left it in place, hiding the
+        // ellipsis written after it.
+        //
+        // Start on the last list character instead and go backwards until we
+        // are NOT on a digit, then write "...}\0".  This way it is a clean
+        // ellipsis addition and we don't overwrite part of an affinity number.
+        // i.e., we avoid something like { 45, 67, 8...} and get
+        // { 45, 67,...} instead.
+        scan = buf + buf_len - 8;
+        // Check the bound before dereferencing; buf[0] is '{', so the scan
+        // never needs to step below buf.
+        while(scan > buf && *scan >= '0' && *scan <= '9')
+            scan--;
+        *(scan+1) = '.';
+        *(scan+2) = '.';
+        *(scan+3) = '.';
+        *(scan+4) = '}';
+        *(scan+5) = '\0';
+    }
+    return buf;
+}
+#else
+char *
+__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
+{
+    KMP_ASSERT(buf_len >= 40);
+    char *scan = buf;
+    char *end = buf + buf_len - 1;
+
+    //
+    // Find first element / check for empty set.
+    //
+    size_t i;
+    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
+        if (KMP_CPU_ISSET(i, mask)) {
+            break;
+        }
+    }
+    if (i == KMP_CPU_SETSIZE) {
+        KMP_SNPRINTF(scan, end-scan+1, "{}");
+        while (*scan != '\0') scan++;
+        KMP_ASSERT(scan <= end);
+        return buf;
+    }
+
+    KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
+    while (*scan != '\0') scan++;
+    i++;
+    for (; i < KMP_CPU_SETSIZE; i++) {
+        if (! KMP_CPU_ISSET(i, mask)) {
+            continue;
+        }
+
+        //
+        // Check for buffer overflow.  A string of the form ",<n>" will have
+        // at most 10 characters, plus we want to leave room to print ",...}"
+        // if the set is too large to print for a total of 15 characters.
+        // We already left room for '\0' in setting end.
+        //
+        if (end - scan < 15) {
+            break;
+        }
+        KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
+        while (*scan != '\0') scan++;
+    }
+    // Leaving the loop early means some set bits were not printed.
+    if (i < KMP_CPU_SETSIZE) {
+        KMP_SNPRINTF(scan, end-scan+1, ",...");
+        while (*scan != '\0') scan++;
+    }
+    KMP_SNPRINTF(scan, end-scan+1, "}");
+    while (*scan != '\0') scan++;
+    KMP_ASSERT(scan <= end);
+    return buf;
+}
+#endif // KMP_USE_HWLOC
+
+
+//
+// Clear mask, then set one bit per processor: for every active processor of
+// every group when there are multiple processor groups, otherwise for procs
+// 0 .. __kmp_xproc-1.
+//
+void
+__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
+{
+    KMP_CPU_ZERO(mask);
+
+# if KMP_GROUP_AFFINITY
+
+    if (__kmp_num_proc_groups > 1) {
+        int group;
+        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
+        for (group = 0; group < __kmp_num_proc_groups; group++) {
+            int i;
+            int num = __kmp_GetActiveProcessorCount(group);
+            for (i = 0; i < num; i++) {
+                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
+            }
+        }
+    }
+    else
+
+# endif /* KMP_GROUP_AFFINITY */
+
+    {
+        int proc;
+        for (proc = 0; proc < __kmp_xproc; proc++) {
+            KMP_CPU_SET(proc, mask);
+        }
+    }
+}
+
+//
+// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
+// called to renumber the labels from [0..n] and place them into the child_num
+// vector of the address object.  This is done in case the labels used for
+// the children at one node of the hierarchy differ from those used for
+// another node at the same level.  Example:  suppose the machine has 2 nodes
+// with 2 packages each.  The first node contains packages 601 and 602, and
+// second node contains packages 603 and 604.  If we try to sort the table
+// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
+// because we are paying attention to the labels themselves, not the ordinal
+// child numbers.  By using the child numbers in the sort, the result is
+// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
+//
+static void
+__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
+  int numAddrs)
+{
+    KMP_DEBUG_ASSERT(numAddrs > 0);
+    int depth = address2os->first.depth;
+
+    // Scratch arrays with one slot per level: counts[] is the child number
+    // currently being handed out at that level, lastLabel[] is the label most
+    // recently seen there.
+    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
+      * sizeof(unsigned));
+
+    // The first table entry is child 0 at every level.
+    int labCt;
+    for (labCt = 0; labCt < depth; labCt++) {
+        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
+        lastLabel[labCt] = address2os[0].first.labels[labCt];
+    }
+
+    int i;
+    for (i = 1; i < numAddrs; i++) {
+        // Find the outermost level whose label changed relative to the
+        // previous entry: restart the numbering of every deeper level and
+        // advance the child number at that level.
+        for (labCt = 0; labCt < depth; labCt++) {
+            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
+                int labCt2;
+                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
+                    counts[labCt2] = 0;
+                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
+                }
+                counts[labCt]++;
+                lastLabel[labCt] = address2os[i].first.labels[labCt];
+                break;
+            }
+        }
+        // Record the current child numbers; levels beyond depth are zeroed.
+        for (labCt = 0; labCt < depth; labCt++) {
+            address2os[i].first.childNums[labCt] = counts[labCt];
+        }
+        for (; labCt < (int)Address::maxDepth; labCt++) {
+            address2os[i].first.childNums[labCt] = 0;
+        }
+    }
+
+    // Release the scratch arrays; without this they leak on every call.
+    __kmp_free(lastLabel);
+    __kmp_free(counts);
+}
+
+
+//
+// All of the __kmp_affinity_create_*_map() routines should set
+// __kmp_affinity_masks to a vector of affinity mask objects of length
+// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
+// return the number of levels in the machine topology tree (zero if
+// __kmp_affinity_type == affinity_none).
+//
+// All of the __kmp_affinity_create_*_map() routines should set *fullMask
+// to the affinity mask for the initialization thread.  They need to save and
+// restore the mask, and it could be needed later, so saving it is just an
+// optimization to avoid calling kmp_get_system_affinity() again.
+//
+static kmp_affin_mask_t *fullMask = NULL;
+
+// Accessor for the file-level fullMask pointer.
+kmp_affin_mask_t *
+__kmp_affinity_get_fullMask() { return fullMask; }
+
+
+static int nCoresPerPkg, nPackages;
+static int __kmp_nThreadsPerCore;
+#ifndef KMP_DFLT_NTH_CORES
+static int __kmp_ncores;
+#endif
+
+//
+// __kmp_affinity_uniform_topology() doesn't work when called from
+// places which support arbitrarily many levels in the machine topology
+// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
+// __kmp_affinity_create_x2apicid_map().
+//
+inline static bool
+__kmp_affinity_uniform_topology()
+{
+    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
+}
+
+
+//
+// Print out the detailed machine topology map, i.e. the physical locations
+// of each OS proc.  One line is emitted per table entry; each level's label
+// is prefixed with Thread / Core / Package / Node_<k> (or "L<level>") as
+// selected by threadLevel / coreLevel / pkgLevel.
+//
+static void
+__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
+  int pkgLevel, int coreLevel, int threadLevel)
+{
+    int proc;
+
+    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
+    for (proc = 0; proc < len; proc++) {
+        int level;
+        kmp_str_buf_t buf;
+        __kmp_str_buf_init(&buf);
+        for (level = 0; level < depth; level++) {
+            if (level == threadLevel) {
+                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
+            }
+            else if (level == coreLevel) {
+                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
+            }
+            else if (level == pkgLevel) {
+                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
+            }
+            else if (level > pkgLevel) {
+                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
+                  level - pkgLevel - 1);
+            }
+            else {
+                __kmp_str_buf_print(&buf, "L%d ", level);
+            }
+            __kmp_str_buf_print(&buf, "%d ",
+              address2os[proc].first.labels[level]);
+        }
+        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
+          buf.str);
+        __kmp_str_buf_free(&buf);
+    }
+}
+
+#if KMP_USE_HWLOC
+//
+// Build the address2os table from the hwloc topology.
+//
+// Always sets __kmp_ncores, __kmp_nThreadsPerCore, nCoresPerPkg and nPackages.
+// Returns 0 (with *address2os left NULL) when affinity is not capable or
+// __kmp_affinity_type == affinity_none; otherwise stores the allocated table
+// in *address2os and returns the number of levels it models.
+//
+static int
+__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
+  kmp_i18n_id_t *const msg_id)
+{
+    *address2os = NULL;
+    *msg_id = kmp_i18n_null;
+
+    //
+    // Save the affinity mask for the current thread.
+    //
+    kmp_affin_mask_t *oldMask;
+    KMP_CPU_ALLOC(oldMask);
+    __kmp_get_system_affinity(oldMask, TRUE);
+
+    unsigned depth = hwloc_topology_get_depth(__kmp_hwloc_topology);
+    int threadLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_PU);
+    int coreLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
+    int pkgLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
+    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 0;
+
+    //
+    // This makes an assumption about the topology being four levels:
+    // machines -> packages -> cores -> hardware threads
+    //
+    hwloc_obj_t current_level_iterator = hwloc_get_root_obj(__kmp_hwloc_topology);
+    hwloc_obj_t child_iterator;
+    for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+        child_iterator != NULL;
+        child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+    {
+        nPackages++;
+    }
+    current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, pkgLevel, 0);
+    for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+        child_iterator != NULL;
+        child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+    {
+        nCoresPerPkg++;
+    }
+    current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, coreLevel, 0);
+    for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+        child_iterator != NULL;
+        child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+    {
+        __kmp_nThreadsPerCore++;
+    }
+
+    if (! KMP_AFFINITY_CAPABLE())
+    {
+        //
+        // Hack to try and infer the machine topology using only the data
+        // available from cpuid on the current thread, and __kmp_xproc.
+        //
+        KMP_ASSERT(__kmp_affinity_type == affinity_none);
+
+        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
+        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+        if (__kmp_affinity_verbose) {
+            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
+            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+            if (__kmp_affinity_uniform_topology()) {
+                KMP_INFORM(Uniform, "KMP_AFFINITY");
+            } else {
+                KMP_INFORM(NonUniform, "KMP_AFFINITY");
+            }
+            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+              __kmp_nThreadsPerCore, __kmp_ncores);
+        }
+        // oldMask was allocated above; release it on this early return too.
+        KMP_CPU_FREE(oldMask);
+        return 0;
+    }
+
+    //
+    // Allocate the data structure to be returned.
+    //
+    AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+
+    unsigned num_hardware_threads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
+    unsigned i;
+    hwloc_obj_t hardware_thread_iterator;
+    int nActiveThreads = 0;
+    // NOTE(review): the loop header and the first statements of its body were
+    // destroyed in the extracted text ("for(i=0;iparent->parent->...").  They
+    // are reconstructed here from the declarations just above and from the
+    // uses of addr / hardware_thread_iterator below -- confirm against the
+    // pristine source.
+    for(i=0;i<num_hardware_threads;i++) {
+        hardware_thread_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, threadLevel, i);
+        //
+        // Skip this proc if it is not included in the machine model.
+        // retval only has room for __kmp_avail_proc entries, so PUs outside
+        // fullMask must not be appended to it.
+        //
+        if (! KMP_CPU_ISSET(hardware_thread_iterator->os_index, fullMask)) {
+            continue;
+        }
+        Address addr(3);
+        addr.labels[0] = hardware_thread_iterator->parent->parent->logical_index;
+        addr.labels[1] = hardware_thread_iterator->parent->logical_index % nCoresPerPkg;
+        addr.labels[2] = hardware_thread_iterator->logical_index % __kmp_nThreadsPerCore;
+        retval[nActiveThreads] = AddrUnsPair(addr, hardware_thread_iterator->os_index);
+        nActiveThreads++;
+    }
+
+    //
+    // If there's only one thread context to bind to, return now.
+    //
+    KMP_ASSERT(nActiveThreads > 0);
+    if (nActiveThreads == 1) {
+        __kmp_ncores = nPackages = 1;
+        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+        if (__kmp_affinity_verbose) {
+            char buf[KMP_AFFIN_MASK_PRINT_LEN];
+            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+            if (__kmp_affinity_respect_mask) {
+                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+            } else {
+                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+            }
+            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+            KMP_INFORM(Uniform, "KMP_AFFINITY");
+            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+              __kmp_nThreadsPerCore, __kmp_ncores);
+        }
+
+        if (__kmp_affinity_type == affinity_none) {
+            __kmp_free(retval);
+            KMP_CPU_FREE(oldMask);
+            return 0;
+        }
+
+        //
+        // Form an Address object which only includes the package level.
+        //
+        Address addr(1);
+        addr.labels[0] = retval[0].first.labels[pkgLevel-1];
+        retval[0].first = addr;
+
+        if (__kmp_affinity_gran_levels < 0) {
+            __kmp_affinity_gran_levels = 0;
+        }
+
+        if (__kmp_affinity_verbose) {
+            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
+        }
+
+        *address2os = retval;
+        KMP_CPU_FREE(oldMask);
+        return 1;
+    }
+
+    //
+    // Sort the table by physical Id.
+    //
+    qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
+
+    //
+    // When affinity is off, this routine will still be called to set
+    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
+    // correctly, and return if affinity is not enabled.
+    //
+    __kmp_ncores = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, coreLevel);
+
+    //
+    // Check to see if the machine topology is uniform
+    //
+    unsigned npackages = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, pkgLevel);
+    unsigned nthreads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
+    unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
+
+    //
+    // Print the machine topology summary.
+    //
+    if (__kmp_affinity_verbose) {
+        char mask[KMP_AFFIN_MASK_PRINT_LEN];
+        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+        if (__kmp_affinity_respect_mask) {
+            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
+        } else {
+            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
+        }
+        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+        if (uniform) {
+            KMP_INFORM(Uniform, "KMP_AFFINITY");
+        } else {
+            KMP_INFORM(NonUniform, "KMP_AFFINITY");
+        }
+
+        kmp_str_buf_t buf;
+        __kmp_str_buf_init(&buf);
+
+        __kmp_str_buf_print(&buf, "%d", npackages);
+        //for (level = 1; level <= pkgLevel; level++) {
+        //    __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
+        // }
+        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
+          __kmp_nThreadsPerCore, __kmp_ncores);
+
+        __kmp_str_buf_free(&buf);
+    }
+
+    if (__kmp_affinity_type == affinity_none) {
+        // The table is not handed back to the caller on this path, so free
+        // it here (the single-thread path above already does the same).
+        __kmp_free(retval);
+        KMP_CPU_FREE(oldMask);
+        return 0;
+    }
+
+    //
+    // Find any levels with radix 1, and remove them from the map
+    // (except for the package level).
+    //
+    int new_depth = 0;
+    int level;
+    unsigned proc;
+    for (level = 1; level < (int)depth; level++) {
+        if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
+            continue;
+        }
+        new_depth++;
+    }
+
+    //
+    // If we are removing any levels, allocate a new vector to return,
+    // and copy the relevant information to it.
+    //
+    if (new_depth != depth-1) {
+        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
+          sizeof(AddrUnsPair) * nActiveThreads);
+        for (proc = 0; (int)proc < nActiveThreads; proc++) {
+            Address addr(new_depth);
+            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
+        }
+        int new_level = 0;
+        for (level = 1; level < (int)depth; level++) {
+            if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
+                // This level is dropped: shift the indices of the levels
+                // below it, or mark the level itself as absent (-1).
+                if (level == threadLevel) {
+                    threadLevel = -1;
+                }
+                else if ((threadLevel >= 0) && (level < threadLevel)) {
+                    threadLevel--;
+                }
+                if (level == coreLevel) {
+                    coreLevel = -1;
+                }
+                else if ((coreLevel >= 0) && (level < coreLevel)) {
+                    coreLevel--;
+                }
+                if (level < pkgLevel) {
+                    pkgLevel--;
+                }
+                continue;
+            }
+            for (proc = 0; (int)proc < nActiveThreads; proc++) {
+                new_retval[proc].first.labels[new_level]
+                  = retval[proc].first.labels[level];
+            }
+            new_level++;
+        }
+
+        __kmp_free(retval);
+        retval = new_retval;
+        depth = new_depth;
+    }
+
+    if (__kmp_affinity_gran_levels < 0) {
+        //
+        // Set the granularity level based on what levels are modeled
+        // in the machine topology map.
+        //
+        __kmp_affinity_gran_levels = 0;
+        if ((threadLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
+            __kmp_affinity_gran_levels++;
+        }
+        if ((coreLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+            __kmp_affinity_gran_levels++;
+        }
+        if (__kmp_affinity_gran > affinity_gran_package) {
+            __kmp_affinity_gran_levels++;
+        }
+    }
+
+    if (__kmp_affinity_verbose) {
+        __kmp_affinity_print_topology(retval, nActiveThreads, depth-1, pkgLevel-1,
+          coreLevel-1, threadLevel-1);
+    }
+
+    KMP_CPU_FREE(oldMask);
+    *address2os = retval;
+    if(depth == 0) return 0;
+    else return depth-1;
+}
+#endif // KMP_USE_HWLOC
+
+//
+// If we don't know how to retrieve the machine's processor topology, or
+// encounter an error in doing so, this routine is called to form a "flat"
+// mapping of os thread id's <-> processor id's.
+//
+static int
+__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
+  kmp_i18n_id_t *const msg_id)
+{
+    *address2os = NULL;
+    *msg_id = kmp_i18n_null;
+
+    //
+    // Even if __kmp_affinity_type == affinity_none, this routine might still
+    // be called to set __kmp_ncores, as well as
+    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
+    //
+    if (! KMP_AFFINITY_CAPABLE()) {
+        KMP_ASSERT(__kmp_affinity_type == affinity_none);
+        __kmp_ncores = nPackages = __kmp_xproc;
+        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+        if (__kmp_affinity_verbose) {
+            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
+            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+            KMP_INFORM(Uniform, "KMP_AFFINITY");
+            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+              __kmp_nThreadsPerCore, __kmp_ncores);
+        }
+        return 0;
+    }
+
+    //
+    // When affinity is off, this routine will still be called to set
+    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
+    // correctly, and return now if affinity is not enabled.
+    //
+    __kmp_ncores = nPackages = __kmp_avail_proc;
+    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+    if (__kmp_affinity_verbose) {
+        char buf[KMP_AFFIN_MASK_PRINT_LEN];
+        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
+
+        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
+        if (__kmp_affinity_respect_mask) {
+            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+        } else {
+            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+        }
+        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+        KMP_INFORM(Uniform, "KMP_AFFINITY");
+        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+          __kmp_nThreadsPerCore, __kmp_ncores);
+    }
+    if (__kmp_affinity_type == affinity_none) {
+        return 0;
+    }
+
+    //
+    // Construct the data structure to be returned.
+    // Every proc becomes its own depth-1 address labelled with its OS id.
+    //
+    *address2os = (AddrUnsPair*)
+      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+    int avail_ct = 0;
+    unsigned int i;
+    KMP_CPU_SET_ITERATE(i, fullMask) {
+        //
+        // Skip this proc if it is not included in the machine model.
+        //
+        if (! KMP_CPU_ISSET(i, fullMask)) {
+            continue;
+        }
+
+        Address addr(1);
+        addr.labels[0] = i;
+        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
+    }
+    if (__kmp_affinity_verbose) {
+        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
+    }
+
+    if (__kmp_affinity_gran_levels < 0) {
+        //
+        // Only the package level is modeled in the machine topology map,
+        // so the #levels of granularity is either 0 or 1.
+        //
+        if (__kmp_affinity_gran > affinity_gran_package) {
+            __kmp_affinity_gran_levels = 1;
+        }
+        else {
+            __kmp_affinity_gran_levels = 0;
+        }
+    }
+    return 1;
+}
+
+
+# if KMP_GROUP_AFFINITY
+
+//
+// If multiple Windows* OS processor groups exist, we can create a 2-level
+// topology map with the groups at level 0 and the individual procs at
+// level 1.
+//
+// This facilitates letting the threads float among all procs in a group,
+// if granularity=group (the default when there are multiple groups).
+//
+// Returns -1 when this mapping is not used, otherwise 2 (the map depth).
+//
+static int
+__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
+  kmp_i18n_id_t *const msg_id)
+{
+    *address2os = NULL;
+    *msg_id = kmp_i18n_null;
+
+    //
+    // If we don't have multiple processor groups, return now.
+    // The flat mapping will be used.
+    //
+    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
+        // FIXME set *msg_id
+        return -1;
+    }
+
+    //
+    // Construct the data structure to be returned.
+    //
+    *address2os = (AddrUnsPair*)
+      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+    int avail_ct = 0;
+    int i;
+    KMP_CPU_SET_ITERATE(i, fullMask) {
+        //
+        // Skip this proc if it is not included in the machine model.
+        //
+        if (! KMP_CPU_ISSET(i, fullMask)) {
+            continue;
+        }
+
+        // labels[0] is the group number, labels[1] the proc within the group.
+        Address addr(2);
+        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
+        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
+        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
+
+        if (__kmp_affinity_verbose) {
+            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
+              addr.labels[1]);
+        }
+    }
+
+    if (__kmp_affinity_gran_levels < 0) {
+        if (__kmp_affinity_gran == affinity_gran_group) {
+            __kmp_affinity_gran_levels = 1;
+        }
+        else if ((__kmp_affinity_gran == affinity_gran_fine)
+          || (__kmp_affinity_gran == affinity_gran_thread)) {
+            __kmp_affinity_gran_levels = 0;
+        }
+        else {
+            // gran_str is only prepared for the warning described below,
+            // which is not actually emitted by this code.
+            const char *gran_str = NULL;
+            if (__kmp_affinity_gran == affinity_gran_core) {
+                gran_str = "core";
+            }
+            else if (__kmp_affinity_gran == affinity_gran_package) {
+                gran_str = "package";
+            }
+            else if (__kmp_affinity_gran == affinity_gran_node) {
+                gran_str = "node";
+            }
+            else {
+                KMP_ASSERT(0);
+            }
+
+            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
+            __kmp_affinity_gran_levels = 0;
+        }
+    }
+    return 2;
+}
+
+# endif /* KMP_GROUP_AFFINITY */
+
+
+# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+// NOTE(review): everything from the shift loop below through the first lines
+// of __kmp_affinity_cmp_apicThreadInfo_os_id() was destroyed in the extracted
+// text ("while((1<osId < bb->osId) return -1;").  It is reconstructed here
+// from the surviving fragments, the sibling phys_id comparator, and the
+// apicThreadInfo fields used by __kmp_affinity_create_apicid_map() -- confirm
+// against the pristine source.
+
+//
+// Return the number of bits needed to hold values in [0, count), i.e. the
+// smallest r such that (1 << r) >= count.
+//
+static int
+__kmp_cpuid_mask_width(int count) {
+    int r = 0;
+
+    while((1<<r) < count)
+        ++r;
+    return r;
+}
+
+
+// Per-OS-proc record filled in by the apic id based topology discovery.
+class apicThreadInfo {
+public:
+    unsigned osId;              // param to __kmp_affinity_bind_thread
+    unsigned apicId;            // from cpuid after binding
+    unsigned maxCoresPerPkg;    //      ""
+    unsigned maxThreadsPerPkg;  //      ""
+    unsigned pkgId;             // inferred from above values
+    unsigned coreId;            //      ""
+    unsigned threadId;          //      ""
+};
+
+
+// qsort() comparator: order apicThreadInfo records by ascending osId.
+static int
+__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
+{
+    const apicThreadInfo *aa = (const apicThreadInfo *)a;
+    const apicThreadInfo *bb = (const apicThreadInfo *)b;
+    if (aa->osId < bb->osId) return -1;
+    if (aa->osId > bb->osId) return 1;
+    return 0;
+}
+
+
+// qsort() comparator: order apicThreadInfo records by pkgId, then coreId,
+// then threadId.
+static int
+__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
+{
+    const apicThreadInfo *aa = (const apicThreadInfo *)a;
+    const apicThreadInfo *bb = (const apicThreadInfo *)b;
+    if (aa->pkgId < bb->pkgId) return -1;
+    if (aa->pkgId > bb->pkgId) return 1;
+    if (aa->coreId < bb->coreId) return -1;
+    if (aa->coreId > bb->coreId) return 1;
+    if (aa->threadId < bb->threadId) return -1;
+    if (aa->threadId > bb->threadId) return 1;
+    return 0;
+}
+
+
+//
+// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
+// an algorithm which cycles through the available os threads, setting
+// the current thread's affinity
mask to that thread, and then retrieves +// the Apic Id for each thread context using the cpuid instruction. +// +static int +__kmp_affinity_create_apicid_map(AddrUnsPair **address2os, + kmp_i18n_id_t *const msg_id) +{ + kmp_cpuid buf; + int rc; + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // + // Check if cpuid leaf 4 is supported. + // + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax < 4) { + *msg_id = kmp_i18n_str_NoLeaf4Support; + return -1; + } + + // + // The algorithm used starts by setting the affinity to each available + // thread and retrieving info from the cpuid instruction, so if we are + // not capable of calling __kmp_get_system_affinity() and + // _kmp_get_system_affinity(), then we need to do something else - use + // the defaults that we calculated from issuing cpuid without binding + // to each proc. + // + if (! KMP_AFFINITY_CAPABLE()) { + // + // Hack to try and infer the machine topology using only the data + // available from cpuid on the current thread, and __kmp_xproc. + // + KMP_ASSERT(__kmp_affinity_type == affinity_none); + + // + // Get an upper bound on the number of threads per package using + // cpuid(1). + // + // On some OS/chps combinations where HT is supported by the chip + // but is disabled, this value will be 2 on a single core chip. + // Usually, it will be 2 if HT is enabled and 1 if HT is disabled. + // + __kmp_x86_cpuid(1, 0, &buf); + int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; + if (maxThreadsPerPkg == 0) { + maxThreadsPerPkg = 1; + } + + // + // The num cores per pkg comes from cpuid(4). + // 1 must be added to the encoded value. + // + // The author of cpu_count.cpp treated this only an upper bound + // on the number of cores, but I haven't seen any cases where it + // was greater than the actual number of cores, so we will treat + // it as exact in this block of code. + // + // First, we need to check if cpuid(4) is supported on this chip. 
+ // To see if cpuid(n) is supported, issue cpuid(0) and check if eax + // has the value n or greater. + // + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax >= 4) { + __kmp_x86_cpuid(4, 0, &buf); + nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; + } + else { + nCoresPerPkg = 1; + } + + // + // There is no way to reliably tell if HT is enabled without issuing + // the cpuid instruction from every thread, can correlating the cpuid + // info, so if the machine is not affinity capable, we assume that HT + // is off. We have seen quite a few machines where maxThreadsPerPkg + // is 2, yet the machine does not support HT. + // + // - Older OSes are usually found on machines with older chips, which + // do not support HT. + // + // - The performance penalty for mistakenly identifying a machine as + // HT when it isn't (which results in blocktime being incorrecly set + // to 0) is greater than the penalty when for mistakenly identifying + // a machine as being 1 thread/core when it is really HT enabled + // (which results in blocktime being incorrectly set to a positive + // value). + // + __kmp_ncores = __kmp_xproc; + nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; + __kmp_nThreadsPerCore = 1; + if (__kmp_affinity_verbose) { + KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (__kmp_affinity_uniform_topology()) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + return 0; + } + + // + // + // From here on, we can assume that it is safe to call + // __kmp_get_system_affinity() and __kmp_set_system_affinity(), + // even if __kmp_affinity_type = affinity_none. + // + + // + // Save the affinity mask for the current thread. 
+ // + kmp_affin_mask_t *oldMask; + KMP_CPU_ALLOC(oldMask); + KMP_ASSERT(oldMask != NULL); + __kmp_get_system_affinity(oldMask, TRUE); + + // + // Run through each of the available contexts, binding the current thread + // to it, and obtaining the pertinent information using the cpuid instr. + // + // The relevant information is: + // + // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context + // has a uniqie Apic Id, which is of the form pkg# : core# : thread#. + // + // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The + // value of this field determines the width of the core# + thread# + // fields in the Apic Id. It is also an upper bound on the number + // of threads per package, but it has been verified that situations + // happen were it is not exact. In particular, on certain OS/chip + // combinations where Intel(R) Hyper-Threading Technology is supported + // by the chip but has + // been disabled, the value of this field will be 2 (for a single core + // chip). On other OS/chip combinations supporting + // Intel(R) Hyper-Threading Technology, the value of + // this field will be 1 when Intel(R) Hyper-Threading Technology is + // disabled and 2 when it is enabled. + // + // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The + // value of this field (+1) determines the width of the core# field in + // the Apic Id. The comments in "cpucount.cpp" say that this value is + // an upper bound, but the IA-32 architecture manual says that it is + // exactly the number of cores per package, and I haven't seen any + // case where it wasn't. + // + // From this information, deduce the package Id, core Id, and thread Id, + // and set the corresponding fields in the apicThreadInfo struct. 
+ // + unsigned i; + apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate( + __kmp_avail_proc * sizeof(apicThreadInfo)); + unsigned nApics = 0; + KMP_CPU_SET_ITERATE(i, fullMask) { + // + // Skip this proc if it is not included in the machine model. + // + if (! KMP_CPU_ISSET(i, fullMask)) { + continue; + } + KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc); + + __kmp_affinity_bind_thread(i); + threadInfo[nApics].osId = i; + + // + // The apic id and max threads per pkg come from cpuid(1). + // + __kmp_x86_cpuid(1, 0, &buf); + if (! (buf.edx >> 9) & 1) { + __kmp_set_system_affinity(oldMask, TRUE); + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_ApicNotPresent; + return -1; + } + threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff; + threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; + if (threadInfo[nApics].maxThreadsPerPkg == 0) { + threadInfo[nApics].maxThreadsPerPkg = 1; + } + + // + // Max cores per pkg comes from cpuid(4). + // 1 must be added to the encoded value. + // + // First, we need to check if cpuid(4) is supported on this chip. + // To see if cpuid(n) is supported, issue cpuid(0) and check if eax + // has the value n or greater. + // + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax >= 4) { + __kmp_x86_cpuid(4, 0, &buf); + threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; + } + else { + threadInfo[nApics].maxCoresPerPkg = 1; + } + + // + // Infer the pkgId / coreId / threadId using only the info + // obtained locally. + // + int widthCT = __kmp_cpuid_mask_width( + threadInfo[nApics].maxThreadsPerPkg); + threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT; + + int widthC = __kmp_cpuid_mask_width( + threadInfo[nApics].maxCoresPerPkg); + int widthT = widthCT - widthC; + if (widthT < 0) { + // + // I've never seen this one happen, but I suppose it could, if + // the cpuid instruction on a chip was really screwed up. + // Make sure to restore the affinity mask before the tail call. 
+ // + __kmp_set_system_affinity(oldMask, TRUE); + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + + int maskC = (1 << widthC) - 1; + threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) + &maskC; + + int maskT = (1 << widthT) - 1; + threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT; + + nApics++; + } + + // + // We've collected all the info we need. + // Restore the old affinity mask for this thread. + // + __kmp_set_system_affinity(oldMask, TRUE); + + // + // If there's only one thread context to bind to, form an Address object + // with depth 1 and return immediately (or, if affinity is off, set + // address2os to NULL and return). + // + // If it is configured to omit the package level when there is only a + // single package, the logic at the end of this routine won't work if + // there is only a single thread - it would try to form an Address + // object with depth 0. + // + KMP_ASSERT(nApics > 0); + if (nApics == 1) { + __kmp_ncores = nPackages = 1; + __kmp_nThreadsPerCore = nCoresPerPkg = 1; + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + + KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + + if (__kmp_affinity_type == affinity_none) { + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + return 0; + } + + *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair)); + Address addr(1); + addr.labels[0] = threadInfo[0].pkgId; + (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId); + + if 
(__kmp_affinity_gran_levels < 0) { + __kmp_affinity_gran_levels = 0; + } + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); + } + + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + return 1; + } + + // + // Sort the threadInfo table by physical Id. + // + qsort(threadInfo, nApics, sizeof(*threadInfo), + __kmp_affinity_cmp_apicThreadInfo_phys_id); + + // + // The table is now sorted by pkgId / coreId / threadId, but we really + // don't know the radix of any of the fields. pkgId's may be sparsely + // assigned among the chips on a system. Although coreId's are usually + // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned + // [0..threadsPerCore-1], we don't want to make any such assumptions. + // + // For that matter, we don't know what coresPerPkg and threadsPerCore + // (or the total # packages) are at this point - we want to determine + // that now. We only have an upper bound on the first two figures. + // + // We also perform a consistency check at this point: the values returned + // by the cpuid instruction for any thread bound to a given package had + // better return the same info for maxThreadsPerPkg and maxCoresPerPkg. 
+ // + nPackages = 1; + nCoresPerPkg = 1; + __kmp_nThreadsPerCore = 1; + unsigned nCores = 1; + + unsigned pkgCt = 1; // to determine radii + unsigned lastPkgId = threadInfo[0].pkgId; + unsigned coreCt = 1; + unsigned lastCoreId = threadInfo[0].coreId; + unsigned threadCt = 1; + unsigned lastThreadId = threadInfo[0].threadId; + + // intra-pkg consist checks + unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg; + unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg; + + for (i = 1; i < nApics; i++) { + if (threadInfo[i].pkgId != lastPkgId) { + nCores++; + pkgCt++; + lastPkgId = threadInfo[i].pkgId; + if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt; + coreCt = 1; + lastCoreId = threadInfo[i].coreId; + if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; + threadCt = 1; + lastThreadId = threadInfo[i].threadId; + + // + // This is a different package, so go on to the next iteration + // without doing any consistency checks. Reset the consistency + // check vars, though. + // + prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg; + prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg; + continue; + } + + if (threadInfo[i].coreId != lastCoreId) { + nCores++; + coreCt++; + lastCoreId = threadInfo[i].coreId; + if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; + threadCt = 1; + lastThreadId = threadInfo[i].threadId; + } + else if (threadInfo[i].threadId != lastThreadId) { + threadCt++; + lastThreadId = threadInfo[i].threadId; + } + else { + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique; + return -1; + } + + // + // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg + // fields agree between all the threads bounds to a given package. 
+ // + if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) + || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) { + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InconsistentCpuidInfo; + return -1; + } + } + nPackages = pkgCt; + if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt; + if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; + + // + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, + // nCoresPerPkg, & nPackages. Make sure all these vars are set + // correctly, and return now if affinity is not enabled. + // + __kmp_ncores = nCores; + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + + KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (__kmp_affinity_uniform_topology()) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + + } + + if (__kmp_affinity_type == affinity_none) { + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + return 0; + } + + // + // Now that we've determined the number of packages, the number of cores + // per package, and the number of threads per core, we can construct the + // data structure that is to be returned. + // + int pkgLevel = 0; + int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1; + int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 
2 : 1); + unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0); + + KMP_ASSERT(depth > 0); + *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics); + + for (i = 0; i < nApics; ++i) { + Address addr(depth); + unsigned os = threadInfo[i].osId; + int d = 0; + + if (pkgLevel >= 0) { + addr.labels[d++] = threadInfo[i].pkgId; + } + if (coreLevel >= 0) { + addr.labels[d++] = threadInfo[i].coreId; + } + if (threadLevel >= 0) { + addr.labels[d++] = threadInfo[i].threadId; + } + (*address2os)[i] = AddrUnsPair(addr, os); + } + + if (__kmp_affinity_gran_levels < 0) { + // + // Set the granularity level based on what levels are modeled + // in the machine topology map. + // + __kmp_affinity_gran_levels = 0; + if ((threadLevel >= 0) + && (__kmp_affinity_gran > affinity_gran_thread)) { + __kmp_affinity_gran_levels++; + } + if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { + __kmp_affinity_gran_levels++; + } + if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) { + __kmp_affinity_gran_levels++; + } + } + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel, + coreLevel, threadLevel); + } + + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + return depth; +} + + +// +// Intel(R) microarchitecture code name Nehalem, Dunnington and later +// architectures support a newer interface for specifying the x2APIC Ids, +// based on cpuid leaf 11. +// +static int +__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os, + kmp_i18n_id_t *const msg_id) +{ + kmp_cpuid buf; + + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // + // Check to see if cpuid leaf 11 is supported. + // + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax < 11) { + *msg_id = kmp_i18n_str_NoLeaf11Support; + return -1; + } + __kmp_x86_cpuid(11, 0, &buf); + if (buf.ebx == 0) { + *msg_id = kmp_i18n_str_NoLeaf11Support; + return -1; + } + + // + // Find the number of levels in the machine topology. 
While we're at it, + // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will + // try to get more accurate values later by explicitly counting them, + // but get reasonable defaults now, in case we return early. + // + int level; + int threadLevel = -1; + int coreLevel = -1; + int pkgLevel = -1; + __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; + + for (level = 0;; level++) { + if (level > 31) { + // + // FIXME: Hack for DPD200163180 + // + // If level is big then something went wrong -> exiting + // + // There could actually be 32 valid levels in the machine topology, + // but so far, the only machine we have seen which does not exit + // this loop before iteration 32 has fubar x2APIC settings. + // + // For now, just reject this case based upon loop trip count. + // + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + __kmp_x86_cpuid(11, level, &buf); + if (buf.ebx == 0) { + if (pkgLevel < 0) { + // + // Will infer nPackages from __kmp_xproc + // + pkgLevel = level; + level++; + } + break; + } + int kind = (buf.ecx >> 8) & 0xff; + if (kind == 1) { + // + // SMT level + // + threadLevel = level; + coreLevel = -1; + pkgLevel = -1; + __kmp_nThreadsPerCore = buf.ebx & 0xff; + if (__kmp_nThreadsPerCore == 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + } + else if (kind == 2) { + // + // core level + // + coreLevel = level; + pkgLevel = -1; + nCoresPerPkg = buf.ebx & 0xff; + if (nCoresPerPkg == 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + } + else { + if (level <= 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + if (pkgLevel >= 0) { + continue; + } + pkgLevel = level; + nPackages = buf.ebx & 0xff; + if (nPackages == 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + } + } + int depth = level; + + // + // In the above loop, "level" was counted from the finest level (usually + // thread) to the coarsest. 
The caller expects that we will place the + // labels in (*address2os)[].first.labels[] in the inverse order, so + // we need to invert the vars saying which level means what. + // + if (threadLevel >= 0) { + threadLevel = depth - threadLevel - 1; + } + if (coreLevel >= 0) { + coreLevel = depth - coreLevel - 1; + } + KMP_DEBUG_ASSERT(pkgLevel >= 0); + pkgLevel = depth - pkgLevel - 1; + + // + // The algorithm used starts by setting the affinity to each available + // thread and retrieving info from the cpuid instruction, so if we are + // not capable of calling __kmp_get_system_affinity() and + // _kmp_get_system_affinity(), then we need to do something else - use + // the defaults that we calculated from issuing cpuid without binding + // to each proc. + // + if (! KMP_AFFINITY_CAPABLE()) + { + // + // Hack to try and infer the machine topology using only the data + // available from cpuid on the current thread, and __kmp_xproc. + // + KMP_ASSERT(__kmp_affinity_type == affinity_none); + + __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; + nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; + if (__kmp_affinity_verbose) { + KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (__kmp_affinity_uniform_topology()) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + return 0; + } + + // + // + // From here on, we can assume that it is safe to call + // __kmp_get_system_affinity() and __kmp_set_system_affinity(), + // even if __kmp_affinity_type = affinity_none. + // + + // + // Save the affinity mask for the current thread. + // + kmp_affin_mask_t *oldMask; + KMP_CPU_ALLOC(oldMask); + __kmp_get_system_affinity(oldMask, TRUE); + + // + // Allocate the data structure to be returned. 
+ // + AddrUnsPair *retval = (AddrUnsPair *) + __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); + + // + // Run through each of the available contexts, binding the current thread + // to it, and obtaining the pertinent information using the cpuid instr. + // + unsigned int proc; + int nApics = 0; + KMP_CPU_SET_ITERATE(proc, fullMask) { + // + // Skip this proc if it is not included in the machine model. + // + if (! KMP_CPU_ISSET(proc, fullMask)) { + continue; + } + KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc); + + __kmp_affinity_bind_thread(proc); + + // + // Extrach the labels for each level in the machine topology map + // from the Apic ID. + // + Address addr(depth); + int prev_shift = 0; + + for (level = 0; level < depth; level++) { + __kmp_x86_cpuid(11, level, &buf); + unsigned apicId = buf.edx; + if (buf.ebx == 0) { + if (level != depth - 1) { + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InconsistentCpuidInfo; + return -1; + } + addr.labels[depth - level - 1] = apicId >> prev_shift; + level++; + break; + } + int shift = buf.eax & 0x1f; + int mask = (1 << shift) - 1; + addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift; + prev_shift = shift; + } + if (level != depth) { + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InconsistentCpuidInfo; + return -1; + } + + retval[nApics] = AddrUnsPair(addr, proc); + nApics++; + } + + // + // We've collected all the info we need. + // Restore the old affinity mask for this thread. + // + __kmp_set_system_affinity(oldMask, TRUE); + + // + // If there's only one thread context to bind to, return now. 
+ // + KMP_ASSERT(nApics > 0); + if (nApics == 1) { + __kmp_ncores = nPackages = 1; + __kmp_nThreadsPerCore = nCoresPerPkg = 1; + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + + KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + + if (__kmp_affinity_type == affinity_none) { + __kmp_free(retval); + KMP_CPU_FREE(oldMask); + return 0; + } + + // + // Form an Address object which only includes the package level. + // + Address addr(1); + addr.labels[0] = retval[0].first.labels[pkgLevel]; + retval[0].first = addr; + + if (__kmp_affinity_gran_levels < 0) { + __kmp_affinity_gran_levels = 0; + } + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); + } + + *address2os = retval; + KMP_CPU_FREE(oldMask); + return 1; + } + + // + // Sort the table by physical Id. + // + qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels); + + // + // Find the radix at each of the levels. + // + unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + for (level = 0; level < depth; level++) { + totals[level] = 1; + maxCt[level] = 1; + counts[level] = 1; + last[level] = retval[0].first.labels[level]; + } + + // + // From here on, the iteration variable "level" runs from the finest + // level to the coarsest, i.e. 
we iterate forward through + // (*address2os)[].first.labels[] - in the previous loops, we iterated + // backwards. + // + for (proc = 1; (int)proc < nApics; proc++) { + int level; + for (level = 0; level < depth; level++) { + if (retval[proc].first.labels[level] != last[level]) { + int j; + for (j = level + 1; j < depth; j++) { + totals[j]++; + counts[j] = 1; + // The line below causes printing incorrect topology information + // in case the max value for some level (maxCt[level]) is encountered earlier than + // some less value while going through the array. + // For example, let pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2 + // whereas it must be 4. + // TODO!!! Check if it can be commented safely + //maxCt[j] = 1; + last[j] = retval[proc].first.labels[j]; + } + totals[level]++; + counts[level]++; + if (counts[level] > maxCt[level]) { + maxCt[level] = counts[level]; + } + last[level] = retval[proc].first.labels[level]; + break; + } + else if (level == depth - 1) { + __kmp_free(last); + __kmp_free(maxCt); + __kmp_free(counts); + __kmp_free(totals); + __kmp_free(retval); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_x2ApicIDsNotUnique; + return -1; + } + } + } + + // + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, + // nCoresPerPkg, & nPackages. Make sure all these vars are set + // correctly, and return if affinity is not enabled. 
+ // + if (threadLevel >= 0) { + __kmp_nThreadsPerCore = maxCt[threadLevel]; + } + else { + __kmp_nThreadsPerCore = 1; + } + nPackages = totals[pkgLevel]; + + if (coreLevel >= 0) { + __kmp_ncores = totals[coreLevel]; + nCoresPerPkg = maxCt[coreLevel]; + } + else { + __kmp_ncores = nPackages; + nCoresPerPkg = 1; + } + + // + // Check to see if the machine topology is uniform + // + unsigned prod = maxCt[0]; + for (level = 1; level < depth; level++) { + prod *= maxCt[level]; + } + bool uniform = (prod == totals[level - 1]); + + // + // Print the machine topology summary. + // + if (__kmp_affinity_verbose) { + char mask[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + + KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (uniform) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + + __kmp_str_buf_print(&buf, "%d", totals[0]); + for (level = 1; level <= pkgLevel; level++) { + __kmp_str_buf_print(&buf, " x %d", maxCt[level]); + } + KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + + __kmp_str_buf_free(&buf); + } + + if (__kmp_affinity_type == affinity_none) { + __kmp_free(last); + __kmp_free(maxCt); + __kmp_free(counts); + __kmp_free(totals); + __kmp_free(retval); + KMP_CPU_FREE(oldMask); + return 0; + } + + // + // Find any levels with radiix 1, and remove them from the map + // (except for the package level). 
+ // + int new_depth = 0; + for (level = 0; level < depth; level++) { + if ((maxCt[level] == 1) && (level != pkgLevel)) { + continue; + } + new_depth++; + } + + // + // If we are removing any levels, allocate a new vector to return, + // and copy the relevant information to it. + // + if (new_depth != depth) { + AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate( + sizeof(AddrUnsPair) * nApics); + for (proc = 0; (int)proc < nApics; proc++) { + Address addr(new_depth); + new_retval[proc] = AddrUnsPair(addr, retval[proc].second); + } + int new_level = 0; + int newPkgLevel = -1; + int newCoreLevel = -1; + int newThreadLevel = -1; + int i; + for (level = 0; level < depth; level++) { + if ((maxCt[level] == 1) + && (level != pkgLevel)) { + // + // Remove this level. Never remove the package level + // + continue; + } + if (level == pkgLevel) { + newPkgLevel = level; + } + if (level == coreLevel) { + newCoreLevel = level; + } + if (level == threadLevel) { + newThreadLevel = level; + } + for (proc = 0; (int)proc < nApics; proc++) { + new_retval[proc].first.labels[new_level] + = retval[proc].first.labels[level]; + } + new_level++; + } + + __kmp_free(retval); + retval = new_retval; + depth = new_depth; + pkgLevel = newPkgLevel; + coreLevel = newCoreLevel; + threadLevel = newThreadLevel; + } + + if (__kmp_affinity_gran_levels < 0) { + // + // Set the granularity level based on what levels are modeled + // in the machine topology map. 
+ // + __kmp_affinity_gran_levels = 0; + if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { + __kmp_affinity_gran_levels++; + } + if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { + __kmp_affinity_gran_levels++; + } + if (__kmp_affinity_gran > affinity_gran_package) { + __kmp_affinity_gran_levels++; + } + } + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, + coreLevel, threadLevel); + } + + __kmp_free(last); + __kmp_free(maxCt); + __kmp_free(counts); + __kmp_free(totals); + KMP_CPU_FREE(oldMask); + *address2os = retval; + return depth; +} + + +# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + +#define osIdIndex 0 +#define threadIdIndex 1 +#define coreIdIndex 2 +#define pkgIdIndex 3 +#define nodeIdIndex 4 + +typedef unsigned *ProcCpuInfo; +static unsigned maxIndex = pkgIdIndex; + + +static int +__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) +{ + const unsigned *aa = (const unsigned *)a; + const unsigned *bb = (const unsigned *)b; + if (aa[osIdIndex] < bb[osIdIndex]) return -1; + if (aa[osIdIndex] > bb[osIdIndex]) return 1; + return 0; +}; + + +static int +__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b) +{ + unsigned i; + const unsigned *aa = *((const unsigned **)a); + const unsigned *bb = *((const unsigned **)b); + for (i = maxIndex; ; i--) { + if (aa[i] < bb[i]) return -1; + if (aa[i] > bb[i]) return 1; + if (i == osIdIndex) break; + } + return 0; +} + + +// +// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the +// affinity map. +// +static int +__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line, + kmp_i18n_id_t *const msg_id, FILE *f) +{ + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // + // Scan of the file, and count the number of "processor" (osId) fields, + // and find the highest value of for a node_ field. + // + char buf[256]; + unsigned num_records = 0; + while (! 
feof(f)) { + buf[sizeof(buf) - 1] = 1; + if (! fgets(buf, sizeof(buf), f)) { + // + // Read errors presumably because of EOF + // + break; + } + + char s1[] = "processor"; + if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { + num_records++; + continue; + } + + // + // FIXME - this will match "node_ " + // + unsigned level; + if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { + if (nodeIdIndex + level >= maxIndex) { + maxIndex = nodeIdIndex + level; + } + continue; + } + } + + // + // Check for empty file / no valid processor records, or too many. + // The number of records can't exceed the number of valid bits in the + // affinity mask. + // + if (num_records == 0) { + *line = 0; + *msg_id = kmp_i18n_str_NoProcRecords; + return -1; + } + if (num_records > (unsigned)__kmp_xproc) { + *line = 0; + *msg_id = kmp_i18n_str_TooManyProcRecords; + return -1; + } + + // + // Set the file pointer back to the begginning, so that we can scan the + // file again, this time performing a full parse of the data. + // Allocate a vector of ProcCpuInfo object, where we will place the data. + // Adding an extra element at the end allows us to remove a lot of extra + // checks for termination conditions. + // + if (fseek(f, 0, SEEK_SET) != 0) { + *line = 0; + *msg_id = kmp_i18n_str_CantRewindCpuinfo; + return -1; + } + + // + // Allocate the array of records to store the proc info in. The dummy + // element at the end makes the logic in filling them out easier to code. 
+ // + unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1) + * sizeof(unsigned *)); + unsigned i; + for (i = 0; i <= num_records; i++) { + threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1) + * sizeof(unsigned)); + } + +#define CLEANUP_THREAD_INFO \ + for (i = 0; i <= num_records; i++) { \ + __kmp_free(threadInfo[i]); \ + } \ + __kmp_free(threadInfo); + + // + // A value of UINT_MAX means that we didn't find the field + // + unsigned __index; + +#define INIT_PROC_INFO(p) \ + for (__index = 0; __index <= maxIndex; __index++) { \ + (p)[__index] = UINT_MAX; \ + } + + for (i = 0; i <= num_records; i++) { + INIT_PROC_INFO(threadInfo[i]); + } + + unsigned num_avail = 0; + *line = 0; + while (! feof(f)) { + // + // Create an inner scoping level, so that all the goto targets at the + // end of the loop appear in an outer scoping level. This avoids + // warnings about jumping past an initialization to a target in the + // same block. + // + { + buf[sizeof(buf) - 1] = 1; + bool long_line = false; + if (! fgets(buf, sizeof(buf), f)) { + // + // Read errors presumably because of EOF + // + // If there is valid data in threadInfo[num_avail], then fake + // a blank line in ensure that the last address gets parsed. + // + bool valid = false; + for (i = 0; i <= maxIndex; i++) { + if (threadInfo[num_avail][i] != UINT_MAX) { + valid = true; + } + } + if (! valid) { + break; + } + buf[0] = 0; + } else if (!buf[sizeof(buf) - 1]) { + // + // The line is longer than the buffer. Set a flag and don't + // emit an error if we were going to ignore the line, anyway. 
+ // + long_line = true; + +#define CHECK_LINE \ + if (long_line) { \ + CLEANUP_THREAD_INFO; \ + *msg_id = kmp_i18n_str_LongLineCpuinfo; \ + return -1; \ + } + } + (*line)++; + + char s1[] = "processor"; + if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s1) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; + if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field; + threadInfo[num_avail][osIdIndex] = val; +#if KMP_OS_LINUX && USE_SYSFS_INFO + char path[256]; + KMP_SNPRINTF(path, sizeof(path), + "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", + threadInfo[num_avail][osIdIndex]); + __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]); + + KMP_SNPRINTF(path, sizeof(path), + "/sys/devices/system/cpu/cpu%u/topology/core_id", + threadInfo[num_avail][osIdIndex]); + __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]); + continue; +#else + } + char s2[] = "physical id"; + if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s2) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; + if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field; + threadInfo[num_avail][pkgIdIndex] = val; + continue; + } + char s3[] = "core id"; + if (strncmp(buf, s3, sizeof(s3) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s3) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; + if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field; + threadInfo[num_avail][coreIdIndex] = val; + continue; +#endif // KMP_OS_LINUX && USE_SYSFS_INFO + } + char s4[] = "thread id"; + if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s4) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; + if 
(threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field; + threadInfo[num_avail][threadIdIndex] = val; + continue; + } + unsigned level; + if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s4) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; + KMP_ASSERT(nodeIdIndex + level <= maxIndex); + if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field; + threadInfo[num_avail][nodeIdIndex + level] = val; + continue; + } + + // + // We didn't recognize the leading token on the line. + // There are lots of leading tokens that we don't recognize - + // if the line isn't empty, go on to the next line. + // + if ((*buf != 0) && (*buf != '\n')) { + // + // If the line is longer than the buffer, read characters + // until we find a newline. + // + if (long_line) { + int ch; + while (((ch = fgetc(f)) != EOF) && (ch != '\n')); + } + continue; + } + + // + // A newline has signalled the end of the processor record. + // Check that there aren't too many procs specified. + // + if ((int)num_avail == __kmp_xproc) { + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_TooManyEntries; + return -1; + } + + // + // Check for missing fields. The osId field must be there, and we + // currently require that the physical id field is specified, also. + // + if (threadInfo[num_avail][osIdIndex] == UINT_MAX) { + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_MissingProcField; + return -1; + } + if (threadInfo[0][pkgIdIndex] == UINT_MAX) { + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_MissingPhysicalIDField; + return -1; + } + + // + // Skip this proc if it is not included in the machine model. + // + if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) { + INIT_PROC_INFO(threadInfo[num_avail]); + continue; + } + + // + // We have a successful parse of this proc's info. + // Increment the counter, and prepare for the next proc. 
+ // + num_avail++; + KMP_ASSERT(num_avail <= num_records); + INIT_PROC_INFO(threadInfo[num_avail]); + } + continue; + + no_val: + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_MissingValCpuinfo; + return -1; + + dup_field: + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo; + return -1; + } + *line = 0; + +# if KMP_MIC && REDUCE_TEAM_SIZE + unsigned teamSize = 0; +# endif // KMP_MIC && REDUCE_TEAM_SIZE + + // check for num_records == __kmp_xproc ??? + + // + // If there's only one thread context to bind to, form an Address object + // with depth 1 and return immediately (or, if affinity is off, set + // address2os to NULL and return). + // + // If it is configured to omit the package level when there is only a + // single package, the logic at the end of this routine won't work if + // there is only a single thread - it would try to form an Address + // object with depth 0. + // + KMP_ASSERT(num_avail > 0); + KMP_ASSERT(num_avail <= num_records); + if (num_avail == 1) { + __kmp_ncores = 1; + __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; + if (__kmp_affinity_verbose) { + if (! 
KMP_AFFINITY_CAPABLE()) { + KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } + else { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + fullMask); + KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } + int index; + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_str_buf_print(&buf, "1"); + for (index = maxIndex - 1; index > pkgIdIndex; index--) { + __kmp_str_buf_print(&buf, " x 1"); + } + KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1); + __kmp_str_buf_free(&buf); + } + + if (__kmp_affinity_type == affinity_none) { + CLEANUP_THREAD_INFO; + return 0; + } + + *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair)); + Address addr(1); + addr.labels[0] = threadInfo[0][pkgIdIndex]; + (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]); + + if (__kmp_affinity_gran_levels < 0) { + __kmp_affinity_gran_levels = 0; + } + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); + } + + CLEANUP_THREAD_INFO; + return 1; + } + + // + // Sort the threadInfo table by physical Id. + // + qsort(threadInfo, num_avail, sizeof(*threadInfo), + __kmp_affinity_cmp_ProcCpuInfo_phys_id); + + // + // The table is now sorted by pkgId / coreId / threadId, but we really + // don't know the radix of any of the fields. pkgId's may be sparsely + // assigned among the chips on a system. Although coreId's are usually + // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned + // [0..threadsPerCore-1], we don't want to make any such assumptions. 
+ // + // For that matter, we don't know what coresPerPkg and threadsPerCore + // (or the total # packages) are at this point - we want to determine + // that now. We only have an upper bound on the first two figures. + // + unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1) + * sizeof(unsigned)); + unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1) + * sizeof(unsigned)); + unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1) + * sizeof(unsigned)); + unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1) + * sizeof(unsigned)); + + bool assign_thread_ids = false; + unsigned threadIdCt; + unsigned index; + + restart_radix_check: + threadIdCt = 0; + + // + // Initialize the counter arrays with data from threadInfo[0]. + // + if (assign_thread_ids) { + if (threadInfo[0][threadIdIndex] == UINT_MAX) { + threadInfo[0][threadIdIndex] = threadIdCt++; + } + else if (threadIdCt <= threadInfo[0][threadIdIndex]) { + threadIdCt = threadInfo[0][threadIdIndex] + 1; + } + } + for (index = 0; index <= maxIndex; index++) { + counts[index] = 1; + maxCt[index] = 1; + totals[index] = 1; + lastId[index] = threadInfo[0][index];; + } + + // + // Run through the rest of the OS procs. + // + for (i = 1; i < num_avail; i++) { + // + // Find the most significant index whose id differs + // from the id for the previous OS proc. + // + for (index = maxIndex; index >= threadIdIndex; index--) { + if (assign_thread_ids && (index == threadIdIndex)) { + // + // Auto-assign the thread id field if it wasn't specified. + // + if (threadInfo[i][threadIdIndex] == UINT_MAX) { + threadInfo[i][threadIdIndex] = threadIdCt++; + } + + // + // Aparrently the thread id field was specified for some + // entries and not others. Start the thread id counter + // off at the next higher thread id. 
+ // + else if (threadIdCt <= threadInfo[i][threadIdIndex]) { + threadIdCt = threadInfo[i][threadIdIndex] + 1; + } + } + if (threadInfo[i][index] != lastId[index]) { + // + // Run through all indices which are less significant, + // and reset the counts to 1. + // + // At all levels up to and including index, we need to + // increment the totals and record the last id. + // + unsigned index2; + for (index2 = threadIdIndex; index2 < index; index2++) { + totals[index2]++; + if (counts[index2] > maxCt[index2]) { + maxCt[index2] = counts[index2]; + } + counts[index2] = 1; + lastId[index2] = threadInfo[i][index2]; + } + counts[index]++; + totals[index]++; + lastId[index] = threadInfo[i][index]; + + if (assign_thread_ids && (index > threadIdIndex)) { + +# if KMP_MIC && REDUCE_TEAM_SIZE + // + // The default team size is the total #threads in the machine + // minus 1 thread for every core that has 3 or more threads. + // + teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 ); +# endif // KMP_MIC && REDUCE_TEAM_SIZE + + // + // Restart the thread counter, as we are on a new core. + // + threadIdCt = 0; + + // + // Auto-assign the thread id field if it wasn't specified. + // + if (threadInfo[i][threadIdIndex] == UINT_MAX) { + threadInfo[i][threadIdIndex] = threadIdCt++; + } + + // + // Aparrently the thread id field was specified for some + // entries and not others. Start the thread id counter + // off at the next higher thread id. + // + else if (threadIdCt <= threadInfo[i][threadIdIndex]) { + threadIdCt = threadInfo[i][threadIdIndex] + 1; + } + } + break; + } + } + if (index < threadIdIndex) { + // + // If thread ids were specified, it is an error if they are not + // unique. Also, check that we waven't already restarted the + // loop (to be safe - shouldn't need to). 
+ // + if ((threadInfo[i][threadIdIndex] != UINT_MAX) + || assign_thread_ids) { + __kmp_free(lastId); + __kmp_free(totals); + __kmp_free(maxCt); + __kmp_free(counts); + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_PhysicalIDsNotUnique; + return -1; + } + + // + // If the thread ids were not specified and we see entries + // entries that are duplicates, start the loop over and + // assign the thread ids manually. + // + assign_thread_ids = true; + goto restart_radix_check; + } + } + +# if KMP_MIC && REDUCE_TEAM_SIZE + // + // The default team size is the total #threads in the machine + // minus 1 thread for every core that has 3 or more threads. + // + teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 ); +# endif // KMP_MIC && REDUCE_TEAM_SIZE + + for (index = threadIdIndex; index <= maxIndex; index++) { + if (counts[index] > maxCt[index]) { + maxCt[index] = counts[index]; + } + } + + __kmp_nThreadsPerCore = maxCt[threadIdIndex]; + nCoresPerPkg = maxCt[coreIdIndex]; + nPackages = totals[pkgIdIndex]; + + // + // Check to see if the machine topology is uniform + // + unsigned prod = totals[maxIndex]; + for (index = threadIdIndex; index < maxIndex; index++) { + prod *= maxCt[index]; + } + bool uniform = (prod == totals[threadIdIndex]); + + // + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, + // nCoresPerPkg, & nPackages. Make sure all these vars are set + // correctly, and return now if affinity is not enabled. + // + __kmp_ncores = totals[coreIdIndex]; + + if (__kmp_affinity_verbose) { + if (! 
KMP_AFFINITY_CAPABLE()) { + KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (uniform) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + } + else { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask); + KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (uniform) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + } + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + + __kmp_str_buf_print(&buf, "%d", totals[maxIndex]); + for (index = maxIndex - 1; index >= pkgIdIndex; index--) { + __kmp_str_buf_print(&buf, " x %d", maxCt[index]); + } + KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex], + maxCt[threadIdIndex], __kmp_ncores); + + __kmp_str_buf_free(&buf); + } + +# if KMP_MIC && REDUCE_TEAM_SIZE + // + // Set the default team size. + // + if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) { + __kmp_dflt_team_nth = teamSize; + KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n", + __kmp_dflt_team_nth)); + } +# endif // KMP_MIC && REDUCE_TEAM_SIZE + + if (__kmp_affinity_type == affinity_none) { + __kmp_free(lastId); + __kmp_free(totals); + __kmp_free(maxCt); + __kmp_free(counts); + CLEANUP_THREAD_INFO; + return 0; + } + + // + // Count the number of levels which have more nodes at that level than + // at the parent's level (with there being an implicit root node of + // the top level). This is equivalent to saying that there is at least + // one node at this level which has a sibling. These levels are in the + // map, and the package level is always in the map. 
+ // + bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool)); + int level = 0; + for (index = threadIdIndex; index < maxIndex; index++) { + KMP_ASSERT(totals[index] >= totals[index + 1]); + inMap[index] = (totals[index] > totals[index + 1]); + } + inMap[maxIndex] = (totals[maxIndex] > 1); + inMap[pkgIdIndex] = true; + + int depth = 0; + for (index = threadIdIndex; index <= maxIndex; index++) { + if (inMap[index]) { + depth++; + } + } + KMP_ASSERT(depth > 0); + + // + // Construct the data structure that is to be returned. + // + *address2os = (AddrUnsPair*) + __kmp_allocate(sizeof(AddrUnsPair) * num_avail); + int pkgLevel = -1; + int coreLevel = -1; + int threadLevel = -1; + + for (i = 0; i < num_avail; ++i) { + Address addr(depth); + unsigned os = threadInfo[i][osIdIndex]; + int src_index; + int dst_index = 0; + + for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) { + if (! inMap[src_index]) { + continue; + } + addr.labels[dst_index] = threadInfo[i][src_index]; + if (src_index == pkgIdIndex) { + pkgLevel = dst_index; + } + else if (src_index == coreIdIndex) { + coreLevel = dst_index; + } + else if (src_index == threadIdIndex) { + threadLevel = dst_index; + } + dst_index++; + } + (*address2os)[i] = AddrUnsPair(addr, os); + } + + if (__kmp_affinity_gran_levels < 0) { + // + // Set the granularity level based on what levels are modeled + // in the machine topology map. + // + unsigned src_index; + __kmp_affinity_gran_levels = 0; + for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) { + if (! 
inMap[src_index]) { + continue; + } + switch (src_index) { + case threadIdIndex: + if (__kmp_affinity_gran > affinity_gran_thread) { + __kmp_affinity_gran_levels++; + } + + break; + case coreIdIndex: + if (__kmp_affinity_gran > affinity_gran_core) { + __kmp_affinity_gran_levels++; + } + break; + + case pkgIdIndex: + if (__kmp_affinity_gran > affinity_gran_package) { + __kmp_affinity_gran_levels++; + } + break; + } + } + } + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel, + coreLevel, threadLevel); + } + + __kmp_free(inMap); + __kmp_free(lastId); + __kmp_free(totals); + __kmp_free(maxCt); + __kmp_free(counts); + CLEANUP_THREAD_INFO; + return depth; +} + + +// +// Create and return a table of affinity masks, indexed by OS thread ID. +// This routine handles OR'ing together all the affinity masks of threads +// that are sufficiently close, if granularity > fine. +// +static kmp_affin_mask_t * +__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique, + AddrUnsPair *address2os, unsigned numAddrs) +{ + // + // First form a table of affinity masks in order of OS thread id. + // + unsigned depth; + unsigned maxOsId; + unsigned i; + + KMP_ASSERT(numAddrs > 0); + depth = address2os[0].first.depth; + + maxOsId = 0; + for (i = 0; i < numAddrs; i++) { + unsigned osId = address2os[i].second; + if (osId > maxOsId) { + maxOsId = osId; + } + } + kmp_affin_mask_t *osId2Mask; + KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1)); + + // + // Sort the address2os table according to physical order. Doing so + // will put all threads on the same core/package/node in consecutive + // locations. 
+ // + qsort(address2os, numAddrs, sizeof(*address2os), + __kmp_affinity_cmp_Address_labels); + + KMP_ASSERT(__kmp_affinity_gran_levels >= 0); + if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) { + KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); + } + if (__kmp_affinity_gran_levels >= (int)depth) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffThreadsMayMigrate); + } + } + + // + // Run through the table, forming the masks for all threads on each + // core. Threads on the same core will have identical "Address" + // objects, not considering the last level, which must be the thread + // id. All threads on a core will appear consecutively. + // + unsigned unique = 0; + unsigned j = 0; // index of 1st thread on core + unsigned leader = 0; + Address *leaderAddr = &(address2os[0].first); + kmp_affin_mask_t *sum; + KMP_CPU_ALLOC_ON_STACK(sum); + KMP_CPU_ZERO(sum); + KMP_CPU_SET(address2os[0].second, sum); + for (i = 1; i < numAddrs; i++) { + // + // If this thread is sufficiently close to the leader (within the + // granularity setting), then set the bit for this os thread in the + // affinity mask for this group, and go on to the next thread. + // + if (leaderAddr->isClose(address2os[i].first, + __kmp_affinity_gran_levels)) { + KMP_CPU_SET(address2os[i].second, sum); + continue; + } + + // + // For every thread in this group, copy the mask to the thread's + // entry in the osId2Mask table. Mark the first address as a + // leader. + // + for (; j < i; j++) { + unsigned osId = address2os[j].second; + KMP_DEBUG_ASSERT(osId <= maxOsId); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); + KMP_CPU_COPY(mask, sum); + address2os[j].first.leader = (j == leader); + } + unique++; + + // + // Start a new mask. 
+ // + leader = i; + leaderAddr = &(address2os[i].first); + KMP_CPU_ZERO(sum); + KMP_CPU_SET(address2os[i].second, sum); + } + + // + // For every thread in last group, copy the mask to the thread's + // entry in the osId2Mask table. + // + for (; j < i; j++) { + unsigned osId = address2os[j].second; + KMP_DEBUG_ASSERT(osId <= maxOsId); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); + KMP_CPU_COPY(mask, sum); + address2os[j].first.leader = (j == leader); + } + unique++; + KMP_CPU_FREE_FROM_STACK(sum); + + *maxIndex = maxOsId; + *numUnique = unique; + return osId2Mask; +} + + +// +// Stuff for the affinity proclist parsers. It's easier to declare these vars +// as file-static than to try and pass them through the calling sequence of +// the recursive-descent OMP_PLACES parser. +// +static kmp_affin_mask_t *newMasks; +static int numNewMasks; +static int nextNewMask; + +#define ADD_MASK(_mask) \ + { \ + if (nextNewMask >= numNewMasks) { \ + int i; \ + numNewMasks *= 2; \ + kmp_affin_mask_t* temp; \ + KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \ + for(i=0;i _maxOsId) || \ + (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ + if (__kmp_affinity_verbose || (__kmp_affinity_warnings \ + && (__kmp_affinity_type != affinity_none))) { \ + KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ + } \ + } \ + else { \ + ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ + } \ + } + + +// +// Re-parse the proclist (for the explicit affinity type), and form the list +// of affinity newMasks indexed by gtid. +// +static void +__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, + unsigned int *out_numMasks, const char *proclist, + kmp_affin_mask_t *osId2Mask, int maxOsId) +{ + int i; + const char *scan = proclist; + const char *next = proclist; + + // + // We use malloc() for the temporary mask vector, + // so that we can use realloc() to extend it. 
+ // + numNewMasks = 2; + KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); + nextNewMask = 0; + kmp_affin_mask_t *sumMask; + KMP_CPU_ALLOC(sumMask); + int setSize = 0; + + for (;;) { + int start, end, stride; + + SKIP_WS(scan); + next = scan; + if (*next == '\0') { + break; + } + + if (*next == '{') { + int num; + setSize = 0; + next++; // skip '{' + SKIP_WS(next); + scan = next; + + // + // Read the first integer in the set. + // + KMP_ASSERT2((*next >= '0') && (*next <= '9'), + "bad proclist"); + SKIP_DIGITS(next); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(num >= 0, "bad explicit proc list"); + + // + // Copy the mask for that osId to the sum (union) mask. + // + if ((num > maxOsId) || + (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, num); + } + KMP_CPU_ZERO(sumMask); + } + else { + KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num)); + setSize = 1; + } + + for (;;) { + // + // Check for end of set. + // + SKIP_WS(next); + if (*next == '}') { + next++; // skip '}' + break; + } + + // + // Skip optional comma. + // + if (*next == ',') { + next++; + } + SKIP_WS(next); + + // + // Read the next integer in the set. + // + scan = next; + KMP_ASSERT2((*next >= '0') && (*next <= '9'), + "bad explicit proc list"); + + SKIP_DIGITS(next); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(num >= 0, "bad explicit proc list"); + + // + // Add the mask for that osId to the sum mask. + // + if ((num > maxOsId) || + (! 
KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, num); + } + } + else { + KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num)); + setSize++; + } + } + if (setSize > 0) { + ADD_MASK(sumMask); + } + + SKIP_WS(next); + if (*next == ',') { + next++; + } + scan = next; + continue; + } + + // + // Read the first integer. + // + KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); + SKIP_DIGITS(next); + start = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(start >= 0, "bad explicit proc list"); + SKIP_WS(next); + + // + // If this isn't a range, then add a mask to the list and go on. + // + if (*next != '-') { + ADD_MASK_OSID(start, osId2Mask, maxOsId); + + // + // Skip optional comma. + // + if (*next == ',') { + next++; + } + scan = next; + continue; + } + + // + // This is a range. Skip over the '-' and read in the 2nd int. + // + next++; // skip '-' + SKIP_WS(next); + scan = next; + KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); + SKIP_DIGITS(next); + end = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(end >= 0, "bad explicit proc list"); + + // + // Check for a stride parameter + // + stride = 1; + SKIP_WS(next); + if (*next == ':') { + // + // A stride is specified. Skip over the ':" and read the 3rd int. + // + int sign = +1; + next++; // skip ':' + SKIP_WS(next); + scan = next; + if (*next == '-') { + sign = -1; + next++; + SKIP_WS(next); + scan = next; + } + KMP_ASSERT2((*next >= '0') && (*next <= '9'), + "bad explicit proc list"); + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(stride >= 0, "bad explicit proc list"); + stride *= sign; + } + + // + // Do some range checks. 
+ // + KMP_ASSERT2(stride != 0, "bad explicit proc list"); + if (stride > 0) { + KMP_ASSERT2(start <= end, "bad explicit proc list"); + } + else { + KMP_ASSERT2(start >= end, "bad explicit proc list"); + } + KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list"); + + // + // Add the mask for each OS proc # to the list. + // + if (stride > 0) { + do { + ADD_MASK_OSID(start, osId2Mask, maxOsId); + start += stride; + } while (start <= end); + } + else { + do { + ADD_MASK_OSID(start, osId2Mask, maxOsId); + start += stride; + } while (start >= end); + } + + // + // Skip optional comma. + // + SKIP_WS(next); + if (*next == ',') { + next++; + } + scan = next; + } + + *out_numMasks = nextNewMask; + if (nextNewMask == 0) { + *out_masks = NULL; + KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); + return; + } + KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); + for(i = 0; i < nextNewMask; i++) { + kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); + kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i); + KMP_CPU_COPY(dest, src); + } + KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); + KMP_CPU_FREE(sumMask); +} + + +# if OMP_40_ENABLED + +/*----------------------------------------------------------------------------- + +Re-parse the OMP_PLACES proc id list, forming the newMasks for the different +places. Again, Here is the grammar: + +place_list := place +place_list := place , place_list +place := num +place := place : num +place := place : num : signed +place := { subplacelist } +place := ! 
place // (lowest priority) +subplace_list := subplace +subplace_list := subplace , subplace_list +subplace := num +subplace := num : num +subplace := num : num : signed +signed := num +signed := + signed +signed := - signed + +-----------------------------------------------------------------------------*/ + +static void +__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask, + int maxOsId, kmp_affin_mask_t *tempMask, int *setSize) +{ + const char *next; + + for (;;) { + int start, count, stride, i; + + // + // Read in the starting proc id + // + SKIP_WS(*scan); + KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), + "bad explicit places list"); + next = *scan; + SKIP_DIGITS(next); + start = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(start >= 0); + *scan = next; + + // + // valid follow sets are ',' ':' and '}' + // + SKIP_WS(*scan); + if (**scan == '}' || **scan == ',') { + if ((start > maxOsId) || + (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, start); + } + } + else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); + (*setSize)++; + } + if (**scan == '}') { + break; + } + (*scan)++; // skip ',' + continue; + } + KMP_ASSERT2(**scan == ':', "bad explicit places list"); + (*scan)++; // skip ':' + + // + // Read count parameter + // + SKIP_WS(*scan); + KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), + "bad explicit places list"); + next = *scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(count >= 0); + *scan = next; + + // + // valid follow sets are ',' ':' and '}' + // + SKIP_WS(*scan); + if (**scan == '}' || **scan == ',') { + for (i = 0; i < count; i++) { + if ((start > maxOsId) || + (! 
KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, start); + } + break; // don't proliferate warnings for large count + } + else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); + start++; + (*setSize)++; + } + } + if (**scan == '}') { + break; + } + (*scan)++; // skip ',' + continue; + } + KMP_ASSERT2(**scan == ':', "bad explicit places list"); + (*scan)++; // skip ':' + + // + // Read stride parameter + // + int sign = +1; + for (;;) { + SKIP_WS(*scan); + if (**scan == '+') { + (*scan)++; // skip '+' + continue; + } + if (**scan == '-') { + sign *= -1; + (*scan)++; // skip '-' + continue; + } + break; + } + SKIP_WS(*scan); + KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), + "bad explicit places list"); + next = *scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(stride >= 0); + *scan = next; + stride *= sign; + + // + // valid follow sets are ',' and '}' + // + SKIP_WS(*scan); + if (**scan == '}' || **scan == ',') { + for (i = 0; i < count; i++) { + if ((start > maxOsId) || + (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, start); + } + break; // don't proliferate warnings for large count + } + else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); + start += stride; + (*setSize)++; + } + } + if (**scan == '}') { + break; + } + (*scan)++; // skip ',' + continue; + } + + KMP_ASSERT2(0, "bad explicit places list"); + } +} + + +static void +__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, + int maxOsId, kmp_affin_mask_t *tempMask, int *setSize) +{ + const char *next; + + // + // valid follow sets are '{' '!' 
and num + // + SKIP_WS(*scan); + if (**scan == '{') { + (*scan)++; // skip '{' + __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask, + setSize); + KMP_ASSERT2(**scan == '}', "bad explicit places list"); + (*scan)++; // skip '}' + } + else if (**scan == '!') { + (*scan)++; // skip '!' + __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize); + KMP_CPU_COMPLEMENT(maxOsId, tempMask); + } + else if ((**scan >= '0') && (**scan <= '9')) { + next = *scan; + SKIP_DIGITS(next); + int num = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(num >= 0); + if ((num > maxOsId) || + (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, num); + } + } + else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num)); + (*setSize)++; + } + *scan = next; // skip num + } + else { + KMP_ASSERT2(0, "bad explicit places list"); + } +} + + +//static void +void +__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, + unsigned int *out_numMasks, const char *placelist, + kmp_affin_mask_t *osId2Mask, int maxOsId) +{ + int i,j,count,stride,sign; + const char *scan = placelist; + const char *next = placelist; + + numNewMasks = 2; + KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); + nextNewMask = 0; + + // tempMask is modified based on the previous or initial + // place to form the current place + // previousMask contains the previous place + kmp_affin_mask_t *tempMask; + kmp_affin_mask_t *previousMask; + KMP_CPU_ALLOC(tempMask); + KMP_CPU_ZERO(tempMask); + KMP_CPU_ALLOC(previousMask); + KMP_CPU_ZERO(previousMask); + int setSize = 0; + + for (;;) { + __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); + + // + // valid follow sets are ',' ':' and EOL + // + SKIP_WS(scan); + if (*scan == '\0' || *scan == ',') { + if (setSize > 0) { + ADD_MASK(tempMask); + } + KMP_CPU_ZERO(tempMask); + setSize = 0; + if 
(*scan == '\0') { + break; + } + scan++; // skip ',' + continue; + } + + KMP_ASSERT2(*scan == ':', "bad explicit places list"); + scan++; // skip ':' + + // + // Read count parameter + // + SKIP_WS(scan); + KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), + "bad explicit places list"); + next = scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(scan, *next); + KMP_ASSERT(count >= 0); + scan = next; + + // + // valid follow sets are ',' ':' and EOL + // + SKIP_WS(scan); + if (*scan == '\0' || *scan == ',') { + stride = +1; + } + else { + KMP_ASSERT2(*scan == ':', "bad explicit places list"); + scan++; // skip ':' + + // + // Read stride parameter + // + sign = +1; + for (;;) { + SKIP_WS(scan); + if (*scan == '+') { + scan++; // skip '+' + continue; + } + if (*scan == '-') { + sign *= -1; + scan++; // skip '-' + continue; + } + break; + } + SKIP_WS(scan); + KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), + "bad explicit places list"); + next = scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_DEBUG_ASSERT(stride >= 0); + scan = next; + stride *= sign; + } + + // Add places determined by initial_place : count : stride + for (i = 0; i < count; i++) { + if (setSize == 0) { + break; + } + // Add the current place, then build the next place (tempMask) from that + KMP_CPU_COPY(previousMask, tempMask); + ADD_MASK(previousMask); + KMP_CPU_ZERO(tempMask); + setSize = 0; + KMP_CPU_SET_ITERATE(j, previousMask) { + if (! KMP_CPU_ISSET(j, previousMask)) { + continue; + } + else if ((j+stride > maxOsId) || (j+stride < 0) || + (! 
KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) { + if ((__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) && i < count - 1) { + KMP_WARNING(AffIgnoreInvalidProcID, j+stride); + } + } + else { + KMP_CPU_SET(j+stride, tempMask); + setSize++; + } + } + } + KMP_CPU_ZERO(tempMask); + setSize = 0; + + // + // valid follow sets are ',' and EOL + // + SKIP_WS(scan); + if (*scan == '\0') { + break; + } + if (*scan == ',') { + scan++; // skip ',' + continue; + } + + KMP_ASSERT2(0, "bad explicit places list"); + } + + *out_numMasks = nextNewMask; + if (nextNewMask == 0) { + *out_masks = NULL; + KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); + return; + } + KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); + KMP_CPU_FREE(tempMask); + KMP_CPU_FREE(previousMask); + for(i = 0; i < nextNewMask; i++) { + kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); + kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i); + KMP_CPU_COPY(dest, src); + } + KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); +} + +# endif /* OMP_40_ENABLED */ + +#undef ADD_MASK +#undef ADD_MASK_OSID + +static void +__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) +{ + if (__kmp_place_num_sockets == 0 && + __kmp_place_num_cores == 0 && + __kmp_place_num_threads_per_core == 0 ) + return; // no topology limiting actions requested, exit + if (__kmp_place_num_sockets == 0) + __kmp_place_num_sockets = nPackages; // use all available sockets + if (__kmp_place_num_cores == 0) + __kmp_place_num_cores = nCoresPerPkg; // use all available cores + if (__kmp_place_num_threads_per_core == 0 || + __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore) + __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts + + if ( !__kmp_affinity_uniform_topology() ) { + KMP_WARNING( AffThrPlaceNonUniform ); + return; // don't support non-uniform topology + } + if ( depth != 3 ) { + KMP_WARNING( AffThrPlaceNonThreeLevel ); + return; // 
don't support not-3-level topology + } + if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) { + KMP_WARNING(AffThrPlaceManySockets); + return; + } + if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) { + KMP_WARNING( AffThrPlaceManyCores ); + return; + } + + AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) * + __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core); + + int i, j, k, n_old = 0, n_new = 0; + for (i = 0; i < nPackages; ++i) + if (i < __kmp_place_socket_offset || + i >= __kmp_place_socket_offset + __kmp_place_num_sockets) + n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket + else + for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket + if (j < __kmp_place_core_offset || + j >= __kmp_place_core_offset + __kmp_place_num_cores) + n_old += __kmp_nThreadsPerCore; // skip not-requested core + else + for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core + if (k < __kmp_place_num_threads_per_core) { + newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data + n_new++; + } + n_old++; + } + KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore); + KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores * + __kmp_place_num_threads_per_core); + + nPackages = __kmp_place_num_sockets; // correct nPackages + nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg + __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore + __kmp_avail_proc = n_new; // correct avail_proc + __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores + + __kmp_free( *pAddr ); + *pAddr = newAddr; // replace old topology with new one +} + + +static AddrUnsPair *address2os = NULL; +static int * procarr = NULL; +static int __kmp_aff_depth = 0; + +static void +__kmp_aux_affinity_initialize(void) +{ + if (__kmp_affinity_masks != NULL) { + 
KMP_ASSERT(fullMask != NULL); + return; + } + + // + // Create the "full" mask - this defines all of the processors that we + // consider to be in the machine model. If respect is set, then it is + // the initialization thread's affinity mask. Otherwise, it is all + // processors that we know about on the machine. + // + if (fullMask == NULL) { + KMP_CPU_ALLOC(fullMask); + } + if (KMP_AFFINITY_CAPABLE()) { + if (__kmp_affinity_respect_mask) { + __kmp_get_system_affinity(fullMask, TRUE); + + // + // Count the number of available processors. + // + unsigned i; + __kmp_avail_proc = 0; + KMP_CPU_SET_ITERATE(i, fullMask) { + if (! KMP_CPU_ISSET(i, fullMask)) { + continue; + } + __kmp_avail_proc++; + } + if (__kmp_avail_proc > __kmp_xproc) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(ErrorInitializeAffinity); + } + __kmp_affinity_type = affinity_none; + KMP_AFFINITY_DISABLE(); + return; + } + } + else { + __kmp_affinity_entire_machine_mask(fullMask); + __kmp_avail_proc = __kmp_xproc; + } + } + + int depth = -1; + kmp_i18n_id_t msg_id = kmp_i18n_null; + + // + // For backward compatibility, setting KMP_CPUINFO_FILE => + // KMP_TOPOLOGY_METHOD=cpuinfo + // + if ((__kmp_cpuinfo_file != NULL) && + (__kmp_affinity_top_method == affinity_top_method_all)) { + __kmp_affinity_top_method = affinity_top_method_cpuinfo; + } + + if (__kmp_affinity_top_method == affinity_top_method_all) { + // + // In the default code path, errors are not fatal - we just try using + // another method. We only emit a warning message if affinity is on, + // or the verbose flag is set, an the nowarnings flag was not set. 
+ // + const char *file_name = NULL; + int line = 0; +# if KMP_USE_HWLOC + if (depth < 0) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); + } + if(!__kmp_hwloc_error) { + depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } else if(depth < 0 && __kmp_affinity_verbose) { + KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); + } + } else if(__kmp_affinity_verbose) { + KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); + } + } +# endif + +# if KMP_ARCH_X86 || KMP_ARCH_X86_64 + + if (depth < 0) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC)); + } + + file_name = NULL; + depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } + + if (depth < 0) { + if (__kmp_affinity_verbose) { + if (msg_id != kmp_i18n_null) { + KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), + KMP_I18N_STR(DecodingLegacyAPIC)); + } + else { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC)); + } + } + + file_name = NULL; + depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } + } + } + +# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +# if KMP_OS_LINUX + + if (depth < 0) { + if (__kmp_affinity_verbose) { + if (msg_id != kmp_i18n_null) { + KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"); + } + else { + KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo"); + } + } + + FILE *f = fopen("/proc/cpuinfo", "r"); + if (f == NULL) { + msg_id = kmp_i18n_str_CantOpenCpuinfo; + } + else { + file_name = "/proc/cpuinfo"; + depth = 
__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); + fclose(f); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } + } + } + +# endif /* KMP_OS_LINUX */ + +# if KMP_GROUP_AFFINITY + + if ((depth < 0) && (__kmp_num_proc_groups > 1)) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); + } + + depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); + KMP_ASSERT(depth != 0); + } + +# endif /* KMP_GROUP_AFFINITY */ + + if (depth < 0) { + if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) { + if (file_name == NULL) { + KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id)); + } + else if (line == 0) { + KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id)); + } + else { + KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id)); + } + } + // FIXME - print msg if msg_id = kmp_i18n_null ??? + + file_name = ""; + depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } + KMP_ASSERT(depth > 0); + KMP_ASSERT(address2os != NULL); + } + } + + // + // If the user has specified that a paricular topology discovery method + // is to be used, then we abort if that method fails. The exception is + // group affinity, which might have been implicitly set. 
+ // + +# if KMP_ARCH_X86 || KMP_ARCH_X86_64 + + else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", + KMP_I18N_STR(Decodingx2APIC)); + } + + depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); + } + } + else if (__kmp_affinity_top_method == affinity_top_method_apicid) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", + KMP_I18N_STR(DecodingLegacyAPIC)); + } + + depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); + } + } + +# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) { + const char *filename; + if (__kmp_cpuinfo_file != NULL) { + filename = __kmp_cpuinfo_file; + } + else { + filename = "/proc/cpuinfo"; + } + + if (__kmp_affinity_verbose) { + KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename); + } + + FILE *f = fopen(filename, "r"); + if (f == NULL) { + int code = errno; + if (__kmp_cpuinfo_file != NULL) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG(CantOpenFileForReading, filename), + KMP_ERR(code), + KMP_HNT(NameComesFrom_CPUINFO_FILE), + __kmp_msg_null + ); + } + else { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG(CantOpenFileForReading, filename), + KMP_ERR(code), + __kmp_msg_null + ); + } + } + int line = 0; + depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); + fclose(f); + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + if (line > 0) { + KMP_FATAL(FileLineMsgExiting, 
filename, line, __kmp_i18n_catgets(msg_id)); + } + else { + KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id)); + } + } + if (__kmp_affinity_type == affinity_none) { + KMP_ASSERT(depth == 0); + KMP_ASSERT(address2os == NULL); + return; + } + } + +# if KMP_GROUP_AFFINITY + + else if (__kmp_affinity_top_method == affinity_top_method_group) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); + } + + depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); + KMP_ASSERT(depth != 0); + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); + } + } + +# endif /* KMP_GROUP_AFFINITY */ + + else if (__kmp_affinity_top_method == affinity_top_method_flat) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY"); + } + + depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } + // should not fail + KMP_ASSERT(depth > 0); + KMP_ASSERT(address2os != NULL); + } + +# if KMP_USE_HWLOC + else if (__kmp_affinity_top_method == affinity_top_method_hwloc) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); + } + depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); + if (depth == 0) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(address2os == NULL); + return; + } +# if KMP_DEBUG + AddrUnsPair *otheraddress2os = NULL; + int otherdepth = -1; +# if KMP_MIC + otherdepth = __kmp_affinity_create_apicid_map(&otheraddress2os, &msg_id); +# else + otherdepth = __kmp_affinity_create_x2apicid_map(&otheraddress2os, &msg_id); +# endif + if(otheraddress2os != NULL && address2os != NULL) { + int i; + unsigned arent_equal_flag = 0; + for(i=0;i<__kmp_avail_proc;i++) { + if(otheraddress2os[i] != address2os[i]) arent_equal_flag = 1; + } + if(arent_equal_flag) { + KA_TRACE(10, 
("__kmp_aux_affinity_initialize: Hwloc affinity places are different from APICID\n")); + KA_TRACE(10, ("__kmp_aux_affinity_initialize: APICID Table:\n")); + for(i=0;i<__kmp_avail_proc;i++) { + otheraddress2os[i].print(); __kmp_printf("\n"); + } + KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc Table:\n")); + for(i=0;i<__kmp_avail_proc;i++) { + address2os[i].print(); __kmp_printf("\n"); + } + } + else { + KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are same as APICID\n")); + } + } +# endif // KMP_DEBUG + } +# endif // KMP_USE_HWLOC + + if (address2os == NULL) { + if (KMP_AFFINITY_CAPABLE() + && (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none)))) { + KMP_WARNING(ErrorInitializeAffinity); + } + __kmp_affinity_type = affinity_none; + KMP_AFFINITY_DISABLE(); + return; + } + + __kmp_apply_thread_places(&address2os, depth); + + // + // Create the table of masks, indexed by thread Id. + // + unsigned maxIndex; + unsigned numUnique; + kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique, + address2os, __kmp_avail_proc); + if (__kmp_affinity_gran_levels == 0) { + KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); + } + + // + // Set the childNums vector in all Address objects. This must be done + // before we can sort using __kmp_affinity_cmp_Address_child_num(), + // which takes into account the setting of __kmp_affinity_compact. 
+ // + __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc); + + switch (__kmp_affinity_type) { + + case affinity_explicit: + KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL); +# if OMP_40_ENABLED + if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) +# endif + { + __kmp_affinity_process_proclist(&__kmp_affinity_masks, + &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask, + maxIndex); + } +# if OMP_40_ENABLED + else { + __kmp_affinity_process_placelist(&__kmp_affinity_masks, + &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask, + maxIndex); + } +# endif + if (__kmp_affinity_num_masks == 0) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffNoValidProcID); + } + __kmp_affinity_type = affinity_none; + return; + } + break; + + // + // The other affinity types rely on sorting the Addresses according + // to some permutation of the machine topology tree. Set + // __kmp_affinity_compact and __kmp_affinity_offset appropriately, + // then jump to a common code fragment to do the sort and create + // the array of affinity masks. 
+ // + + case affinity_logical: + __kmp_affinity_compact = 0; + if (__kmp_affinity_offset) { + __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset + % __kmp_avail_proc; + } + goto sortAddresses; + + case affinity_physical: + if (__kmp_nThreadsPerCore > 1) { + __kmp_affinity_compact = 1; + if (__kmp_affinity_compact >= depth) { + __kmp_affinity_compact = 0; + } + } else { + __kmp_affinity_compact = 0; + } + if (__kmp_affinity_offset) { + __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset + % __kmp_avail_proc; + } + goto sortAddresses; + + case affinity_scatter: + if (__kmp_affinity_compact >= depth) { + __kmp_affinity_compact = 0; + } + else { + __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact; + } + goto sortAddresses; + + case affinity_compact: + if (__kmp_affinity_compact >= depth) { + __kmp_affinity_compact = depth - 1; + } + goto sortAddresses; + + case affinity_balanced: + // Balanced works only for the case of a single package + if( nPackages > 1 ) { + if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { + KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" ); + } + __kmp_affinity_type = affinity_none; + return; + } else if( __kmp_affinity_uniform_topology() ) { + break; + } else { // Non-uniform topology + + // Save the depth for further usage + __kmp_aff_depth = depth; + + // Number of hyper threads per core in HT machine + int nth_per_core = __kmp_nThreadsPerCore; + + int core_level; + if( nth_per_core > 1 ) { + core_level = depth - 2; + } else { + core_level = depth - 1; + } + int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1; + int nproc = nth_per_core * ncores; + + procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc ); + for( int i = 0; i < nproc; i++ ) { + procarr[ i ] = -1; + } + + for( int i = 0; i < __kmp_avail_proc; i++ ) { + int proc = address2os[ i ].second; + // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread. 
+ // If there is only one thread per core then depth == 2: level 0 - package, + // level 1 - core. + int level = depth - 1; + + // __kmp_nth_per_core == 1 + int thread = 0; + int core = address2os[ i ].first.labels[ level ]; + // If the thread level exists, that is we have more than one thread context per core + if( nth_per_core > 1 ) { + thread = address2os[ i ].first.labels[ level ] % nth_per_core; + core = address2os[ i ].first.labels[ level - 1 ]; + } + procarr[ core * nth_per_core + thread ] = proc; + } + + break; + } + + sortAddresses: + // + // Allocate the gtid->affinity mask table. + // + if (__kmp_affinity_dups) { + __kmp_affinity_num_masks = __kmp_avail_proc; + } + else { + __kmp_affinity_num_masks = numUnique; + } + +# if OMP_40_ENABLED + if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel ) + && ( __kmp_affinity_num_places > 0 ) + && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) { + __kmp_affinity_num_masks = __kmp_affinity_num_places; + } +# endif + + KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); + + // + // Sort the address2os table according to the current setting of + // __kmp_affinity_compact, then fill out __kmp_affinity_masks. + // + qsort(address2os, __kmp_avail_proc, sizeof(*address2os), + __kmp_affinity_cmp_Address_child_num); + { + int i; + unsigned j; + for (i = 0, j = 0; i < __kmp_avail_proc; i++) { + if ((! __kmp_affinity_dups) && (! 
address2os[i].first.leader)) { + continue; + } + unsigned osId = address2os[i].second; + kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId); + kmp_affin_mask_t *dest + = KMP_CPU_INDEX(__kmp_affinity_masks, j); + KMP_ASSERT(KMP_CPU_ISSET(osId, src)); + KMP_CPU_COPY(dest, src); + if (++j >= __kmp_affinity_num_masks) { + break; + } + } + KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks); + } + break; + + default: + KMP_ASSERT2(0, "Unexpected affinity setting"); + } + + __kmp_free(osId2Mask); + machine_hierarchy.init(address2os, __kmp_avail_proc); +} + + +void +__kmp_affinity_initialize(void) +{ + // + // Much of the code above was written assumming that if a machine was not + // affinity capable, then __kmp_affinity_type == affinity_none. We now + // explicitly represent this as __kmp_affinity_type == affinity_disabled. + // + // There are too many checks for __kmp_affinity_type == affinity_none + // in this code. Instead of trying to change them all, check if + // __kmp_affinity_type == affinity_disabled, and if so, slam it with + // affinity_none, call the real initialization routine, then restore + // __kmp_affinity_type to affinity_disabled. + // + int disabled = (__kmp_affinity_type == affinity_disabled); + if (! 
KMP_AFFINITY_CAPABLE()) { + KMP_ASSERT(disabled); + } + if (disabled) { + __kmp_affinity_type = affinity_none; + } + __kmp_aux_affinity_initialize(); + if (disabled) { + __kmp_affinity_type = affinity_disabled; + } +} + + +void +__kmp_affinity_uninitialize(void) +{ + if (__kmp_affinity_masks != NULL) { + KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); + __kmp_affinity_masks = NULL; + } + if (fullMask != NULL) { + KMP_CPU_FREE(fullMask); + fullMask = NULL; + } + __kmp_affinity_num_masks = 0; +# if OMP_40_ENABLED + __kmp_affinity_num_places = 0; +# endif + if (__kmp_affinity_proclist != NULL) { + __kmp_free(__kmp_affinity_proclist); + __kmp_affinity_proclist = NULL; + } + if( address2os != NULL ) { + __kmp_free( address2os ); + address2os = NULL; + } + if( procarr != NULL ) { + __kmp_free( procarr ); + procarr = NULL; + } +} + + +void +__kmp_affinity_set_init_mask(int gtid, int isa_root) +{ + if (! KMP_AFFINITY_CAPABLE()) { + return; + } + + kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); + if (th->th.th_affin_mask == NULL) { + KMP_CPU_ALLOC(th->th.th_affin_mask); + } + else { + KMP_CPU_ZERO(th->th.th_affin_mask); + } + + // + // Copy the thread mask to the kmp_info_t strucuture. + // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one + // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask + // is set, then the full mask is the same as the mask of the initialization + // thread. 
+ // + kmp_affin_mask_t *mask; + int i; + +# if OMP_40_ENABLED + if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) +# endif + { + if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced) + ) { +# if KMP_GROUP_AFFINITY + if (__kmp_num_proc_groups > 1) { + return; + } +# endif + KMP_ASSERT(fullMask != NULL); + i = KMP_PLACE_ALL; + mask = fullMask; + } + else { + KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 ); + i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; + mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); + } + } +# if OMP_40_ENABLED + else { + if ((! isa_root) + || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { +# if KMP_GROUP_AFFINITY + if (__kmp_num_proc_groups > 1) { + return; + } +# endif + KMP_ASSERT(fullMask != NULL); + i = KMP_PLACE_ALL; + mask = fullMask; + } + else { + // + // int i = some hash function or just a counter that doesn't + // always start at 0. Use gtid for now. + // + KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 ); + i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; + mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); + } + } +# endif + +# if OMP_40_ENABLED + th->th.th_current_place = i; + if (isa_root) { + th->th.th_new_place = i; + th->th.th_first_place = 0; + th->th.th_last_place = __kmp_affinity_num_masks - 1; + } + + if (i == KMP_PLACE_ALL) { + KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n", + gtid)); + } + else { + KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n", + gtid, i)); + } +# else + if (i == -1) { + KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n", + gtid)); + } + else { + KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n", + gtid, i)); + } +# endif /* OMP_40_ENABLED */ + + KMP_CPU_COPY(th->th.th_affin_mask, mask); + + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + 
th->th.th_affin_mask); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid, + buf); + } + +# if KMP_OS_WINDOWS + // + // On Windows* OS, the process affinity mask might have changed. + // If the user didn't request affinity and this call fails, + // just continue silently. See CQ171393. + // + if ( __kmp_affinity_type == affinity_none ) { + __kmp_set_system_affinity(th->th.th_affin_mask, FALSE); + } + else +# endif + __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); +} + + +# if OMP_40_ENABLED + +void +__kmp_affinity_set_place(int gtid) +{ + int retval; + + if (! KMP_AFFINITY_CAPABLE()) { + return; + } + + kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); + + KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n", + gtid, th->th.th_new_place, th->th.th_current_place)); + + // + // Check that the new place is within this thread's partition. + // + KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); + KMP_ASSERT(th->th.th_new_place >= 0); + KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks); + if (th->th.th_first_place <= th->th.th_last_place) { + KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) + && (th->th.th_new_place <= th->th.th_last_place)); + } + else { + KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) + || (th->th.th_new_place >= th->th.th_last_place)); + } + + // + // Copy the thread mask to the kmp_info_t strucuture, + // and set this thread's affinity. 
+ // + kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, + th->th.th_new_place); + KMP_CPU_COPY(th->th.th_affin_mask, mask); + th->th.th_current_place = th->th.th_new_place; + + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + th->th.th_affin_mask); + KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(), + gtid, buf); + } + __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); +} + +# endif /* OMP_40_ENABLED */ + + +int +__kmp_aux_set_affinity(void **mask) +{ + int gtid; + kmp_info_t *th; + int retval; + + if (! KMP_AFFINITY_CAPABLE()) { + return -1; + } + + gtid = __kmp_entry_gtid(); + KA_TRACE(1000, ;{ + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n", + gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } + else { + unsigned proc; + int num_procs = 0; + + KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) { + if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) { + continue; + } + num_procs++; + if (! 
KMP_CPU_ISSET(proc, fullMask)) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + break; + } + } + if (num_procs == 0) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } + +# if KMP_GROUP_AFFINITY + if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } +# endif /* KMP_GROUP_AFFINITY */ + + } + } + + th = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); + retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); + if (retval == 0) { + KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask)); + } + +# if OMP_40_ENABLED + th->th.th_current_place = KMP_PLACE_UNDEFINED; + th->th.th_new_place = KMP_PLACE_UNDEFINED; + th->th.th_first_place = 0; + th->th.th_last_place = __kmp_affinity_num_masks - 1; + + // + // Turn off 4.0 affinity for the current tread at this parallel level. + // + th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; +# endif + + return retval; +} + + +int +__kmp_aux_get_affinity(void **mask) +{ + int gtid; + int retval; + kmp_info_t *th; + + if (! 
KMP_AFFINITY_CAPABLE()) { + return -1; + } + + gtid = __kmp_entry_gtid(); + th = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); + + KA_TRACE(1000, ;{ + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + th->th.th_affin_mask); + __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity"); + } + } + +# if !KMP_OS_WINDOWS + + retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); + KA_TRACE(1000, ;{ + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf); + }); + return retval; + +# else + + KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask); + return 0; + +# endif /* KMP_OS_WINDOWS */ + +} + +int +__kmp_aux_set_affinity_mask_proc(int proc, void **mask) +{ + int retval; + + if (! KMP_AFFINITY_CAPABLE()) { + return -1; + } + + KA_TRACE(1000, ;{ + int gtid = __kmp_entry_gtid(); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n", + proc, gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc"); + } + } + + if ((proc < 0) +# if !KMP_USE_HWLOC + || ((unsigned)proc >= KMP_CPU_SETSIZE) +# endif + ) { + return -1; + } + if (! KMP_CPU_ISSET(proc, fullMask)) { + return -2; + } + + KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask)); + return 0; +} + + +int +__kmp_aux_unset_affinity_mask_proc(int proc, void **mask) +{ + int retval; + + if (! 
KMP_AFFINITY_CAPABLE()) { + return -1; + } + + KA_TRACE(1000, ;{ + int gtid = __kmp_entry_gtid(); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n", + proc, gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc"); + } + } + + if ((proc < 0) +# if !KMP_USE_HWLOC + || ((unsigned)proc >= KMP_CPU_SETSIZE) +# endif + ) { + return -1; + } + if (! KMP_CPU_ISSET(proc, fullMask)) { + return -2; + } + + KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask)); + return 0; +} + + +int +__kmp_aux_get_affinity_mask_proc(int proc, void **mask) +{ + int retval; + + if (! KMP_AFFINITY_CAPABLE()) { + return -1; + } + + KA_TRACE(1000, ;{ + int gtid = __kmp_entry_gtid(); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n", + proc, gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc"); + } + } + + if ((proc < 0) +# if !KMP_USE_HWLOC + || ((unsigned)proc >= KMP_CPU_SETSIZE) +# endif + ) { + return -1; + } + if (! 
KMP_CPU_ISSET(proc, fullMask)) { + return 0; + } + + return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask)); +} + + +// Dynamic affinity settings - Affinity balanced +void __kmp_balanced_affinity( int tid, int nthreads ) +{ + if( __kmp_affinity_uniform_topology() ) { + int coreID; + int threadID; + // Number of hyper threads per core in HT machine + int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores; + // Number of cores + int ncores = __kmp_ncores; + // How many threads will be bound to each core + int chunk = nthreads / ncores; + // How many cores will have an additional thread bound to it - "big cores" + int big_cores = nthreads % ncores; + // Number of threads on the big cores + int big_nth = ( chunk + 1 ) * big_cores; + if( tid < big_nth ) { + coreID = tid / (chunk + 1 ); + threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ; + } else { //tid >= big_nth + coreID = ( tid - big_cores ) / chunk; + threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ; + } + + KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); + + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + + // Granularity == thread + if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) { + int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second; + KMP_CPU_SET( osID, mask); + } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core + for( int i = 0; i < __kmp_nth_per_core; i++ ) { + int osID; + osID = address2os[ coreID * __kmp_nth_per_core + i ].second; + KMP_CPU_SET( osID, mask); + } + } + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), + tid, buf); + } + __kmp_set_system_affinity( mask, TRUE ); + KMP_CPU_FREE_FROM_STACK(mask); + } else { // Non-uniform topology + + 
kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + + // Number of hyper threads per core in HT machine + int nth_per_core = __kmp_nThreadsPerCore; + int core_level; + if( nth_per_core > 1 ) { + core_level = __kmp_aff_depth - 2; + } else { + core_level = __kmp_aff_depth - 1; + } + + // Number of cores - maximum value; it does not count trail cores with 0 processors + int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1; + + // For performance gain consider the special case nthreads == __kmp_avail_proc + if( nthreads == __kmp_avail_proc ) { + if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) { + int osID = address2os[ tid ].second; + KMP_CPU_SET( osID, mask); + } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core + int coreID = address2os[ tid ].first.labels[ core_level ]; + // We'll count found osIDs for the current core; they can be not more than nth_per_core; + // since the address2os is sortied we can break when cnt==nth_per_core + int cnt = 0; + for( int i = 0; i < __kmp_avail_proc; i++ ) { + int osID = address2os[ i ].second; + int core = address2os[ i ].first.labels[ core_level ]; + if( core == coreID ) { + KMP_CPU_SET( osID, mask); + cnt++; + if( cnt == nth_per_core ) { + break; + } + } + } + } + } else if( nthreads <= __kmp_ncores ) { + + int core = 0; + for( int i = 0; i < ncores; i++ ) { + // Check if this core from procarr[] is in the mask + int in_mask = 0; + for( int j = 0; j < nth_per_core; j++ ) { + if( procarr[ i * nth_per_core + j ] != - 1 ) { + in_mask = 1; + break; + } + } + if( in_mask ) { + if( tid == core ) { + for( int j = 0; j < nth_per_core; j++ ) { + int osID = procarr[ i * nth_per_core + j ]; + if( osID != -1 ) { + KMP_CPU_SET( osID, mask ); + // For granularity=thread it is enough to set the first available osID for this core + if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == 
affinity_gran_thread) { + break; + } + } + } + break; + } else { + core++; + } + } + } + + } else { // nthreads > __kmp_ncores + + // Array to save the number of processors at each core + int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores); + // Array to save the number of cores with "x" available processors; + int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1)); + // Array to save the number of cores with # procs from x to nth_per_core + int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1)); + + for( int i = 0; i <= nth_per_core; i++ ) { + ncores_with_x_procs[ i ] = 0; + ncores_with_x_to_max_procs[ i ] = 0; + } + + for( int i = 0; i < ncores; i++ ) { + int cnt = 0; + for( int j = 0; j < nth_per_core; j++ ) { + if( procarr[ i * nth_per_core + j ] != -1 ) { + cnt++; + } + } + nproc_at_core[ i ] = cnt; + ncores_with_x_procs[ cnt ]++; + } + + for( int i = 0; i <= nth_per_core; i++ ) { + for( int j = i; j <= nth_per_core; j++ ) { + ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ]; + } + } + + // Max number of processors + int nproc = nth_per_core * ncores; + // An array to keep number of threads per each context + int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc ); + for( int i = 0; i < nproc; i++ ) { + newarr[ i ] = 0; + } + + int nth = nthreads; + int flag = 0; + while( nth > 0 ) { + for( int j = 1; j <= nth_per_core; j++ ) { + int cnt = ncores_with_x_to_max_procs[ j ]; + for( int i = 0; i < ncores; i++ ) { + // Skip the core with 0 processors + if( nproc_at_core[ i ] == 0 ) { + continue; + } + for( int k = 0; k < nth_per_core; k++ ) { + if( procarr[ i * nth_per_core + k ] != -1 ) { + if( newarr[ i * nth_per_core + k ] == 0 ) { + newarr[ i * nth_per_core + k ] = 1; + cnt--; + nth--; + break; + } else { + if( flag != 0 ) { + newarr[ i * nth_per_core + k ] ++; + cnt--; + nth--; + break; + } + } + } + } + if( cnt == 0 || nth == 0 ) { + break; + } + } + if( nth == 0 ) { + break; + } + } + flag 
= 1; + } + int sum = 0; + for( int i = 0; i < nproc; i++ ) { + sum += newarr[ i ]; + if( sum > tid ) { + // Granularity == thread + if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) { + int osID = procarr[ i ]; + KMP_CPU_SET( osID, mask); + } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core + int coreID = i / nth_per_core; + for( int ii = 0; ii < nth_per_core; ii++ ) { + int osID = procarr[ coreID * nth_per_core + ii ]; + if( osID != -1 ) { + KMP_CPU_SET( osID, mask); + } + } + } + break; + } + } + __kmp_free( newarr ); + } + + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), + tid, buf); + } + __kmp_set_system_affinity( mask, TRUE ); + KMP_CPU_FREE_FROM_STACK(mask); + } +} + +#endif // KMP_AFFINITY_SUPPORTED diff --git a/contrib/libs/cxxsupp/openmp/kmp_affinity.h b/contrib/libs/cxxsupp/openmp/kmp_affinity.h index c4d08e3a350..4ff6dbaac69 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_affinity.h +++ b/contrib/libs/cxxsupp/openmp/kmp_affinity.h @@ -1,319 +1,319 @@ -/* - * kmp_affinity.h -- header for affinity management - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef KMP_AFFINITY_H -#define KMP_AFFINITY_H - -extern int __kmp_affinity_compact; /* Affinity 'compact' value */ - -class Address { -public: - static const unsigned maxDepth = 32; - unsigned labels[maxDepth]; - unsigned childNums[maxDepth]; - unsigned depth; - unsigned leader; - Address(unsigned _depth) - : depth(_depth), leader(FALSE) { - } - Address &operator=(const Address &b) { - depth = b.depth; - for (unsigned i = 0; i < depth; i++) { - labels[i] = b.labels[i]; - childNums[i] = b.childNums[i]; - } - leader = FALSE; - return *this; - } - bool operator==(const Address &b) const { - if (depth != b.depth) - return false; - for (unsigned i = 0; i < depth; i++) - if(labels[i] != b.labels[i]) - return false; - return true; - } - bool isClose(const Address &b, int level) const { - if (depth != b.depth) - return false; - if ((unsigned)level >= depth) - return true; - for (unsigned i = 0; i < (depth - level); i++) - if(labels[i] != b.labels[i]) - return false; - return true; - } - bool operator!=(const Address &b) const { - return !operator==(b); - } - void print() const { - unsigned i; - printf("Depth: %u --- ", depth); - for(i=0;ifirst); - const Address *bb = (const Address *)&(((AddrUnsPair *)b) - ->first); - unsigned depth = aa->depth; - unsigned i; - KMP_DEBUG_ASSERT(depth == bb->depth); - for (i = 0; i < depth; i++) { - if (aa->labels[i] < bb->labels[i]) return -1; - if (aa->labels[i] > bb->labels[i]) return 1; - } - return 0; -} - -#if KMP_AFFINITY_SUPPORTED -static int -__kmp_affinity_cmp_Address_child_num(const void *a, const void *b) -{ - const Address *aa = (const Address *)&(((AddrUnsPair *)a) - ->first); - const Address *bb = (const Address *)&(((AddrUnsPair *)b) - ->first); - unsigned depth = aa->depth; - unsigned i; - KMP_DEBUG_ASSERT(depth == bb->depth); - KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth); - KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 
0); - for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) { - int j = depth - i - 1; - if (aa->childNums[j] < bb->childNums[j]) return -1; - if (aa->childNums[j] > bb->childNums[j]) return 1; - } - for (; i < depth; i++) { - int j = i - __kmp_affinity_compact; - if (aa->childNums[j] < bb->childNums[j]) return -1; - if (aa->childNums[j] > bb->childNums[j]) return 1; - } - return 0; -} -#endif - -/** A structure for holding machine-specific hierarchy info to be computed once at init. - This structure represents a mapping of threads to the actual machine hierarchy, or to - our best guess at what the hierarchy might be, for the purpose of performing an - efficient barrier. In the worst case, when there is no machine hierarchy information, - it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */ -class hierarchy_info { -public: - /** Good default values for number of leaves and branching factor, given no affinity information. - Behaves a bit like hyper barrier. */ - static const kmp_uint32 maxLeaves=4; - static const kmp_uint32 minBranch=4; - /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package - or socket, packages/node, nodes/machine, etc. We don't want to get specific with - nomenclature. When the machine is oversubscribed we add levels to duplicate the - hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */ - kmp_uint32 maxLevels; - - /** This is specifically the depth of the machine configuration hierarchy, in terms of the - number of levels along the longest path from root to any leaf. It corresponds to the - number of entries in numPerLevel if we exclude all but one trailing 1. 
*/ - kmp_uint32 depth; - kmp_uint32 base_num_threads; - enum init_status { initialized=0, not_initialized=1, initializing=2 }; - volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress - volatile kmp_int8 resizing; // 0=not resizing, 1=resizing - - /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a - node at level i has. For example, if we have a machine with 4 packages, 4 cores/package - and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */ - kmp_uint32 *numPerLevel; - kmp_uint32 *skipPerLevel; - - void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { - int hier_depth = adr2os[0].first.depth; - int level = 0; - for (int i=hier_depth-1; i>=0; --i) { - int max = -1; - for (int j=0; j max) max = next; - } - numPerLevel[level] = max+1; - ++level; - } - } - - hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} - - void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); } - - void init(AddrUnsPair *adr2os, int num_addrs) - { - kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing); - if (bool_result == 0) { // Wait for initialization - while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE(); - return; - } - KMP_DEBUG_ASSERT(bool_result==1); - - /* Added explicit initialization of the data fields here to prevent usage of dirty value - observed when static library is re-initialized multiple times (e.g. when - non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). 
*/ - depth = 1; - resizing = 0; - maxLevels = 7; - numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - for (kmp_uint32 i=0; i=0; --i) // count non-empty levels to get depth - if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' - depth++; - - kmp_uint32 branch = minBranch; - if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves; - if (branch branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0! - if (numPerLevel[d] & 1) numPerLevel[d]++; - numPerLevel[d] = numPerLevel[d] >> 1; - if (numPerLevel[d+1] == 1) depth++; - numPerLevel[d+1] = numPerLevel[d+1] << 1; - } - if(numPerLevel[0] == 1) { - branch = branch >> 1; - if (branch<4) branch = minBranch; - } - } - - for (kmp_uint32 i=1; iold_sz; ++i) { - skipPerLevel[i] = 2*skipPerLevel[i-1]; - numPerLevel[i-1] *= 2; - old_sz *= 2; - depth++; - } - if (nproc > old_sz) { // Not enough space, need to expand hierarchy - while (nproc > old_sz) { - old_sz *=2; - incs++; - depth++; - } - maxLevels += incs; - - // Resize arrays - kmp_uint32 *old_numPerLevel = numPerLevel; - kmp_uint32 *old_skipPerLevel = skipPerLevel; - numPerLevel = skipPerLevel = NULL; - numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - - // Copy old elements from old arrays - for (kmp_uint32 i=0; i= depth) + return true; + for (unsigned i = 0; i < (depth - level); i++) + if(labels[i] != b.labels[i]) + return false; + return true; + } + bool operator!=(const Address &b) const { + return !operator==(b); + } + void print() const { + unsigned i; + printf("Depth: %u --- ", depth); + for(i=0;ifirst); + const Address *bb = (const Address *)&(((AddrUnsPair *)b) + ->first); + unsigned depth = aa->depth; + unsigned i; + KMP_DEBUG_ASSERT(depth == bb->depth); + for (i = 0; i < depth; i++) { + if (aa->labels[i] < bb->labels[i]) return -1; + if (aa->labels[i] > bb->labels[i]) return 1; + } + return 0; 
+} + +#if KMP_AFFINITY_SUPPORTED +static int +__kmp_affinity_cmp_Address_child_num(const void *a, const void *b) +{ + const Address *aa = (const Address *)&(((AddrUnsPair *)a) + ->first); + const Address *bb = (const Address *)&(((AddrUnsPair *)b) + ->first); + unsigned depth = aa->depth; + unsigned i; + KMP_DEBUG_ASSERT(depth == bb->depth); + KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth); + KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0); + for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) { + int j = depth - i - 1; + if (aa->childNums[j] < bb->childNums[j]) return -1; + if (aa->childNums[j] > bb->childNums[j]) return 1; + } + for (; i < depth; i++) { + int j = i - __kmp_affinity_compact; + if (aa->childNums[j] < bb->childNums[j]) return -1; + if (aa->childNums[j] > bb->childNums[j]) return 1; + } + return 0; +} +#endif + +/** A structure for holding machine-specific hierarchy info to be computed once at init. + This structure represents a mapping of threads to the actual machine hierarchy, or to + our best guess at what the hierarchy might be, for the purpose of performing an + efficient barrier. In the worst case, when there is no machine hierarchy information, + it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */ +class hierarchy_info { +public: + /** Good default values for number of leaves and branching factor, given no affinity information. + Behaves a bit like hyper barrier. */ + static const kmp_uint32 maxLeaves=4; + static const kmp_uint32 minBranch=4; + /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package + or socket, packages/node, nodes/machine, etc. We don't want to get specific with + nomenclature. When the machine is oversubscribed we add levels to duplicate the + hierarchy, doubling the thread capacity of the hierarchy each time we add a level. 
*/ + kmp_uint32 maxLevels; + + /** This is specifically the depth of the machine configuration hierarchy, in terms of the + number of levels along the longest path from root to any leaf. It corresponds to the + number of entries in numPerLevel if we exclude all but one trailing 1. */ + kmp_uint32 depth; + kmp_uint32 base_num_threads; + enum init_status { initialized=0, not_initialized=1, initializing=2 }; + volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress + volatile kmp_int8 resizing; // 0=not resizing, 1=resizing + + /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a + node at level i has. For example, if we have a machine with 4 packages, 4 cores/package + and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */ + kmp_uint32 *numPerLevel; + kmp_uint32 *skipPerLevel; + + void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { + int hier_depth = adr2os[0].first.depth; + int level = 0; + for (int i=hier_depth-1; i>=0; --i) { + int max = -1; + for (int j=0; j max) max = next; + } + numPerLevel[level] = max+1; + ++level; + } + } + + hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} + + void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); } + + void init(AddrUnsPair *adr2os, int num_addrs) + { + kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing); + if (bool_result == 0) { // Wait for initialization + while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE(); + return; + } + KMP_DEBUG_ASSERT(bool_result==1); + + /* Added explicit initialization of the data fields here to prevent usage of dirty value + observed when static library is re-initialized multiple times (e.g. when + non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). 
*/ + depth = 1; + resizing = 0; + maxLevels = 7; + numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); + skipPerLevel = &(numPerLevel[maxLevels]); + for (kmp_uint32 i=0; i=0; --i) // count non-empty levels to get depth + if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' + depth++; + + kmp_uint32 branch = minBranch; + if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves; + if (branch branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0! + if (numPerLevel[d] & 1) numPerLevel[d]++; + numPerLevel[d] = numPerLevel[d] >> 1; + if (numPerLevel[d+1] == 1) depth++; + numPerLevel[d+1] = numPerLevel[d+1] << 1; + } + if(numPerLevel[0] == 1) { + branch = branch >> 1; + if (branch<4) branch = minBranch; + } + } + + for (kmp_uint32 i=1; iold_sz; ++i) { + skipPerLevel[i] = 2*skipPerLevel[i-1]; + numPerLevel[i-1] *= 2; + old_sz *= 2; + depth++; + } + if (nproc > old_sz) { // Not enough space, need to expand hierarchy + while (nproc > old_sz) { + old_sz *=2; + incs++; + depth++; + } + maxLevels += incs; + + // Resize arrays + kmp_uint32 *old_numPerLevel = numPerLevel; + kmp_uint32 *old_skipPerLevel = skipPerLevel; + numPerLevel = skipPerLevel = NULL; + numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); + skipPerLevel = &(numPerLevel[maxLevels]); + + // Copy old elements from old arrays + for (kmp_uint32 i=0; i0: (common) block size for all - bpool calls made so far - */ - bfhead_t * last_pool; /* Last pool owned by this thread (delay dealocation) */ -} thr_data_t; - -/* Minimum allocation quantum: */ - -#define QLSize (sizeof(qlinks_t)) -#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize) -#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) ) - // Maximun for the requested size. - -/* End sentinel: value placed in bsize field of dummy block delimiting - end of pool block. 
The most negative number which will fit in a - bufsize, defined in a way that the compiler will accept. */ - -#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2)) - -/* ------------------------------------------------------------------------ */ - -/* Thread Data management routines */ - -static int -bget_get_bin( bufsize size ) -{ - // binary chop bins - int lo = 0, hi = MAX_BGET_BINS - 1; - - KMP_DEBUG_ASSERT( size > 0 ); - - while ( (hi - lo) > 1 ) { - int mid = (lo + hi) >> 1; - if (size < bget_bin_size[ mid ]) - hi = mid - 1; - else - lo = mid; - } - - KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) ); - - return lo; -} - -static void -set_thr_data( kmp_info_t *th ) -{ - int i; - thr_data_t *data; - - data = - (thr_data_t *)( - ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data - ); - - memset( data, '\0', sizeof( *data ) ); - - for (i = 0; i < MAX_BGET_BINS; ++i) { - data->freelist[ i ].ql.flink = & data->freelist[ i ]; - data->freelist[ i ].ql.blink = & data->freelist[ i ]; - } - - th->th.th_local.bget_data = data; - th->th.th_local.bget_list = 0; -#if ! USE_CMP_XCHG_FOR_BGET -#ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_init_lock( & th->th.th_local.bget_lock ); -#else - __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock ); -#endif /* USE_LOCK_FOR_BGET */ -#endif /* ! 
USE_CMP_XCHG_FOR_BGET */ -} - -static thr_data_t * -get_thr_data( kmp_info_t *th ) -{ - thr_data_t *data; - - data = (thr_data_t *) th->th.th_local.bget_data; - - KMP_DEBUG_ASSERT( data != 0 ); - - return data; -} - - -#ifdef KMP_DEBUG - -static void -__kmp_bget_validate_queue( kmp_info_t *th ) -{ - /* NOTE: assume that the global_lock is held */ - - void *p = (void *) th->th.th_local.bget_list; - - while (p != 0) { - bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t)); - - KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); - p = (void *) b->ql.flink; - } -} - -#endif - -/* Walk the free list and release the enqueued buffers */ - -static void -__kmp_bget_dequeue( kmp_info_t *th ) -{ - void *p = TCR_SYNC_PTR(th->th.th_local.bget_list); - - if (p != 0) { - #if USE_CMP_XCHG_FOR_BGET - { - volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); - while ( ! KMP_COMPARE_AND_STORE_PTR( - & th->th.th_local.bget_list, old_value, NULL ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); - } - p = (void *) old_value; - } - #else /* ! 
USE_CMP_XCHG_FOR_BGET */ - #ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_acquire_lock( & th->th.th_local.bget_lock, - __kmp_gtid_from_thread(th) ); - #else - __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock ); - #endif /* USE_QUEUING_LOCK_FOR_BGET */ - - p = (void *) th->th.th_local.bget_list; - th->th.th_local.bget_list = 0; - - #ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_release_lock( & th->th.th_local.bget_lock, - __kmp_gtid_from_thread(th) ); - #else - __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock ); - #endif - #endif /* USE_CMP_XCHG_FOR_BGET */ - - /* Check again to make sure the list is not empty */ - - while (p != 0) { - void *buf = p; - bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t)); - - KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 ); - KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) == - (kmp_uintptr_t)th ); // clear possible mark - KMP_DEBUG_ASSERT( b->ql.blink == 0 ); - - p = (void *) b->ql.flink; - - brel( th, buf ); - } - } -} - -/* Chain together the free buffers by using the thread owner field */ - -static void -__kmp_bget_enqueue( kmp_info_t *th, void *buf -#ifdef USE_QUEUING_LOCK_FOR_BGET - , kmp_int32 rel_gtid -#endif - ) -{ - bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t)); - - KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 ); - KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) == - (kmp_uintptr_t)th ); // clear possible mark - - b->ql.blink = 0; - - KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n", - __kmp_gtid_from_thread( th ) ) ); - -#if USE_CMP_XCHG_FOR_BGET - { - volatile void *old_value = TCR_PTR(th->th.th_local.bget_list); - /* the next pointer must be set before setting bget_list to buf to avoid - exposing a broken list to other threads, even for an instant. */ - b->ql.flink = BFH( old_value ); - - while ( ! 
KMP_COMPARE_AND_STORE_PTR( - & th->th.th_local.bget_list, old_value, buf ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_PTR(th->th.th_local.bget_list); - /* the next pointer must be set before setting bget_list to buf to avoid - exposing a broken list to other threads, even for an instant. */ - b->ql.flink = BFH( old_value ); - } - } -#else /* ! USE_CMP_XCHG_FOR_BGET */ -# ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid ); -# else - __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock ); - # endif - - b->ql.flink = BFH( th->th.th_local.bget_list ); - th->th.th_local.bget_list = (void *) buf; - -# ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid ); -# else - __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock ); -# endif -#endif /* USE_CMP_XCHG_FOR_BGET */ -} - -/* insert buffer back onto a new freelist */ - -static void -__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b ) -{ - int bin; - - KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 ); - KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 ); - - bin = bget_get_bin( b->bh.bb.bsize ); - - KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]); - KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]); - - b->ql.flink = &thr->freelist[ bin ]; - b->ql.blink = thr->freelist[ bin ].ql.blink; - - thr->freelist[ bin ].ql.blink = b; - b->ql.blink->ql.flink = b; -} - -/* unlink the buffer from the old freelist */ - -static void -__kmp_bget_remove_from_freelist( bfhead_t *b ) -{ - KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); - KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); - - b->ql.blink->ql.flink = b->ql.flink; - b->ql.flink->ql.blink = b->ql.blink; -} - -/* ------------------------------------------------------------------------ */ - -/* GET STATS -- check info on free list */ - -static void -bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free ) -{ - 
thr_data_t *thr = get_thr_data( th ); - int bin; - - *total_free = *max_free = 0; - - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b, *best; - - best = &thr->freelist[ bin ]; - b = best->ql.flink; - - while (b != &thr->freelist[ bin ]) { - *total_free += (b->bh.bb.bsize - sizeof( bhead_t )); - if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) - best = b; - - /* Link to next buffer */ - b = b->ql.flink; - } - - if (*max_free < best->bh.bb.bsize) - *max_free = best->bh.bb.bsize; - } - - if (*max_free > (bufsize)sizeof( bhead_t )) - *max_free -= sizeof( bhead_t ); -} - -/* ------------------------------------------------------------------------ */ - -/* BGET -- Allocate a buffer. */ - -static void * -bget( kmp_info_t *th, bufsize requested_size ) -{ - thr_data_t *thr = get_thr_data( th ); - bufsize size = requested_size; - bfhead_t *b; - void *buf; - int compactseq = 0; - int use_blink = 0; -/* For BestFit */ - bfhead_t *best; - - if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) { - return NULL; - }; // if - - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - - if (size < (bufsize)SizeQ) { /* Need at least room for the */ - size = SizeQ; /* queue links. */ - } - #if defined( SizeQuant ) && ( SizeQuant > 1 ) - size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1)); - #endif - - size += sizeof(bhead_t); /* Add overhead in allocated buffer - to size required. */ - KMP_DEBUG_ASSERT( size >= 0 ); - KMP_DEBUG_ASSERT( size % SizeQuant == 0 ); - - use_blink = ( thr->mode == bget_mode_lifo ); - - /* If a compact function was provided in the call to bectl(), wrap - a loop around the allocation process to allow compaction to - intervene in case we don't find a suitable buffer in the chain. */ - - for (;;) { - int bin; - - for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) { - /* Link to next buffer */ - b = ( use_blink ? 
thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink ); - - if (thr->mode == bget_mode_best) { - best = &thr->freelist[ bin ]; - - /* Scan the free list searching for the first buffer big enough - to hold the requested size buffer. */ - - while (b != &thr->freelist[ bin ]) { - if (b->bh.bb.bsize >= (bufsize) size) { - if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) { - best = b; - } - } - - /* Link to next buffer */ - b = ( use_blink ? b->ql.blink : b->ql.flink ); - } - b = best; - } - - while (b != &thr->freelist[ bin ]) { - if ((bufsize) b->bh.bb.bsize >= (bufsize) size) { - - /* Buffer is big enough to satisfy the request. Allocate it - to the caller. We must decide whether the buffer is large - enough to split into the part given to the caller and a - free buffer that remains on the free list, or whether the - entire buffer should be removed from the free list and - given to the caller in its entirety. We only split the - buffer if enough room remains for a header plus the minimum - quantum of allocation. */ - - if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) { - bhead_t *ba, *bn; - - ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size)); - bn = BH(((char *) ba) + size); - - KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize); - - /* Subtract size from length of free block. */ - b->bh.bb.bsize -= (bufsize) size; - - /* Link allocated buffer to the previous free buffer. */ - ba->bb.prevfree = b->bh.bb.bsize; - - /* Plug negative size into user buffer. */ - ba->bb.bsize = -size; - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it) - /* Mark buffer after this one not preceded by free block. 
*/ - bn->bb.prevfree = 0; - - /* unlink the buffer from the old freelist, and reinsert it into the new freelist */ - __kmp_bget_remove_from_freelist( b ); - __kmp_bget_insert_into_freelist( thr, b ); -#if BufStats - thr->totalloc += (size_t) size; - thr->numget++; /* Increment number of bget() calls */ -#endif - buf = (void *) ((((char *) ba) + sizeof(bhead_t))); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); - return buf; - } else { - bhead_t *ba; - - ba = BH(((char *) b) + b->bh.bb.bsize); - - KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize); - - /* The buffer isn't big enough to split. Give the whole - shebang to the caller and remove it from the free list. */ - - __kmp_bget_remove_from_freelist( b ); -#if BufStats - thr->totalloc += (size_t) b->bh.bb.bsize; - thr->numget++; /* Increment number of bget() calls */ -#endif - /* Negate size to mark buffer allocated. */ - b->bh.bb.bsize = -(b->bh.bb.bsize); - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it) - /* Zero the back pointer in the next buffer in memory - to indicate that this buffer is allocated. */ - ba->bb.prevfree = 0; - - /* Give user buffer starting at queue links. */ - buf = (void *) &(b->ql); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); - return buf; - } - } - - /* Link to next buffer */ - b = ( use_blink ? b->ql.blink : b->ql.flink ); - } - } - - /* We failed to find a buffer. If there's a compact function - defined, notify it of the size requested. If it returns - TRUE, try the allocation again. */ - - if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) { - break; - } - } - - /* No buffer available with requested size free. */ - - /* Don't give up yet -- look in the reserve supply. */ - - if (thr->acqfcn != 0) { - if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) { - - /* Request is too large to fit in a single expansion - block. Try to satisy it by a direct buffer acquisition. 
*/ - - bdhead_t *bdh; - - size += sizeof(bdhead_t) - sizeof(bhead_t); - - KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) ); - - /* richryan */ - bdh = BDH((*thr->acqfcn)((bufsize) size)); - if (bdh != NULL) { - - /* Mark the buffer special by setting the size field - of its header to zero. */ - bdh->bh.bb.bsize = 0; - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated, - // because direct buffer never goes to free list - bdh->bh.bb.prevfree = 0; - bdh->tsize = size; -#if BufStats - thr->totalloc += (size_t) size; - thr->numget++; /* Increment number of bget() calls */ - thr->numdget++; /* Direct bget() call count */ -#endif - buf = (void *) (bdh + 1); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); - return buf; - } - - } else { - - /* Try to obtain a new expansion block */ - - void *newpool; - - KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) ); - - /* richryan */ - newpool = (*thr->acqfcn)((bufsize) thr->exp_incr); - KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 ); - if (newpool != NULL) { - bpool( th, newpool, thr->exp_incr); - buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */ - return buf; - } - } - } - - /* Still no buffer available */ - - return NULL; -} - -/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear - the entire contents of the buffer to zero, not just the - region requested by the caller. 
*/ - -static void * -bgetz( kmp_info_t *th, bufsize size ) -{ - char *buf = (char *) bget( th, size); - - if (buf != NULL) { - bhead_t *b; - bufsize rsize; - - b = BH(buf - sizeof(bhead_t)); - rsize = -(b->bb.bsize); - if (rsize == 0) { - bdhead_t *bd; - - bd = BDH(buf - sizeof(bdhead_t)); - rsize = bd->tsize - (bufsize) sizeof(bdhead_t); - } else { - rsize -= sizeof(bhead_t); - } - - KMP_DEBUG_ASSERT(rsize >= size); - - (void) memset(buf, 0, (bufsize) rsize); - } - return ((void *) buf); -} - -/* BGETR -- Reallocate a buffer. This is a minimal implementation, - simply in terms of brel() and bget(). It could be - enhanced to allow the buffer to grow into adjacent free - blocks and to avoid moving data unnecessarily. */ - -static void * -bgetr( kmp_info_t *th, void *buf, bufsize size) -{ - void *nbuf; - bufsize osize; /* Old size of buffer */ - bhead_t *b; - - nbuf = bget( th, size ); - if ( nbuf == NULL ) { /* Acquire new buffer */ - return NULL; - } - if ( buf == NULL ) { - return nbuf; - } - b = BH(((char *) buf) - sizeof(bhead_t)); - osize = -b->bb.bsize; - if (osize == 0) { - /* Buffer acquired directly through acqfcn. */ - bdhead_t *bd; - - bd = BDH(((char *) buf) - sizeof(bdhead_t)); - osize = bd->tsize - (bufsize) sizeof(bdhead_t); - } else { - osize -= sizeof(bhead_t); - }; - - KMP_DEBUG_ASSERT(osize > 0); - - (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */ - (size_t) ((size < osize) ? size : osize)); - brel( th, buf ); - - return nbuf; -} - -/* BREL -- Release a buffer. */ - -static void -brel( kmp_info_t *th, void *buf ) -{ - thr_data_t *thr = get_thr_data( th ); - bfhead_t *b, *bn; - kmp_info_t *bth; - - KMP_DEBUG_ASSERT(buf != NULL); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); - - b = BFH(((char *) buf) - sizeof(bhead_t)); - - if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? 
*/ - bdhead_t *bdh; - - bdh = BDH(((char *) buf) - sizeof(bdhead_t)); - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); -#if BufStats - thr->totalloc -= (size_t) bdh->tsize; - thr->numdrel++; /* Number of direct releases */ - thr->numrel++; /* Increment number of brel() calls */ -#endif /* BufStats */ -#ifdef FreeWipe - (void) memset((char *) buf, 0x55, - (size_t) (bdh->tsize - sizeof(bdhead_t))); -#endif /* FreeWipe */ - - KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) ); - - KMP_DEBUG_ASSERT( thr->relfcn != 0 ); - (*thr->relfcn)((void *) bdh); /* Release it directly. */ - return; - } - - bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison - if ( bth != th ) { - /* Add this buffer to be released by the owning thread later */ - __kmp_bget_enqueue( bth, buf -#ifdef USE_QUEUING_LOCK_FOR_BGET - , __kmp_gtid_from_thread( th ) -#endif - ); - return; - } - - /* Buffer size must be negative, indicating that the buffer is - allocated. */ - - if (b->bh.bb.bsize >= 0) { - bn = NULL; - } - KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0); - - /* Back pointer in next buffer must be zero, indicating the - same thing: */ - - KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0); - -#if BufStats - thr->numrel++; /* Increment number of brel() calls */ - thr->totalloc += (size_t) b->bh.bb.bsize; -#endif - - /* If the back link is nonzero, the previous buffer is free. */ - - if (b->bh.bb.prevfree != 0) { - /* The previous buffer is free. Consolidate this buffer with it - by adding the length of this buffer to the previous free - buffer. Note that we subtract the size in the buffer being - released, since it's negative to indicate that the buffer is - allocated. 
*/ - +// Disable bget when it is not used +#if KMP_USE_BGET + +/* Thread private buffer management code */ + +typedef int (*bget_compact_t)(size_t, int); +typedef void *(*bget_acquire_t)(size_t); +typedef void (*bget_release_t)(void *); + +/* NOTE: bufsize must be a signed datatype */ + +#if KMP_OS_WINDOWS +# if KMP_ARCH_X86 || KMP_ARCH_ARM + typedef kmp_int32 bufsize; +# else + typedef kmp_int64 bufsize; +# endif +#else + typedef ssize_t bufsize; +#endif + +/* The three modes of operation are, fifo search, lifo search, and best-fit */ + +typedef enum bget_mode { + bget_mode_fifo = 0, + bget_mode_lifo = 1, + bget_mode_best = 2 +} bget_mode_t; + + +static void bpool( kmp_info_t *th, void *buffer, bufsize len); +static void *bget( kmp_info_t *th, bufsize size); +static void *bgetz( kmp_info_t *th, bufsize size); +static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize); +static void brel( kmp_info_t *th, void *buf); +static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr ); + +#ifdef KMP_DEBUG +static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel); +static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel); +static void bufdump( kmp_info_t *th, void *buf); +static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree); +static int bpoolv( kmp_info_t *th, void *pool); +#endif + +/* BGET CONFIGURATION */ + /* Buffer allocation size quantum: + all buffers allocated are a + multiple of this size. This + MUST be a power of two. 
*/ + + /* On IA-32 architecture with Linux* OS, + malloc() does not + ensure 16 byte alignmnent */ + +#if KMP_ARCH_X86 || !KMP_HAVE_QUAD + +#define SizeQuant 8 +#define AlignType double + +#else + +#define SizeQuant 16 +#define AlignType _Quad + +#endif + +#define BufStats 1 /* Define this symbol to enable the + bstats() function which calculates + the total free space in the buffer + pool, the largest available + buffer, and the total space + currently allocated. */ + +#ifdef KMP_DEBUG + +#define BufDump 1 /* Define this symbol to enable the + bpoold() function which dumps the + buffers in a buffer pool. */ + +#define BufValid 1 /* Define this symbol to enable the + bpoolv() function for validating + a buffer pool. */ + +#define DumpData 1 /* Define this symbol to enable the + bufdump() function which allows + dumping the contents of an allocated + or free buffer. */ +#ifdef NOT_USED_NOW + +#define FreeWipe 1 /* Wipe free buffers to a guaranteed + pattern of garbage to trip up + miscreants who attempt to use + pointers into released buffers. */ + +#define BestFit 1 /* Use a best fit algorithm when + searching for space for an + allocation request. This uses + memory more efficiently, but + allocation will be much slower. */ +#endif /* NOT_USED_NOW */ +#endif /* KMP_DEBUG */ + + +static bufsize bget_bin_size[ ] = { + 0, +// 1 << 6, /* .5 Cache line */ + 1 << 7, /* 1 Cache line, new */ + 1 << 8, /* 2 Cache lines */ + 1 << 9, /* 4 Cache lines, new */ + 1 << 10, /* 8 Cache lines */ + 1 << 11, /* 16 Cache lines, new */ + 1 << 12, + 1 << 13, /* new */ + 1 << 14, + 1 << 15, /* new */ + 1 << 16, + 1 << 17, + 1 << 18, + 1 << 19, + 1 << 20, /* 1MB */ + 1 << 21, /* 2MB */ + 1 << 22, /* 4MB */ + 1 << 23, /* 8MB */ + 1 << 24, /* 16MB */ + 1 << 25, /* 32MB */ +}; + +#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize)) + +struct bfhead; + +/* Declare the interface, including the requested buffer size type, + bufsize. 
*/ + +/* Queue links */ + +typedef struct qlinks { + struct bfhead *flink; /* Forward link */ + struct bfhead *blink; /* Backward link */ +} qlinks_t; + +/* Header in allocated and free buffers */ + +typedef struct bhead2 { + kmp_info_t *bthr; /* The thread which owns the buffer pool */ + bufsize prevfree; /* Relative link back to previous + free buffer in memory or 0 if + previous buffer is allocated. */ + bufsize bsize; /* Buffer size: positive if free, + negative if allocated. */ +} bhead2_t; + +/* Make sure the bhead structure is a multiple of SizeQuant in size. */ + +typedef union bhead { + KMP_ALIGN( SizeQuant ) + AlignType b_align; + char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ]; + bhead2_t bb; +} bhead_t; +#define BH(p) ((bhead_t *) (p)) + +/* Header in directly allocated buffers (by acqfcn) */ + +typedef struct bdhead +{ + bufsize tsize; /* Total size, including overhead */ + bhead_t bh; /* Common header */ +} bdhead_t; +#define BDH(p) ((bdhead_t *) (p)) + +/* Header in free buffers */ + +typedef struct bfhead { + bhead_t bh; /* Common allocated/free header */ + qlinks_t ql; /* Links on free list */ +} bfhead_t; +#define BFH(p) ((bfhead_t *) (p)) + +typedef struct thr_data { + bfhead_t freelist[ MAX_BGET_BINS ]; +#if BufStats + size_t totalloc; /* Total space currently allocated */ + long numget, numrel; /* Number of bget() and brel() calls */ + long numpblk; /* Number of pool blocks */ + long numpget, numprel; /* Number of block gets and rels */ + long numdget, numdrel; /* Number of direct gets and rels */ +#endif /* BufStats */ + + /* Automatic expansion block management functions */ + bget_compact_t compfcn; + bget_acquire_t acqfcn; + bget_release_t relfcn; + + bget_mode_t mode; /* what allocation mode to use? 
*/ + + bufsize exp_incr; /* Expansion block size */ + bufsize pool_len; /* 0: no bpool calls have been made + -1: not all pool blocks are + the same size + >0: (common) block size for all + bpool calls made so far + */ + bfhead_t * last_pool; /* Last pool owned by this thread (delay dealocation) */ +} thr_data_t; + +/* Minimum allocation quantum: */ + +#define QLSize (sizeof(qlinks_t)) +#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize) +#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) ) + // Maximun for the requested size. + +/* End sentinel: value placed in bsize field of dummy block delimiting + end of pool block. The most negative number which will fit in a + bufsize, defined in a way that the compiler will accept. */ + +#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2)) + +/* ------------------------------------------------------------------------ */ + +/* Thread Data management routines */ + +static int +bget_get_bin( bufsize size ) +{ + // binary chop bins + int lo = 0, hi = MAX_BGET_BINS - 1; + + KMP_DEBUG_ASSERT( size > 0 ); + + while ( (hi - lo) > 1 ) { + int mid = (lo + hi) >> 1; + if (size < bget_bin_size[ mid ]) + hi = mid - 1; + else + lo = mid; + } + + KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) ); + + return lo; +} + +static void +set_thr_data( kmp_info_t *th ) +{ + int i; + thr_data_t *data; + + data = + (thr_data_t *)( + ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data + ); + + memset( data, '\0', sizeof( *data ) ); + + for (i = 0; i < MAX_BGET_BINS; ++i) { + data->freelist[ i ].ql.flink = & data->freelist[ i ]; + data->freelist[ i ].ql.blink = & data->freelist[ i ]; + } + + th->th.th_local.bget_data = data; + th->th.th_local.bget_list = 0; +#if ! 
USE_CMP_XCHG_FOR_BGET +#ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_init_lock( & th->th.th_local.bget_lock ); +#else + __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock ); +#endif /* USE_LOCK_FOR_BGET */ +#endif /* ! USE_CMP_XCHG_FOR_BGET */ +} + +static thr_data_t * +get_thr_data( kmp_info_t *th ) +{ + thr_data_t *data; + + data = (thr_data_t *) th->th.th_local.bget_data; + + KMP_DEBUG_ASSERT( data != 0 ); + + return data; +} + + +#ifdef KMP_DEBUG + +static void +__kmp_bget_validate_queue( kmp_info_t *th ) +{ + /* NOTE: assume that the global_lock is held */ + + void *p = (void *) th->th.th_local.bget_list; + + while (p != 0) { + bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t)); + + KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); + p = (void *) b->ql.flink; + } +} + +#endif + +/* Walk the free list and release the enqueued buffers */ + +static void +__kmp_bget_dequeue( kmp_info_t *th ) +{ + void *p = TCR_SYNC_PTR(th->th.th_local.bget_list); + + if (p != 0) { + #if USE_CMP_XCHG_FOR_BGET + { + volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); + while ( ! KMP_COMPARE_AND_STORE_PTR( + & th->th.th_local.bget_list, old_value, NULL ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); + } + p = (void *) old_value; + } + #else /* ! 
USE_CMP_XCHG_FOR_BGET */ + #ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_acquire_lock( & th->th.th_local.bget_lock, + __kmp_gtid_from_thread(th) ); + #else + __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock ); + #endif /* USE_QUEUING_LOCK_FOR_BGET */ + + p = (void *) th->th.th_local.bget_list; + th->th.th_local.bget_list = 0; + + #ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_release_lock( & th->th.th_local.bget_lock, + __kmp_gtid_from_thread(th) ); + #else + __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock ); + #endif + #endif /* USE_CMP_XCHG_FOR_BGET */ + + /* Check again to make sure the list is not empty */ + + while (p != 0) { + void *buf = p; + bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t)); + + KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 ); + KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) == + (kmp_uintptr_t)th ); // clear possible mark + KMP_DEBUG_ASSERT( b->ql.blink == 0 ); + + p = (void *) b->ql.flink; + + brel( th, buf ); + } + } +} + +/* Chain together the free buffers by using the thread owner field */ + +static void +__kmp_bget_enqueue( kmp_info_t *th, void *buf +#ifdef USE_QUEUING_LOCK_FOR_BGET + , kmp_int32 rel_gtid +#endif + ) +{ + bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t)); + + KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 ); + KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) == + (kmp_uintptr_t)th ); // clear possible mark + + b->ql.blink = 0; + + KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n", + __kmp_gtid_from_thread( th ) ) ); + +#if USE_CMP_XCHG_FOR_BGET + { + volatile void *old_value = TCR_PTR(th->th.th_local.bget_list); + /* the next pointer must be set before setting bget_list to buf to avoid + exposing a broken list to other threads, even for an instant. */ + b->ql.flink = BFH( old_value ); + + while ( ! 
KMP_COMPARE_AND_STORE_PTR( + & th->th.th_local.bget_list, old_value, buf ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_PTR(th->th.th_local.bget_list); + /* the next pointer must be set before setting bget_list to buf to avoid + exposing a broken list to other threads, even for an instant. */ + b->ql.flink = BFH( old_value ); + } + } +#else /* ! USE_CMP_XCHG_FOR_BGET */ +# ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid ); +# else + __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock ); + # endif + + b->ql.flink = BFH( th->th.th_local.bget_list ); + th->th.th_local.bget_list = (void *) buf; + +# ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid ); +# else + __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock ); +# endif +#endif /* USE_CMP_XCHG_FOR_BGET */ +} + +/* insert buffer back onto a new freelist */ + +static void +__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b ) +{ + int bin; + + KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 ); + KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 ); + + bin = bget_get_bin( b->bh.bb.bsize ); + + KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]); + KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]); + + b->ql.flink = &thr->freelist[ bin ]; + b->ql.blink = thr->freelist[ bin ].ql.blink; + + thr->freelist[ bin ].ql.blink = b; + b->ql.blink->ql.flink = b; +} + +/* unlink the buffer from the old freelist */ + +static void +__kmp_bget_remove_from_freelist( bfhead_t *b ) +{ + KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); + KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); + + b->ql.blink->ql.flink = b->ql.flink; + b->ql.flink->ql.blink = b->ql.blink; +} + +/* ------------------------------------------------------------------------ */ + +/* GET STATS -- check info on free list */ + +static void +bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free ) +{ + 
thr_data_t *thr = get_thr_data( th ); + int bin; + + *total_free = *max_free = 0; + + for (bin = 0; bin < MAX_BGET_BINS; ++bin) { + bfhead_t *b, *best; + + best = &thr->freelist[ bin ]; + b = best->ql.flink; + + while (b != &thr->freelist[ bin ]) { + *total_free += (b->bh.bb.bsize - sizeof( bhead_t )); + if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) + best = b; + + /* Link to next buffer */ + b = b->ql.flink; + } + + if (*max_free < best->bh.bb.bsize) + *max_free = best->bh.bb.bsize; + } + + if (*max_free > (bufsize)sizeof( bhead_t )) + *max_free -= sizeof( bhead_t ); +} + +/* ------------------------------------------------------------------------ */ + +/* BGET -- Allocate a buffer. */ + +static void * +bget( kmp_info_t *th, bufsize requested_size ) +{ + thr_data_t *thr = get_thr_data( th ); + bufsize size = requested_size; + bfhead_t *b; + void *buf; + int compactseq = 0; + int use_blink = 0; +/* For BestFit */ + bfhead_t *best; + + if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) { + return NULL; + }; // if + + __kmp_bget_dequeue( th ); /* Release any queued buffers */ + + if (size < (bufsize)SizeQ) { /* Need at least room for the */ + size = SizeQ; /* queue links. */ + } + #if defined( SizeQuant ) && ( SizeQuant > 1 ) + size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1)); + #endif + + size += sizeof(bhead_t); /* Add overhead in allocated buffer + to size required. */ + KMP_DEBUG_ASSERT( size >= 0 ); + KMP_DEBUG_ASSERT( size % SizeQuant == 0 ); + + use_blink = ( thr->mode == bget_mode_lifo ); + + /* If a compact function was provided in the call to bectl(), wrap + a loop around the allocation process to allow compaction to + intervene in case we don't find a suitable buffer in the chain. */ + + for (;;) { + int bin; + + for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) { + /* Link to next buffer */ + b = ( use_blink ? 
thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink ); + + if (thr->mode == bget_mode_best) { + best = &thr->freelist[ bin ]; + + /* Scan the free list searching for the first buffer big enough + to hold the requested size buffer. */ + + while (b != &thr->freelist[ bin ]) { + if (b->bh.bb.bsize >= (bufsize) size) { + if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) { + best = b; + } + } + + /* Link to next buffer */ + b = ( use_blink ? b->ql.blink : b->ql.flink ); + } + b = best; + } + + while (b != &thr->freelist[ bin ]) { + if ((bufsize) b->bh.bb.bsize >= (bufsize) size) { + + /* Buffer is big enough to satisfy the request. Allocate it + to the caller. We must decide whether the buffer is large + enough to split into the part given to the caller and a + free buffer that remains on the free list, or whether the + entire buffer should be removed from the free list and + given to the caller in its entirety. We only split the + buffer if enough room remains for a header plus the minimum + quantum of allocation. */ + + if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) { + bhead_t *ba, *bn; + + ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size)); + bn = BH(((char *) ba) + size); + + KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize); + + /* Subtract size from length of free block. */ + b->bh.bb.bsize -= (bufsize) size; + + /* Link allocated buffer to the previous free buffer. */ + ba->bb.prevfree = b->bh.bb.bsize; + + /* Plug negative size into user buffer. */ + ba->bb.bsize = -size; + + /* Mark this buffer as owned by this thread. */ + TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it) + /* Mark buffer after this one not preceded by free block. 
*/ + bn->bb.prevfree = 0; + + /* unlink the buffer from the old freelist, and reinsert it into the new freelist */ + __kmp_bget_remove_from_freelist( b ); + __kmp_bget_insert_into_freelist( thr, b ); +#if BufStats + thr->totalloc += (size_t) size; + thr->numget++; /* Increment number of bget() calls */ +#endif + buf = (void *) ((((char *) ba) + sizeof(bhead_t))); + KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); + return buf; + } else { + bhead_t *ba; + + ba = BH(((char *) b) + b->bh.bb.bsize); + + KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize); + + /* The buffer isn't big enough to split. Give the whole + shebang to the caller and remove it from the free list. */ + + __kmp_bget_remove_from_freelist( b ); +#if BufStats + thr->totalloc += (size_t) b->bh.bb.bsize; + thr->numget++; /* Increment number of bget() calls */ +#endif + /* Negate size to mark buffer allocated. */ + b->bh.bb.bsize = -(b->bh.bb.bsize); + + /* Mark this buffer as owned by this thread. */ + TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it) + /* Zero the back pointer in the next buffer in memory + to indicate that this buffer is allocated. */ + ba->bb.prevfree = 0; + + /* Give user buffer starting at queue links. */ + buf = (void *) &(b->ql); + KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); + return buf; + } + } + + /* Link to next buffer */ + b = ( use_blink ? b->ql.blink : b->ql.flink ); + } + } + + /* We failed to find a buffer. If there's a compact function + defined, notify it of the size requested. If it returns + TRUE, try the allocation again. */ + + if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) { + break; + } + } + + /* No buffer available with requested size free. */ + + /* Don't give up yet -- look in the reserve supply. */ + + if (thr->acqfcn != 0) { + if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) { + + /* Request is too large to fit in a single expansion + block. Try to satisy it by a direct buffer acquisition. 
*/ + + bdhead_t *bdh; + + size += sizeof(bdhead_t) - sizeof(bhead_t); + + KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) ); + + /* richryan */ + bdh = BDH((*thr->acqfcn)((bufsize) size)); + if (bdh != NULL) { + + /* Mark the buffer special by setting the size field + of its header to zero. */ + bdh->bh.bb.bsize = 0; + + /* Mark this buffer as owned by this thread. */ + TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated, + // because direct buffer never goes to free list + bdh->bh.bb.prevfree = 0; + bdh->tsize = size; +#if BufStats + thr->totalloc += (size_t) size; + thr->numget++; /* Increment number of bget() calls */ + thr->numdget++; /* Direct bget() call count */ +#endif + buf = (void *) (bdh + 1); + KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); + return buf; + } + + } else { + + /* Try to obtain a new expansion block */ + + void *newpool; + + KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) ); + + /* richryan */ + newpool = (*thr->acqfcn)((bufsize) thr->exp_incr); + KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 ); + if (newpool != NULL) { + bpool( th, newpool, thr->exp_incr); + buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */ + return buf; + } + } + } + + /* Still no buffer available */ + + return NULL; +} + +/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear + the entire contents of the buffer to zero, not just the + region requested by the caller. 
*/ + +static void * +bgetz( kmp_info_t *th, bufsize size ) +{ + char *buf = (char *) bget( th, size); + + if (buf != NULL) { + bhead_t *b; + bufsize rsize; + + b = BH(buf - sizeof(bhead_t)); + rsize = -(b->bb.bsize); + if (rsize == 0) { + bdhead_t *bd; + + bd = BDH(buf - sizeof(bdhead_t)); + rsize = bd->tsize - (bufsize) sizeof(bdhead_t); + } else { + rsize -= sizeof(bhead_t); + } + + KMP_DEBUG_ASSERT(rsize >= size); + + (void) memset(buf, 0, (bufsize) rsize); + } + return ((void *) buf); +} + +/* BGETR -- Reallocate a buffer. This is a minimal implementation, + simply in terms of brel() and bget(). It could be + enhanced to allow the buffer to grow into adjacent free + blocks and to avoid moving data unnecessarily. */ + +static void * +bgetr( kmp_info_t *th, void *buf, bufsize size) +{ + void *nbuf; + bufsize osize; /* Old size of buffer */ + bhead_t *b; + + nbuf = bget( th, size ); + if ( nbuf == NULL ) { /* Acquire new buffer */ + return NULL; + } + if ( buf == NULL ) { + return nbuf; + } + b = BH(((char *) buf) - sizeof(bhead_t)); + osize = -b->bb.bsize; + if (osize == 0) { + /* Buffer acquired directly through acqfcn. */ + bdhead_t *bd; + + bd = BDH(((char *) buf) - sizeof(bdhead_t)); + osize = bd->tsize - (bufsize) sizeof(bdhead_t); + } else { + osize -= sizeof(bhead_t); + }; + + KMP_DEBUG_ASSERT(osize > 0); + + (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */ + (size_t) ((size < osize) ? size : osize)); + brel( th, buf ); + + return nbuf; +} + +/* BREL -- Release a buffer. */ + +static void +brel( kmp_info_t *th, void *buf ) +{ + thr_data_t *thr = get_thr_data( th ); + bfhead_t *b, *bn; + kmp_info_t *bth; + + KMP_DEBUG_ASSERT(buf != NULL); + KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); + + b = BFH(((char *) buf) - sizeof(bhead_t)); + + if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? 
*/ + bdhead_t *bdh; + + bdh = BDH(((char *) buf) - sizeof(bdhead_t)); + KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); +#if BufStats + thr->totalloc -= (size_t) bdh->tsize; + thr->numdrel++; /* Number of direct releases */ + thr->numrel++; /* Increment number of brel() calls */ +#endif /* BufStats */ +#ifdef FreeWipe + (void) memset((char *) buf, 0x55, + (size_t) (bdh->tsize - sizeof(bdhead_t))); +#endif /* FreeWipe */ + + KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) ); + + KMP_DEBUG_ASSERT( thr->relfcn != 0 ); + (*thr->relfcn)((void *) bdh); /* Release it directly. */ + return; + } + + bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison + if ( bth != th ) { + /* Add this buffer to be released by the owning thread later */ + __kmp_bget_enqueue( bth, buf +#ifdef USE_QUEUING_LOCK_FOR_BGET + , __kmp_gtid_from_thread( th ) +#endif + ); + return; + } + + /* Buffer size must be negative, indicating that the buffer is + allocated. */ + + if (b->bh.bb.bsize >= 0) { + bn = NULL; + } + KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0); + + /* Back pointer in next buffer must be zero, indicating the + same thing: */ + + KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0); + +#if BufStats + thr->numrel++; /* Increment number of brel() calls */ + thr->totalloc += (size_t) b->bh.bb.bsize; +#endif + + /* If the back link is nonzero, the previous buffer is free. */ + + if (b->bh.bb.prevfree != 0) { + /* The previous buffer is free. Consolidate this buffer with it + by adding the length of this buffer to the previous free + buffer. Note that we subtract the size in the buffer being + released, since it's negative to indicate that the buffer is + allocated. */ + bufsize size = b->bh.bb.bsize; - - /* Make the previous buffer the one we're working on. 
*/ - KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree); - b = BFH(((char *) b) - b->bh.bb.prevfree); - b->bh.bb.bsize -= size; - - /* unlink the buffer from the old freelist */ - __kmp_bget_remove_from_freelist( b ); - } - else { - /* The previous buffer isn't allocated. Mark this buffer - size as positive (i.e. free) and fall through to place - the buffer on the free list as an isolated free block. */ - - b->bh.bb.bsize = -b->bh.bb.bsize; - } - - /* insert buffer back onto a new freelist */ - __kmp_bget_insert_into_freelist( thr, b ); - - - /* Now we look at the next buffer in memory, located by advancing from - the start of this buffer by its size, to see if that buffer is - free. If it is, we combine this buffer with the next one in - memory, dechaining the second buffer from the free list. */ - - bn = BFH(((char *) b) + b->bh.bb.bsize); - if (bn->bh.bb.bsize > 0) { - - /* The buffer is free. Remove it from the free list and add - its size to that of our buffer. */ - - KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize); - - __kmp_bget_remove_from_freelist( bn ); - - b->bh.bb.bsize += bn->bh.bb.bsize; - - /* unlink the buffer from the old freelist, and reinsert it into the new freelist */ - - __kmp_bget_remove_from_freelist( b ); - __kmp_bget_insert_into_freelist( thr, b ); - - /* Finally, advance to the buffer that follows the newly - consolidated free block. We must set its backpointer to the - head of the consolidated free block. We know the next block - must be an allocated block because the process of recombination - guarantees that two free blocks will never be contiguous in - memory. */ - - bn = BFH(((char *) b) + b->bh.bb.bsize); - } -#ifdef FreeWipe - (void) memset(((char *) b) + sizeof(bfhead_t), 0x55, - (size_t) (b->bh.bb.bsize - sizeof(bfhead_t))); -#endif - KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0); - - /* The next buffer is allocated. 
Set the backpointer in it to point - to this buffer; the previous free buffer in memory. */ - - bn->bh.bb.prevfree = b->bh.bb.bsize; - - /* If a block-release function is defined, and this free buffer - constitutes the entire block, release it. Note that pool_len - is defined in such a way that the test will fail unless all - pool blocks are the same size. */ - - if (thr->relfcn != 0 && - b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) - { -#if BufStats - if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */ -#endif - - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize); - - /* Unlink the buffer from the free list */ - __kmp_bget_remove_from_freelist( b ); - - KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) ); - - (*thr->relfcn)(b); -#if BufStats - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); - - /* avoid leaving stale last_pool pointer around if it is being dealloced */ - if (thr->last_pool == b) thr->last_pool = 0; - } - else { - thr->last_pool = b; - } -#endif /* BufStats */ - } -} - -/* BECTL -- Establish automatic pool expansion control */ - -static void -bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr) -{ - thr_data_t *thr = get_thr_data( th ); - - thr->compfcn = compact; - thr->acqfcn = acquire; - thr->relfcn = release; - thr->exp_incr = pool_incr; -} - -/* BPOOL -- Add a region of memory to the buffer pool. 
*/ - -static void -bpool( kmp_info_t *th, void *buf, bufsize len) -{ -/* int bin = 0; */ - thr_data_t *thr = get_thr_data( th ); - bfhead_t *b = BFH(buf); - bhead_t *bn; - - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - -#ifdef SizeQuant - len &= ~(SizeQuant - 1); -#endif - if (thr->pool_len == 0) { - thr->pool_len = len; - } else if (len != thr->pool_len) { - thr->pool_len = -1; - } -#if BufStats - thr->numpget++; /* Number of block acquisitions */ - thr->numpblk++; /* Number of blocks total */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); -#endif /* BufStats */ - - /* Since the block is initially occupied by a single free buffer, - it had better not be (much) larger than the largest buffer - whose size we can store in bhead.bb.bsize. */ - - KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1)); - - /* Clear the backpointer at the start of the block to indicate that - there is no free block prior to this one. That blocks - recombination when the first block in memory is released. */ - - b->bh.bb.prevfree = 0; - - /* Create a dummy allocated buffer at the end of the pool. This dummy - buffer is seen when a buffer at the end of the pool is released and - blocks recombination of the last buffer with the dummy buffer at - the end. The length in the dummy buffer is set to the largest - negative number to denote the end of the pool for diagnostic - routines (this specific value is not counted on by the actual - allocation and release functions). */ - - len -= sizeof(bhead_t); - b->bh.bb.bsize = (bufsize) len; - /* Set the owner of this buffer */ - TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address - - /* Chain the new block to the free list. 
*/ - __kmp_bget_insert_into_freelist( thr, b ); - -#ifdef FreeWipe - (void) memset(((char *) b) + sizeof(bfhead_t), 0x55, - (size_t) (len - sizeof(bfhead_t))); -#endif - bn = BH(((char *) b) + len); - bn->bb.prevfree = (bufsize) len; - /* Definition of ESent assumes two's complement! */ - KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) ); - - bn->bb.bsize = ESent; -} - -/* ------------------------------------------------------------------------ */ - -/* BFREED -- Dump the free lists for this thread. */ - -static void -bfreed( kmp_info_t *th ) -{ - int bin = 0, count = 0; - int gtid = __kmp_gtid_from_thread( th ); - thr_data_t *thr = get_thr_data( th ); - -#if BufStats - __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \ - KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \ - " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n", - gtid, (kmp_uint64) thr->totalloc, - (kmp_int64) thr->numget, (kmp_int64) thr->numrel, - (kmp_int64) thr->numpblk, - (kmp_int64) thr->numpget, (kmp_int64) thr->numprel, - (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel ); -#endif - - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b; - - for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) { - bufsize bs = b->bh.bb.bsize; - - KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b ); - KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b ); - KMP_DEBUG_ASSERT( bs > 0 ); - - count += 1; - - __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs ); -#ifdef FreeWipe - { - char *lerr = ((char *) b) + sizeof(bfhead_t); - if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { - __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid ); - } - } -#endif - } - } - - if (count == 0) - __kmp_printf_no_lock("__kmp_printpool: 
T#%d No free blocks\n", gtid ); -} - -/* ------------------------------------------------------------------------ */ - -#ifdef KMP_DEBUG - -#if BufStats - -/* BSTATS -- Return buffer allocation free space statistics. */ - -static void -bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel) -{ - int bin = 0; - thr_data_t *thr = get_thr_data( th ); - - *nget = thr->numget; - *nrel = thr->numrel; - *curalloc = (bufsize) thr->totalloc; - *totfree = 0; - *maxfree = -1; - - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b = thr->freelist[ bin ].ql.flink; - - while (b != &thr->freelist[ bin ]) { - KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0); - *totfree += b->bh.bb.bsize; - if (b->bh.bb.bsize > *maxfree) { - *maxfree = b->bh.bb.bsize; - } - b = b->ql.flink; /* Link to next buffer */ - } - } -} - -/* BSTATSE -- Return extended statistics */ - -static void -bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel) -{ - thr_data_t *thr = get_thr_data( th ); - - *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr; - *npool = thr->numpblk; - *npget = thr->numpget; - *nprel = thr->numprel; - *ndget = thr->numdget; - *ndrel = thr->numdrel; -} - -#endif /* BufStats */ - -/* BUFDUMP -- Dump the data in a buffer. This is called with the user - data pointer, and backs up to the buffer header. It will - dump either a free block or an allocated one. 
*/ - -static void -bufdump( kmp_info_t *th, void *buf ) -{ - bfhead_t *b; - unsigned char *bdump; - bufsize bdlen; - - b = BFH(((char *) buf) - sizeof(bhead_t)); - KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); - if (b->bh.bb.bsize < 0) { - bdump = (unsigned char *) buf; - bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t); - } else { - bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t)); - bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t); - } - - while (bdlen > 0) { - int i, dupes = 0; - bufsize l = bdlen; - char bhex[50], bascii[20]; - - if (l > 16) { - l = 16; - } - - for (i = 0; i < l; i++) { - (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]); - if (bdump[i] > 0x20 && bdump[i] < 0x7F) - bascii[ i ] = bdump[ i ]; - else - bascii[ i ] = ' '; - } - bascii[i] = 0; - (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii); - bdump += l; - bdlen -= l; - while ((bdlen > 16) && (memcmp((char *) (bdump - 16), - (char *) bdump, 16) == 0)) { - dupes++; - bdump += 16; - bdlen -= 16; - } - if (dupes > 1) { - (void) __kmp_printf_no_lock( - " (%d lines [%d bytes] identical to above line skipped)\n", - dupes, dupes * 16); - } else if (dupes == 1) { - bdump -= 16; - bdlen += 16; - } - } -} - -/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed. - If DUMPALLOC is nonzero, the contents of allocated buffers - are dumped. If DUMPFREE is nonzero, free blocks are - dumped as well. If FreeWipe checking is enabled, free - blocks which have been clobbered will always be dumped. 
*/ - -static void -bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) -{ - bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t)); - - while (b->bh.bb.bsize != ESent) { - bufsize bs = b->bh.bb.bsize; - - if (bs < 0) { - bs = -bs; - (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs); - if (dumpalloc) { - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - } - } else { - const char *lerr = ""; - - KMP_DEBUG_ASSERT(bs > 0); - if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { - lerr = " (Bad free list links)"; - } - (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n", - (long) bs, lerr); -#ifdef FreeWipe - lerr = ((char *) b) + sizeof(bfhead_t); - if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || - (memcmp(lerr, lerr + 1, - (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { - (void) __kmp_printf_no_lock( - "(Contents of above free block have been overstored.)\n"); - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - } else -#endif - if (dumpfree) { - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - } - } - b = BFH(((char *) b) + bs); - } -} - -/* BPOOLV -- Validate a buffer pool. */ - -static int -bpoolv( kmp_info_t *th, void *buf ) -{ - bfhead_t *b = BFH(buf); - - while (b->bh.bb.bsize != ESent) { - bufsize bs = b->bh.bb.bsize; - - if (bs < 0) { - bs = -bs; - } else { -#ifdef FreeWipe - char *lerr = ""; -#endif - - KMP_DEBUG_ASSERT(bs > 0); - if (bs <= 0) { - return 0; - } - if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { - (void) __kmp_printf_no_lock("Free block: size %6ld bytes. 
(Bad free list links)\n", - (long) bs); - KMP_DEBUG_ASSERT(0); - return 0; - } -#ifdef FreeWipe - lerr = ((char *) b) + sizeof(bfhead_t); - if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || - (memcmp(lerr, lerr + 1, - (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { - (void) __kmp_printf_no_lock( - "(Contents of above free block have been overstored.)\n"); - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - KMP_DEBUG_ASSERT(0); - return 0; - } -#endif /* FreeWipe */ - } - b = BFH(((char *) b) + bs); - } - return 1; -} - -#endif /* KMP_DEBUG */ - -/* ------------------------------------------------------------------------ */ - -void -__kmp_initialize_bget( kmp_info_t *th ) -{ - KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) ); - - set_thr_data( th ); - - bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free, - (bufsize) __kmp_malloc_pool_incr ); -} - -void -__kmp_finalize_bget( kmp_info_t *th ) -{ - thr_data_t *thr; - bfhead_t *b; - - KMP_DEBUG_ASSERT( th != 0 ); - -#if BufStats - thr = (thr_data_t *) th->th.th_local.bget_data; - KMP_DEBUG_ASSERT( thr != NULL ); - b = thr->last_pool; - - /* If a block-release function is defined, and this free buffer - constitutes the entire block, release it. Note that pool_len - is defined in such a way that the test will fail unless all - pool blocks are the same size. 
*/ - - /* Deallocate the last pool if one exists because we no longer do it in brel() */ - if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 && - b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) - { - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize); - - /* Unlink the buffer from the free list */ - __kmp_bget_remove_from_freelist( b ); - - KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) ); - - (*thr->relfcn)(b); - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); - } -#endif /* BufStats */ - - /* Deallocate bget_data */ - if ( th->th.th_local.bget_data != NULL ) { - __kmp_free( th->th.th_local.bget_data ); - th->th.th_local.bget_data = NULL; - }; // if -} - -void -kmpc_set_poolsize( size_t size ) -{ - bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc, - (bget_release_t) free, (bufsize) size ); -} - -size_t -kmpc_get_poolsize( void ) -{ - thr_data_t *p; - - p = get_thr_data( __kmp_get_thread() ); - - return p->exp_incr; -} - -void -kmpc_set_poolmode( int mode ) -{ - thr_data_t *p; - - if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) { - p = get_thr_data( __kmp_get_thread() ); - p->mode = (bget_mode_t) mode; - } -} - -int -kmpc_get_poolmode( void ) -{ - thr_data_t *p; - - p = get_thr_data( __kmp_get_thread() ); - - return p->mode; -} - -void -kmpc_get_poolstat( size_t *maxmem, size_t *allmem ) -{ - kmp_info_t *th = __kmp_get_thread(); - bufsize a, b; - - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - - bcheck( th, &a, &b ); - - *maxmem = a; - *allmem = b; -} - -void -kmpc_poolprint( void ) -{ - kmp_info_t *th = __kmp_get_thread(); - - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - - bfreed( th ); -} - -#endif // 
#if KMP_USE_BGET - -/* ------------------------------------------------------------------------ */ - -void * -kmpc_malloc( size_t size ) -{ - void * ptr; - ptr = bget( __kmp_entry_thread(), (bufsize) size ); - - return ptr; -} - -void * -kmpc_calloc( size_t nelem, size_t elsize ) -{ - void * ptr; - ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) ); - - return ptr; -} - -void * -kmpc_realloc( void * ptr, size_t size ) -{ - void * result = NULL; - - if ( ptr == NULL ) { - // If pointer is NULL, realloc behaves like malloc. - result = bget( __kmp_entry_thread(), (bufsize) size ); - } else if ( size == 0 ) { - // If size is 0, realloc behaves like free. - // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before. - // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread(). - brel( __kmp_get_thread(), ptr ); - } else { - result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size ); - }; // if - - return result; -} - -/* NOTE: the library must have already been initialized by a previous allocate */ - -void -kmpc_free( void * ptr ) -{ - if ( ! 
__kmp_init_serial ) { - return; - }; // if - if ( ptr != NULL ) { - kmp_info_t *th = __kmp_get_thread(); - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - brel( th, ptr ); - }; -} - - -/* ------------------------------------------------------------------------ */ - -void * -___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL ) -{ - void * ptr; - KE_TRACE( 30, ( - "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", - th, - (int) size - KMP_SRC_LOC_PARM - ) ); - ptr = bget( th, (bufsize) size ); - KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) ); - return ptr; -} - -void * -___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL ) -{ - void * ptr; - KE_TRACE( 30, ( - "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", - th, - (int) nelem, - (int) elsize - KMP_SRC_LOC_PARM - ) ); - ptr = bgetz( th, (bufsize) (nelem * elsize) ); - KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) ); - return ptr; -} - -void * -___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL ) -{ - KE_TRACE( 30, ( - "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", - th, - ptr, - (int) size - KMP_SRC_LOC_PARM - ) ); - ptr = bgetr( th, ptr, (bufsize) size ); - KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) ); - return ptr; -} - -void -___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL ) -{ - KE_TRACE( 30, ( - "-> __kmp_thread_free( %p, %p ) called from %s:%d\n", - th, - ptr - KMP_SRC_LOC_PARM - ) ); - if ( ptr != NULL ) { - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - brel( th, ptr ); - } - KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -/* - If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. 
It causes memory leaks, but it - may be useful for debugging memory corruptions, used freed pointers, etc. -*/ -/* #define LEAK_MEMORY */ - -struct kmp_mem_descr { // Memory block descriptor. - void * ptr_allocated; // Pointer returned by malloc(), subject for free(). - size_t size_allocated; // Size of allocated memory block. - void * ptr_aligned; // Pointer to aligned memory, to be used by client code. - size_t size_aligned; // Size of aligned memory block. -}; -typedef struct kmp_mem_descr kmp_mem_descr_t; - -/* - Allocate memory on requested boundary, fill allocated memory with 0x00. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. - Must use __kmp_free when freeing memory allocated by this routine! - */ -static -void * -___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL ) -{ - /* - __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to - return properly aligned pointer. Original pointer returned by malloc() and size of allocated - block is saved in descriptor just before the aligned pointer. This information used by - __kmp_free() -- it has to pass to free() original pointer, not aligned one. - - +---------+------------+-----------------------------------+---------+ - | padding | descriptor | aligned block | padding | - +---------+------------+-----------------------------------+---------+ - ^ ^ - | | - | +- Aligned pointer returned to caller - +- Pointer returned by malloc() - - Aligned block is filled with zeros, paddings are filled with 0xEF. - */ - - kmp_mem_descr_t descr; - kmp_uintptr_t addr_allocated; // Address returned by malloc(). - kmp_uintptr_t addr_aligned; // Aligned address to return to caller. - kmp_uintptr_t addr_descr; // Address of memory block descriptor. 
- - KE_TRACE( 25, ( - "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n", - (int) size, - (int) alignment - KMP_SRC_LOC_PARM - ) ); - - KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too - KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) ); - // Make sure kmp_uintptr_t is enough to store addresses. - - descr.size_aligned = size; - descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment; - - #if KMP_DEBUG - descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ ); - #else - descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM ); - #endif + + /* Make the previous buffer the one we're working on. */ + KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree); + b = BFH(((char *) b) - b->bh.bb.prevfree); + b->bh.bb.bsize -= size; + + /* unlink the buffer from the old freelist */ + __kmp_bget_remove_from_freelist( b ); + } + else { + /* The previous buffer isn't allocated. Mark this buffer + size as positive (i.e. free) and fall through to place + the buffer on the free list as an isolated free block. */ + + b->bh.bb.bsize = -b->bh.bb.bsize; + } + + /* insert buffer back onto a new freelist */ + __kmp_bget_insert_into_freelist( thr, b ); + + + /* Now we look at the next buffer in memory, located by advancing from + the start of this buffer by its size, to see if that buffer is + free. If it is, we combine this buffer with the next one in + memory, dechaining the second buffer from the free list. */ + + bn = BFH(((char *) b) + b->bh.bb.bsize); + if (bn->bh.bb.bsize > 0) { + + /* The buffer is free. Remove it from the free list and add + its size to that of our buffer. 
*/ + + KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize); + + __kmp_bget_remove_from_freelist( bn ); + + b->bh.bb.bsize += bn->bh.bb.bsize; + + /* unlink the buffer from the old freelist, and reinsert it into the new freelist */ + + __kmp_bget_remove_from_freelist( b ); + __kmp_bget_insert_into_freelist( thr, b ); + + /* Finally, advance to the buffer that follows the newly + consolidated free block. We must set its backpointer to the + head of the consolidated free block. We know the next block + must be an allocated block because the process of recombination + guarantees that two free blocks will never be contiguous in + memory. */ + + bn = BFH(((char *) b) + b->bh.bb.bsize); + } +#ifdef FreeWipe + (void) memset(((char *) b) + sizeof(bfhead_t), 0x55, + (size_t) (b->bh.bb.bsize - sizeof(bfhead_t))); +#endif + KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0); + + /* The next buffer is allocated. Set the backpointer in it to point + to this buffer; the previous free buffer in memory. */ + + bn->bh.bb.prevfree = b->bh.bb.bsize; + + /* If a block-release function is defined, and this free buffer + constitutes the entire block, release it. Note that pool_len + is defined in such a way that the test will fail unless all + pool blocks are the same size. 
*/ + + if (thr->relfcn != 0 && + b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) + { +#if BufStats + if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */ +#endif + + KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); + KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent); + KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize); + + /* Unlink the buffer from the free list */ + __kmp_bget_remove_from_freelist( b ); + + KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) ); + + (*thr->relfcn)(b); +#if BufStats + thr->numprel++; /* Nr of expansion block releases */ + thr->numpblk--; /* Total number of blocks */ + KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); + + /* avoid leaving stale last_pool pointer around if it is being dealloced */ + if (thr->last_pool == b) thr->last_pool = 0; + } + else { + thr->last_pool = b; + } +#endif /* BufStats */ + } +} + +/* BECTL -- Establish automatic pool expansion control */ + +static void +bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr) +{ + thr_data_t *thr = get_thr_data( th ); + + thr->compfcn = compact; + thr->acqfcn = acquire; + thr->relfcn = release; + thr->exp_incr = pool_incr; +} + +/* BPOOL -- Add a region of memory to the buffer pool. 
*/ + +static void +bpool( kmp_info_t *th, void *buf, bufsize len) +{ +/* int bin = 0; */ + thr_data_t *thr = get_thr_data( th ); + bfhead_t *b = BFH(buf); + bhead_t *bn; + + __kmp_bget_dequeue( th ); /* Release any queued buffers */ + +#ifdef SizeQuant + len &= ~(SizeQuant - 1); +#endif + if (thr->pool_len == 0) { + thr->pool_len = len; + } else if (len != thr->pool_len) { + thr->pool_len = -1; + } +#if BufStats + thr->numpget++; /* Number of block acquisitions */ + thr->numpblk++; /* Number of blocks total */ + KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); +#endif /* BufStats */ + + /* Since the block is initially occupied by a single free buffer, + it had better not be (much) larger than the largest buffer + whose size we can store in bhead.bb.bsize. */ + + KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1)); + + /* Clear the backpointer at the start of the block to indicate that + there is no free block prior to this one. That blocks + recombination when the first block in memory is released. */ + + b->bh.bb.prevfree = 0; + + /* Create a dummy allocated buffer at the end of the pool. This dummy + buffer is seen when a buffer at the end of the pool is released and + blocks recombination of the last buffer with the dummy buffer at + the end. The length in the dummy buffer is set to the largest + negative number to denote the end of the pool for diagnostic + routines (this specific value is not counted on by the actual + allocation and release functions). */ + + len -= sizeof(bhead_t); + b->bh.bb.bsize = (bufsize) len; + /* Set the owner of this buffer */ + TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address + + /* Chain the new block to the free list. 
*/ + __kmp_bget_insert_into_freelist( thr, b ); + +#ifdef FreeWipe + (void) memset(((char *) b) + sizeof(bfhead_t), 0x55, + (size_t) (len - sizeof(bfhead_t))); +#endif + bn = BH(((char *) b) + len); + bn->bb.prevfree = (bufsize) len; + /* Definition of ESent assumes two's complement! */ + KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) ); + + bn->bb.bsize = ESent; +} + +/* ------------------------------------------------------------------------ */ + +/* BFREED -- Dump the free lists for this thread. */ + +static void +bfreed( kmp_info_t *th ) +{ + int bin = 0, count = 0; + int gtid = __kmp_gtid_from_thread( th ); + thr_data_t *thr = get_thr_data( th ); + +#if BufStats + __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \ + KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \ + " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n", + gtid, (kmp_uint64) thr->totalloc, + (kmp_int64) thr->numget, (kmp_int64) thr->numrel, + (kmp_int64) thr->numpblk, + (kmp_int64) thr->numpget, (kmp_int64) thr->numprel, + (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel ); +#endif + + for (bin = 0; bin < MAX_BGET_BINS; ++bin) { + bfhead_t *b; + + for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) { + bufsize bs = b->bh.bb.bsize; + + KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b ); + KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b ); + KMP_DEBUG_ASSERT( bs > 0 ); + + count += 1; + + __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs ); +#ifdef FreeWipe + { + char *lerr = ((char *) b) + sizeof(bfhead_t); + if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { + __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid ); + } + } +#endif + } + } + + if (count == 0) + __kmp_printf_no_lock("__kmp_printpool: 
T#%d No free blocks\n", gtid ); +} + +/* ------------------------------------------------------------------------ */ + +#ifdef KMP_DEBUG + +#if BufStats + +/* BSTATS -- Return buffer allocation free space statistics. */ + +static void +bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel) +{ + int bin = 0; + thr_data_t *thr = get_thr_data( th ); + + *nget = thr->numget; + *nrel = thr->numrel; + *curalloc = (bufsize) thr->totalloc; + *totfree = 0; + *maxfree = -1; + + for (bin = 0; bin < MAX_BGET_BINS; ++bin) { + bfhead_t *b = thr->freelist[ bin ].ql.flink; + + while (b != &thr->freelist[ bin ]) { + KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0); + *totfree += b->bh.bb.bsize; + if (b->bh.bb.bsize > *maxfree) { + *maxfree = b->bh.bb.bsize; + } + b = b->ql.flink; /* Link to next buffer */ + } + } +} + +/* BSTATSE -- Return extended statistics */ + +static void +bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel) +{ + thr_data_t *thr = get_thr_data( th ); + + *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr; + *npool = thr->numpblk; + *npget = thr->numpget; + *nprel = thr->numprel; + *ndget = thr->numdget; + *ndrel = thr->numdrel; +} + +#endif /* BufStats */ + +/* BUFDUMP -- Dump the data in a buffer. This is called with the user + data pointer, and backs up to the buffer header. It will + dump either a free block or an allocated one. 
*/ + +static void +bufdump( kmp_info_t *th, void *buf ) +{ + bfhead_t *b; + unsigned char *bdump; + bufsize bdlen; + + b = BFH(((char *) buf) - sizeof(bhead_t)); + KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); + if (b->bh.bb.bsize < 0) { + bdump = (unsigned char *) buf; + bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t); + } else { + bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t)); + bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t); + } + + while (bdlen > 0) { + int i, dupes = 0; + bufsize l = bdlen; + char bhex[50], bascii[20]; + + if (l > 16) { + l = 16; + } + + for (i = 0; i < l; i++) { + (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]); + if (bdump[i] > 0x20 && bdump[i] < 0x7F) + bascii[ i ] = bdump[ i ]; + else + bascii[ i ] = ' '; + } + bascii[i] = 0; + (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii); + bdump += l; + bdlen -= l; + while ((bdlen > 16) && (memcmp((char *) (bdump - 16), + (char *) bdump, 16) == 0)) { + dupes++; + bdump += 16; + bdlen -= 16; + } + if (dupes > 1) { + (void) __kmp_printf_no_lock( + " (%d lines [%d bytes] identical to above line skipped)\n", + dupes, dupes * 16); + } else if (dupes == 1) { + bdump -= 16; + bdlen += 16; + } + } +} + +/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed. + If DUMPALLOC is nonzero, the contents of allocated buffers + are dumped. If DUMPFREE is nonzero, free blocks are + dumped as well. If FreeWipe checking is enabled, free + blocks which have been clobbered will always be dumped. 
*/ + +static void +bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) +{ + bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t)); + + while (b->bh.bb.bsize != ESent) { + bufsize bs = b->bh.bb.bsize; + + if (bs < 0) { + bs = -bs; + (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs); + if (dumpalloc) { + bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); + } + } else { + const char *lerr = ""; + + KMP_DEBUG_ASSERT(bs > 0); + if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { + lerr = " (Bad free list links)"; + } + (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n", + (long) bs, lerr); +#ifdef FreeWipe + lerr = ((char *) b) + sizeof(bfhead_t); + if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || + (memcmp(lerr, lerr + 1, + (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { + (void) __kmp_printf_no_lock( + "(Contents of above free block have been overstored.)\n"); + bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); + } else +#endif + if (dumpfree) { + bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); + } + } + b = BFH(((char *) b) + bs); + } +} + +/* BPOOLV -- Validate a buffer pool. */ + +static int +bpoolv( kmp_info_t *th, void *buf ) +{ + bfhead_t *b = BFH(buf); + + while (b->bh.bb.bsize != ESent) { + bufsize bs = b->bh.bb.bsize; + + if (bs < 0) { + bs = -bs; + } else { +#ifdef FreeWipe + char *lerr = ""; +#endif + + KMP_DEBUG_ASSERT(bs > 0); + if (bs <= 0) { + return 0; + } + if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { + (void) __kmp_printf_no_lock("Free block: size %6ld bytes. 
(Bad free list links)\n", + (long) bs); + KMP_DEBUG_ASSERT(0); + return 0; + } +#ifdef FreeWipe + lerr = ((char *) b) + sizeof(bfhead_t); + if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || + (memcmp(lerr, lerr + 1, + (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { + (void) __kmp_printf_no_lock( + "(Contents of above free block have been overstored.)\n"); + bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); + KMP_DEBUG_ASSERT(0); + return 0; + } +#endif /* FreeWipe */ + } + b = BFH(((char *) b) + bs); + } + return 1; +} + +#endif /* KMP_DEBUG */ + +/* ------------------------------------------------------------------------ */ + +void +__kmp_initialize_bget( kmp_info_t *th ) +{ + KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) ); + + set_thr_data( th ); + + bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free, + (bufsize) __kmp_malloc_pool_incr ); +} + +void +__kmp_finalize_bget( kmp_info_t *th ) +{ + thr_data_t *thr; + bfhead_t *b; + + KMP_DEBUG_ASSERT( th != 0 ); + +#if BufStats + thr = (thr_data_t *) th->th.th_local.bget_data; + KMP_DEBUG_ASSERT( thr != NULL ); + b = thr->last_pool; + + /* If a block-release function is defined, and this free buffer + constitutes the entire block, release it. Note that pool_len + is defined in such a way that the test will fail unless all + pool blocks are the same size. 
*/ + + /* Deallocate the last pool if one exists because we no longer do it in brel() */ + if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 && + b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) + { + KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); + KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent); + KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize); + + /* Unlink the buffer from the free list */ + __kmp_bget_remove_from_freelist( b ); + + KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) ); + + (*thr->relfcn)(b); + thr->numprel++; /* Nr of expansion block releases */ + thr->numpblk--; /* Total number of blocks */ + KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); + } +#endif /* BufStats */ + + /* Deallocate bget_data */ + if ( th->th.th_local.bget_data != NULL ) { + __kmp_free( th->th.th_local.bget_data ); + th->th.th_local.bget_data = NULL; + }; // if +} + +void +kmpc_set_poolsize( size_t size ) +{ + bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc, + (bget_release_t) free, (bufsize) size ); +} + +size_t +kmpc_get_poolsize( void ) +{ + thr_data_t *p; + + p = get_thr_data( __kmp_get_thread() ); + + return p->exp_incr; +} + +void +kmpc_set_poolmode( int mode ) +{ + thr_data_t *p; + + if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) { + p = get_thr_data( __kmp_get_thread() ); + p->mode = (bget_mode_t) mode; + } +} + +int +kmpc_get_poolmode( void ) +{ + thr_data_t *p; + + p = get_thr_data( __kmp_get_thread() ); + + return p->mode; +} + +void +kmpc_get_poolstat( size_t *maxmem, size_t *allmem ) +{ + kmp_info_t *th = __kmp_get_thread(); + bufsize a, b; + + __kmp_bget_dequeue( th ); /* Release any queued buffers */ + + bcheck( th, &a, &b ); + + *maxmem = a; + *allmem = b; +} + +void +kmpc_poolprint( void ) +{ + kmp_info_t *th = __kmp_get_thread(); + + __kmp_bget_dequeue( th ); /* Release any queued buffers */ + + bfreed( th ); +} + +#endif // 
#if KMP_USE_BGET + +/* ------------------------------------------------------------------------ */ + +void * +kmpc_malloc( size_t size ) +{ + void * ptr; + ptr = bget( __kmp_entry_thread(), (bufsize) size ); + + return ptr; +} + +void * +kmpc_calloc( size_t nelem, size_t elsize ) +{ + void * ptr; + ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) ); + + return ptr; +} + +void * +kmpc_realloc( void * ptr, size_t size ) +{ + void * result = NULL; + + if ( ptr == NULL ) { + // If pointer is NULL, realloc behaves like malloc. + result = bget( __kmp_entry_thread(), (bufsize) size ); + } else if ( size == 0 ) { + // If size is 0, realloc behaves like free. + // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before. + // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread(). + brel( __kmp_get_thread(), ptr ); + } else { + result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size ); + }; // if + + return result; +} + +/* NOTE: the library must have already been initialized by a previous allocate */ + +void +kmpc_free( void * ptr ) +{ + if ( ! 
__kmp_init_serial ) { + return; + }; // if + if ( ptr != NULL ) { + kmp_info_t *th = __kmp_get_thread(); + __kmp_bget_dequeue( th ); /* Release any queued buffers */ + brel( th, ptr ); + }; +} + + +/* ------------------------------------------------------------------------ */ + +void * +___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL ) +{ + void * ptr; + KE_TRACE( 30, ( + "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", + th, + (int) size + KMP_SRC_LOC_PARM + ) ); + ptr = bget( th, (bufsize) size ); + KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) ); + return ptr; +} + +void * +___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL ) +{ + void * ptr; + KE_TRACE( 30, ( + "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", + th, + (int) nelem, + (int) elsize + KMP_SRC_LOC_PARM + ) ); + ptr = bgetz( th, (bufsize) (nelem * elsize) ); + KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) ); + return ptr; +} + +void * +___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL ) +{ + KE_TRACE( 30, ( + "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", + th, + ptr, + (int) size + KMP_SRC_LOC_PARM + ) ); + ptr = bgetr( th, ptr, (bufsize) size ); + KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) ); + return ptr; +} + +void +___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL ) +{ + KE_TRACE( 30, ( + "-> __kmp_thread_free( %p, %p ) called from %s:%d\n", + th, + ptr + KMP_SRC_LOC_PARM + ) ); + if ( ptr != NULL ) { + __kmp_bget_dequeue( th ); /* Release any queued buffers */ + brel( th, ptr ); + } + KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ +/* + If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. 
It causes memory leaks, but it + may be useful for debugging memory corruptions, used freed pointers, etc. +*/ +/* #define LEAK_MEMORY */ + +struct kmp_mem_descr { // Memory block descriptor. + void * ptr_allocated; // Pointer returned by malloc(), subject for free(). + size_t size_allocated; // Size of allocated memory block. + void * ptr_aligned; // Pointer to aligned memory, to be used by client code. + size_t size_aligned; // Size of aligned memory block. +}; +typedef struct kmp_mem_descr kmp_mem_descr_t; + +/* + Allocate memory on requested boundary, fill allocated memory with 0x00. + NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. + Must use __kmp_free when freeing memory allocated by this routine! + */ +static +void * +___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL ) +{ + /* + __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to + return properly aligned pointer. Original pointer returned by malloc() and size of allocated + block is saved in descriptor just before the aligned pointer. This information used by + __kmp_free() -- it has to pass to free() original pointer, not aligned one. + + +---------+------------+-----------------------------------+---------+ + | padding | descriptor | aligned block | padding | + +---------+------------+-----------------------------------+---------+ + ^ ^ + | | + | +- Aligned pointer returned to caller + +- Pointer returned by malloc() + + Aligned block is filled with zeros, paddings are filled with 0xEF. + */ + + kmp_mem_descr_t descr; + kmp_uintptr_t addr_allocated; // Address returned by malloc(). + kmp_uintptr_t addr_aligned; // Aligned address to return to caller. + kmp_uintptr_t addr_descr; // Address of memory block descriptor. 
+ + KE_TRACE( 25, ( + "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n", + (int) size, + (int) alignment + KMP_SRC_LOC_PARM + ) ); + + KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too + KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) ); + // Make sure kmp_uintptr_t is enough to store addresses. + + descr.size_aligned = size; + descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment; + + #if KMP_DEBUG + descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ ); + #else + descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM ); + #endif __lsan_ignore_object(descr.ptr_allocated); // espetrov@yandex-team.ru: asan considers descr.ptr_allocated leaked because of address alignment arithmetics - KE_TRACE( 10, ( - " malloc( %d ) returned %p\n", - (int) descr.size_allocated, - descr.ptr_allocated - ) ); - if ( descr.ptr_allocated == NULL ) { - KMP_FATAL( OutOfHeapMemory ); - }; - - addr_allocated = (kmp_uintptr_t) descr.ptr_allocated; - addr_aligned = - ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment ) - & ~ ( alignment - 1 ); - addr_descr = addr_aligned - sizeof( kmp_mem_descr_t ); - - descr.ptr_aligned = (void *) addr_aligned; - - KE_TRACE( 26, ( - " ___kmp_allocate_align: " - "ptr_allocated=%p, size_allocated=%d, " - "ptr_aligned=%p, size_aligned=%d\n", - descr.ptr_allocated, - (int) descr.size_allocated, - descr.ptr_aligned, - (int) descr.size_aligned - ) ); - - KMP_DEBUG_ASSERT( addr_allocated <= addr_descr ); - KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned ); - KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated ); - KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 ); - - #ifdef KMP_DEBUG - memset( descr.ptr_allocated, 0xEF, descr.size_allocated ); - // Fill allocated memory block with 0xEF. 
- #endif - memset( descr.ptr_aligned, 0x00, descr.size_aligned ); - // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not - // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding - // bytes remain filled with 0xEF in debugging library.) - * ( (kmp_mem_descr_t *) addr_descr ) = descr; - - KMP_MB(); - - KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) ); - return descr.ptr_aligned; - -} // func ___kmp_allocate_align - - -/* - Allocate memory on cache line boundary, fill allocated memory with 0x00. - Do not call this func directly! Use __kmp_allocate macro instead. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. - Must use __kmp_free when freeing memory allocated by this routine! - */ -void * -___kmp_allocate( size_t size KMP_SRC_LOC_DECL ) -{ - - void * ptr; - KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) ); - ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM ); - KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) ); - return ptr; - -} // func ___kmp_allocate - -#if (BUILD_MEMORY==FIRST_TOUCH) -void * -__kmp_ft_page_allocate(size_t size) -{ - void *adr, *aadr; -#if KMP_OS_LINUX - /* TODO: Use this function to get page size everywhere */ - int page_size = getpagesize(); -#else - /* TODO: Find windows function to get page size and use it everywhere */ - int page_size = PAGE_SIZE; -#endif /* KMP_OS_LINUX */ - - adr = (void *) __kmp_thread_malloc( __kmp_get_thread(), - size + page_size + KMP_PTR_SKIP); - if ( adr == 0 ) - KMP_FATAL( OutOfHeapMemory ); - - /* check to see if adr is on a page boundary. */ - if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0) - /* nothing to do if adr is already on a page boundary. */ - aadr = adr; - else - /* else set aadr to the first page boundary in the allocated memory. 
*/ - aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) ); - - /* the first touch by the owner thread. */ - *((void**)aadr) = adr; - - /* skip the memory space used for storing adr above. */ - return (void*)((char*)aadr + KMP_PTR_SKIP); -} -#endif - -/* - Allocate memory on page boundary, fill allocated memory with 0x00. - Does not call this func directly! Use __kmp_page_allocate macro instead. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. - Must use __kmp_free when freeing memory allocated by this routine! - */ -void * -___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL ) -{ - int page_size = 8 * 1024; - void * ptr; - - KE_TRACE( 25, ( - "-> __kmp_page_allocate( %d ) called from %s:%d\n", - (int) size - KMP_SRC_LOC_PARM - ) ); - ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM ); - KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) ); - return ptr; -} // ___kmp_page_allocate - -/* - Free memory allocated by __kmp_allocate() and __kmp_page_allocate(). - In debug mode, fill the memory block with 0xEF before call to free(). -*/ -void -___kmp_free( void * ptr KMP_SRC_LOC_DECL ) -{ - - kmp_mem_descr_t descr; - kmp_uintptr_t addr_allocated; // Address returned by malloc(). - kmp_uintptr_t addr_aligned; // Aligned address passed by caller. 
- - KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) ); - KMP_ASSERT( ptr != NULL ); - - descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) ); - - KE_TRACE( 26, ( " __kmp_free: " - "ptr_allocated=%p, size_allocated=%d, " - "ptr_aligned=%p, size_aligned=%d\n", - descr.ptr_allocated, (int) descr.size_allocated, - descr.ptr_aligned, (int) descr.size_aligned )); - - addr_allocated = (kmp_uintptr_t) descr.ptr_allocated; - addr_aligned = (kmp_uintptr_t) descr.ptr_aligned; - - KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 ); - KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr ); - KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned ); - KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated ); - KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated ); - - #ifdef KMP_DEBUG - memset( descr.ptr_allocated, 0xEF, descr.size_allocated ); - // Fill memory block with 0xEF, it helps catch using freed memory. - #endif - - #ifndef LEAK_MEMORY - KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) ); - # ifdef KMP_DEBUG - _free_src_loc( descr.ptr_allocated, _file_, _line_ ); - # else - free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM ); - # endif - #endif - - KMP_MB(); - - KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) ); - -} // func ___kmp_free - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if USE_FAST_MEMORY == 3 -// Allocate fast memory by first scanning the thread's free lists -// If a chunk the right size exists, grab it off the free list. -// Otherwise allocate normally using kmp_thread_malloc. - -// AC: How to choose the limit? Just get 16 for now... 
-#define KMP_FREE_LIST_LIMIT 16 - -// Always use 128 bytes for determining buckets for caching memory blocks -#define DCACHE_LINE 128 - -void * -___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL ) -{ - void * ptr; - int num_lines; - int idx; - int index; - void * alloc_ptr; - size_t alloc_size; - kmp_mem_descr_t * descr; - - KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", - __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) ); - - num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE; - idx = num_lines - 1; - KMP_DEBUG_ASSERT( idx >= 0 ); - if ( idx < 2 ) { - index = 0; // idx is [ 0, 1 ], use first free list - num_lines = 2; // 1, 2 cache lines or less than cache line - } else if ( ( idx >>= 2 ) == 0 ) { - index = 1; // idx is [ 2, 3 ], use second free list - num_lines = 4; // 3, 4 cache lines - } else if ( ( idx >>= 2 ) == 0 ) { - index = 2; // idx is [ 4, 15 ], use third free list - num_lines = 16; // 5, 6, ..., 16 cache lines - } else if ( ( idx >>= 2 ) == 0 ) { - index = 3; // idx is [ 16, 63 ], use fourth free list - num_lines = 64; // 17, 18, ..., 64 cache lines - } else { - goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists - } - - ptr = this_thr->th.th_free_lists[index].th_free_list_self; - if ( ptr != NULL ) { - // pop the head of no-sync free list - this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); - KMP_DEBUG_ASSERT( this_thr == - ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned ); - goto end; - }; - ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync ); - if ( ptr != NULL ) { - // no-sync free list is empty, use sync free list (filled in by other threads only) - // pop the head of the sync free list, push NULL instead - while ( ! 
KMP_COMPARE_AND_STORE_PTR( - &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) ) - { - KMP_CPU_PAUSE(); - ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync ); - } - // push the rest of chain into no-sync free list (can be NULL if there was the only block) - this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); - KMP_DEBUG_ASSERT( this_thr == - ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned ); - goto end; - } - - alloc_call: - // haven't found block in the free lists, thus allocate it - size = num_lines * DCACHE_LINE; - - alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE; - KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n", - __kmp_gtid_from_thread( this_thr ), alloc_size ) ); - alloc_ptr = bget( this_thr, (bufsize) alloc_size ); - - // align ptr to DCACHE_LINE - ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 )); - descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) ); - - descr->ptr_allocated = alloc_ptr; // remember allocated pointer - // we don't need size_allocated - descr->ptr_aligned = (void *)this_thr; // remember allocating thread - // (it is already saved in bget buffer, - // but we may want to use another allocator in future) - descr->size_aligned = size; - - end: - KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n", - __kmp_gtid_from_thread( this_thr ), ptr ) ); - return ptr; -} // func __kmp_fast_allocate - -// Free fast memory and place it on the thread's free list if it is of -// the correct size. 
-void -___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL ) -{ - kmp_mem_descr_t * descr; - kmp_info_t * alloc_thr; - size_t size; - size_t idx; - int index; - - KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", - __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) ); - KMP_ASSERT( ptr != NULL ); - - descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) ); - - KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", - (int) descr->size_aligned ) ); - - size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines - - idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block - if ( idx == size ) { - index = 0; // 2 cache lines - } else if ( ( idx <<= 1 ) == size ) { - index = 1; // 4 cache lines - } else if ( ( idx <<= 2 ) == size ) { - index = 2; // 16 cache lines - } else if ( ( idx <<= 2 ) == size ) { - index = 3; // 64 cache lines - } else { - KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 ); - goto free_call; // 65 or more cache lines ( > 8KB ) - } - - alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block - if ( alloc_thr == this_thr ) { - // push block to self no-sync free list, linking previous head (LIFO) - *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; - this_thr->th.th_free_lists[index].th_free_list_self = ptr; - } else { - void * head = this_thr->th.th_free_lists[index].th_free_list_other; - if ( head == NULL ) { - // Create new free list - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - *((void **)ptr) = NULL; // mark the tail of the list - descr->size_allocated = (size_t)1; // head of the list keeps its length - } else { - // need to check existed "other" list's owner thread and size of queue - kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) ); - kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes - size_t q_sz = dsc->size_allocated + 1; // 
new size in case we add current task - if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) { - // we can add current task to "other" list, no sync needed - *((void **)ptr) = head; - descr->size_allocated = q_sz; - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - } else { - // either queue blocks owner is changing or size limit exceeded - // return old queue to allocating thread (q_th) synchroneously, - // and start new list for alloc_thr's tasks - void * old_ptr; - void * tail = head; - void * next = *((void **)head); - while ( next != NULL ) { - KMP_DEBUG_ASSERT( - // queue size should decrease by 1 each step through the list - ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 == - ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated ); - tail = next; // remember tail node - next = *((void **)next); - } - KMP_DEBUG_ASSERT( q_th != NULL ); - // push block to owner's sync free list - old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync ); - /* the next pointer must be set before setting free_list to ptr to avoid - exposing a broken list to other threads, even for an instant. */ - *((void **)tail) = old_ptr; - - while ( ! 
KMP_COMPARE_AND_STORE_PTR( - &q_th->th.th_free_lists[index].th_free_list_sync, - old_ptr, - head ) ) - { - KMP_CPU_PAUSE(); - old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync ); - *((void **)tail) = old_ptr; - } - - // start new list of not-selt tasks - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - *((void **)ptr) = NULL; - descr->size_allocated = (size_t)1; // head of queue keeps its length - } - } - } - goto end; - - free_call: - KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", - __kmp_gtid_from_thread( this_thr), size ) ); - __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */ - brel( this_thr, descr->ptr_allocated ); - - end: - KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) ); - -} // func __kmp_fast_free - - -// Initialize the thread free lists related to fast memory -// Only do this when a thread is initially created. -void -__kmp_initialize_fast_memory( kmp_info_t *this_thr ) -{ - KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) ); - - memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) ); -} - -// Free the memory in the thread free lists related to fast memory -// Only do this when a thread is being reaped (destroyed). -void -__kmp_free_fast_memory( kmp_info_t *th ) -{ - // Suppose we use BGET underlying allocator, walk through its structures... - int bin; - thr_data_t * thr = get_thr_data( th ); - void ** lst = NULL; - - KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n", - __kmp_gtid_from_thread( th ) ) ); - - __kmp_bget_dequeue( th ); // Release any queued buffers - - // Dig through free lists and extract all allocated blocks - for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) { - bfhead_t * b = thr->freelist[ bin ].ql.flink; - while ( b != &thr->freelist[ bin ] ) { - if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // if the buffer is an allocated address? 
- *((void**)b) = lst; // link the list (override bthr, but keep flink yet) - lst = (void**)b; // push b into lst - } - b = b->ql.flink; // get next buffer - } - } - while ( lst != NULL ) { - void * next = *lst; - KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n", - lst, next, th, __kmp_gtid_from_thread( th ) ) ); - (*thr->relfcn)(lst); - #if BufStats - // count blocks to prevent problems in __kmp_finalize_bget() - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - #endif - lst = (void**)next; - } - - KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n", - __kmp_gtid_from_thread( th ) ) ); -} - -#endif // USE_FAST_MEMORY + KE_TRACE( 10, ( + " malloc( %d ) returned %p\n", + (int) descr.size_allocated, + descr.ptr_allocated + ) ); + if ( descr.ptr_allocated == NULL ) { + KMP_FATAL( OutOfHeapMemory ); + }; + + addr_allocated = (kmp_uintptr_t) descr.ptr_allocated; + addr_aligned = + ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment ) + & ~ ( alignment - 1 ); + addr_descr = addr_aligned - sizeof( kmp_mem_descr_t ); + + descr.ptr_aligned = (void *) addr_aligned; + + KE_TRACE( 26, ( + " ___kmp_allocate_align: " + "ptr_allocated=%p, size_allocated=%d, " + "ptr_aligned=%p, size_aligned=%d\n", + descr.ptr_allocated, + (int) descr.size_allocated, + descr.ptr_aligned, + (int) descr.size_aligned + ) ); + + KMP_DEBUG_ASSERT( addr_allocated <= addr_descr ); + KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned ); + KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated ); + KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 ); + + #ifdef KMP_DEBUG + memset( descr.ptr_allocated, 0xEF, descr.size_allocated ); + // Fill allocated memory block with 0xEF. + #endif + memset( descr.ptr_aligned, 0x00, descr.size_aligned ); + // Fill the aligned memory block (which is intended for using by caller) with 0x00. 
Do not + // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding + // bytes remain filled with 0xEF in debugging library.) + * ( (kmp_mem_descr_t *) addr_descr ) = descr; + + KMP_MB(); + + KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) ); + return descr.ptr_aligned; + +} // func ___kmp_allocate_align + + +/* + Allocate memory on cache line boundary, fill allocated memory with 0x00. + Do not call this func directly! Use __kmp_allocate macro instead. + NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. + Must use __kmp_free when freeing memory allocated by this routine! + */ +void * +___kmp_allocate( size_t size KMP_SRC_LOC_DECL ) +{ + + void * ptr; + KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) ); + ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM ); + KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) ); + return ptr; + +} // func ___kmp_allocate + +#if (BUILD_MEMORY==FIRST_TOUCH) +void * +__kmp_ft_page_allocate(size_t size) +{ + void *adr, *aadr; +#if KMP_OS_LINUX + /* TODO: Use this function to get page size everywhere */ + int page_size = getpagesize(); +#else + /* TODO: Find windows function to get page size and use it everywhere */ + int page_size = PAGE_SIZE; +#endif /* KMP_OS_LINUX */ + + adr = (void *) __kmp_thread_malloc( __kmp_get_thread(), + size + page_size + KMP_PTR_SKIP); + if ( adr == 0 ) + KMP_FATAL( OutOfHeapMemory ); + + /* check to see if adr is on a page boundary. */ + if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0) + /* nothing to do if adr is already on a page boundary. */ + aadr = adr; + else + /* else set aadr to the first page boundary in the allocated memory. */ + aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) ); + + /* the first touch by the owner thread. 
*/ + *((void**)aadr) = adr; + + /* skip the memory space used for storing adr above. */ + return (void*)((char*)aadr + KMP_PTR_SKIP); +} +#endif + +/* + Allocate memory on page boundary, fill allocated memory with 0x00. + Does not call this func directly! Use __kmp_page_allocate macro instead. + NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. + Must use __kmp_free when freeing memory allocated by this routine! + */ +void * +___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL ) +{ + int page_size = 8 * 1024; + void * ptr; + + KE_TRACE( 25, ( + "-> __kmp_page_allocate( %d ) called from %s:%d\n", + (int) size + KMP_SRC_LOC_PARM + ) ); + ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM ); + KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) ); + return ptr; +} // ___kmp_page_allocate + +/* + Free memory allocated by __kmp_allocate() and __kmp_page_allocate(). + In debug mode, fill the memory block with 0xEF before call to free(). +*/ +void +___kmp_free( void * ptr KMP_SRC_LOC_DECL ) +{ + + kmp_mem_descr_t descr; + kmp_uintptr_t addr_allocated; // Address returned by malloc(). + kmp_uintptr_t addr_aligned; // Aligned address passed by caller. 
+ + KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) ); + KMP_ASSERT( ptr != NULL ); + + descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) ); + + KE_TRACE( 26, ( " __kmp_free: " + "ptr_allocated=%p, size_allocated=%d, " + "ptr_aligned=%p, size_aligned=%d\n", + descr.ptr_allocated, (int) descr.size_allocated, + descr.ptr_aligned, (int) descr.size_aligned )); + + addr_allocated = (kmp_uintptr_t) descr.ptr_allocated; + addr_aligned = (kmp_uintptr_t) descr.ptr_aligned; + + KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 ); + KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr ); + KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned ); + KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated ); + KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated ); + + #ifdef KMP_DEBUG + memset( descr.ptr_allocated, 0xEF, descr.size_allocated ); + // Fill memory block with 0xEF, it helps catch using freed memory. + #endif + + #ifndef LEAK_MEMORY + KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) ); + # ifdef KMP_DEBUG + _free_src_loc( descr.ptr_allocated, _file_, _line_ ); + # else + free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM ); + # endif + #endif + + KMP_MB(); + + KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) ); + +} // func ___kmp_free + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if USE_FAST_MEMORY == 3 +// Allocate fast memory by first scanning the thread's free lists +// If a chunk the right size exists, grab it off the free list. +// Otherwise allocate normally using kmp_thread_malloc. + +// AC: How to choose the limit? Just get 16 for now... 
+#define KMP_FREE_LIST_LIMIT 16 + +// Always use 128 bytes for determining buckets for caching memory blocks +#define DCACHE_LINE 128 + +void * +___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL ) +{ + void * ptr; + int num_lines; + int idx; + int index; + void * alloc_ptr; + size_t alloc_size; + kmp_mem_descr_t * descr; + + KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", + __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) ); + + num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE; + idx = num_lines - 1; + KMP_DEBUG_ASSERT( idx >= 0 ); + if ( idx < 2 ) { + index = 0; // idx is [ 0, 1 ], use first free list + num_lines = 2; // 1, 2 cache lines or less than cache line + } else if ( ( idx >>= 2 ) == 0 ) { + index = 1; // idx is [ 2, 3 ], use second free list + num_lines = 4; // 3, 4 cache lines + } else if ( ( idx >>= 2 ) == 0 ) { + index = 2; // idx is [ 4, 15 ], use third free list + num_lines = 16; // 5, 6, ..., 16 cache lines + } else if ( ( idx >>= 2 ) == 0 ) { + index = 3; // idx is [ 16, 63 ], use fourth free list + num_lines = 64; // 17, 18, ..., 64 cache lines + } else { + goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists + } + + ptr = this_thr->th.th_free_lists[index].th_free_list_self; + if ( ptr != NULL ) { + // pop the head of no-sync free list + this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); + KMP_DEBUG_ASSERT( this_thr == + ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned ); + goto end; + }; + ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync ); + if ( ptr != NULL ) { + // no-sync free list is empty, use sync free list (filled in by other threads only) + // pop the head of the sync free list, push NULL instead + while ( ! 
KMP_COMPARE_AND_STORE_PTR( + &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) ) + { + KMP_CPU_PAUSE(); + ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync ); + } + // push the rest of chain into no-sync free list (can be NULL if there was the only block) + this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); + KMP_DEBUG_ASSERT( this_thr == + ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned ); + goto end; + } + + alloc_call: + // haven't found block in the free lists, thus allocate it + size = num_lines * DCACHE_LINE; + + alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE; + KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n", + __kmp_gtid_from_thread( this_thr ), alloc_size ) ); + alloc_ptr = bget( this_thr, (bufsize) alloc_size ); + + // align ptr to DCACHE_LINE + ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 )); + descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) ); + + descr->ptr_allocated = alloc_ptr; // remember allocated pointer + // we don't need size_allocated + descr->ptr_aligned = (void *)this_thr; // remember allocating thread + // (it is already saved in bget buffer, + // but we may want to use another allocator in future) + descr->size_aligned = size; + + end: + KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n", + __kmp_gtid_from_thread( this_thr ), ptr ) ); + return ptr; +} // func __kmp_fast_allocate + +// Free fast memory and place it on the thread's free list if it is of +// the correct size. 
+void +___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL ) +{ + kmp_mem_descr_t * descr; + kmp_info_t * alloc_thr; + size_t size; + size_t idx; + int index; + + KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", + __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) ); + KMP_ASSERT( ptr != NULL ); + + descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) ); + + KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", + (int) descr->size_aligned ) ); + + size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines + + idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block + if ( idx == size ) { + index = 0; // 2 cache lines + } else if ( ( idx <<= 1 ) == size ) { + index = 1; // 4 cache lines + } else if ( ( idx <<= 2 ) == size ) { + index = 2; // 16 cache lines + } else if ( ( idx <<= 2 ) == size ) { + index = 3; // 64 cache lines + } else { + KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 ); + goto free_call; // 65 or more cache lines ( > 8KB ) + } + + alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block + if ( alloc_thr == this_thr ) { + // push block to self no-sync free list, linking previous head (LIFO) + *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; + this_thr->th.th_free_lists[index].th_free_list_self = ptr; + } else { + void * head = this_thr->th.th_free_lists[index].th_free_list_other; + if ( head == NULL ) { + // Create new free list + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + *((void **)ptr) = NULL; // mark the tail of the list + descr->size_allocated = (size_t)1; // head of the list keeps its length + } else { + // need to check existed "other" list's owner thread and size of queue + kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) ); + kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes + size_t q_sz = dsc->size_allocated + 1; // 
new size in case we add current task + if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) { + // we can add current task to "other" list, no sync needed + *((void **)ptr) = head; + descr->size_allocated = q_sz; + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + } else { + // either queue blocks owner is changing or size limit exceeded + // return old queue to allocating thread (q_th) synchroneously, + // and start new list for alloc_thr's tasks + void * old_ptr; + void * tail = head; + void * next = *((void **)head); + while ( next != NULL ) { + KMP_DEBUG_ASSERT( + // queue size should decrease by 1 each step through the list + ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 == + ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated ); + tail = next; // remember tail node + next = *((void **)next); + } + KMP_DEBUG_ASSERT( q_th != NULL ); + // push block to owner's sync free list + old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync ); + /* the next pointer must be set before setting free_list to ptr to avoid + exposing a broken list to other threads, even for an instant. */ + *((void **)tail) = old_ptr; + + while ( ! 
KMP_COMPARE_AND_STORE_PTR( + &q_th->th.th_free_lists[index].th_free_list_sync, + old_ptr, + head ) ) + { + KMP_CPU_PAUSE(); + old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync ); + *((void **)tail) = old_ptr; + } + + // start new list of not-selt tasks + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + *((void **)ptr) = NULL; + descr->size_allocated = (size_t)1; // head of queue keeps its length + } + } + } + goto end; + + free_call: + KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", + __kmp_gtid_from_thread( this_thr), size ) ); + __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */ + brel( this_thr, descr->ptr_allocated ); + + end: + KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) ); + +} // func __kmp_fast_free + + +// Initialize the thread free lists related to fast memory +// Only do this when a thread is initially created. +void +__kmp_initialize_fast_memory( kmp_info_t *this_thr ) +{ + KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) ); + + memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) ); +} + +// Free the memory in the thread free lists related to fast memory +// Only do this when a thread is being reaped (destroyed). +void +__kmp_free_fast_memory( kmp_info_t *th ) +{ + // Suppose we use BGET underlying allocator, walk through its structures... + int bin; + thr_data_t * thr = get_thr_data( th ); + void ** lst = NULL; + + KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n", + __kmp_gtid_from_thread( th ) ) ); + + __kmp_bget_dequeue( th ); // Release any queued buffers + + // Dig through free lists and extract all allocated blocks + for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) { + bfhead_t * b = thr->freelist[ bin ].ql.flink; + while ( b != &thr->freelist[ bin ] ) { + if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // if the buffer is an allocated address? 
+ *((void**)b) = lst; // link the list (override bthr, but keep flink yet) + lst = (void**)b; // push b into lst + } + b = b->ql.flink; // get next buffer + } + } + while ( lst != NULL ) { + void * next = *lst; + KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n", + lst, next, th, __kmp_gtid_from_thread( th ) ) ); + (*thr->relfcn)(lst); + #if BufStats + // count blocks to prevent problems in __kmp_finalize_bget() + thr->numprel++; /* Nr of expansion block releases */ + thr->numpblk--; /* Total number of blocks */ + #endif + lst = (void**)next; + } + + KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n", + __kmp_gtid_from_thread( th ) ) ); +} + +#endif // USE_FAST_MEMORY diff --git a/contrib/libs/cxxsupp/openmp/kmp_atomic.c b/contrib/libs/cxxsupp/openmp/kmp_atomic.c index a0ec4a1f5f3..5d5d3448f29 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_atomic.c +++ b/contrib/libs/cxxsupp/openmp/kmp_atomic.c @@ -1,2907 +1,2907 @@ -/* - * kmp_atomic.c -- ATOMIC implementation routines - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp_atomic.h" -#include "kmp.h" // TRUE, asm routines prototypes - -typedef unsigned char uchar; -typedef unsigned short ushort; - -/*! -@defgroup ATOMIC_OPS Atomic Operations -These functions are used for implementing the many different varieties of atomic operations. - -The compiler is at liberty to inline atomic operations that are naturally supported -by the target architecture. 
For instance on IA-32 architecture an atomic like this can be inlined -@code -static int s = 0; -#pragma omp atomic - s++; -@endcode -using the single instruction: `lock; incl s` - -However the runtime does provide entrypoints for these operations to support compilers that choose -not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the -increment above.) - -The names of the functions are encoded by using the data type name and the operation name, as in these tables. - -Data Type | Data type encoding ------------|--------------- -int8_t | `fixed1` -uint8_t | `fixed1u` -int16_t | `fixed2` -uint16_t | `fixed2u` -int32_t | `fixed4` -uint32_t | `fixed4u` -int32_t | `fixed8` -uint32_t | `fixed8u` -float | `float4` -double | `float8` -float 10 (8087 eighty bit float) | `float10` -complex | `cmplx4` -complex | `cmplx8` -complex | `cmplx10` -
- -Operation | Operation encoding -----------|------------------- -+ | add -- | sub -\* | mul -/ | div -& | andb -<< | shl -\>\> | shr -\| | orb -^ | xor -&& | andl -\|\| | orl -maximum | max -minimum | min -.eqv. | eqv -.neqv. | neqv - -
-For non-commutative operations, `_rev` can also be added for the reversed operation. -For the functions that capture the result, the suffix `_cpt` is added. - -Update Functions -================ -The general form of an atomic function that just performs an update (without a `capture`) -@code -void __kmpc_atomic__( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); -@endcode -@param ident_t a pointer to source location -@param gtid the global thread id -@param lhs a pointer to the left operand -@param rhs the right operand - -`capture` functions -=================== -The capture functions perform an atomic update and return a result, which is either the value -before the capture, or that after. They take an additional argument to determine which result is returned. -Their general form is therefore -@code -TYPE __kmpc_atomic___cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ); -@endcode -@param ident_t a pointer to source location -@param gtid the global thread id -@param lhs a pointer to the left operand -@param rhs the right operand -@param flag one if the result is to be captured *after* the operation, zero if captured *before*. - -The one set of exceptions to this is the `complex` type where the value is not returned, -rather an extra argument pointer is passed. - -They look like -@code -void __kmpc_atomic_cmplx4__cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); -@endcode - -Read and Write Operations -========================= -The OpenMP* standard now supports atomic operations that simply ensure that the -value is read or written atomically, with no modification -performed. In many cases on IA-32 architecture these operations can be inlined since -the architecture guarantees that no tearing occurs on aligned objects -accessed with a single memory operation of up to 64 bits in size. 
- -The general form of the read operations is -@code -TYPE __kmpc_atomic__rd ( ident_t *id_ref, int gtid, TYPE * loc ); -@endcode - -For the write operations the form is -@code -void __kmpc_atomic__wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); -@endcode - -Full list of functions -====================== -This leads to the generation of 376 atomic functions, as follows. - -Functons for integers ---------------------- -There are versions here for integers of size 1,2,4 and 8 bytes both signed and unsigned (where that matters). -@code - __kmpc_atomic_fixed1_add - __kmpc_atomic_fixed1_add_cpt - __kmpc_atomic_fixed1_add_fp - __kmpc_atomic_fixed1_andb - __kmpc_atomic_fixed1_andb_cpt - __kmpc_atomic_fixed1_andl - __kmpc_atomic_fixed1_andl_cpt - __kmpc_atomic_fixed1_div - __kmpc_atomic_fixed1_div_cpt - __kmpc_atomic_fixed1_div_cpt_rev - __kmpc_atomic_fixed1_div_float8 - __kmpc_atomic_fixed1_div_fp - __kmpc_atomic_fixed1_div_rev - __kmpc_atomic_fixed1_eqv - __kmpc_atomic_fixed1_eqv_cpt - __kmpc_atomic_fixed1_max - __kmpc_atomic_fixed1_max_cpt - __kmpc_atomic_fixed1_min - __kmpc_atomic_fixed1_min_cpt - __kmpc_atomic_fixed1_mul - __kmpc_atomic_fixed1_mul_cpt - __kmpc_atomic_fixed1_mul_float8 - __kmpc_atomic_fixed1_mul_fp - __kmpc_atomic_fixed1_neqv - __kmpc_atomic_fixed1_neqv_cpt - __kmpc_atomic_fixed1_orb - __kmpc_atomic_fixed1_orb_cpt - __kmpc_atomic_fixed1_orl - __kmpc_atomic_fixed1_orl_cpt - __kmpc_atomic_fixed1_rd - __kmpc_atomic_fixed1_shl - __kmpc_atomic_fixed1_shl_cpt - __kmpc_atomic_fixed1_shl_cpt_rev - __kmpc_atomic_fixed1_shl_rev - __kmpc_atomic_fixed1_shr - __kmpc_atomic_fixed1_shr_cpt - __kmpc_atomic_fixed1_shr_cpt_rev - __kmpc_atomic_fixed1_shr_rev - __kmpc_atomic_fixed1_sub - __kmpc_atomic_fixed1_sub_cpt - __kmpc_atomic_fixed1_sub_cpt_rev - __kmpc_atomic_fixed1_sub_fp - __kmpc_atomic_fixed1_sub_rev - __kmpc_atomic_fixed1_swp - __kmpc_atomic_fixed1_wr - __kmpc_atomic_fixed1_xor - __kmpc_atomic_fixed1_xor_cpt - __kmpc_atomic_fixed1u_div - 
__kmpc_atomic_fixed1u_div_cpt - __kmpc_atomic_fixed1u_div_cpt_rev - __kmpc_atomic_fixed1u_div_fp - __kmpc_atomic_fixed1u_div_rev - __kmpc_atomic_fixed1u_shr - __kmpc_atomic_fixed1u_shr_cpt - __kmpc_atomic_fixed1u_shr_cpt_rev - __kmpc_atomic_fixed1u_shr_rev - __kmpc_atomic_fixed2_add - __kmpc_atomic_fixed2_add_cpt - __kmpc_atomic_fixed2_add_fp - __kmpc_atomic_fixed2_andb - __kmpc_atomic_fixed2_andb_cpt - __kmpc_atomic_fixed2_andl - __kmpc_atomic_fixed2_andl_cpt - __kmpc_atomic_fixed2_div - __kmpc_atomic_fixed2_div_cpt - __kmpc_atomic_fixed2_div_cpt_rev - __kmpc_atomic_fixed2_div_float8 - __kmpc_atomic_fixed2_div_fp - __kmpc_atomic_fixed2_div_rev - __kmpc_atomic_fixed2_eqv - __kmpc_atomic_fixed2_eqv_cpt - __kmpc_atomic_fixed2_max - __kmpc_atomic_fixed2_max_cpt - __kmpc_atomic_fixed2_min - __kmpc_atomic_fixed2_min_cpt - __kmpc_atomic_fixed2_mul - __kmpc_atomic_fixed2_mul_cpt - __kmpc_atomic_fixed2_mul_float8 - __kmpc_atomic_fixed2_mul_fp - __kmpc_atomic_fixed2_neqv - __kmpc_atomic_fixed2_neqv_cpt - __kmpc_atomic_fixed2_orb - __kmpc_atomic_fixed2_orb_cpt - __kmpc_atomic_fixed2_orl - __kmpc_atomic_fixed2_orl_cpt - __kmpc_atomic_fixed2_rd - __kmpc_atomic_fixed2_shl - __kmpc_atomic_fixed2_shl_cpt - __kmpc_atomic_fixed2_shl_cpt_rev - __kmpc_atomic_fixed2_shl_rev - __kmpc_atomic_fixed2_shr - __kmpc_atomic_fixed2_shr_cpt - __kmpc_atomic_fixed2_shr_cpt_rev - __kmpc_atomic_fixed2_shr_rev - __kmpc_atomic_fixed2_sub - __kmpc_atomic_fixed2_sub_cpt - __kmpc_atomic_fixed2_sub_cpt_rev - __kmpc_atomic_fixed2_sub_fp - __kmpc_atomic_fixed2_sub_rev - __kmpc_atomic_fixed2_swp - __kmpc_atomic_fixed2_wr - __kmpc_atomic_fixed2_xor - __kmpc_atomic_fixed2_xor_cpt - __kmpc_atomic_fixed2u_div - __kmpc_atomic_fixed2u_div_cpt - __kmpc_atomic_fixed2u_div_cpt_rev - __kmpc_atomic_fixed2u_div_fp - __kmpc_atomic_fixed2u_div_rev - __kmpc_atomic_fixed2u_shr - __kmpc_atomic_fixed2u_shr_cpt - __kmpc_atomic_fixed2u_shr_cpt_rev - __kmpc_atomic_fixed2u_shr_rev - __kmpc_atomic_fixed4_add - 
__kmpc_atomic_fixed4_add_cpt - __kmpc_atomic_fixed4_add_fp - __kmpc_atomic_fixed4_andb - __kmpc_atomic_fixed4_andb_cpt - __kmpc_atomic_fixed4_andl - __kmpc_atomic_fixed4_andl_cpt - __kmpc_atomic_fixed4_div - __kmpc_atomic_fixed4_div_cpt - __kmpc_atomic_fixed4_div_cpt_rev - __kmpc_atomic_fixed4_div_float8 - __kmpc_atomic_fixed4_div_fp - __kmpc_atomic_fixed4_div_rev - __kmpc_atomic_fixed4_eqv - __kmpc_atomic_fixed4_eqv_cpt - __kmpc_atomic_fixed4_max - __kmpc_atomic_fixed4_max_cpt - __kmpc_atomic_fixed4_min - __kmpc_atomic_fixed4_min_cpt - __kmpc_atomic_fixed4_mul - __kmpc_atomic_fixed4_mul_cpt - __kmpc_atomic_fixed4_mul_float8 - __kmpc_atomic_fixed4_mul_fp - __kmpc_atomic_fixed4_neqv - __kmpc_atomic_fixed4_neqv_cpt - __kmpc_atomic_fixed4_orb - __kmpc_atomic_fixed4_orb_cpt - __kmpc_atomic_fixed4_orl - __kmpc_atomic_fixed4_orl_cpt - __kmpc_atomic_fixed4_rd - __kmpc_atomic_fixed4_shl - __kmpc_atomic_fixed4_shl_cpt - __kmpc_atomic_fixed4_shl_cpt_rev - __kmpc_atomic_fixed4_shl_rev - __kmpc_atomic_fixed4_shr - __kmpc_atomic_fixed4_shr_cpt - __kmpc_atomic_fixed4_shr_cpt_rev - __kmpc_atomic_fixed4_shr_rev - __kmpc_atomic_fixed4_sub - __kmpc_atomic_fixed4_sub_cpt - __kmpc_atomic_fixed4_sub_cpt_rev - __kmpc_atomic_fixed4_sub_fp - __kmpc_atomic_fixed4_sub_rev - __kmpc_atomic_fixed4_swp - __kmpc_atomic_fixed4_wr - __kmpc_atomic_fixed4_xor - __kmpc_atomic_fixed4_xor_cpt - __kmpc_atomic_fixed4u_div - __kmpc_atomic_fixed4u_div_cpt - __kmpc_atomic_fixed4u_div_cpt_rev - __kmpc_atomic_fixed4u_div_fp - __kmpc_atomic_fixed4u_div_rev - __kmpc_atomic_fixed4u_shr - __kmpc_atomic_fixed4u_shr_cpt - __kmpc_atomic_fixed4u_shr_cpt_rev - __kmpc_atomic_fixed4u_shr_rev - __kmpc_atomic_fixed8_add - __kmpc_atomic_fixed8_add_cpt - __kmpc_atomic_fixed8_add_fp - __kmpc_atomic_fixed8_andb - __kmpc_atomic_fixed8_andb_cpt - __kmpc_atomic_fixed8_andl - __kmpc_atomic_fixed8_andl_cpt - __kmpc_atomic_fixed8_div - __kmpc_atomic_fixed8_div_cpt - __kmpc_atomic_fixed8_div_cpt_rev - __kmpc_atomic_fixed8_div_float8 
- __kmpc_atomic_fixed8_div_fp - __kmpc_atomic_fixed8_div_rev - __kmpc_atomic_fixed8_eqv - __kmpc_atomic_fixed8_eqv_cpt - __kmpc_atomic_fixed8_max - __kmpc_atomic_fixed8_max_cpt - __kmpc_atomic_fixed8_min - __kmpc_atomic_fixed8_min_cpt - __kmpc_atomic_fixed8_mul - __kmpc_atomic_fixed8_mul_cpt - __kmpc_atomic_fixed8_mul_float8 - __kmpc_atomic_fixed8_mul_fp - __kmpc_atomic_fixed8_neqv - __kmpc_atomic_fixed8_neqv_cpt - __kmpc_atomic_fixed8_orb - __kmpc_atomic_fixed8_orb_cpt - __kmpc_atomic_fixed8_orl - __kmpc_atomic_fixed8_orl_cpt - __kmpc_atomic_fixed8_rd - __kmpc_atomic_fixed8_shl - __kmpc_atomic_fixed8_shl_cpt - __kmpc_atomic_fixed8_shl_cpt_rev - __kmpc_atomic_fixed8_shl_rev - __kmpc_atomic_fixed8_shr - __kmpc_atomic_fixed8_shr_cpt - __kmpc_atomic_fixed8_shr_cpt_rev - __kmpc_atomic_fixed8_shr_rev - __kmpc_atomic_fixed8_sub - __kmpc_atomic_fixed8_sub_cpt - __kmpc_atomic_fixed8_sub_cpt_rev - __kmpc_atomic_fixed8_sub_fp - __kmpc_atomic_fixed8_sub_rev - __kmpc_atomic_fixed8_swp - __kmpc_atomic_fixed8_wr - __kmpc_atomic_fixed8_xor - __kmpc_atomic_fixed8_xor_cpt - __kmpc_atomic_fixed8u_div - __kmpc_atomic_fixed8u_div_cpt - __kmpc_atomic_fixed8u_div_cpt_rev - __kmpc_atomic_fixed8u_div_fp - __kmpc_atomic_fixed8u_div_rev - __kmpc_atomic_fixed8u_shr - __kmpc_atomic_fixed8u_shr_cpt - __kmpc_atomic_fixed8u_shr_cpt_rev - __kmpc_atomic_fixed8u_shr_rev -@endcode - -Functions for floating point ----------------------------- -There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes. -(Ten byte floats are used by X87, but are now rare). 
-@code - __kmpc_atomic_float4_add - __kmpc_atomic_float4_add_cpt - __kmpc_atomic_float4_add_float8 - __kmpc_atomic_float4_add_fp - __kmpc_atomic_float4_div - __kmpc_atomic_float4_div_cpt - __kmpc_atomic_float4_div_cpt_rev - __kmpc_atomic_float4_div_float8 - __kmpc_atomic_float4_div_fp - __kmpc_atomic_float4_div_rev - __kmpc_atomic_float4_max - __kmpc_atomic_float4_max_cpt - __kmpc_atomic_float4_min - __kmpc_atomic_float4_min_cpt - __kmpc_atomic_float4_mul - __kmpc_atomic_float4_mul_cpt - __kmpc_atomic_float4_mul_float8 - __kmpc_atomic_float4_mul_fp - __kmpc_atomic_float4_rd - __kmpc_atomic_float4_sub - __kmpc_atomic_float4_sub_cpt - __kmpc_atomic_float4_sub_cpt_rev - __kmpc_atomic_float4_sub_float8 - __kmpc_atomic_float4_sub_fp - __kmpc_atomic_float4_sub_rev - __kmpc_atomic_float4_swp - __kmpc_atomic_float4_wr - __kmpc_atomic_float8_add - __kmpc_atomic_float8_add_cpt - __kmpc_atomic_float8_add_fp - __kmpc_atomic_float8_div - __kmpc_atomic_float8_div_cpt - __kmpc_atomic_float8_div_cpt_rev - __kmpc_atomic_float8_div_fp - __kmpc_atomic_float8_div_rev - __kmpc_atomic_float8_max - __kmpc_atomic_float8_max_cpt - __kmpc_atomic_float8_min - __kmpc_atomic_float8_min_cpt - __kmpc_atomic_float8_mul - __kmpc_atomic_float8_mul_cpt - __kmpc_atomic_float8_mul_fp - __kmpc_atomic_float8_rd - __kmpc_atomic_float8_sub - __kmpc_atomic_float8_sub_cpt - __kmpc_atomic_float8_sub_cpt_rev - __kmpc_atomic_float8_sub_fp - __kmpc_atomic_float8_sub_rev - __kmpc_atomic_float8_swp - __kmpc_atomic_float8_wr - __kmpc_atomic_float10_add - __kmpc_atomic_float10_add_cpt - __kmpc_atomic_float10_add_fp - __kmpc_atomic_float10_div - __kmpc_atomic_float10_div_cpt - __kmpc_atomic_float10_div_cpt_rev - __kmpc_atomic_float10_div_fp - __kmpc_atomic_float10_div_rev - __kmpc_atomic_float10_mul - __kmpc_atomic_float10_mul_cpt - __kmpc_atomic_float10_mul_fp - __kmpc_atomic_float10_rd - __kmpc_atomic_float10_sub - __kmpc_atomic_float10_sub_cpt - __kmpc_atomic_float10_sub_cpt_rev - __kmpc_atomic_float10_sub_fp - 
__kmpc_atomic_float10_sub_rev - __kmpc_atomic_float10_swp - __kmpc_atomic_float10_wr - __kmpc_atomic_float16_add - __kmpc_atomic_float16_add_cpt - __kmpc_atomic_float16_div - __kmpc_atomic_float16_div_cpt - __kmpc_atomic_float16_div_cpt_rev - __kmpc_atomic_float16_div_rev - __kmpc_atomic_float16_max - __kmpc_atomic_float16_max_cpt - __kmpc_atomic_float16_min - __kmpc_atomic_float16_min_cpt - __kmpc_atomic_float16_mul - __kmpc_atomic_float16_mul_cpt - __kmpc_atomic_float16_rd - __kmpc_atomic_float16_sub - __kmpc_atomic_float16_sub_cpt - __kmpc_atomic_float16_sub_cpt_rev - __kmpc_atomic_float16_sub_rev - __kmpc_atomic_float16_swp - __kmpc_atomic_float16_wr -@endcode - -Functions for Complex types ---------------------------- -Functions for complex types whose component floating point variables are of size 4,8,10 or 16 bytes. -The names here are based on the size of the component float, *not* the size of the complex type. So -`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
- -@code - __kmpc_atomic_cmplx4_add - __kmpc_atomic_cmplx4_add_cmplx8 - __kmpc_atomic_cmplx4_add_cpt - __kmpc_atomic_cmplx4_div - __kmpc_atomic_cmplx4_div_cmplx8 - __kmpc_atomic_cmplx4_div_cpt - __kmpc_atomic_cmplx4_div_cpt_rev - __kmpc_atomic_cmplx4_div_rev - __kmpc_atomic_cmplx4_mul - __kmpc_atomic_cmplx4_mul_cmplx8 - __kmpc_atomic_cmplx4_mul_cpt - __kmpc_atomic_cmplx4_rd - __kmpc_atomic_cmplx4_sub - __kmpc_atomic_cmplx4_sub_cmplx8 - __kmpc_atomic_cmplx4_sub_cpt - __kmpc_atomic_cmplx4_sub_cpt_rev - __kmpc_atomic_cmplx4_sub_rev - __kmpc_atomic_cmplx4_swp - __kmpc_atomic_cmplx4_wr - __kmpc_atomic_cmplx8_add - __kmpc_atomic_cmplx8_add_cpt - __kmpc_atomic_cmplx8_div - __kmpc_atomic_cmplx8_div_cpt - __kmpc_atomic_cmplx8_div_cpt_rev - __kmpc_atomic_cmplx8_div_rev - __kmpc_atomic_cmplx8_mul - __kmpc_atomic_cmplx8_mul_cpt - __kmpc_atomic_cmplx8_rd - __kmpc_atomic_cmplx8_sub - __kmpc_atomic_cmplx8_sub_cpt - __kmpc_atomic_cmplx8_sub_cpt_rev - __kmpc_atomic_cmplx8_sub_rev - __kmpc_atomic_cmplx8_swp - __kmpc_atomic_cmplx8_wr - __kmpc_atomic_cmplx10_add - __kmpc_atomic_cmplx10_add_cpt - __kmpc_atomic_cmplx10_div - __kmpc_atomic_cmplx10_div_cpt - __kmpc_atomic_cmplx10_div_cpt_rev - __kmpc_atomic_cmplx10_div_rev - __kmpc_atomic_cmplx10_mul - __kmpc_atomic_cmplx10_mul_cpt - __kmpc_atomic_cmplx10_rd - __kmpc_atomic_cmplx10_sub - __kmpc_atomic_cmplx10_sub_cpt - __kmpc_atomic_cmplx10_sub_cpt_rev - __kmpc_atomic_cmplx10_sub_rev - __kmpc_atomic_cmplx10_swp - __kmpc_atomic_cmplx10_wr - __kmpc_atomic_cmplx16_add - __kmpc_atomic_cmplx16_add_cpt - __kmpc_atomic_cmplx16_div - __kmpc_atomic_cmplx16_div_cpt - __kmpc_atomic_cmplx16_div_cpt_rev - __kmpc_atomic_cmplx16_div_rev - __kmpc_atomic_cmplx16_mul - __kmpc_atomic_cmplx16_mul_cpt - __kmpc_atomic_cmplx16_rd - __kmpc_atomic_cmplx16_sub - __kmpc_atomic_cmplx16_sub_cpt - __kmpc_atomic_cmplx16_sub_cpt_rev - __kmpc_atomic_cmplx16_swp - __kmpc_atomic_cmplx16_wr -@endcode -*/ - -/*! 
-@ingroup ATOMIC_OPS -@{ -*/ - -/* - * Global vars - */ - -#ifndef KMP_GOMP_COMPAT -int __kmp_atomic_mode = 1; // Intel perf -#else -int __kmp_atomic_mode = 2; // GOMP compatibility -#endif /* KMP_GOMP_COMPAT */ - -KMP_ALIGN(128) - -kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ -kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ -kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ -kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */ -kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ -kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ -kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/ -kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/ -kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ - - -/* - 2007-03-02: - Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a - bug on *_32 and *_32e. This is just a temporary workaround for the problem. - It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG - routines in assembler language. 
-*/ -#define KMP_ATOMIC_VOLATILE volatile - -#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD - - static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; }; - static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; } - static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; } - - static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; }; - static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; } - static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; } - - static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; }; - - static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( 
kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; }; - -#endif - -/* ------------------------------------------------------------------------ */ -/* ATOMIC implementation routines */ -/* one routine for each operation and operand type */ -/* ------------------------------------------------------------------------ */ - -// All routine declarations look like -// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); -// ------------------------------------------------------------------------ - -#define KMP_CHECK_GTID \ - if ( gtid == KMP_GTID_UNKNOWN ) { \ - gtid = __kmp_entry_gtid(); \ - } // check and get gtid when needed - -// Beginning of a definition (provides name, parameters, debug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) -// OP_ID - operation identifier (add, sub, mul, ...) -// TYPE - operands' type -#define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); - -// ------------------------------------------------------------------------ -// Lock variables used for critical sections for various size operands -#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat -#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char -#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short -#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int -#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float -#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int -#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double -#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex -#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double -#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad -#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex -#define ATOMIC_LOCK20c
__kmp_atomic_lock_20c // long double complex -#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - (*lhs) OP (rhs); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); - -// ------------------------------------------------------------------------ -// For GNU compatibility, we may need to use a critical section, -// even though it is not required by the ISA. -// -// On IA-32 architecture, all atomic operations except for fixed 4 byte add, -// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common -// critical section. On Intel(R) 64, all atomic operations are done with fetch -// and add or compare and exchange. Therefore, the FLAG parameter to this -// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which -// require a critical section, where we predict that they will be implemented -// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). -// -// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, -// the FLAG parameter should always be 1. If we know that we will be using -// a critical section, then we want to make certain that we use the generic -// lock __kmp_atomic_lock to protect the atomic update, and not one of the -// locks that are specialized based upon the size or type of the data. -// -// If FLAG is 0, then we are relying on dead code elimination by the build -// compiler to get rid of the useless block of code, and save a needless -// branch at runtime.
-// - -#ifdef KMP_GOMP_COMPAT -# define OP_GOMP_CRITICAL(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL( OP, 0 ); \ - return; \ - } -# else -# define OP_GOMP_CRITICAL(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - -#if KMP_MIC -# define KMP_DO_PAUSE _mm_delay_32( 1 ) -#else -# define KMP_DO_PAUSE KMP_CPU_PAUSE() -#endif /* KMP_MIC */ - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -#define OP_CMPXCHG(TYPE,BITS,OP) \ - { \ - TYPE old_value, new_value; \ - old_value = *(TYPE volatile *)lhs; \ - new_value = old_value OP rhs; \ - while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_DO_PAUSE; \ - \ - old_value = *(TYPE volatile *)lhs; \ - new_value = old_value OP rhs; \ - } \ - } - -#if USE_CMPXCHG_FIX -// 2007-06-25: -// workaround for C78287 (complex(kind=4) data type) -// lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm) -// Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro. -// This is a problem of the compiler. -// Related tracker is C76005, targeted to 11.0. -// I verified the asm of the workaround. -#define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ - { \ - struct _sss { \ - TYPE cmp; \ - kmp_int##BITS *vvv; \ - }; \ - struct _sss old_value, new_value; \ - old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \ - new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \ - *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ - new_value.cmp = old_value.cmp OP rhs; \ - while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ - *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \ - { \ - KMP_DO_PAUSE; \ - \ - *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ - new_value.cmp = old_value.cmp OP rhs; \ - } \ - } -// end of the first part of the workaround for C78287 -#endif // USE_CMPXCHG_FIX - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// ------------------------------------------------------------------------ -// X86 or X86_64: no alignment problems ==================================== -#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ -} -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} -#if USE_CMPXCHG_FIX -// ------------------------------------------------------------------------- -// workaround for C78287 (complex(kind=4) data type) -#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ -} -// end of the second part of the workaround for C78287 -#endif - -#else -// ------------------------------------------------------------------------- -// Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} -#if USE_CMPXCHG_FIX -// ------------------------------------------------------------------------- -// workaround for C78287 (complex(kind=4) data type) -#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} -// end of the second part of the workaround for C78287 -#endif // USE_CMPXCHG_FIX -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -// Routines for ATOMIC 4-byte operands addition and subtraction -ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add -ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub - -ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add -ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub - -// Routines for ATOMIC 8-byte operands addition and subtraction -ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add -ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub - -ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add -ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub - -// ------------------------------------------------------------------------ -// Entries definition for integer operands -// TYPE_ID - operands type and size (fixed4, float4) -// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operand type -// BITS - size in bits, used to distinguish low level calls -// OP - operator (used in critical section) -// LCK_ID - lock identifier, used to possibly distinguish lock variable -// MASK - used for alignment check - -// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG -// ------------------------------------------------------------------------ -// Routines for ATOMIC integer operands, other operators -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG -ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add -ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb -ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div -ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div -ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul -ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb -ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl -ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr -ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr -ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub -ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor -ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add -ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb -ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div -ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div -ATOMIC_CMPXCHG( fixed2, 
mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul -ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb -ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl -ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr -ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr -ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub -ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor -ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb -ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div -ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div -ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul -ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb -ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl -ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr -ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr -ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor -ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb -ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div -ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div -ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul -ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb 
-ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl -ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr -ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr -ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor -ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div -ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul -ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div -ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul -// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG - - -/* ------------------------------------------------------------------------ */ -/* Routines for C/C++ Reduction operators && and || */ -/* ------------------------------------------------------------------------ */ - -// ------------------------------------------------------------------------ -// Need separate macros for &&, || because there is no combined assignment -// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used -#define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ - OP_CRITICAL( = *lhs OP, LCK_ID ) \ -} - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// ------------------------------------------------------------------------ -// X86 or X86_64: no alignment problems =================================== -#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} - -#else -// ------------------------------------------------------------------------ -// Code for other architectures that don't handle 
unaligned accesses. -#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \ - if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \ - } \ -} -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl -ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl -ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl -ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl -ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl -ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl -ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl -ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl - - -/* ------------------------------------------------------------------------- */ -/* Routines for Fortran operators that matched no one in C: */ -/* MAX, MIN, .EQV., .NEQV. */ -/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ -/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ -/* ------------------------------------------------------------------------- */ - -// ------------------------------------------------------------------------- -// MIN and MAX need separate macros -// OP - operator to check if we need any actions? -#define MIN_MAX_CRITSECT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if ( *lhs OP rhs ) { /* still need actions? 
*/ \ - *lhs = rhs; \ - } \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); - -// ------------------------------------------------------------------------- -#ifdef KMP_GOMP_COMPAT -#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \ - if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ - KMP_CHECK_GTID; \ - MIN_MAX_CRITSECT( OP, 0 ); \ - return; \ - } -#else -#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - -// ------------------------------------------------------------------------- -#define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - while ( old_value OP rhs && /* still need actions? */ \ - ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ - { \ - KMP_CPU_PAUSE(); \ - temp_val = *lhs; \ - old_value = temp_val; \ - } \ - } - -// ------------------------------------------------------------------------- -// 1-byte, 2-byte operands - use critical section -#define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - if ( *lhs OP rhs ) { /* need actions? */ \ - GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ - MIN_MAX_CRITSECT(OP,LCK_ID) \ - } \ -} - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// ------------------------------------------------------------------------- -// X86 or X86_64: no alignment problems ==================================== -#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - if ( *lhs OP rhs ) { \ - GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ - MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ - } \ -} - -#else -// ------------------------------------------------------------------------- -// Code for other architectures that don't handle unaligned accesses. 
-#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - if ( *lhs OP rhs ) { \ - GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ - if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \ - } \ - } \ -} -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max -MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min -MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max -MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min -MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max -MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min -MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max -MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min -MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max -MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min -MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max -MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min -#if KMP_HAVE_QUAD -MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max -MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min -#if ( KMP_ARCH_X86 ) - MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16 - MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // 
__kmpc_atomic_float16_min_a16 -#endif -#endif -// ------------------------------------------------------------------------ -// Need separate macros for .EQV. because of the need of complement (~) -// OP ignored for critical sections, ^=~ used instead -#define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \ -} - -// ------------------------------------------------------------------------ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// ------------------------------------------------------------------------ -// X86 or X86_64: no alignment problems =================================== -#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} -// ------------------------------------------------------------------------ -#else -// ------------------------------------------------------------------------ -// Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \ - } \ -} -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv -ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv -ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv -ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv -ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv -ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv -ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv -ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv - - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ -} - -/* ------------------------------------------------------------------------- */ -// routines for long double type -ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add -ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub -ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul -ATOMIC_CRITICAL( float10, div, long double, /, 
10r, 1 ) // __kmpc_atomic_float10_div -#if KMP_HAVE_QUAD -// routines for _Quad type -ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add -ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub -ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul -ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16 - ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16 - ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16 - ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16 -#endif -#endif -// routines for complex types - -#if USE_CMPXCHG_FIX -// workaround for C78287 (complex(kind=4) data type) -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div -// end of the workaround for C78287 -#else -ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add -ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub -ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul -ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div -#endif // USE_CMPXCHG_FIX - -ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add -ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub -ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // 
__kmpc_atomic_cmplx8_mul -ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div -ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add -ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub -ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul -ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add -ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub -ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul -ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16 - ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16 - ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16 - ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16 -#endif -#endif - -#if OMP_40_ENABLED - -// OpenMP 4.0: x = expr binop x for non-commutative operations. 
-// Supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_REV(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - (*lhs) = (rhs) OP (*lhs); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); - -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_REV(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_REV( OP, 0 ); \ - return; \ - } -#else -#define OP_GOMP_CRITICAL_REV(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - - -// Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) -// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operands' type -#define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid )); - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -// Note: temp_val introduced in order to force the compiler to read -// *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_REV(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_DO_PAUSE; \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - } \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ - OP_CMPXCHG_REV(TYPE,BITS,OP) \ -} - -// ------------------------------------------------------------------------ -// Entries definition for integer operands -// TYPE_ID - operands type and size (fixed4, float4) -// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operand type -// BITS - size in bits, used to distinguish low level calls -// OP - operator (used in critical section) -// LCK_ID - lock identifier, used to possibly distinguish lock variable - -// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG -// ------------------------------------------------------------------------ -// Routines for ATOMIC integer operands, other operators -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG -ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev -ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev -ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev -ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev -ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev -ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev - -ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev -ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev -ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev -ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev -ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev -ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev - -ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev -ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev -ATOMIC_CMPXCHG_REV( 
fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev -ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev -ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev -ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev - -ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev -ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev -ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev -ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev -ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev -ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev - -ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev -ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev - -ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev -ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev -// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ - OP_CRITICAL_REV(OP,LCK_ID) \ -} - -/* 
------------------------------------------------------------------------- */ -// routines for long double type -ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev -ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev -#if KMP_HAVE_QUAD -// routines for _Quad type -ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev -ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev - ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev -#endif -#endif - -// routines for complex types -ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev -ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev -ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev -ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev -ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev -ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev -ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev - ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev -#endif -#endif - - -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 -// End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
- -#endif //OMP_40_ENABLED - - -/* ------------------------------------------------------------------------ */ -/* Routines for mixed types of LHS and RHS, when RHS is "larger" */ -/* Note: in order to reduce the total number of types combinations */ -/* it is supposed that compiler converts RHS to longest floating type,*/ -/* that is _Quad, before call to any of these routines */ -/* Conversion to _Quad will be done by the compiler during calculation, */ -/* conversion back to TYPE - before the assignment, like: */ -/* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */ -/* Performance penalty expected because of SW emulation use */ -/* ------------------------------------------------------------------------ */ - -#define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ -void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid )); - -// ------------------------------------------------------------------------- -#define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ -} - -// ------------------------------------------------------------------------- -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// ------------------------------------------------------------------------- -// X86 or X86_64: no alignment problems ==================================== -#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} -// ------------------------------------------------------------------------- -#else -// 
------------------------------------------------------------------------ -// Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -// RHS=float8 -ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8 -ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8 -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8 -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8 -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8 -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8 -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8 
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8 - -// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) -#if KMP_HAVE_QUAD -ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp -ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp -ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp -ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp -ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp - -ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp -ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp -ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp -ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp -ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp - -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp -ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp - -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // 
__kmpc_atomic_fixed8_add_fp -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp -ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp - -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp - -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp - -ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp -ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp -ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp -ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// ------------------------------------------------------------------------ -// X86 or X86_64: no 
alignment problems ==================================== -#if USE_CMPXCHG_FIX -// workaround for C78287 (complex(kind=4) data type) -#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ -} -// end of the second part of the workaround for C78287 -#else -#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} -#endif // USE_CMPXCHG_FIX -#else -// ------------------------------------------------------------------------ -// Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8 -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8 -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8 -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8 - -// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -////////////////////////////////////////////////////////////////////////////////////////////////////// -// ------------------------------------------------------------------------ -// Atomic READ routines -// ------------------------------------------------------------------------ - -// ------------------------------------------------------------------------ -// Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) -// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operands' type -#define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store_ret" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -// Note: temp_val introduced in order to force the compiler to read -// *lhs only once (w/o it the compiler reads *lhs twice) -// TODO: check if it is still necessary -// Return old value regardless of the result of "compare & swap# operation - -#define OP_CMPXCHG_READ(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - union f_i_union { \ - TYPE f_val; \ - kmp_int##BITS i_val; \ - }; \ - union f_i_union old_value; \ - temp_val = *loc; \ - old_value.f_val = temp_val; \ - old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \ - new_value = old_value.f_val; \ - return new_value; \ - } - -// ------------------------------------------------------------------------- -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_READ(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - new_value = (*loc); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); - -// ------------------------------------------------------------------------- -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_READ(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode 
== 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_READ( OP, 0 ); \ - return new_value; \ - } -#else -#define OP_GOMP_CRITICAL_READ(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - -// ------------------------------------------------------------------------- -#define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ - new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \ - return new_value; \ -} -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ - OP_CMPXCHG_READ(TYPE,BITS,OP) \ -} -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \ - return new_value; \ -} - -// ------------------------------------------------------------------------ -// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work. -// Let's return the read value through the additional parameter. 
- -#if ( KMP_OS_WINDOWS ) - -#define OP_CRITICAL_READ_WRK(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - (*out) = (*loc); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_READ_WRK( OP, 0 ); \ - } -#else -#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ -#define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ -void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); - -// ------------------------------------------------------------------------ -#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ - OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \ -} - -#endif // KMP_OS_WINDOWS - -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG -ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd -ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd -ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd -ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd - -// !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic -ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd -ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd - -ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd -#endif // KMP_HAVE_QUAD - -// Fix for CQ220361 on Windows* OS -#if ( KMP_OS_WINDOWS ) - ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd -#else - ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd -#endif -ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd -ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd - ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd -#endif -#endif - - -// ------------------------------------------------------------------------ -// Atomic WRITE routines -// ------------------------------------------------------------------------ - -#define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ - KMP_XCHG_FIXED##BITS( lhs, rhs ); \ -} -// ------------------------------------------------------------------------ -#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ - KMP_XCHG_REAL##BITS( lhs, rhs ); \ -} - - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs 
using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -// Note: temp_val introduced in order to force the compiler to read -// *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_WR(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - } \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ - OP_CMPXCHG_WR(TYPE,BITS,OP) \ -} - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(OP,LCK_ID) /* send assignment */ \ -} -// ------------------------------------------------------------------------- - -ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr -ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr -ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr -#if ( KMP_ARCH_X86 ) - ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr -#else - ATOMIC_XCHG_WR( 
fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr -#endif - -ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr -#if ( KMP_ARCH_X86 ) - ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr -#else - ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr -#endif - -ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr -#endif -ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr -ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr -ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr - ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr -#endif -#endif - - -// ------------------------------------------------------------------------ -// Atomic CAPTURE routines -// ------------------------------------------------------------------------ - -// Beginning of a definition (provides name, parameters, debug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) -// OP_ID - operation identifier (add, sub, mul, ...) 
-// TYPE - operands' type -#define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); - -// ------------------------------------------------------------------------- -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_CPT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - (*lhs) OP rhs; \ - new_value = (*lhs); \ - } else { \ - new_value = (*lhs); \ - (*lhs) OP rhs; \ - } \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return new_value; - -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT( OP##=, 0 ); \ - } -#else -#define OP_GOMP_CRITICAL_CPT(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -// Note: temp_val introduced in order to force the compiler to read -// *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_CPT(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = old_value OP rhs; \ - while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = old_value OP rhs; \ - } \ - if( flag ) { \ - return new_value; \ - } else \ - return old_value; \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ - OP_CMPXCHG_CPT(TYPE,BITS,OP) \ -} - -// ------------------------------------------------------------------------- -#define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE old_value, new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ - if( flag ) { \ - return old_value OP rhs; \ - } else \ - return old_value; \ -} -// ------------------------------------------------------------------------- - -ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt -ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt -ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt -ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt - -ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt -ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt -ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt -ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // 
__kmpc_atomic_float8_sub_cpt - -// ------------------------------------------------------------------------ -// Entries definition for integer operands -// TYPE_ID - operands type and size (fixed4, float4) -// OP_ID - operation identifier (add, sub, mul, ...) -// TYPE - operand type -// BITS - size in bits, used to distinguish low level calls -// OP - operator (used in critical section) -// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG -// ------------------------------------------------------------------------ -// Routines for ATOMIC integer operands, other operators -// ------------------------------------------------------------------------ -// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG -ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt -ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt -ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt -ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt -ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt -ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt -ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed2, 
div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt -ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt -ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt -ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt -ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt -ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt -ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt -ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt 
-ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt -ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt -ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt -ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt -ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt -ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt -// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG - -// ------------------------------------------------------------------------ -// Routines for C/C++ Reduction operators && and || -// ------------------------------------------------------------------------ - -// ------------------------------------------------------------------------- -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro -// flag != 0: new_value captures the result of the operation; flag == 0: new_value captures the value *lhs held before it. -// In both cases the result is stored back into *lhs while the lock is held, as the OP_CMPXCHG_CPT path does. -#define OP_CRITICAL_L_CPT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - new_value OP rhs; \ - (*lhs) = new_value; /* store the result, not only capture it */ \ - } else { \ - new_value = (*lhs); /* capture the old value before updating */ \ - (*lhs) OP rhs; \ - } \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); - -// 
------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_L_CPT( OP, 0 ); \ - return new_value; \ - } -#else -#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - -// ------------------------------------------------------------------------ -// Need separate macros for &&, || because there is no combined assignment -#define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \ - OP_CMPXCHG_CPT(TYPE,BITS,OP) \ -} - -ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt -ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt -ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt -ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt -ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt -ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt -ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt -ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt - - -// ------------------------------------------------------------------------- -// Routines for Fortran operators that matched no one in C: -// MAX, MIN, .EQV., .NEQV. -// Operators .AND., .OR. 
are covered by __kmpc_atomic_*_{andl,orl}_cpt -// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt -// ------------------------------------------------------------------------- - -// ------------------------------------------------------------------------- -// MIN and MAX need separate macros -// OP - operator to check if we need any actions? -// The comparison is repeated under the lock. When it succeeds, rhs is stored and new_value captures -// rhs (flag != 0) or the previous *lhs (flag == 0). When it fails, new_value captures the current *lhs, -// so the value returned is never uninitialized. -#define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if ( *lhs OP rhs ) { /* still need actions? */ \ - old_value = *lhs; \ - *lhs = rhs; \ - if ( flag ) \ - new_value = rhs; \ - else \ - new_value = old_value; \ - } else { /* nothing to store: capture the current value */ \ - new_value = *lhs; \ - } \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return new_value; \ - -// ------------------------------------------------------------------------- -#ifdef KMP_GOMP_COMPAT -#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \ - if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ - KMP_CHECK_GTID; \ - MIN_MAX_CRITSECT_CPT( OP, 0 ); \ - } -#else -#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - -// ------------------------------------------------------------------------- -#define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - /*TYPE old_value; */ \ - temp_val = *lhs; \ - old_value = temp_val; \ - while ( old_value OP rhs && /* still need actions? */ \ - ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ - { \ - KMP_CPU_PAUSE(); \ - temp_val = *lhs; \ - old_value = temp_val; \ - } \ - if( flag ) \ - return rhs; \ - else \ - return old_value; \ - } - -// ------------------------------------------------------------------------- -// 1-byte, 2-byte operands - use critical section -#define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value, old_value; \ - if ( *lhs OP rhs ) { /* need actions? 
*/ \ - GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ - MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ - } \ - return *lhs; \ -} - -#define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value, old_value; \ - if ( *lhs OP rhs ) { \ - GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ - MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ - } \ - return *lhs; \ -} - - -MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt -MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt -MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt -MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt -MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt -MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt -MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt -MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt -#if KMP_HAVE_QUAD -MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt -MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt -#if ( KMP_ARCH_X86 ) - MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt - MIN_MAX_CRITICAL_CPT( float16, 
min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt -#endif -#endif - -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT( OP, 0 ); \ - } -#else -#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ -#define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \ - OP_CMPXCHG_CPT(TYPE,BITS,OP) \ -} - -// ------------------------------------------------------------------------ - -ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt -ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt -ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt -ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define 
ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \ -} - -// ------------------------------------------------------------------------ - -// Workaround for cmplx4. Regular routines with return value don't work -// on Win_32e. Let's return captured values through the additional parameter. -#define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - (*lhs) OP rhs; \ - (*out) = (*lhs); \ - } else { \ - (*out) = (*lhs); \ - (*lhs) OP rhs; \ - } \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return; -// ------------------------------------------------------------------------ - -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_WRK( OP##=, 0 ); \ - } -#else -#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ - -#define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ -void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); -// ------------------------------------------------------------------------ - -#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ - OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \ - OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \ -} -// The end of workaround for cmplx4 - -/* ------------------------------------------------------------------------- */ -// routines for long double type -ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // 
__kmpc_atomic_float10_add_cpt -ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt -ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt -ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt -#if KMP_HAVE_QUAD -// routines for _Quad type -ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt -ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt -ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt -ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt - ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt - ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt - ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt -#endif -#endif - -// routines for complex types - -// cmplx4 routines to return void -ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt -ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt -ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt -ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt - -ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt -ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt -ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt 
-ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt -ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt -ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt -ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt -ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt -ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt -ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt -ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt - ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt - ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt - ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt -#endif -#endif - -#if OMP_40_ENABLED - -// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations. 
-// Supported only on IA-32 architecture and Intel(R) 64 - -// ------------------------------------------------------------------------- -// Operation on *lhs, rhs bound by critical section -// OP - operator (it's supposed to contain an assignment) -// LCK_ID - lock identifier -// Note: don't check gtid as it should always be valid -// 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_CPT_REV(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - /*temp_val = (*lhs);*/\ - (*lhs) = (rhs) OP (*lhs); \ - new_value = (*lhs); \ - } else { \ - new_value = (*lhs);\ - (*lhs) = (rhs) OP (*lhs); \ - } \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return new_value; - -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_REV( OP, 0 ); \ - } -#else -#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ - -// ------------------------------------------------------------------------ -// Operation on *lhs, rhs using "compare_and_store" routine -// TYPE - operands' type -// BITS - size in bits, used to distinguish low level calls -// OP - operator -// Note: temp_val introduced in order to force the compiler to read -// *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - } \ - if( flag ) { \ - return new_value; \ - } else \ - return old_value; \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ - OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ -} - - -ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, 
sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev -// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG - - -// 
------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -// TYPE_ID, OP_ID, TYPE - detailed above -// OP - operator -// LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\ - OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ - OP_CRITICAL_CPT_REV(OP,LCK_ID) \ -} - - -/* ------------------------------------------------------------------------- */ -// routines for long double type -ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev -#if KMP_HAVE_QUAD -// routines for _Quad type -ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev - ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev -#endif -#endif - -// routines for complex types - -// ------------------------------------------------------------------------ - -// Workaround for cmplx4. Regular routines with return value don't work -// on Win_32e. Let's return captured values through the additional parameter. 
-#define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - (*lhs) = (rhs) OP (*lhs); \ - (*out) = (*lhs); \ - } else { \ - (*out) = (*lhs); \ - (*lhs) = (rhs) OP (*lhs); \ - } \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return; -// ------------------------------------------------------------------------ - -#ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \ - } -#else -#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ - -#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ - OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \ - OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ -} -// The end of workaround for cmplx4 - - -// !!! 
TODO: check if we need to return void for cmplx4 routines -// cmplx4 routines to return void -ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev - -ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev - ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev -#endif -#endif - -// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} - -#define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ -TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); - -#define CRITICAL_SWP(LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - old_value = (*lhs); \ - (*lhs) = rhs; \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return old_value; - -// ------------------------------------------------------------------------ -#ifdef KMP_GOMP_COMPAT -#define GOMP_CRITICAL_SWP(FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { 
\ - KMP_CHECK_GTID; \ - CRITICAL_SWP( 0 ); \ - } -#else -#define GOMP_CRITICAL_SWP(FLAG) -#endif /* KMP_GOMP_COMPAT */ - - -#define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \ - return old_value; \ -} -// ------------------------------------------------------------------------ -#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \ - return old_value; \ -} - -// ------------------------------------------------------------------------ -#define CMPXCHG_SWP(TYPE,BITS) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - } \ - return old_value; \ - } - -// ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - CMPXCHG_SWP(TYPE,BITS) \ -} - -ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp -ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp -ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp - -ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp - -#if ( KMP_ARCH_X86 ) - ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp - ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp 
-#else - ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp - ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp -#endif - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -#define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - CRITICAL_SWP(LCK_ID) \ -} - -// ------------------------------------------------------------------------ - -// !!! TODO: check if we need to return void for cmplx4 routines -// Workaround for cmplx4. Regular routines with return value don't work -// on Win_32e. Let's return captured values through the additional parameter. - -#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ -void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); - - -#define CRITICAL_SWP_WRK(LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - tmp = (*lhs); \ - (*lhs) = (rhs); \ - (*out) = tmp; \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return; - -// ------------------------------------------------------------------------ - -#ifdef KMP_GOMP_COMPAT -#define GOMP_CRITICAL_SWP_WRK(FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - CRITICAL_SWP_WRK( 0 ); \ - } -#else -#define GOMP_CRITICAL_SWP_WRK(FLAG) -#endif /* KMP_GOMP_COMPAT */ -// ------------------------------------------------------------------------ - -#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ - TYPE tmp; \ - GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ - CRITICAL_SWP_WRK(LCK_ID) \ -} -// The end of workaround for cmplx4 - - -ATOMIC_CRITICAL_SWP( float10, long 
double, 10r, 1 ) // __kmpc_atomic_float10_swp -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp -#endif -// cmplx4 routine to return void -ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp - -//ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp - - -ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp -ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp -#if KMP_HAVE_QUAD -ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp - ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp -#endif -#endif - - -// End of OpenMP 4.0 Capture - -#endif //OMP_40_ENABLED - -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 - - -#undef OP_CRITICAL - -/* ------------------------------------------------------------------------ */ -/* Generic atomic routines */ -/* ------------------------------------------------------------------------ */ - -void -__kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - if ( -#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ -#else - TRUE -#endif - ) - { - kmp_int8 old_value, new_value; - - old_value = *(kmp_int8 *) lhs; - (*f)( &new_value, &old_value, rhs ); - - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs, - *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) ) - { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int8 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } - - return; - } - else { - // - // All 1-byte data is of integer data type. 
- // - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid ); - } -} - -void -__kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - if ( -#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ -#else - ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */ -#endif - ) - { - kmp_int16 old_value, new_value; - - old_value = *(kmp_int16 *) lhs; - (*f)( &new_value, &old_value, rhs ); - - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs, - *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) ) - { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int16 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } - - return; - } - else { - // - // All 2-byte data is of integer data type. 
- // - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid ); - } -} - -void -__kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - if ( - // - // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. - // Gomp compatibility is broken if this routine is called for floats. - // -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ -#else - ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */ -#endif - ) - { - kmp_int32 old_value, new_value; - - old_value = *(kmp_int32 *) lhs; - (*f)( &new_value, &old_value, rhs ); - - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs, - *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) ) - { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int32 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } - - return; - } - else { - // - // Use __kmp_atomic_lock_4i for all 4-byte data, - // even if it isn't of integer data type. 
- // - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid ); - } -} - -void -__kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - if ( - -#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ -#else - ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */ -#endif - ) - { - kmp_int64 old_value, new_value; - - old_value = *(kmp_int64 *) lhs; - (*f)( &new_value, &old_value, rhs ); - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs, - *(kmp_int64 *) &old_value, - *(kmp_int64 *) &new_value ) ) - { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int64 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } - - return; - } else { - // - // Use __kmp_atomic_lock_8i for all 8-byte data, - // even if it isn't of integer data type. 
- // - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid ); - } -} - -void -__kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid ); -} - -void -__kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid ); -} - -void -__kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & 
__kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid ); -} - -void -__kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid ); - - (*f)( lhs, lhs, rhs ); - -#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else -#endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid ); -} - -// AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler -// duplicated in order to not use 3-party names in pure Intel code -// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. -void -__kmpc_atomic_start(void) -{ - int gtid = __kmp_entry_gtid(); - KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); -} - - -void -__kmpc_atomic_end(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -/*! 
-@} -*/ - -// end of file +/* + * kmp_atomic.c -- ATOMIC implementation routines + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp_atomic.h" +#include "kmp.h" // TRUE, asm routines prototypes + +typedef unsigned char uchar; +typedef unsigned short ushort; + +/*! +@defgroup ATOMIC_OPS Atomic Operations +These functions are used for implementing the many different varieties of atomic operations. + +The compiler is at liberty to inline atomic operations that are naturally supported +by the target architecture. For instance on IA-32 architecture an atomic like this can be inlined +@code +static int s = 0; +#pragma omp atomic + s++; +@endcode +using the single instruction: `lock; incl s` + +However the runtime does provide entrypoints for these operations to support compilers that choose +not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the +increment above.) + +The names of the functions are encoded by using the data type name and the operation name, as in these tables. + +Data Type | Data type encoding +-----------|--------------- +int8_t | `fixed1` +uint8_t | `fixed1u` +int16_t | `fixed2` +uint16_t | `fixed2u` +int32_t | `fixed4` +uint32_t | `fixed4u` +int64_t | `fixed8` +uint64_t | `fixed8u` +float | `float4` +double | `float8` +float 10 (8087 eighty bit float) | `float10` +complex<float> | `cmplx4` +complex<double> | `cmplx8` +complex<long double> | `cmplx10` +
+ +Operation | Operation encoding +----------|------------------- ++ | add +- | sub +\* | mul +/ | div +& | andb +<< | shl +\>\> | shr +\| | orb +^ | xor +&& | andl +\|\| | orl +maximum | max +minimum | min +.eqv. | eqv +.neqv. | neqv + +
+For non-commutative operations, `_rev` can also be added for the reversed operation. +For the functions that capture the result, the suffix `_cpt` is added. + +Update Functions +================ +The general form of an atomic function that just performs an update (without a `capture`) +@code +void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); +@endcode +@param id_ref a pointer to source location +@param gtid the global thread id +@param lhs a pointer to the left operand +@param rhs the right operand + +`capture` functions +=================== +The capture functions perform an atomic update and return a result, which is either the value +before the capture, or that after. They take an additional argument to determine which result is returned. +Their general form is therefore +@code +TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ); +@endcode +@param id_ref a pointer to source location +@param gtid the global thread id +@param lhs a pointer to the left operand +@param rhs the right operand +@param flag one if the result is to be captured *after* the operation, zero if captured *before*. + +The one set of exceptions to this is the `complex<float>` type where the value is not returned, +rather an extra argument pointer is passed. + +They look like +@code +void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); +@endcode + +Read and Write Operations +========================= +The OpenMP* standard now supports atomic operations that simply ensure that the +value is read or written atomically, with no modification +performed. In many cases on IA-32 architecture these operations can be inlined since +the architecture guarantees that no tearing occurs on aligned objects +accessed with a single memory operation of up to 64 bits in size.
+ +The general form of the read operations is +@code +TYPE __kmpc_atomic_<datatype>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); +@endcode + +For the write operations the form is +@code +void __kmpc_atomic_<datatype>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); +@endcode + +Full list of functions +====================== +This leads to the generation of 376 atomic functions, as follows. + +Functions for integers +--------------------- +There are versions here for integers of size 1,2,4 and 8 bytes both signed and unsigned (where that matters). +@code + __kmpc_atomic_fixed1_add + __kmpc_atomic_fixed1_add_cpt + __kmpc_atomic_fixed1_add_fp + __kmpc_atomic_fixed1_andb + __kmpc_atomic_fixed1_andb_cpt + __kmpc_atomic_fixed1_andl + __kmpc_atomic_fixed1_andl_cpt + __kmpc_atomic_fixed1_div + __kmpc_atomic_fixed1_div_cpt + __kmpc_atomic_fixed1_div_cpt_rev + __kmpc_atomic_fixed1_div_float8 + __kmpc_atomic_fixed1_div_fp + __kmpc_atomic_fixed1_div_rev + __kmpc_atomic_fixed1_eqv + __kmpc_atomic_fixed1_eqv_cpt + __kmpc_atomic_fixed1_max + __kmpc_atomic_fixed1_max_cpt + __kmpc_atomic_fixed1_min + __kmpc_atomic_fixed1_min_cpt + __kmpc_atomic_fixed1_mul + __kmpc_atomic_fixed1_mul_cpt + __kmpc_atomic_fixed1_mul_float8 + __kmpc_atomic_fixed1_mul_fp + __kmpc_atomic_fixed1_neqv + __kmpc_atomic_fixed1_neqv_cpt + __kmpc_atomic_fixed1_orb + __kmpc_atomic_fixed1_orb_cpt + __kmpc_atomic_fixed1_orl + __kmpc_atomic_fixed1_orl_cpt + __kmpc_atomic_fixed1_rd + __kmpc_atomic_fixed1_shl + __kmpc_atomic_fixed1_shl_cpt + __kmpc_atomic_fixed1_shl_cpt_rev + __kmpc_atomic_fixed1_shl_rev + __kmpc_atomic_fixed1_shr + __kmpc_atomic_fixed1_shr_cpt + __kmpc_atomic_fixed1_shr_cpt_rev + __kmpc_atomic_fixed1_shr_rev + __kmpc_atomic_fixed1_sub + __kmpc_atomic_fixed1_sub_cpt + __kmpc_atomic_fixed1_sub_cpt_rev + __kmpc_atomic_fixed1_sub_fp + __kmpc_atomic_fixed1_sub_rev + __kmpc_atomic_fixed1_swp + __kmpc_atomic_fixed1_wr + __kmpc_atomic_fixed1_xor + __kmpc_atomic_fixed1_xor_cpt + __kmpc_atomic_fixed1u_div +
__kmpc_atomic_fixed1u_div_cpt + __kmpc_atomic_fixed1u_div_cpt_rev + __kmpc_atomic_fixed1u_div_fp + __kmpc_atomic_fixed1u_div_rev + __kmpc_atomic_fixed1u_shr + __kmpc_atomic_fixed1u_shr_cpt + __kmpc_atomic_fixed1u_shr_cpt_rev + __kmpc_atomic_fixed1u_shr_rev + __kmpc_atomic_fixed2_add + __kmpc_atomic_fixed2_add_cpt + __kmpc_atomic_fixed2_add_fp + __kmpc_atomic_fixed2_andb + __kmpc_atomic_fixed2_andb_cpt + __kmpc_atomic_fixed2_andl + __kmpc_atomic_fixed2_andl_cpt + __kmpc_atomic_fixed2_div + __kmpc_atomic_fixed2_div_cpt + __kmpc_atomic_fixed2_div_cpt_rev + __kmpc_atomic_fixed2_div_float8 + __kmpc_atomic_fixed2_div_fp + __kmpc_atomic_fixed2_div_rev + __kmpc_atomic_fixed2_eqv + __kmpc_atomic_fixed2_eqv_cpt + __kmpc_atomic_fixed2_max + __kmpc_atomic_fixed2_max_cpt + __kmpc_atomic_fixed2_min + __kmpc_atomic_fixed2_min_cpt + __kmpc_atomic_fixed2_mul + __kmpc_atomic_fixed2_mul_cpt + __kmpc_atomic_fixed2_mul_float8 + __kmpc_atomic_fixed2_mul_fp + __kmpc_atomic_fixed2_neqv + __kmpc_atomic_fixed2_neqv_cpt + __kmpc_atomic_fixed2_orb + __kmpc_atomic_fixed2_orb_cpt + __kmpc_atomic_fixed2_orl + __kmpc_atomic_fixed2_orl_cpt + __kmpc_atomic_fixed2_rd + __kmpc_atomic_fixed2_shl + __kmpc_atomic_fixed2_shl_cpt + __kmpc_atomic_fixed2_shl_cpt_rev + __kmpc_atomic_fixed2_shl_rev + __kmpc_atomic_fixed2_shr + __kmpc_atomic_fixed2_shr_cpt + __kmpc_atomic_fixed2_shr_cpt_rev + __kmpc_atomic_fixed2_shr_rev + __kmpc_atomic_fixed2_sub + __kmpc_atomic_fixed2_sub_cpt + __kmpc_atomic_fixed2_sub_cpt_rev + __kmpc_atomic_fixed2_sub_fp + __kmpc_atomic_fixed2_sub_rev + __kmpc_atomic_fixed2_swp + __kmpc_atomic_fixed2_wr + __kmpc_atomic_fixed2_xor + __kmpc_atomic_fixed2_xor_cpt + __kmpc_atomic_fixed2u_div + __kmpc_atomic_fixed2u_div_cpt + __kmpc_atomic_fixed2u_div_cpt_rev + __kmpc_atomic_fixed2u_div_fp + __kmpc_atomic_fixed2u_div_rev + __kmpc_atomic_fixed2u_shr + __kmpc_atomic_fixed2u_shr_cpt + __kmpc_atomic_fixed2u_shr_cpt_rev + __kmpc_atomic_fixed2u_shr_rev + __kmpc_atomic_fixed4_add + 
__kmpc_atomic_fixed4_add_cpt + __kmpc_atomic_fixed4_add_fp + __kmpc_atomic_fixed4_andb + __kmpc_atomic_fixed4_andb_cpt + __kmpc_atomic_fixed4_andl + __kmpc_atomic_fixed4_andl_cpt + __kmpc_atomic_fixed4_div + __kmpc_atomic_fixed4_div_cpt + __kmpc_atomic_fixed4_div_cpt_rev + __kmpc_atomic_fixed4_div_float8 + __kmpc_atomic_fixed4_div_fp + __kmpc_atomic_fixed4_div_rev + __kmpc_atomic_fixed4_eqv + __kmpc_atomic_fixed4_eqv_cpt + __kmpc_atomic_fixed4_max + __kmpc_atomic_fixed4_max_cpt + __kmpc_atomic_fixed4_min + __kmpc_atomic_fixed4_min_cpt + __kmpc_atomic_fixed4_mul + __kmpc_atomic_fixed4_mul_cpt + __kmpc_atomic_fixed4_mul_float8 + __kmpc_atomic_fixed4_mul_fp + __kmpc_atomic_fixed4_neqv + __kmpc_atomic_fixed4_neqv_cpt + __kmpc_atomic_fixed4_orb + __kmpc_atomic_fixed4_orb_cpt + __kmpc_atomic_fixed4_orl + __kmpc_atomic_fixed4_orl_cpt + __kmpc_atomic_fixed4_rd + __kmpc_atomic_fixed4_shl + __kmpc_atomic_fixed4_shl_cpt + __kmpc_atomic_fixed4_shl_cpt_rev + __kmpc_atomic_fixed4_shl_rev + __kmpc_atomic_fixed4_shr + __kmpc_atomic_fixed4_shr_cpt + __kmpc_atomic_fixed4_shr_cpt_rev + __kmpc_atomic_fixed4_shr_rev + __kmpc_atomic_fixed4_sub + __kmpc_atomic_fixed4_sub_cpt + __kmpc_atomic_fixed4_sub_cpt_rev + __kmpc_atomic_fixed4_sub_fp + __kmpc_atomic_fixed4_sub_rev + __kmpc_atomic_fixed4_swp + __kmpc_atomic_fixed4_wr + __kmpc_atomic_fixed4_xor + __kmpc_atomic_fixed4_xor_cpt + __kmpc_atomic_fixed4u_div + __kmpc_atomic_fixed4u_div_cpt + __kmpc_atomic_fixed4u_div_cpt_rev + __kmpc_atomic_fixed4u_div_fp + __kmpc_atomic_fixed4u_div_rev + __kmpc_atomic_fixed4u_shr + __kmpc_atomic_fixed4u_shr_cpt + __kmpc_atomic_fixed4u_shr_cpt_rev + __kmpc_atomic_fixed4u_shr_rev + __kmpc_atomic_fixed8_add + __kmpc_atomic_fixed8_add_cpt + __kmpc_atomic_fixed8_add_fp + __kmpc_atomic_fixed8_andb + __kmpc_atomic_fixed8_andb_cpt + __kmpc_atomic_fixed8_andl + __kmpc_atomic_fixed8_andl_cpt + __kmpc_atomic_fixed8_div + __kmpc_atomic_fixed8_div_cpt + __kmpc_atomic_fixed8_div_cpt_rev + __kmpc_atomic_fixed8_div_float8 
+ __kmpc_atomic_fixed8_div_fp + __kmpc_atomic_fixed8_div_rev + __kmpc_atomic_fixed8_eqv + __kmpc_atomic_fixed8_eqv_cpt + __kmpc_atomic_fixed8_max + __kmpc_atomic_fixed8_max_cpt + __kmpc_atomic_fixed8_min + __kmpc_atomic_fixed8_min_cpt + __kmpc_atomic_fixed8_mul + __kmpc_atomic_fixed8_mul_cpt + __kmpc_atomic_fixed8_mul_float8 + __kmpc_atomic_fixed8_mul_fp + __kmpc_atomic_fixed8_neqv + __kmpc_atomic_fixed8_neqv_cpt + __kmpc_atomic_fixed8_orb + __kmpc_atomic_fixed8_orb_cpt + __kmpc_atomic_fixed8_orl + __kmpc_atomic_fixed8_orl_cpt + __kmpc_atomic_fixed8_rd + __kmpc_atomic_fixed8_shl + __kmpc_atomic_fixed8_shl_cpt + __kmpc_atomic_fixed8_shl_cpt_rev + __kmpc_atomic_fixed8_shl_rev + __kmpc_atomic_fixed8_shr + __kmpc_atomic_fixed8_shr_cpt + __kmpc_atomic_fixed8_shr_cpt_rev + __kmpc_atomic_fixed8_shr_rev + __kmpc_atomic_fixed8_sub + __kmpc_atomic_fixed8_sub_cpt + __kmpc_atomic_fixed8_sub_cpt_rev + __kmpc_atomic_fixed8_sub_fp + __kmpc_atomic_fixed8_sub_rev + __kmpc_atomic_fixed8_swp + __kmpc_atomic_fixed8_wr + __kmpc_atomic_fixed8_xor + __kmpc_atomic_fixed8_xor_cpt + __kmpc_atomic_fixed8u_div + __kmpc_atomic_fixed8u_div_cpt + __kmpc_atomic_fixed8u_div_cpt_rev + __kmpc_atomic_fixed8u_div_fp + __kmpc_atomic_fixed8u_div_rev + __kmpc_atomic_fixed8u_shr + __kmpc_atomic_fixed8u_shr_cpt + __kmpc_atomic_fixed8u_shr_cpt_rev + __kmpc_atomic_fixed8u_shr_rev +@endcode + +Functions for floating point +---------------------------- +There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes. +(Ten byte floats are used by X87, but are now rare). 
+@code + __kmpc_atomic_float4_add + __kmpc_atomic_float4_add_cpt + __kmpc_atomic_float4_add_float8 + __kmpc_atomic_float4_add_fp + __kmpc_atomic_float4_div + __kmpc_atomic_float4_div_cpt + __kmpc_atomic_float4_div_cpt_rev + __kmpc_atomic_float4_div_float8 + __kmpc_atomic_float4_div_fp + __kmpc_atomic_float4_div_rev + __kmpc_atomic_float4_max + __kmpc_atomic_float4_max_cpt + __kmpc_atomic_float4_min + __kmpc_atomic_float4_min_cpt + __kmpc_atomic_float4_mul + __kmpc_atomic_float4_mul_cpt + __kmpc_atomic_float4_mul_float8 + __kmpc_atomic_float4_mul_fp + __kmpc_atomic_float4_rd + __kmpc_atomic_float4_sub + __kmpc_atomic_float4_sub_cpt + __kmpc_atomic_float4_sub_cpt_rev + __kmpc_atomic_float4_sub_float8 + __kmpc_atomic_float4_sub_fp + __kmpc_atomic_float4_sub_rev + __kmpc_atomic_float4_swp + __kmpc_atomic_float4_wr + __kmpc_atomic_float8_add + __kmpc_atomic_float8_add_cpt + __kmpc_atomic_float8_add_fp + __kmpc_atomic_float8_div + __kmpc_atomic_float8_div_cpt + __kmpc_atomic_float8_div_cpt_rev + __kmpc_atomic_float8_div_fp + __kmpc_atomic_float8_div_rev + __kmpc_atomic_float8_max + __kmpc_atomic_float8_max_cpt + __kmpc_atomic_float8_min + __kmpc_atomic_float8_min_cpt + __kmpc_atomic_float8_mul + __kmpc_atomic_float8_mul_cpt + __kmpc_atomic_float8_mul_fp + __kmpc_atomic_float8_rd + __kmpc_atomic_float8_sub + __kmpc_atomic_float8_sub_cpt + __kmpc_atomic_float8_sub_cpt_rev + __kmpc_atomic_float8_sub_fp + __kmpc_atomic_float8_sub_rev + __kmpc_atomic_float8_swp + __kmpc_atomic_float8_wr + __kmpc_atomic_float10_add + __kmpc_atomic_float10_add_cpt + __kmpc_atomic_float10_add_fp + __kmpc_atomic_float10_div + __kmpc_atomic_float10_div_cpt + __kmpc_atomic_float10_div_cpt_rev + __kmpc_atomic_float10_div_fp + __kmpc_atomic_float10_div_rev + __kmpc_atomic_float10_mul + __kmpc_atomic_float10_mul_cpt + __kmpc_atomic_float10_mul_fp + __kmpc_atomic_float10_rd + __kmpc_atomic_float10_sub + __kmpc_atomic_float10_sub_cpt + __kmpc_atomic_float10_sub_cpt_rev + __kmpc_atomic_float10_sub_fp + 
__kmpc_atomic_float10_sub_rev + __kmpc_atomic_float10_swp + __kmpc_atomic_float10_wr + __kmpc_atomic_float16_add + __kmpc_atomic_float16_add_cpt + __kmpc_atomic_float16_div + __kmpc_atomic_float16_div_cpt + __kmpc_atomic_float16_div_cpt_rev + __kmpc_atomic_float16_div_rev + __kmpc_atomic_float16_max + __kmpc_atomic_float16_max_cpt + __kmpc_atomic_float16_min + __kmpc_atomic_float16_min_cpt + __kmpc_atomic_float16_mul + __kmpc_atomic_float16_mul_cpt + __kmpc_atomic_float16_rd + __kmpc_atomic_float16_sub + __kmpc_atomic_float16_sub_cpt + __kmpc_atomic_float16_sub_cpt_rev + __kmpc_atomic_float16_sub_rev + __kmpc_atomic_float16_swp + __kmpc_atomic_float16_wr +@endcode + +Functions for Complex types +--------------------------- +Functions for complex types whose component floating point variables are of size 4,8,10 or 16 bytes. +The names here are based on the size of the component float, *not* the size of the complex type. So +`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
+ +@code + __kmpc_atomic_cmplx4_add + __kmpc_atomic_cmplx4_add_cmplx8 + __kmpc_atomic_cmplx4_add_cpt + __kmpc_atomic_cmplx4_div + __kmpc_atomic_cmplx4_div_cmplx8 + __kmpc_atomic_cmplx4_div_cpt + __kmpc_atomic_cmplx4_div_cpt_rev + __kmpc_atomic_cmplx4_div_rev + __kmpc_atomic_cmplx4_mul + __kmpc_atomic_cmplx4_mul_cmplx8 + __kmpc_atomic_cmplx4_mul_cpt + __kmpc_atomic_cmplx4_rd + __kmpc_atomic_cmplx4_sub + __kmpc_atomic_cmplx4_sub_cmplx8 + __kmpc_atomic_cmplx4_sub_cpt + __kmpc_atomic_cmplx4_sub_cpt_rev + __kmpc_atomic_cmplx4_sub_rev + __kmpc_atomic_cmplx4_swp + __kmpc_atomic_cmplx4_wr + __kmpc_atomic_cmplx8_add + __kmpc_atomic_cmplx8_add_cpt + __kmpc_atomic_cmplx8_div + __kmpc_atomic_cmplx8_div_cpt + __kmpc_atomic_cmplx8_div_cpt_rev + __kmpc_atomic_cmplx8_div_rev + __kmpc_atomic_cmplx8_mul + __kmpc_atomic_cmplx8_mul_cpt + __kmpc_atomic_cmplx8_rd + __kmpc_atomic_cmplx8_sub + __kmpc_atomic_cmplx8_sub_cpt + __kmpc_atomic_cmplx8_sub_cpt_rev + __kmpc_atomic_cmplx8_sub_rev + __kmpc_atomic_cmplx8_swp + __kmpc_atomic_cmplx8_wr + __kmpc_atomic_cmplx10_add + __kmpc_atomic_cmplx10_add_cpt + __kmpc_atomic_cmplx10_div + __kmpc_atomic_cmplx10_div_cpt + __kmpc_atomic_cmplx10_div_cpt_rev + __kmpc_atomic_cmplx10_div_rev + __kmpc_atomic_cmplx10_mul + __kmpc_atomic_cmplx10_mul_cpt + __kmpc_atomic_cmplx10_rd + __kmpc_atomic_cmplx10_sub + __kmpc_atomic_cmplx10_sub_cpt + __kmpc_atomic_cmplx10_sub_cpt_rev + __kmpc_atomic_cmplx10_sub_rev + __kmpc_atomic_cmplx10_swp + __kmpc_atomic_cmplx10_wr + __kmpc_atomic_cmplx16_add + __kmpc_atomic_cmplx16_add_cpt + __kmpc_atomic_cmplx16_div + __kmpc_atomic_cmplx16_div_cpt + __kmpc_atomic_cmplx16_div_cpt_rev + __kmpc_atomic_cmplx16_div_rev + __kmpc_atomic_cmplx16_mul + __kmpc_atomic_cmplx16_mul_cpt + __kmpc_atomic_cmplx16_rd + __kmpc_atomic_cmplx16_sub + __kmpc_atomic_cmplx16_sub_cpt + __kmpc_atomic_cmplx16_sub_cpt_rev + __kmpc_atomic_cmplx16_swp + __kmpc_atomic_cmplx16_wr +@endcode +*/ + +/*! 
+@ingroup ATOMIC_OPS +@{ +*/ + +/* + * Global vars + */ + +#ifndef KMP_GOMP_COMPAT +int __kmp_atomic_mode = 1; // Intel perf +#else +int __kmp_atomic_mode = 2; // GOMP compatibility +#endif /* KMP_GOMP_COMPAT */ + +KMP_ALIGN(128) + +kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ +kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ +kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ +kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ +kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ +kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ +kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ +kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for float complex data type */ +kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ +kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ +kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type */ +kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex data type */ +kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ + + +/* + 2007-03-02: + Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a + bug on *_32 and *_32e. This is just a temporary workaround for the problem. + It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG + routines in assembler language. 
+*/ +#define KMP_ATOMIC_VOLATILE volatile + +#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD + + static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; }; + static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; }; + static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; }; + static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; }; + static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; } + static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; } + + static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; }; + static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; }; + static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; }; + static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; }; + static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; } + static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; } + + static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; }; + static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; }; + static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; }; + static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; }; + + static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; }; + static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; }; + static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; }; + static inline void operator /=( 
kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; }; + +#endif + +/* ------------------------------------------------------------------------ */ +/* ATOMIC implementation routines */ +/* one routine for each operation and operand type */ +/* ------------------------------------------------------------------------ */ + +// All routine declarations look like +// RET_TYPE __kmpc_atomic_TYPE_ID_OP_ID( ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs ); +// ------------------------------------------------------------------------ + +#define KMP_CHECK_GTID \ + if ( gtid == KMP_GTID_UNKNOWN ) { \ + gtid = __kmp_entry_gtid(); \ + } // check and get gtid when needed + +// Beginning of a definition (provides name, parameters, debug trace) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// OP_ID - operation identifier (add, sub, mul, ...) +// TYPE - operands' type; RET_TYPE - return type of the generated routine +#define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ +RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); + +// ------------------------------------------------------------------------ +// Lock variables used for critical sections for various size operands +#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat +#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char +#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short +#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int +#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float +#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int +#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double +#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex +#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double +#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad +#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex +#define ATOMIC_LOCK20c 
__kmp_atomic_lock_20c // long double complex +#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex + +// ------------------------------------------------------------------------ +// Operation on *lhs, rhs bound by critical section +// OP - operator (it's supposed to contain an assignment) +// LCK_ID - lock identifier +// Note: don't check gtid as it should always be valid +// 1, 2-byte - expect valid parameter, other - check before this macro +#define OP_CRITICAL(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + (*lhs) OP (rhs); \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); + +// ------------------------------------------------------------------------ +// For GNU compatibility, we may need to use a critical section, +// even though it is not required by the ISA. +// +// On IA-32 architecture, all atomic operations except for fixed 4 byte add, +// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common +// critical section. On Intel(R) 64, all atomic operations are done with fetch +// and add or compare and exchange. Therefore, the FLAG parameter to this +// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which +// require a critical section, where we predict that they will be implemented +// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). +// +// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, +// the FLAG parameter should always be 1. If we know that we will be using +// a critical section, then we want to make certain that we use the generic +// lock __kmp_atomic_lock to protect the atomic update, and not one of the +// locks that are specialized based upon the size or type of the data. +// +// If FLAG is 0, then we are relying on dead code elimination by the build +// compiler to get rid of the useless block of code, and save a needless +// branch at runtime. 
+// + +#ifdef KMP_GOMP_COMPAT +# define OP_GOMP_CRITICAL(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL( OP, 0 ); \ + return; \ + } +# else +# define OP_GOMP_CRITICAL(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + +#if KMP_MIC +# define KMP_DO_PAUSE _mm_delay_32( 1 ) +#else +# define KMP_DO_PAUSE KMP_CPU_PAUSE() +#endif /* KMP_MIC */ + +// ------------------------------------------------------------------------ +// Operation on *lhs, rhs using "compare_and_store" routine +// TYPE - operands' type +// BITS - size in bits, used to distinguish low level calls +// OP - operator +#define OP_CMPXCHG(TYPE,BITS,OP) \ + { \ + TYPE old_value, new_value; \ + old_value = *(TYPE volatile *)lhs; \ + new_value = old_value OP rhs; \ + while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ + { \ + KMP_DO_PAUSE; \ + \ + old_value = *(TYPE volatile *)lhs; \ + new_value = old_value OP rhs; \ + } \ + } + +#if USE_CMPXCHG_FIX +// 2007-06-25: +// workaround for C78287 (complex(kind=4) data type) +// lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm) +// Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro. +// This is a problem of the compiler. +// Related tracker is C76005, targeted to 11.0. +// I verified the asm of the workaround. +#define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ + { \ + struct _sss { \ + TYPE cmp; \ + kmp_int##BITS *vvv; \ + }; \ + struct _sss old_value, new_value; \ + old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \ + new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \ + *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ + new_value.cmp = old_value.cmp OP rhs; \ + while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ + *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \ + { \ + KMP_DO_PAUSE; \ + \ + *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ + new_value.cmp = old_value.cmp OP rhs; \ + } \ + } +// end of the first part of the workaround for C78287 +#endif // USE_CMPXCHG_FIX + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +// ------------------------------------------------------------------------ +// X86 or X86_64: no alignment problems ==================================== +#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ + KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ +} +// ------------------------------------------------------------------------- +#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + OP_CMPXCHG(TYPE,BITS,OP) \ +} +#if USE_CMPXCHG_FIX +// ------------------------------------------------------------------------- +// workaround for C78287 (complex(kind=4) data type) +#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ +} +// end of the second part of the workaround for C78287 +#endif + +#else +// ------------------------------------------------------------------------- +// Code for other architectures that don't handle unaligned accesses. +#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ + KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ + } \ +} +// ------------------------------------------------------------------------- +#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ + } \ +} +#if USE_CMPXCHG_FIX +// ------------------------------------------------------------------------- +// workaround for C78287 (complex(kind=4) data type) +#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ + } \ +} +// end of the second part of the workaround for C78287 +#endif // USE_CMPXCHG_FIX +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +// Routines for ATOMIC 4-byte operands addition and subtraction +ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add +ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub + +ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add +ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub + +// Routines for ATOMIC 8-byte operands addition and subtraction +ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add +ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub + +ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add +ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub + +// ------------------------------------------------------------------------ +// Entries definition for integer operands +// TYPE_ID - operands type and size (fixed4, float4) +// OP_ID - operation identifier (add, sub, mul, ...) 
+// TYPE - operand type +// BITS - size in bits, used to distinguish low level calls +// OP - operator (used in critical section) +// LCK_ID - lock identifier, used to possibly distinguish lock variable +// MASK - used for alignment check (hex digits pasted after 0x) + +// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG +// ------------------------------------------------------------------------ +// Routines for ATOMIC integer operands, other operators +// ------------------------------------------------------------------------ +// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG +ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add +ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb +ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div +ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div +ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul +ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb +ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl +ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr +ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr +ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub +ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor +ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add +ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb +ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div +ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div +ATOMIC_CMPXCHG( fixed2, 
mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul +ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb +ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl +ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr +ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr +ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub +ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor +ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb +ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div +ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div +ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul +ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb +ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl +ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr +ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr +ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor +ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb +ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div +ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div +ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul +ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb 
+ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl +ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr +ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr +ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor +ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div +ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul +ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div +ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul +// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG + + +/* ------------------------------------------------------------------------ */ +/* Routines for C/C++ Reduction operators && and || */ +/* ------------------------------------------------------------------------ */ + +// ------------------------------------------------------------------------ +// Need separate macros for &&, || because there is no combined assignment +// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used +#define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ + OP_CRITICAL( = *lhs OP, LCK_ID ) \ +} + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +// ------------------------------------------------------------------------ +// X86 or X86_64: no alignment problems =================================== +#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ + OP_CMPXCHG(TYPE,BITS,OP) \ +} + +#else +// ------------------------------------------------------------------------ +// Code for other architectures that don't handle 
unaligned accesses. +#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \ + if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \ + } \ +} +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl +ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl +ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl +ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl +ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl +ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl +ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl +ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl + + +/* ------------------------------------------------------------------------- */ +/* Routines for Fortran operators that matched no one in C: */ +/* MAX, MIN, .EQV., .NEQV. */ +/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ +/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ +/* ------------------------------------------------------------------------- */ + +// ------------------------------------------------------------------------- +// MIN and MAX need separate macros +// OP - operator to check if we need any actions? +#define MIN_MAX_CRITSECT(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + if ( *lhs OP rhs ) { /* still need actions? 
*/ \ + *lhs = rhs; \ + } \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); + +// ------------------------------------------------------------------------- +#ifdef KMP_GOMP_COMPAT +#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \ + if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ + KMP_CHECK_GTID; \ + MIN_MAX_CRITSECT( OP, 0 ); \ + return; \ + } +#else +#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + +// ------------------------------------------------------------------------- +#define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + while ( old_value OP rhs && /* still need actions? */ \ + ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ + { \ + KMP_CPU_PAUSE(); \ + temp_val = *lhs; \ + old_value = temp_val; \ + } \ + } + +// ------------------------------------------------------------------------- +// 1-byte, 2-byte operands - use critical section +#define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + if ( *lhs OP rhs ) { /* need actions? */ \ + GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ + MIN_MAX_CRITSECT(OP,LCK_ID) \ + } \ +} + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +// ------------------------------------------------------------------------- +// X86 or X86_64: no alignment problems ==================================== +#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + if ( *lhs OP rhs ) { \ + GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ + MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ + } \ +} + +#else +// ------------------------------------------------------------------------- +// Code for other architectures that don't handle unaligned accesses. 
+#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + if ( *lhs OP rhs ) { \ + GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ + if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \ + } \ + } \ +} +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max +MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min +MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max +MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min +MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max +MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min +MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max +MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min +MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max +MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min +MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max +MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min +#if KMP_HAVE_QUAD +MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max +MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min +#if ( KMP_ARCH_X86 ) + MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16 + MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // 
__kmpc_atomic_float16_min_a16 +#endif +#endif +// ------------------------------------------------------------------------ +// Need separate macros for .EQV. because of the need of complement (~) +// OP ignored for critical sections, ^=~ used instead +#define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \ +} + +// ------------------------------------------------------------------------ +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +// ------------------------------------------------------------------------ +// X86 or X86_64: no alignment problems =================================== +#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ + OP_CMPXCHG(TYPE,BITS,OP) \ +} +// ------------------------------------------------------------------------ +#else +// ------------------------------------------------------------------------ +// Code for other architectures that don't handle unaligned accesses. +#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \ + if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \ + } \ +} +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv +ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv +ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv +ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv +ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv +ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv +ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv +ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv + + +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// TYPE_ID, OP_ID, TYPE - detailed above +// OP - operator +// LCK_ID - lock identifier, used to possibly distinguish lock variable +#define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ +} + +/* ------------------------------------------------------------------------- */ +// routines for long double type +ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add +ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub +ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul +ATOMIC_CRITICAL( float10, div, long double, /, 
10r, 1 ) // __kmpc_atomic_float10_div +#if KMP_HAVE_QUAD +// routines for _Quad type +ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add +ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub +ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul +ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16 + ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16 + ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16 + ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16 +#endif +#endif +// routines for complex types + +#if USE_CMPXCHG_FIX +// workaround for C78287 (complex(kind=4) data type) +ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add +ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub +ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul +ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div +// end of the workaround for C78287 +#else +ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add +ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub +ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul +ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div +#endif // USE_CMPXCHG_FIX + +ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add +ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub +ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // 
__kmpc_atomic_cmplx8_mul +ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div +ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add +ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub +ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul +ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add +ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub +ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul +ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16 + ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16 + ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16 + ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16 +#endif +#endif + +#if OMP_40_ENABLED + +// OpenMP 4.0: x = expr binop x for non-commutative operations. 
+// Supported only on IA-32 architecture and Intel(R) 64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +// ------------------------------------------------------------------------ +// Operation on *lhs, rhs bound by critical section, with reversed operands: +// (*lhs) = (rhs) OP (*lhs) +// OP - binary operator (used as is, without an assignment) +// LCK_ID - lock identifier +// Note: don't check gtid as it should always be valid +// 1, 2-byte - expect valid parameter, other - check before this macro +#define OP_CRITICAL_REV(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + (*lhs) = (rhs) OP (*lhs); \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); + +// GOMP compatibility: when FLAG is set and __kmp_atomic_mode == 2, perform the reversed update under ATOMIC_LOCK0 and return +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_REV(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_REV( OP, 0 ); \ + return; \ + } +#else +#define OP_GOMP_CRITICAL_REV(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + + +// Beginning of a definition (provides name, parameters, debug trace) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// OP_ID - operation identifier (add, sub, mul, ...) 
+// TYPE - operands' type +// RET_TYPE - return type of the generated __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev routine +#define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ +RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid )); + +// ------------------------------------------------------------------------ +// Operation on *lhs, rhs using "compare_and_store" routine +// TYPE - operands' type +// BITS - size in bits, used to distinguish low level calls +// OP - operator (applied with reversed operands: new_value = rhs OP old_value) +// Note: temp_val introduced in order to force the compiler to read +// *lhs only once (w/o it the compiler reads *lhs twice) +// The loop re-reads *lhs and recomputes new_value until the compare-and-store succeeds +#define OP_CMPXCHG_REV(TYPE,BITS,OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ + { \ + KMP_DO_PAUSE; \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + } \ + } + +// ------------------------------------------------------------------------- +// Defines a void __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev routine: GOMP-compat critical path first, then the compare-and-store loop +#define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ + OP_CMPXCHG_REV(TYPE,BITS,OP) \ +} + +// ------------------------------------------------------------------------ +// Entries definition for integer operands +// TYPE_ID - operands type and size (fixed4, float4) +// OP_ID - operation identifier (add, sub, mul, ...) 
+// TYPE - operand type +// BITS - size in bits, used to distinguish low level calls +// OP - operator (used in critical section) +// LCK_ID - lock identifier, used to possibly distinguish lock variable + +// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG +// ------------------------------------------------------------------------ +// Routines for ATOMIC integer operands, other operators +// ------------------------------------------------------------------------ +// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG +ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev +ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev +ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev +ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev +ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev +ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev + +ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev +ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev +ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev +ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev +ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev +ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev + +ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev +ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev +ATOMIC_CMPXCHG_REV( 
fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev +ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev +ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev +ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev + +ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev +ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev +ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev +ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev +ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev +ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev + +ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev +ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev + +ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev +ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev +// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG + +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// TYPE_ID, OP_ID, TYPE - detailed above +// OP - operator +// LCK_ID - lock identifier, used to possibly distinguish lock variable +#define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ + OP_CRITICAL_REV(OP,LCK_ID) \ +} + +/* 
------------------------------------------------------------------------- */ +// routines for long double type +ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev +ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev +#if KMP_HAVE_QUAD +// routines for _Quad type +ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev +ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev + ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev +#endif +#endif + +// routines for complex types +ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev +ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev +ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev +ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev +ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev +ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev +ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev + ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev +#endif +#endif + + +#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 +// End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
+ +#endif //OMP_40_ENABLED + + +/* ------------------------------------------------------------------------ */ +/* Routines for mixed types of LHS and RHS, when RHS is "larger" */ +/* Note: in order to reduce the total number of types combinations */ +/* it is supposed that compiler converts RHS to longest floating type,*/ +/* that is _Quad, before call to any of these routines */ +/* Conversion to _Quad will be done by the compiler during calculation, */ +/* conversion back to TYPE - before the assignment, like: */ +/* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */ +/* Performance penalty expected because of SW emulation use */ +/* ------------------------------------------------------------------------ */ + +#define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ +void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid )); + +// ------------------------------------------------------------------------- +#define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ +} + +// ------------------------------------------------------------------------- +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +// ------------------------------------------------------------------------- +// X86 or X86_64: no alignment problems ==================================== +#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + OP_CMPXCHG(TYPE,BITS,OP) \ +} +// ------------------------------------------------------------------------- +#else +// 
------------------------------------------------------------------------ +// Code for other architectures that don't handle unaligned accesses. +#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ + } \ +} +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +// RHS=float8 +ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8 +ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8 +ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8 +ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8 +ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8 +ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8 +ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8 +ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8 +ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8 +ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8 +ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8 
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8 + +// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) +#if KMP_HAVE_QUAD +ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp +ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp +ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp +ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp +ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp + +ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp +ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp +ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp +ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp +ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp + +ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp +ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp +ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp +ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp +ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp + +ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // 
__kmpc_atomic_fixed8_add_fp +ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp +ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp +ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp +ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp + +ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp +ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp +ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp +ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp + +ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp +ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp +ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp +ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp + +ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp +ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp +ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp +ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp +#endif + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +// ------------------------------------------------------------------------ +// X86 or X86_64: no 
alignment problems ==================================== +#if USE_CMPXCHG_FIX +// workaround for C78287 (complex(kind=4) data type) +#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ +} +// end of the second part of the workaround for C78287 +#else +#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + OP_CMPXCHG(TYPE,BITS,OP) \ +} +#endif // USE_CMPXCHG_FIX +#else +// ------------------------------------------------------------------------ +// Code for other architectures that don't handle unaligned accesses. +#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ + OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ + } \ +} +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8 +ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8 +ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8 +ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8 + +// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ------------------------------------------------------------------------ +// Atomic READ routines +// ------------------------------------------------------------------------ + +// ------------------------------------------------------------------------ +// Beginning of a definition (provides name, parameters, debug trace) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// OP_ID - operation identifier (add, sub, mul, ...) 
+// TYPE - operands' type +// RET_TYPE - return type of the generated routine +#define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ +RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); + +// ------------------------------------------------------------------------ +// Read of *loc using "compare_and_store_ret" routine +// (the value read is passed as both the compare and the store operand) +// TYPE - operands' type +// BITS - size in bits, used to distinguish low level calls +// OP - operator (not used by this macro) +// Note: temp_val introduced in order to force the compiler to read +// *loc only once (w/o it the compiler reads *loc twice) +// TODO: check if it is still necessary +// Return old value regardless of the result of "compare & swap" operation +// Note: new_value is not declared here; it must be declared by the expanding routine + +#define OP_CMPXCHG_READ(TYPE,BITS,OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + union f_i_union { \ + TYPE f_val; \ + kmp_int##BITS i_val; \ + }; \ + union f_i_union old_value; \ + temp_val = *loc; \ + old_value.f_val = temp_val; \ + old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \ + new_value = old_value.f_val; \ + return new_value; \ + } + +// ------------------------------------------------------------------------- +// Read of *loc into new_value bound by critical section +// OP - operator (not used by this macro) +// LCK_ID - lock identifier +// Note: don't check gtid as it should always be valid +// 1, 2-byte - expect valid parameter, other - check before this macro +#define OP_CRITICAL_READ(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + new_value = (*loc); \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); + +// ------------------------------------------------------------------------- +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_READ(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode 
== 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_READ( OP, 0 ); \ + return new_value; \ + } +#else +#define OP_GOMP_CRITICAL_READ(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + +// ------------------------------------------------------------------------- +#define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ + new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \ + return new_value; \ +} +// ------------------------------------------------------------------------- +#define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ + OP_CMPXCHG_READ(TYPE,BITS,OP) \ +} +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// TYPE_ID, OP_ID, TYPE - detailed above +// OP - operator +// LCK_ID - lock identifier, used to possibly distinguish lock variable +#define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \ + return new_value; \ +} + +// ------------------------------------------------------------------------ +// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work. +// Let's return the read value through the additional parameter. 
+#if ( KMP_OS_WINDOWS ) + +// Copies *loc into *out under the atomic lock selected by LCK_ID; OP is not used by this macro +#define OP_CRITICAL_READ_WRK(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + (*out) = (*loc); \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); +// ------------------------------------------------------------------------ +// GOMP compatibility: when FLAG is set and __kmp_atomic_mode == 2 the read is done under ATOMIC_LOCK0 +// and the routine returns right away (same shape as OP_GOMP_CRITICAL_READ), so that the regular +// critical read that follows in ATOMIC_CRITICAL_READ_WRK does not overwrite *out a second time +// under a different lock +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_READ_WRK( OP, 0 ); \ + return; \ + } +#else +#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ +// ------------------------------------------------------------------------ +// Beginning of a definition of a void read routine that returns the value through the extra first parameter "out" +#define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ +void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); + +// ------------------------------------------------------------------------ +#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ + OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \ +} + +#endif // KMP_OS_WINDOWS + +// ------------------------------------------------------------------------ +// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG +ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd +ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd +ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd +ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd + +// !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic +ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd +ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd + +ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd +#endif // KMP_HAVE_QUAD + +// Fix for CQ220361 on Windows* OS +#if ( KMP_OS_WINDOWS ) + ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd +#else + ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd +#endif +ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd +ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd + ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd +#endif +#endif + + +// ------------------------------------------------------------------------ +// Atomic WRITE routines +// ------------------------------------------------------------------------ + +#define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ + KMP_XCHG_FIXED##BITS( lhs, rhs ); \ +} +// ------------------------------------------------------------------------ +#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ + KMP_XCHG_REAL##BITS( lhs, rhs ); \ +} + + +// ------------------------------------------------------------------------ +// Operation on *lhs, rhs 
using "compare_and_store" routine +// TYPE - operands' type +// BITS - size in bits, used to distinguish low level calls +// OP - operator +// Note: temp_val introduced in order to force the compiler to read +// *lhs only once (w/o it the compiler reads *lhs twice) +#define OP_CMPXCHG_WR(TYPE,BITS,OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ + { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + } \ + } + +// ------------------------------------------------------------------------- +#define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ + OP_CMPXCHG_WR(TYPE,BITS,OP) \ +} + +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// TYPE_ID, OP_ID, TYPE - detailed above +// OP - operator +// LCK_ID - lock identifier, used to possibly distinguish lock variable +#define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ + OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(OP,LCK_ID) /* send assignment */ \ +} +// ------------------------------------------------------------------------- + +ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr +ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr +ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr +#if ( KMP_ARCH_X86 ) + ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr +#else + ATOMIC_XCHG_WR( 
fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr +#endif + +ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr +#if ( KMP_ARCH_X86 ) + ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr +#else + ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr +#endif + +ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr +#endif +ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr +ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr +ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr + ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr +#endif +#endif + + +// ------------------------------------------------------------------------ +// Atomic CAPTURE routines +// ------------------------------------------------------------------------ + +// Beginning of a definition (provides name, parameters, debug trace) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operands' type +#define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \ +RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); + +// ------------------------------------------------------------------------- +// Operation on *lhs, rhs bound by critical section +// OP - operator (it's supposed to contain an assignment) +// LCK_ID - lock identifier +// Note: don't check gtid as it should always be valid +// 1, 2-byte - expect valid parameter, other - check before this macro +#define OP_CRITICAL_CPT(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + if( flag ) { \ + (*lhs) OP rhs; \ + new_value = (*lhs); \ + } else { \ + new_value = (*lhs); \ + (*lhs) OP rhs; \ + } \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + return new_value; + +// ------------------------------------------------------------------------ +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_CPT(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT( OP##=, 0 ); \ + } +#else +#define OP_GOMP_CRITICAL_CPT(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + +// ------------------------------------------------------------------------ +// Operation on *lhs, rhs using "compare_and_store" routine +// TYPE - operands' type +// BITS - size in bits, used to distinguish low level calls +// OP - operator +// Note: temp_val introduced in order to force the compiler to read +// *lhs only once (w/o it the compiler reads *lhs twice) +#define OP_CMPXCHG_CPT(TYPE,BITS,OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = old_value OP rhs; \ + while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ + { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = old_value OP rhs; \ + } \ + if( flag ) { \ + return new_value; \ + } else \ + return old_value; \ + } + +// ------------------------------------------------------------------------- +#define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ + OP_CMPXCHG_CPT(TYPE,BITS,OP) \ +} + +// ------------------------------------------------------------------------- +#define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE old_value, new_value; \ + OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ + /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ + old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ + if( flag ) { \ + return old_value OP rhs; \ + } else \ + return old_value; \ +} +// ------------------------------------------------------------------------- + +ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt +ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt +ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt +ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt + +ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt +ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt +ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt +ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // 
__kmpc_atomic_float8_sub_cpt + +// ------------------------------------------------------------------------ +// Entries definition for integer operands +// TYPE_ID - operands type and size (fixed4, float4) +// OP_ID - operation identifier (add, sub, mul, ...) +// TYPE - operand type +// BITS - size in bits, used to distinguish low level calls +// OP - operator (used in critical section) +// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG +// ------------------------------------------------------------------------ +// Routines for ATOMIC integer operands, other operators +// ------------------------------------------------------------------------ +// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG +ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt +ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt +ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt +ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt +ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt +ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt +ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt +ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt +ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt +ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt +ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt +ATOMIC_CMPXCHG_CPT( fixed2, 
div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt +ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt +ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt +ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt +ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt +ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt +ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt +ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt +ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt +ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt +ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt +ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt +ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt +ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt +ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt +ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt 
+ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt +ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt +ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt +ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt +ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt +ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt +ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt +ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt +ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt +ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt +// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG + +// ------------------------------------------------------------------------ +// Routines for C/C++ Reduction operators && and || +// ------------------------------------------------------------------------ + +// ------------------------------------------------------------------------- +// Operation on *lhs, rhs bound by critical section +// OP - operator (it's supposed to contain an assignment) +// LCK_ID - lock identifier +// flag - nonzero: new_value captures the value after the update; zero: the value before it +// Note: don't check gtid as it should always be valid +// 1, 2-byte - expect valid parameter, other - check before this macro +// Note: the result is stored back to *lhs on both paths; capturing alone would lose the update +#define OP_CRITICAL_L_CPT(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + if( flag ) { \ + new_value OP rhs; \ + (*lhs) = new_value; \ + } else { \ + new_value = (*lhs); \ + (*lhs) OP rhs; \ + } \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); + +//
------------------------------------------------------------------------ +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_L_CPT( OP, 0 ); \ + return new_value; \ + } +#else +#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + +// ------------------------------------------------------------------------ +// Need separate macros for &&, || because there is no combined assignment +#define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \ + OP_CMPXCHG_CPT(TYPE,BITS,OP) \ +} + +ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt +ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt +ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt +ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt +ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt +ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt +ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt +ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt + + +// ------------------------------------------------------------------------- +// Routines for Fortran operators that matched no one in C: +// MAX, MIN, .EQV., .NEQV. +// Operators .AND., .OR. 
are covered by __kmpc_atomic_*_{andl,orl}_cpt +// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt +// ------------------------------------------------------------------------- + +// ------------------------------------------------------------------------- +// MIN and MAX need separate macros +// OP - operator to check if we need any actions? +// flag - nonzero: capture the value after the update; zero: capture the value before it +// Note: new_value is assigned on every path; if the re-check under the lock fails +// (another thread already stored a better value) the current *lhs is captured +#define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + if ( *lhs OP rhs ) { /* still need actions? */ \ + old_value = *lhs; \ + *lhs = rhs; \ + if ( flag ) \ + new_value = rhs; \ + else \ + new_value = old_value; \ + } else { /* no update needed any more */ \ + new_value = *lhs; \ + } \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + return new_value; + +// ------------------------------------------------------------------------- +#ifdef KMP_GOMP_COMPAT +#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \ + if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ + KMP_CHECK_GTID; \ + MIN_MAX_CRITSECT_CPT( OP, 0 ); \ + } +#else +#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + +// ------------------------------------------------------------------------- +#define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + /*TYPE old_value; */ \ + temp_val = *lhs; \ + old_value = temp_val; \ + while ( old_value OP rhs && /* still need actions? */ \ + ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ + { \ + KMP_CPU_PAUSE(); \ + temp_val = *lhs; \ + old_value = temp_val; \ + } \ + if( flag ) \ + return rhs; \ + else \ + return old_value; \ + } + +// ------------------------------------------------------------------------- +// 1-byte, 2-byte operands - use critical section +#define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value, old_value; \ + if ( *lhs OP rhs ) { /* need actions?
*/ \ + GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ + MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ + } \ + return *lhs; \ +} + +#define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value, old_value; \ + if ( *lhs OP rhs ) { \ + GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ + MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ + } \ + return *lhs; \ +} + + +MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt +MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt +MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt +MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt +MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt +MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt +MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt +MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt +MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt +MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt +MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt +MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt +#if KMP_HAVE_QUAD +MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt +MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt +#if ( KMP_ARCH_X86 ) + MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt + MIN_MAX_CRITICAL_CPT( float16, 
min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt +#endif +#endif + +// ------------------------------------------------------------------------ +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT( OP, 0 ); \ + } +#else +#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ +// ------------------------------------------------------------------------ +// .EQV. is encoded as "x ^ ~rhs": the compare-and-store path gets OP "^~", +// the GOMP critical path gets the assignment form "^=~" +#define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \ + OP_CMPXCHG_CPT(TYPE,BITS,OP) \ +} + +// ------------------------------------------------------------------------ + +ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt +ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt +ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt +ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt +ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt +ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt +ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt +ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt + +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// TYPE_ID, OP_ID, TYPE - detailed above +// OP - operator +// LCK_ID - lock identifier, used to possibly distinguish lock variable +#define
ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \ +} + +// ------------------------------------------------------------------------ + +// Workaround for cmplx4. Regular routines with return value don't work +// on Win_32e. Let's return captured values through the additional parameter. +#define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + if( flag ) { \ + (*lhs) OP rhs; \ + (*out) = (*lhs); \ + } else { \ + (*out) = (*lhs); \ + (*lhs) OP rhs; \ + } \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + return; +// ------------------------------------------------------------------------ + +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT_WRK( OP##=, 0 ); \ + } +#else +#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ +// ------------------------------------------------------------------------ + +#define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ +void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); +// ------------------------------------------------------------------------ + +#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ + OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \ + OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \ +} +// The end of workaround for cmplx4 + +/* ------------------------------------------------------------------------- */ +// routines for long double type +ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // 
__kmpc_atomic_float10_add_cpt +ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt +ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt +ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt +#if KMP_HAVE_QUAD +// routines for _Quad type +ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt +ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt +ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt +ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt + ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt + ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt + ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt +#endif +#endif + +// routines for complex types + +// cmplx4 routines to return void +ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt +ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt +ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt +ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt + +ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt +ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt +ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt 
+ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt +ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt +ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt +ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt +ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt +ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt +ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt +ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt + ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt + ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt + ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt +#endif +#endif + +#if OMP_40_ENABLED + +// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations. 
+// Supported only on IA-32 architecture and Intel(R) 64 + +// ------------------------------------------------------------------------- +// Operation on *lhs, rhs bound by critical section +// OP - operator (it's supposed to contain an assignment) +// LCK_ID - lock identifier +// Note: don't check gtid as it should always be valid +// 1, 2-byte - expect valid parameter, other - check before this macro +#define OP_CRITICAL_CPT_REV(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + if( flag ) { \ + /*temp_val = (*lhs);*/\ + (*lhs) = (rhs) OP (*lhs); \ + new_value = (*lhs); \ + } else { \ + new_value = (*lhs);\ + (*lhs) = (rhs) OP (*lhs); \ + } \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + return new_value; + +// ------------------------------------------------------------------------ +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT_REV( OP, 0 ); \ + } +#else +#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ + +// ------------------------------------------------------------------------ +// Operation on *lhs, rhs using "compare_and_store" routine +// TYPE - operands' type +// BITS - size in bits, used to distinguish low level calls +// OP - operator +// Note: temp_val introduced in order to force the compiler to read +// *lhs only once (w/o it the compiler reads *lhs twice) +#define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ + { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + } \ + if( flag ) { \ + return new_value; \ + } else \ + return old_value; \ + } + +// ------------------------------------------------------------------------- +#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ + OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ +} + + +ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed2, 
sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev +// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG + + +// 
------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// TYPE_ID, OP_ID, TYPE - detailed above +// OP - operator +// LCK_ID - lock identifier, used to possibly distinguish lock variable +#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ + TYPE new_value; \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\ + OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ + OP_CRITICAL_CPT_REV(OP,LCK_ID) \ +} + + +/* ------------------------------------------------------------------------- */ +// routines for long double type +ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev +#if KMP_HAVE_QUAD +// routines for _Quad type +ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev + ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev +#endif +#endif + +// routines for complex types + +// ------------------------------------------------------------------------ + +// Workaround for cmplx4. Regular routines with return value don't work +// on Win_32e. Let's return captured values through the additional parameter. 
+#define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + if( flag ) { \ + (*lhs) = (rhs) OP (*lhs); \ + (*out) = (*lhs); \ + } else { \ + (*out) = (*lhs); \ + (*lhs) = (rhs) OP (*lhs); \ + } \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + return; +// ------------------------------------------------------------------------ + +#ifdef KMP_GOMP_COMPAT +#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \ + } +#else +#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) +#endif /* KMP_GOMP_COMPAT */ +// ------------------------------------------------------------------------ + +#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ + OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \ + OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ +} +// The end of workaround for cmplx4 + + +// !!! 
TODO: check if we need to return void for cmplx4 routines +// cmplx4 routines to return void +ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev + +ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev +ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev + ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev +#endif +#endif + +// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} + +#define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ +TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); + +#define CRITICAL_SWP(LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + old_value = (*lhs); \ + (*lhs) = rhs; \ + \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + return old_value; + +// ------------------------------------------------------------------------ +#ifdef KMP_GOMP_COMPAT +#define GOMP_CRITICAL_SWP(FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { 
\ + KMP_CHECK_GTID; \ + CRITICAL_SWP( 0 ); \ + } +#else +#define GOMP_CRITICAL_SWP(FLAG) +#endif /* KMP_GOMP_COMPAT */ + + +#define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ +ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \ + return old_value; \ +} +// ------------------------------------------------------------------------ +#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ +ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \ + return old_value; \ +} + +// ------------------------------------------------------------------------ +#define CMPXCHG_SWP(TYPE,BITS) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ + { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + } \ + return old_value; \ + } + +// ------------------------------------------------------------------------- +#define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ +ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + CMPXCHG_SWP(TYPE,BITS) \ +} + +ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp +ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp +ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp + +ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp + +#if ( KMP_ARCH_X86 ) + ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp + ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp 
+#else + ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp + ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp +#endif + +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +#define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + CRITICAL_SWP(LCK_ID) \ +} + +// ------------------------------------------------------------------------ + +// !!! TODO: check if we need to return void for cmplx4 routines +// Workaround for cmplx4. Regular routines with return value don't work +// on Win_32e. Let's return captured values through the additional parameter. + +#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ +void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \ +{ \ + KMP_DEBUG_ASSERT( __kmp_init_serial ); \ + KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); + + +#define CRITICAL_SWP_WRK(LCK_ID) \ + __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + \ + tmp = (*lhs); \ + (*lhs) = (rhs); \ + (*out) = tmp; \ + __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ + return; + +// ------------------------------------------------------------------------ + +#ifdef KMP_GOMP_COMPAT +#define GOMP_CRITICAL_SWP_WRK(FLAG) \ + if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ + KMP_CHECK_GTID; \ + CRITICAL_SWP_WRK( 0 ); \ + } +#else +#define GOMP_CRITICAL_SWP_WRK(FLAG) +#endif /* KMP_GOMP_COMPAT */ +// ------------------------------------------------------------------------ + +#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \ +ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ + TYPE tmp; \ + GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ + CRITICAL_SWP_WRK(LCK_ID) \ +} +// The end of workaround for cmplx4 + + +ATOMIC_CRITICAL_SWP( float10, long 
double, 10r, 1 ) // __kmpc_atomic_float10_swp +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp +#endif +// cmplx4 routine to return void +ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp + +//ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp + + +ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp +ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp +#if KMP_HAVE_QUAD +ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp +#if ( KMP_ARCH_X86 ) + ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp + ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp +#endif +#endif + + +// End of OpenMP 4.0 Capture + +#endif //OMP_40_ENABLED + +#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 + + +#undef OP_CRITICAL + +/* ------------------------------------------------------------------------ */ +/* Generic atomic routines */ +/* ------------------------------------------------------------------------ */ + +void +__kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + if ( +#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) + FALSE /* must use lock */ +#else + TRUE +#endif + ) + { + kmp_int8 old_value, new_value; + + old_value = *(kmp_int8 *) lhs; + (*f)( &new_value, &old_value, rhs ); + + /* TODO: Should this be acquire or release? */ + while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs, + *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) ) + { + KMP_CPU_PAUSE(); + + old_value = *(kmp_int8 *) lhs; + (*f)( &new_value, &old_value, rhs ); + } + + return; + } + else { + // + // All 1-byte data is of integer data type. 
+ // + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid ); + } +} + +void +__kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + if ( +#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) + FALSE /* must use lock */ +#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 + TRUE /* no alignment problems */ +#else + ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */ +#endif + ) + { + kmp_int16 old_value, new_value; + + old_value = *(kmp_int16 *) lhs; + (*f)( &new_value, &old_value, rhs ); + + /* TODO: Should this be acquire or release? */ + while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs, + *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) ) + { + KMP_CPU_PAUSE(); + + old_value = *(kmp_int16 *) lhs; + (*f)( &new_value, &old_value, rhs ); + } + + return; + } + else { + // + // All 2-byte data is of integer data type. 
+ // + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid ); + } +} + +void +__kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + if ( + // + // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. + // Gomp compatibility is broken if this routine is called for floats. + // +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + TRUE /* no alignment problems */ +#else + ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */ +#endif + ) + { + kmp_int32 old_value, new_value; + + old_value = *(kmp_int32 *) lhs; + (*f)( &new_value, &old_value, rhs ); + + /* TODO: Should this be acquire or release? */ + while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs, + *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) ) + { + KMP_CPU_PAUSE(); + + old_value = *(kmp_int32 *) lhs; + (*f)( &new_value, &old_value, rhs ); + } + + return; + } + else { + // + // Use __kmp_atomic_lock_4i for all 4-byte data, + // even if it isn't of integer data type. 
+ // + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid ); + } +} + +void +__kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + if ( + +#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) + FALSE /* must use lock */ +#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 + TRUE /* no alignment problems */ +#else + ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */ +#endif + ) + { + kmp_int64 old_value, new_value; + + old_value = *(kmp_int64 *) lhs; + (*f)( &new_value, &old_value, rhs ); + /* TODO: Should this be acquire or release? */ + while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs, + *(kmp_int64 *) &old_value, + *(kmp_int64 *) &new_value ) ) + { + KMP_CPU_PAUSE(); + + old_value = *(kmp_int64 *) lhs; + (*f)( &new_value, &old_value, rhs ); + } + + return; + } else { + // + // Use __kmp_atomic_lock_8i for all 8-byte data, + // even if it isn't of integer data type. 
+ // + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid ); + } +} + +void +__kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid ); +} + +void +__kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid ); +} + +void +__kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & 
__kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid ); +} + +void +__kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid ); + + (*f)( lhs, lhs, rhs ); + +#ifdef KMP_GOMP_COMPAT + if ( __kmp_atomic_mode == 2 ) { + __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); + } + else +#endif /* KMP_GOMP_COMPAT */ + __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid ); +} + +// AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler +// duplicated in order to not use 3-party names in pure Intel code +// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. +void +__kmpc_atomic_start(void) +{ + int gtid = __kmp_entry_gtid(); + KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); +} + + +void +__kmpc_atomic_end(void) +{ + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ +/*! 
+@} +*/ + +// end of file diff --git a/contrib/libs/cxxsupp/openmp/kmp_atomic.h b/contrib/libs/cxxsupp/openmp/kmp_atomic.h index 586848e921b..33feae2189f 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_atomic.h +++ b/contrib/libs/cxxsupp/openmp/kmp_atomic.h @@ -1,1038 +1,1038 @@ -/* - * kmp_atomic.h - ATOMIC header file - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_ATOMIC_H -#define KMP_ATOMIC_H - -#include "kmp_os.h" -#include "kmp_lock.h" - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -// C++ build port. -// Intel compiler does not support _Complex datatype on win. -// Intel compiler supports _Complex datatype on lin and mac. -// On the other side, there is a problem of stack alignment on lin_32 and mac_32 -// if the rhs is cmplx80 or cmplx128 typedef'ed datatype. -// The decision is: to use compiler supported _Complex type on lin and mac, -// to use typedef'ed types on win. -// Condition for WIN64 was modified in anticipation of 10.1 build compiler. - -#if defined( __cplusplus ) && ( KMP_OS_WINDOWS ) - // create shortcuts for c99 complex types - - #if (_MSC_VER < 1600) && defined(_DEBUG) - // Workaround for the problem of _DebugHeapTag unresolved external. 
- // This problem prevented to use our static debug library for C tests - // compiled with /MDd option (the library itself built with /MTd), - #undef _DEBUG - #define _DEBUG_TEMPORARILY_UNSET_ - #endif - - #include - - template< typename type_lhs, typename type_rhs > - std::complex< type_lhs > __kmp_lhs_div_rhs( - const std::complex< type_lhs >& lhs, - const std::complex< type_rhs >& rhs ) { - type_lhs a = lhs.real(); - type_lhs b = lhs.imag(); - type_rhs c = rhs.real(); - type_rhs d = rhs.imag(); - type_rhs den = c*c + d*d; - type_rhs r = ( a*c + b*d ); - type_rhs i = ( b*c - a*d ); - std::complex< type_lhs > ret( r/den, i/den ); - return ret; - } - - // complex8 - struct __kmp_cmplx64_t : std::complex< double > { - - __kmp_cmplx64_t() : std::complex< double > () {} - - __kmp_cmplx64_t( const std::complex< double >& cd ) - : std::complex< double > ( cd ) {} - - void operator /= ( const __kmp_cmplx64_t& rhs ) { - std::complex< double > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx64_t operator / ( const __kmp_cmplx64_t& rhs ) { - std::complex< double > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - }; - typedef struct __kmp_cmplx64_t kmp_cmplx64; - - // complex4 - struct __kmp_cmplx32_t : std::complex< float > { - - __kmp_cmplx32_t() : std::complex< float > () {} - - __kmp_cmplx32_t( const std::complex& cf ) - : std::complex< float > ( cf ) {} - - __kmp_cmplx32_t operator + ( const __kmp_cmplx32_t& b ) { - std::complex< float > lhs = *this; - std::complex< float > rhs = b; - return ( lhs + rhs ); - } - __kmp_cmplx32_t operator - ( const __kmp_cmplx32_t& b ) { - std::complex< float > lhs = *this; - std::complex< float > rhs = b; - return ( lhs - rhs ); - } - __kmp_cmplx32_t operator * ( const __kmp_cmplx32_t& b ) { - std::complex< float > lhs = *this; - std::complex< float > rhs = b; - return ( lhs * rhs ); - } - - __kmp_cmplx32_t operator + ( const kmp_cmplx64& b ) { - kmp_cmplx64 t = kmp_cmplx64( *this ) + b; - 
std::complex< double > d( t ); - std::complex< float > f( d ); - __kmp_cmplx32_t r( f ); - return r; - } - __kmp_cmplx32_t operator - ( const kmp_cmplx64& b ) { - kmp_cmplx64 t = kmp_cmplx64( *this ) - b; - std::complex< double > d( t ); - std::complex< float > f( d ); - __kmp_cmplx32_t r( f ); - return r; - } - __kmp_cmplx32_t operator * ( const kmp_cmplx64& b ) { - kmp_cmplx64 t = kmp_cmplx64( *this ) * b; - std::complex< double > d( t ); - std::complex< float > f( d ); - __kmp_cmplx32_t r( f ); - return r; - } - - void operator /= ( const __kmp_cmplx32_t& rhs ) { - std::complex< float > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx32_t operator / ( const __kmp_cmplx32_t& rhs ) { - std::complex< float > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - void operator /= ( const kmp_cmplx64& rhs ) { - std::complex< float > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx32_t operator / ( const kmp_cmplx64& rhs ) { - std::complex< float > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - }; - typedef struct __kmp_cmplx32_t kmp_cmplx32; - - // complex10 - struct KMP_DO_ALIGN( 16 ) __kmp_cmplx80_t : std::complex< long double > { - - __kmp_cmplx80_t() : std::complex< long double > () {} - - __kmp_cmplx80_t( const std::complex< long double >& cld ) - : std::complex< long double > ( cld ) {} - - void operator /= ( const __kmp_cmplx80_t& rhs ) { - std::complex< long double > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx80_t operator / ( const __kmp_cmplx80_t& rhs ) { - std::complex< long double > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - }; - typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80; - - // complex16 - #if KMP_HAVE_QUAD - struct __kmp_cmplx128_t : std::complex< _Quad > { - - __kmp_cmplx128_t() : std::complex< _Quad > () {} - - __kmp_cmplx128_t( const std::complex< _Quad >& cq ) - : std::complex< _Quad > ( cq ) {} - - void operator /= 
( const __kmp_cmplx128_t& rhs ) { - std::complex< _Quad > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx128_t operator / ( const __kmp_cmplx128_t& rhs ) { - std::complex< _Quad > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - }; - typedef struct __kmp_cmplx128_t kmp_cmplx128; - #endif /* KMP_HAVE_QUAD */ - - #ifdef _DEBUG_TEMPORARILY_UNSET_ - #undef _DEBUG_TEMPORARILY_UNSET_ - // Set it back now - #define _DEBUG 1 - #endif - -#else - // create shortcuts for c99 complex types - typedef float _Complex kmp_cmplx32; - typedef double _Complex kmp_cmplx64; - typedef long double _Complex kmp_cmplx80; - #if KMP_HAVE_QUAD - typedef _Quad _Complex kmp_cmplx128; - #endif -#endif - -// Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad -// and kmp_cmplx128) on IA-32 architecture. The following aligned structures -// are implemented to support the old alignment in 10.1, 11.0, 11.1 and -// introduce the new alignment in 12.0. See CQ88405. -#if KMP_ARCH_X86 && KMP_HAVE_QUAD - - // 4-byte aligned structures for backward compatibility. 
- - #pragma pack( push, 4 ) - - - struct KMP_DO_ALIGN( 4 ) Quad_a4_t { - _Quad q; - - Quad_a4_t( ) : q( ) {} - Quad_a4_t( const _Quad & cq ) : q ( cq ) {} - - Quad_a4_t operator + ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs + rhs ); - } - - Quad_a4_t operator - ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs - rhs ); - } - Quad_a4_t operator * ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs * rhs ); - } - - Quad_a4_t operator / ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs / rhs ); - } - - }; - - struct KMP_DO_ALIGN( 4 ) kmp_cmplx128_a4_t { - kmp_cmplx128 q; - - kmp_cmplx128_a4_t() : q () {} - - kmp_cmplx128_a4_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {} - - kmp_cmplx128_a4_t operator + ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs + rhs ); - } - kmp_cmplx128_a4_t operator - ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs - rhs ); - } - kmp_cmplx128_a4_t operator * ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs * rhs ); - } - - kmp_cmplx128_a4_t operator / ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs / rhs ); - } - - }; - - #pragma pack( pop ) - - // New 16-byte aligned structures for 12.0 compiler. 
- struct KMP_DO_ALIGN( 16 ) Quad_a16_t { - _Quad q; - - Quad_a16_t( ) : q( ) {} - Quad_a16_t( const _Quad & cq ) : q ( cq ) {} - - Quad_a16_t operator + ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs + rhs ); - } - - Quad_a16_t operator - ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs - rhs ); - } - Quad_a16_t operator * ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs * rhs ); - } - - Quad_a16_t operator / ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs / rhs ); - } - }; - - struct KMP_DO_ALIGN( 16 ) kmp_cmplx128_a16_t { - kmp_cmplx128 q; - - kmp_cmplx128_a16_t() : q () {} - - kmp_cmplx128_a16_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {} - - kmp_cmplx128_a16_t operator + ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs + rhs ); - } - kmp_cmplx128_a16_t operator - ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs - rhs ); - } - kmp_cmplx128_a16_t operator * ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs * rhs ); - } - - kmp_cmplx128_a16_t operator / ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs / rhs ); - } - }; - -#endif - -#if ( KMP_ARCH_X86 ) - #define QUAD_LEGACY Quad_a4_t - #define CPLX128_LEG kmp_cmplx128_a4_t -#else - #define QUAD_LEGACY _Quad - #define CPLX128_LEG kmp_cmplx128 -#endif - -#ifdef __cplusplus - extern "C" { -#endif - -extern int __kmp_atomic_mode; - -// -// Atomic locks can easily become contended, so we use queuing locks for them. 
-// - -typedef kmp_queuing_lock_t kmp_atomic_lock_t; - -static inline void -__kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) -{ -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_wait_atomic)( - (ompt_wait_id_t) lck); - } -#endif - - __kmp_acquire_queuing_lock( lck, gtid ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)( - (ompt_wait_id_t) lck); - } -#endif -} - -static inline int -__kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_test_queuing_lock( lck, gtid ); -} - -static inline void -__kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) -{ - __kmp_release_queuing_lock( lck, gtid ); -#if OMPT_SUPPORT && OMPT_BLAME +/* + * kmp_atomic.h - ATOMIC header file + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_ATOMIC_H +#define KMP_ATOMIC_H + +#include "kmp_os.h" +#include "kmp_lock.h" + +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif + +// C++ build port. +// Intel compiler does not support _Complex datatype on win. +// Intel compiler supports _Complex datatype on lin and mac. +// On the other side, there is a problem of stack alignment on lin_32 and mac_32 +// if the rhs is cmplx80 or cmplx128 typedef'ed datatype. +// The decision is: to use compiler supported _Complex type on lin and mac, +// to use typedef'ed types on win. +// Condition for WIN64 was modified in anticipation of 10.1 build compiler. 
+ +#if defined( __cplusplus ) && ( KMP_OS_WINDOWS ) + // create shortcuts for c99 complex types + + #if (_MSC_VER < 1600) && defined(_DEBUG) + // Workaround for the problem of _DebugHeapTag unresolved external. + // This problem prevented to use our static debug library for C tests + // compiled with /MDd option (the library itself built with /MTd), + #undef _DEBUG + #define _DEBUG_TEMPORARILY_UNSET_ + #endif + + #include + + template< typename type_lhs, typename type_rhs > + std::complex< type_lhs > __kmp_lhs_div_rhs( + const std::complex< type_lhs >& lhs, + const std::complex< type_rhs >& rhs ) { + type_lhs a = lhs.real(); + type_lhs b = lhs.imag(); + type_rhs c = rhs.real(); + type_rhs d = rhs.imag(); + type_rhs den = c*c + d*d; + type_rhs r = ( a*c + b*d ); + type_rhs i = ( b*c - a*d ); + std::complex< type_lhs > ret( r/den, i/den ); + return ret; + } + + // complex8 + struct __kmp_cmplx64_t : std::complex< double > { + + __kmp_cmplx64_t() : std::complex< double > () {} + + __kmp_cmplx64_t( const std::complex< double >& cd ) + : std::complex< double > ( cd ) {} + + void operator /= ( const __kmp_cmplx64_t& rhs ) { + std::complex< double > lhs = *this; + *this = __kmp_lhs_div_rhs( lhs, rhs ); + } + + __kmp_cmplx64_t operator / ( const __kmp_cmplx64_t& rhs ) { + std::complex< double > lhs = *this; + return __kmp_lhs_div_rhs( lhs, rhs ); + } + + }; + typedef struct __kmp_cmplx64_t kmp_cmplx64; + + // complex4 + struct __kmp_cmplx32_t : std::complex< float > { + + __kmp_cmplx32_t() : std::complex< float > () {} + + __kmp_cmplx32_t( const std::complex& cf ) + : std::complex< float > ( cf ) {} + + __kmp_cmplx32_t operator + ( const __kmp_cmplx32_t& b ) { + std::complex< float > lhs = *this; + std::complex< float > rhs = b; + return ( lhs + rhs ); + } + __kmp_cmplx32_t operator - ( const __kmp_cmplx32_t& b ) { + std::complex< float > lhs = *this; + std::complex< float > rhs = b; + return ( lhs - rhs ); + } + __kmp_cmplx32_t operator * ( const __kmp_cmplx32_t& b ) { + 
std::complex< float > lhs = *this; + std::complex< float > rhs = b; + return ( lhs * rhs ); + } + + __kmp_cmplx32_t operator + ( const kmp_cmplx64& b ) { + kmp_cmplx64 t = kmp_cmplx64( *this ) + b; + std::complex< double > d( t ); + std::complex< float > f( d ); + __kmp_cmplx32_t r( f ); + return r; + } + __kmp_cmplx32_t operator - ( const kmp_cmplx64& b ) { + kmp_cmplx64 t = kmp_cmplx64( *this ) - b; + std::complex< double > d( t ); + std::complex< float > f( d ); + __kmp_cmplx32_t r( f ); + return r; + } + __kmp_cmplx32_t operator * ( const kmp_cmplx64& b ) { + kmp_cmplx64 t = kmp_cmplx64( *this ) * b; + std::complex< double > d( t ); + std::complex< float > f( d ); + __kmp_cmplx32_t r( f ); + return r; + } + + void operator /= ( const __kmp_cmplx32_t& rhs ) { + std::complex< float > lhs = *this; + *this = __kmp_lhs_div_rhs( lhs, rhs ); + } + + __kmp_cmplx32_t operator / ( const __kmp_cmplx32_t& rhs ) { + std::complex< float > lhs = *this; + return __kmp_lhs_div_rhs( lhs, rhs ); + } + + void operator /= ( const kmp_cmplx64& rhs ) { + std::complex< float > lhs = *this; + *this = __kmp_lhs_div_rhs( lhs, rhs ); + } + + __kmp_cmplx32_t operator / ( const kmp_cmplx64& rhs ) { + std::complex< float > lhs = *this; + return __kmp_lhs_div_rhs( lhs, rhs ); + } + }; + typedef struct __kmp_cmplx32_t kmp_cmplx32; + + // complex10 + struct KMP_DO_ALIGN( 16 ) __kmp_cmplx80_t : std::complex< long double > { + + __kmp_cmplx80_t() : std::complex< long double > () {} + + __kmp_cmplx80_t( const std::complex< long double >& cld ) + : std::complex< long double > ( cld ) {} + + void operator /= ( const __kmp_cmplx80_t& rhs ) { + std::complex< long double > lhs = *this; + *this = __kmp_lhs_div_rhs( lhs, rhs ); + } + + __kmp_cmplx80_t operator / ( const __kmp_cmplx80_t& rhs ) { + std::complex< long double > lhs = *this; + return __kmp_lhs_div_rhs( lhs, rhs ); + } + + }; + typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80; + + // complex16 + #if KMP_HAVE_QUAD + struct 
__kmp_cmplx128_t : std::complex< _Quad > { + + __kmp_cmplx128_t() : std::complex< _Quad > () {} + + __kmp_cmplx128_t( const std::complex< _Quad >& cq ) + : std::complex< _Quad > ( cq ) {} + + void operator /= ( const __kmp_cmplx128_t& rhs ) { + std::complex< _Quad > lhs = *this; + *this = __kmp_lhs_div_rhs( lhs, rhs ); + } + + __kmp_cmplx128_t operator / ( const __kmp_cmplx128_t& rhs ) { + std::complex< _Quad > lhs = *this; + return __kmp_lhs_div_rhs( lhs, rhs ); + } + + }; + typedef struct __kmp_cmplx128_t kmp_cmplx128; + #endif /* KMP_HAVE_QUAD */ + + #ifdef _DEBUG_TEMPORARILY_UNSET_ + #undef _DEBUG_TEMPORARILY_UNSET_ + // Set it back now + #define _DEBUG 1 + #endif + +#else + // create shortcuts for c99 complex types + typedef float _Complex kmp_cmplx32; + typedef double _Complex kmp_cmplx64; + typedef long double _Complex kmp_cmplx80; + #if KMP_HAVE_QUAD + typedef _Quad _Complex kmp_cmplx128; + #endif +#endif + +// Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad +// and kmp_cmplx128) on IA-32 architecture. The following aligned structures +// are implemented to support the old alignment in 10.1, 11.0, 11.1 and +// introduce the new alignment in 12.0. See CQ88405. +#if KMP_ARCH_X86 && KMP_HAVE_QUAD + + // 4-byte aligned structures for backward compatibility. 
+ + #pragma pack( push, 4 ) + + + struct KMP_DO_ALIGN( 4 ) Quad_a4_t { + _Quad q; + + Quad_a4_t( ) : q( ) {} + Quad_a4_t( const _Quad & cq ) : q ( cq ) {} + + Quad_a4_t operator + ( const Quad_a4_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)( lhs + rhs ); + } + + Quad_a4_t operator - ( const Quad_a4_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)( lhs - rhs ); + } + Quad_a4_t operator * ( const Quad_a4_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)( lhs * rhs ); + } + + Quad_a4_t operator / ( const Quad_a4_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)( lhs / rhs ); + } + + }; + + struct KMP_DO_ALIGN( 4 ) kmp_cmplx128_a4_t { + kmp_cmplx128 q; + + kmp_cmplx128_a4_t() : q () {} + + kmp_cmplx128_a4_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {} + + kmp_cmplx128_a4_t operator + ( const kmp_cmplx128_a4_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)( lhs + rhs ); + } + kmp_cmplx128_a4_t operator - ( const kmp_cmplx128_a4_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)( lhs - rhs ); + } + kmp_cmplx128_a4_t operator * ( const kmp_cmplx128_a4_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)( lhs * rhs ); + } + + kmp_cmplx128_a4_t operator / ( const kmp_cmplx128_a4_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)( lhs / rhs ); + } + + }; + + #pragma pack( pop ) + + // New 16-byte aligned structures for 12.0 compiler. 
+ struct KMP_DO_ALIGN( 16 ) Quad_a16_t { + _Quad q; + + Quad_a16_t( ) : q( ) {} + Quad_a16_t( const _Quad & cq ) : q ( cq ) {} + + Quad_a16_t operator + ( const Quad_a16_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)( lhs + rhs ); + } + + Quad_a16_t operator - ( const Quad_a16_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)( lhs - rhs ); + } + Quad_a16_t operator * ( const Quad_a16_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)( lhs * rhs ); + } + + Quad_a16_t operator / ( const Quad_a16_t& b ) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)( lhs / rhs ); + } + }; + + struct KMP_DO_ALIGN( 16 ) kmp_cmplx128_a16_t { + kmp_cmplx128 q; + + kmp_cmplx128_a16_t() : q () {} + + kmp_cmplx128_a16_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {} + + kmp_cmplx128_a16_t operator + ( const kmp_cmplx128_a16_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)( lhs + rhs ); + } + kmp_cmplx128_a16_t operator - ( const kmp_cmplx128_a16_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)( lhs - rhs ); + } + kmp_cmplx128_a16_t operator * ( const kmp_cmplx128_a16_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)( lhs * rhs ); + } + + kmp_cmplx128_a16_t operator / ( const kmp_cmplx128_a16_t& b ) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)( lhs / rhs ); + } + }; + +#endif + +#if ( KMP_ARCH_X86 ) + #define QUAD_LEGACY Quad_a4_t + #define CPLX128_LEG kmp_cmplx128_a4_t +#else + #define QUAD_LEGACY _Quad + #define CPLX128_LEG kmp_cmplx128 +#endif + +#ifdef __cplusplus + extern "C" { +#endif + +extern int __kmp_atomic_mode; + +// +// Atomic locks can easily become contended, so we use queuing locks for them. 
+// + +typedef kmp_queuing_lock_t kmp_atomic_lock_t; + +static inline void +__kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) +{ +#if OMPT_SUPPORT && OMPT_TRACE if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_release_atomic)( - (ompt_wait_id_t) lck); - } -#endif -} - -static inline void -__kmp_init_atomic_lock( kmp_atomic_lock_t *lck ) -{ - __kmp_init_queuing_lock( lck ); -} - -static inline void -__kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck ) -{ - __kmp_destroy_queuing_lock( lck ); -} - -// Global Locks - -extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ -extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/ -extern kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control 
access to all user coded atomics for long double complex type*/ -extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ - -// -// Below routines for atomic UPDATE are listed -// - -// 1-byte -void __kmpc_atomic_fixed1_add( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_andb( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_div( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_div( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed1_mul( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_orb( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_shl( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_shr( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_shr( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed1_sub( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_xor( ident_t *id_ref, int gtid, char * lhs, char rhs ); -// 2-byte -void __kmpc_atomic_fixed2_add( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_andb( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_div( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_div( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); -void __kmpc_atomic_fixed2_mul( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_orb( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_shl( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_shr( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_shr( ident_t *id_ref, int gtid, unsigned short * 
lhs, unsigned short rhs ); -void __kmpc_atomic_fixed2_sub( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_xor( ident_t *id_ref, int gtid, short * lhs, short rhs ); -// 4-byte add / sub fixed -void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_sub( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -// 4-byte add / sub float -void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float4_sub( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -// 8-byte add / sub fixed -void __kmpc_atomic_fixed8_add( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_sub( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -// 8-byte add / sub float -void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -// 4-byte fixed -void __kmpc_atomic_fixed4_andb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_div( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_div( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_orb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_shl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_shr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_shr( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed4_xor( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -// 8-byte fixed -void __kmpc_atomic_fixed8_andb( ident_t *id_ref, int gtid, 
kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_div( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_div( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_fixed8_mul( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_orb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_shl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_shr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_shr( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_fixed8_xor( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -// 4-byte float -void __kmpc_atomic_float4_div( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float4_mul( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -// 8-byte float -void __kmpc_atomic_float8_div( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -// 1-, 2-, 4-, 8-byte logical (&&, ||) -void __kmpc_atomic_fixed1_andl( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_orl( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_andl( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_orl( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_andl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_orl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_andl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_orl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -// MIN / MAX -void __kmpc_atomic_fixed1_max( 
ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_min( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_max( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_min( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_max( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_min( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_max( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_min( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 architecture only - void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); -#endif -#endif -// .NEQV. (same as xor) -void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_neqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_neqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -// .EQV. 
(same as ~xor) -void __kmpc_atomic_fixed1_eqv( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_eqv( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_eqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_eqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -// long double type -void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -// _Quad type -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_float16_add_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_sub_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); -#endif -#endif -// routines for complex types -void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx4_mul( ident_t *id_ref, int 
gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx4_div( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_sub( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_mul( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_div( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -#if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_cmplx16_add_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_sub_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); -#endif -#endif - -#if OMP_40_ENABLED - -// OpenMP 4.0: x = expr binop x for non-commutative operations. 
-// Supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -void __kmpc_atomic_fixed1_sub_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_div_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_div_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed1_shl_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_shr_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_shr_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed2_sub_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_div_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_div_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); -void __kmpc_atomic_fixed2_shl_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_shr_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_shr_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); -void __kmpc_atomic_fixed4_sub_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_div_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_div_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed4_shl_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_shr_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_shr_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed8_sub_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_div_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, 
kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_div_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_fixed8_shl_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_shr_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_shr_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_float4_sub_rev( ident_t *id_ref, int gtid, float * lhs, float rhs ); -void __kmpc_atomic_float4_div_rev( ident_t *id_ref, int gtid, float * lhs, float rhs ); -void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs ); -void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs ); -void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -#endif -void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -#if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, 
CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_float16_sub_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_div_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); -#endif -#endif // KMP_HAVE_QUAD - -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 - -#endif //OMP_40_ENABLED - -// routines for mixed types - -// RHS=float8 -void __kmpc_atomic_fixed1_mul_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed1_div_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed2_mul_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed2_div_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed4_div_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed8_mul_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed8_div_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float4_add_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float4_sub_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); - -// RHS=float16 (deprecated, to be removed when we are sure the 
compiler does not use them) -#if KMP_HAVE_QUAD -void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1_div_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1u_div_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); - -void __kmpc_atomic_fixed2_add_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_sub_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_mul_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_div_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2u_div_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs ); - -void __kmpc_atomic_fixed4_add_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4_sub_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4_mul_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4_div_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4u_div_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); - -void __kmpc_atomic_fixed8_add_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_sub_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_mul_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_div_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8u_div_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); - -void __kmpc_atomic_float4_add_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); -void 
__kmpc_atomic_float4_sub_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); -void __kmpc_atomic_float4_mul_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); -void __kmpc_atomic_float4_div_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); - -void __kmpc_atomic_float8_add_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); -void __kmpc_atomic_float8_sub_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); -void __kmpc_atomic_float8_mul_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); -void __kmpc_atomic_float8_div_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); - -void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -#endif // KMP_HAVE_QUAD - -// RHS=cmplx8 -void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx4_sub_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx4_mul_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx4_div_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); - -// generic atomic routines -void __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_10( ident_t 
*id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); - -// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -// -// Below routines for atomic READ are listed -// - -char __kmpc_atomic_fixed1_rd( ident_t *id_ref, int gtid, char * loc ); -short __kmpc_atomic_fixed2_rd( ident_t *id_ref, int gtid, short * loc ); -kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc ); -kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * loc ); -kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc ); -kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc ); -long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc ); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc ); -#endif -// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be -// returned through an additional parameter -#if ( KMP_OS_WINDOWS ) - void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 * out, ident_t *id_ref, int gtid, kmp_cmplx32 * loc ); -#else - kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 * loc ); -#endif -kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc ); -kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc ); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - 
Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc ); -#endif -#endif - - -// -// Below routines for atomic WRITE are listed -// - -void __kmpc_atomic_fixed1_wr( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_wr( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -#if KMP_HAVE_QUAD -void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -#endif -void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -#if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); -#endif -#endif - -// -// Below routines for atomic CAPTURE are listed -// - -// 1-byte -char __kmpc_atomic_fixed1_add_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_andb_cpt( ident_t *id_ref, int gtid, char * 
lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_div_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_div_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag); -char __kmpc_atomic_fixed1_mul_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_orb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_shl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_shr_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_shr_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag); -char __kmpc_atomic_fixed1_sub_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_xor_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -// 2-byte -short __kmpc_atomic_fixed2_add_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_andb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_div_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_div_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag); -short __kmpc_atomic_fixed2_mul_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_orb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_shl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_shr_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_shr_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag); -short __kmpc_atomic_fixed2_sub_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int 
flag); -short __kmpc_atomic_fixed2_xor_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -// 4-byte add / sub fixed -kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -// 4-byte add / sub float -kmp_real32 __kmpc_atomic_float4_add_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_sub_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -// 8-byte add / sub fixed -kmp_int64 __kmpc_atomic_fixed8_add_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -// 8-byte add / sub float -kmp_real64 __kmpc_atomic_float8_add_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_sub_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -// 4-byte fixed -kmp_int32 __kmpc_atomic_fixed4_andb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_div_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_mul_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_orb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_shl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_shr_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint32 * 
lhs, kmp_uint32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_xor_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -// 8-byte fixed -kmp_int64 __kmpc_atomic_fixed8_andb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_div_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_mul_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_orb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_shl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_shr_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_xor_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -// 4-byte float -kmp_real32 __kmpc_atomic_float4_div_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_mul_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -// 8-byte float -kmp_real64 __kmpc_atomic_float8_div_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_mul_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -// 1-, 2-, 4-, 8-byte logical (&&, ||) -char __kmpc_atomic_fixed1_andl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_orl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_andl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short 
__kmpc_atomic_fixed2_orl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_andl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_orl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_andl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_orl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -// MIN / MAX -char __kmpc_atomic_fixed1_max_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_min_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_max_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_min_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_max_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_min_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_max_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_min_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY 
__kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -#endif -// .NEQV. (same as xor) -char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_neqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_neqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -// .EQV. (same as ~xor) -char __kmpc_atomic_fixed1_eqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_eqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_eqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_eqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -// long double type -long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); -long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); -long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); -long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); -#if KMP_HAVE_QUAD -// _Quad type -QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); 
-#endif -// routines for complex types -// Workaround for cmplx4 routines - return void; captured value is returned via the argument -void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); -void __kmpc_atomic_cmplx4_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); -void __kmpc_atomic_cmplx4_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); -void __kmpc_atomic_cmplx4_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); - -kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -CPLX128_LEG 
__kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - Quad_a16_t __kmpc_atomic_float16_add_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_div_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_max_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_min_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); -#endif -#endif - -void __kmpc_atomic_start(void); -void __kmpc_atomic_end(void); - -#if OMP_40_ENABLED - -// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations. 
- -char __kmpc_atomic_fixed1_sub_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); -char __kmpc_atomic_fixed1_div_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag ); -char __kmpc_atomic_fixed1_shl_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs , int flag); -char __kmpc_atomic_fixed1_shr_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag ); -short __kmpc_atomic_fixed2_sub_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -short __kmpc_atomic_fixed2_div_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag ); -short __kmpc_atomic_fixed2_shl_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -short __kmpc_atomic_fixed2_shr_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev( ident_t *id_ref, int 
gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag ); -float __kmpc_atomic_float4_sub_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag ); -float __kmpc_atomic_float4_div_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag ); -double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag ); -double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag ); -long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); -long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); -QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); -#endif -// Workaround for cmplx4 routines - return void; captured value is returned via the argument -void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); -void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, 
kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); -kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag ); -kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag ); -kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); -kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); -CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); -#if ( KMP_ARCH_X86 ) - Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag ); - Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); -#endif -#endif - -// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} -char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs ); -short __kmpc_atomic_fixed2_swp( ident_t *id_ref, int gtid, short * lhs, short rhs ); -kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs ); -double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, 
double rhs ); -long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -#if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -#endif -// !!! TODO: check if we need a workaround here -void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out ); -//kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); - -kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -#if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); -#endif -#endif - -// End of OpenMP 4.0 capture - -#endif //OMP_40_ENABLED - -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#ifdef __cplusplus - } // extern "C" -#endif - -#endif /* KMP_ATOMIC_H */ - -// end of file + ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) { + ompt_callbacks.ompt_callback(ompt_event_wait_atomic)( + (ompt_wait_id_t) lck); + } +#endif + + __kmp_acquire_queuing_lock( lck, gtid ); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) { + ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)( + (ompt_wait_id_t) lck); + } +#endif +} + +static inline int +__kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) +{ + 
return __kmp_test_queuing_lock( lck, gtid ); +} + +static inline void +__kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) +{ + __kmp_release_queuing_lock( lck, gtid ); +#if OMPT_SUPPORT && OMPT_BLAME + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_release_atomic)) { + ompt_callbacks.ompt_callback(ompt_event_release_atomic)( + (ompt_wait_id_t) lck); + } +#endif +} + +static inline void +__kmp_init_atomic_lock( kmp_atomic_lock_t *lck ) +{ + __kmp_init_queuing_lock( lck ); +} + +static inline void +__kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck ) +{ + __kmp_destroy_queuing_lock( lck ); +} + +// Global Locks + +extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ +extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ +extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ +extern kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */ +extern kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ +extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ +extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/ +extern 
kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/ +extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ + +// +// Below routines for atomic UPDATE are listed +// + +// 1-byte +void __kmpc_atomic_fixed1_add( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_andb( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_div( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1u_div( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); +void __kmpc_atomic_fixed1_mul( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_orb( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_shl( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_shr( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1u_shr( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); +void __kmpc_atomic_fixed1_sub( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_xor( ident_t *id_ref, int gtid, char * lhs, char rhs ); +// 2-byte +void __kmpc_atomic_fixed2_add( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_andb( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_div( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2u_div( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); +void __kmpc_atomic_fixed2_mul( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_orb( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_shl( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_shr( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void 
__kmpc_atomic_fixed2u_shr( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); +void __kmpc_atomic_fixed2_sub( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_xor( ident_t *id_ref, int gtid, short * lhs, short rhs ); +// 4-byte add / sub fixed +void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_sub( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +// 4-byte add / sub float +void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +void __kmpc_atomic_float4_sub( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +// 8-byte add / sub fixed +void __kmpc_atomic_fixed8_add( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_sub( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +// 8-byte add / sub float +void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +// 4-byte fixed +void __kmpc_atomic_fixed4_andb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_div( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4u_div( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); +void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_orb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_shl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_shr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4u_shr( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); +void __kmpc_atomic_fixed4_xor( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +// 8-byte 
fixed +void __kmpc_atomic_fixed8_andb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_div( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8u_div( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); +void __kmpc_atomic_fixed8_mul( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_orb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_shl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_shr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8u_shr( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); +void __kmpc_atomic_fixed8_xor( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +// 4-byte float +void __kmpc_atomic_float4_div( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +void __kmpc_atomic_float4_mul( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +// 8-byte float +void __kmpc_atomic_float8_div( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +// 1-, 2-, 4-, 8-byte logical (&&, ||) +void __kmpc_atomic_fixed1_andl( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_orl( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed2_andl( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_orl( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed4_andl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_orl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed8_andl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_orl( ident_t *id_ref, int gtid, kmp_int64 * lhs, 
kmp_int64 rhs ); +// MIN / MAX +void __kmpc_atomic_fixed1_max( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_min( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed2_max( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_min( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed4_max( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_min( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed8_max( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_min( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +#if KMP_HAVE_QUAD +void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#if ( KMP_ARCH_X86 ) + // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 architecture only + void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); +#endif +#endif +// .NEQV. 
(same as xor) +void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed4_neqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed8_neqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +// .EQV. (same as ~xor) +void __kmpc_atomic_fixed1_eqv( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed2_eqv( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed4_eqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed8_eqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +// long double type +void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +// _Quad type +#if KMP_HAVE_QUAD +void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#if ( KMP_ARCH_X86 ) + // Routines with 16-byte arguments aligned to 16-byte boundary + void __kmpc_atomic_float16_add_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + void __kmpc_atomic_float16_sub_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + void 
__kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); +#endif +#endif +// routines for complex types +void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); +void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); +void __kmpc_atomic_cmplx4_mul( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); +void __kmpc_atomic_cmplx4_div( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); +void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx8_sub( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx8_mul( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx8_div( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD +void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +#if ( KMP_ARCH_X86 ) + // Routines with 16-byte arguments aligned to 16-byte boundary + void __kmpc_atomic_cmplx16_add_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); + void __kmpc_atomic_cmplx16_sub_a16( ident_t * id_ref, int gtid, 
kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); + void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); + void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +#endif +#endif + +#if OMP_40_ENABLED + +// OpenMP 4.0: x = expr binop x for non-commutative operations. +// Supported only on IA-32 architecture and Intel(R) 64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +void __kmpc_atomic_fixed1_sub_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_div_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1u_div_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); +void __kmpc_atomic_fixed1_shl_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_shr_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1u_shr_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); +void __kmpc_atomic_fixed2_sub_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_div_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2u_div_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); +void __kmpc_atomic_fixed2_shl_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_shr_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2u_shr_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); +void __kmpc_atomic_fixed4_sub_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_div_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4u_div_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); +void __kmpc_atomic_fixed4_shl_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, 
kmp_int32 rhs ); +void __kmpc_atomic_fixed4_shr_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4u_shr_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); +void __kmpc_atomic_fixed8_sub_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_div_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8u_div_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); +void __kmpc_atomic_fixed8_shl_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_shr_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8u_shr_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); +void __kmpc_atomic_float4_sub_rev( ident_t *id_ref, int gtid, float * lhs, float rhs ); +void __kmpc_atomic_float4_div_rev( ident_t *id_ref, int gtid, float * lhs, float rhs ); +void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs ); +void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs ); +void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +#if KMP_HAVE_QUAD +void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#endif +void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); +void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); +void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs 
); +void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD +void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +#if ( KMP_ARCH_X86 ) + // Routines with 16-byte arguments aligned to 16-byte boundary + void __kmpc_atomic_float16_sub_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + void __kmpc_atomic_float16_div_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); + void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +#endif +#endif // KMP_HAVE_QUAD + +#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 + +#endif //OMP_40_ENABLED + +// routines for mixed types + +// RHS=float8 +void __kmpc_atomic_fixed1_mul_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed1_div_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed2_mul_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed2_div_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed4_div_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed8_mul_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed8_div_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float4_add_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, 
kmp_real64 rhs ); +void __kmpc_atomic_float4_sub_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); + +// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) +#if KMP_HAVE_QUAD +void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); +void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); +void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); +void __kmpc_atomic_fixed1_div_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); +void __kmpc_atomic_fixed1u_div_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); + +void __kmpc_atomic_fixed2_add_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); +void __kmpc_atomic_fixed2_sub_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); +void __kmpc_atomic_fixed2_mul_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); +void __kmpc_atomic_fixed2_div_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); +void __kmpc_atomic_fixed2u_div_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs ); + +void __kmpc_atomic_fixed4_add_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); +void __kmpc_atomic_fixed4_sub_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); +void __kmpc_atomic_fixed4_mul_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); +void __kmpc_atomic_fixed4_div_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); +void __kmpc_atomic_fixed4u_div_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); + +void __kmpc_atomic_fixed8_add_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); +void __kmpc_atomic_fixed8_sub_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); +void 
__kmpc_atomic_fixed8_mul_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); +void __kmpc_atomic_fixed8_div_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); +void __kmpc_atomic_fixed8u_div_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); + +void __kmpc_atomic_float4_add_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); +void __kmpc_atomic_float4_sub_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); +void __kmpc_atomic_float4_mul_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); +void __kmpc_atomic_float4_div_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); + +void __kmpc_atomic_float8_add_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); +void __kmpc_atomic_float8_sub_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); +void __kmpc_atomic_float8_mul_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); +void __kmpc_atomic_float8_div_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); + +void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); +void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); +void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); +void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); +#endif // KMP_HAVE_QUAD + +// RHS=cmplx8 +void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx4_sub_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx4_mul_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx4_div_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); + +// generic atomic routines +void __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) 
); +void __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); +void __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); +void __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); +void __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); +void __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); +void __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); +void __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); + +// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +// +// Below routines for atomic READ are listed +// + +char __kmpc_atomic_fixed1_rd( ident_t *id_ref, int gtid, char * loc ); +short __kmpc_atomic_fixed2_rd( ident_t *id_ref, int gtid, short * loc ); +kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc ); +kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * loc ); +kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc ); +kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc ); +long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc ); +#if KMP_HAVE_QUAD +QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc ); +#endif +// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be +// returned through an additional parameter +#if ( KMP_OS_WINDOWS ) + void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 * out, ident_t *id_ref, int gtid, kmp_cmplx32 * loc ); +#else + kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 * loc ); +#endif 
+kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc ); +kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc ); +#if KMP_HAVE_QUAD +CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc ); +#if ( KMP_ARCH_X86 ) + // Routines with 16-byte arguments aligned to 16-byte boundary + Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc ); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc ); +#endif +#endif + + +// +// Below routines for atomic WRITE are listed +// + +void __kmpc_atomic_fixed1_wr( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed2_wr( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +#if KMP_HAVE_QUAD +void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#endif +void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); +void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD +void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +#if ( KMP_ARCH_X86 ) + // Routines with 16-byte arguments aligned to 16-byte boundary + void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + void 
__kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +#endif +#endif + +// +// Below routines for atomic CAPTURE are listed +// + +// 1-byte +char __kmpc_atomic_fixed1_add_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_andb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_div_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_div_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag); +char __kmpc_atomic_fixed1_mul_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_orb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_shl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_shr_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_shr_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag); +char __kmpc_atomic_fixed1_sub_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_xor_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +// 2-byte +short __kmpc_atomic_fixed2_add_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_andb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_div_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_div_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag); +short __kmpc_atomic_fixed2_mul_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_orb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_shl_cpt( ident_t *id_ref, 
int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_shr_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_shr_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag); +short __kmpc_atomic_fixed2_sub_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_xor_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +// 4-byte add / sub fixed +kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +// 4-byte add / sub float +kmp_real32 __kmpc_atomic_float4_add_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +kmp_real32 __kmpc_atomic_float4_sub_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +// 8-byte add / sub fixed +kmp_int64 __kmpc_atomic_fixed8_add_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_sub_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +// 8-byte add / sub float +kmp_real64 __kmpc_atomic_float8_add_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +kmp_real64 __kmpc_atomic_float8_sub_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +// 4-byte fixed +kmp_int32 __kmpc_atomic_fixed4_andb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_div_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_uint32 __kmpc_atomic_fixed4u_div_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_mul_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_orb_cpt( ident_t 
*id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_shl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_shr_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_xor_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +// 8-byte fixed +kmp_int64 __kmpc_atomic_fixed8_andb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_div_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_uint64 __kmpc_atomic_fixed8u_div_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_mul_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_orb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_shl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_shr_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_xor_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +// 4-byte float +kmp_real32 __kmpc_atomic_float4_div_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +kmp_real32 __kmpc_atomic_float4_mul_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +// 8-byte float +kmp_real64 __kmpc_atomic_float8_div_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +kmp_real64 __kmpc_atomic_float8_mul_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, 
kmp_real64 rhs, int flag); +// 1-, 2-, 4-, 8-byte logical (&&, ||) +char __kmpc_atomic_fixed1_andl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_orl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +short __kmpc_atomic_fixed2_andl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_orl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_andl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_orl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_andl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_orl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +// MIN / MAX +char __kmpc_atomic_fixed1_max_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_min_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +short __kmpc_atomic_fixed2_max_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_min_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_max_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_min_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_max_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_min_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +kmp_real64 
__kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +#if KMP_HAVE_QUAD +QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +#endif +// .NEQV. (same as xor) +char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_neqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_neqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +// .EQV. (same as ~xor) +char __kmpc_atomic_fixed1_eqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +short __kmpc_atomic_fixed2_eqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_eqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_eqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +// long double type +long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); +long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); +long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); +long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); +#if KMP_HAVE_QUAD +// _Quad type +QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY 
rhs, int flag); +QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +#endif +// routines for complex types +// Workaround for cmplx4 routines - return void; captured value is returned via the argument +void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); +void __kmpc_atomic_cmplx4_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); +void __kmpc_atomic_cmplx4_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); +void __kmpc_atomic_cmplx4_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); + +kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); +#if KMP_HAVE_QUAD +CPLX128_LEG 
__kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); +#if ( KMP_ARCH_X86 ) + // Routines with 16-byte arguments aligned to 16-byte boundary + Quad_a16_t __kmpc_atomic_float16_add_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); + Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); + Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); + Quad_a16_t __kmpc_atomic_float16_div_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); + Quad_a16_t __kmpc_atomic_float16_max_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); + Quad_a16_t __kmpc_atomic_float16_min_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); +#endif +#endif + +void __kmpc_atomic_start(void); +void __kmpc_atomic_end(void); + +#if OMP_40_ENABLED + +// OpenMP 4.0: v = x = expr binop x; { v 
= x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations. + +char __kmpc_atomic_fixed1_sub_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); +char __kmpc_atomic_fixed1_div_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); +unsigned char __kmpc_atomic_fixed1u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag ); +char __kmpc_atomic_fixed1_shl_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs , int flag); +char __kmpc_atomic_fixed1_shr_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); +unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag ); +short __kmpc_atomic_fixed2_sub_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); +short __kmpc_atomic_fixed2_div_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); +unsigned short __kmpc_atomic_fixed2u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag ); +short __kmpc_atomic_fixed2_shl_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); +short __kmpc_atomic_fixed2_shr_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); +unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag ); +kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); +kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); +kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag ); +kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); +kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 
rhs, int flag ); +kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag ); +kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); +kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); +kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag ); +kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); +kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); +kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag ); +float __kmpc_atomic_float4_sub_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag ); +float __kmpc_atomic_float4_div_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag ); +double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag ); +double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag ); +long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); +long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); +#if KMP_HAVE_QUAD +QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); +QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); +#endif +// Workaround for cmplx4 routines - return void; captured value is returned via the argument +void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, 
int flag ); +void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); +kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag ); +kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag ); +kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); +kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); +#if KMP_HAVE_QUAD +CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); +CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); +#if ( KMP_ARCH_X86 ) + Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag ); + Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag ); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); +#endif +#endif + +// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} +char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs ); +short __kmpc_atomic_fixed2_swp( ident_t *id_ref, int gtid, short * lhs, short rhs ); +kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs ); 
+double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs ); +long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +#if KMP_HAVE_QUAD +QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#endif +// !!! TODO: check if we need a workaround here +void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out ); +//kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); + +kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); +kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD +CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); +#if ( KMP_ARCH_X86 ) + Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); + kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +#endif +#endif + +// End of OpenMP 4.0 capture + +#endif //OMP_40_ENABLED + +#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#ifdef __cplusplus + } // extern "C" +#endif + +#endif /* KMP_ATOMIC_H */ + +// end of file diff --git a/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp b/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp index 23986c73ba4..6b66dabba2b 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp @@ -1,226 +1,226 @@ -/* - * kmp_barrier.cpp - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This 
file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_wait_release.h" -#include "kmp_stats.h" -#include "kmp_itt.h" -#include "kmp_os.h" - - -#if KMP_MIC -#include -#define USE_NGO_STORES 1 -#endif // KMP_MIC - -#if KMP_MIC && USE_NGO_STORES -// ICV copying -#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src)) -#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) -#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) -#define ngo_sync() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory") -#else -#define ngo_load(src) ((void)0) -#define ngo_store_icvs(dst, src) copy_icvs((dst), (src)) -#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE) -#define ngo_sync() ((void)0) -#endif /* KMP_MIC && USE_NGO_STORES */ - -void __kmp_print_structure(void); // Forward declaration - -// ---------------------------- Barrier Algorithms ---------------------------- - -// Linear Barrier -static void -__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather); +/* + * kmp_barrier.cpp + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_wait_release.h" +#include "kmp_stats.h" +#include "kmp_itt.h" +#include "kmp_os.h" + + +#if KMP_MIC +#include +#define USE_NGO_STORES 1 +#endif // KMP_MIC + +#if KMP_MIC && USE_NGO_STORES +// ICV copying +#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src)) +#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) +#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) +#define ngo_sync() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory") +#else +#define ngo_load(src) ((void)0) +#define ngo_store_icvs(dst, src) copy_icvs((dst), (src)) +#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE) +#define ngo_sync() ((void)0) +#endif /* KMP_MIC && USE_NGO_STORES */ + +void __kmp_print_structure(void); // Forward declaration + +// ---------------------------- Barrier Algorithms ---------------------------- + +// Linear Barrier +static void +__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + void (*reduce)(void *, void *) + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather); kmp_team_t *team = this_thr->th.th_team; kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; kmp_info_t **other_threads = team->t.t_threads; - - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); - } -#endif - // We now perform a linear reduction to signal that all of the threads have arrived. 
- if (!KMP_MASTER_TID(tid)) { - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived, - thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - // Mark arrival to master thread - /* After performing this write, a worker thread may not assume that the team is valid - any more - it could be deallocated by the master thread at any time. */ - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]); - flag.release(); - } else { + + KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); + +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance - save arrive time to the thread + if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); + } +#endif + // We now perform a linear reduction to signal that all of the threads have arrived. + if (!KMP_MASTER_TID(tid)) { + KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" + "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived, + thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); + // Mark arrival to master thread + /* After performing this write, a worker thread may not assume that the team is valid + any more - it could be deallocated by the master thread at any time. 
*/ + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]); + flag.release(); + } else { kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; int nproc = this_thr->th.th_team_nproc; int i; - // Don't have to worry about sleep bit here or atomic since team setting + // Don't have to worry about sleep bit here or atomic since team setting kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; - - // Collect all the worker team member threads. - for (i=1; ith.th_bar[bt].bb.b_arrived); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " - "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(i, team), team->t.t_id, i, - &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state)); - - // Wait for worker thread to arrive - kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and the other thread time to the thread. 
- if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - other_threads[i]->th.th_bar_min_time); - } -#endif - if (reduce) { - KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid, - team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i)); - (*reduce)(this_thr->th.th_local.reduce_data, - other_threads[i]->th.th_local.reduce_data); - } - } - // Don't have to worry about sleep bit here or atomic since team setting - team_bar->b_arrived = new_state; - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state)); - } - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -static void -__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release); + + // Collect all the worker team member threads. + for (i=1; ith.th_bar[bt].bb.b_arrived); +#endif /* KMP_CACHE_MANAGE */ + KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " + "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(i, team), team->t.t_id, i, + &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state)); + + // Wait for worker thread to arrive + kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state); + flag.wait(this_thr, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance - write min of the thread time and the other thread time to the thread. 
+ if (__kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, + other_threads[i]->th.th_bar_min_time); + } +#endif + if (reduce) { + KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid, + team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i)); + (*reduce)(this_thr->th.th_local.reduce_data, + other_threads[i]->th.th_local.reduce_data); + } + } + // Don't have to worry about sleep bit here or atomic since team setting + team_bar->b_arrived = new_state; + KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state)); + } + KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + +static void +__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs + USE_ITT_BUILD_ARG(void *itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release); kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; kmp_team_t *team; - - if (KMP_MASTER_TID(tid)) { + + if (KMP_MASTER_TID(tid)) { unsigned int i; kmp_uint32 nproc = this_thr->th.th_team_nproc; kmp_info_t **other_threads; - - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - other_threads = team->t.t_threads; - - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - - if (nproc > 1) { -#if KMP_BARRIER_ICV_PUSH - { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); - if (propagate_icvs) { - ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); - for (i=1; it.t_ident, team->t.t_threads[i], team, i, FALSE); - ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, - &team->t.t_implicit_task_taskdata[0].td_icvs); - } - ngo_sync(); - } - } -#endif // KMP_BARRIER_ICV_PUSH - - // Now, release all of the worker threads - for 
(i=1; ith.th_bar[bt].bb.b_go); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " - "go(%p): %u => %u\n", gtid, team->t.t_id, tid, - other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i, - &other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP)); - kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]); - flag.release(); - } - } - } else { // Wait for the MASTER thread to release us - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... - __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - // Early exit for reaping threads releasing forkjoin barrier - if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) ) - return; - // The worker thread may now assume that the team is valid. 
-#ifdef KMP_DEBUG - tid = __kmp_tid_from_gtid(gtid); - team = __kmp_threads[gtid]->th.th_team; -#endif - KMP_DEBUG_ASSERT(team != NULL); - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// Tree barrier -static void -__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather); + + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + other_threads = team->t.t_threads; + + KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + + if (nproc > 1) { +#if KMP_BARRIER_ICV_PUSH + { + KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + if (propagate_icvs) { + ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); + for (i=1; it.t_ident, team->t.t_threads[i], team, i, FALSE); + ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, + &team->t.t_implicit_task_taskdata[0].td_icvs); + } + ngo_sync(); + } + } +#endif // KMP_BARRIER_ICV_PUSH + + // Now, release all of the worker threads + for (i=1; ith.th_bar[bt].bb.b_go); +#endif /* KMP_CACHE_MANAGE */ + KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " + "go(%p): %u => %u\n", gtid, team->t.t_id, tid, + other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i, + &other_threads[i]->th.th_bar[bt].bb.b_go, + other_threads[i]->th.th_bar[bt].bb.b_go, + other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP)); + kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]); + flag.release(); + } + } + } else { 
// Wait for the MASTER thread to release us + KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n", + gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { + // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled) + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); + // Cancel wait on previous parallel region... + __kmp_itt_task_starting(itt_sync_obj); + + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj != NULL) + // Call prepare as early as possible for "new" barrier + __kmp_itt_task_finished(itt_sync_obj); + } else +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + // Early exit for reaping threads releasing forkjoin barrier + if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) ) + return; + // The worker thread may now assume that the team is valid. +#ifdef KMP_DEBUG + tid = __kmp_tid_from_gtid(gtid); + team = __kmp_threads[gtid]->th.th_team; +#endif + KMP_DEBUG_ASSERT(team != NULL); + TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); + KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. 
+ } + KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + +// Tree barrier +static void +__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + void (*reduce)(void *, void *) + USE_ITT_BUILD_ARG(void *itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather); kmp_team_t *team = this_thr->th.th_team; kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; kmp_info_t **other_threads = team->t.t_threads; @@ -230,92 +230,92 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, kmp_uint32 child; kmp_uint32 child_tid; kmp_uint64 new_state; - - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); - } -#endif - // Perform tree gather to wait until all threads have arrived; reduce any required data as we go - child_tid = (tid << branch_bits) + 1; - if (child_tid < nproc) { - // Parent threads wait for all their children to arrive - new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - child = 1; - do { + + KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); + +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance - save arrive time to the thread + if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); + } +#endif + // Perform tree gather to wait until all threads have arrived; 
reduce any required data as we go + child_tid = (tid << branch_bits) + 1; + if (child_tid < nproc) { + // Parent threads wait for all their children to arrive + new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + child = 1; + do { kmp_info_t *child_thr = other_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; -#if KMP_CACHE_MANAGE - // Prefetch next thread's arrived count - if (child+1 <= branch_factor && child_tid+1 < nproc) - KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " - "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, - &child_bar->b_arrived, new_state)); - // Wait for child to arrive - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and a child time to the thread. 
- if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - child_thr->th.th_bar_min_time); - } -#endif - if (reduce) { - KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - } - child++; - child_tid++; - } - while (child <= branch_factor && child_tid < nproc); - } - - if (!KMP_MASTER_TID(tid)) { // Worker threads +#if KMP_CACHE_MANAGE + // Prefetch next thread's arrived count + if (child+1 <= branch_factor && child_tid+1 < nproc) + KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived); +#endif /* KMP_CACHE_MANAGE */ + KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " + "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, + &child_bar->b_arrived, new_state)); + // Wait for child to arrive + kmp_flag_64 flag(&child_bar->b_arrived, new_state); + flag.wait(this_thr, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance - write min of the thread time and a child time to the thread. 
+ if (__kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, + child_thr->th.th_bar_min_time); + } +#endif + if (reduce) { + KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid)); + (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); + } + child++; + child_tid++; + } + while (child <= branch_factor && child_tid < nproc); + } + + if (!KMP_MASTER_TID(tid)) { // Worker threads kmp_int32 parent_tid = (tid - 1) >> branch_bits; - - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid, - &thr_bar->b_arrived, thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - - // Mark arrival to parent thread - /* After performing this write, a worker thread may not assume that the team is valid - any more - it could be deallocated by the master thread at any time. 
*/ - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]); - flag.release(); - } else { - // Need to update the team arrived pointer if we are the master thread - if (nproc > 1) // New value was already computed above - team->t.t_bar[bt].b_arrived = new_state; - else - team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, - &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); - } - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -static void -__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release); + + KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " + "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid, + &thr_bar->b_arrived, thr_bar->b_arrived, + thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); + + // Mark arrival to parent thread + /* After performing this write, a worker thread may not assume that the team is valid + any more - it could be deallocated by the master thread at any time. 
*/ + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]); + flag.release(); + } else { + // Need to update the team arrived pointer if we are the master thread + if (nproc > 1) // New value was already computed above + team->t.t_bar[bt].b_arrived = new_state; + else + team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; + KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, + &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + } + KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + +static void +__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs + USE_ITT_BUILD_ARG(void *itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release); kmp_team_t *team; kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; kmp_uint32 nproc; @@ -323,102 +323,102 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, kmp_uint32 branch_factor = 1 << branch_bits; kmp_uint32 child; kmp_uint32 child_tid; - - // Perform a tree release for all of the threads that have been gathered - if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - // Wait for parent thread to release us - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... 
- __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } else { - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - } - nproc = this_thr->th.th_team_nproc; - child_tid = (tid << branch_bits) + 1; - - if (child_tid < nproc) { + + // Perform a tree release for all of the threads that have been gathered + if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet + KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", + gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); + // Wait for parent thread to release us + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { + // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled) + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); 
+ // Cancel wait on previous parallel region... + __kmp_itt_task_starting(itt_sync_obj); + + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj != NULL) + // Call prepare as early as possible for "new" barrier + __kmp_itt_task_finished(itt_sync_obj); + } else +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + // Early exit for reaping threads releasing forkjoin barrier + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + // The worker thread may now assume that the team is valid. + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); + + TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); + KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. + } else { + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + } + nproc = this_thr->th.th_team_nproc; + child_tid = (tid << branch_bits) + 1; + + if (child_tid < nproc) { kmp_info_t **other_threads = team->t.t_threads; - child = 1; - // Parent threads release all their children - do { + child = 1; + // Parent threads release all their children + do { kmp_info_t *child_thr = other_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; -#if KMP_CACHE_MANAGE - // Prefetch next thread's go count - if (child+1 <= branch_factor && child_tid+1 < nproc) - KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go); -#endif /* KMP_CACHE_MANAGE */ - -#if KMP_BARRIER_ICV_PUSH - { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); - if (propagate_icvs) { - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid], - team, 
child_tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, - &team->t.t_implicit_task_taskdata[0].td_icvs); - } - } -#endif // KMP_BARRIER_ICV_PUSH - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" - "go(%p): %u => %u\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child from barrier - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - child++; - child_tid++; - } - while (child <= branch_factor && child_tid < nproc); - } - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - - -// Hyper Barrier -static void -__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather); +#if KMP_CACHE_MANAGE + // Prefetch next thread's go count + if (child+1 <= branch_factor && child_tid+1 < nproc) + KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go); +#endif /* KMP_CACHE_MANAGE */ + +#if KMP_BARRIER_ICV_PUSH + { + KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + if (propagate_icvs) { + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid], + team, child_tid, FALSE); + copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, + &team->t.t_implicit_task_taskdata[0].td_icvs); + } + } +#endif // KMP_BARRIER_ICV_PUSH + KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" + "go(%p): %u => %u\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Release child from barrier + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + child++; + child_tid++; + } + while 
(child <= branch_factor && child_tid < nproc); + } + KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + + +// Hyper Barrier +static void +__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + void (*reduce)(void *, void *) + USE_ITT_BUILD_ARG(void *itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather); kmp_team_t *team = this_thr->th.th_team; kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; kmp_info_t **other_threads = team->t.t_threads; @@ -428,103 +428,103 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, kmp_uint32 branch_factor = 1 << branch_bits; kmp_uint32 offset; kmp_uint32 level; - - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); - } -#endif - /* Perform a hypercube-embedded tree gather to wait until all of the threads have - arrived, and reduce any required data as we go. */ - kmp_flag_64 p_flag(&thr_bar->b_arrived); - for (level=0, offset=1; offsett.t_id, tid, bt)); + + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); + +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance - save arrive time to the thread + if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); + } +#endif + /* Perform a hypercube-embedded tree gather to wait until all of the threads have + arrived, and reduce any required data as we go. 
*/ + kmp_flag_64 p_flag(&thr_bar->b_arrived); + for (level=0, offset=1; offset> level) & (branch_factor - 1)) != 0) { + + if (((tid >> level) & (branch_factor - 1)) != 0) { kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) -1); - - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid, - &thr_bar->b_arrived, thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - // Mark arrival to parent thread - /* After performing this write (in the last iteration of the enclosing for loop), - a worker thread may not assume that the team is valid any more - it could be - deallocated by the master thread at any time. */ - p_flag.set_waiter(other_threads[parent_tid]); - p_flag.release(); - break; - } - - // Parent threads wait for children to arrive - if (new_state == KMP_BARRIER_UNUSED_STATE) - new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - for (child=1, child_tid=tid+(1 << level); child %llu\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid, + &thr_bar->b_arrived, thr_bar->b_arrived, + thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); + // Mark arrival to parent thread + /* After performing this write (in the last iteration of the enclosing for loop), + a worker thread may not assume that the team is valid any more - it could be + deallocated by the master thread at any time. 
*/ + p_flag.set_waiter(other_threads[parent_tid]); + p_flag.release(); + break; + } + + // Parent threads wait for children to arrive + if (new_state == KMP_BARRIER_UNUSED_STATE) + new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + for (child=1, child_tid=tid+(1 << level); childth.th_bar[bt].bb; -#if KMP_CACHE_MANAGE +#if KMP_CACHE_MANAGE kmp_uint32 next_child_tid = child_tid + (1 << level); - // Prefetch next thread's arrived count - if (child+1 < branch_factor && next_child_tid < num_threads) - KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); -#endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " - "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, - &child_bar->b_arrived, new_state)); - // Wait for child to arrive - kmp_flag_64 c_flag(&child_bar->b_arrived, new_state); - c_flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and a child time to the thread. 
- if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - child_thr->th.th_bar_min_time); - } -#endif - if (reduce) { - KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - } - } - } - - if (KMP_MASTER_TID(tid)) { - // Need to update the team arrived pointer if we are the master thread - if (new_state == KMP_BARRIER_UNUSED_STATE) - team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; - else - team->t.t_bar[bt].b_arrived = new_state; - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, - &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); - } - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// The reverse versions seem to beat the forward versions overall -#define KMP_REVERSE_HYPER_BAR -static void -__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release); + // Prefetch next thread's arrived count + if (child+1 < branch_factor && next_child_tid < num_threads) + KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); +#endif /* KMP_CACHE_MANAGE */ + KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " + "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, + &child_bar->b_arrived, new_state)); + // Wait for child to arrive + kmp_flag_64 c_flag(&child_bar->b_arrived, new_state); + c_flag.wait(this_thr, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance 
- write min of the thread time and a child time to the thread. + if (__kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, + child_thr->th.th_bar_min_time); + } +#endif + if (reduce) { + KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid)); + (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); + } + } + } + + if (KMP_MASTER_TID(tid)) { + // Need to update the team arrived pointer if we are the master thread + if (new_state == KMP_BARRIER_UNUSED_STATE) + team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; + else + team->t.t_bar[bt].b_arrived = new_state; + KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, + &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + } + KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + +// The reverse versions seem to beat the forward versions overall +#define KMP_REVERSE_HYPER_BAR +static void +__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs + USE_ITT_BUILD_ARG(void *itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release); kmp_team_t *team; kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb; kmp_info_t **other_threads; @@ -535,1208 +535,1208 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid kmp_uint32 child_tid; kmp_uint32 offset; kmp_uint32 level; - - /* Perform a hypercube-embedded tree release for all of the threads that have been gathered. - If KMP_REVERSE_HYPER_BAR is defined (default) the threads are released in the reverse - order of the corresponding gather, otherwise threads are released in the same order. 
*/ - if (KMP_MASTER_TID(tid)) { // master - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { // master already has ICVs in final destination; copy - copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs); - } -#endif - } - else { // Handle fork barrier workers who aren't part of a team yet - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - // Wait for parent thread to release us - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... - __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - // The worker thread may now assume that the team is valid. 
- team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - num_threads = this_thr->th.th_team_nproc; - other_threads = team->t.t_threads; - -#ifdef KMP_REVERSE_HYPER_BAR - // Count up to correct level for parent - for (level=0, offset=1; offset>level) & (branch_factor-1)) == 0); - level+=branch_bits, offset<<=branch_bits); - - // Now go down from there - for (level-=branch_bits, offset>>=branch_bits; offset != 0; - level-=branch_bits, offset>>=branch_bits) -#else - // Go down the tree, level by level - for (level=0, offset=1; offset> ((level==0)?level:level-1); - for (child=(child=1; child--, child_tid-=(1<> level) & (branch_factor - 1)) != 0) - // No need to go lower than this, since this is the level parent would be notified - break; - // Iterate through children on this level of the tree - for (child=1, child_tid=tid+(1<= num_threads) continue; // Child doesn't exist so keep going - else { + + /* Perform a hypercube-embedded tree release for all of the threads that have been gathered. + If KMP_REVERSE_HYPER_BAR is defined (default) the threads are released in the reverse + order of the corresponding gather, otherwise threads are released in the same order. 
*/ + if (KMP_MASTER_TID(tid)) { // master + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +#if KMP_BARRIER_ICV_PUSH + if (propagate_icvs) { // master already has ICVs in final destination; copy + copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs); + } +#endif + } + else { // Handle fork barrier workers who aren't part of a team yet + KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", + gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); + // Wait for parent thread to release us + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { + // In fork barrier where we could not get the object reliably + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); + // Cancel wait on previous parallel region... + __kmp_itt_task_starting(itt_sync_obj); + + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj != NULL) + // Call prepare as early as possible for "new" barrier + __kmp_itt_task_finished(itt_sync_obj); + } else +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + // Early exit for reaping threads releasing forkjoin barrier + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + // The worker thread may now assume that the team is valid. 
+ team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); + + TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); + KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. + } + num_threads = this_thr->th.th_team_nproc; + other_threads = team->t.t_threads; + +#ifdef KMP_REVERSE_HYPER_BAR + // Count up to correct level for parent + for (level=0, offset=1; offset>level) & (branch_factor-1)) == 0); + level+=branch_bits, offset<<=branch_bits); + + // Now go down from there + for (level-=branch_bits, offset>>=branch_bits; offset != 0; + level-=branch_bits, offset>>=branch_bits) +#else + // Go down the tree, level by level + for (level=0, offset=1; offset> ((level==0)?level:level-1); + for (child=(child=1; child--, child_tid-=(1<> level) & (branch_factor - 1)) != 0) + // No need to go lower than this, since this is the level parent would be notified + break; + // Iterate through children on this level of the tree + for (child=1, child_tid=tid+(1<= num_threads) continue; // Child doesn't exist so keep going + else { kmp_info_t *child_thr = other_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; -#if KMP_CACHE_MANAGE +#if KMP_CACHE_MANAGE kmp_uint32 next_child_tid = child_tid - (1 << level); - // Prefetch next thread's go count -# ifdef KMP_REVERSE_HYPER_BAR - if (child-1 >= 1 && next_child_tid < num_threads) -# else - if (child+1 < branch_factor && next_child_tid < num_threads) -# endif // KMP_REVERSE_HYPER_BAR - KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); -#endif /* KMP_CACHE_MANAGE */ - -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) // push my fixed ICVs to my child - copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); -#endif // KMP_BARRIER_ICV_PUSH - - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) 
releasing T#%d(%d:%u)" - "go(%p): %u => %u\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child from barrier - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } - } -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs); - } -#endif - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// Hierarchical Barrier - -// Initialize thread barrier data -/* Initializes/re-initializes the hierarchical barrier data stored on a thread. Performs the - minimum amount of initialization required based on how the team has changed. Returns true if - leaf children will require both on-core and traditional wake-up mechanisms. For example, if the - team size increases, threads already in the team will respond to on-core wakeup on their parent - thread, but threads newly added to the team will only be listening on the their local b_go. 
*/ -static bool -__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc, - int gtid, int tid, kmp_team_t *team) -{ - // Checks to determine if (re-)initialization is needed - bool uninitialized = thr_bar->team == NULL; - bool team_changed = team != thr_bar->team; - bool team_sz_changed = nproc != thr_bar->nproc; - bool tid_changed = tid != thr_bar->old_tid; - bool retval = false; - - if (uninitialized || team_sz_changed) { - __kmp_get_hierarchy(nproc, thr_bar); - } - - if (uninitialized || team_sz_changed || tid_changed) { - thr_bar->my_level = thr_bar->depth-1; // default for master - thr_bar->parent_tid = -1; // default for master - if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy - kmp_uint32 d=0; - while (ddepth) { // find parent based on level of thread in hierarchy, and note level - kmp_uint32 rem; - if (d == thr_bar->depth-2) { // reached level right below the master - thr_bar->parent_tid = 0; - thr_bar->my_level = d; - break; - } - else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) { // TODO: can we make this op faster? 
- // thread is not a subtree root at next level, so this is max - thr_bar->parent_tid = tid - rem; - thr_bar->my_level = d; - break; - } - ++d; - } - } - thr_bar->offset = 7-(tid-thr_bar->parent_tid-1); - thr_bar->old_tid = tid; - thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; - thr_bar->team = team; - thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; - } - if (uninitialized || team_changed || tid_changed) { - thr_bar->team = team; - thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; - retval = true; - } - if (uninitialized || team_sz_changed || tid_changed) { - thr_bar->nproc = nproc; - thr_bar->leaf_kids = thr_bar->base_leaf_kids; - if (thr_bar->my_level == 0) thr_bar->leaf_kids=0; - if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc) - thr_bar->leaf_kids = nproc - tid - 1; - thr_bar->leaf_state = 0; - for (int i=0; ileaf_kids; ++i) ((char *)&(thr_bar->leaf_state))[7-i] = 1; - } - return retval; -} - -static void -__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, - int gtid, int tid, void (*reduce) (void *, void *) - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather); + // Prefetch next thread's go count +# ifdef KMP_REVERSE_HYPER_BAR + if (child-1 >= 1 && next_child_tid < num_threads) +# else + if (child+1 < branch_factor && next_child_tid < num_threads) +# endif // KMP_REVERSE_HYPER_BAR + KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); +#endif /* KMP_CACHE_MANAGE */ + +#if KMP_BARRIER_ICV_PUSH + if (propagate_icvs) // push my fixed ICVs to my child + copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); +#endif // KMP_BARRIER_ICV_PUSH + + KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" + "go(%p): %u => %u\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + 
KMP_BARRIER_STATE_BUMP)); + // Release child from barrier + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + } + } + } +#if KMP_BARRIER_ICV_PUSH + if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs); + } +#endif + KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + +// Hierarchical Barrier + +// Initialize thread barrier data +/* Initializes/re-initializes the hierarchical barrier data stored on a thread. Performs the + minimum amount of initialization required based on how the team has changed. Returns true if + leaf children will require both on-core and traditional wake-up mechanisms. For example, if the + team size increases, threads already in the team will respond to on-core wakeup on their parent + thread, but threads newly added to the team will only be listening on the their local b_go. 
*/ +static bool +__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc, + int gtid, int tid, kmp_team_t *team) +{ + // Checks to determine if (re-)initialization is needed + bool uninitialized = thr_bar->team == NULL; + bool team_changed = team != thr_bar->team; + bool team_sz_changed = nproc != thr_bar->nproc; + bool tid_changed = tid != thr_bar->old_tid; + bool retval = false; + + if (uninitialized || team_sz_changed) { + __kmp_get_hierarchy(nproc, thr_bar); + } + + if (uninitialized || team_sz_changed || tid_changed) { + thr_bar->my_level = thr_bar->depth-1; // default for master + thr_bar->parent_tid = -1; // default for master + if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy + kmp_uint32 d=0; + while (ddepth) { // find parent based on level of thread in hierarchy, and note level + kmp_uint32 rem; + if (d == thr_bar->depth-2) { // reached level right below the master + thr_bar->parent_tid = 0; + thr_bar->my_level = d; + break; + } + else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) { // TODO: can we make this op faster? 
+ // thread is not a subtree root at next level, so this is max + thr_bar->parent_tid = tid - rem; + thr_bar->my_level = d; + break; + } + ++d; + } + } + thr_bar->offset = 7-(tid-thr_bar->parent_tid-1); + thr_bar->old_tid = tid; + thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; + thr_bar->team = team; + thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; + } + if (uninitialized || team_changed || tid_changed) { + thr_bar->team = team; + thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; + retval = true; + } + if (uninitialized || team_sz_changed || tid_changed) { + thr_bar->nproc = nproc; + thr_bar->leaf_kids = thr_bar->base_leaf_kids; + if (thr_bar->my_level == 0) thr_bar->leaf_kids=0; + if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc) + thr_bar->leaf_kids = nproc - tid - 1; + thr_bar->leaf_state = 0; + for (int i=0; ileaf_kids; ++i) ((char *)&(thr_bar->leaf_state))[7-i] = 1; + } + return retval; +} + +static void +__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, + int gtid, int tid, void (*reduce) (void *, void *) + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather); kmp_team_t *team = this_thr->th.th_team; kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; kmp_uint32 nproc = this_thr->th.th_team_nproc; kmp_info_t **other_threads = team->t.t_threads; kmp_uint64 new_state; - - int level = team->t.t_level; -#if OMP_40_ENABLED - if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct? 
- if (this_thr->th.th_teams_size.nteams > 1) - ++level; // level was not increased in teams construct for team_of_masters -#endif - if (level == 1) thr_bar->use_oncore_barrier = 1; - else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested - - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = __itt_get_timestamp(); - } -#endif - - (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team); - - if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf) + + int level = team->t.t_level; +#if OMP_40_ENABLED + if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct? 
+ if (this_thr->th.th_teams_size.nteams > 1) + ++level; // level was not increased in teams construct for team_of_masters +#endif + if (level == 1) thr_bar->use_oncore_barrier = 1; + else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested + + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); + +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance - save arrive time to the thread + if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = __itt_get_timestamp(); + } +#endif + + (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team); + + if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf) kmp_int32 child_tid; - new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { - if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag - kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? 
thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n", - gtid, team->t.t_id, tid)); - kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - if (reduce) { - for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data); - } - } - (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state)); // clear leaf_state bits - } - // Next, wait for higher level children on each child's b_arrived flag - for (kmp_uint32 d=1; dmy_level; ++d) { // gather lowest level threads first, but skip 0 - kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d]; - if (last > nproc) last = nproc; - for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { + new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { + if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag + kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? 
thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state; + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n", + gtid, team->t.t_id, tid)); + kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state); + flag.wait(this_thr, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + if (reduce) { + for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) { + KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid)); + (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data); + } + } + (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state)); // clear leaf_state bits + } + // Next, wait for higher level children on each child's b_arrived flag + for (kmp_uint32 d=1; dmy_level; ++d) { // gather lowest level threads first, but skip 0 + kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d]; + if (last > nproc) last = nproc; + for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { kmp_info_t *child_thr = other_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - if (reduce) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - } - } - } - } - else { // Blocktime is not infinite - for (kmp_uint32 
d=0; dmy_level; ++d) { // Gather lowest level threads first - kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d]; - if (last > nproc) last = nproc; - for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " + "arrived(%p) == %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); + kmp_flag_64 flag(&child_bar->b_arrived, new_state); + flag.wait(this_thr, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + if (reduce) { + KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid)); + (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); + } + } + } + } + else { // Blocktime is not infinite + for (kmp_uint32 d=0; dmy_level; ++d) { // Gather lowest level threads first + kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d]; + if (last > nproc) last = nproc; + for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { kmp_info_t *child_thr = other_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - if (reduce) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - } - } - } - } - } - // All subordinates are 
gathered; now release parent if not master thread - - if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid, - &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP)); - /* Mark arrival to parent: After performing this write, a worker thread may not assume that - the team is valid any more - it could be deallocated by the master thread at any time. */ - if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME - || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]); - flag.release(); - } - else { // Leaf does special release on the "offset" bits of parent's b_arrived flag - thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset); - flag.set_waiter(other_threads[thr_bar->parent_tid]); - flag.release(); - } - } else { // Master thread needs to update the team's b_arrived value - team->t.t_bar[bt].b_arrived = new_state; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); - } - // Is the team access below unsafe or just technically invalid? 
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -static void -__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release); + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " + "arrived(%p) == %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); + kmp_flag_64 flag(&child_bar->b_arrived, new_state); + flag.wait(this_thr, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + if (reduce) { + KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid)); + (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); + } + } + } + } + } + // All subordinates are gathered; now release parent if not master thread + + if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " + "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid, + &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP)); + /* Mark arrival to parent: After performing this write, a worker thread may not assume that + the team is valid any more - it could be deallocated by the master thread at any time. 
*/ + if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME + || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]); + flag.release(); + } + else { // Leaf does special release on the "offset" bits of parent's b_arrived flag + thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset); + flag.set_waiter(other_threads[thr_bar->parent_tid]); + flag.release(); + } + } else { // Master thread needs to update the team's b_arrived value + team->t.t_bar[bt].b_arrived = new_state; + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + } + // Is the team access below unsafe or just technically invalid? + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + +static void +__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release); kmp_team_t *team; kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; kmp_uint32 nproc; - bool team_change = false; // indicates on-core barrier shouldn't be used - - if (KMP_MASTER_TID(tid)) { - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - } - else { // Worker threads - // Wait for parent thread to release me - if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME - || thr_bar->my_level != 0 || thr_bar->team == NULL) { - // Use traditional method of waiting on my own b_go flag - 
thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG; - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - } - else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested - // Wait on my "offset" bits on parent's b_go flag - thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG; - kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset, - bt, this_thr - USE_ITT_BUILD_ARG(itt_sync_obj) ); - flag.wait(this_thr, TRUE); - if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go - TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - } - else { // Reset my bits on parent's b_go flag - ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0; - } - } - thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - - nproc = this_thr->th.th_team_nproc; - int level = team->t.t_level; -#if OMP_40_ENABLED - if (team->t.t_threads[0]->th.th_teams_microtask ) { // are we inside the teams construct? 
- if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level) - ++level; // level was not increased in teams construct for team_of_workers - if( this_thr->th.th_teams_size.nteams > 1 ) - ++level; // level was not increased in teams construct for team_of_masters - } -#endif - if (level == 1) thr_bar->use_oncore_barrier = 1; - else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested - - // If the team size has increased, we still communicate with old leaves via oncore barrier. - unsigned short int old_leaf_kids = thr_bar->leaf_kids; - kmp_uint64 old_leaf_state = thr_bar->leaf_state; - team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team); - // But if the entire team changes, we won't use oncore barrier at all - if (team_change) old_leaf_kids = 0; - -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); - if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy - copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs); - } - else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { // optimization for inf blocktime - if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) - // leaves (on-core children) pull parent's fixed ICVs directly to local ICV store - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &thr_bar->parent_bar->th_fixed_icvs); - // non-leaves will get ICVs piggybacked with b_go via NGO store - } - else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs - if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can access - copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); - else // leaves copy parent's fixed ICVs directly to local ICV store - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - 
&thr_bar->parent_bar->th_fixed_icvs); - } - } -#endif // KMP_BARRIER_ICV_PUSH - - // Now, release my children - if (thr_bar->my_level) { // not a leaf + bool team_change = false; // indicates on-core barrier shouldn't be used + + if (KMP_MASTER_TID(tid)) { + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + } + else { // Worker threads + // Wait for parent thread to release me + if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME + || thr_bar->my_level != 0 || thr_bar->team == NULL) { + // Use traditional method of waiting on my own b_go flag + thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG; + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time + } + else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested + // Wait on my "offset" bits on parent's b_go flag + thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG; + kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset, + bt, this_thr + USE_ITT_BUILD_ARG(itt_sync_obj) ); + flag.wait(this_thr, TRUE); + if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go + TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time + } + else { // Reset my bits on parent's b_go flag + ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0; + } + } + thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; + // Early exit for reaping threads releasing forkjoin barrier + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + // The worker thread may now assume that the team is valid. 
+ team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); + + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. + } + + nproc = this_thr->th.th_team_nproc; + int level = team->t.t_level; +#if OMP_40_ENABLED + if (team->t.t_threads[0]->th.th_teams_microtask ) { // are we inside the teams construct? + if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level) + ++level; // level was not increased in teams construct for team_of_workers + if( this_thr->th.th_teams_size.nteams > 1 ) + ++level; // level was not increased in teams construct for team_of_masters + } +#endif + if (level == 1) thr_bar->use_oncore_barrier = 1; + else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested + + // If the team size has increased, we still communicate with old leaves via oncore barrier. 
+ unsigned short int old_leaf_kids = thr_bar->leaf_kids; + kmp_uint64 old_leaf_state = thr_bar->leaf_state; + team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team); + // But if the entire team changes, we won't use oncore barrier at all + if (team_change) old_leaf_kids = 0; + +#if KMP_BARRIER_ICV_PUSH + if (propagate_icvs) { + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); + if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy + copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs); + } + else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { // optimization for inf blocktime + if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) + // leaves (on-core children) pull parent's fixed ICVs directly to local ICV store + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &thr_bar->parent_bar->th_fixed_icvs); + // non-leaves will get ICVs piggybacked with b_go via NGO store + } + else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs + if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can access + copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); + else // leaves copy parent's fixed ICVs directly to local ICV store + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &thr_bar->parent_bar->th_fixed_icvs); + } + } +#endif // KMP_BARRIER_ICV_PUSH + + // Now, release my children + if (thr_bar->my_level) { // not a leaf kmp_int32 child_tid; - kmp_uint32 last; - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { - if (KMP_MASTER_TID(tid)) { // do a flat release - // Set local b_go to bump children via NGO store of the cache line containing IVCs and b_go. 
- thr_bar->b_go = KMP_BARRIER_STATE_BUMP; - // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line - ngo_load(&thr_bar->th_fixed_icvs); - // This loops over all the threads skipping only the leaf nodes in the hierarchy - for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) { + kmp_uint32 last; + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { + if (KMP_MASTER_TID(tid)) { // do a flat release + // Set local b_go to bump children via NGO store of the cache line containing IVCs and b_go. + thr_bar->b_go = KMP_BARRIER_STATE_BUMP; + // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line + ngo_load(&thr_bar->th_fixed_icvs); + // This loops over all the threads skipping only the leaf nodes in the hierarchy + for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) { kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" - " go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Use ngo store (if available) to both store ICVs and release child via child's b_go - ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); - } - ngo_sync(); - } - TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - // Now, release leaf children - if (thr_bar->leaf_kids) { // if there are any - // We test team_change on the off-chance that the level 1 team changed. 
- if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf_kids, some new - if (old_leaf_kids) { // release old leaf kids - thr_bar->b_go |= old_leaf_state; - } - // Release new leaf kids - last = tid+thr_bar->skip_per_level[1]; - if (last > nproc) last = nproc; - for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1 + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" + " go(%p): %u => %u\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Use ngo store (if available) to both store ICVs and release child via child's b_go + ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); + } + ngo_sync(); + } + TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time + // Now, release leaf children + if (thr_bar->leaf_kids) { // if there are any + // We test team_change on the off-chance that the level 1 team changed. 
+ if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf_kids, some new + if (old_leaf_kids) { // release old leaf kids + thr_bar->b_go |= old_leaf_state; + } + // Release new leaf kids + last = tid+thr_bar->skip_per_level[1]; + if (last > nproc) last = nproc; + for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1 kmp_info_t *child_thr = team->t.t_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" - " T#%d(%d:%d) go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child using child's b_go flag - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } - else { // Release all children at once with leaf_state bits on my own b_go flag - thr_bar->b_go |= thr_bar->leaf_state; - } - } - } - else { // Blocktime is not infinite; do a simple hierarchical release - for (int d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first - last = tid+thr_bar->skip_per_level[d+1]; - kmp_uint32 skip = thr_bar->skip_per_level[d]; - if (last > nproc) last = nproc; - for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" + " T#%d(%d:%d) go(%p): %u => %u\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Release child using child's b_go flag + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + } + } + else { // Release all children at once with leaf_state bits on my own b_go flag + thr_bar->b_go |= thr_bar->leaf_state; + } + } + } + else { // Blocktime is not infinite; do a simple hierarchical release + for (int 
d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first + last = tid+thr_bar->skip_per_level[d+1]; + kmp_uint32 skip = thr_bar->skip_per_level[d]; + if (last > nproc) last = nproc; + for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { kmp_info_t *child_thr = team->t.t_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" - " go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child using child's b_go flag - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } - } -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs); -#endif // KMP_BARRIER_ICV_PUSH - } - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -// ---------------------------- End of Barrier Algorithms ---------------------------- - -// Internal function to do a barrier. -/* If is_split is true, do a split barrier, otherwise, do a plain barrier - If reduce is non-NULL, do a split reduction barrier, otherwise, do a split barrier - Returns 0 if master thread, 1 if worker thread. 
*/ -int -__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, - void *reduce_data, void (*reduce)(void *, void *)) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_barrier); + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" + " go(%p): %u => %u\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Release child using child's b_go flag + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + } + } + } +#if KMP_BARRIER_ICV_PUSH + if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs); +#endif // KMP_BARRIER_ICV_PUSH + } + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); +} + +// ---------------------------- End of Barrier Algorithms ---------------------------- + +// Internal function to do a barrier. +/* If is_split is true, do a split barrier, otherwise, do a plain barrier + If reduce is non-NULL, do a split reduction barrier, otherwise, do a split barrier + Returns 0 if master thread, 1 if worker thread. 
*/ +int +__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, + void *reduce_data, void (*reduce)(void *, void *)) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_barrier); int tid = __kmp_tid_from_gtid(gtid); kmp_info_t *this_thr = __kmp_threads[gtid]; kmp_team_t *team = this_thr->th.th_team; int status = 0; - ident_t *loc = __kmp_threads[gtid]->th.th_ident; -#if OMPT_SUPPORT - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; -#endif - - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", - gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); - -#if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - -#if OMPT_TRACE - if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_end)( - my_parallel_id, my_task_id); - } - } -#endif - if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( - my_parallel_id, my_task_id); - } -#endif - // It is OK to report the barrier state after the barrier begin callback. - // According to the OMPT specification, a compliant implementation may - // even delay reporting this state until the barrier begins to wait. - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; - } -#endif - - if (! team->t.t_serialized) { -#if USE_ITT_BUILD - // This value will be used in itt notify events below. 
- void *itt_sync_obj = NULL; -# if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); -# endif -#endif /* USE_ITT_BUILD */ - if (__kmp_tasking_mode == tskm_extra_barrier) { - __kmp_tasking_barrier(team, this_thr, gtid); - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", - gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); - } - - /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when - the team struct is not guaranteed to exist. */ - // See note about the corresponding code in __kmp_join_barrier() being performance-critical. - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; - } - -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_starting(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ -#if USE_DEBUGGER - // Let the debugger know: the thread arrived to the barrier and waiting. - if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure. 
- team->t.t_bar[bt].b_master_arrived += 1; - } else { - this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; - } // if -#endif /* USE_DEBUGGER */ - if (reduce != NULL) { - //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 - this_thr->th.th_local.reduce_data = reduce_data; - } - - if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec) - __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1 - - switch (__kmp_barrier_gather_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear - __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear - __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } - } - - KMP_MB(); - - if (KMP_MASTER_TID(tid)) { - status = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_wait(this_thr, team - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } -#if USE_DEBUGGER - // Let the debugger know: All threads are arrived and starting leaving the barrier. - team->t.t_bar[bt].b_team_arrived += 1; -#endif - -#if USE_ITT_BUILD - /* TODO: In case of split reduction barrier, master thread may send acquired event early, - before the final summation into the shared variable is done (final summation can be a - long operation for array reductions). 
*/ - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier - report frame end (only if active_level == 1) - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && -#if OMP_40_ENABLED - this_thr->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1) - { - kmp_uint64 cur_time = __itt_get_timestamp(); - kmp_info_t **other_threads = team->t.t_threads; - int nproc = this_thr->th.th_team_nproc; - int i; - switch(__kmp_forkjoin_frames_mode) { - case 1: - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed) - __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc); - break; - case 3: - if( __itt_metadata_add_ptr ) { - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; - for (i=1; ith.th_bar_arrive_time ); - } - __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL)); - } - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - } - } -#endif /* USE_ITT_BUILD */ - } else { - status = 1; -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - } - if (status == 1 || ! 
is_split) { - switch (__kmp_barrier_release_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } - } - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } - } - -#if USE_ITT_BUILD - /* GEH: TODO: Move this under if-condition above and also include in - __kmp_end_split_barrier(). This will more accurately represent the actual release time - of the threads for split barriers. */ - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - } else { // Team is serialized. 
- status = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) { -#if OMP_41_ENABLED - if ( this_thr->th.th_task_team != NULL ) { - void *itt_sync_obj = NULL; -#if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); - __kmp_itt_barrier_starting(gtid, itt_sync_obj); - } -#endif - - KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE); - __kmp_task_team_wait(this_thr, team - USE_ITT_BUILD_ARG(itt_sync_obj)); - __kmp_task_team_setup(this_thr, team, 0); - -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - } -#else - // The task team should be NULL for serialized code (tasks will be executed immediately) - KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL); - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL); -#endif - } - } - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n", - gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status)); - -#if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)( - my_parallel_id, my_task_id); - } -#endif - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - } -#endif - - return status; -} - - -void -__kmp_end_split_barrier(enum barrier_type bt, int gtid) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier); - int tid = __kmp_tid_from_gtid(gtid); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = this_thr->th.th_team; - - if (!team->t.t_serialized) { - if (KMP_MASTER_GTID(gtid)) { - switch (__kmp_barrier_release_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL) ); - break; - } - case bp_hierarchical_bar: { - 
__kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL) ); - break; - } - default: { - __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL) ); - } - } - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } // if - } - } -} - - -void -__kmp_join_barrier(int gtid) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier); + ident_t *loc = __kmp_threads[gtid]->th.th_ident; +#if OMPT_SUPPORT + ompt_task_id_t my_task_id; + ompt_parallel_id_t my_parallel_id; +#endif + + KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", + gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); + +#if OMPT_SUPPORT + if (ompt_enabled) { +#if OMPT_BLAME + my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; + my_parallel_id = team->t.ompt_team_info.parallel_id; + +#if OMPT_TRACE + if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) { + if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) { + ompt_callbacks.ompt_callback(ompt_event_single_others_end)( + my_parallel_id, my_task_id); + } + } +#endif + if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( + my_parallel_id, my_task_id); + } +#endif + // It is OK to report the barrier state after the barrier begin callback. + // According to the OMPT specification, a compliant implementation may + // even delay reporting this state until the barrier begins to wait. + this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + } +#endif + + if (! team->t.t_serialized) { +#if USE_ITT_BUILD + // This value will be used in itt notify events below. 
+ void *itt_sync_obj = NULL; +# if USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); +# endif +#endif /* USE_ITT_BUILD */ + if (__kmp_tasking_mode == tskm_extra_barrier) { + __kmp_tasking_barrier(team, this_thr, gtid); + KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", + gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); + } + + /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when + the team struct is not guaranteed to exist. */ + // See note about the corresponding code in __kmp_join_barrier() being performance-critical. + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; + this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; + } + +#if USE_ITT_BUILD + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_starting(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ +#if USE_DEBUGGER + // Let the debugger know: the thread arrived to the barrier and waiting. + if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure. 
+ team->t.t_bar[bt].b_master_arrived += 1; + } else { + this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; + } // if +#endif /* USE_DEBUGGER */ + if (reduce != NULL) { + //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 + this_thr->th.th_local.reduce_data = reduce_data; + } + + if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec) + __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1 + + switch (__kmp_barrier_gather_pattern[bt]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear + __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce + USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear + __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + default: { + __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce + USE_ITT_BUILD_ARG(itt_sync_obj) ); + } + } + + KMP_MB(); + + if (KMP_MASTER_TID(tid)) { + status = 0; + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_wait(this_thr, team + USE_ITT_BUILD_ARG(itt_sync_obj) ); + } +#if USE_DEBUGGER + // Let the debugger know: All threads are arrived and starting leaving the barrier. + team->t.t_bar[bt].b_team_arrived += 1; +#endif + +#if USE_ITT_BUILD + /* TODO: In case of split reduction barrier, master thread may send acquired event early, + before the final summation into the shared variable is done (final summation can be a + long operation for array reductions). 
*/ + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier - report frame end (only if active_level == 1) + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && +#if OMP_40_ENABLED + this_thr->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1) + { + kmp_uint64 cur_time = __itt_get_timestamp(); + kmp_info_t **other_threads = team->t.t_threads; + int nproc = this_thr->th.th_team_nproc; + int i; + switch(__kmp_forkjoin_frames_mode) { + case 1: + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); + this_thr->th.th_frame_time = cur_time; + break; + case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed) + __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc); + break; + case 3: + if( __itt_metadata_add_ptr ) { + // Initialize with master's wait time + kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; + for (i=1; ith.th_bar_arrive_time ); + } + __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL)); + } + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); + this_thr->th.th_frame_time = cur_time; + break; + } + } +#endif /* USE_ITT_BUILD */ + } else { + status = 1; +#if USE_ITT_BUILD + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ + } + if (status == 1 || ! 
is_split) { + switch (__kmp_barrier_release_pattern[bt]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + default: { + __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + } + } + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_sync(this_thr, team); + } + } + +#if USE_ITT_BUILD + /* GEH: TODO: Move this under if-condition above and also include in + __kmp_end_split_barrier(). This will more accurately represent the actual release time + of the threads for split barriers. */ + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_finished(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ + } else { // Team is serialized. 
+ status = 0; + if (__kmp_tasking_mode != tskm_immediate_exec) { +#if OMP_41_ENABLED + if ( this_thr->th.th_task_team != NULL ) { + void *itt_sync_obj = NULL; +#if USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); + __kmp_itt_barrier_starting(gtid, itt_sync_obj); + } +#endif + + KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE); + __kmp_task_team_wait(this_thr, team + USE_ITT_BUILD_ARG(itt_sync_obj)); + __kmp_task_team_setup(this_thr, team, 0); + +#if USE_ITT_BUILD + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_finished(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ + } +#else + // The task team should be NULL for serialized code (tasks will be executed immediately) + KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL); +#endif + } + } + KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n", + gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status)); + +#if OMPT_SUPPORT + if (ompt_enabled) { +#if OMPT_BLAME + if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_end)( + my_parallel_id, my_task_id); + } +#endif + this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + } +#endif + + return status; +} + + +void +__kmp_end_split_barrier(enum barrier_type bt, int gtid) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier); + int tid = __kmp_tid_from_gtid(gtid); + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = this_thr->th.th_team; + + if (!team->t.t_serialized) { + if (KMP_MASTER_GTID(gtid)) { + switch (__kmp_barrier_release_pattern[bt]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(NULL) ); + break; + } + case bp_hierarchical_bar: { + 
__kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(NULL)); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(NULL) ); + break; + } + default: { + __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE + USE_ITT_BUILD_ARG(NULL) ); + } + } + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_sync(this_thr, team); + } // if + } + } +} + + +void +__kmp_join_barrier(int gtid) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier); kmp_info_t *this_thr = __kmp_threads[gtid]; kmp_team_t *team; kmp_uint nproc; - kmp_info_t *master_thread; - int tid; -#ifdef KMP_DEBUG - int team_id; -#endif /* KMP_DEBUG */ -#if USE_ITT_BUILD - void *itt_sync_obj = NULL; -# if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need - // Get object created at fork_barrier - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); -# endif -#endif /* USE_ITT_BUILD */ - KMP_MB(); - - // Get current info - team = this_thr->th.th_team; - nproc = this_thr->th.th_team_nproc; - KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc); - tid = __kmp_tid_from_gtid(gtid); -#ifdef KMP_DEBUG - team_id = team->t.t_id; -#endif /* KMP_DEBUG */ - master_thread = this_thr->th.th_team_master; -#ifdef KMP_DEBUG - if (master_thread != team->t.t_threads[0]) { - __kmp_print_structure(); - } -#endif /* KMP_DEBUG */ - KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]); - KMP_MB(); - - // Verify state - KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); - KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team)); - KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root)); - KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]); - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid)); - -#if OMPT_SUPPORT -#if OMPT_TRACE - if (ompt_enabled && - 
ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } -#endif - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; -#endif - - if (__kmp_tasking_mode == tskm_extra_barrier) { - __kmp_tasking_barrier(team, this_thr, gtid); - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, team_id, tid)); - } -# ifdef KMP_DEBUG - if (__kmp_tasking_mode != tskm_immediate_exec) { - KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n", - __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state], - this_thr->th.th_task_team)); - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]); - } -# endif /* KMP_DEBUG */ - - /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the - team struct is not guaranteed to exist. Doing these loads causes a cache miss slows - down EPCC parallel by 2x. As a workaround, we do not perform the copy if blocktime=infinite, - since the values are not used by __kmp_wait_template() in that case. 
*/ - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; - } - -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_starting(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - - switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); - __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); - __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } - } - - /* From this point on, the team data structure may be deallocated at any time by the - master thread - it is unsafe to reference it in any of the worker threads. Any per-team - data items that need to be referenced before the end of the barrier should be moved to - the kmp_task_team_t structs. */ - if (KMP_MASTER_TID(tid)) { - if (__kmp_tasking_mode != tskm_immediate_exec) { - // Master shouldn't call decrease_load(). // TODO: enable master threads. - // Master should have th_may_decrease_load == 0. // TODO: enable master threads. 
- __kmp_task_team_wait(this_thr, team - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } -#if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); -#endif /* USE_ITT_BUILD */ - -# if USE_ITT_BUILD && USE_ITT_NOTIFY - // Join barrier - report frame end - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && -#if OMP_40_ENABLED - this_thr->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1) - { - kmp_uint64 cur_time = __itt_get_timestamp(); - ident_t * loc = team->t.t_ident; - kmp_info_t **other_threads = team->t.t_threads; - int nproc = this_thr->th.th_team_nproc; - int i; - switch(__kmp_forkjoin_frames_mode) { - case 1: - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - break; - case 2: - __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc); - break; - case 3: - if( __itt_metadata_add_ptr ) { - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; - for (i=1; ith.th_bar_arrive_time ); - } - __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0); - } - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - } - } -# endif /* USE_ITT_BUILD */ - } -#if USE_ITT_BUILD - else { - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); - } -#endif /* USE_ITT_BUILD */ - -#if KMP_DEBUG - if (KMP_MASTER_TID(tid)) { - KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n", - gtid, team_id, tid, nproc)); - } -#endif /* KMP_DEBUG */ - - // TODO now, mark worker threads as done so they may be disbanded - KMP_MB(); // Flush all pending memory write invalidates. 
- KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); - + kmp_info_t *master_thread; + int tid; +#ifdef KMP_DEBUG + int team_id; +#endif /* KMP_DEBUG */ +#if USE_ITT_BUILD + void *itt_sync_obj = NULL; +# if USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need + // Get object created at fork_barrier + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); +# endif +#endif /* USE_ITT_BUILD */ + KMP_MB(); + + // Get current info + team = this_thr->th.th_team; + nproc = this_thr->th.th_team_nproc; + KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc); + tid = __kmp_tid_from_gtid(gtid); +#ifdef KMP_DEBUG + team_id = team->t.t_id; +#endif /* KMP_DEBUG */ + master_thread = this_thr->th.th_team_master; +#ifdef KMP_DEBUG + if (master_thread != team->t.t_threads[0]) { + __kmp_print_structure(); + } +#endif /* KMP_DEBUG */ + KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]); + KMP_MB(); + + // Verify state + KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); + KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team)); + KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root)); + KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]); + KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid)); + #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } -#endif - - // return to default state - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif -} - - -// TODO release worker threads' fork barriers as we are ready instead of all at once -void -__kmp_fork_barrier(int gtid, int tid) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = (tid == 0) ? 
this_thr->th.th_team : NULL; -#if USE_ITT_BUILD - void * itt_sync_obj = NULL; -#endif /* USE_ITT_BUILD */ - - KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", - gtid, (team != NULL) ? team->t.t_id : -1, tid)); - - // th_team pointer only valid for master thread here - if (KMP_MASTER_TID(tid)) { -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - // Create itt barrier object - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); - __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - -#ifdef KMP_DEBUG +#if OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } +#endif + this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; +#endif + + if (__kmp_tasking_mode == tskm_extra_barrier) { + __kmp_tasking_barrier(team, this_thr, gtid); + KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, team_id, tid)); + } +# ifdef KMP_DEBUG + if (__kmp_tasking_mode != tskm_immediate_exec) { + KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n", + __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state], + this_thr->th.th_task_team)); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]); + } +# endif /* KMP_DEBUG */ + + /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the + team struct is not guaranteed to exist. Doing these loads causes a cache miss slows + down EPCC parallel by 2x. As a workaround, we do not perform the copy if blocktime=infinite, + since the values are not used by __kmp_wait_template() in that case. 
*/ + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; + this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; + } + +#if USE_ITT_BUILD + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_starting(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ + + switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); + __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); + __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + default: { + __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL + USE_ITT_BUILD_ARG(itt_sync_obj) ); + } + } + + /* From this point on, the team data structure may be deallocated at any time by the + master thread - it is unsafe to reference it in any of the worker threads. Any per-team + data items that need to be referenced before the end of the barrier should be moved to + the kmp_task_team_t structs. */ + if (KMP_MASTER_TID(tid)) { + if (__kmp_tasking_mode != tskm_immediate_exec) { + // Master shouldn't call decrease_load(). // TODO: enable master threads. + // Master should have th_may_decrease_load == 0. // TODO: enable master threads. 
+ __kmp_task_team_wait(this_thr, team + USE_ITT_BUILD_ARG(itt_sync_obj) ); + } +#if USE_ITT_BUILD + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ + +# if USE_ITT_BUILD && USE_ITT_NOTIFY + // Join barrier - report frame end + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && +#if OMP_40_ENABLED + this_thr->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1) + { + kmp_uint64 cur_time = __itt_get_timestamp(); + ident_t * loc = team->t.t_ident; + kmp_info_t **other_threads = team->t.t_threads; + int nproc = this_thr->th.th_team_nproc; + int i; + switch(__kmp_forkjoin_frames_mode) { + case 1: + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); + break; + case 2: + __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc); + break; + case 3: + if( __itt_metadata_add_ptr ) { + // Initialize with master's wait time + kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; + for (i=1; ith.th_bar_arrive_time ); + } + __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0); + } + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); + this_thr->th.th_frame_time = cur_time; + break; + } + } +# endif /* USE_ITT_BUILD */ + } +#if USE_ITT_BUILD + else { + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); + } +#endif /* USE_ITT_BUILD */ + +#if KMP_DEBUG + if (KMP_MASTER_TID(tid)) { + KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n", + gtid, team_id, tid, nproc)); + } +#endif /* KMP_DEBUG */ + + // TODO now, mark worker threads as done so they may be disbanded + KMP_MB(); // Flush all pending memory write invalidates. 
+ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); + +#if OMPT_SUPPORT + if (ompt_enabled) { +#if OMPT_BLAME + if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_end)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } +#endif + + // return to default state + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif +} + + +// TODO release worker threads' fork barriers as we are ready instead of all at once +void +__kmp_fork_barrier(int gtid, int tid) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier); + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL; +#if USE_ITT_BUILD + void * itt_sync_obj = NULL; +#endif /* USE_ITT_BUILD */ + + KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", + gtid, (team != NULL) ? team->t.t_id : -1, tid)); + + // th_team pointer only valid for master thread here + if (KMP_MASTER_TID(tid)) { +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + // Create itt barrier object + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); + __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing + } +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + +#ifdef KMP_DEBUG kmp_info_t **other_threads = team->t.t_threads; int i; - - // Verify state - KMP_MB(); - - for(i=1; it.t_nproc; ++i) { - KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n", - gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid, - team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid, - other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)); - KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) - & ~(KMP_BARRIER_SLEEP_STATE)) - == KMP_INIT_BARRIER_STATE); - KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team); - } -#endif - - 
if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1 - } - - /* The master thread may have changed its blocktime between the join barrier and the - fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template() can - access it when the team struct is not guaranteed to exist. */ - // See note about the corresponding code in __kmp_join_barrier() being performance-critical - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; - } - } // master - - switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); - __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); - __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } - } - - // Early exit for reaping threads releasing forkjoin barrier - if (TCR_4(__kmp_global.g.g_done)) { - this_thr->th.th_task_team = NULL; - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - if (!KMP_MASTER_TID(tid)) { - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); - } - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - 
KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid)); - return; - } - - /* We can now assume that a valid team structure has been allocated by the master and - propagated to all worker threads. The current thread, however, may not be part of the - team, so we can't blindly assume that the team pointer is non-null. */ - team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team); - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - -#if KMP_BARRIER_ICV_PULL - /* Master thread's copy of the ICVs was set up on the implicit taskdata in - __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has - this data before this function is called. We cannot modify __kmp_fork_call() to look at - the fixed ICVs in the master's thread struct, because it is not always the case that the - threads arrays have been allocated when __kmp_fork_call() is executed. */ - { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); - if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs - // Copy the initial ICVs from the master's thread struct to the implicit task for this tid. 
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs); - } - } -#endif // KMP_BARRIER_ICV_PULL - - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - kmp_proc_bind_t proc_bind = team->t.t_proc_bind; - if (proc_bind == proc_bind_intel) { -#endif -#if KMP_AFFINITY_SUPPORTED - // Call dynamic affinity settings - if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) { - __kmp_balanced_affinity(tid, team->t.t_nproc); - } -#endif // KMP_AFFINITY_SUPPORTED -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - } - else if (proc_bind != proc_bind_false) { - if (this_thr->th.th_new_place == this_thr->th.th_current_place) { - KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n", - __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place)); - } - else { - __kmp_affinity_set_place(gtid); - } - } -#endif - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - if (!KMP_MASTER_TID(tid)) { - // Get correct barrier object - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired - } // (prepare called inside barrier_release) - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid)); -} - - -void -__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy); - - KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); - KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); - - /* Master thread's copy of the ICVs was set 
up on the implicit taskdata in - __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has - this data before this function is called. */ -#if KMP_BARRIER_ICV_PULL - /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where - all of the worker threads can access them and make their own copies after the barrier. */ - KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point - copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs); - KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team)); -#elif KMP_BARRIER_ICV_PUSH - // The ICVs will be propagated in the fork barrier, so nothing needs to be done here. - KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team)); -#else - // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time. 
- ngo_load(new_icvs); - KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point - for (int f=1; ft.t_threads[f], team)); - __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE); - ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs); - KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", - f, team->t.t_threads[f], team)); - } - ngo_sync(); -#endif // KMP_BARRIER_ICV_PULL -} + + // Verify state + KMP_MB(); + + for(i=1; it.t_nproc; ++i) { + KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n", + gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid, + team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid, + other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)); + KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) + & ~(KMP_BARRIER_SLEEP_STATE)) + == KMP_INIT_BARRIER_STATE); + KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team); + } +#endif + + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1 + } + + /* The master thread may have changed its blocktime between the join barrier and the + fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template() can + access it when the team struct is not guaranteed to exist. 
*/ + // See note about the corresponding code in __kmp_join_barrier() being performance-critical + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; + this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; + } + } // master + + switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); + __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); + __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + break; + } + default: { + __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj) ); + } + } + + // Early exit for reaping threads releasing forkjoin barrier + if (TCR_4(__kmp_global.g.g_done)) { + this_thr->th.th_task_team = NULL; + +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + if (!KMP_MASTER_TID(tid)) { + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj) + __kmp_itt_barrier_finished(gtid, itt_sync_obj); + } + } +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid)); + return; + } + + /* We can now assume that a valid team structure has been allocated by the master and + propagated to all worker threads. The current thread, however, may not be part of the + team, so we can't blindly assume that the team pointer is non-null. 
*/ + team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team); + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); + + +#if KMP_BARRIER_ICV_PULL + /* Master thread's copy of the ICVs was set up on the implicit taskdata in + __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has + this data before this function is called. We cannot modify __kmp_fork_call() to look at + the fixed ICVs in the master's thread struct, because it is not always the case that the + threads arrays have been allocated when __kmp_fork_call() is executed. */ + { + KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs + // Copy the initial ICVs from the master's thread struct to the implicit task for this tid. + KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs); + } + } +#endif // KMP_BARRIER_ICV_PULL + + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_sync(this_thr, team); + } + +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + kmp_proc_bind_t proc_bind = team->t.t_proc_bind; + if (proc_bind == proc_bind_intel) { +#endif +#if KMP_AFFINITY_SUPPORTED + // Call dynamic affinity settings + if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) { + __kmp_balanced_affinity(tid, team->t.t_nproc); + } +#endif // KMP_AFFINITY_SUPPORTED +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + } + else if (proc_bind != proc_bind_false) { + if (this_thr->th.th_new_place == this_thr->th.th_current_place) { + KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n", + __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place)); + } + else { + __kmp_affinity_set_place(gtid); + } + } +#endif + +#if USE_ITT_BUILD && 
USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + if (!KMP_MASTER_TID(tid)) { + // Get correct barrier object + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired + } // (prepare called inside barrier_release) + } +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid)); +} + + +void +__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy); + + KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); + KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); + + /* Master thread's copy of the ICVs was set up on the implicit taskdata in + __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has + this data before this function is called. */ +#if KMP_BARRIER_ICV_PULL + /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where + all of the worker threads can access them and make their own copies after the barrier. */ + KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point + copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs); + KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", + 0, team->t.t_threads[0], team)); +#elif KMP_BARRIER_ICV_PUSH + // The ICVs will be propagated in the fork barrier, so nothing needs to be done here. + KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", + 0, team->t.t_threads[0], team)); +#else + // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time. 
+ ngo_load(new_icvs); + KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point + for (int f=1; ft.t_threads[f], team)); + __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE); + ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs); + KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", + f, team->t.t_threads[f], team)); + } + ngo_sync(); +#endif // KMP_BARRIER_ICV_PULL +} diff --git a/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp b/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp index 17ea375cd06..d1eb00c6649 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp @@ -1,282 +1,282 @@ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_io.h" +#include "kmp_str.h" + +#if OMP_40_ENABLED + +/*! +@ingroup CANCELLATION +@param loc_ref location of the original task directive +@param gtid Global thread ID of encountering thread +@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) + +@return returns true if the cancellation request has been activated and the execution thread +needs to proceed to the end of the canceled region. + +Request cancellation of the binding OpenMP region. 
+*/ +kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + + KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); + + KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); + KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || + cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + if (__kmp_omp_cancellation) { + switch (cncl_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + // cancellation requests for parallel and worksharing constructs + // are handled through the team structure + { + kmp_team_t *this_team = this_thr->th.th_team; + KMP_DEBUG_ASSERT(this_team); + kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind); + if (old == cancel_noreq || old == cncl_kind) { + //printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", + // this_team->t.t_cancel_request, &(this_team->t.t_cancel_request)); + // we do not have a cancellation request in this team or we do have one + // that matches the current request -> cancel + return 1 /* true */; + } + break; + } + case cancel_taskgroup: + // cancellation requests for a task group + // are handled through the taskgroup structure + { + kmp_taskdata_t* task; + kmp_taskgroup_t* taskgroup; + + task = this_thr->th.th_current_task; + KMP_DEBUG_ASSERT( task ); + + taskgroup = task->td_taskgroup; + if (taskgroup) { + kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind); + if (old == cancel_noreq || old == cncl_kind) { + // we do not have a cancellation request in this taskgroup or we do have one + // that matches the current request -> cancel + return 1 /* true */; + } + } + else { + // TODO: what needs to happen here? 
+ // the specification disallows cancellation w/o taskgroups + // so we might do anything here, let's abort for now + KMP_ASSERT( 0 /* false */); + } + } + break; + default: + KMP_ASSERT (0 /* false */); + } + } + + // ICV OMP_CANCELLATION=false, so we ignored this cancel request + KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); + return 0 /* false */; +} + +/*! +@ingroup CANCELLATION +@param loc_ref location of the original task directive +@param gtid Global thread ID of encountering thread +@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) + +@return returns true if a matching cancellation request has been flagged in the RTL and the +encountering thread has to cancel.. + +Cancellation point for the encountering thread. +*/ +kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + + KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); + + KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); + KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || + cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + if (__kmp_omp_cancellation) { + switch (cncl_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + // cancellation requests for parallel and worksharing constructs + // are handled through the team structure + { + kmp_team_t *this_team = this_thr->th.th_team; + KMP_DEBUG_ASSERT(this_team); + if (this_team->t.t_cancel_request) { + if (cncl_kind == this_team->t.t_cancel_request) { + // the request in the team structure matches the type of + // cancellation point so we can cancel + return 1 /* true */; + } + KMP_ASSERT( 0 /* false */); + } + else { + // we do not have a cancellation request pending, so we just + // ignore this cancellation point + return 0; + } + break; + } + case cancel_taskgroup: + 
// cancellation requests for a task group + // are handled through the taskgroup structure + { + kmp_taskdata_t* task; + kmp_taskgroup_t* taskgroup; + + task = this_thr->th.th_current_task; + KMP_DEBUG_ASSERT( task ); + + taskgroup = task->td_taskgroup; + if (taskgroup) { + // return the current status of cancellation for the + // taskgroup + return !!taskgroup->cancel_request; + } + else { + // if a cancellation point is encountered by a task + // that does not belong to a taskgroup, it is OK + // to ignore it + return 0 /* false */; + } + } + default: + KMP_ASSERT (0 /* false */); + } + } + + // ICV OMP_CANCELLATION=false, so we ignore the cancellation point + KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); + return 0 /* false */; +} + +/*! +@ingroup CANCELLATION +@param loc_ref location of the original task directive +@param gtid Global thread ID of encountering thread + +@return returns true if a matching cancellation request has been flagged in the RTL and the +encountering thread has to cancel.. + +Barrier with cancellation point to send threads from the barrier to the +end of the parallel region. Needs a special code pattern as documented +in the design document for the cancellation feature. 
+*/ +kmp_int32 +__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) { + int ret = 0 /* false */; + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + kmp_team_t *this_team = this_thr->th.th_team; + + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + // call into the standard barrier + __kmpc_barrier(loc, gtid); + + // if cancellation is active, check cancellation flag + if (__kmp_omp_cancellation) { + // depending on which construct to cancel, check the flag and + // reset the flag + switch (this_team->t.t_cancel_request) { + case cancel_parallel: + ret = 1; + // ensure that threads have checked the flag, when + // leaving the above barrier + __kmpc_barrier(loc, gtid); + this_team->t.t_cancel_request = cancel_noreq; + // the next barrier is the fork/join barrier, which + // synchronizes the threads leaving here + break; + case cancel_loop: + case cancel_sections: + ret = 1; + // ensure that threads have checked the flag, when + // leaving the above barrier + __kmpc_barrier(loc, gtid); + this_team->t.t_cancel_request = cancel_noreq; + // synchronize the threads again to make sure we + // do not have any run-away threads that cause a race + // on the cancellation flag + __kmpc_barrier(loc, gtid); + break; + case cancel_taskgroup: + // this case should not occur + KMP_ASSERT (0 /* false */ ); + break; + case cancel_noreq: + // do nothing + break; + default: + KMP_ASSERT ( 0 /* false */); + } + } + + return ret; +} + +/*! +@ingroup CANCELLATION +@param loc_ref location of the original task directive +@param gtid Global thread ID of encountering thread + +@return returns true if a matching cancellation request has been flagged in the RTL and the +encountering thread has to cancel.. + +Query function to query the current status of cancellation requests. 
+Can be used to implement the following pattern: -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_str.h" - -#if OMP_40_ENABLED - -/*! -@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread -@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) - -@return returns true if the cancellation request has been activated and the execution thread -needs to proceed to the end of the canceled region. - -Request cancellation of the binding OpenMP region. -*/ -kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - - KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); - - KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); - KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || - cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - if (__kmp_omp_cancellation) { - switch (cncl_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - // cancellation requests for parallel and worksharing constructs - // are handled through the team structure - { - kmp_team_t *this_team = this_thr->th.th_team; - KMP_DEBUG_ASSERT(this_team); - kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind); - if (old == cancel_noreq || old == cncl_kind) { - //printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", - // this_team->t.t_cancel_request, 
&(this_team->t.t_cancel_request)); - // we do not have a cancellation request in this team or we do have one - // that matches the current request -> cancel - return 1 /* true */; - } - break; - } - case cancel_taskgroup: - // cancellation requests for a task group - // are handled through the taskgroup structure - { - kmp_taskdata_t* task; - kmp_taskgroup_t* taskgroup; - - task = this_thr->th.th_current_task; - KMP_DEBUG_ASSERT( task ); - - taskgroup = task->td_taskgroup; - if (taskgroup) { - kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind); - if (old == cancel_noreq || old == cncl_kind) { - // we do not have a cancellation request in this taskgroup or we do have one - // that matches the current request -> cancel - return 1 /* true */; - } - } - else { - // TODO: what needs to happen here? - // the specification disallows cancellation w/o taskgroups - // so we might do anything here, let's abort for now - KMP_ASSERT( 0 /* false */); - } - } - break; - default: - KMP_ASSERT (0 /* false */); - } - } - - // ICV OMP_CANCELLATION=false, so we ignored this cancel request - KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); - return 0 /* false */; -} - -/*! -@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread -@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) - -@return returns true if a matching cancellation request has been flagged in the RTL and the -encountering thread has to cancel.. - -Cancellation point for the encountering thread. 
-*/ -kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - - KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); - - KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); - KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || - cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - if (__kmp_omp_cancellation) { - switch (cncl_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - // cancellation requests for parallel and worksharing constructs - // are handled through the team structure - { - kmp_team_t *this_team = this_thr->th.th_team; - KMP_DEBUG_ASSERT(this_team); - if (this_team->t.t_cancel_request) { - if (cncl_kind == this_team->t.t_cancel_request) { - // the request in the team structure matches the type of - // cancellation point so we can cancel - return 1 /* true */; - } - KMP_ASSERT( 0 /* false */); - } - else { - // we do not have a cancellation request pending, so we just - // ignore this cancellation point - return 0; - } - break; - } - case cancel_taskgroup: - // cancellation requests for a task group - // are handled through the taskgroup structure - { - kmp_taskdata_t* task; - kmp_taskgroup_t* taskgroup; - - task = this_thr->th.th_current_task; - KMP_DEBUG_ASSERT( task ); - - taskgroup = task->td_taskgroup; - if (taskgroup) { - // return the current status of cancellation for the - // taskgroup - return !!taskgroup->cancel_request; - } - else { - // if a cancellation point is encountered by a task - // that does not belong to a taskgroup, it is OK - // to ignore it - return 0 /* false */; - } - } - default: - KMP_ASSERT (0 /* false */); - } - } - - // ICV OMP_CANCELLATION=false, so we ignore the cancellation point - KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); - return 0 /* false */; -} - -/*! 
-@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread - -@return returns true if a matching cancellation request has been flagged in the RTL and the -encountering thread has to cancel.. - -Barrier with cancellation point to send threads from the barrier to the -end of the parallel region. Needs a special code pattern as documented -in the design document for the cancellation feature. -*/ -kmp_int32 -__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) { - int ret = 0 /* false */; - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - kmp_team_t *this_team = this_thr->th.th_team; - - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - // call into the standard barrier - __kmpc_barrier(loc, gtid); - - // if cancellation is active, check cancellation flag - if (__kmp_omp_cancellation) { - // depending on which construct to cancel, check the flag and - // reset the flag - switch (this_team->t.t_cancel_request) { - case cancel_parallel: - ret = 1; - // ensure that threads have checked the flag, when - // leaving the above barrier - __kmpc_barrier(loc, gtid); - this_team->t.t_cancel_request = cancel_noreq; - // the next barrier is the fork/join barrier, which - // synchronizes the threads leaving here - break; - case cancel_loop: - case cancel_sections: - ret = 1; - // ensure that threads have checked the flag, when - // leaving the above barrier - __kmpc_barrier(loc, gtid); - this_team->t.t_cancel_request = cancel_noreq; - // synchronize the threads again to make sure we - // do not have any run-away threads that cause a race - // on the cancellation flag - __kmpc_barrier(loc, gtid); - break; - case cancel_taskgroup: - // this case should not occur - KMP_ASSERT (0 /* false */ ); - break; - case cancel_noreq: - // do nothing - break; - default: - KMP_ASSERT ( 0 /* false */); - } - } - - return ret; -} - -/*! 
-@ingroup CANCELLATION -@param loc_ref location of the original task directive -@param gtid Global thread ID of encountering thread - -@return returns true if a matching cancellation request has been flagged in the RTL and the -encountering thread has to cancel.. - -Query function to query the current status of cancellation requests. -Can be used to implement the following pattern: - -if (kmp_get_cancellation_status(kmp_cancel_parallel)) { - perform_cleanup(); - #pragma omp cancellation point parallel -} -*/ -int __kmp_get_cancellation_status(int cancel_kind) { - if (__kmp_omp_cancellation) { - kmp_info_t *this_thr = __kmp_entry_thread(); - - switch (cancel_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - { - kmp_team_t *this_team = this_thr->th.th_team; - return this_team->t.t_cancel_request == cancel_kind; - } - case cancel_taskgroup: - { - kmp_taskdata_t* task; - kmp_taskgroup_t* taskgroup; - task = this_thr->th.th_current_task; - taskgroup = task->td_taskgroup; - return taskgroup && taskgroup->cancel_request; - } - } - } - - return 0 /* false */; -} - -#endif +if (kmp_get_cancellation_status(kmp_cancel_parallel)) { + perform_cleanup(); + #pragma omp cancellation point parallel +} +*/ +int __kmp_get_cancellation_status(int cancel_kind) { + if (__kmp_omp_cancellation) { + kmp_info_t *this_thr = __kmp_entry_thread(); + + switch (cancel_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + { + kmp_team_t *this_team = this_thr->th.th_team; + return this_team->t.t_cancel_request == cancel_kind; + } + case cancel_taskgroup: + { + kmp_taskdata_t* task; + kmp_taskgroup_t* taskgroup; + task = this_thr->th.th_current_task; + taskgroup = task->td_taskgroup; + return taskgroup && taskgroup->cancel_request; + } + } + } + + return 0 /* false */; +} + +#endif diff --git a/contrib/libs/cxxsupp/openmp/kmp_config.h b/contrib/libs/cxxsupp/openmp/kmp_config.h index f49adebdb01..35d10e2bfdc 100644 --- 
a/contrib/libs/cxxsupp/openmp/kmp_config.h +++ b/contrib/libs/cxxsupp/openmp/kmp_config.h @@ -1,100 +1,100 @@ -/* - * kmp_config.h -- Feature macros - */ -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// -#ifndef KMP_CONFIG_H -#define KMP_CONFIG_H - -#include "kmp_platform.h" - -// cmakedefine01 MACRO will define MACRO as either 0 or 1 -// cmakedefine MACRO 1 will define MACRO as 1 or leave undefined -#define DEBUG_BUILD 0 -#define RELWITHDEBINFO_BUILD 0 +/* + * kmp_config.h -- Feature macros + */ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// +#ifndef KMP_CONFIG_H +#define KMP_CONFIG_H + +#include "kmp_platform.h" + +// cmakedefine01 MACRO will define MACRO as either 0 or 1 +// cmakedefine MACRO 1 will define MACRO as 1 or leave undefined +#define DEBUG_BUILD 0 +#define RELWITHDEBINFO_BUILD 0 #define LIBOMP_USE_ITT_NOTIFY 0 -#define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY -#if ! 
LIBOMP_USE_ITT_NOTIFY -# define INTEL_NO_ITTNOTIFY_API -#endif -#define LIBOMP_USE_VERSION_SYMBOLS 0 -#if LIBOMP_USE_VERSION_SYMBOLS -# define KMP_USE_VERSION_SYMBOLS -#endif -#define LIBOMP_HAVE_WEAK_ATTRIBUTE 1 -#define KMP_HAVE_WEAK_ATTRIBUTE LIBOMP_HAVE_WEAK_ATTRIBUTE -#define LIBOMP_HAVE_PSAPI 0 -#define KMP_HAVE_PSAPI LIBOMP_HAVE_PSAPI -#define LIBOMP_STATS 0 -#define KMP_STATS_ENABLED LIBOMP_STATS -#define LIBOMP_USE_DEBUGGER 0 -#define USE_DEBUGGER LIBOMP_USE_DEBUGGER -#define LIBOMP_OMPT_DEBUG 0 -#define OMPT_DEBUG LIBOMP_OMPT_DEBUG -#define LIBOMP_OMPT_SUPPORT 0 -#define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#define LIBOMP_OMPT_BLAME 1 -#define OMPT_BLAME LIBOMP_OMPT_BLAME -#define LIBOMP_OMPT_TRACE 1 -#define OMPT_TRACE LIBOMP_OMPT_TRACE -#define LIBOMP_USE_ADAPTIVE_LOCKS 1 -#define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS -#define KMP_DEBUG_ADAPTIVE_LOCKS 0 -#define LIBOMP_USE_INTERNODE_ALIGNMENT 0 -#define KMP_USE_INTERNODE_ALIGNMENT LIBOMP_USE_INTERNODE_ALIGNMENT -#define LIBOMP_ENABLE_ASSERTIONS 1 -#define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS -#define STUBS_LIBRARY 0 -#define LIBOMP_USE_HWLOC 0 -#define KMP_USE_HWLOC LIBOMP_USE_HWLOC -#define KMP_ARCH_STR "Intel(R) 64" -#define KMP_LIBRARY_FILE "libomp.so" -#define KMP_VERSION_MAJOR 5 -#define KMP_VERSION_MINOR 0 -#define LIBOMP_OMP_VERSION 41 -#define OMP_50_ENABLED (LIBOMP_OMP_VERSION >= 50) -#define OMP_41_ENABLED (LIBOMP_OMP_VERSION >= 41) -#define OMP_40_ENABLED (LIBOMP_OMP_VERSION >= 40) -#define OMP_30_ENABLED (LIBOMP_OMP_VERSION >= 30) - -// Configured cache line based on architecture -#if KMP_ARCH_PPC64 -# define CACHE_LINE 128 -#else -# define CACHE_LINE 64 -#endif - -#define KMP_DYNAMIC_LIB 1 -#define KMP_NESTED_HOT_TEAMS 1 -#define KMP_ADJUST_BLOCKTIME 1 -#define BUILD_PARALLEL_ORDERED 1 -#define KMP_ASM_INTRINS 1 -#define USE_ITT_BUILD 1 -#define INTEL_ITTNOTIFY_PREFIX __kmp_itt_ -#if ! KMP_MIC -# define USE_LOAD_BALANCE 1 -#endif -#if ! 
(KMP_OS_WINDOWS || KMP_OS_DARWIN) -# define KMP_TDATA_GTID 1 -#endif -#if STUBS_LIBRARY -# define KMP_STUB 1 -#endif -#if DEBUG_BUILD || RELWITHDEBINFO_BUILD -# define KMP_DEBUG 1 -#endif - -#if KMP_OS_WINDOWS -# define KMP_WIN_CDECL -#else -# define BUILD_TV -# define KMP_GOMP_COMPAT -#endif - -#endif // KMP_CONFIG_H +#define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY +#if ! LIBOMP_USE_ITT_NOTIFY +# define INTEL_NO_ITTNOTIFY_API +#endif +#define LIBOMP_USE_VERSION_SYMBOLS 0 +#if LIBOMP_USE_VERSION_SYMBOLS +# define KMP_USE_VERSION_SYMBOLS +#endif +#define LIBOMP_HAVE_WEAK_ATTRIBUTE 1 +#define KMP_HAVE_WEAK_ATTRIBUTE LIBOMP_HAVE_WEAK_ATTRIBUTE +#define LIBOMP_HAVE_PSAPI 0 +#define KMP_HAVE_PSAPI LIBOMP_HAVE_PSAPI +#define LIBOMP_STATS 0 +#define KMP_STATS_ENABLED LIBOMP_STATS +#define LIBOMP_USE_DEBUGGER 0 +#define USE_DEBUGGER LIBOMP_USE_DEBUGGER +#define LIBOMP_OMPT_DEBUG 0 +#define OMPT_DEBUG LIBOMP_OMPT_DEBUG +#define LIBOMP_OMPT_SUPPORT 0 +#define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT +#define LIBOMP_OMPT_BLAME 1 +#define OMPT_BLAME LIBOMP_OMPT_BLAME +#define LIBOMP_OMPT_TRACE 1 +#define OMPT_TRACE LIBOMP_OMPT_TRACE +#define LIBOMP_USE_ADAPTIVE_LOCKS 1 +#define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS +#define KMP_DEBUG_ADAPTIVE_LOCKS 0 +#define LIBOMP_USE_INTERNODE_ALIGNMENT 0 +#define KMP_USE_INTERNODE_ALIGNMENT LIBOMP_USE_INTERNODE_ALIGNMENT +#define LIBOMP_ENABLE_ASSERTIONS 1 +#define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS +#define STUBS_LIBRARY 0 +#define LIBOMP_USE_HWLOC 0 +#define KMP_USE_HWLOC LIBOMP_USE_HWLOC +#define KMP_ARCH_STR "Intel(R) 64" +#define KMP_LIBRARY_FILE "libomp.so" +#define KMP_VERSION_MAJOR 5 +#define KMP_VERSION_MINOR 0 +#define LIBOMP_OMP_VERSION 41 +#define OMP_50_ENABLED (LIBOMP_OMP_VERSION >= 50) +#define OMP_41_ENABLED (LIBOMP_OMP_VERSION >= 41) +#define OMP_40_ENABLED (LIBOMP_OMP_VERSION >= 40) +#define OMP_30_ENABLED (LIBOMP_OMP_VERSION >= 30) + +// Configured cache line based on architecture +#if KMP_ARCH_PPC64 +# define 
CACHE_LINE 128 +#else +# define CACHE_LINE 64 +#endif + +#define KMP_DYNAMIC_LIB 1 +#define KMP_NESTED_HOT_TEAMS 1 +#define KMP_ADJUST_BLOCKTIME 1 +#define BUILD_PARALLEL_ORDERED 1 +#define KMP_ASM_INTRINS 1 +#define USE_ITT_BUILD 1 +#define INTEL_ITTNOTIFY_PREFIX __kmp_itt_ +#if ! KMP_MIC +# define USE_LOAD_BALANCE 1 +#endif +#if ! (KMP_OS_WINDOWS || KMP_OS_DARWIN) +# define KMP_TDATA_GTID 1 +#endif +#if STUBS_LIBRARY +# define KMP_STUB 1 +#endif +#if DEBUG_BUILD || RELWITHDEBINFO_BUILD +# define KMP_DEBUG 1 +#endif + +#if KMP_OS_WINDOWS +# define KMP_WIN_CDECL +#else +# define BUILD_TV +# define KMP_GOMP_COMPAT +#endif + +#endif // KMP_CONFIG_H diff --git a/contrib/libs/cxxsupp/openmp/kmp_csupport.c b/contrib/libs/cxxsupp/openmp/kmp_csupport.c index 6d1d3285431..e44886facdf 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_csupport.c +++ b/contrib/libs/cxxsupp/openmp/kmp_csupport.c @@ -1,3046 +1,3046 @@ -/* - * kmp_csupport.c -- kfront linkage support for OpenMP. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "omp.h" /* extern "C" declarations of user-visible routines */ -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_itt.h" -#include "kmp_error.h" -#include "kmp_stats.h" - -#if OMPT_SUPPORT -#include "ompt-internal.h" -#include "ompt-specific.h" -#endif - -#define MAX_MESSAGE 512 - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* flags will be used in future, e.g., to implement */ -/* openmp_strict library restrictions */ - -/*! 
- * @ingroup STARTUP_SHUTDOWN - * @param loc in source location information - * @param flags in for future use (currently ignored) - * - * Initialize the runtime library. This call is optional; if it is not made then - * it will be implicitly called by attempts to use other library functions. - * - */ -void -__kmpc_begin(ident_t *loc, kmp_int32 flags) -{ - // By default __kmp_ignore_mppbeg() returns TRUE. - if (__kmp_ignore_mppbeg() == FALSE) { - __kmp_internal_begin(); - - KC_TRACE( 10, ("__kmpc_begin: called\n" ) ); - } -} - -/*! - * @ingroup STARTUP_SHUTDOWN - * @param loc source location information - * - * Shutdown the runtime library. This is also optional, and even if called will not - * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero. - */ -void -__kmpc_end(ident_t *loc) -{ - // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op. - // However, this can be overridden with KMP_IGNORE_MPPEND environment variable. - // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end() - // will unregister this root (it can cause library shut down). - if (__kmp_ignore_mppend() == FALSE) { - KC_TRACE( 10, ("__kmpc_end: called\n" ) ); - KA_TRACE( 30, ("__kmpc_end\n" )); - - __kmp_internal_end_thread( -1 ); - } -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return The global thread index of the active thread. - -This function can be called in any context. - -If the runtime has ony been entered at the outermost level from a -single (necessarily non-OpenMP*) thread, then the thread number is that -which would be returned by omp_get_thread_num() in the outermost -active parallel construct. (Or zero if there is no active parallel -construct, since the master thread is necessarily thread zero). 
- -If multiple non-OpenMP threads all enter an OpenMP construct then this -will be a unique thread identifier among all the threads created by -the OpenMP runtime (but the value cannote be defined in terms of -OpenMP thread ids returned by omp_get_thread_num()). - -*/ -kmp_int32 -__kmpc_global_thread_num(ident_t *loc) -{ - kmp_int32 gtid = __kmp_entry_gtid(); - - KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) ); - - return gtid; -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return The number of threads under control of the OpenMP* runtime - -This function can be called in any context. -It returns the total number of threads under the control of the OpenMP runtime. That is -not a number that can be determined by any OpenMP standard calls, since the library may be -called from more than one non-OpenMP thread, and this reflects the total over all such calls. -Similarly the runtime maintains underlying threads even when they are not active (since the cost -of creating and destroying OS threads is high), this call counts all such threads even if they are not -waiting for work. -*/ -kmp_int32 -__kmpc_global_num_threads(ident_t *loc) -{ - KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) ); - - return TCR_4(__kmp_nth); -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return The thread number of the calling thread in the innermost active parallel construct. - -*/ -kmp_int32 -__kmpc_bound_thread_num(ident_t *loc) -{ - KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) ); - return __kmp_tid_from_gtid( __kmp_entry_gtid() ); -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return The number of threads in the innermost active parallel construct. -*/ -kmp_int32 -__kmpc_bound_num_threads(ident_t *loc) -{ - KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) ); - - return __kmp_entry_thread() -> th.th_team -> t.t_nproc; -} - -/*! 
- * @ingroup DEPRECATED - * @param loc location description - * - * This function need not be called. It always returns TRUE. - */ -kmp_int32 -__kmpc_ok_to_fork(ident_t *loc) -{ -#ifndef KMP_DEBUG - - return TRUE; - -#else - - const char *semi2; - const char *semi3; - int line_no; - - if (__kmp_par_range == 0) { - return TRUE; - } - semi2 = loc->psource; - if (semi2 == NULL) { - return TRUE; - } - semi2 = strchr(semi2, ';'); - if (semi2 == NULL) { - return TRUE; - } - semi2 = strchr(semi2 + 1, ';'); - if (semi2 == NULL) { - return TRUE; - } - if (__kmp_par_range_filename[0]) { - const char *name = semi2 - 1; - while ((name > loc->psource) && (*name != '/') && (*name != ';')) { - name--; - } - if ((*name == '/') || (*name == ';')) { - name++; - } - if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { - return __kmp_par_range < 0; - } - } - semi3 = strchr(semi2 + 1, ';'); - if (__kmp_par_range_routine[0]) { - if ((semi3 != NULL) && (semi3 > semi2) - && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { - return __kmp_par_range < 0; - } - } - if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { - if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { - return __kmp_par_range > 0; - } - return __kmp_par_range < 0; - } - return TRUE; - -#endif /* KMP_DEBUG */ - -} - -/*! -@ingroup THREAD_STATES -@param loc Source location information. -@return 1 if this thread is executing inside an active parallel region, zero if not. -*/ -kmp_int32 -__kmpc_in_parallel( ident_t *loc ) -{ - return __kmp_entry_thread() -> th.th_root -> r.r_active; -} - -/*! -@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number -@param num_threads number of threads requested for this parallel construct - -Set the number of threads to be used by the next fork spawned by this thread. -This call is only required if the parallel construct has a `num_threads` clause. 
-*/ -void -__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads ) -{ - KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", - global_tid, num_threads ) ); - - __kmp_push_num_threads( loc, global_tid, num_threads ); -} - -void -__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid ) -{ - KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) ); - - /* the num_threads are automatically popped */ -} - - -#if OMP_40_ENABLED - -void -__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind ) -{ - KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", - global_tid, proc_bind ) ); - - __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind ); -} - -#endif /* OMP_40_ENABLED */ - - -/*! -@ingroup PARALLEL -@param loc source location information -@param argc total number of arguments in the ellipsis -@param microtask pointer to callback routine consisting of outlined parallel construct -@param ... pointers to shared variables that aren't global - -Do the actual fork and call the microtask in the relevant number of threads. -*/ -void -__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) -{ - int gtid = __kmp_entry_gtid(); - -#if (KMP_STATS_ENABLED) - int inParallel = __kmpc_in_parallel(loc); - if (inParallel) - { - KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); - } - else - { - KMP_STOP_EXPLICIT_TIMER(OMP_serial); - KMP_COUNT_BLOCK(OMP_PARALLEL); - } -#endif - - // maybe to save thr_state is enough here - { - va_list ap; - va_start( ap, microtask ); - -#if OMPT_SUPPORT - int tid = __kmp_tid_from_gtid( gtid ); - kmp_info_t *master_th = __kmp_threads[ gtid ]; - kmp_team_t *parent_team = master_th->th.th_team; - if (ompt_enabled) { - parent_team->t.t_implicit_task_taskdata[tid]. 
- ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0); - } -#endif - -#if INCLUDE_SSC_MARKS - SSC_MARK_FORKING(); -#endif - __kmp_fork_call( loc, gtid, fork_context_intel, - argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task -#endif - VOLATILE_CAST(microtask_t) microtask, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_task_func, -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap -#else - ap -#endif - ); -#if INCLUDE_SSC_MARKS - SSC_MARK_JOINING(); -#endif - __kmp_join_call( loc, gtid -#if OMPT_SUPPORT - , fork_context_intel -#endif - ); - - va_end( ap ); - -#if OMPT_SUPPORT - if (ompt_enabled) { - parent_team->t.t_implicit_task_taskdata[tid]. - ompt_task_info.frame.reenter_runtime_frame = 0; - } -#endif - } -#if (KMP_STATS_ENABLED) - if (!inParallel) - KMP_START_EXPLICIT_TIMER(OMP_serial); -#endif -} - -#if OMP_40_ENABLED -/*! -@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number -@param num_teams number of teams requested for the teams construct -@param num_threads number of threads per team requested for the teams construct - -Set the number of teams to be used by the teams construct. -This call is only required if the teams construct has a `num_teams` clause -or a `thread_limit` clause (or both). -*/ -void -__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads ) -{ - KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", - global_tid, num_teams, num_threads ) ); - - __kmp_push_num_teams( loc, global_tid, num_teams, num_threads ); -} - -/*! -@ingroup PARALLEL -@param loc source location information -@param argc total number of arguments in the ellipsis -@param microtask pointer to callback routine consisting of outlined teams construct -@param ... 
pointers to shared variables that aren't global - -Do the actual fork and call the microtask in the relevant number of threads. -*/ -void -__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) -{ - int gtid = __kmp_entry_gtid(); - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - va_list ap; - va_start( ap, microtask ); - - KMP_COUNT_BLOCK(OMP_TEAMS); - - // remember teams entry point and nesting level - this_thr->th.th_teams_microtask = microtask; - this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host - -#if OMPT_SUPPORT - kmp_team_t *parent_team = this_thr->th.th_team; - int tid = __kmp_tid_from_gtid( gtid ); - if (ompt_enabled) { - parent_team->t.t_implicit_task_taskdata[tid]. - ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0); - } -#endif - - // check if __kmpc_push_num_teams called, set default number of teams otherwise - if ( this_thr->th.th_teams_size.nteams == 0 ) { - __kmp_push_num_teams( loc, gtid, 0, 0 ); - } - KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); - KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); - KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); - - __kmp_fork_call( loc, gtid, fork_context_intel, - argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task -#endif - VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap -#else - ap -#endif - ); - __kmp_join_call( loc, gtid -#if OMPT_SUPPORT - , fork_context_intel -#endif - ); - -#if OMPT_SUPPORT - if (ompt_enabled) { - parent_team->t.t_implicit_task_taskdata[tid]. 
- ompt_task_info.frame.reenter_runtime_frame = NULL; - } -#endif - - this_thr->th.th_teams_microtask = NULL; - this_thr->th.th_teams_level = 0; - *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L; - va_end( ap ); -} -#endif /* OMP_40_ENABLED */ - - -// -// I don't think this function should ever have been exported. -// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated -// openmp code ever called it, but it's been exported from the RTL for so -// long that I'm afraid to remove the definition. -// -int -__kmpc_invoke_task_func( int gtid ) -{ - return __kmp_invoke_task_func( gtid ); -} - -/*! -@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number - -Enter a serialized parallel construct. This interface is used to handle a -conditional parallel region, like this, -@code -#pragma omp parallel if (condition) -@endcode -when the condition is false. -*/ -void -__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) -{ - __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with - * kmp_fork_call since the tasks to be done are similar in each case. - */ -} - -/*! -@ingroup PARALLEL -@param loc source location information -@param global_tid global thread number - -Leave a serialized parallel construct. -*/ -void -__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) -{ - kmp_internal_control_t *top; - kmp_info_t *this_thr; - kmp_team_t *serial_team; - - KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) ); - - /* skip all this code for autopar serialized loops since it results in - unacceptable overhead */ - if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) ) - return; - - // Not autopar code - if( ! 
TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); - - this_thr = __kmp_threads[ global_tid ]; - serial_team = this_thr->th.th_serial_team; - - #if OMP_41_ENABLED - kmp_task_team_t * task_team = this_thr->th.th_task_team; - - // we need to wait for the proxy tasks before finishing the thread - if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) - __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here? - #endif - - KMP_MB(); - KMP_DEBUG_ASSERT( serial_team ); - KMP_ASSERT( serial_team -> t.t_serialized ); - KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team ); - KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team ); - KMP_DEBUG_ASSERT( serial_team -> t.t_threads ); - KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr ); - - /* If necessary, pop the internal control stack values and replace the team values */ - top = serial_team -> t.t_control_stack_top; - if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) { - copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top ); - serial_team -> t.t_control_stack_top = top -> next; - __kmp_free(top); - } - - //if( serial_team -> t.t_serialized > 1 ) - serial_team -> t.t_level--; - - /* pop dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); - { - dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer; - serial_team->t.t_dispatch->th_disp_buffer = - serial_team->t.t_dispatch->th_disp_buffer->next; - __kmp_free( disp_buffer ); - } - - -- serial_team -> t.t_serialized; - if ( serial_team -> t.t_serialized == 0 ) { - - /* return to the parallel section */ - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) { - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word ); - __kmp_load_mxcsr( &serial_team->t.t_mxcsr ); - } -#endif /* 
KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - this_thr -> th.th_team = serial_team -> t.t_parent; - this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid; - - /* restore values cached in the thread */ - this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */ - this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */ - this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized; - - /* TODO the below shouldn't need to be adjusted for serialized teams */ - this_thr -> th.th_dispatch = & this_thr -> th.th_team -> - t.t_dispatch[ serial_team -> t.t_master_tid ]; - - __kmp_pop_current_task_from_thread( this_thr ); - - KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 ); - this_thr -> th.th_current_task -> td_flags.executing = 1; - - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Copy the task team from the new child / old parent team to the thread. - this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; - KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n", - global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) ); - } - } else { - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n", - global_tid, serial_team, serial_team -> t.t_serialized ) ); - } - } - -#if USE_ITT_BUILD - kmp_uint64 cur_time = 0; -#if USE_ITT_NOTIFY - if ( __itt_get_timestamp_ptr ) { - cur_time = __itt_get_timestamp(); - } -#endif /* USE_ITT_NOTIFY */ - if ( this_thr->th.th_team->t.t_level == 0 -#if OMP_40_ENABLED - && this_thr->th.th_teams_microtask == NULL -#endif - ) { - // Report the barrier - this_thr->th.th_ident = loc; - if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && - ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) - { - __kmp_itt_frame_submit( global_tid, 
this_thr->th.th_frame_time_serialized, - cur_time, 0, loc, this_thr->th.th_team_nproc, 0 ); - if ( __kmp_forkjoin_frames_mode == 3 ) - // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier. - __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time, - cur_time, 0, loc, this_thr->th.th_team_nproc, 2 ); - } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && - ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) - // Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment. - __kmp_itt_region_joined( global_tid, 1 ); - } -#endif /* USE_ITT_BUILD */ - - if ( __kmp_env_consistency_check ) - __kmp_pop_parallel( global_tid, NULL ); -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information. - -Execute flush. This is implemented as a full memory fence. (Though -depending on the memory ordering convention obeyed by the compiler -even that may not be necessary). -*/ -void -__kmpc_flush(ident_t *loc) -{ - KC_TRACE( 10, ("__kmpc_flush: called\n" ) ); - - /* need explicit __mf() here since use volatile instead in library */ - KMP_MB(); /* Flush all pending memory write invalidates. */ - - #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) - #if KMP_MIC - // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. - // We shouldn't need it, though, since the ABI rules require that - // * If the compiler generates NGO stores it also generates the fence - // * If users hand-code NGO stores they should insert the fence - // therefore no incomplete unordered stores should be visible. - #else - // C74404 - // This is to address non-temporal store instructions (sfence needed). - // The clflush instruction is addressed either (mfence needed). - // Probably the non-temporal load monvtdqa instruction should also be addressed. - // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. - if ( ! 
__kmp_cpuinfo.initialized ) { - __kmp_query_cpuid( & __kmp_cpuinfo ); - }; // if - if ( ! __kmp_cpuinfo.sse2 ) { - // CPU cannot execute SSE2 instructions. - } else { - #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC - _mm_mfence(); - #else - __sync_synchronize(); - #endif // KMP_COMPILER_ICC - }; // if - #endif // KMP_MIC - #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64) - // Nothing to see here move along - #elif KMP_ARCH_PPC64 - // Nothing needed here (we have a real MB above). - #if KMP_OS_CNK - // The flushing thread needs to yield here; this prevents a - // busy-waiting thread from saturating the pipeline. flush is - // often used in loops like this: - // while (!flag) { - // #pragma omp flush(flag) - // } - // and adding the yield here is good for at least a 10x speedup - // when running >2 threads per core (on the NAS LU benchmark). - __kmp_yield(TRUE); - #endif - #else - #error Unknown or unsupported architecture - #endif - -} - -/* -------------------------------------------------------------------------- */ - -/* -------------------------------------------------------------------------- */ - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid thread id. - -Execute a barrier. -*/ -void -__kmpc_barrier(ident_t *loc, kmp_int32 global_tid) -{ - KMP_COUNT_BLOCK(OMP_BARRIER); - KMP_TIME_BLOCK(OMP_barrier); - KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) ); - - if (! TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - if ( __kmp_env_consistency_check ) { - if ( loc == 0 ) { - KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user? - }; // if - - __kmp_check_barrier( global_tid, ct_barrier, loc ); - } - - __kmp_threads[ global_tid ]->th.th_ident = loc; - // TODO: explicit barrier_wait_id: - // this function is called when 'barrier' directive is present or - // implicit barrier at the end of a worksharing construct. 
- // 1) better to add a per-thread barrier counter to a thread data structure - // 2) set to 0 when a new team is created - // 4) no sync is required - - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); -} - -/* The BARRIER for a MASTER section is always explicit */ -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number . -@return 1 if this thread should execute the master block, 0 otherwise. -*/ -kmp_int32 -__kmpc_master(ident_t *loc, kmp_int32 global_tid) -{ - KMP_COUNT_BLOCK(OMP_MASTER); - int status = 0; - - KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) ); - - if( ! TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); - - if( KMP_MASTER_GTID( global_tid )) { - KMP_START_EXPLICIT_TIMER(OMP_master); - status = 1; - } - -#if OMPT_SUPPORT && OMPT_TRACE - if (status) { - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_master_begin)) { - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - - int tid = __kmp_tid_from_gtid( global_tid ); - ompt_callbacks.ompt_callback(ompt_event_master_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } - } -#endif - - if ( __kmp_env_consistency_check ) { -#if KMP_USE_DYNAMIC_LOCK - if (status) - __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 ); - else - __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 ); -#else - if (status) - __kmp_push_sync( global_tid, ct_master, loc, NULL ); - else - __kmp_check_sync( global_tid, ct_master, loc, NULL ); -#endif - } - - return status; -} - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number . - -Mark the end of a master region. This should only be called by the thread -that executes the master region. 
-*/ -void -__kmpc_end_master(ident_t *loc, kmp_int32 global_tid) -{ - KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) ); - - KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid )); - KMP_STOP_EXPLICIT_TIMER(OMP_master); - -#if OMPT_SUPPORT && OMPT_TRACE - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_master_end)) { - int tid = __kmp_tid_from_gtid( global_tid ); - ompt_callbacks.ompt_callback(ompt_event_master_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } -#endif - - if ( __kmp_env_consistency_check ) { - if( global_tid < 0 ) - KMP_WARNING( ThreadIdentInvalid ); - - if( KMP_MASTER_GTID( global_tid )) - __kmp_pop_sync( global_tid, ct_master, loc ); - } -} - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param gtid global thread number. - -Start execution of an ordered construct. -*/ -void -__kmpc_ordered( ident_t * loc, kmp_int32 gtid ) -{ - int cid = 0; - kmp_info_t *th; - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid )); - - if (! 
TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - -#if USE_ITT_BUILD - __kmp_itt_ordered_prep( gtid ); - // TODO: ordered_wait_id -#endif /* USE_ITT_BUILD */ - - th = __kmp_threads[ gtid ]; - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - /* OMPT state update */ - th->th.ompt_thread_info.wait_id = (uint64_t) loc; - th->th.ompt_thread_info.state = ompt_state_wait_ordered; - - /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_wait_ordered)( - th->th.ompt_thread_info.wait_id); - } - } -#endif - - if ( th -> th.th_dispatch -> th_deo_fcn != 0 ) - (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc ); - else - __kmp_parallel_deo( & gtid, & cid, loc ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - /* OMPT state update */ - th->th.ompt_thread_info.state = ompt_state_work_parallel; - th->th.ompt_thread_info.wait_id = 0; - - /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)( - th->th.ompt_thread_info.wait_id); - } - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_ordered_start( gtid ); -#endif /* USE_ITT_BUILD */ -} - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param gtid global thread number. - -End execution of an ordered construct. 
-*/ -void -__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid ) -{ - int cid = 0; - kmp_info_t *th; - - KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) ); - -#if USE_ITT_BUILD - __kmp_itt_ordered_end( gtid ); - // TODO: ordered_wait_id -#endif /* USE_ITT_BUILD */ - - th = __kmp_threads[ gtid ]; - - if ( th -> th.th_dispatch -> th_dxo_fcn != 0 ) - (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc ); - else - __kmp_parallel_dxo( & gtid, & cid, loc ); - -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - th->th.ompt_thread_info.wait_id); - } -#endif -} - -#if KMP_USE_DYNAMIC_LOCK - -static __forceinline void -__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag) -{ - // Pointer to the allocated indirect lock is written to crit, while indexing is ignored. - void *idx; - kmp_indirect_lock_t **lck; - lck = (kmp_indirect_lock_t **)crit; - kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); - KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); - KMP_SET_I_LOCK_LOCATION(ilk, loc); - KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); - KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); -#if USE_ITT_BUILD - __kmp_itt_critical_creating(ilk->lock, loc); -#endif - int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk); - if (status == 0) { -#if USE_ITT_BUILD - __kmp_itt_critical_destroyed(ilk->lock); -#endif - // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit. - //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); - } - KMP_DEBUG_ASSERT(*lck != NULL); -} - -// Fast-path acquire tas lock -#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \ - kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ - if (l->lk.poll != KMP_LOCK_FREE(tas) || \ - ! 
KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE(l); \ - KMP_INIT_YIELD(spins); \ - if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - while (l->lk.poll != KMP_LOCK_FREE(tas) || \ - ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \ - if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - } \ - } \ - KMP_FSYNC_ACQUIRED(l); \ -} - -// Fast-path test tas lock -#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \ - kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ - rc = l->lk.poll == KMP_LOCK_FREE(tas) && \ - KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \ -} - -// Fast-path release tas lock -#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \ - TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \ - KMP_MB(); \ -} - -#if KMP_USE_FUTEX - -# include -# include -# ifndef FUTEX_WAIT -# define FUTEX_WAIT 0 -# endif -# ifndef FUTEX_WAKE -# define FUTEX_WAKE 1 -# endif - -// Fast-path acquire futex lock -#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \ - kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ - kmp_int32 gtid_code = (gtid+1) << 1; \ - KMP_MB(); \ - KMP_FSYNC_PREPARE(ftx); \ - kmp_int32 poll_val; \ - while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ - KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ - kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ - if (!cond) { \ - if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \ - continue; \ - } \ - poll_val |= KMP_LOCK_BUSY(1, futex); \ - } \ - kmp_int32 rc; \ - if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \ - 
continue; \ - } \ - gtid_code |= 1; \ - } \ - KMP_FSYNC_ACQUIRED(ftx); \ -} - -// Fast-path test futex lock -#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \ - kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ - if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1)) { \ - KMP_FSYNC_ACQUIRED(ftx); \ - rc = TRUE; \ - } else { \ - rc = FALSE; \ - } \ -} - -// Fast-path release futex lock -#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \ - kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ - KMP_MB(); \ - KMP_FSYNC_RELEASING(ftx); \ - kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ - if (KMP_LOCK_STRIP(poll_val) & 1) { \ - syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ - } \ - KMP_MB(); \ - KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \ -} - -#endif // KMP_USE_FUTEX - -#else // KMP_USE_DYNAMIC_LOCK - -static kmp_user_lock_p -__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid ) -{ - kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; - - // - // Because of the double-check, the following load - // doesn't need to be volatile. - // - kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp ); - - if ( lck == NULL ) { - void * idx; - - // Allocate & initialize the lock. - // Remember allocated locks in table in order to free them in __kmp_cleanup() - lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section ); - __kmp_init_user_lock_with_checks( lck ); - __kmp_set_user_lock_location( lck, loc ); -#if USE_ITT_BUILD - __kmp_itt_critical_creating( lck ); - // __kmp_itt_critical_creating() should be called *before* the first usage of underlying - // lock. It is the only place where we can guarantee it. 
There are chances the lock will - // destroyed with no usage, but it is not a problem, because this is not real event seen - // by user but rather setting name for object (lock). See more details in kmp_itt.h. -#endif /* USE_ITT_BUILD */ - - // - // Use a cmpxchg instruction to slam the start of the critical - // section with the lock pointer. If another thread beat us - // to it, deallocate the lock, and use the lock that the other - // thread allocated. - // - int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck ); - - if ( status == 0 ) { - // Deallocate the lock and reload the value. -#if USE_ITT_BUILD - __kmp_itt_critical_destroyed( lck ); - // Let ITT know the lock is destroyed and the same memory location may be reused for - // another purpose. -#endif /* USE_ITT_BUILD */ - __kmp_destroy_user_lock_with_checks( lck ); - __kmp_user_lock_free( &idx, gtid, lck ); - lck = (kmp_user_lock_p)TCR_PTR( *lck_pp ); - KMP_DEBUG_ASSERT( lck != NULL ); - } - } - return lck; -} - -#endif // KMP_USE_DYNAMIC_LOCK - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number . -@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or -some other suitably unique value. - -Enter code protected by a `critical` construct. -This function blocks until the executing thread can enter the critical section. 
-*/ -void -__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) -{ -#if KMP_USE_DYNAMIC_LOCK - __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); -#else - KMP_COUNT_BLOCK(OMP_CRITICAL); - kmp_user_lock_p lck; - - KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) ); - - //TODO: add THR_OVHD_STATE - - KMP_CHECK_USER_LOCK_INIT(); - - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } -#endif - else { // ticket, queuing or drdpa - lck = __kmp_get_critical_section_ptr( crit, loc, global_tid ); - } - - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_critical, loc, lck ); - - /* since the critical directive binds to all threads, not just - * the current team we have to check this even if we are in a - * serialized team */ - /* also, even if we are the uber thread, we still have to conduct the lock, - * as we have to contend with sibling threads */ - -#if USE_ITT_BUILD - __kmp_itt_critical_acquiring( lck ); -#endif /* USE_ITT_BUILD */ - // Value of 'crit' should be good for using as a critical_id of the critical section directive. 
- __kmp_acquire_user_lock_with_checks( lck, global_tid ); - -#if USE_ITT_BUILD - __kmp_itt_critical_acquired( lck ); -#endif /* USE_ITT_BUILD */ - - KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid )); -#endif // KMP_USE_DYNAMIC_LOCK -} - -#if KMP_USE_DYNAMIC_LOCK - -// Converts the given hint to an internal lock implementation -static __forceinline kmp_dyna_lockseq_t -__kmp_map_hint_to_lock(uintptr_t hint) -{ -#if KMP_USE_TSX -# define KMP_TSX_LOCK(seq) lockseq_##seq -#else -# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq -#endif - // Hints that do not require further logic - if (hint & kmp_lock_hint_hle) - return KMP_TSX_LOCK(hle); - if (hint & kmp_lock_hint_rtm) - return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq; - if (hint & kmp_lock_hint_adaptive) - return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq; - - // Rule out conflicting hints first by returning the default lock - if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) - return __kmp_user_lock_seq; - if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative)) - return __kmp_user_lock_seq; - - // Do not even consider speculation when it appears to be contended - if (hint & omp_lock_hint_contended) - return lockseq_queuing; - - // Uncontended lock without speculation - if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) - return lockseq_tas; - - // HLE lock for speculation - if (hint & omp_lock_hint_speculative) - return KMP_TSX_LOCK(hle); - - return __kmp_user_lock_seq; -} - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number. -@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, -or some other suitably unique value. -@param hint the lock hint. - -Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation. 
-This function blocks until the executing thread can enter the critical section unless the hint suggests use of -speculative execution and the hardware supports it. -*/ -void -__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint ) -{ - KMP_COUNT_BLOCK(OMP_CRITICAL); - kmp_user_lock_p lck; - - KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) ); - - kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; - // Check if it is initialized. - if (*lk == 0) { - kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); - if (KMP_IS_D_LOCK(lckseq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq)); - } else { - __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); - } - } - // Branch for accessing the actual lock object and set operation. This branching is inevitable since - // this lock initialization does not follow the normal dispatch path (lock table is not used). - if (KMP_EXTRACT_D_TAG(lk) != 0) { - lck = (kmp_user_lock_p)lk; - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint)); - } -# if USE_ITT_BUILD - __kmp_itt_critical_acquiring(lck); -# endif -# if KMP_USE_INLINED_TAS - if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { - KMP_ACQUIRE_TAS_LOCK(lck, global_tid); - } else -# elif KMP_USE_INLINED_FUTEX - if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { - KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); - } else -# endif - { - KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); - } - } else { - kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); - lck = ilk->lock; - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint)); - } -# if USE_ITT_BUILD - __kmp_itt_critical_acquiring(lck); -# endif - KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); - } - -#if USE_ITT_BUILD - __kmp_itt_critical_acquired( lck 
); -#endif /* USE_ITT_BUILD */ - - KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid )); -} // __kmpc_critical_with_hint - -#endif // KMP_USE_DYNAMIC_LOCK - -/*! -@ingroup WORK_SHARING -@param loc source location information. -@param global_tid global thread number . -@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or -some other suitably unique value. - -Leave a critical section, releasing any lock that was held during its execution. -*/ -void -__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) -{ - kmp_user_lock_p lck; - - KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid )); - -#if KMP_USE_DYNAMIC_LOCK - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - lck = (kmp_user_lock_p)crit; - KMP_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_pop_sync(global_tid, ct_critical, loc); - } -# if USE_ITT_BUILD - __kmp_itt_critical_releasing( lck ); -# endif -# if KMP_USE_INLINED_TAS - if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { - KMP_RELEASE_TAS_LOCK(lck, global_tid); - } else -# elif KMP_USE_INLINED_FUTEX - if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { - KMP_RELEASE_FUTEX_LOCK(lck, global_tid); - } else -# endif - { - KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); - } - } else { - kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); - KMP_ASSERT(ilk != NULL); - lck = ilk->lock; - if (__kmp_env_consistency_check) { - __kmp_pop_sync(global_tid, ct_critical, loc); - } -# if USE_ITT_BUILD - __kmp_itt_critical_releasing( lck ); -# endif - KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); - } - -#else // KMP_USE_DYNAMIC_LOCK - - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || 
KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } -#endif - else { // ticket, queuing or drdpa - lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit)); - } - - KMP_ASSERT(lck != NULL); - - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_critical, loc ); - -#if USE_ITT_BUILD - __kmp_itt_critical_releasing( lck ); -#endif /* USE_ITT_BUILD */ - // Value of 'crit' should be good for using as a critical_id of the critical section directive. - __kmp_release_user_lock_with_checks( lck, global_tid ); - -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_critical)) { - ompt_callbacks.ompt_callback(ompt_event_release_critical)( - (uint64_t) lck); - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK - - KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid )); -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid thread id. -@return one if the thread should execute the master block, zero otherwise - -Start execution of a combined barrier and master. The barrier is executed inside this function. -*/ -kmp_int32 -__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) -{ - int status; - - KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) ); - - if (! TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - if ( __kmp_env_consistency_check ) - __kmp_check_barrier( global_tid, ct_barrier, loc ); - -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL ); - - return (status != 0) ? 0 : 1; -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid thread id. - -Complete the execution of a combined barrier and master. This function should -only be called at the completion of the master code. 
Other threads will -still be waiting at the barrier and this call releases them. -*/ -void -__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) -{ - KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid )); - - __kmp_end_split_barrier ( bs_plain_barrier, global_tid ); -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid thread id. -@return one if the thread should execute the master block, zero otherwise - -Start execution of a combined barrier and master(nowait) construct. -The barrier is executed inside this function. -There is no equivalent "end" function, since the -*/ -kmp_int32 -__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid ) -{ - kmp_int32 ret; - - KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid )); - - if (! TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - if ( __kmp_env_consistency_check ) { - if ( loc == 0 ) { - KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user? - } - __kmp_check_barrier( global_tid, ct_barrier, loc ); - } - -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); - - ret = __kmpc_master (loc, global_tid); - - if ( __kmp_env_consistency_check ) { - /* there's no __kmpc_end_master called; so the (stats) */ - /* actions of __kmpc_end_master are done here */ - - if ( global_tid < 0 ) { - KMP_WARNING( ThreadIdentInvalid ); - } - if (ret) { - /* only one thread should do the pop since only */ - /* one did the push (see __kmpc_master()) */ - - __kmp_pop_sync( global_tid, ct_master, loc ); - } - } - - return (ret); -} - -/* The BARRIER for a SINGLE process section is always explicit */ -/*! -@ingroup WORK_SHARING -@param loc source location information -@param global_tid global thread number -@return One if this thread should execute the single construct, zero otherwise. 
- -Test whether to execute a single construct. -There are no implicit barriers in the two "single" calls, rather the compiler should -introduce an explicit barrier if it is required. -*/ - -kmp_int32 -__kmpc_single(ident_t *loc, kmp_int32 global_tid) -{ - KMP_COUNT_BLOCK(OMP_SINGLE); - kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE ); - if(rc == TRUE) { - KMP_START_EXPLICIT_TIMER(OMP_single); - } - -#if OMPT_SUPPORT && OMPT_TRACE - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - int tid = __kmp_tid_from_gtid( global_tid ); - - if (ompt_enabled) { - if (rc) { - if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id, - team->t.ompt_team_info.microtask); - } - } else { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } - this_thr->th.ompt_thread_info.state = ompt_state_wait_single; - } - } -#endif - - return rc; -} - -/*! -@ingroup WORK_SHARING -@param loc source location information -@param global_tid global thread number - -Mark the end of a single construct. This function should -only be called by the thread that executed the block of code protected -by the `single` construct. 
-*/ -void -__kmpc_end_single(ident_t *loc, kmp_int32 global_tid) -{ - __kmp_exit_single( global_tid ); - KMP_STOP_EXPLICIT_TIMER(OMP_single); - -#if OMPT_SUPPORT && OMPT_TRACE - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - int tid = __kmp_tid_from_gtid( global_tid ); - - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } -#endif -} - -/*! -@ingroup WORK_SHARING -@param loc Source location -@param global_tid Global thread id - -Mark the end of a statically scheduled loop. -*/ -void -__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid ) -{ - KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_end)) { - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - int tid = __kmp_tid_from_gtid( global_tid ); - - ompt_callbacks.ompt_callback(ompt_event_loop_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } -#endif - - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, ct_pdo, loc ); -} - -/* - * User routines which take C-style arguments (call by value) - * different from the Fortran equivalent routines - */ - -void -ompc_set_num_threads( int arg ) -{ -// !!!!! TODO: check the per-task binding - __kmp_set_num_threads( arg, __kmp_entry_gtid() ); -} - -void -ompc_set_dynamic( int flag ) -{ - kmp_info_t *thread; - - /* For the thread-private implementation of the internal controls */ - thread = __kmp_entry_thread(); - - __kmp_save_internal_controls( thread ); - - set__dynamic( thread, flag ? 
TRUE : FALSE ); -} - -void -ompc_set_nested( int flag ) -{ - kmp_info_t *thread; - - /* For the thread-private internal controls implementation */ - thread = __kmp_entry_thread(); - - __kmp_save_internal_controls( thread ); - - set__nested( thread, flag ? TRUE : FALSE ); -} - -void -ompc_set_max_active_levels( int max_active_levels ) -{ - /* TO DO */ - /* we want per-task implementation of this internal control */ - - /* For the per-thread internal controls implementation */ - __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels ); -} - -void -ompc_set_schedule( omp_sched_t kind, int modifier ) -{ -// !!!!! TODO: check the per-task binding - __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier ); -} - -int -ompc_get_ancestor_thread_num( int level ) -{ - return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level ); -} - -int -ompc_get_team_size( int level ) -{ - return __kmp_get_team_size( __kmp_entry_gtid(), level ); -} - -void -kmpc_set_stacksize( int arg ) -{ - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( arg ); -} - -void -kmpc_set_stacksize_s( size_t arg ) -{ - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( arg ); -} - -void -kmpc_set_blocktime( int arg ) -{ - int gtid, tid; - kmp_info_t *thread; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - - __kmp_aux_set_blocktime( arg, thread, tid ); -} - -void -kmpc_set_library( int arg ) -{ - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( (enum library_type)arg ); -} - -void -kmpc_set_defaults( char const * str ) -{ - // __kmp_aux_set_defaults initializes the library if needed - __kmp_aux_set_defaults( str, KMP_STRLEN( str ) ); -} - -int -kmpc_set_affinity_mask_proc( int proc, void **mask ) -{ -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity_mask_proc( proc, mask ); -#endif -} - -int -kmpc_unset_affinity_mask_proc( int proc, void **mask ) -{ -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_unset_affinity_mask_proc( proc, mask ); -#endif -} - -int -kmpc_get_affinity_mask_proc( int proc, void **mask ) -{ -#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; -#else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_mask_proc( proc, mask ); -#endif -} - - -/* -------------------------------------------------------------------------- */ -/*! -@ingroup THREADPRIVATE -@param loc source location information -@param gtid global thread number -@param cpy_size size of the cpy_data buffer -@param cpy_data pointer to data to be copied -@param cpy_func helper function to call for copying data -@param didit flag variable: 1=single thread; 0=not single thread - -__kmpc_copyprivate implements the interface for the private data broadcast needed for -the copyprivate clause associated with a single region in an OpenMP* program (both C and Fortran). -All threads participating in the parallel region call this routine. -One of the threads (called the single thread) should have the didit variable set to 1 -and all other threads should have that variable set to 0. -All threads pass a pointer to a data buffer (cpy_data) that they have built. - -The OpenMP specification forbids the use of nowait on the single region when a copyprivate -clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid -race conditions, so the code generation for the single region should avoid generating a barrier -after the call to @ref __kmpc_copyprivate. - -The gtid parameter is the global thread id for the current thread. 
-The loc parameter is a pointer to source location information. - -Internal implementation: The single thread will first copy its descriptor address (cpy_data) -to a team-private location, then the other threads will each call the function pointed to by -the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer. - -The cpy_func routine used for the copy and the contents of the data area defined by cpy_data -and cpy_size may be built in any fashion that will allow the copy to be done. For instance, -the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers -to the data. The cpy_func routine must interpret the cpy_data buffer appropriately. - -The interface to cpy_func is as follows: -@code -void cpy_func( void *destination, void *source ) -@endcode -where void *destination is the cpy_data pointer for the thread being copied to -and void *source is the cpy_data pointer for the thread being copied from. -*/ -void -__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit ) -{ - void **data_ptr; - - KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid )); - - KMP_MB(); - - data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data; - - if ( __kmp_env_consistency_check ) { - if ( loc == 0 ) { - KMP_WARNING( ConstructIdentInvalid ); - } - } - - /* ToDo: Optimize the following two barriers into some kind of split barrier */ - - if (didit) *data_ptr = cpy_data; - - /* This barrier is not a barrier region boundary */ -#if USE_ITT_NOTIFY - __kmp_threads[gtid]->th.th_ident = loc; -#endif - __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL ); - - if (! 
didit) (*cpy_func)( cpy_data, *data_ptr ); - - /* Consider next barrier the user-visible barrier for barrier region boundaries */ - /* Nesting checks are already handled by the single construct checks */ - -#if USE_ITT_NOTIFY - __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location) -#endif - __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL ); -} - -/* -------------------------------------------------------------------------- */ - -#define INIT_LOCK __kmp_init_user_lock_with_checks -#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks -#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks -#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed -#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks -#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed -#define RELEASE_LOCK __kmp_release_user_lock_with_checks -#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks -#define TEST_LOCK __kmp_test_user_lock_with_checks -#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks -#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks -#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks - - -/* - * TODO: Make check abort messages use location info & pass it - * into with_checks routines - */ - -#if KMP_USE_DYNAMIC_LOCK - -// internal lock initializer -static __forceinline void -__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq) -{ - if (KMP_IS_D_LOCK(seq)) { - KMP_INIT_D_LOCK(lock, seq); -#if USE_ITT_BUILD - __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); -#endif - } else { - KMP_INIT_I_LOCK(lock, seq); -#if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __kmp_itt_lock_creating(ilk->lock, loc); -#endif - } -} - -// internal nest lock initializer -static __forceinline void -__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 
kmp_dyna_lockseq_t seq) -{ -#if KMP_USE_TSX - // Don't have nested lock implementation for speculative locks - if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) - seq = __kmp_user_lock_seq; -#endif - switch (seq) { - case lockseq_tas: - seq = lockseq_nested_tas; - break; -#if KMP_USE_FUTEX - case lockseq_futex: - seq = lockseq_nested_futex; - break; -#endif - case lockseq_ticket: - seq = lockseq_nested_ticket; - break; - case lockseq_queuing: - seq = lockseq_nested_queuing; - break; - case lockseq_drdpa: - seq = lockseq_nested_drdpa; - break; - default: - seq = lockseq_nested_queuing; - } - KMP_INIT_I_LOCK(lock, seq); -#if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __kmp_itt_lock_creating(ilk->lock, loc); -#endif -} - -/* initialize the lock with a hint */ -void -__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) -{ - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); - } - - __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); -} - -/* initialize the lock with a hint */ -void -__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) -{ - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); - } - - __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); -} - -#endif // KMP_USE_DYNAMIC_LOCK - -/* initialize the lock */ -void -__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { -#if KMP_USE_DYNAMIC_LOCK - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_lock"); - } - __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); - -#else // KMP_USE_DYNAMIC_LOCK - - static char const 
* const func = "omp_init_lock"; - kmp_user_lock_p lck; - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - if ( __kmp_env_consistency_check ) { - if ( user_lock == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } - - KMP_CHECK_USER_LOCK_INIT(); - - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); - } - INIT_LOCK( lck ); - __kmp_set_user_lock_location( lck, loc ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_init_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_creating( lck ); -#endif /* USE_ITT_BUILD */ - -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_init_lock - -/* initialize the lock */ -void -__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { -#if KMP_USE_DYNAMIC_LOCK - - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); - } - __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); - -#else // KMP_USE_DYNAMIC_LOCK - - static char const * const func = "omp_init_nest_lock"; - kmp_user_lock_p lck; - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - if ( __kmp_env_consistency_check ) { - if ( user_lock == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } - - KMP_CHECK_USER_LOCK_INIT(); - - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && 
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); - } - - INIT_NESTED_LOCK( lck ); - __kmp_set_user_lock_location( lck, loc ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_creating( lck ); -#endif /* USE_ITT_BUILD */ - -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_init_nest_lock - -void -__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { -#if KMP_USE_DYNAMIC_LOCK - -# if USE_ITT_BUILD - kmp_user_lock_p lck; - if (KMP_EXTRACT_D_TAG(user_lock) == 0) { - lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; - } else { - lck = (kmp_user_lock_p)user_lock; - } - __kmp_itt_lock_destroyed(lck); -# endif - KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); -#else - kmp_user_lock_p lck; - - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" ); - } - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_destroyed( lck ); -#endif /* USE_ITT_BUILD */ - DESTROY_LOCK( lck ); 
- - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - ; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - ; - } -#endif - else { - __kmp_user_lock_free( user_lock, gtid, lck ); - } -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_destroy_lock - -/* destroy the lock */ -void -__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { -#if KMP_USE_DYNAMIC_LOCK - -# if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); - __kmp_itt_lock_destroyed(ilk->lock); -# endif - KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" ); - } - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck); - } -#endif - -#if USE_ITT_BUILD - __kmp_itt_lock_destroyed( lck ); -#endif /* USE_ITT_BUILD */ - - DESTROY_NESTED_LOCK( lck ); - - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - ; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( 
__kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - ; - } -#endif - else { - __kmp_user_lock_free( user_lock, gtid, lck ); - } -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmpc_destroy_nest_lock - -void -__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { - KMP_COUNT_BLOCK(OMP_set_lock); -#if KMP_USE_DYNAMIC_LOCK - int tag = KMP_EXTRACT_D_TAG(user_lock); -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object. -# endif -# if KMP_USE_INLINED_TAS - if (tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); - } else -# elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); - } else -# endif - { - __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } -# if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -# endif - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" ); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); -#endif /* USE_ITT_BUILD */ - - ACQUIRE_LOCK( lck, gtid ); - -#if USE_ITT_BUILD - __kmp_itt_lock_acquired( lck ); -#endif /* USE_ITT_BUILD */ - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck); - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK 
-} - -void -__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { -#if KMP_USE_DYNAMIC_LOCK - -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -# endif - KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); -# if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -#endif - -#else // KMP_USE_DYNAMIC_LOCK - int acquire_status; - kmp_user_lock_p lck; - - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" ); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); -#endif /* USE_ITT_BUILD */ - - ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status ); - -#if USE_ITT_BUILD - __kmp_itt_lock_acquired( lck ); -#endif /* USE_ITT_BUILD */ -#endif // KMP_USE_DYNAMIC_LOCK - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { - if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck); - } else { - if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck); - } - } -#endif -} - -void -__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ -#if KMP_USE_DYNAMIC_LOCK - - int tag = KMP_EXTRACT_D_TAG(user_lock); -# if USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -# endif -# if KMP_USE_INLINED_TAS - if (tag == locktag_tas && 
!__kmp_env_consistency_check) { - KMP_RELEASE_TAS_LOCK(user_lock, gtid); - } else -# elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); - } else -# endif - { - __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - /* Can't use serial interval since not block structured */ - /* release the lock */ - - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - // "fast" path implemented to fix customer performance issue -#if USE_ITT_BUILD - __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock ); -#endif /* USE_ITT_BUILD */ - TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); - KMP_MB(); - return; -#else - lck = (kmp_user_lock_p)user_lock; -#endif - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" ); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_releasing( lck ); -#endif /* USE_ITT_BUILD */ - - RELEASE_LOCK( lck, gtid ); - -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_lock)) { - ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck); - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK -} - -/* release the lock */ -void -__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ -#if KMP_USE_DYNAMIC_LOCK - -# if USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -# endif - KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - - /* Can't use serial 
interval since not block structured */ - - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - // "fast" path implemented to fix customer performance issue - kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock; -#if USE_ITT_BUILD - __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock ); -#endif /* USE_ITT_BUILD */ - if ( --(tl->lk.depth_locked) == 0 ) { - TCW_4(tl->lk.poll, 0); - } - KMP_MB(); - return; -#else - lck = (kmp_user_lock_p)user_lock; -#endif - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" ); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_releasing( lck ); -#endif /* USE_ITT_BUILD */ - - int release_status; - release_status = RELEASE_NESTED_LOCK( lck, gtid ); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled) { - if (release_status == KMP_LOCK_RELEASED) { - if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)( - (uint64_t) lck); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)( - (uint64_t) lck); - } - } -#endif - -#endif // KMP_USE_DYNAMIC_LOCK -} - -/* try to acquire the lock */ -int -__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ - KMP_COUNT_BLOCK(OMP_test_lock); - -#if KMP_USE_DYNAMIC_LOCK - int rc; - int tag = KMP_EXTRACT_D_TAG(user_lock); -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -# endif -# if KMP_USE_INLINED_TAS - if 
(tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_TEST_TAS_LOCK(user_lock, gtid, rc); - } else -# elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); - } else -# endif - { - rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } - if (rc) { -# if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -# endif - return FTN_TRUE; - } else { -# if USE_ITT_BUILD - __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); -# endif - return FTN_FALSE; - } - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - int rc; - - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" ); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); -#endif /* USE_ITT_BUILD */ - - rc = TEST_LOCK( lck, gtid ); -#if USE_ITT_BUILD - if ( rc ) { - __kmp_itt_lock_acquired( lck ); - } else { - __kmp_itt_lock_cancelled( lck ); - } -#endif /* USE_ITT_BUILD */ - return ( rc ? 
FTN_TRUE : FTN_FALSE ); - - /* Can't use serial interval since not block structured */ - -#endif // KMP_USE_DYNAMIC_LOCK -} - -/* try to acquire the lock */ -int -__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ -#if KMP_USE_DYNAMIC_LOCK - int rc; -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -# endif - rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); -# if USE_ITT_BUILD - if (rc) { - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); - } else { - __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); - } -# endif - return rc; - -#else // KMP_USE_DYNAMIC_LOCK - - kmp_user_lock_p lck; - int rc; - - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } -#endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" ); - } - -#if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); -#endif /* USE_ITT_BUILD */ - - rc = TEST_NESTED_LOCK( lck, gtid ); -#if USE_ITT_BUILD - if ( rc ) { - __kmp_itt_lock_acquired( lck ); - } else { - __kmp_itt_lock_cancelled( lck ); - } -#endif /* USE_ITT_BUILD */ - return rc; - - /* Can't use serial interval since not block structured */ - -#endif // KMP_USE_DYNAMIC_LOCK -} - - -/*--------------------------------------------------------------------------------------------------------------------*/ - -/* - * Interface to fast scalable reduce methods routines - */ - -// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions; -// another solution: to re-determine the method 
one more time in __kmpc_end_reduce* functions (new prototype required then) -// AT: which solution is better? -#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \ - ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) ) - -#define __KMP_GET_REDUCTION_METHOD(gtid) \ - ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) - -// description of the packed_reduction_method variable: look at the macros in kmp.h - - -// used in a critical section reduce block -static __forceinline void -__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) { - - // this lock was visible to a customer and to the threading profile tool as a serial overhead span - // (although it's used for an internal purpose only) - // why was it visible in previous implementation? - // should we keep it visible in new reduce block? - kmp_user_lock_p lck; - -#if KMP_USE_DYNAMIC_LOCK - - kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; - // Check if it is initialized. - if (*lk == 0) { - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq)); - } else { - __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq)); - } - } - // Branch for accessing the actual lock object and set operation. This branching is inevitable since - // this lock initialization does not follow the normal dispatch path (lock table is not used). 
- if (KMP_EXTRACT_D_TAG(lk) != 0) { - lck = (kmp_user_lock_p)lk; - KMP_DEBUG_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); - } - KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); - } else { - kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); - lck = ilk->lock; - KMP_DEBUG_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); - } - KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); - } - -#else // KMP_USE_DYNAMIC_LOCK - - // We know that the fast reduction code is only emitted by Intel compilers - // with 32 byte critical sections. If there isn't enough space, then we - // have to use a pointer. - if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) { - lck = (kmp_user_lock_p)crit; - } - else { - lck = __kmp_get_critical_section_ptr( crit, loc, global_tid ); - } - KMP_DEBUG_ASSERT( lck != NULL ); - - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_critical, loc, lck ); - - __kmp_acquire_user_lock_with_checks( lck, global_tid ); - -#endif // KMP_USE_DYNAMIC_LOCK -} - -// used in a critical section reduce block -static __forceinline void -__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) { - - kmp_user_lock_p lck; - -#if KMP_USE_DYNAMIC_LOCK - - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - lck = (kmp_user_lock_p)crit; - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); - } else { - kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); - } - -#else // KMP_USE_DYNAMIC_LOCK - - // We know that the fast reduction code is only emitted by Intel compilers with 32 byte 
critical - // sections. If there isn't enough space, then we have to use a pointer. - if ( __kmp_base_user_lock_size > 32 ) { - lck = *( (kmp_user_lock_p *) crit ); - KMP_ASSERT( lck != NULL ); - } else { - lck = (kmp_user_lock_p) crit; - } - - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_critical, loc ); - - __kmp_release_user_lock_with_checks( lck, global_tid ); - -#endif // KMP_USE_DYNAMIC_LOCK -} // __kmp_end_critical_section_reduce_block - - -/* 2.a.i. Reduce Block without a terminating barrier */ -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread number -@param num_vars number of items (variables) to be reduced -@param reduce_size size of data in bytes to be reduced -@param reduce_data pointer to data to be reduced -@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data -@param lck pointer to the unique lock data structure -@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed - -The nowait version is used for a reduce clause with the nowait argument. -*/ -kmp_int32 -__kmpc_reduce_nowait( - ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ) { - - KMP_COUNT_BLOCK(REDUCE_nowait); - int retval = 0; - PACKED_REDUCTION_METHOD_T packed_reduction_method; -#if OMP_40_ENABLED - kmp_team_t *team; - kmp_info_t *th; - int teams_swapped = 0, task_state; -#endif - KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) ); - - // why do we need this initialization here at all? - // Reduction clause can not be used as a stand-alone directive. - - // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed - // possible detection of false-positive race by the threadchecker ??? - if( ! 
TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); - - // check correctness of reduce block nesting -#if KMP_USE_DYNAMIC_LOCK - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); -#else - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); -#endif - -#if OMP_40_ENABLED - th = __kmp_thread_from_gtid(global_tid); - if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct? - team = th->th.th_team; - if( team->t.t_level == th->th.th_teams_level ) { - // this is reduction at teams construct - KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 - // Let's swap teams temporarily for the reduction barrier - teams_swapped = 1; - th->th.th_info.ds.ds_tid = team->t.t_master_tid; - th->th.th_team = team->t.t_parent; - th->th.th_team_nproc = th->th.th_team->t.t_nproc; - th->th.th_task_team = th->th.th_team->t.t_task_team[0]; - task_state = th->th.th_task_state; - th->th.th_task_state = 0; - } - } -#endif // OMP_40_ENABLED - - // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable - // the variable should be either a construct-specific or thread-specific property, not a team specific property - // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct) - // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?) 
- // (if both construct-specific and team-specific variables were shared, then unness extra syncs should be needed) - // a thread-specific variable is better regarding two issues above (next construct and extra syncs) - // a thread-specific "th_local.reduction_method" variable is used currently - // each thread executes 'determine' and 'set' lines (no need to execute by one thread, to avoid unness extra syncs) - - packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck ); - __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method ); - - if( packed_reduction_method == critical_reduce_block ) { - - __kmp_enter_critical_section_reduce_block( loc, global_tid, lck ); - retval = 1; - - } else if( packed_reduction_method == empty_reduce_block ) { - - // usage: if team size == 1, no synchronization is required ( Intel platforms only ) - retval = 1; - - } else if( packed_reduction_method == atomic_reduce_block ) { - - retval = 2; - - // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen) - // (it's not quite good, because the checking block has been closed by this 'pop', - // but atomic operation has not been executed yet, will be executed slightly later, literally on next instruction) - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_reduce, loc ); - - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { - - //AT: performance issue: a real barrier here - //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them) - //AT: (it's not what a customer might expect specifying NOWAIT clause) - //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer) - //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster - // and be more in line with sense of NOWAIT - //AT: TO DO: 
do epcc test and compare times - - // this barrier should be invisible to a customer and to the threading profile tool - // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose) -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func ); - retval = ( retval != 0 ) ? ( 0 ) : ( 1 ); - - // all other workers except master should do this pop here - // ( none of other workers will get to __kmpc_end_reduce_nowait() ) - if ( __kmp_env_consistency_check ) { - if( retval == 0 ) { - __kmp_pop_sync( global_tid, ct_reduce, loc ); - } - } - - } else { - - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" - - } -#if OMP_40_ENABLED - if( teams_swapped ) { - // Restore thread structure - th->th.th_info.ds.ds_tid = 0; - th->th.th_team = team; - th->th.th_team_nproc = team->t.t_nproc; - th->th.th_task_team = team->t.t_task_team[task_state]; - th->th.th_task_state = task_state; - } -#endif - KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) ); - - return retval; -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread id. -@param lck pointer to the unique lock data structure - -Finish the execution of a reduce nowait. 
-*/ -void -__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) { - - PACKED_REDUCTION_METHOD_T packed_reduction_method; - - KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) ); - - packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid ); - - if( packed_reduction_method == critical_reduce_block ) { - - __kmp_end_critical_section_reduce_block( loc, global_tid, lck ); - - } else if( packed_reduction_method == empty_reduce_block ) { - - // usage: if team size == 1, no synchronization is required ( on Intel platforms only ) - - } else if( packed_reduction_method == atomic_reduce_block ) { - - // neither master nor other workers should get here - // (code gen does not generate this call in case 2: atomic reduce block) - // actually it's better to remove this elseif at all; - // after removal this value will checked by the 'else' and will assert - - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { - - // only master gets here - - } else { - - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" - - } - - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_reduce, loc ); - - KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) ); - - return; -} - -/* 2.a.ii. Reduce Block with a terminating barrier */ - -/*! 
-@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread number -@param num_vars number of items (variables) to be reduced -@param reduce_size size of data in bytes to be reduced -@param reduce_data pointer to data to be reduced -@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data -@param lck pointer to the unique lock data structure -@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed - -A blocking reduce that includes an implicit barrier. -*/ -kmp_int32 -__kmpc_reduce( - ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, void *reduce_data, - void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ) -{ - KMP_COUNT_BLOCK(REDUCE_wait); - int retval = 0; - PACKED_REDUCTION_METHOD_T packed_reduction_method; - - KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) ); - - // why do we need this initialization here at all? - // Reduction clause can not be a stand-alone directive. - - // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed - // possible detection of false-positive race by the threadchecker ??? - if( ! 
TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); - - // check correctness of reduce block nesting -#if KMP_USE_DYNAMIC_LOCK - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); -#else - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); -#endif - - packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck ); - __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method ); - - if( packed_reduction_method == critical_reduce_block ) { - - __kmp_enter_critical_section_reduce_block( loc, global_tid, lck ); - retval = 1; - - } else if( packed_reduction_method == empty_reduce_block ) { - - // usage: if team size == 1, no synchronization is required ( Intel platforms only ) - retval = 1; - - } else if( packed_reduction_method == atomic_reduce_block ) { - - retval = 2; - - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { - - //case tree_reduce_block: - // this barrier should be visible to a customer and to the threading profile tool - // (it's a terminating barrier on constructs if NOWAIT not specified) -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames -#endif - retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func ); - retval = ( retval != 0 ) ? 
( 0 ) : ( 1 ); - - // all other workers except master should do this pop here - // ( none of other workers except master will enter __kmpc_end_reduce() ) - if ( __kmp_env_consistency_check ) { - if( retval == 0 ) { // 0: all other workers; 1: master - __kmp_pop_sync( global_tid, ct_reduce, loc ); - } - } - - } else { - - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" - - } - - KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) ); - - return retval; -} - -/*! -@ingroup SYNCHRONIZATION -@param loc source location information -@param global_tid global thread id. -@param lck pointer to the unique lock data structure - -Finish the execution of a blocking reduce. -The lck pointer must be the same as that used in the corresponding start function. -*/ -void -__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) { - - PACKED_REDUCTION_METHOD_T packed_reduction_method; - - KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) ); - - packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid ); - - // this barrier should be visible to a customer and to the threading profile tool - // (it's a terminating barrier on constructs if NOWAIT not specified) - - if( packed_reduction_method == critical_reduce_block ) { - - __kmp_end_critical_section_reduce_block( loc, global_tid, lck ); - - // TODO: implicit barrier: should be exposed -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); - - } else if( packed_reduction_method == empty_reduce_block ) { - - // usage: if team size == 1, no synchronization is required ( Intel platforms only ) - - // TODO: implicit barrier: should be exposed -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); - - } else 
if( packed_reduction_method == atomic_reduce_block ) { - - // TODO: implicit barrier: should be exposed -#if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; -#endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); - - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { - - // only master executes here (master releases all other workers) - __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid ); - - } else { - - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" - - } - - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_reduce, loc ); - - KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) ); - - return; -} - -#undef __KMP_GET_REDUCTION_METHOD -#undef __KMP_SET_REDUCTION_METHOD - -/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/ - -kmp_uint64 -__kmpc_get_taskid() { - - kmp_int32 gtid; - kmp_info_t * thread; - - gtid = __kmp_get_gtid(); - if ( gtid < 0 ) { - return 0; - }; // if - thread = __kmp_thread_from_gtid( gtid ); - return thread->th.th_current_task->td_task_id; - -} // __kmpc_get_taskid - - -kmp_uint64 -__kmpc_get_parent_taskid() { - - kmp_int32 gtid; - kmp_info_t * thread; - kmp_taskdata_t * parent_task; - - gtid = __kmp_get_gtid(); - if ( gtid < 0 ) { - return 0; - }; // if - thread = __kmp_thread_from_gtid( gtid ); - parent_task = thread->th.th_current_task->td_parent; - return ( parent_task == NULL ? 0 : parent_task->td_task_id ); - -} // __kmpc_get_parent_taskid - -void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT) -{ - if ( ! 
__kmp_init_serial ) { - __kmp_serial_initialize(); - } - __kmp_place_num_sockets = nS; - __kmp_place_socket_offset = sO; - __kmp_place_num_cores = nC; - __kmp_place_core_offset = cO; - __kmp_place_num_threads_per_core = nT; -} - -// end of file // - +/* + * kmp_csupport.c -- kfront linkage support for OpenMP. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "omp.h" /* extern "C" declarations of user-visible routines */ +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_itt.h" +#include "kmp_error.h" +#include "kmp_stats.h" + +#if OMPT_SUPPORT +#include "ompt-internal.h" +#include "ompt-specific.h" +#endif + +#define MAX_MESSAGE 512 + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* flags will be used in future, e.g., to implement */ +/* openmp_strict library restrictions */ + +/*! + * @ingroup STARTUP_SHUTDOWN + * @param loc in source location information + * @param flags in for future use (currently ignored) + * + * Initialize the runtime library. This call is optional; if it is not made then + * it will be implicitly called by attempts to use other library functions. + * + */ +void +__kmpc_begin(ident_t *loc, kmp_int32 flags) +{ + // By default __kmp_ignore_mppbeg() returns TRUE. + if (__kmp_ignore_mppbeg() == FALSE) { + __kmp_internal_begin(); + + KC_TRACE( 10, ("__kmpc_begin: called\n" ) ); + } +} + +/*! + * @ingroup STARTUP_SHUTDOWN + * @param loc source location information + * + * Shutdown the runtime library. 
This is also optional, and even if called will not + * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero. + */ +void +__kmpc_end(ident_t *loc) +{ + // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op. + // However, this can be overridden with KMP_IGNORE_MPPEND environment variable. + // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end() + // will unregister this root (it can cause library shut down). + if (__kmp_ignore_mppend() == FALSE) { + KC_TRACE( 10, ("__kmpc_end: called\n" ) ); + KA_TRACE( 30, ("__kmpc_end\n" )); + + __kmp_internal_end_thread( -1 ); + } +} + +/*! +@ingroup THREAD_STATES +@param loc Source location information. +@return The global thread index of the active thread. + +This function can be called in any context. + +If the runtime has ony been entered at the outermost level from a +single (necessarily non-OpenMP*) thread, then the thread number is that +which would be returned by omp_get_thread_num() in the outermost +active parallel construct. (Or zero if there is no active parallel +construct, since the master thread is necessarily thread zero). + +If multiple non-OpenMP threads all enter an OpenMP construct then this +will be a unique thread identifier among all the threads created by +the OpenMP runtime (but the value cannote be defined in terms of +OpenMP thread ids returned by omp_get_thread_num()). + +*/ +kmp_int32 +__kmpc_global_thread_num(ident_t *loc) +{ + kmp_int32 gtid = __kmp_entry_gtid(); + + KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) ); + + return gtid; +} + +/*! +@ingroup THREAD_STATES +@param loc Source location information. +@return The number of threads under control of the OpenMP* runtime + +This function can be called in any context. +It returns the total number of threads under the control of the OpenMP runtime. 
That is +not a number that can be determined by any OpenMP standard calls, since the library may be +called from more than one non-OpenMP thread, and this reflects the total over all such calls. +Similarly the runtime maintains underlying threads even when they are not active (since the cost +of creating and destroying OS threads is high), this call counts all such threads even if they are not +waiting for work. +*/ +kmp_int32 +__kmpc_global_num_threads(ident_t *loc) +{ + KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) ); + + return TCR_4(__kmp_nth); +} + +/*! +@ingroup THREAD_STATES +@param loc Source location information. +@return The thread number of the calling thread in the innermost active parallel construct. + +*/ +kmp_int32 +__kmpc_bound_thread_num(ident_t *loc) +{ + KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) ); + return __kmp_tid_from_gtid( __kmp_entry_gtid() ); +} + +/*! +@ingroup THREAD_STATES +@param loc Source location information. +@return The number of threads in the innermost active parallel construct. +*/ +kmp_int32 +__kmpc_bound_num_threads(ident_t *loc) +{ + KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) ); + + return __kmp_entry_thread() -> th.th_team -> t.t_nproc; +} + +/*! + * @ingroup DEPRECATED + * @param loc location description + * + * This function need not be called. It always returns TRUE. 
+ */ +kmp_int32 +__kmpc_ok_to_fork(ident_t *loc) +{ +#ifndef KMP_DEBUG + + return TRUE; + +#else + + const char *semi2; + const char *semi3; + int line_no; + + if (__kmp_par_range == 0) { + return TRUE; + } + semi2 = loc->psource; + if (semi2 == NULL) { + return TRUE; + } + semi2 = strchr(semi2, ';'); + if (semi2 == NULL) { + return TRUE; + } + semi2 = strchr(semi2 + 1, ';'); + if (semi2 == NULL) { + return TRUE; + } + if (__kmp_par_range_filename[0]) { + const char *name = semi2 - 1; + while ((name > loc->psource) && (*name != '/') && (*name != ';')) { + name--; + } + if ((*name == '/') || (*name == ';')) { + name++; + } + if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { + return __kmp_par_range < 0; + } + } + semi3 = strchr(semi2 + 1, ';'); + if (__kmp_par_range_routine[0]) { + if ((semi3 != NULL) && (semi3 > semi2) + && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { + return __kmp_par_range < 0; + } + } + if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { + if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { + return __kmp_par_range > 0; + } + return __kmp_par_range < 0; + } + return TRUE; + +#endif /* KMP_DEBUG */ + +} + +/*! +@ingroup THREAD_STATES +@param loc Source location information. +@return 1 if this thread is executing inside an active parallel region, zero if not. +*/ +kmp_int32 +__kmpc_in_parallel( ident_t *loc ) +{ + return __kmp_entry_thread() -> th.th_root -> r.r_active; +} + +/*! +@ingroup PARALLEL +@param loc source location information +@param global_tid global thread number +@param num_threads number of threads requested for this parallel construct + +Set the number of threads to be used by the next fork spawned by this thread. +This call is only required if the parallel construct has a `num_threads` clause. 
+*/
+void
+__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
+{
+    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
+      global_tid, num_threads ) );
+
+    __kmp_push_num_threads( loc, global_tid, num_threads );
+}
+/*!
+@ingroup PARALLEL
+@param loc source location information (unused here)
+@param global_tid global thread number (unused here)
+
+Counterpart of __kmpc_push_num_threads(). Apart from emitting a trace message
+this function does nothing; see the comment in the body.
+*/
+void
+__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
+{
+    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
+
+    /* the num_threads are automatically popped */
+}
+
+
+#if OMP_40_ENABLED
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+@param proc_bind requested binding, passed on as a kmp_proc_bind_t
+
+Forwards the request to __kmp_push_proc_bind() after casting proc_bind
+to kmp_proc_bind_t.
+*/
+void
+__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
+{
+    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
+      global_tid, proc_bind ) );
+
+    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
+}
+
+#endif /* OMP_40_ENABLED */
+
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param argc total number of arguments in the ellipsis
+@param microtask pointer to callback routine consisting of outlined parallel construct
+@param ... pointers to shared variables that aren't global
+
+Do the actual fork and call the microtask in the relevant number of threads.
+
+The function calls __kmp_fork_call() and then __kmp_join_call() before it
+returns. The variadic arguments are handed to __kmp_fork_call() as a va_list
+(by address on Linux x86_64/ARM/AArch64, by value elsewhere). When statistics
+are enabled, the OMP_serial explicit timer is stopped before the fork and
+restarted afterwards unless the call was made from inside a parallel region.
+*/
+void
+__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
+{
+    int gtid = __kmp_entry_gtid();
+
+#if (KMP_STATS_ENABLED)
+    int inParallel = __kmpc_in_parallel(loc);
+    if (inParallel)
+    {
+        KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
+    }
+    else
+    {
+        KMP_STOP_EXPLICIT_TIMER(OMP_serial);
+        KMP_COUNT_BLOCK(OMP_PARALLEL);
+    }
+#endif
+
+    // maybe to save thr_state is enough here
+    {
+        va_list ap;
+        va_start( ap, microtask );
+
+#if OMPT_SUPPORT
+        int tid = __kmp_tid_from_gtid( gtid );
+        kmp_info_t *master_th = __kmp_threads[ gtid ];
+        kmp_team_t *parent_team = master_th->th.th_team;
+        if (ompt_enabled) {
+            parent_team->t.t_implicit_task_taskdata[tid].
+                ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0); // record this frame for OMPT
+        }
+#endif
+
+#if INCLUDE_SSC_MARKS
+        SSC_MARK_FORKING();
+#endif
+        __kmp_fork_call( loc, gtid, fork_context_intel,
+                argc,
+#if OMPT_SUPPORT
+                VOLATILE_CAST(void *) microtask, // "unwrapped" task
+#endif
+                VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
+                VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
+/* TODO: revert workaround for Intel(R) 64 tracker #96 */
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+                &ap
+#else
+                ap
+#endif
+                );
+#if INCLUDE_SSC_MARKS
+        SSC_MARK_JOINING();
+#endif
+        __kmp_join_call( loc, gtid
+#if OMPT_SUPPORT
+            , fork_context_intel
+#endif
+        );
+
+        va_end( ap );
+
+#if OMPT_SUPPORT
+        if (ompt_enabled) {
+            parent_team->t.t_implicit_task_taskdata[tid].
+                ompt_task_info.frame.reenter_runtime_frame = 0; // clear the frame recorded before the fork
+        }
+#endif
+    }
+#if (KMP_STATS_ENABLED)
+    if (!inParallel)
+        KMP_START_EXPLICIT_TIMER(OMP_serial);
+#endif
+}
+
+#if OMP_40_ENABLED
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+@param num_teams number of teams requested for the teams construct
+@param num_threads number of threads per team requested for the teams construct
+
+Set the number of teams to be used by the teams construct.
+This call is only required if the teams construct has a `num_teams` clause
+or a `thread_limit` clause (or both).
+
+The request is forwarded unchanged to __kmp_push_num_teams().
+*/
+void
+__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
+{
+    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
+      global_tid, num_teams, num_threads ) );
+
+    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param argc total number of arguments in the ellipsis
+@param microtask pointer to callback routine consisting of outlined teams construct
+@param ... 
pointers to shared variables that aren't global
+
+Do the actual fork and call the microtask in the relevant number of threads.
+
+The teams microtask and the current nesting level are stored in the calling
+thread before the fork; after __kmp_join_call() returns they are cleared again
+together with th_teams_size. If no team count has been pushed
+(th_teams_size.nteams == 0), __kmp_push_num_teams( loc, gtid, 0, 0 ) is called first.
+*/
+void
+__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
+{
+    int gtid = __kmp_entry_gtid();
+    kmp_info_t *this_thr = __kmp_threads[ gtid ];
+    va_list ap;
+    va_start( ap, microtask );
+
+    KMP_COUNT_BLOCK(OMP_TEAMS);
+
+    // remember teams entry point and nesting level
+    this_thr->th.th_teams_microtask = microtask;
+    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
+
+#if OMPT_SUPPORT
+    kmp_team_t *parent_team = this_thr->th.th_team;
+    int tid = __kmp_tid_from_gtid( gtid );
+    if (ompt_enabled) {
+        parent_team->t.t_implicit_task_taskdata[tid].
+            ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
+    }
+#endif
+
+    // check if __kmpc_push_num_teams called, set default number of teams otherwise
+    if ( this_thr->th.th_teams_size.nteams == 0 ) {
+        __kmp_push_num_teams( loc, gtid, 0, 0 );
+    }
+    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
+    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
+    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
+
+    __kmp_fork_call( loc, gtid, fork_context_intel,
+            argc,
+#if OMPT_SUPPORT
+            VOLATILE_CAST(void *) microtask, // "unwrapped" task
+#endif
+            VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
+            VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+            &ap
+#else
+            ap
+#endif
+            );
+    __kmp_join_call( loc, gtid
+#if OMPT_SUPPORT
+        , fork_context_intel
+#endif
+    );
+
+#if OMPT_SUPPORT
+    if (ompt_enabled) {
+        parent_team->t.t_implicit_task_taskdata[tid].
+            ompt_task_info.frame.reenter_runtime_frame = NULL;
+    }
+#endif
+
+    this_thr->th.th_teams_microtask = NULL;
+    this_thr->th.th_teams_level = 0;
+    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L; // zeroes th_teams_size through a 64-bit store; presumably nteams and nth together - confirm against the struct definition
+    va_end( ap );
+}
+#endif /* OMP_40_ENABLED */
+
+
+//
+// I don't think this function should ever have been exported.
+// The __kmpc_ prefix was misapplied.  I'm fairly certain that no generated
+// openmp code ever called it, but it's been exported from the RTL for so
+// long that I'm afraid to remove the definition.
+//
+int
+__kmpc_invoke_task_func( int gtid )
+{
+    return __kmp_invoke_task_func( gtid );
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+
+Enter a serialized parallel construct. This interface is used to handle a
+conditional parallel region, like this,
+@code
+#pragma omp parallel if (condition)
+@endcode
+when the condition is false.
+*/
+void
+__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
+{
+    __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
+                                                 * kmp_fork_call since the tasks to be done are similar in each case.
+                                                 */
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+
+Leave a serialized parallel construct.
+
+Returns immediately when loc carries the KMP_IDENT_AUTOPAR flag. Otherwise one
+nesting level of the thread's serial team is undone: a matching entry of the
+internal-control stack and the top dispatch buffer are popped and freed, and
+t_level and t_serialized are decremented. When t_serialized reaches zero the
+thread is switched back to the serial team's parent team.
+*/
+void
+__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
+{
+    kmp_internal_control_t *top;
+    kmp_info_t *this_thr;
+    kmp_team_t *serial_team;
+
+    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
+
+    /* skip all this code for autopar serialized loops since it results in
+       unacceptable overhead */
+    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
+        return;
+
+    // Not autopar code
+    if( ! TCR_4( __kmp_init_parallel ) )
+        __kmp_parallel_initialize();
+
+    this_thr = __kmp_threads[ global_tid ];
+    serial_team = this_thr->th.th_serial_team;
+
+   #if OMP_41_ENABLED
+    kmp_task_team_t * task_team = this_thr->th.th_task_team;
+
+    // we need to wait for the proxy tasks before finishing the thread
+    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
+        __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
+   #endif
+
+    KMP_MB();
+    KMP_DEBUG_ASSERT( serial_team );
+    KMP_ASSERT( serial_team -> t.t_serialized );
+    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
+    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
+    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
+    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
+
+    /* If necessary, pop the internal control stack values and replace the team values */
+    top = serial_team -> t.t_control_stack_top;
+    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
+        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
+        serial_team -> t.t_control_stack_top = top -> next;
+        __kmp_free(top);
+    }
+
+    //if( serial_team -> t.t_serialized > 1 )
+    serial_team -> t.t_level--;
+
+    /* pop dispatch buffers stack */
+    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
+    {
+        dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
+        serial_team->t.t_dispatch->th_disp_buffer =
+            serial_team->t.t_dispatch->th_disp_buffer->next;
+        __kmp_free( disp_buffer );
+    }
+
+    -- serial_team -> t.t_serialized;
+    if ( serial_team -> t.t_serialized == 0 ) {
+
+        /* return to the parallel section */
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
+            __kmp_clear_x87_fpu_status_word();
+            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
+            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
+        }
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+        this_thr -> th.th_team = serial_team -> t.t_parent;
+        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
+
+        /* restore values cached in the thread */
+        this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
+        this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
+        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
+
+        /* TODO the below shouldn't need to be adjusted for serialized teams */
+        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
+            t.t_dispatch[ serial_team -> t.t_master_tid ];
+
+        __kmp_pop_current_task_from_thread( this_thr );
+
+        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
+        this_thr -> th.th_current_task -> td_flags.executing = 1;
+
+        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+            // Copy the task team from the new child / old parent team to the thread.
+            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
+            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
+                            global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
+        }
+    } else {
+        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
+                            global_tid, serial_team, serial_team -> t.t_serialized ) );
+        }
+    }
+
+#if USE_ITT_BUILD
+    kmp_uint64 cur_time = 0;
+#if USE_ITT_NOTIFY
+    if ( __itt_get_timestamp_ptr ) {
+        cur_time = __itt_get_timestamp();
+    }
+#endif /* USE_ITT_NOTIFY */
+    if ( this_thr->th.th_team->t.t_level == 0
+#if OMP_40_ENABLED
+        && this_thr->th.th_teams_microtask == NULL
+#endif
+    ) {
+        // Report the barrier
+        this_thr->th.th_ident = loc;
+        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
+            ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
+        {
+            __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
+                                    cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
+            if ( __kmp_forkjoin_frames_mode == 3 )
+                // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
+                __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
+                                        cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
+        } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
+            ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
+            // Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
+            __kmp_itt_region_joined( global_tid, 1 );
+    }
+#endif /* USE_ITT_BUILD */
+
+    if ( __kmp_env_consistency_check )
+        __kmp_pop_parallel( global_tid, NULL );
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information.
+
+Execute flush. This is implemented as a full memory fence. (Though
+depending on the memory ordering convention obeyed by the compiler
+even that may not be necessary).
+
+On x86/x86_64 (non-MIC) an additional mfence / __sync_synchronize() is issued
+after KMP_MB(), but only when __kmp_cpuinfo reports SSE2 support.
+*/
+void
+__kmpc_flush(ident_t *loc)
+{
+    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
+
+    /* need explicit __mf() here since use volatile instead in library */
+    KMP_MB();       /* Flush all pending memory write invalidates.  */
+
+    #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
+        #if KMP_MIC
+            // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
+            // We shouldn't need it, though, since the ABI rules require that
+            // * If the compiler generates NGO stores it also generates the fence
+            // * If users hand-code NGO stores they should insert the fence
+            // therefore no incomplete unordered stores should be visible.
+        #else
+            // C74404
+            // This is to address non-temporal store instructions (sfence needed).
+            // The clflush instruction is addressed as well (mfence needed).
+            // Probably the non-temporal load movntdqa instruction should also be addressed.
+            // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
+            if ( ! __kmp_cpuinfo.initialized ) {
+                __kmp_query_cpuid( & __kmp_cpuinfo );
+            }; // if
+            if ( ! __kmp_cpuinfo.sse2 ) {
+                // CPU cannot execute SSE2 instructions.
+            } else {
+                #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
+                _mm_mfence();
+                #else
+                __sync_synchronize();
+                #endif // KMP_COMPILER_ICC
+            }; // if
+        #endif // KMP_MIC
+    #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+        // Nothing to see here move along
+    #elif KMP_ARCH_PPC64
+        // Nothing needed here (we have a real MB above).
+        #if KMP_OS_CNK
+            // The flushing thread needs to yield here; this prevents a
+            // busy-waiting thread from saturating the pipeline. flush is
+            // often used in loops like this:
+            // while (!flag) {
+            //   #pragma omp flush(flag)
+            // }
+            // and adding the yield here is good for at least a 10x speedup
+            // when running >2 threads per core (on the NAS LU benchmark).
+            __kmp_yield(TRUE);
+        #endif
+    #else
+        #error Unknown or unsupported architecture
+    #endif
+
+}
+
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid thread id.
+
+Execute a barrier.
+
+loc is stored in the calling thread's th_ident before __kmp_barrier() is
+invoked with bs_plain_barrier. With consistency checking enabled, a NULL loc
+produces a ConstructIdentInvalid warning and __kmp_check_barrier() is called.
+*/
+void
+__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
+{
+    KMP_COUNT_BLOCK(OMP_BARRIER);
+    KMP_TIME_BLOCK(OMP_barrier);
+    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
+
+    if (! TCR_4(__kmp_init_parallel))
+        __kmp_parallel_initialize();
+
+    if ( __kmp_env_consistency_check ) {
+        if ( loc == 0 ) {
+            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
+        }; // if
+
+        __kmp_check_barrier( global_tid, ct_barrier, loc );
+    }
+
+    __kmp_threads[ global_tid ]->th.th_ident = loc;
+    // TODO: explicit barrier_wait_id:
+    //   this function is called when 'barrier' directive is present or
+    //   implicit barrier at the end of a worksharing construct.
+    // 1) better to add a per-thread barrier counter to a thread data structure
+    // 2) set to 0 when a new team is created
+    // 4) no sync is required
+
+    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
+}
+
+/* The BARRIER for a MASTER section is always explicit   */
+/*!
+@ingroup WORK_SHARING
+@param loc  source location information.
+@param global_tid  global thread number .
+@return 1 if this thread should execute the master block, 0 otherwise.
+
+The result is 1 exactly when KMP_MASTER_GTID( global_tid ) holds; in that case
+the OMP_master explicit timer is started as well. With consistency checking
+enabled the master thread pushes a ct_master sync record and every other
+thread only checks it.
+*/
+kmp_int32
+__kmpc_master(ident_t *loc, kmp_int32 global_tid)
+{
+    KMP_COUNT_BLOCK(OMP_MASTER);
+    int status = 0;
+
+    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
+
+    if( ! TCR_4( __kmp_init_parallel ) )
+        __kmp_parallel_initialize();
+
+    if( KMP_MASTER_GTID( global_tid )) {
+        KMP_START_EXPLICIT_TIMER(OMP_master);
+        status = 1;
+    }
+
+#if OMPT_SUPPORT && OMPT_TRACE
+    if (status) {
+        if (ompt_enabled &&
+            ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
+            kmp_info_t *this_thr        = __kmp_threads[ global_tid ];
+            kmp_team_t *team            = this_thr -> th.th_team;
+
+            int  tid = __kmp_tid_from_gtid( global_tid );
+            ompt_callbacks.ompt_callback(ompt_event_master_begin)(
+                team->t.ompt_team_info.parallel_id,
+                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+        }
+    }
+#endif
+
+    if ( __kmp_env_consistency_check ) {
+#if KMP_USE_DYNAMIC_LOCK
+        if (status)
+            __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
+        else
+            __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
+#else
+        if (status)
+            __kmp_push_sync( global_tid, ct_master, loc, NULL );
+        else
+            __kmp_check_sync( global_tid, ct_master, loc, NULL );
+#endif
+    }
+
+    return status;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc  source location information.
+@param global_tid  global thread number .
+
+Mark the end of a master region. This should only be called by the thread
+that executes the master region.
+*/
+void
+__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
+{
+    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
+
+    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
+    KMP_STOP_EXPLICIT_TIMER(OMP_master); // pairs with KMP_START_EXPLICIT_TIMER(OMP_master) in __kmpc_master()
+
+#if OMPT_SUPPORT && OMPT_TRACE
+    kmp_info_t *this_thr        = __kmp_threads[ global_tid ];
+    kmp_team_t *team            = this_thr -> th.th_team;
+    if (ompt_enabled &&
+        ompt_callbacks.ompt_callback(ompt_event_master_end)) {
+        int  tid = __kmp_tid_from_gtid( global_tid );
+        ompt_callbacks.ompt_callback(ompt_event_master_end)(
+            team->t.ompt_team_info.parallel_id,
+            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+    }
+#endif
+
+    if ( __kmp_env_consistency_check ) {
+        if( global_tid < 0 ) // NOTE(review): global_tid has already been used as an index above when OMPT tracing is compiled in
+            KMP_WARNING( ThreadIdentInvalid );
+
+        if( KMP_MASTER_GTID( global_tid ))
+            __kmp_pop_sync( global_tid, ct_master, loc );
+    }
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc  source location information.
+@param gtid  global thread number.
+
+Start execution of an ordered construct.
+
+The wait is delegated to the th_deo_fcn hook of the thread's dispatch structure
+when that hook is set, and to __kmp_parallel_deo() otherwise.
+*/
+void
+__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
+{
+    int cid = 0;
+    kmp_info_t *th;
+    KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
+
+    if (! TCR_4(__kmp_init_parallel))
+        __kmp_parallel_initialize();
+
+#if USE_ITT_BUILD
+    __kmp_itt_ordered_prep( gtid );
+    // TODO: ordered_wait_id
+#endif /* USE_ITT_BUILD */
+
+    th = __kmp_threads[ gtid ];
+
+#if OMPT_SUPPORT && OMPT_TRACE
+    if (ompt_enabled) {
+        /* OMPT state update */
+        th->th.ompt_thread_info.wait_id = (uint64_t) loc;
+        th->th.ompt_thread_info.state = ompt_state_wait_ordered;
+
+        /* OMPT event callback */
+        if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
+            ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
+                th->th.ompt_thread_info.wait_id);
+        }
+    }
+#endif
+
+    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
+        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
+    else
+        __kmp_parallel_deo( & gtid, & cid, loc );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+    if (ompt_enabled) {
+        /* OMPT state update */
+        th->th.ompt_thread_info.state = ompt_state_work_parallel;
+        th->th.ompt_thread_info.wait_id = 0;
+
+        /* OMPT event callback.
+         * NOTE(review): wait_id was reset to 0 two lines above, so the
+         * acquired_ordered callback always receives 0 here, whereas the
+         * wait_ordered callback received (uint64_t) loc. Looks unintended -
+         * confirm against the OMPT interface before changing. */
+        if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
+            ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
+                th->th.ompt_thread_info.wait_id);
+        }
+    }
+#endif
+
+#if USE_ITT_BUILD
+    __kmp_itt_ordered_start( gtid );
+#endif /* USE_ITT_BUILD */
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc  source location information.
+@param gtid  global thread number.
+
+End execution of an ordered construct.
+*/
+void
+__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
+{
+    int cid = 0;
+    kmp_info_t *th;
+
+    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
+
+#if USE_ITT_BUILD
+    __kmp_itt_ordered_end( gtid );
+    // TODO: ordered_wait_id
+#endif /* USE_ITT_BUILD */
+
+    th = __kmp_threads[ gtid ];
+
+    // Use the dispatch structure's exit hook when one is installed.
+    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
+        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
+    else
+        __kmp_parallel_dxo( & gtid, & cid, loc );
+
+#if OMPT_SUPPORT && OMPT_BLAME
+    if (ompt_enabled &&
+        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
+        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
+            th->th.ompt_thread_info.wait_id);
+    }
+#endif
+}
+
+#if KMP_USE_DYNAMIC_LOCK
+
+// Allocates and initializes an indirect lock for a critical section and tries
+// to publish its address in *crit with a compare-and-store against 0. If the
+// store fails (the word is no longer 0) the freshly allocated lock is simply
+// left unclaimed; see the comment in the body.
+static __forceinline void
+__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
+{
+    // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
+    void *idx;
+    kmp_indirect_lock_t **lck;
+    lck = (kmp_indirect_lock_t **)crit;
+    kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
+    KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
+    KMP_SET_I_LOCK_LOCATION(ilk, loc);
+    KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
+    KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
+#if USE_ITT_BUILD
+    __kmp_itt_critical_creating(ilk->lock, loc);
+#endif
+    int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
+    if (status == 0) {
+#if USE_ITT_BUILD
+        __kmp_itt_critical_destroyed(ilk->lock);
+#endif
+        // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
+        //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
+    }
+    KMP_DEBUG_ASSERT(*lck != NULL);
+}
+
+// Fast-path acquire tas lock
+// Tries one compare-and-store from KMP_LOCK_FREE(tas) to KMP_LOCK_BUSY(gtid+1, tas);
+// on failure it yields/spins and retries until the store succeeds.
+#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
+    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
+    if (l->lk.poll != KMP_LOCK_FREE(tas) || \
+            ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
+        kmp_uint32 spins; \
+        KMP_FSYNC_PREPARE(l); \
+        KMP_INIT_YIELD(spins); \
+        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
+            KMP_YIELD(TRUE); \
+        } else { \
+            KMP_YIELD_SPIN(spins); \
+        } \
+        while (l->lk.poll != KMP_LOCK_FREE(tas) || \
+               ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
+            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
+                KMP_YIELD(TRUE); \
+            } else { \
+                KMP_YIELD_SPIN(spins); \
+            } \
+        } \
+    } \
+    KMP_FSYNC_ACQUIRED(l); \
+}
+
+// Fast-path test tas lock
+// Sets rc to non-zero iff the lock was free and this call acquired it.
+#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
+    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
+    rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
+         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
+}
+
+// Fast-path release tas lock
+#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
+    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
+    KMP_MB(); \
+}
+
+#if KMP_USE_FUTEX
+
+// syscall() is declared in <unistd.h> and __NR_futex comes from <sys/syscall.h>;
+// the header names were missing from the two include directives below.
+# include <unistd.h>
+# include <sys/syscall.h>
+# ifndef FUTEX_WAIT
+#  define FUTEX_WAIT 0
+# endif
+# ifndef FUTEX_WAKE
+#  define FUTEX_WAKE 1
+# endif
+
+// Fast-path acquire futex lock
+// The owner is stored as KMP_LOCK_BUSY((gtid+1) << 1, futex); bit 0 of the
+// stripped poll value is used as the "waiters present" flag that the release
+// macro checks before issuing FUTEX_WAKE.
+#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
+    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
+    kmp_int32 gtid_code = (gtid+1) << 1; \
+    KMP_MB(); \
+    KMP_FSYNC_PREPARE(ftx); \
+    kmp_int32 poll_val; \
+    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
+                                                   KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
+        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
+        if (!cond) { \
+            if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
+                continue; \
+            } \
+            poll_val |= KMP_LOCK_BUSY(1, futex); \
+        } \
+        kmp_int32 rc; \
+        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
+            continue; \
+        } \
+        gtid_code |= 1; \
+    } \
+    KMP_FSYNC_ACQUIRED(ftx); \
+}
+
+// Fast-path test futex lock
+// The busy value must use the same encoding as KMP_ACQUIRE_FUTEX_LOCK, i.e. the
+// shift is applied to the owner id *before* KMP_LOCK_BUSY() encodes it. The
+// previous form shifted the already encoded word instead.
+#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
+    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
+    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex))) { \
+        KMP_FSYNC_ACQUIRED(ftx); \
+        rc = TRUE; \
+    } else { \
+        rc = FALSE; \
+    } \
+}
+
+// Fast-path release futex lock
+// Swaps in KMP_LOCK_FREE(futex) and wakes one waiter if the waiter bit was set.
+#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
+    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
+    KMP_MB(); \
+    KMP_FSYNC_RELEASING(ftx); \
+    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
+    if (KMP_LOCK_STRIP(poll_val) & 1) { \
+        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
+    } \
+    KMP_MB(); \
+    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
+}
+
+#endif // KMP_USE_FUTEX
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+// Returns the user lock whose address is stored in *crit, allocating and
+// publishing a new one on first use. When another thread publishes first, the
+// lock allocated here is destroyed and freed and the published lock is returned.
+static kmp_user_lock_p
+__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
+{
+    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
+
+    //
+    // Because of the double-check, the following load
+    // doesn't need to be volatile.
+    //
+    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
+
+    if ( lck == NULL ) {
+        void * idx;
+
+        // Allocate & initialize the lock.
+        // Remember allocated locks in table in order to free them in __kmp_cleanup()
+        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
+        __kmp_init_user_lock_with_checks( lck );
+        __kmp_set_user_lock_location( lck, loc );
+#if USE_ITT_BUILD
+        __kmp_itt_critical_creating( lck );
+            // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
+            // lock. It is the only place where we can guarantee it. There are chances the lock will
+            // destroyed with no usage, but it is not a problem, because this is not real event seen
+            // by user but rather setting name for object (lock). See more details in kmp_itt.h.
+#endif /* USE_ITT_BUILD */
+
+        //
+        // Use a cmpxchg instruction to slam the start of the critical
+        // section with the lock pointer.  If another thread beat us
+        // to it, deallocate the lock, and use the lock that the other
+        // thread allocated.
+        //
+        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
+
+        if ( status == 0 ) {
+            // Deallocate the lock and reload the value.
+#if USE_ITT_BUILD
+            __kmp_itt_critical_destroyed( lck );
+                // Let ITT know the lock is destroyed and the same memory location may be reused for
+                // another purpose.
+#endif /* USE_ITT_BUILD */
+            __kmp_destroy_user_lock_with_checks( lck );
+            __kmp_user_lock_free( &idx, gtid, lck );
+            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
+            KMP_DEBUG_ASSERT( lck != NULL );
+        }
+    }
+    return lck;
+}
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+/*!
+@ingroup WORK_SHARING
+@param loc  source location information.
+@param global_tid  global thread number .
+@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
+some other suitably unique value.
+
+Enter code protected by a `critical` construct.
+This function blocks until the executing thread can enter the critical section.
+*/
+void
+__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
+{
+#if KMP_USE_DYNAMIC_LOCK
+    __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); // dynamic-lock builds delegate entirely to the hinted variant
+#else
+    KMP_COUNT_BLOCK(OMP_CRITICAL);
+    kmp_user_lock_p lck;
+
+    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
+
+    //TODO: add THR_OVHD_STATE
+
+    KMP_CHECK_USER_LOCK_INIT();
+
+    /* TAS and futex locks whose poll word fits into OMP_CRITICAL_SIZE bytes live
+     * directly in *crit; every other lock kind is reached through a pointer
+     * stored in *crit (see __kmp_get_critical_section_ptr). */
+    if ( ( __kmp_user_lock_kind == lk_tas )
+      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+        lck = (kmp_user_lock_p)crit;
+    }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+    else if ( ( __kmp_user_lock_kind == lk_futex )
+      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+        lck = (kmp_user_lock_p)crit;
+    }
+#endif
+    else { // ticket, queuing or drdpa
+        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
+    }
+
+    if ( __kmp_env_consistency_check )
+        __kmp_push_sync( global_tid, ct_critical, loc, lck );
+
+    /* since the critical directive binds to all threads, not just
+     * the current team we have to check this even if we are in a
+     * serialized team */
+    /* also, even if we are the uber thread, we still have to conduct the lock,
+     * as we have to contend with sibling threads */
+
+#if USE_ITT_BUILD
+    __kmp_itt_critical_acquiring( lck );
+#endif /* USE_ITT_BUILD */
+    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
+    __kmp_acquire_user_lock_with_checks( lck, global_tid );
+
+#if USE_ITT_BUILD
+    __kmp_itt_critical_acquired( lck );
+#endif /* USE_ITT_BUILD */
+
+    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
+#endif // KMP_USE_DYNAMIC_LOCK
+}
+
+#if KMP_USE_DYNAMIC_LOCK
+/*
+ * Mapping implemented below, evaluated top to bottom (first match wins):
+ *   kmp_lock_hint_hle                      -> KMP_TSX_LOCK(hle)
+ *   kmp_lock_hint_rtm / _adaptive          -> the TSX lock if __kmp_cpuinfo.rtm is set,
+ *                                             otherwise __kmp_user_lock_seq
+ *   contended+uncontended or
+ *   speculative+nonspeculative together    -> __kmp_user_lock_seq
+ *   omp_lock_hint_contended                -> lockseq_queuing
+ *   omp_lock_hint_uncontended (not spec.)  -> lockseq_tas
+ *   omp_lock_hint_speculative              -> KMP_TSX_LOCK(hle)
+ *   anything else                          -> __kmp_user_lock_seq
+ * KMP_TSX_LOCK(seq) expands to lockseq_<seq> only when KMP_USE_TSX is set and to
+ * __kmp_user_lock_seq otherwise. The macro is defined inside the function and
+ * is not #undef'ed afterwards.
+ */
+// Converts the given hint to an internal lock implementation
+static __forceinline kmp_dyna_lockseq_t
+__kmp_map_hint_to_lock(uintptr_t hint)
+{
+#if KMP_USE_TSX
+# define KMP_TSX_LOCK(seq) lockseq_##seq
+#else
+# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
+#endif
+    // Hints that do not require further logic
+    if (hint & kmp_lock_hint_hle)
+        return KMP_TSX_LOCK(hle);
+    if (hint & kmp_lock_hint_rtm)
+        return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
+    if (hint & kmp_lock_hint_adaptive)
+        return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;
+
+    // Rule out conflicting hints first by returning the default lock
+    if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
+        return __kmp_user_lock_seq;
+    if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
+        return __kmp_user_lock_seq;
+
+    // Do not even consider speculation when it appears to be contended
+    if (hint & omp_lock_hint_contended)
+        return lockseq_queuing;
+
+    // Uncontended lock without speculation
+    if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
+        return lockseq_tas;
+
+    // HLE lock for speculation
+    if (hint & omp_lock_hint_speculative)
+        return KMP_TSX_LOCK(hle);
+
+    return __kmp_user_lock_seq;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc  source location information.
+@param global_tid  global thread number.
+@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
+or some other suitably unique value.
+@param hint the lock hint.
+
+Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
+This function blocks until the executing thread can enter the critical section unless the hint suggests use of
+speculative execution and the hardware supports it.
+*/
+void
+__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
+{
+    KMP_COUNT_BLOCK(OMP_CRITICAL);
+    kmp_user_lock_p lck;
+
+    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
+
+    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
+    // Check if it is initialized.
+    if (*lk == 0) {
+        kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
+        if (KMP_IS_D_LOCK(lckseq)) {
+            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
+        } else {
+            __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
+        }
+    }
+    // Branch for accessing the actual lock object and set operation. This branching is inevitable since
+    // this lock initialization does not follow the normal dispatch path (lock table is not used).
+    // The tag stored in the lock word (not the global default __kmp_user_lock_seq)
+    // identifies the lock class, because the hint may have selected another class.
+    int locktag = KMP_EXTRACT_D_TAG(lk);
+    if (locktag != 0) {
+        lck = (kmp_user_lock_p)lk;
+        if (__kmp_env_consistency_check) {
+            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
+        }
+# if USE_ITT_BUILD
+        __kmp_itt_critical_acquiring(lck);
+# endif
+# if KMP_USE_INLINED_TAS
+        // Only take the inlined TAS path when this lock really is a TAS lock.
+        if (locktag == KMP_GET_D_TAG(lockseq_tas) && !__kmp_env_consistency_check) {
+            KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
+        } else
+# elif KMP_USE_INLINED_FUTEX
+        // Only take the inlined futex path when this lock really is a futex lock.
+        if (locktag == KMP_GET_D_TAG(lockseq_futex) && !__kmp_env_consistency_check) {
+            KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
+        } else
+# endif
+        {
+            KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
+        }
+    } else {
+        // Indirect lock: *crit holds a pointer to the lock descriptor.
+        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
+        lck = ilk->lock;
+        if (__kmp_env_consistency_check) {
+            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
+        }
+# if USE_ITT_BUILD
+        __kmp_itt_critical_acquiring(lck);
+# endif
+        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
+    }
+
+#if USE_ITT_BUILD
+    __kmp_itt_critical_acquired( lck );
+#endif /* USE_ITT_BUILD */
+
+    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
+} // __kmpc_critical_with_hint
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+/*!
+@ingroup WORK_SHARING
+@param loc  source location information.
+@param global_tid  global thread number .
+@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
+some other suitably unique value.
+
+Leave a critical section, releasing any lock that was held during its execution.
+
+In dynamic-lock builds the lock class is taken from the tag stored in the lock
+word, exactly as __kmpc_critical_with_hint() does on entry, so that a lock
+selected through a hint is released with the matching implementation.
+*/
+void
+__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
+{
+    kmp_user_lock_p lck;
+
+    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
+
+#if KMP_USE_DYNAMIC_LOCK
+    // Dispatch on the tag in the lock word. The previous test on the global
+    // __kmp_user_lock_seq mis-handled critical sections whose hint selected a
+    // lock class different from the default (direct lock treated as a pointer to
+    // an indirect lock, or the other way round).
+    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
+    int locktag = KMP_EXTRACT_D_TAG(lk);
+    if (locktag != 0) {
+        lck = (kmp_user_lock_p)crit;
+        KMP_ASSERT(lck != NULL);
+        if (__kmp_env_consistency_check) {
+            __kmp_pop_sync(global_tid, ct_critical, loc);
+        }
+# if USE_ITT_BUILD
+        __kmp_itt_critical_releasing( lck );
+# endif
+# if KMP_USE_INLINED_TAS
+        if (locktag == KMP_GET_D_TAG(lockseq_tas) && !__kmp_env_consistency_check) {
+            KMP_RELEASE_TAS_LOCK(lck, global_tid);
+        } else
+# elif KMP_USE_INLINED_FUTEX
+        if (locktag == KMP_GET_D_TAG(lockseq_futex) && !__kmp_env_consistency_check) {
+            KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
+        } else
+# endif
+        {
+            KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
+        }
+    } else {
+        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
+        KMP_ASSERT(ilk != NULL);
+        lck = ilk->lock;
+        if (__kmp_env_consistency_check) {
+            __kmp_pop_sync(global_tid, ct_critical, loc);
+        }
+# if USE_ITT_BUILD
+        __kmp_itt_critical_releasing( lck );
+# endif
+        KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
+    }
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+    // Same lock lookup as in __kmpc_critical(): small TAS/futex locks live in
+    // *crit itself, every other kind is reached through the pointer stored there.
+    if ( ( __kmp_user_lock_kind == lk_tas )
+      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+        lck = (kmp_user_lock_p)crit;
+    }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+    else if ( ( __kmp_user_lock_kind == lk_futex )
+      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+        lck = (kmp_user_lock_p)crit;
+    }
+#endif
+    else { // ticket, queuing or drdpa
+        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
+    }
+
+    KMP_ASSERT(lck != NULL);
+
+    if ( __kmp_env_consistency_check )
+        __kmp_pop_sync( global_tid, ct_critical, loc );
+
+#if USE_ITT_BUILD
+    __kmp_itt_critical_releasing( lck );
+#endif /* USE_ITT_BUILD */
+    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
+    __kmp_release_user_lock_with_checks( lck, global_tid );
+
+#if OMPT_SUPPORT && OMPT_BLAME
+    if (ompt_enabled &&
+        ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
+        ompt_callbacks.ompt_callback(ompt_event_release_critical)(
+            (uint64_t) lck);
+    }
+#endif
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid thread id.
+@return one if the thread should execute the master block, zero otherwise
+
+Start execution of a combined barrier and master. The barrier is executed inside this function.
+*/
+kmp_int32
+__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
+{
+    int status;
+
+    KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
+
+    if (! TCR_4(__kmp_init_parallel))
+        __kmp_parallel_initialize();
+
+    if ( __kmp_env_consistency_check )
+        __kmp_check_barrier( global_tid, ct_barrier, loc );
+
+#if USE_ITT_NOTIFY
+    __kmp_threads[global_tid]->th.th_ident = loc;
+#endif
+    status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
+
+    // A zero status from __kmp_barrier() is reported to the caller as 1.
+    return (status != 0) ? 0 : 1;
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid thread id.
+
+Complete the execution of a combined barrier and master. This function should
+only be called at the completion of the master code.
Other threads will +still be waiting at the barrier and this call releases them. +*/ +void +__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) +{ + KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid )); + + __kmp_end_split_barrier ( bs_plain_barrier, global_tid ); +} + +/*! +@ingroup SYNCHRONIZATION +@param loc source location information +@param global_tid thread id. +@return one if the thread should execute the master block, zero otherwise + +Start execution of a combined barrier and master(nowait) construct. +The barrier is executed inside this function. +There is no equivalent "end" function, since the +*/ +kmp_int32 +__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid ) +{ + kmp_int32 ret; + + KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid )); + + if (! TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); + + if ( __kmp_env_consistency_check ) { + if ( loc == 0 ) { + KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user? + } + __kmp_check_barrier( global_tid, ct_barrier, loc ); + } + +#if USE_ITT_NOTIFY + __kmp_threads[global_tid]->th.th_ident = loc; +#endif + __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + + ret = __kmpc_master (loc, global_tid); + + if ( __kmp_env_consistency_check ) { + /* there's no __kmpc_end_master called; so the (stats) */ + /* actions of __kmpc_end_master are done here */ + + if ( global_tid < 0 ) { + KMP_WARNING( ThreadIdentInvalid ); + } + if (ret) { + /* only one thread should do the pop since only */ + /* one did the push (see __kmpc_master()) */ + + __kmp_pop_sync( global_tid, ct_master, loc ); + } + } + + return (ret); +} + +/* The BARRIER for a SINGLE process section is always explicit */ +/*! +@ingroup WORK_SHARING +@param loc source location information +@param global_tid global thread number +@return One if this thread should execute the single construct, zero otherwise. 
+ +Test whether to execute a single construct. +There are no implicit barriers in the two "single" calls, rather the compiler should +introduce an explicit barrier if it is required. +*/ + +kmp_int32 +__kmpc_single(ident_t *loc, kmp_int32 global_tid) +{ + KMP_COUNT_BLOCK(OMP_SINGLE); + kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE ); + if(rc == TRUE) { + KMP_START_EXPLICIT_TIMER(OMP_single); + } + +#if OMPT_SUPPORT && OMPT_TRACE + kmp_info_t *this_thr = __kmp_threads[ global_tid ]; + kmp_team_t *team = this_thr -> th.th_team; + int tid = __kmp_tid_from_gtid( global_tid ); + + if (ompt_enabled) { + if (rc) { + if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) { + ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id, + team->t.ompt_team_info.microtask); + } + } else { + if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) { + ompt_callbacks.ompt_callback(ompt_event_single_others_begin)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } + this_thr->th.ompt_thread_info.state = ompt_state_wait_single; + } + } +#endif + + return rc; +} + +/*! +@ingroup WORK_SHARING +@param loc source location information +@param global_tid global thread number + +Mark the end of a single construct. This function should +only be called by the thread that executed the block of code protected +by the `single` construct. 
+*/ +void +__kmpc_end_single(ident_t *loc, kmp_int32 global_tid) +{ + __kmp_exit_single( global_tid ); + KMP_STOP_EXPLICIT_TIMER(OMP_single); + +#if OMPT_SUPPORT && OMPT_TRACE + kmp_info_t *this_thr = __kmp_threads[ global_tid ]; + kmp_team_t *team = this_thr -> th.th_team; + int tid = __kmp_tid_from_gtid( global_tid ); + + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) { + ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } +#endif +} + +/*! +@ingroup WORK_SHARING +@param loc Source location +@param global_tid Global thread id + +Mark the end of a statically scheduled loop. +*/ +void +__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid ) +{ + KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_loop_end)) { + kmp_info_t *this_thr = __kmp_threads[ global_tid ]; + kmp_team_t *team = this_thr -> th.th_team; + int tid = __kmp_tid_from_gtid( global_tid ); + + ompt_callbacks.ompt_callback(ompt_event_loop_end)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } +#endif + + if ( __kmp_env_consistency_check ) + __kmp_pop_workshare( global_tid, ct_pdo, loc ); +} + +/* + * User routines which take C-style arguments (call by value) + * different from the Fortran equivalent routines + */ + +void +ompc_set_num_threads( int arg ) +{ +// !!!!! TODO: check the per-task binding + __kmp_set_num_threads( arg, __kmp_entry_gtid() ); +} + +void +ompc_set_dynamic( int flag ) +{ + kmp_info_t *thread; + + /* For the thread-private implementation of the internal controls */ + thread = __kmp_entry_thread(); + + __kmp_save_internal_controls( thread ); + + set__dynamic( thread, flag ? 
TRUE : FALSE ); +} + +void +ompc_set_nested( int flag ) +{ + kmp_info_t *thread; + + /* For the thread-private internal controls implementation */ + thread = __kmp_entry_thread(); + + __kmp_save_internal_controls( thread ); + + set__nested( thread, flag ? TRUE : FALSE ); +} + +void +ompc_set_max_active_levels( int max_active_levels ) +{ + /* TO DO */ + /* we want per-task implementation of this internal control */ + + /* For the per-thread internal controls implementation */ + __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels ); +} + +void +ompc_set_schedule( omp_sched_t kind, int modifier ) +{ +// !!!!! TODO: check the per-task binding + __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier ); +} + +int +ompc_get_ancestor_thread_num( int level ) +{ + return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level ); +} + +int +ompc_get_team_size( int level ) +{ + return __kmp_get_team_size( __kmp_entry_gtid(), level ); +} + +void +kmpc_set_stacksize( int arg ) +{ + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize( arg ); +} + +void +kmpc_set_stacksize_s( size_t arg ) +{ + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize( arg ); +} + +void +kmpc_set_blocktime( int arg ) +{ + int gtid, tid; + kmp_info_t *thread; + + gtid = __kmp_entry_gtid(); + tid = __kmp_tid_from_gtid(gtid); + thread = __kmp_thread_from_gtid(gtid); + + __kmp_aux_set_blocktime( arg, thread, tid ); +} + +void +kmpc_set_library( int arg ) +{ + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library( (enum library_type)arg ); +} + +void +kmpc_set_defaults( char const * str ) +{ + // __kmp_aux_set_defaults initializes the library if needed + __kmp_aux_set_defaults( str, KMP_STRLEN( str ) ); +} + +int +kmpc_set_affinity_mask_proc( int proc, void **mask ) +{ +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if ( ! 
TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_set_affinity_mask_proc( proc, mask ); +#endif +} + +int +kmpc_unset_affinity_mask_proc( int proc, void **mask ) +{ +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_unset_affinity_mask_proc( proc, mask ); +#endif +} + +int +kmpc_get_affinity_mask_proc( int proc, void **mask ) +{ +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_get_affinity_mask_proc( proc, mask ); +#endif +} + + +/* -------------------------------------------------------------------------- */ +/*! +@ingroup THREADPRIVATE +@param loc source location information +@param gtid global thread number +@param cpy_size size of the cpy_data buffer +@param cpy_data pointer to data to be copied +@param cpy_func helper function to call for copying data +@param didit flag variable: 1=single thread; 0=not single thread + +__kmpc_copyprivate implements the interface for the private data broadcast needed for +the copyprivate clause associated with a single region in an OpenMP* program (both C and Fortran). +All threads participating in the parallel region call this routine. +One of the threads (called the single thread) should have the didit variable set to 1 +and all other threads should have that variable set to 0. +All threads pass a pointer to a data buffer (cpy_data) that they have built. + +The OpenMP specification forbids the use of nowait on the single region when a copyprivate +clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid +race conditions, so the code generation for the single region should avoid generating a barrier +after the call to @ref __kmpc_copyprivate. + +The gtid parameter is the global thread id for the current thread. 
+The loc parameter is a pointer to source location information. + +Internal implementation: The single thread will first copy its descriptor address (cpy_data) +to a team-private location, then the other threads will each call the function pointed to by +the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer. + +The cpy_func routine used for the copy and the contents of the data area defined by cpy_data +and cpy_size may be built in any fashion that will allow the copy to be done. For instance, +the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers +to the data. The cpy_func routine must interpret the cpy_data buffer appropriately. + +The interface to cpy_func is as follows: +@code +void cpy_func( void *destination, void *source ) +@endcode +where void *destination is the cpy_data pointer for the thread being copied to +and void *source is the cpy_data pointer for the thread being copied from. +*/ +void +__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit ) +{ + void **data_ptr; + + KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid )); + + KMP_MB(); + + data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data; + + if ( __kmp_env_consistency_check ) { + if ( loc == 0 ) { + KMP_WARNING( ConstructIdentInvalid ); + } + } + + /* ToDo: Optimize the following two barriers into some kind of split barrier */ + + if (didit) *data_ptr = cpy_data; + + /* This barrier is not a barrier region boundary */ +#if USE_ITT_NOTIFY + __kmp_threads[gtid]->th.th_ident = loc; +#endif + __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL ); + + if (! 
didit) (*cpy_func)( cpy_data, *data_ptr ); + + /* Consider next barrier the user-visible barrier for barrier region boundaries */ + /* Nesting checks are already handled by the single construct checks */ + +#if USE_ITT_NOTIFY + __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location) +#endif + __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL ); +} + +/* -------------------------------------------------------------------------- */ + +#define INIT_LOCK __kmp_init_user_lock_with_checks +#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks +#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks +#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed +#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks +#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed +#define RELEASE_LOCK __kmp_release_user_lock_with_checks +#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks +#define TEST_LOCK __kmp_test_user_lock_with_checks +#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks +#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks +#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks + + +/* + * TODO: Make check abort messages use location info & pass it + * into with_checks routines + */ + +#if KMP_USE_DYNAMIC_LOCK + +// internal lock initializer +static __forceinline void +__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq) +{ + if (KMP_IS_D_LOCK(seq)) { + KMP_INIT_D_LOCK(lock, seq); +#if USE_ITT_BUILD + __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); +#endif + } else { + KMP_INIT_I_LOCK(lock, seq); +#if USE_ITT_BUILD + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __kmp_itt_lock_creating(ilk->lock, loc); +#endif + } +} + +// internal nest lock initializer +static __forceinline void +__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 
kmp_dyna_lockseq_t seq) +{ +#if KMP_USE_TSX + // Don't have nested lock implementation for speculative locks + if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) + seq = __kmp_user_lock_seq; +#endif + switch (seq) { + case lockseq_tas: + seq = lockseq_nested_tas; + break; +#if KMP_USE_FUTEX + case lockseq_futex: + seq = lockseq_nested_futex; + break; +#endif + case lockseq_ticket: + seq = lockseq_nested_ticket; + break; + case lockseq_queuing: + seq = lockseq_nested_queuing; + break; + case lockseq_drdpa: + seq = lockseq_nested_drdpa; + break; + default: + seq = lockseq_nested_queuing; + } + KMP_INIT_I_LOCK(lock, seq); +#if USE_ITT_BUILD + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __kmp_itt_lock_creating(ilk->lock, loc); +#endif +} + +/* initialize the lock with a hint */ +void +__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) +{ + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); + } + + __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); +} + +/* initialize the lock with a hint */ +void +__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) +{ + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); + } + + __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); +} + +#endif // KMP_USE_DYNAMIC_LOCK + +/* initialize the lock */ +void +__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_lock"); + } + __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); + +#else // KMP_USE_DYNAMIC_LOCK + + static char const 
* const func = "omp_init_lock"; + kmp_user_lock_p lck; + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + if ( __kmp_env_consistency_check ) { + if ( user_lock == NULL ) { + KMP_FATAL( LockIsUninitialized, func ); + } + } + + KMP_CHECK_USER_LOCK_INIT(); + + if ( ( __kmp_user_lock_kind == lk_tas ) + && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); + } + INIT_LOCK( lck ); + __kmp_set_user_lock_location( lck, loc ); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_init_lock)) { + ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck); + } +#endif + +#if USE_ITT_BUILD + __kmp_itt_lock_creating( lck ); +#endif /* USE_ITT_BUILD */ + +#endif // KMP_USE_DYNAMIC_LOCK +} // __kmpc_init_lock + +/* initialize the lock */ +void +__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); + } + __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); + +#else // KMP_USE_DYNAMIC_LOCK + + static char const * const func = "omp_init_nest_lock"; + kmp_user_lock_p lck; + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + if ( __kmp_env_consistency_check ) { + if ( user_lock == NULL ) { + KMP_FATAL( LockIsUninitialized, func ); + } + } + + KMP_CHECK_USER_LOCK_INIT(); + + if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) + + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && 
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) + <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); + } + + INIT_NESTED_LOCK( lck ); + __kmp_set_user_lock_location( lck, loc ); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) { + ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck); + } +#endif + +#if USE_ITT_BUILD + __kmp_itt_lock_creating( lck ); +#endif /* USE_ITT_BUILD */ + +#endif // KMP_USE_DYNAMIC_LOCK +} // __kmpc_init_nest_lock + +void +__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + +# if USE_ITT_BUILD + kmp_user_lock_p lck; + if (KMP_EXTRACT_D_TAG(user_lock) == 0) { + lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; + } else { + lck = (kmp_user_lock_p)user_lock; + } + __kmp_itt_lock_destroyed(lck); +# endif + KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); +#else + kmp_user_lock_p lck; + + if ( ( __kmp_user_lock_kind == lk_tas ) + && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" ); + } + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) { + ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck); + } +#endif + +#if USE_ITT_BUILD + __kmp_itt_lock_destroyed( lck ); +#endif /* USE_ITT_BUILD */ + DESTROY_LOCK( lck ); 
+ + if ( ( __kmp_user_lock_kind == lk_tas ) + && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + ; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + ; + } +#endif + else { + __kmp_user_lock_free( user_lock, gtid, lck ); + } +#endif // KMP_USE_DYNAMIC_LOCK +} // __kmpc_destroy_lock + +/* destroy the lock */ +void +__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + +# if USE_ITT_BUILD + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); + __kmp_itt_lock_destroyed(ilk->lock); +# endif + KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); + +#else // KMP_USE_DYNAMIC_LOCK + + kmp_user_lock_p lck; + + if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) + + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) + <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" ); + } + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) { + ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck); + } +#endif + +#if USE_ITT_BUILD + __kmp_itt_lock_destroyed( lck ); +#endif /* USE_ITT_BUILD */ + + DESTROY_NESTED_LOCK( lck ); + + if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) + + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { + ; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( 
__kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) + <= OMP_NEST_LOCK_T_SIZE ) ) { + ; + } +#endif + else { + __kmp_user_lock_free( user_lock, gtid, lck ); + } +#endif // KMP_USE_DYNAMIC_LOCK +} // __kmpc_destroy_nest_lock + +void +__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { + KMP_COUNT_BLOCK(OMP_set_lock); +#if KMP_USE_DYNAMIC_LOCK + int tag = KMP_EXTRACT_D_TAG(user_lock); +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object. +# endif +# if KMP_USE_INLINED_TAS + if (tag == locktag_tas && !__kmp_env_consistency_check) { + KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); + } else +# elif KMP_USE_INLINED_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); + } else +# endif + { + __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } +# if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +# endif + +#else // KMP_USE_DYNAMIC_LOCK + + kmp_user_lock_p lck; + + if ( ( __kmp_user_lock_kind == lk_tas ) + && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" ); + } + +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring( lck ); +#endif /* USE_ITT_BUILD */ + + ACQUIRE_LOCK( lck, gtid ); + +#if USE_ITT_BUILD + __kmp_itt_lock_acquired( lck ); +#endif /* USE_ITT_BUILD */ + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) { + ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck); + } +#endif + +#endif // KMP_USE_DYNAMIC_LOCK 
+} + +void +__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +# endif + KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); +# if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +#endif + +#else // KMP_USE_DYNAMIC_LOCK + int acquire_status; + kmp_user_lock_p lck; + + if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) + + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) + <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" ); + } + +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring( lck ); +#endif /* USE_ITT_BUILD */ + + ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status ); + +#if USE_ITT_BUILD + __kmp_itt_lock_acquired( lck ); +#endif /* USE_ITT_BUILD */ +#endif // KMP_USE_DYNAMIC_LOCK + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled) { + if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { + if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)) + ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck); + } else { + if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)) + ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck); + } + } +#endif +} + +void +__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) +{ +#if KMP_USE_DYNAMIC_LOCK + + int tag = KMP_EXTRACT_D_TAG(user_lock); +# if USE_ITT_BUILD + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); +# endif +# if KMP_USE_INLINED_TAS + if (tag == locktag_tas && 
!__kmp_env_consistency_check) { + KMP_RELEASE_TAS_LOCK(user_lock, gtid); + } else +# elif KMP_USE_INLINED_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); + } else +# endif + { + __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } + +#else // KMP_USE_DYNAMIC_LOCK + + kmp_user_lock_p lck; + + /* Can't use serial interval since not block structured */ + /* release the lock */ + + if ( ( __kmp_user_lock_kind == lk_tas ) + && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + // "fast" path implemented to fix customer performance issue +#if USE_ITT_BUILD + __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock ); +#endif /* USE_ITT_BUILD */ + TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); + KMP_MB(); + return; +#else + lck = (kmp_user_lock_p)user_lock; +#endif + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" ); + } + +#if USE_ITT_BUILD + __kmp_itt_lock_releasing( lck ); +#endif /* USE_ITT_BUILD */ + + RELEASE_LOCK( lck, gtid ); + +#if OMPT_SUPPORT && OMPT_BLAME + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_release_lock)) { + ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck); + } +#endif + +#endif // KMP_USE_DYNAMIC_LOCK +} + +/* release the lock */ +void +__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) +{ +#if KMP_USE_DYNAMIC_LOCK + +# if USE_ITT_BUILD + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); +# endif + KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); + +#else // KMP_USE_DYNAMIC_LOCK + + kmp_user_lock_p lck; + + /* Can't use serial 
interval since not block structured */ + + if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) + + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + // "fast" path implemented to fix customer performance issue + kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock; +#if USE_ITT_BUILD + __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock ); +#endif /* USE_ITT_BUILD */ + if ( --(tl->lk.depth_locked) == 0 ) { + TCW_4(tl->lk.poll, 0); + } + KMP_MB(); + return; +#else + lck = (kmp_user_lock_p)user_lock; +#endif + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) + <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" ); + } + +#if USE_ITT_BUILD + __kmp_itt_lock_releasing( lck ); +#endif /* USE_ITT_BUILD */ + + int release_status; + release_status = RELEASE_NESTED_LOCK( lck, gtid ); +#if OMPT_SUPPORT && OMPT_BLAME + if (ompt_enabled) { + if (release_status == KMP_LOCK_RELEASED) { + if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) { + ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)( + (uint64_t) lck); + } + } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) { + ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)( + (uint64_t) lck); + } + } +#endif + +#endif // KMP_USE_DYNAMIC_LOCK +} + +/* try to acquire the lock */ +int +__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) +{ + KMP_COUNT_BLOCK(OMP_test_lock); + +#if KMP_USE_DYNAMIC_LOCK + int rc; + int tag = KMP_EXTRACT_D_TAG(user_lock); +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +# endif +# if KMP_USE_INLINED_TAS + if 
(tag == locktag_tas && !__kmp_env_consistency_check) { + KMP_TEST_TAS_LOCK(user_lock, gtid, rc); + } else +# elif KMP_USE_INLINED_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); + } else +# endif + { + rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } + if (rc) { +# if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +# endif + return FTN_TRUE; + } else { +# if USE_ITT_BUILD + __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); +# endif + return FTN_FALSE; + } + +#else // KMP_USE_DYNAMIC_LOCK + + kmp_user_lock_p lck; + int rc; + + if ( ( __kmp_user_lock_kind == lk_tas ) + && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" ); + } + +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring( lck ); +#endif /* USE_ITT_BUILD */ + + rc = TEST_LOCK( lck, gtid ); +#if USE_ITT_BUILD + if ( rc ) { + __kmp_itt_lock_acquired( lck ); + } else { + __kmp_itt_lock_cancelled( lck ); + } +#endif /* USE_ITT_BUILD */ + return ( rc ? 
FTN_TRUE : FTN_FALSE ); + + /* Can't use serial interval since not block structured */ + +#endif // KMP_USE_DYNAMIC_LOCK +} + +/* try to acquire the lock */ +int +__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) +{ +#if KMP_USE_DYNAMIC_LOCK + int rc; +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +# endif + rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); +# if USE_ITT_BUILD + if (rc) { + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); + } else { + __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); + } +# endif + return rc; + +#else // KMP_USE_DYNAMIC_LOCK + + kmp_user_lock_p lck; + int rc; + + if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) + + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + else if ( ( __kmp_user_lock_kind == lk_futex ) + && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) + <= OMP_NEST_LOCK_T_SIZE ) ) { + lck = (kmp_user_lock_p)user_lock; + } +#endif + else { + lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" ); + } + +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring( lck ); +#endif /* USE_ITT_BUILD */ + + rc = TEST_NESTED_LOCK( lck, gtid ); +#if USE_ITT_BUILD + if ( rc ) { + __kmp_itt_lock_acquired( lck ); + } else { + __kmp_itt_lock_cancelled( lck ); + } +#endif /* USE_ITT_BUILD */ + return rc; + + /* Can't use serial interval since not block structured */ + +#endif // KMP_USE_DYNAMIC_LOCK +} + + +/*--------------------------------------------------------------------------------------------------------------------*/ + +/* + * Interface to fast scalable reduce methods routines + */ + +// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions; +// another solution: to re-determine the method 
one more time in __kmpc_end_reduce* functions (new prototype required then) +// AT: which solution is better? +#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \ + ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) ) + +#define __KMP_GET_REDUCTION_METHOD(gtid) \ + ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) + +// description of the packed_reduction_method variable: look at the macros in kmp.h + + +// used in a critical section reduce block +static __forceinline void +__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) { + + // this lock was visible to a customer and to the threading profile tool as a serial overhead span + // (although it's used for an internal purpose only) + // why was it visible in previous implementation? + // should we keep it visible in new reduce block? + kmp_user_lock_p lck; + +#if KMP_USE_DYNAMIC_LOCK + + kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; + // Check if it is initialized. + if (*lk == 0) { + if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { + KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq)); + } else { + __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq)); + } + } + // Branch for accessing the actual lock object and set operation. This branching is inevitable since + // this lock initialization does not follow the normal dispatch path (lock table is not used). 
+ if (KMP_EXTRACT_D_TAG(lk) != 0) { + lck = (kmp_user_lock_p)lk; + KMP_DEBUG_ASSERT(lck != NULL); + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); + } + KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); + } else { + kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); + lck = ilk->lock; + KMP_DEBUG_ASSERT(lck != NULL); + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); + } + KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); + } + +#else // KMP_USE_DYNAMIC_LOCK + + // We know that the fast reduction code is only emitted by Intel compilers + // with 32 byte critical sections. If there isn't enough space, then we + // have to use a pointer. + if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) { + lck = (kmp_user_lock_p)crit; + } + else { + lck = __kmp_get_critical_section_ptr( crit, loc, global_tid ); + } + KMP_DEBUG_ASSERT( lck != NULL ); + + if ( __kmp_env_consistency_check ) + __kmp_push_sync( global_tid, ct_critical, loc, lck ); + + __kmp_acquire_user_lock_with_checks( lck, global_tid ); + +#endif // KMP_USE_DYNAMIC_LOCK +} + +// used in a critical section reduce block +static __forceinline void +__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) { + + kmp_user_lock_p lck; + +#if KMP_USE_DYNAMIC_LOCK + + if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { + lck = (kmp_user_lock_p)crit; + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); + KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); + } else { + kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); + KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); + } + +#else // KMP_USE_DYNAMIC_LOCK + + // We know that the fast reduction code is only emitted by Intel compilers with 32 byte 
critical + // sections. If there isn't enough space, then we have to use a pointer. + if ( __kmp_base_user_lock_size > 32 ) { + lck = *( (kmp_user_lock_p *) crit ); + KMP_ASSERT( lck != NULL ); + } else { + lck = (kmp_user_lock_p) crit; + } + + if ( __kmp_env_consistency_check ) + __kmp_pop_sync( global_tid, ct_critical, loc ); + + __kmp_release_user_lock_with_checks( lck, global_tid ); + +#endif // KMP_USE_DYNAMIC_LOCK +} // __kmp_end_critical_section_reduce_block + + +/* 2.a.i. Reduce Block without a terminating barrier */ +/*! +@ingroup SYNCHRONIZATION +@param loc source location information +@param global_tid global thread number +@param num_vars number of items (variables) to be reduced +@param reduce_size size of data in bytes to be reduced +@param reduce_data pointer to data to be reduced +@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data +@param lck pointer to the unique lock data structure +@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed + +The nowait version is used for a reduce clause with the nowait argument. +*/ +kmp_int32 +__kmpc_reduce_nowait( + ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck ) { + + KMP_COUNT_BLOCK(REDUCE_nowait); + int retval = 0; + PACKED_REDUCTION_METHOD_T packed_reduction_method; +#if OMP_40_ENABLED + kmp_team_t *team; + kmp_info_t *th; + int teams_swapped = 0, task_state; +#endif + KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) ); + + // why do we need this initialization here at all? + // Reduction clause can not be used as a stand-alone directive. + + // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed + // possible detection of false-positive race by the threadchecker ??? + if( ! 
TCR_4( __kmp_init_parallel ) ) + __kmp_parallel_initialize(); + + // check correctness of reduce block nesting +#if KMP_USE_DYNAMIC_LOCK + if ( __kmp_env_consistency_check ) + __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); +#else + if ( __kmp_env_consistency_check ) + __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); +#endif + +#if OMP_40_ENABLED + th = __kmp_thread_from_gtid(global_tid); + if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct? + team = th->th.th_team; + if( team->t.t_level == th->th.th_teams_level ) { + // this is reduction at teams construct + KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 + // Let's swap teams temporarily for the reduction barrier + teams_swapped = 1; + th->th.th_info.ds.ds_tid = team->t.t_master_tid; + th->th.th_team = team->t.t_parent; + th->th.th_team_nproc = th->th.th_team->t.t_nproc; + th->th.th_task_team = th->th.th_team->t.t_task_team[0]; + task_state = th->th.th_task_state; + th->th.th_task_state = 0; + } + } +#endif // OMP_40_ENABLED + + // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable + // the variable should be either a construct-specific or thread-specific property, not a team specific property + // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct) + // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?) 
+ // (if both construct-specific and team-specific variables were shared, then unness extra syncs should be needed) + // a thread-specific variable is better regarding two issues above (next construct and extra syncs) + // a thread-specific "th_local.reduction_method" variable is used currently + // each thread executes 'determine' and 'set' lines (no need to execute by one thread, to avoid unness extra syncs) + + packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck ); + __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method ); + + if( packed_reduction_method == critical_reduce_block ) { + + __kmp_enter_critical_section_reduce_block( loc, global_tid, lck ); + retval = 1; + + } else if( packed_reduction_method == empty_reduce_block ) { + + // usage: if team size == 1, no synchronization is required ( Intel platforms only ) + retval = 1; + + } else if( packed_reduction_method == atomic_reduce_block ) { + + retval = 2; + + // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen) + // (it's not quite good, because the checking block has been closed by this 'pop', + // but atomic operation has not been executed yet, will be executed slightly later, literally on next instruction) + if ( __kmp_env_consistency_check ) + __kmp_pop_sync( global_tid, ct_reduce, loc ); + + } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { + + //AT: performance issue: a real barrier here + //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them) + //AT: (it's not what a customer might expect specifying NOWAIT clause) + //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer) + //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster + // and be more in line with sense of NOWAIT + //AT: TO DO: 
do epcc test and compare times + + // this barrier should be invisible to a customer and to the threading profile tool + // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose) +#if USE_ITT_NOTIFY + __kmp_threads[global_tid]->th.th_ident = loc; +#endif + retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func ); + retval = ( retval != 0 ) ? ( 0 ) : ( 1 ); + + // all other workers except master should do this pop here + // ( none of other workers will get to __kmpc_end_reduce_nowait() ) + if ( __kmp_env_consistency_check ) { + if( retval == 0 ) { + __kmp_pop_sync( global_tid, ct_reduce, loc ); + } + } + + } else { + + // should never reach this block + KMP_ASSERT( 0 ); // "unexpected method" + + } +#if OMP_40_ENABLED + if( teams_swapped ) { + // Restore thread structure + th->th.th_info.ds.ds_tid = 0; + th->th.th_team = team; + th->th.th_team_nproc = team->t.t_nproc; + th->th.th_task_team = team->t.t_task_team[task_state]; + th->th.th_task_state = task_state; + } +#endif + KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) ); + + return retval; +} + +/*! +@ingroup SYNCHRONIZATION +@param loc source location information +@param global_tid global thread id. +@param lck pointer to the unique lock data structure + +Finish the execution of a reduce nowait. 
+*/ +void +__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) { + + PACKED_REDUCTION_METHOD_T packed_reduction_method; + + KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) ); + + packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid ); + + if( packed_reduction_method == critical_reduce_block ) { + + __kmp_end_critical_section_reduce_block( loc, global_tid, lck ); + + } else if( packed_reduction_method == empty_reduce_block ) { + + // usage: if team size == 1, no synchronization is required ( on Intel platforms only ) + + } else if( packed_reduction_method == atomic_reduce_block ) { + + // neither master nor other workers should get here + // (code gen does not generate this call in case 2: atomic reduce block) + // actually it's better to remove this elseif at all; + // after removal this value will checked by the 'else' and will assert + + } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { + + // only master gets here + + } else { + + // should never reach this block + KMP_ASSERT( 0 ); // "unexpected method" + + } + + if ( __kmp_env_consistency_check ) + __kmp_pop_sync( global_tid, ct_reduce, loc ); + + KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) ); + + return; +} + +/* 2.a.ii. Reduce Block with a terminating barrier */ + +/*! 
+@ingroup SYNCHRONIZATION +@param loc source location information +@param global_tid global thread number +@param num_vars number of items (variables) to be reduced +@param reduce_size size of data in bytes to be reduced +@param reduce_data pointer to data to be reduced +@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data +@param lck pointer to the unique lock data structure +@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed + +A blocking reduce that includes an implicit barrier. +*/ +kmp_int32 +__kmpc_reduce( + ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_vars, size_t reduce_size, void *reduce_data, + void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck ) +{ + KMP_COUNT_BLOCK(REDUCE_wait); + int retval = 0; + PACKED_REDUCTION_METHOD_T packed_reduction_method; + + KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) ); + + // why do we need this initialization here at all? + // Reduction clause can not be a stand-alone directive. + + // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed + // possible detection of false-positive race by the threadchecker ??? + if( ! 
TCR_4( __kmp_init_parallel ) ) + __kmp_parallel_initialize(); + + // check correctness of reduce block nesting +#if KMP_USE_DYNAMIC_LOCK + if ( __kmp_env_consistency_check ) + __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); +#else + if ( __kmp_env_consistency_check ) + __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); +#endif + + packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck ); + __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method ); + + if( packed_reduction_method == critical_reduce_block ) { + + __kmp_enter_critical_section_reduce_block( loc, global_tid, lck ); + retval = 1; + + } else if( packed_reduction_method == empty_reduce_block ) { + + // usage: if team size == 1, no synchronization is required ( Intel platforms only ) + retval = 1; + + } else if( packed_reduction_method == atomic_reduce_block ) { + + retval = 2; + + } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { + + //case tree_reduce_block: + // this barrier should be visible to a customer and to the threading profile tool + // (it's a terminating barrier on constructs if NOWAIT not specified) +#if USE_ITT_NOTIFY + __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames +#endif + retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func ); + retval = ( retval != 0 ) ? 
( 0 ) : ( 1 ); + + // all other workers except master should do this pop here + // ( none of other workers except master will enter __kmpc_end_reduce() ) + if ( __kmp_env_consistency_check ) { + if( retval == 0 ) { // 0: all other workers; 1: master + __kmp_pop_sync( global_tid, ct_reduce, loc ); + } + } + + } else { + + // should never reach this block + KMP_ASSERT( 0 ); // "unexpected method" + + } + + KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) ); + + return retval; +} + +/*! +@ingroup SYNCHRONIZATION +@param loc source location information +@param global_tid global thread id. +@param lck pointer to the unique lock data structure + +Finish the execution of a blocking reduce. +The lck pointer must be the same as that used in the corresponding start function. +*/ +void +__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) { + + PACKED_REDUCTION_METHOD_T packed_reduction_method; + + KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) ); + + packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid ); + + // this barrier should be visible to a customer and to the threading profile tool + // (it's a terminating barrier on constructs if NOWAIT not specified) + + if( packed_reduction_method == critical_reduce_block ) { + + __kmp_end_critical_section_reduce_block( loc, global_tid, lck ); + + // TODO: implicit barrier: should be exposed +#if USE_ITT_NOTIFY + __kmp_threads[global_tid]->th.th_ident = loc; +#endif + __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + + } else if( packed_reduction_method == empty_reduce_block ) { + + // usage: if team size == 1, no synchronization is required ( Intel platforms only ) + + // TODO: implicit barrier: should be exposed +#if USE_ITT_NOTIFY + __kmp_threads[global_tid]->th.th_ident = loc; +#endif + __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + + } else 
if( packed_reduction_method == atomic_reduce_block ) { + + // TODO: implicit barrier: should be exposed +#if USE_ITT_NOTIFY + __kmp_threads[global_tid]->th.th_ident = loc; +#endif + __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + + } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { + + // only master executes here (master releases all other workers) + __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid ); + + } else { + + // should never reach this block + KMP_ASSERT( 0 ); // "unexpected method" + + } + + if ( __kmp_env_consistency_check ) + __kmp_pop_sync( global_tid, ct_reduce, loc ); + + KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) ); + + return; +} + +#undef __KMP_GET_REDUCTION_METHOD +#undef __KMP_SET_REDUCTION_METHOD + +/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/ + +kmp_uint64 +__kmpc_get_taskid() { + + kmp_int32 gtid; + kmp_info_t * thread; + + gtid = __kmp_get_gtid(); + if ( gtid < 0 ) { + return 0; + }; // if + thread = __kmp_thread_from_gtid( gtid ); + return thread->th.th_current_task->td_task_id; + +} // __kmpc_get_taskid + + +kmp_uint64 +__kmpc_get_parent_taskid() { + + kmp_int32 gtid; + kmp_info_t * thread; + kmp_taskdata_t * parent_task; + + gtid = __kmp_get_gtid(); + if ( gtid < 0 ) { + return 0; + }; // if + thread = __kmp_thread_from_gtid( gtid ); + parent_task = thread->th.th_current_task->td_parent; + return ( parent_task == NULL ? 0 : parent_task->td_task_id ); + +} // __kmpc_get_parent_taskid + +void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT) +{ + if ( ! 
__kmp_init_serial ) { + __kmp_serial_initialize(); + } + __kmp_place_num_sockets = nS; + __kmp_place_socket_offset = sO; + __kmp_place_num_cores = nC; + __kmp_place_core_offset = cO; + __kmp_place_num_threads_per_core = nT; +} + +// end of file // + diff --git a/contrib/libs/cxxsupp/openmp/kmp_debug.c b/contrib/libs/cxxsupp/openmp/kmp_debug.c index c3d96cf0130..3bbffa53f21 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_debug.c +++ b/contrib/libs/cxxsupp/openmp/kmp_debug.c @@ -1,142 +1,142 @@ -/* - * kmp_debug.c -- debug utilities for the Guide library - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_debug.h" /* really necessary? */ -#include "kmp_i18n.h" -#include "kmp_io.h" - -#ifdef KMP_DEBUG -void -__kmp_debug_printf_stdout( char const * format, ... ) -{ - va_list ap; - va_start( ap, format ); - - __kmp_vprintf( kmp_out, format, ap ); - - va_end(ap); -} -#endif - -void -__kmp_debug_printf( char const * format, ... ) -{ - va_list ap; - va_start( ap, format ); - - __kmp_vprintf( kmp_err, format, ap ); - - va_end( ap ); -} - -#ifdef KMP_USE_ASSERT - int - __kmp_debug_assert( - char const * msg, - char const * file, - int line - ) { - - if ( file == NULL ) { - file = KMP_I18N_STR( UnknownFile ); - } else { - // Remove directories from path, leave only file name. File name is enough, there is no need - // in bothering developers and customers with full paths. 
- char const * slash = strrchr( file, '/' ); - if ( slash != NULL ) { - file = slash + 1; - }; // if - }; // if - - #ifdef KMP_DEBUG - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_debug_printf( "Assertion failure at %s(%d): %s.\n", file, line, msg ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); - #ifdef USE_ASSERT_BREAK - #if KMP_OS_WINDOWS - DebugBreak(); - #endif - #endif // USE_ASSERT_BREAK - #ifdef USE_ASSERT_STALL - /* __kmp_infinite_loop(); */ - for(;;); - #endif // USE_ASSERT_STALL - #ifdef USE_ASSERT_SEG - { - int volatile * ZERO = (int*) 0; - ++ (*ZERO); - } - #endif // USE_ASSERT_SEG - #endif - - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( AssertionFailure, file, line ), - KMP_HNT( SubmitBugReport ), - __kmp_msg_null - ); - - return 0; - - } // __kmp_debug_assert - -#endif // KMP_USE_ASSERT - -/* Dump debugging buffer to stderr */ -void -__kmp_dump_debug_buffer( void ) -{ - if ( __kmp_debug_buffer != NULL ) { - int i; - int dc = __kmp_debug_count; - char *db = & __kmp_debug_buffer[ (dc % __kmp_debug_buf_lines) * __kmp_debug_buf_chars ]; - char *db_end = & __kmp_debug_buffer[ __kmp_debug_buf_lines * __kmp_debug_buf_chars ]; - char *db2; - - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_printf_no_lock( "\nStart dump of debugging buffer (entry=%d):\n", - dc % __kmp_debug_buf_lines ); - - for ( i = 0; i < __kmp_debug_buf_lines; i++ ) { - - if ( *db != '\0' ) { - /* Fix up where no carriage return before string termination char */ - for ( db2 = db + 1; db2 < db + __kmp_debug_buf_chars - 1; db2 ++) { - if ( *db2 == '\0' ) { - if ( *(db2-1) != '\n' ) { *db2 = '\n'; *(db2+1) = '\0'; } - break; - } - } - /* Handle case at end by shortening the printed message by one char if necessary */ - if ( db2 == db + __kmp_debug_buf_chars - 1 && - *db2 == '\0' && *(db2-1) != '\n' ) { - *(db2-1) = '\n'; - } - - __kmp_printf_no_lock( "%4d: %.*s", i, __kmp_debug_buf_chars, db ); - *db = '\0'; /* only let it print once! 
*/ - } - - db += __kmp_debug_buf_chars; - if ( db >= db_end ) - db = __kmp_debug_buffer; - } - - __kmp_printf_no_lock( "End dump of debugging buffer (entry=%d).\n\n", - ( dc+i-1 ) % __kmp_debug_buf_lines ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); - } -} +/* + * kmp_debug.c -- debug utilities for the Guide library + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_debug.h" /* really necessary? */ +#include "kmp_i18n.h" +#include "kmp_io.h" + +#ifdef KMP_DEBUG +void +__kmp_debug_printf_stdout( char const * format, ... ) +{ + va_list ap; + va_start( ap, format ); + + __kmp_vprintf( kmp_out, format, ap ); + + va_end(ap); +} +#endif + +void +__kmp_debug_printf( char const * format, ... ) +{ + va_list ap; + va_start( ap, format ); + + __kmp_vprintf( kmp_err, format, ap ); + + va_end( ap ); +} + +#ifdef KMP_USE_ASSERT + int + __kmp_debug_assert( + char const * msg, + char const * file, + int line + ) { + + if ( file == NULL ) { + file = KMP_I18N_STR( UnknownFile ); + } else { + // Remove directories from path, leave only file name. File name is enough, there is no need + // in bothering developers and customers with full paths. 
+ char const * slash = strrchr( file, '/' ); + if ( slash != NULL ) { + file = slash + 1; + }; // if + }; // if + + #ifdef KMP_DEBUG + __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_debug_printf( "Assertion failure at %s(%d): %s.\n", file, line, msg ); + __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + #ifdef USE_ASSERT_BREAK + #if KMP_OS_WINDOWS + DebugBreak(); + #endif + #endif // USE_ASSERT_BREAK + #ifdef USE_ASSERT_STALL + /* __kmp_infinite_loop(); */ + for(;;); + #endif // USE_ASSERT_STALL + #ifdef USE_ASSERT_SEG + { + int volatile * ZERO = (int*) 0; + ++ (*ZERO); + } + #endif // USE_ASSERT_SEG + #endif + + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( AssertionFailure, file, line ), + KMP_HNT( SubmitBugReport ), + __kmp_msg_null + ); + + return 0; + + } // __kmp_debug_assert + +#endif // KMP_USE_ASSERT + +/* Dump debugging buffer to stderr */ +void +__kmp_dump_debug_buffer( void ) +{ + if ( __kmp_debug_buffer != NULL ) { + int i; + int dc = __kmp_debug_count; + char *db = & __kmp_debug_buffer[ (dc % __kmp_debug_buf_lines) * __kmp_debug_buf_chars ]; + char *db_end = & __kmp_debug_buffer[ __kmp_debug_buf_lines * __kmp_debug_buf_chars ]; + char *db2; + + __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_printf_no_lock( "\nStart dump of debugging buffer (entry=%d):\n", + dc % __kmp_debug_buf_lines ); + + for ( i = 0; i < __kmp_debug_buf_lines; i++ ) { + + if ( *db != '\0' ) { + /* Fix up where no carriage return before string termination char */ + for ( db2 = db + 1; db2 < db + __kmp_debug_buf_chars - 1; db2 ++) { + if ( *db2 == '\0' ) { + if ( *(db2-1) != '\n' ) { *db2 = '\n'; *(db2+1) = '\0'; } + break; + } + } + /* Handle case at end by shortening the printed message by one char if necessary */ + if ( db2 == db + __kmp_debug_buf_chars - 1 && + *db2 == '\0' && *(db2-1) != '\n' ) { + *(db2-1) = '\n'; + } + + __kmp_printf_no_lock( "%4d: %.*s", i, __kmp_debug_buf_chars, db ); + *db = '\0'; /* only let it print once! 
*/ + } + + db += __kmp_debug_buf_chars; + if ( db >= db_end ) + db = __kmp_debug_buffer; + } + + __kmp_printf_no_lock( "End dump of debugging buffer (entry=%d).\n\n", + ( dc+i-1 ) % __kmp_debug_buf_lines ); + __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + } +} diff --git a/contrib/libs/cxxsupp/openmp/kmp_debug.h b/contrib/libs/cxxsupp/openmp/kmp_debug.h index 912d252b157..abc923edc64 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_debug.h +++ b/contrib/libs/cxxsupp/openmp/kmp_debug.h @@ -1,131 +1,131 @@ -/* - * kmp_debug.h -- debug / assertion code for Assure library - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_DEBUG_H -#define KMP_DEBUG_H - -#include - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -// ------------------------------------------------------------------------------------------------- -// Build-time assertion. -// ------------------------------------------------------------------------------------------------- - -/* - Build-time assertion can do compile-time checking of data structure sizes, etc. This works by - declaring a negative-length array if the conditional expression evaluates to false. In that - case, the compiler issues a syntax error and stops the compilation. If the expression is - true, we get an extraneous static single character array in the scope of the macro. - - Usage: - - KMP_BUILD_ASSERT( sizeof( some_t ) <= 32 ); - KMP_BUILD_ASSERT( offsetof( some_t, field ) % 8 == 0 ); - - Do not use _KMP_BUILD_ASSERT and __KMP_BUILD_ASSERT directly, it is working guts. -*/ - -#define __KMP_BUILD_ASSERT( expr, suffix ) typedef char __kmp_build_check_##suffix[ (expr) ? 
1 : -1 ] -#define _KMP_BUILD_ASSERT( expr, suffix ) __KMP_BUILD_ASSERT( (expr), suffix ) -#ifdef KMP_USE_ASSERT - #define KMP_BUILD_ASSERT( expr ) _KMP_BUILD_ASSERT( (expr), __LINE__ ) -#else - #define KMP_BUILD_ASSERT( expr ) /* nothing to do */ -#endif - -// ------------------------------------------------------------------------------------------------- -// Run-time assertions. -// ------------------------------------------------------------------------------------------------- - -extern void __kmp_dump_debug_buffer( void ); - +/* + * kmp_debug.h -- debug / assertion code for Assure library + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_DEBUG_H +#define KMP_DEBUG_H + +#include + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +// ------------------------------------------------------------------------------------------------- +// Build-time assertion. +// ------------------------------------------------------------------------------------------------- + +/* + Build-time assertion can do compile-time checking of data structure sizes, etc. This works by + declaring a negative-length array if the conditional expression evaluates to false. In that + case, the compiler issues a syntax error and stops the compilation. If the expression is + true, we get an extraneous static single character array in the scope of the macro. + + Usage: + + KMP_BUILD_ASSERT( sizeof( some_t ) <= 32 ); + KMP_BUILD_ASSERT( offsetof( some_t, field ) % 8 == 0 ); + + Do not use _KMP_BUILD_ASSERT and __KMP_BUILD_ASSERT directly, it is working guts. +*/ + +#define __KMP_BUILD_ASSERT( expr, suffix ) typedef char __kmp_build_check_##suffix[ (expr) ? 
1 : -1 ] +#define _KMP_BUILD_ASSERT( expr, suffix ) __KMP_BUILD_ASSERT( (expr), suffix ) #ifdef KMP_USE_ASSERT - extern int __kmp_debug_assert( char const * expr, char const * file, int line ); - #ifdef KMP_DEBUG - #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( #cond, __FILE__, __LINE__ ) ) - #define KMP_ASSERT2( cond, msg ) ( (cond) ? 0 : __kmp_debug_assert( (msg), __FILE__, __LINE__ ) ) - #define KMP_DEBUG_ASSERT( cond ) KMP_ASSERT( cond ) - #define KMP_DEBUG_ASSERT2( cond, msg ) KMP_ASSERT2( cond, msg ) - #else - // Do not expose condition in release build. Use "assertion failure". - #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( "assertion failure", __FILE__, __LINE__ ) ) - #define KMP_ASSERT2( cond, msg ) KMP_ASSERT( cond ) - #define KMP_DEBUG_ASSERT( cond ) 0 - #define KMP_DEBUG_ASSERT2( cond, msg ) 0 - #endif // KMP_DEBUG -#else - #define KMP_ASSERT( cond ) 0 - #define KMP_ASSERT2( cond, msg ) 0 - #define KMP_DEBUG_ASSERT( cond ) 0 - #define KMP_DEBUG_ASSERT2( cond, msg ) 0 -#endif // KMP_USE_ASSERT - -#ifdef KMP_DEBUG - extern void __kmp_debug_printf_stdout( char const * format, ... ); -#endif -extern void __kmp_debug_printf( char const * format, ... 
); - -#ifdef KMP_DEBUG - - extern int kmp_a_debug; - extern int kmp_b_debug; - extern int kmp_c_debug; - extern int kmp_d_debug; - extern int kmp_e_debug; - extern int kmp_f_debug; - extern int kmp_diag; - - #define KA_TRACE(d,x) if (kmp_a_debug >= d) { __kmp_debug_printf x ; } - #define KB_TRACE(d,x) if (kmp_b_debug >= d) { __kmp_debug_printf x ; } - #define KC_TRACE(d,x) if (kmp_c_debug >= d) { __kmp_debug_printf x ; } - #define KD_TRACE(d,x) if (kmp_d_debug >= d) { __kmp_debug_printf x ; } - #define KE_TRACE(d,x) if (kmp_e_debug >= d) { __kmp_debug_printf x ; } - #define KF_TRACE(d,x) if (kmp_f_debug >= d) { __kmp_debug_printf x ; } - #define K_DIAG(d,x) {if (kmp_diag == d) { __kmp_debug_printf_stdout x ; } } - - #define KA_DUMP(d,x) if (kmp_a_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KB_DUMP(d,x) if (kmp_b_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KC_DUMP(d,x) if (kmp_c_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KD_DUMP(d,x) if (kmp_d_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KE_DUMP(d,x) if (kmp_e_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KF_DUMP(d,x) if (kmp_f_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - -#else - - #define KA_TRACE(d,x) /* nothing to do */ - #define KB_TRACE(d,x) /* nothing to do */ - #define KC_TRACE(d,x) /* nothing to do */ - #define KD_TRACE(d,x) /* nothing to do */ - #define KE_TRACE(d,x) /* nothing to do */ - #define KF_TRACE(d,x) /* nothing to do */ - #define K_DIAG(d,x) {}/* nothing to do */ - - #define KA_DUMP(d,x) /* nothing to do */ - #define KB_DUMP(d,x) /* nothing to do */ - #define KC_DUMP(d,x) /* nothing to do */ - #define KD_DUMP(d,x) /* nothing to do */ - #define KE_DUMP(d,x) /* nothing to do */ - #define KF_DUMP(d,x) /* nothing to do */ - -#endif // KMP_DEBUG - -#ifdef __cplusplus - } // extern "C" -#endif // __cplusplus - 
-#endif /* KMP_DEBUG_H */ + #define KMP_BUILD_ASSERT( expr ) _KMP_BUILD_ASSERT( (expr), __LINE__ ) +#else + #define KMP_BUILD_ASSERT( expr ) /* nothing to do */ +#endif + +// ------------------------------------------------------------------------------------------------- +// Run-time assertions. +// ------------------------------------------------------------------------------------------------- + +extern void __kmp_dump_debug_buffer( void ); + +#ifdef KMP_USE_ASSERT + extern int __kmp_debug_assert( char const * expr, char const * file, int line ); + #ifdef KMP_DEBUG + #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( #cond, __FILE__, __LINE__ ) ) + #define KMP_ASSERT2( cond, msg ) ( (cond) ? 0 : __kmp_debug_assert( (msg), __FILE__, __LINE__ ) ) + #define KMP_DEBUG_ASSERT( cond ) KMP_ASSERT( cond ) + #define KMP_DEBUG_ASSERT2( cond, msg ) KMP_ASSERT2( cond, msg ) + #else + // Do not expose condition in release build. Use "assertion failure". + #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( "assertion failure", __FILE__, __LINE__ ) ) + #define KMP_ASSERT2( cond, msg ) KMP_ASSERT( cond ) + #define KMP_DEBUG_ASSERT( cond ) 0 + #define KMP_DEBUG_ASSERT2( cond, msg ) 0 + #endif // KMP_DEBUG +#else + #define KMP_ASSERT( cond ) 0 + #define KMP_ASSERT2( cond, msg ) 0 + #define KMP_DEBUG_ASSERT( cond ) 0 + #define KMP_DEBUG_ASSERT2( cond, msg ) 0 +#endif // KMP_USE_ASSERT + +#ifdef KMP_DEBUG + extern void __kmp_debug_printf_stdout( char const * format, ... ); +#endif +extern void __kmp_debug_printf( char const * format, ... 
); + +#ifdef KMP_DEBUG + + extern int kmp_a_debug; + extern int kmp_b_debug; + extern int kmp_c_debug; + extern int kmp_d_debug; + extern int kmp_e_debug; + extern int kmp_f_debug; + extern int kmp_diag; + + #define KA_TRACE(d,x) if (kmp_a_debug >= d) { __kmp_debug_printf x ; } + #define KB_TRACE(d,x) if (kmp_b_debug >= d) { __kmp_debug_printf x ; } + #define KC_TRACE(d,x) if (kmp_c_debug >= d) { __kmp_debug_printf x ; } + #define KD_TRACE(d,x) if (kmp_d_debug >= d) { __kmp_debug_printf x ; } + #define KE_TRACE(d,x) if (kmp_e_debug >= d) { __kmp_debug_printf x ; } + #define KF_TRACE(d,x) if (kmp_f_debug >= d) { __kmp_debug_printf x ; } + #define K_DIAG(d,x) {if (kmp_diag == d) { __kmp_debug_printf_stdout x ; } } + + #define KA_DUMP(d,x) if (kmp_a_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } + #define KB_DUMP(d,x) if (kmp_b_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } + #define KC_DUMP(d,x) if (kmp_c_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } + #define KD_DUMP(d,x) if (kmp_d_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } + #define KE_DUMP(d,x) if (kmp_e_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } + #define KF_DUMP(d,x) if (kmp_f_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } + +#else + + #define KA_TRACE(d,x) /* nothing to do */ + #define KB_TRACE(d,x) /* nothing to do */ + #define KC_TRACE(d,x) /* nothing to do */ + #define KD_TRACE(d,x) /* nothing to do */ + #define KE_TRACE(d,x) /* nothing to do */ + #define KF_TRACE(d,x) /* nothing to do */ + #define K_DIAG(d,x) {}/* nothing to do */ + + #define KA_DUMP(d,x) /* nothing to do */ + #define KB_DUMP(d,x) /* nothing to do */ + #define KC_DUMP(d,x) /* nothing to do */ + #define KD_DUMP(d,x) /* nothing to do */ + #define KE_DUMP(d,x) /* nothing to do */ + #define KF_DUMP(d,x) /* nothing to do */ + +#endif // KMP_DEBUG + +#ifdef __cplusplus + } // extern "C" +#endif // __cplusplus + 
+#endif /* KMP_DEBUG_H */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_debugger.c b/contrib/libs/cxxsupp/openmp/kmp_debugger.c index c4180fd554d..b3c1acb49ba 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_debugger.c +++ b/contrib/libs/cxxsupp/openmp/kmp_debugger.c @@ -1,314 +1,314 @@ -#if USE_DEBUGGER -/* - * kmp_debugger.c -- debugger support. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_lock.h" -#include "kmp_omp.h" -#include "kmp_str.h" - -/* - NOTE: All variable names are known to the debugger, do not change! -*/ - -#ifdef __cplusplus - extern "C" { - extern kmp_omp_struct_info_t __kmp_omp_debug_struct_info; - } // extern "C" -#endif // __cplusplus - -int __kmp_debugging = FALSE; // Boolean whether currently debugging OpenMP RTL. - -#define offset_and_size_of( structure, field ) \ - { \ - offsetof( structure, field ), \ - sizeof( ( (structure *) NULL)->field ) \ - } - -#define offset_and_size_not_available \ - { -1, -1 } - -#define addr_and_size_of( var ) \ - { \ - (kmp_uint64)( & var ), \ - sizeof( var ) \ - } - -#define nthr_buffer_size 1024 -static kmp_int32 -kmp_omp_nthr_info_buffer[ nthr_buffer_size ] = - { nthr_buffer_size * sizeof( kmp_int32 ) }; - -/* TODO: Check punctuation for various platforms here */ -static char func_microtask[] = "__kmp_invoke_microtask"; -static char func_fork[] = "__kmpc_fork_call"; -static char func_fork_teams[] = "__kmpc_fork_teams"; - - -// Various info about runtime structures: addresses, field offsets, sizes, etc. -kmp_omp_struct_info_t -__kmp_omp_debug_struct_info = { - - /* Change this only if you make a fundamental data structure change here */ - KMP_OMP_VERSION, - - /* sanity check. 
Only should be checked if versions are identical - * This is also used for backward compatibility to get the runtime - * structure size if it the runtime is older than the interface */ - sizeof( kmp_omp_struct_info_t ), - - /* OpenMP RTL version info. */ - addr_and_size_of( __kmp_version_major ), - addr_and_size_of( __kmp_version_minor ), - addr_and_size_of( __kmp_version_build ), - addr_and_size_of( __kmp_openmp_version ), - { (kmp_uint64)( __kmp_copyright ) + KMP_VERSION_MAGIC_LEN, 0 }, // Skip magic prefix. - - /* Various globals. */ - addr_and_size_of( __kmp_threads ), - addr_and_size_of( __kmp_root ), - addr_and_size_of( __kmp_threads_capacity ), - addr_and_size_of( __kmp_monitor ), -#if ! KMP_USE_DYNAMIC_LOCK - addr_and_size_of( __kmp_user_lock_table ), -#endif - addr_and_size_of( func_microtask ), - addr_and_size_of( func_fork ), - addr_and_size_of( func_fork_teams ), - addr_and_size_of( __kmp_team_counter ), - addr_and_size_of( __kmp_task_counter ), - addr_and_size_of( kmp_omp_nthr_info_buffer ), - sizeof( void * ), - OMP_LOCK_T_SIZE < sizeof(void *), - bs_last_barrier, - TASK_DEQUE_SIZE, - - // thread structure information - sizeof( kmp_base_info_t ), - offset_and_size_of( kmp_base_info_t, th_info ), - offset_and_size_of( kmp_base_info_t, th_team ), - offset_and_size_of( kmp_base_info_t, th_root ), - offset_and_size_of( kmp_base_info_t, th_serial_team ), - offset_and_size_of( kmp_base_info_t, th_ident ), - offset_and_size_of( kmp_base_info_t, th_spin_here ), - offset_and_size_of( kmp_base_info_t, th_next_waiting ), - offset_and_size_of( kmp_base_info_t, th_task_team ), - offset_and_size_of( kmp_base_info_t, th_current_task ), - offset_and_size_of( kmp_base_info_t, th_task_state ), - offset_and_size_of( kmp_base_info_t, th_bar ), - offset_and_size_of( kmp_bstate_t, b_worker_arrived ), - -#if OMP_40_ENABLED - // teams information - offset_and_size_of( kmp_base_info_t, th_teams_microtask), - offset_and_size_of( kmp_base_info_t, th_teams_level), - 
offset_and_size_of( kmp_teams_size_t, nteams ), - offset_and_size_of( kmp_teams_size_t, nth ), -#endif - - // kmp_desc structure (for info field above) - sizeof( kmp_desc_base_t ), - offset_and_size_of( kmp_desc_base_t, ds_tid ), - offset_and_size_of( kmp_desc_base_t, ds_gtid ), - // On Windows* OS, ds_thread contains a thread /handle/, which is not usable, while thread /id/ - // is in ds_thread_id. - #if KMP_OS_WINDOWS - offset_and_size_of( kmp_desc_base_t, ds_thread_id), - #else - offset_and_size_of( kmp_desc_base_t, ds_thread), - #endif - - // team structure information - sizeof( kmp_base_team_t ), - offset_and_size_of( kmp_base_team_t, t_master_tid ), - offset_and_size_of( kmp_base_team_t, t_ident ), - offset_and_size_of( kmp_base_team_t, t_parent ), - offset_and_size_of( kmp_base_team_t, t_nproc ), - offset_and_size_of( kmp_base_team_t, t_threads ), - offset_and_size_of( kmp_base_team_t, t_serialized ), - offset_and_size_of( kmp_base_team_t, t_id ), - offset_and_size_of( kmp_base_team_t, t_pkfn ), - offset_and_size_of( kmp_base_team_t, t_task_team ), - offset_and_size_of( kmp_base_team_t, t_implicit_task_taskdata ), -#if OMP_40_ENABLED - offset_and_size_of( kmp_base_team_t, t_cancel_request ), -#endif - offset_and_size_of( kmp_base_team_t, t_bar ), - offset_and_size_of( kmp_balign_team_t, b_master_arrived ), - offset_and_size_of( kmp_balign_team_t, b_team_arrived ), - - // root structure information - sizeof( kmp_base_root_t ), - offset_and_size_of( kmp_base_root_t, r_root_team ), - offset_and_size_of( kmp_base_root_t, r_hot_team ), - offset_and_size_of( kmp_base_root_t, r_uber_thread ), - offset_and_size_not_available, - - // ident structure information - sizeof( ident_t ), - offset_and_size_of( ident_t, psource ), - offset_and_size_of( ident_t, flags ), - - // lock structure information - sizeof( kmp_base_queuing_lock_t ), - offset_and_size_of( kmp_base_queuing_lock_t, initialized ), - offset_and_size_of( kmp_base_queuing_lock_t, location ), - 
offset_and_size_of( kmp_base_queuing_lock_t, tail_id ), - offset_and_size_of( kmp_base_queuing_lock_t, head_id ), - offset_and_size_of( kmp_base_queuing_lock_t, next_ticket ), - offset_and_size_of( kmp_base_queuing_lock_t, now_serving ), - offset_and_size_of( kmp_base_queuing_lock_t, owner_id ), - offset_and_size_of( kmp_base_queuing_lock_t, depth_locked ), - offset_and_size_of( kmp_base_queuing_lock_t, flags ), - -#if ! KMP_USE_DYNAMIC_LOCK - /* Lock table. */ - sizeof( kmp_lock_table_t ), - offset_and_size_of( kmp_lock_table_t, used ), - offset_and_size_of( kmp_lock_table_t, allocated ), - offset_and_size_of( kmp_lock_table_t, table ), -#endif - - // Task team structure information. - sizeof( kmp_base_task_team_t ), - offset_and_size_of( kmp_base_task_team_t, tt_threads_data ), - offset_and_size_of( kmp_base_task_team_t, tt_found_tasks ), - offset_and_size_of( kmp_base_task_team_t, tt_nproc ), - offset_and_size_of( kmp_base_task_team_t, tt_unfinished_threads ), - offset_and_size_of( kmp_base_task_team_t, tt_active ), - - // task_data_t. 
- sizeof( kmp_taskdata_t ), - offset_and_size_of( kmp_taskdata_t, td_task_id ), - offset_and_size_of( kmp_taskdata_t, td_flags ), - offset_and_size_of( kmp_taskdata_t, td_team ), - offset_and_size_of( kmp_taskdata_t, td_parent ), - offset_and_size_of( kmp_taskdata_t, td_level ), - offset_and_size_of( kmp_taskdata_t, td_ident ), - offset_and_size_of( kmp_taskdata_t, td_allocated_child_tasks ), - offset_and_size_of( kmp_taskdata_t, td_incomplete_child_tasks ), - - offset_and_size_of( kmp_taskdata_t, td_taskwait_ident ), - offset_and_size_of( kmp_taskdata_t, td_taskwait_counter ), - offset_and_size_of( kmp_taskdata_t, td_taskwait_thread ), - -#if OMP_40_ENABLED - offset_and_size_of( kmp_taskdata_t, td_taskgroup ), - offset_and_size_of( kmp_taskgroup_t, count ), - offset_and_size_of( kmp_taskgroup_t, cancel_request ), - - offset_and_size_of( kmp_taskdata_t, td_depnode ), - offset_and_size_of( kmp_depnode_list_t, node ), - offset_and_size_of( kmp_depnode_list_t, next ), - offset_and_size_of( kmp_base_depnode_t, successors ), - offset_and_size_of( kmp_base_depnode_t, task ), - offset_and_size_of( kmp_base_depnode_t, npredecessors ), - offset_and_size_of( kmp_base_depnode_t, nrefs ), -#endif - offset_and_size_of( kmp_task_t, routine ), - - // thread_data_t. - sizeof( kmp_thread_data_t ), - offset_and_size_of( kmp_base_thread_data_t, td_deque ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_head ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_tail ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_ntasks ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_last_stolen ), - - // The last field. - KMP_OMP_VERSION, - -}; // __kmp_omp_debug_struct_info - -#undef offset_and_size_of -#undef addr_and_size_of - -/* - Intel compiler on IA-32 architecture issues a warning "conversion - from "unsigned long long" to "char *" may lose significant bits" - when 64-bit value is assigned to 32-bit pointer. Use this function - to suppress the warning. 
-*/ -static inline -void * -__kmp_convert_to_ptr( - kmp_uint64 addr -) { - #if KMP_COMPILER_ICC - #pragma warning( push ) - #pragma warning( disable: 810 ) // conversion from "unsigned long long" to "char *" may lose significant bits - #pragma warning( disable: 1195 ) // conversion from integer to smaller pointer - #endif // KMP_COMPILER_ICC - return (void *) addr; - #if KMP_COMPILER_ICC - #pragma warning( pop ) - #endif // KMP_COMPILER_ICC -} // __kmp_convert_to_ptr - - -static int -kmp_location_match( - kmp_str_loc_t * loc, - kmp_omp_nthr_item_t * item -) { - - int file_match = 0; - int func_match = 0; - int line_match = 0; - - char * file = (char *) __kmp_convert_to_ptr( item->file ); - char * func = (char *) __kmp_convert_to_ptr( item->func ); - file_match = __kmp_str_fname_match( & loc->fname, file ); - func_match = - item->func == 0 // If item->func is NULL, it allows any func name. - || - strcmp( func, "*" ) == 0 - || - ( loc->func != NULL && strcmp( loc->func, func ) == 0 ); - line_match = - item->begin <= loc->line - && - ( item->end <= 0 || loc->line <= item->end ); // if item->end <= 0, it means "end of file". - - return ( file_match && func_match && line_match ); - -} // kmp_location_match - - -int -__kmp_omp_num_threads( - ident_t const * ident -) { - - int num_threads = 0; - - kmp_omp_nthr_info_t * info = - (kmp_omp_nthr_info_t *) __kmp_convert_to_ptr( __kmp_omp_debug_struct_info.nthr_info.addr ); - if ( info->num > 0 && info->array != 0 ) { - kmp_omp_nthr_item_t * items = (kmp_omp_nthr_item_t *) __kmp_convert_to_ptr( info->array ); - kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 1 ); - int i; - for ( i = 0; i < info->num; ++ i ) { - if ( kmp_location_match( & loc, & items[ i ] ) ) { - num_threads = items[ i ].num_threads; - }; // if - }; // for - __kmp_str_loc_free( & loc ); - }; // if - - return num_threads;; - -} // __kmp_omp_num_threads -#endif /* USE_DEBUGGER */ +#if USE_DEBUGGER +/* + * kmp_debugger.c -- debugger support. 
+ */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_lock.h" +#include "kmp_omp.h" +#include "kmp_str.h" + +/* + NOTE: All variable names are known to the debugger, do not change! +*/ + +#ifdef __cplusplus + extern "C" { + extern kmp_omp_struct_info_t __kmp_omp_debug_struct_info; + } // extern "C" +#endif // __cplusplus + +int __kmp_debugging = FALSE; // Boolean whether currently debugging OpenMP RTL. + +#define offset_and_size_of( structure, field ) \ + { \ + offsetof( structure, field ), \ + sizeof( ( (structure *) NULL)->field ) \ + } + +#define offset_and_size_not_available \ + { -1, -1 } + +#define addr_and_size_of( var ) \ + { \ + (kmp_uint64)( & var ), \ + sizeof( var ) \ + } + +#define nthr_buffer_size 1024 +static kmp_int32 +kmp_omp_nthr_info_buffer[ nthr_buffer_size ] = + { nthr_buffer_size * sizeof( kmp_int32 ) }; + +/* TODO: Check punctuation for various platforms here */ +static char func_microtask[] = "__kmp_invoke_microtask"; +static char func_fork[] = "__kmpc_fork_call"; +static char func_fork_teams[] = "__kmpc_fork_teams"; + + +// Various info about runtime structures: addresses, field offsets, sizes, etc. +kmp_omp_struct_info_t +__kmp_omp_debug_struct_info = { + + /* Change this only if you make a fundamental data structure change here */ + KMP_OMP_VERSION, + + /* sanity check. Only should be checked if versions are identical + * This is also used for backward compatibility to get the runtime + * structure size if it the runtime is older than the interface */ + sizeof( kmp_omp_struct_info_t ), + + /* OpenMP RTL version info. 
*/ + addr_and_size_of( __kmp_version_major ), + addr_and_size_of( __kmp_version_minor ), + addr_and_size_of( __kmp_version_build ), + addr_and_size_of( __kmp_openmp_version ), + { (kmp_uint64)( __kmp_copyright ) + KMP_VERSION_MAGIC_LEN, 0 }, // Skip magic prefix. + + /* Various globals. */ + addr_and_size_of( __kmp_threads ), + addr_and_size_of( __kmp_root ), + addr_and_size_of( __kmp_threads_capacity ), + addr_and_size_of( __kmp_monitor ), +#if ! KMP_USE_DYNAMIC_LOCK + addr_and_size_of( __kmp_user_lock_table ), +#endif + addr_and_size_of( func_microtask ), + addr_and_size_of( func_fork ), + addr_and_size_of( func_fork_teams ), + addr_and_size_of( __kmp_team_counter ), + addr_and_size_of( __kmp_task_counter ), + addr_and_size_of( kmp_omp_nthr_info_buffer ), + sizeof( void * ), + OMP_LOCK_T_SIZE < sizeof(void *), + bs_last_barrier, + TASK_DEQUE_SIZE, + + // thread structure information + sizeof( kmp_base_info_t ), + offset_and_size_of( kmp_base_info_t, th_info ), + offset_and_size_of( kmp_base_info_t, th_team ), + offset_and_size_of( kmp_base_info_t, th_root ), + offset_and_size_of( kmp_base_info_t, th_serial_team ), + offset_and_size_of( kmp_base_info_t, th_ident ), + offset_and_size_of( kmp_base_info_t, th_spin_here ), + offset_and_size_of( kmp_base_info_t, th_next_waiting ), + offset_and_size_of( kmp_base_info_t, th_task_team ), + offset_and_size_of( kmp_base_info_t, th_current_task ), + offset_and_size_of( kmp_base_info_t, th_task_state ), + offset_and_size_of( kmp_base_info_t, th_bar ), + offset_and_size_of( kmp_bstate_t, b_worker_arrived ), + +#if OMP_40_ENABLED + // teams information + offset_and_size_of( kmp_base_info_t, th_teams_microtask), + offset_and_size_of( kmp_base_info_t, th_teams_level), + offset_and_size_of( kmp_teams_size_t, nteams ), + offset_and_size_of( kmp_teams_size_t, nth ), +#endif + + // kmp_desc structure (for info field above) + sizeof( kmp_desc_base_t ), + offset_and_size_of( kmp_desc_base_t, ds_tid ), + offset_and_size_of( 
kmp_desc_base_t, ds_gtid ), + // On Windows* OS, ds_thread contains a thread /handle/, which is not usable, while thread /id/ + // is in ds_thread_id. + #if KMP_OS_WINDOWS + offset_and_size_of( kmp_desc_base_t, ds_thread_id), + #else + offset_and_size_of( kmp_desc_base_t, ds_thread), + #endif + + // team structure information + sizeof( kmp_base_team_t ), + offset_and_size_of( kmp_base_team_t, t_master_tid ), + offset_and_size_of( kmp_base_team_t, t_ident ), + offset_and_size_of( kmp_base_team_t, t_parent ), + offset_and_size_of( kmp_base_team_t, t_nproc ), + offset_and_size_of( kmp_base_team_t, t_threads ), + offset_and_size_of( kmp_base_team_t, t_serialized ), + offset_and_size_of( kmp_base_team_t, t_id ), + offset_and_size_of( kmp_base_team_t, t_pkfn ), + offset_and_size_of( kmp_base_team_t, t_task_team ), + offset_and_size_of( kmp_base_team_t, t_implicit_task_taskdata ), +#if OMP_40_ENABLED + offset_and_size_of( kmp_base_team_t, t_cancel_request ), +#endif + offset_and_size_of( kmp_base_team_t, t_bar ), + offset_and_size_of( kmp_balign_team_t, b_master_arrived ), + offset_and_size_of( kmp_balign_team_t, b_team_arrived ), + + // root structure information + sizeof( kmp_base_root_t ), + offset_and_size_of( kmp_base_root_t, r_root_team ), + offset_and_size_of( kmp_base_root_t, r_hot_team ), + offset_and_size_of( kmp_base_root_t, r_uber_thread ), + offset_and_size_not_available, + + // ident structure information + sizeof( ident_t ), + offset_and_size_of( ident_t, psource ), + offset_and_size_of( ident_t, flags ), + + // lock structure information + sizeof( kmp_base_queuing_lock_t ), + offset_and_size_of( kmp_base_queuing_lock_t, initialized ), + offset_and_size_of( kmp_base_queuing_lock_t, location ), + offset_and_size_of( kmp_base_queuing_lock_t, tail_id ), + offset_and_size_of( kmp_base_queuing_lock_t, head_id ), + offset_and_size_of( kmp_base_queuing_lock_t, next_ticket ), + offset_and_size_of( kmp_base_queuing_lock_t, now_serving ), + offset_and_size_of( 
kmp_base_queuing_lock_t, owner_id ), + offset_and_size_of( kmp_base_queuing_lock_t, depth_locked ), + offset_and_size_of( kmp_base_queuing_lock_t, flags ), + +#if ! KMP_USE_DYNAMIC_LOCK + /* Lock table. */ + sizeof( kmp_lock_table_t ), + offset_and_size_of( kmp_lock_table_t, used ), + offset_and_size_of( kmp_lock_table_t, allocated ), + offset_and_size_of( kmp_lock_table_t, table ), +#endif + + // Task team structure information. + sizeof( kmp_base_task_team_t ), + offset_and_size_of( kmp_base_task_team_t, tt_threads_data ), + offset_and_size_of( kmp_base_task_team_t, tt_found_tasks ), + offset_and_size_of( kmp_base_task_team_t, tt_nproc ), + offset_and_size_of( kmp_base_task_team_t, tt_unfinished_threads ), + offset_and_size_of( kmp_base_task_team_t, tt_active ), + + // task_data_t. + sizeof( kmp_taskdata_t ), + offset_and_size_of( kmp_taskdata_t, td_task_id ), + offset_and_size_of( kmp_taskdata_t, td_flags ), + offset_and_size_of( kmp_taskdata_t, td_team ), + offset_and_size_of( kmp_taskdata_t, td_parent ), + offset_and_size_of( kmp_taskdata_t, td_level ), + offset_and_size_of( kmp_taskdata_t, td_ident ), + offset_and_size_of( kmp_taskdata_t, td_allocated_child_tasks ), + offset_and_size_of( kmp_taskdata_t, td_incomplete_child_tasks ), + + offset_and_size_of( kmp_taskdata_t, td_taskwait_ident ), + offset_and_size_of( kmp_taskdata_t, td_taskwait_counter ), + offset_and_size_of( kmp_taskdata_t, td_taskwait_thread ), + +#if OMP_40_ENABLED + offset_and_size_of( kmp_taskdata_t, td_taskgroup ), + offset_and_size_of( kmp_taskgroup_t, count ), + offset_and_size_of( kmp_taskgroup_t, cancel_request ), + + offset_and_size_of( kmp_taskdata_t, td_depnode ), + offset_and_size_of( kmp_depnode_list_t, node ), + offset_and_size_of( kmp_depnode_list_t, next ), + offset_and_size_of( kmp_base_depnode_t, successors ), + offset_and_size_of( kmp_base_depnode_t, task ), + offset_and_size_of( kmp_base_depnode_t, npredecessors ), + offset_and_size_of( kmp_base_depnode_t, nrefs ), +#endif 
+ offset_and_size_of( kmp_task_t, routine ), + + // thread_data_t. + sizeof( kmp_thread_data_t ), + offset_and_size_of( kmp_base_thread_data_t, td_deque ), + offset_and_size_of( kmp_base_thread_data_t, td_deque_head ), + offset_and_size_of( kmp_base_thread_data_t, td_deque_tail ), + offset_and_size_of( kmp_base_thread_data_t, td_deque_ntasks ), + offset_and_size_of( kmp_base_thread_data_t, td_deque_last_stolen ), + + // The last field. + KMP_OMP_VERSION, + +}; // __kmp_omp_debug_struct_info + +#undef offset_and_size_of +#undef addr_and_size_of + +/* + Intel compiler on IA-32 architecture issues a warning "conversion + from "unsigned long long" to "char *" may lose significant bits" + when 64-bit value is assigned to 32-bit pointer. Use this function + to suppress the warning. +*/ +static inline +void * +__kmp_convert_to_ptr( + kmp_uint64 addr +) { + #if KMP_COMPILER_ICC + #pragma warning( push ) + #pragma warning( disable: 810 ) // conversion from "unsigned long long" to "char *" may lose significant bits + #pragma warning( disable: 1195 ) // conversion from integer to smaller pointer + #endif // KMP_COMPILER_ICC + return (void *) addr; + #if KMP_COMPILER_ICC + #pragma warning( pop ) + #endif // KMP_COMPILER_ICC +} // __kmp_convert_to_ptr + + +static int +kmp_location_match( + kmp_str_loc_t * loc, + kmp_omp_nthr_item_t * item +) { + + int file_match = 0; + int func_match = 0; + int line_match = 0; + + char * file = (char *) __kmp_convert_to_ptr( item->file ); + char * func = (char *) __kmp_convert_to_ptr( item->func ); + file_match = __kmp_str_fname_match( & loc->fname, file ); + func_match = + item->func == 0 // If item->func is NULL, it allows any func name. + || + strcmp( func, "*" ) == 0 + || + ( loc->func != NULL && strcmp( loc->func, func ) == 0 ); + line_match = + item->begin <= loc->line + && + ( item->end <= 0 || loc->line <= item->end ); // if item->end <= 0, it means "end of file". 
+ + return ( file_match && func_match && line_match ); + +} // kmp_location_match + + +int +__kmp_omp_num_threads( + ident_t const * ident +) { + + int num_threads = 0; + + kmp_omp_nthr_info_t * info = + (kmp_omp_nthr_info_t *) __kmp_convert_to_ptr( __kmp_omp_debug_struct_info.nthr_info.addr ); + if ( info->num > 0 && info->array != 0 ) { + kmp_omp_nthr_item_t * items = (kmp_omp_nthr_item_t *) __kmp_convert_to_ptr( info->array ); + kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 1 ); + int i; + for ( i = 0; i < info->num; ++ i ) { + if ( kmp_location_match( & loc, & items[ i ] ) ) { + num_threads = items[ i ].num_threads; + }; // if + }; // for + __kmp_str_loc_free( & loc ); + }; // if + + return num_threads;; + +} // __kmp_omp_num_threads +#endif /* USE_DEBUGGER */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_debugger.h b/contrib/libs/cxxsupp/openmp/kmp_debugger.h index ae75706a3e0..29f41340dde 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_debugger.h +++ b/contrib/libs/cxxsupp/openmp/kmp_debugger.h @@ -1,51 +1,51 @@ -#if USE_DEBUGGER -/* - * kmp_debugger.h -- debugger support. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_DEBUGGER_H -#define KMP_DEBUGGER_H - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -/* * This external variable can be set by any debugger to flag to the runtime that we - are currently executing inside a debugger. This will allow the debugger to override - the number of threads spawned in a parallel region by using __kmp_omp_num_threads() (below). 
- * When __kmp_debugging is TRUE, each team and each task gets a unique integer identifier - that can be used by debugger to conveniently identify teams and tasks. - * The debugger has access to __kmp_omp_debug_struct_info which contains information - about the OpenMP library's important internal structures. This access will allow the debugger - to read detailed information from the typical OpenMP constructs (teams, threads, tasking, etc. ) - during a debugging session and offer detailed and useful information which the user can probe - about the OpenMP portion of their code. - */ -extern int __kmp_debugging; /* Boolean whether currently debugging OpenMP RTL */ -// Return number of threads specified by the debugger for given parallel region. -/* The ident field, which represents a source file location, is used to check if the - debugger has changed the number of threads for the parallel region at source file - location ident. This way, specific parallel regions' number of threads can be changed - at the debugger's request. - */ -int __kmp_omp_num_threads( ident_t const * ident ); - -#ifdef __cplusplus - } // extern "C" -#endif // __cplusplus - - -#endif // KMP_DEBUGGER_H - -#endif // USE_DEBUGGER +#if USE_DEBUGGER +/* + * kmp_debugger.h -- debugger support. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_DEBUGGER_H +#define KMP_DEBUGGER_H + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +/* * This external variable can be set by any debugger to flag to the runtime that we + are currently executing inside a debugger. 
This will allow the debugger to override + the number of threads spawned in a parallel region by using __kmp_omp_num_threads() (below). + * When __kmp_debugging is TRUE, each team and each task gets a unique integer identifier + that can be used by debugger to conveniently identify teams and tasks. + * The debugger has access to __kmp_omp_debug_struct_info which contains information + about the OpenMP library's important internal structures. This access will allow the debugger + to read detailed information from the typical OpenMP constructs (teams, threads, tasking, etc. ) + during a debugging session and offer detailed and useful information which the user can probe + about the OpenMP portion of their code. + */ +extern int __kmp_debugging; /* Boolean whether currently debugging OpenMP RTL */ +// Return number of threads specified by the debugger for given parallel region. +/* The ident field, which represents a source file location, is used to check if the + debugger has changed the number of threads for the parallel region at source file + location ident. This way, specific parallel regions' number of threads can be changed + at the debugger's request. + */ +int __kmp_omp_num_threads( ident_t const * ident ); + +#ifdef __cplusplus + } // extern "C" +#endif // __cplusplus + + +#endif // KMP_DEBUGGER_H + +#endif // USE_DEBUGGER diff --git a/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp b/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp index db8c20a64c1..c91bb8da3c0 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp @@ -1,2674 +1,2674 @@ -/* - * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -/* - * Dynamic scheduling initialization and dispatch. - * - * NOTE: __kmp_nth is a constant inside of any dispatch loop, however - * it may change values between parallel regions. __kmp_max_nth - * is the largest value __kmp_nth may take, 1 is the smallest. - * - */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_itt.h" -#include "kmp_str.h" -#include "kmp_error.h" -#include "kmp_stats.h" -#if KMP_OS_WINDOWS && KMP_ARCH_X86 - #include -#endif - -#if OMPT_SUPPORT -#include "ompt-internal.h" -#include "ompt-specific.h" -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -// template for type limits -template< typename T > -struct i_maxmin { - static const T mx; - static const T mn; -}; -template<> -struct i_maxmin< int > { - static const int mx = 0x7fffffff; - static const int mn = 0x80000000; -}; -template<> -struct i_maxmin< unsigned int > { - static const unsigned int mx = 0xffffffff; - static const unsigned int mn = 0x00000000; -}; -template<> -struct i_maxmin< long long > { - static const long long mx = 0x7fffffffffffffffLL; - static const long long mn = 0x8000000000000000LL; -}; -template<> -struct i_maxmin< unsigned long long > { - static const unsigned long long mx = 0xffffffffffffffffLL; - static const unsigned long long mn = 0x0000000000000000LL; -}; -//------------------------------------------------------------------------- - -#ifdef KMP_STATIC_STEAL_ENABLED - - // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types - template< typename T > - struct dispatch_private_infoXX_template { - typedef typename traits_t< T >::unsigned_t UT; - typedef 
typename traits_t< T >::signed_t ST; - UT count; // unsigned - T ub; - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - T lb; - ST st; // signed - UT tc; // unsigned - T static_steal_counter; // for static_steal only; maybe better to put after ub - - /* parm[1-4] are used in different ways by different scheduling algorithms */ - - // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN( 32 ) { // compiler does not accept sizeof(T)*4 - T parm1; - T parm2; - T parm3; - T parm4; - }; - - UT ordered_lower; // unsigned - UT ordered_upper; // unsigned - #if KMP_OS_WINDOWS - T last_upper; - #endif /* KMP_OS_WINDOWS */ - }; - -#else /* KMP_STATIC_STEAL_ENABLED */ - - // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types - template< typename T > - struct dispatch_private_infoXX_template { - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - T lb; - T ub; - ST st; // signed - UT tc; // unsigned - - T parm1; - T parm2; - T parm3; - T parm4; - - UT count; // unsigned - - UT ordered_lower; // unsigned - UT ordered_upper; // unsigned - #if KMP_OS_WINDOWS - T last_upper; - #endif /* KMP_OS_WINDOWS */ - }; - -#endif /* KMP_STATIC_STEAL_ENABLED */ - -// replaces dispatch_private_info structure and dispatch_private_info_t type -template< typename T > -struct KMP_ALIGN_CACHE dispatch_private_info_template { - // duplicate alignment here, otherwise size of structure is not correct in our compiler - union KMP_ALIGN_CACHE private_info_tmpl { - dispatch_private_infoXX_template< T > p; - dispatch_private_info64_t p64; - } u; - enum sched_type schedule; /* scheduling algorithm */ - kmp_uint32 ordered; /* ordered clause specified */ - 
kmp_uint32 ordered_bumped; - kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making order - dispatch_private_info * next; /* stack of buffers for nest of serial regions */ - kmp_uint32 nomerge; /* don't merge iters if serialized */ - kmp_uint32 type_size; - enum cons_type pushed_ws; -}; - - -// replaces dispatch_shared_info{32,64} structures and dispatch_shared_info{32,64}_t types -template< typename UT > -struct dispatch_shared_infoXX_template { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile UT iteration; - volatile UT num_done; - volatile UT ordered_iteration; - UT ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size making ordered_iteration scalar -}; - -// replaces dispatch_shared_info structure and dispatch_shared_info_t type -template< typename UT > -struct dispatch_shared_info_template { - // we need union here to keep the structure size - union shared_info_tmpl { - dispatch_shared_infoXX_template< UT > s; - dispatch_shared_info64_t s64; - } u; - volatile kmp_uint32 buffer_index; -}; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#undef USE_TEST_LOCKS - -// test_then_add template (general template should NOT be used) -template< typename T > -static __forceinline T -test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); }; - -template<> -__forceinline kmp_int32 -test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 r; - r = KMP_TEST_THEN_ADD32( p, d ); - return r; -} - -template<> -__forceinline kmp_int64 -test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 r; - r = KMP_TEST_THEN_ADD64( p, d ); - return r; -} - -// test_then_inc_acq template (general template should NOT be used) -template< typename T > -static __forceinline T -test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); 
}; - -template<> -__forceinline kmp_int32 -test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p ) -{ - kmp_int32 r; - r = KMP_TEST_THEN_INC_ACQ32( p ); - return r; -} - -template<> -__forceinline kmp_int64 -test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p ) -{ - kmp_int64 r; - r = KMP_TEST_THEN_INC_ACQ64( p ); - return r; -} - -// test_then_inc template (general template should NOT be used) -template< typename T > -static __forceinline T -test_then_inc( volatile T *p ) { KMP_ASSERT(0); }; - -template<> -__forceinline kmp_int32 -test_then_inc< kmp_int32 >( volatile kmp_int32 *p ) -{ - kmp_int32 r; - r = KMP_TEST_THEN_INC32( p ); - return r; -} - -template<> -__forceinline kmp_int64 -test_then_inc< kmp_int64 >( volatile kmp_int64 *p ) -{ - kmp_int64 r; - r = KMP_TEST_THEN_INC64( p ); - return r; -} - -// compare_and_swap template (general template should NOT be used) -template< typename T > -static __forceinline kmp_int32 -compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); }; - -template<> -__forceinline kmp_int32 -compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s ) -{ - return KMP_COMPARE_AND_STORE_REL32( p, c, s ); -} - -template<> -__forceinline kmp_int32 -compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s ) -{ - return KMP_COMPARE_AND_STORE_REL64( p, c, s ); -} - -/* - Spin wait loop that first does pause, then yield. - Waits until function returns non-zero when called with *spinner and check. - Does NOT put threads to sleep. -#if USE_ITT_BUILD - Arguments: - obj -- is higher-level synchronization object to report to ittnotify. It is used to report - locks consistently. For example, if lock is acquired immediately, its address is - reported to ittnotify via KMP_FSYNC_ACQUIRED(). However, it lock cannot be acquired - immediately and lock routine calls to KMP_WAIT_YIELD(), the later should report the same - address, not an address of low-level spinner. 
-#endif // USE_ITT_BUILD -*/ -template< typename UT > -// ToDo: make inline function (move to header file for icl) -static UT // unsigned 4- or 8-byte type -__kmp_wait_yield( volatile UT * spinner, - UT checker, - kmp_uint32 (* pred)( UT, UT ) - USE_ITT_BUILD_ARG(void * obj) // Higher-level synchronization object, or NULL. - ) -{ - // note: we may not belong to a team at this point +/* + * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +/* + * Dynamic scheduling initialization and dispatch. + * + * NOTE: __kmp_nth is a constant inside of any dispatch loop, however + * it may change values between parallel regions. __kmp_max_nth + * is the largest value __kmp_nth may take, 1 is the smallest. 
+ * + */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_itt.h" +#include "kmp_str.h" +#include "kmp_error.h" +#include "kmp_stats.h" +#if KMP_OS_WINDOWS && KMP_ARCH_X86 + #include +#endif + +#if OMPT_SUPPORT +#include "ompt-internal.h" +#include "ompt-specific.h" +#endif + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +// template for type limits +template< typename T > +struct i_maxmin { + static const T mx; + static const T mn; +}; +template<> +struct i_maxmin< int > { + static const int mx = 0x7fffffff; + static const int mn = 0x80000000; +}; +template<> +struct i_maxmin< unsigned int > { + static const unsigned int mx = 0xffffffff; + static const unsigned int mn = 0x00000000; +}; +template<> +struct i_maxmin< long long > { + static const long long mx = 0x7fffffffffffffffLL; + static const long long mn = 0x8000000000000000LL; +}; +template<> +struct i_maxmin< unsigned long long > { + static const unsigned long long mx = 0xffffffffffffffffLL; + static const unsigned long long mn = 0x0000000000000000LL; +}; +//------------------------------------------------------------------------- + +#ifdef KMP_STATIC_STEAL_ENABLED + + // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types + template< typename T > + struct dispatch_private_infoXX_template { + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; + UT count; // unsigned + T ub; + /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ + T lb; + ST st; // signed + UT tc; // unsigned + T static_steal_counter; // for static_steal only; maybe better to put after ub + + /* parm[1-4] are used in different ways by different scheduling algorithms 
*/ + + // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) + // a) parm3 is properly aligned and + // b) all parm1-4 are in the same cache line. + // Because of parm1-4 are used together, performance seems to be better + // if they are in the same line (not measured though). + + struct KMP_ALIGN( 32 ) { // compiler does not accept sizeof(T)*4 + T parm1; + T parm2; + T parm3; + T parm4; + }; + + UT ordered_lower; // unsigned + UT ordered_upper; // unsigned + #if KMP_OS_WINDOWS + T last_upper; + #endif /* KMP_OS_WINDOWS */ + }; + +#else /* KMP_STATIC_STEAL_ENABLED */ + + // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types + template< typename T > + struct dispatch_private_infoXX_template { + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; + T lb; + T ub; + ST st; // signed + UT tc; // unsigned + + T parm1; + T parm2; + T parm3; + T parm4; + + UT count; // unsigned + + UT ordered_lower; // unsigned + UT ordered_upper; // unsigned + #if KMP_OS_WINDOWS + T last_upper; + #endif /* KMP_OS_WINDOWS */ + }; + +#endif /* KMP_STATIC_STEAL_ENABLED */ + +// replaces dispatch_private_info structure and dispatch_private_info_t type +template< typename T > +struct KMP_ALIGN_CACHE dispatch_private_info_template { + // duplicate alignment here, otherwise size of structure is not correct in our compiler + union KMP_ALIGN_CACHE private_info_tmpl { + dispatch_private_infoXX_template< T > p; + dispatch_private_info64_t p64; + } u; + enum sched_type schedule; /* scheduling algorithm */ + kmp_uint32 ordered; /* ordered clause specified */ + kmp_uint32 ordered_bumped; + kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making order + dispatch_private_info * next; /* stack of buffers for nest of serial regions */ + kmp_uint32 nomerge; /* don't merge iters if serialized */ + kmp_uint32 type_size; + enum cons_type pushed_ws; +}; + + +// replaces 
dispatch_shared_info{32,64} structures and dispatch_shared_info{32,64}_t types +template< typename UT > +struct dispatch_shared_infoXX_template { + /* chunk index under dynamic, number of idle threads under static-steal; + iteration index otherwise */ + volatile UT iteration; + volatile UT num_done; + volatile UT ordered_iteration; + UT ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size making ordered_iteration scalar +}; + +// replaces dispatch_shared_info structure and dispatch_shared_info_t type +template< typename UT > +struct dispatch_shared_info_template { + // we need union here to keep the structure size + union shared_info_tmpl { + dispatch_shared_infoXX_template< UT > s; + dispatch_shared_info64_t s64; + } u; + volatile kmp_uint32 buffer_index; +}; + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#undef USE_TEST_LOCKS + +// test_then_add template (general template should NOT be used) +template< typename T > +static __forceinline T +test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); }; + +template<> +__forceinline kmp_int32 +test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d ) +{ + kmp_int32 r; + r = KMP_TEST_THEN_ADD32( p, d ); + return r; +} + +template<> +__forceinline kmp_int64 +test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d ) +{ + kmp_int64 r; + r = KMP_TEST_THEN_ADD64( p, d ); + return r; +} + +// test_then_inc_acq template (general template should NOT be used) +template< typename T > +static __forceinline T +test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); }; + +template<> +__forceinline kmp_int32 +test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p ) +{ + kmp_int32 r; + r = KMP_TEST_THEN_INC_ACQ32( p ); + return r; +} + +template<> +__forceinline kmp_int64 +test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p ) +{ + kmp_int64 r; + r = KMP_TEST_THEN_INC_ACQ64( p ); + return r; +} + +// 
test_then_inc template (general template should NOT be used) +template< typename T > +static __forceinline T +test_then_inc( volatile T *p ) { KMP_ASSERT(0); }; + +template<> +__forceinline kmp_int32 +test_then_inc< kmp_int32 >( volatile kmp_int32 *p ) +{ + kmp_int32 r; + r = KMP_TEST_THEN_INC32( p ); + return r; +} + +template<> +__forceinline kmp_int64 +test_then_inc< kmp_int64 >( volatile kmp_int64 *p ) +{ + kmp_int64 r; + r = KMP_TEST_THEN_INC64( p ); + return r; +} + +// compare_and_swap template (general template should NOT be used) +template< typename T > +static __forceinline kmp_int32 +compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); }; + +template<> +__forceinline kmp_int32 +compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s ) +{ + return KMP_COMPARE_AND_STORE_REL32( p, c, s ); +} + +template<> +__forceinline kmp_int32 +compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s ) +{ + return KMP_COMPARE_AND_STORE_REL64( p, c, s ); +} + +/* + Spin wait loop that first does pause, then yield. + Waits until function returns non-zero when called with *spinner and check. + Does NOT put threads to sleep. +#if USE_ITT_BUILD + Arguments: + obj -- is higher-level synchronization object to report to ittnotify. It is used to report + locks consistently. For example, if lock is acquired immediately, its address is + reported to ittnotify via KMP_FSYNC_ACQUIRED(). However, it lock cannot be acquired + immediately and lock routine calls to KMP_WAIT_YIELD(), the later should report the same + address, not an address of low-level spinner. +#endif // USE_ITT_BUILD +*/ +template< typename UT > +// ToDo: make inline function (move to header file for icl) +static UT // unsigned 4- or 8-byte type +__kmp_wait_yield( volatile UT * spinner, + UT checker, + kmp_uint32 (* pred)( UT, UT ) + USE_ITT_BUILD_ARG(void * obj) // Higher-level synchronization object, or NULL. 
+ ) +{ + // note: we may not belong to a team at this point volatile UT * spin = spinner; UT check = checker; kmp_uint32 spins; kmp_uint32 (*f) ( UT, UT ) = pred; UT r; - - KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); - KMP_INIT_YIELD( spins ); - // main wait spin loop - while(!f(r = *spin, check)) - { - KMP_FSYNC_SPIN_PREPARE( obj ); - /* GEH - remove this since it was accidentally introduced when kmp_wait was split. - It causes problems with infinite recursion because of exit lock */ - /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) - __kmp_abort_thread(); */ - - // if we are oversubscribed, - // or have waited a bit (and KMP_LIBRARY=throughput, then yield - // pause is in the following code - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } - KMP_FSYNC_SPIN_ACQUIRED( obj ); - return r; -} - -template< typename UT > -static kmp_uint32 __kmp_eq( UT value, UT checker) { - return value == checker; -} - -template< typename UT > -static kmp_uint32 __kmp_neq( UT value, UT checker) { - return value != checker; -} - -template< typename UT > -static kmp_uint32 __kmp_lt( UT value, UT checker) { - return value < checker; -} - -template< typename UT > -static kmp_uint32 __kmp_ge( UT value, UT checker) { - return value >= checker; -} - -template< typename UT > -static kmp_uint32 __kmp_le( UT value, UT checker) { - return value <= checker; -} - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -static void -__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - kmp_info_t *th; - - KMP_DEBUG_ASSERT( gtid_ref ); - - if ( __kmp_env_consistency_check ) { - th = __kmp_threads[*gtid_ref]; - if ( th -> th.th_root -> r.r_active - && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) { -#if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 ); 
-#else - __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL ); -#endif - } - } -} - -template< typename UT > -static void -__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - typedef typename traits_t< UT >::signed_t ST; - dispatch_private_info_template< UT > * pr; - - int gtid = *gtid_ref; -// int cid = *cid_ref; - kmp_info_t *th = __kmp_threads[ gtid ]; - KMP_DEBUG_ASSERT( th -> th.th_dispatch ); - - KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) ); - if ( __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - if ( pr -> pushed_ws != ct_none ) { -#if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 ); -#else - __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL ); -#endif - } - } - - if ( ! th -> th.th_team -> t.t_serialized ) { - dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_sh_current ); - UT lower; - - if ( ! __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - } - lower = pr->u.p.ordered_lower; - - #if ! 
defined( KMP_GOMP_COMPAT ) - if ( __kmp_env_consistency_check ) { - if ( pr->ordered_bumped ) { - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - __kmp_error_construct2( - kmp_i18n_msg_CnsMultipleNesting, - ct_ordered_in_pdo, loc_ref, - & p->stack_data[ p->w_top ] - ); - } - } - #endif /* !defined(KMP_GOMP_COMPAT) */ - - KMP_MB(); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif - - __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT > - USE_ITT_BUILD_ARG( NULL ) - ); - KMP_MB(); /* is this necessary? */ - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif - } - KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) ); -} - -static void -__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - kmp_info_t *th; - - if ( __kmp_env_consistency_check ) { - th = __kmp_threads[*gtid_ref]; - if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) { - __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref ); - } - } -} - -template< typename UT > -static void -__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - typedef typename traits_t< UT >::signed_t ST; - dispatch_private_info_template< UT > * pr; - - int gtid = *gtid_ref; -// int cid = *cid_ref; - kmp_info_t *th = __kmp_threads[ gtid ]; - KMP_DEBUG_ASSERT( th -> th.th_dispatch ); - - KD_TRACE(100, ("__kmp_dispatch_dxo: 
T#%d called\n", gtid ) ); - if ( __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - if ( pr -> pushed_ws != ct_none ) { - __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref ); - } - } - - if ( ! th -> th.th_team -> t.t_serialized ) { - dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_sh_current ); - - if ( ! __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - } - - KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration ); - #if ! defined( KMP_GOMP_COMPAT ) - if ( __kmp_env_consistency_check ) { - if ( pr->ordered_bumped != 0 ) { - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - /* How to test it? - OM */ - __kmp_error_construct2( - kmp_i18n_msg_CnsMultipleNesting, - ct_ordered_in_pdo, loc_ref, - & p->stack_data[ p->w_top ] - ); - } - } - #endif /* !defined(KMP_GOMP_COMPAT) */ - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - pr->ordered_bumped += 1; - - KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n", - gtid, pr->ordered_bumped ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* TODO use general release procedure? */ - test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration ); - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - } - KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) ); -} - -/* Computes and returns x to the power of y, where y must a non-negative integer */ -template< typename UT > -static __forceinline long double -__kmp_pow(long double x, UT y) { - long double s=1.0L; - - KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0); - //KMP_DEBUG_ASSERT(y >= 0); // y is unsigned - while(y) { - if ( y & 1 ) - s *= x; - x *= x; - y >>= 1; - } - return s; -} - -/* Computes and returns the number of unassigned iterations after idx chunks have been assigned - (the total number of unassigned iterations in chunks with index greater than or equal to idx). - __forceinline seems to be broken so that if we __forceinline this function, the behavior is wrong - (one of the unit tests, sch_guided_analytical_basic.cpp, fails) -*/ -template< typename T > -static __inline typename traits_t< T >::unsigned_t -__kmp_dispatch_guided_remaining( - T tc, - typename traits_t< T >::floating_t base, - typename traits_t< T >::unsigned_t idx -) { - /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at - least for ICL 8.1, long double arithmetic may not really have - long double precision, even with /Qlong_double. Currently, we - workaround that in the caller code, by manipulating the FPCW for - Windows* OS on IA-32 architecture. The lack of precision is not - expected to be a correctness issue, though. - */ - typedef typename traits_t< T >::unsigned_t UT; - - long double x = tc * __kmp_pow< UT >(base, idx); - UT r = (UT) x; - if ( x == r ) - return r; - return r + 1; -} - -// Parameters of the guided-iterative algorithm: -// p2 = n * nproc * ( chunk + 1 ) // point of switching to dynamic -// p3 = 1 / ( n * nproc ) // remaining iterations multiplier -// by default n = 2. For example with n = 3 the chunks distribution will be more flat. -// With n = 1 first chunk is the same as for static schedule, e.g. trip / nproc. 
-static int guided_int_param = 2; -static double guided_flt_param = 0.5;// = 1.0 / guided_int_param; - -// UT - unsigned flavor of T, ST - signed flavor of T, -// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8 -template< typename T > -static void -__kmp_dispatch_init( - ident_t * loc, - int gtid, - enum sched_type schedule, - T lb, - T ub, - typename traits_t< T >::signed_t st, - typename traits_t< T >::signed_t chunk, - int push_ws -) { - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - typedef typename traits_t< T >::floating_t DBL; - static const int ___kmp_size_type = sizeof( UT ); - - int active; - T tc; - kmp_info_t * th; - kmp_team_t * team; - kmp_uint32 my_buffer_index; - dispatch_private_info_template< T > * pr; - dispatch_shared_info_template< UT > volatile * sh; - - KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) ); - KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) ); - - if ( ! TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); - -#if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_INIT(); -#endif - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n", - traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) ); - __kmp_str_free( &buff ); - } - #endif - /* setup data */ - th = __kmp_threads[ gtid ]; - team = th -> th.th_team; - active = ! 
team -> t.t_serialized; - th->th.th_ident = loc; - -#if USE_ITT_BUILD - kmp_uint64 cur_chunk = chunk; - int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && - KMP_MASTER_GTID(gtid) && -#if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1; -#endif - if ( ! active ) { - pr = reinterpret_cast< dispatch_private_info_template< T >* > - ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */ - } else { - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - - my_buffer_index = th->th.th_dispatch->th_disp_index ++; - - /* What happens when number of threads changes, need to resize buffer? */ - pr = reinterpret_cast< dispatch_private_info_template< T > * > - ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] ); - sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * > - ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] ); - } - - /* Pick up the nomerge/ordered bits from the scheduling type */ - if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) { - pr->nomerge = TRUE; - schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); - } else { - pr->nomerge = FALSE; - } - pr->type_size = ___kmp_size_type; // remember the size of variables - if ( kmp_ord_lower & schedule ) { - pr->ordered = TRUE; - schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); - } else { - pr->ordered = FALSE; - } - - if ( schedule == kmp_sch_static ) { - schedule = __kmp_static; - } else { - if ( schedule == kmp_sch_runtime ) { - // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if not specified) - schedule = team -> t.t_sched.r_sched_type; - // Detail the schedule if needed (global controls are differentiated appropriately) - if ( schedule == kmp_sch_guided_chunked ) { - schedule = __kmp_guided; - } else if ( schedule == 
kmp_sch_static ) { - schedule = __kmp_static; - } - // Use the chunk size specified by OMP_SCHEDULE (or default if not specified) - chunk = team -> t.t_sched.chunk; -#if USE_ITT_BUILD - cur_chunk = chunk; -#endif - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n", - traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, schedule, chunk ) ); - __kmp_str_free( &buff ); - } - #endif - } else { - if ( schedule == kmp_sch_guided_chunked ) { - schedule = __kmp_guided; - } - if ( chunk <= 0 ) { - chunk = KMP_DEFAULT_CHUNK; - } - } - - if ( schedule == kmp_sch_auto ) { - // mapping and differentiation: in the __kmp_do_serial_initialize() - schedule = __kmp_auto; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n", - traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, schedule, chunk ) ); - __kmp_str_free( &buff ); - } - #endif - } - - /* guided analytical not safe for too many threads */ - if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) { - schedule = kmp_sch_guided_iterative_chunked; - KMP_WARNING( DispatchManyThreads ); - } - pr->u.p.parm1 = chunk; - } - KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper), - "unknown scheduling type" ); - - pr->u.p.count = 0; - - if ( __kmp_env_consistency_check ) { - if ( st == 0 ) { - __kmp_error_construct( - kmp_i18n_msg_CnsLoopIncrZeroProhibited, - ( pr->ordered ? 
ct_pdo_ordered : ct_pdo ), loc - ); - } - } - - tc = ( ub - lb + st ); - if ( st != 1 ) { - if ( st < 0 ) { - if ( lb < ub ) { - tc = 0; // zero-trip - } else { // lb >= ub - tc = (ST)tc / st; // convert to signed division - } - } else { // st > 0 - if ( ub < lb ) { - tc = 0; // zero-trip - } else { // lb >= ub - tc /= st; - } - } - } else if ( ub < lb ) { // st == 1 - tc = 0; // zero-trip - } - - // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing - // when statistics are disabled. - if (schedule == __kmp_static) - { - KMP_COUNT_BLOCK(OMP_FOR_static); - KMP_COUNT_VALUE(FOR_static_iterations, tc); - } - else - { - KMP_COUNT_BLOCK(OMP_FOR_dynamic); - KMP_COUNT_VALUE(FOR_dynamic_iterations, tc); - } - - pr->u.p.lb = lb; - pr->u.p.ub = ub; - pr->u.p.st = st; - pr->u.p.tc = tc; - - #if KMP_OS_WINDOWS - pr->u.p.last_upper = ub + st; - #endif /* KMP_OS_WINDOWS */ - - /* NOTE: only the active parallel region(s) has active ordered sections */ - - if ( active ) { - if ( pr->ordered == 0 ) { - th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error; - th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error; - } else { - pr->ordered_bumped = 0; - - pr->u.p.ordered_lower = 1; - pr->u.p.ordered_upper = 0; - - th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >; - th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >; - } - } - - if ( __kmp_env_consistency_check ) { - enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo; - if ( push_ws ) { - __kmp_push_workshare( gtid, ws, loc ); - pr->pushed_ws = ws; - } else { - __kmp_check_workshare( gtid, ws, loc ); - pr->pushed_ws = ct_none; - } - } - - switch ( schedule ) { - #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) - case kmp_sch_static_steal: - { - T nproc = team->t.t_nproc; - T ntc, init; - - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) ); - - ntc = (tc % chunk ? 
1 : 0) + tc / chunk; - if ( nproc > 1 && ntc >= nproc ) { - T id = __kmp_tid_from_gtid(gtid); - T small_chunk, extras; - - small_chunk = ntc / nproc; - extras = ntc % nproc; - - init = id * small_chunk + ( id < extras ? id : extras ); - pr->u.p.count = init; - pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 ); - - pr->u.p.parm2 = lb; - //pr->pfields.parm3 = 0; // it's not used in static_steal - pr->u.p.parm4 = id; - pr->u.p.st = st; - break; - } else { - KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n", - gtid ) ); - schedule = kmp_sch_static_balanced; - /* too few iterations: fall-through to kmp_sch_static_balanced */ - } // if - /* FALL-THROUGH to static balanced */ - } // case - #endif - case kmp_sch_static_balanced: - { - T nproc = team->t.t_nproc; - T init, limit; - - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n", - gtid ) ); - - if ( nproc > 1 ) { - T id = __kmp_tid_from_gtid(gtid); - - if ( tc < nproc ) { - if ( id < tc ) { - init = id; - limit = id; - pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ - } else { - pr->u.p.count = 1; /* means no more chunks to execute */ - pr->u.p.parm1 = FALSE; - break; - } - } else { - T small_chunk = tc / nproc; - T extras = tc % nproc; - init = id * small_chunk + (id < extras ? id : extras); - limit = init + small_chunk - (id < extras ? 
0 : 1); - pr->u.p.parm1 = (id == nproc - 1); - } - } else { - if ( tc > 0 ) { - init = 0; - limit = tc - 1; - pr->u.p.parm1 = TRUE; - } else { - // zero trip count - pr->u.p.count = 1; /* means no more chunks to execute */ - pr->u.p.parm1 = FALSE; - break; - } - } -#if USE_ITT_BUILD - // Calculate chunk for metadata report - if ( itt_need_metadata_reporting ) - cur_chunk = limit - init + 1; -#endif - if ( st == 1 ) { - pr->u.p.lb = lb + init; - pr->u.p.ub = lb + limit; - } else { - T ub_tmp = lb + limit * st; // calculated upper bound, "ub" is user-defined upper bound - pr->u.p.lb = lb + init * st; - // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly - if ( st > 0 ) { - pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp ); - } else { - pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp ); - } - } - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } - break; - } // case - case kmp_sch_guided_iterative_chunked : - { - T nproc = team->t.t_nproc; - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid)); - - if ( nproc > 1 ) { - if ( (2L * chunk + 1 ) * nproc >= tc ) { - /* chunk size too large, switch to dynamic */ - schedule = kmp_sch_dynamic_chunked; - } else { - // when remaining iters become less than parm2 - switch to dynamic - pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 ); - *(double*)&pr->u.p.parm3 = guided_flt_param / nproc; // may occupy parm3 and parm4 - } - } else { - KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid)); - schedule = kmp_sch_static_greedy; - /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid)); - pr->u.p.parm1 = tc; - } // if - } // case - break; - case kmp_sch_guided_analytical_chunked: - { - T nproc = team->t.t_nproc; - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", 
gtid)); - - if ( nproc > 1 ) { - if ( (2L * chunk + 1 ) * nproc >= tc ) { - /* chunk size too large, switch to dynamic */ - schedule = kmp_sch_dynamic_chunked; - } else { - /* commonly used term: (2 nproc - 1)/(2 nproc) */ - DBL x; - - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* Linux* OS already has 64-bit computation by default for - long double, and on Windows* OS on Intel(R) 64, - /Qlong_double doesn't work. On Windows* OS - on IA-32 architecture, we need to set precision to - 64-bit instead of the default 53-bit. Even though long - double doesn't work on Windows* OS on Intel(R) 64, the - resulting lack of precision is not expected to impact - the correctness of the algorithm, but this has not been - mathematically proven. - */ - // save original FPCW and set precision to 64-bit, as - // Windows* OS on IA-32 architecture defaults to 53-bit - unsigned int oldFpcw = _control87(0,0); - _control87(_PC_64,_MCW_PC); // 0,0x30000 - #endif - /* value used for comparison in solver for cross-over point */ - long double target = ((long double)chunk * 2 + 1) * nproc / tc; - - /* crossover point--chunk indexes equal to or greater than - this point switch to dynamic-style scheduling */ - UT cross; - - /* commonly used term: (2 nproc - 1)/(2 nproc) */ - x = (long double)1.0 - (long double)0.5 / nproc; - - #ifdef KMP_DEBUG - { // test natural alignment - struct _test_a { - char a; - union { - char b; - DBL d; - }; - } t; - ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; - //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long long)natural_alignment ); - KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 ); - } - #endif // KMP_DEBUG - - /* save the term in thread private dispatch structure */ - *(DBL*)&pr->u.p.parm3 = x; - - /* solve for the crossover point to the nearest integer i for which C_i <= chunk */ - { - UT left, right, mid; - long double p; - - /* estimate initial upper and lower bound */ - - /* 
doesn't matter what value right is as long as it is positive, but - it affects performance of the solver - */ - right = 229; - p = __kmp_pow< UT >(x,right); - if ( p > target ) { - do{ - p *= p; - right <<= 1; - } while(p>target && right < (1<<27)); - left = right >> 1; /* lower bound is previous (failed) estimate of upper bound */ - } else { - left = 0; - } - - /* bisection root-finding method */ - while ( left + 1 < right ) { - mid = (left + right) / 2; - if ( __kmp_pow< UT >(x,mid) > target ) { - left = mid; - } else { - right = mid; - } - } // while - cross = right; - } - /* assert sanity of computed crossover point */ - KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target); - - /* save the crossover point in thread private dispatch structure */ - pr->u.p.parm2 = cross; - - // C75803 - #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) ) - #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3) - #else - #define GUIDED_ANALYTICAL_WORKAROUND (x) - #endif - /* dynamic-style scheduling offset */ - pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk; - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - // restore FPCW - _control87(oldFpcw,_MCW_PC); - #endif - } // if - } else { - KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n", - gtid ) ); - schedule = kmp_sch_static_greedy; - /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ - pr->u.p.parm1 = tc; - } // if - } // case - break; - case kmp_sch_static_greedy: - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid)); - pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ? 
- ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc : - tc; - break; - case kmp_sch_static_chunked : - case kmp_sch_dynamic_chunked : - if ( pr->u.p.parm1 <= 0 ) { - pr->u.p.parm1 = KMP_DEFAULT_CHUNK; - } - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid)); - break; - case kmp_sch_trapezoidal : - { - /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ - - T parm1, parm2, parm3, parm4; - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) ); - - parm1 = chunk; - - /* F : size of the first cycle */ - parm2 = ( tc / (2 * team->t.t_nproc) ); - - if ( parm2 < 1 ) { - parm2 = 1; - } - - /* L : size of the last cycle. Make sure the last cycle - * is not larger than the first cycle. - */ - if ( parm1 < 1 ) { - parm1 = 1; - } else if ( parm1 > parm2 ) { - parm1 = parm2; - } - - /* N : number of cycles */ - parm3 = ( parm2 + parm1 ); - parm3 = ( 2 * tc + parm3 - 1) / parm3; - - if ( parm3 < 2 ) { - parm3 = 2; - } - - /* sigma : decreasing incr of the trapezoid */ - parm4 = ( parm3 - 1 ); - parm4 = ( parm2 - parm1 ) / parm4; - - // pointless check, because parm4 >= 0 always - //if ( parm4 < 0 ) { - // parm4 = 0; - //} - - pr->u.p.parm1 = parm1; - pr->u.p.parm2 = parm2; - pr->u.p.parm3 = parm3; - pr->u.p.parm4 = parm4; - } // case - break; - - default: - { - __kmp_msg( - kmp_ms_fatal, // Severity - KMP_MSG( UnknownSchedTypeDetected ), // Primary message - KMP_HNT( GetNewerLibrary ), // Hint - __kmp_msg_null // Variadic argument list terminator - ); - } - break; - } // switch - pr->schedule = schedule; - if ( active ) { - /* The name of this buffer should be my_buffer_index when it's free to use it */ - - KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n", - gtid, my_buffer_index, sh->buffer_index) ); - __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 > - USE_ITT_BUILD_ARG( NULL ) - ); - // Note: 
KMP_WAIT_YIELD() cannot be used there: buffer index and my_buffer_index are - // *always* 32-bit integers. - KMP_MB(); /* is this necessary? */ - KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n", - gtid, my_buffer_index, sh->buffer_index) ); - - th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr; - th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh; -#if USE_ITT_BUILD - if ( pr->ordered ) { - __kmp_itt_ordered_init( gtid ); - }; // if - // Report loop metadata - if ( itt_need_metadata_reporting ) { - // Only report metadata by master of active team at level 1 - kmp_uint64 schedtype = 0; - switch ( schedule ) { - case kmp_sch_static_chunked: - case kmp_sch_static_balanced:// Chunk is calculated in the switch above - break; - case kmp_sch_static_greedy: - cur_chunk = pr->u.p.parm1; - break; - case kmp_sch_dynamic_chunked: - schedtype = 1; - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - schedtype = 2; - break; - default: -// Should we put this case under "static"? 
-// case kmp_sch_static_steal: - schedtype = 3; - break; - } - __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk); - } -#endif /* USE_ITT_BUILD */ - }; // if - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \ - " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \ - " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n", - traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec, - traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec, - traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec, - traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec ); - KD_TRACE(10, ( buff, - gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub, - pr->u.p.st, pr->u.p.tc, pr->u.p.count, - pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1, - pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) ); - __kmp_str_free( &buff ); - } - #endif - #if ( KMP_STATIC_STEAL_ENABLED ) - if ( ___kmp_size_type < 8 ) { - // It cannot be guaranteed that after execution of a loop with some other schedule kind - // all the parm3 variables will contain the same value. - // Even if all parm3 will be the same, it still exists a bad case like using 0 and 1 - // rather than program life-time increment. - // So the dedicated variable is required. The 'static_steal_counter' is used. - if( schedule == kmp_sch_static_steal ) { - // Other threads will inspect this variable when searching for a victim. - // This is a flag showing that other threads may steal from this thread since then. 
- volatile T * p = &pr->u.p.static_steal_counter; - *p = *p + 1; - } - } - #endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING ) - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); - } -#endif -} - -/* - * For ordered loops, either __kmp_dispatch_finish() should be called after - * every iteration, or __kmp_dispatch_finish_chunk() should be called after - * every chunk of iterations. If the ordered section(s) were not executed - * for this iteration (or every iteration in this chunk), we need to set the - * ordered iteration counters so that the next thread can proceed. - */ -template< typename UT > -static void -__kmp_dispatch_finish( int gtid, ident_t *loc ) -{ - typedef typename traits_t< UT >::signed_t ST; - kmp_info_t *th = __kmp_threads[ gtid ]; - - KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) ); - if ( ! 
th -> th.th_team -> t.t_serialized ) { - - dispatch_private_info_template< UT > * pr = - reinterpret_cast< dispatch_private_info_template< UT >* > - ( th->th.th_dispatch->th_dispatch_pr_current ); - dispatch_shared_info_template< UT > volatile * sh = - reinterpret_cast< dispatch_shared_info_template< UT >volatile* > - ( th->th.th_dispatch->th_dispatch_sh_current ); - KMP_DEBUG_ASSERT( pr ); - KMP_DEBUG_ASSERT( sh ); - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - - if ( pr->ordered_bumped ) { - KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", - gtid ) ); - pr->ordered_bumped = 0; - } else { - UT lower = pr->u.p.ordered_lower; - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif - - __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT > - USE_ITT_BUILD_ARG(NULL) - ); - KMP_MB(); /* is this necessary? 
*/ - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif - - test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration ); - } // if - } // if - KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) ); -} - -#ifdef KMP_GOMP_COMPAT - -template< typename UT > -static void -__kmp_dispatch_finish_chunk( int gtid, ident_t *loc ) -{ - typedef typename traits_t< UT >::signed_t ST; - kmp_info_t *th = __kmp_threads[ gtid ]; - - KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) ); - if ( ! th -> th.th_team -> t.t_serialized ) { -// int cid; - dispatch_private_info_template< UT > * pr = - reinterpret_cast< dispatch_private_info_template< UT >* > - ( th->th.th_dispatch->th_dispatch_pr_current ); - dispatch_shared_info_template< UT > volatile * sh = - reinterpret_cast< dispatch_shared_info_template< UT >volatile* > - ( th->th.th_dispatch->th_dispatch_sh_current ); - KMP_DEBUG_ASSERT( pr ); - KMP_DEBUG_ASSERT( sh ); - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - -// for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) { - UT lower = pr->u.p.ordered_lower; - UT upper = pr->u.p.ordered_upper; - UT inc = upper - lower + 1; - - if ( pr->ordered_bumped == inc ) { - KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", - gtid ) ); - pr->ordered_bumped = 0; - } else { - inc -= pr->ordered_bumped; - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish_chunk: T#%%d before wait: " \ - "ordered_iteration:%%%s lower:%%%s upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec, 
traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) ); - __kmp_str_free( &buff ); - } - #endif - - __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT > - USE_ITT_BUILD_ARG(NULL) - ); - - KMP_MB(); /* is this necessary? */ - KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n", - gtid ) ); - pr->ordered_bumped = 0; -//!!!!! TODO check if the inc should be unsigned, or signed??? - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish_chunk: T#%%d after wait: " \ - "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) ); - __kmp_str_free( &buff ); - } - #endif - - test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc); - } -// } - } - KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) ); -} - -#endif /* KMP_GOMP_COMPAT */ - -/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 - * (no more work), then tell OMPT the loop is over. In some cases - * kmp_dispatch_fini() is not called. 
*/ -#if OMPT_SUPPORT && OMPT_TRACE -#define OMPT_LOOP_END \ - if (status == 0) { \ - if (ompt_enabled && \ - ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \ - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \ - ompt_callbacks.ompt_callback(ompt_event_loop_end)( \ - team_info->parallel_id, task_info->task_id); \ - } \ - } -#else -#define OMPT_LOOP_END // no-op -#endif - -template< typename T > -static int -__kmp_dispatch_next( - ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st -) { - - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - typedef typename traits_t< T >::floating_t DBL; -#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) - static const int ___kmp_size_type = sizeof( UT ); -#endif - - // This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule - // is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs - // more than a compile time choice to use static scheduling would.) - KMP_TIME_BLOCK(FOR_dynamic_scheduling); - - int status; - dispatch_private_info_template< T > * pr; - kmp_info_t * th = __kmp_threads[ gtid ]; - kmp_team_t * team = th -> th.th_team; - - KMP_DEBUG_ASSERT( p_lb && p_ub && p_st ); // AC: these cannot be NULL - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? 
*p_st : 0, p_last ) ); - __kmp_str_free( &buff ); - } - #endif - - if ( team -> t.t_serialized ) { - /* NOTE: serialize this dispatch becase we are not at the active level */ - pr = reinterpret_cast< dispatch_private_info_template< T >* > - ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */ - KMP_DEBUG_ASSERT( pr ); - - if ( (status = (pr->u.p.tc != 0)) == 0 ) { - *p_lb = 0; - *p_ub = 0; -// if ( p_last != NULL ) -// *p_last = 0; - if ( p_st != NULL ) - *p_st = 0; - if ( __kmp_env_consistency_check ) { - if ( pr->pushed_ws != ct_none ) { - pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); - } - } - } else if ( pr->nomerge ) { - kmp_int32 last; - T start; - UT limit, trip, init; - ST incr; - T chunk = pr->u.p.parm1; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) ); - - init = chunk * pr->u.p.count++; - trip = pr->u.p.tc - 1; - - if ( (status = (init <= trip)) == 0 ) { - *p_lb = 0; - *p_ub = 0; -// if ( p_last != NULL ) -// *p_last = 0; - if ( p_st != NULL ) - *p_st = 0; - if ( __kmp_env_consistency_check ) { - if ( pr->pushed_ws != ct_none ) { - pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); - } - } - } else { - start = pr->u.p.lb; - limit = chunk + init - 1; - incr = pr->u.p.st; - - if ( (last = (limit >= trip)) != 0 ) { - limit = trip; - #if KMP_OS_WINDOWS - pr->u.p.last_upper = pr->u.p.ub; - #endif /* KMP_OS_WINDOWS */ - } - if ( p_last != NULL ) - *p_last = last; - if ( p_st != NULL ) - *p_st = incr; - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - 
KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - } else { - pr->u.p.tc = 0; - *p_lb = pr->u.p.lb; - *p_ub = pr->u.p.ub; - #if KMP_OS_WINDOWS - pr->u.p.last_upper = *p_ub; - #endif /* KMP_OS_WINDOWS */ - if ( p_last != NULL ) - *p_last = TRUE; - if ( p_st != NULL ) - *p_st = pr->u.p.st; - } // if - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \ - "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) ); - __kmp_str_free( &buff ); - } - #endif -#if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_NEXT(); -#endif - OMPT_LOOP_END; - return status; - } else { - kmp_int32 last = 0; - dispatch_shared_info_template< UT > *sh; - T start; - ST incr; - UT limit, trip, init; - - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - - pr = reinterpret_cast< dispatch_private_info_template< T >* > - ( th->th.th_dispatch->th_dispatch_pr_current ); - KMP_DEBUG_ASSERT( pr ); - sh = reinterpret_cast< dispatch_shared_info_template< UT >* > - ( th->th.th_dispatch->th_dispatch_sh_current ); - KMP_DEBUG_ASSERT( sh ); - - if ( pr->u.p.tc == 0 ) { - // zero trip count - status = 0; - } else { - switch (pr->schedule) { - #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) - case kmp_sch_static_steal: - { - T chunk = pr->u.p.parm1; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) ); - - trip = pr->u.p.tc - 1; - - if ( ___kmp_size_type > 4 ) { - // Other threads do not look into the data of this thread, - // so it's not necessary to make volatile casting. 
- init = ( pr->u.p.count )++; - status = ( init < (UT)pr->u.p.ub ); - } else { - typedef union { - struct { - UT count; - T ub; - } p; - kmp_int64 b; - } union_i4; - // All operations on 'count' or 'ub' must be combined atomically together. - // stealing implemented only for 4-byte indexes - { - union_i4 vold, vnew; - vold.b = *( volatile kmp_int64 * )(&pr->u.p.count); - vnew = vold; - vnew.p.count++; - while( ! KMP_COMPARE_AND_STORE_ACQ64( - ( volatile kmp_int64* )&pr->u.p.count, - *VOLATILE_CAST(kmp_int64 *)&vold.b, - *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) { - KMP_CPU_PAUSE(); - vold.b = *( volatile kmp_int64 * )(&pr->u.p.count); - vnew = vold; - vnew.p.count++; - } - vnew = vold; - init = vnew.p.count; - status = ( init < (UT)vnew.p.ub ) ; - } - - if( !status ) { - kmp_info_t **other_threads = team->t.t_threads; - int while_limit = 10; - int while_index = 0; - - // TODO: algorithm of searching for a victim - // should be cleaned up and measured - while ( ( !status ) && ( while_limit != ++while_index ) ) { - union_i4 vold, vnew; - kmp_int32 remaining; // kmp_int32 because KMP_I4 only - T victimIdx = pr->u.p.parm4; - T oldVictimIdx = victimIdx; - dispatch_private_info_template< T > * victim; - - do { - if( !victimIdx ) { - victimIdx = team->t.t_nproc - 1; - } else { - --victimIdx; - } - victim = reinterpret_cast< dispatch_private_info_template< T >* > - ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current ); - } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx ); - // TODO: think about a proper place of this test - if ( ( !victim ) || - ( (*( volatile T * )&victim->u.p.static_steal_counter) != - (*( volatile T * )&pr->u.p.static_steal_counter) ) ) { - // TODO: delay would be nice - continue; - // the victim is not ready yet to participate in stealing - // because the victim is still in kmp_init_dispatch - } - if ( oldVictimIdx == victimIdx ) { - break; - } - pr->u.p.parm4 = victimIdx; - - while( 1 ) { - vold.b = *( volatile 
kmp_int64 * )( &victim->u.p.count ); - vnew = vold; - - KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip ); - if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) { - break; - } - vnew.p.ub -= (remaining >> 2); - KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip); - #pragma warning( push ) - // disable warning on pointless comparison of unsigned with 0 - #pragma warning( disable: 186 ) - KMP_DEBUG_ASSERT(vnew.p.ub >= 0); - #pragma warning( pop ) - // TODO: Should this be acquire or release? - if ( KMP_COMPARE_AND_STORE_ACQ64( - ( volatile kmp_int64 * )&victim->u.p.count, - *VOLATILE_CAST(kmp_int64 *)&vold.b, - *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) { - status = 1; - while_index = 0; - // now update own count and ub - #if KMP_ARCH_X86 - // stealing executed on non-KMP_ARCH_X86 only - // Atomic 64-bit write on ia32 is - // unavailable, so we do this in steps. - // This code is not tested. - init = vold.p.count; - pr->u.p.ub = 0; - pr->u.p.count = init + 1; - pr->u.p.ub = vnew.p.count; - #else - init = vnew.p.ub; - vold.p.count = init + 1; - // TODO: is it safe and enough? 
- *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b; - #endif // KMP_ARCH_X86 - break; - } // if - KMP_CPU_PAUSE(); - } // while (1) - } // while - } // if - } // if - if ( !status ) { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) *p_st = 0; - } else { - start = pr->u.p.parm2; - init *= chunk; - limit = chunk + init - 1; - incr = pr->u.p.st; - - KMP_DEBUG_ASSERT(init <= trip); - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - if ( p_st != NULL ) *p_st = incr; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - break; - } // case - #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) - case kmp_sch_static_balanced: - { - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) ); - if ( (status = !pr->u.p.count) != 0 ) { /* check if thread has any iteration to do */ - pr->u.p.count = 1; - *p_lb = pr->u.p.lb; - *p_ub = pr->u.p.ub; - last = pr->u.p.parm1; - if ( p_st != NULL ) - *p_st = pr->u.p.st; - } else { /* no iterations to do */ - pr->u.p.lb = pr->u.p.ub + pr->u.p.st; - } - if ( pr->ordered ) { - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } 
// if - } // case - break; - case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was merged here */ - case kmp_sch_static_chunked: - { - T parm1; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n", - gtid ) ); - parm1 = pr->u.p.parm1; - - trip = pr->u.p.tc - 1; - init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid)); - - if ( (status = (init <= trip)) != 0 ) { - start = pr->u.p.lb; - incr = pr->u.p.st; - limit = parm1 + init - 1; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - - if ( p_st != NULL ) *p_st = incr; - - pr->u.p.count += team->t.t_nproc; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } - else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - } // case - break; - - case kmp_sch_dynamic_chunked: - { - T chunk = pr->u.p.parm1; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", - gtid ) ); - - init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration ); - trip = pr->u.p.tc - 1; - - if ( (status = (init <= trip)) == 0 ) { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) *p_st = 0; - } else { - start = pr->u.p.lb; - limit = chunk + init - 1; - incr = pr->u.p.st; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - - if ( p_st != NULL ) *p_st = incr; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - 
pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - } // case - break; - - case kmp_sch_guided_iterative_chunked: - { - T chunkspec = pr->u.p.parm1; - KD_TRACE(100, - ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid)); - trip = pr->u.p.tc; - // Start atomic part of calculations - while(1) { - ST remaining; // signed, because can be < 0 - init = sh->u.s.iteration; // shared value - remaining = trip - init; - if ( remaining <= 0 ) { // AC: need to compare with 0 first - // nothing to do, don't try atomic op - status = 0; - break; - } - if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default - // use dynamic-style shcedule - // atomically inrement iterations, get old value - init = test_then_add( (ST*)&sh->u.s.iteration, (ST)chunkspec ); - remaining = trip - init; - if (remaining <= 0) { - status = 0; // all iterations got by other threads - } else { - // got some iterations to work on - status = 1; - if ( (T)remaining > chunkspec ) { - limit = init + chunkspec - 1; - } else { - last = 1; // the last chunk - limit = init + remaining - 1; - } // if - } // if - break; - } // if - limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc - if ( compare_and_swap( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) { - // CAS was successful, chunk obtained - status = 1; - --limit; - break; - } // if - } // while - if ( status != 0 ) { - start = pr->u.p.lb; - incr = pr->u.p.st; - if ( p_st != NULL ) - *p_st = incr; - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - if ( pr->ordered ) { - 
pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } else { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) - *p_st = 0; - } // if - } // case - break; - - case kmp_sch_guided_analytical_chunked: - { - T chunkspec = pr->u.p.parm1; - UT chunkIdx; - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* for storing original FPCW value for Windows* OS on - IA-32 architecture 8-byte version */ - unsigned int oldFpcw; - unsigned int fpcwSet = 0; - #endif - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n", - gtid ) ); - - trip = pr->u.p.tc; - - KMP_DEBUG_ASSERT(team->t.t_nproc > 1); - KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip); - - while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */ - chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration ); - if ( chunkIdx >= (UT)pr->u.p.parm2 ) { - --trip; - /* use dynamic-style scheduling */ - init = chunkIdx * chunkspec + pr->u.p.count; - /* need to verify init > 0 in case of overflow in the above calculation */ - if ( (status = (init > 0 && init <= trip)) != 0 ) { - limit = init + chunkspec -1; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - } - break; - } else { - /* use exponential-style scheduling */ - /* The following check is to workaround the lack of long double precision on Windows* OS. - This check works around the possible effect that init != 0 for chunkIdx == 0. 
- */ - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* If we haven't already done so, save original - FPCW and set precision to 64-bit, as Windows* OS - on IA-32 architecture defaults to 53-bit */ - if ( !fpcwSet ) { - oldFpcw = _control87(0,0); - _control87(_PC_64,_MCW_PC); - fpcwSet = 0x30000; - } - #endif - if ( chunkIdx ) { - init = __kmp_dispatch_guided_remaining< T >( - trip, *( DBL * )&pr->u.p.parm3, chunkIdx ); - KMP_DEBUG_ASSERT(init); - init = trip - init; - } else - init = 0; - limit = trip - __kmp_dispatch_guided_remaining< T >( - trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 ); - KMP_ASSERT(init <= limit); - if ( init < limit ) { - KMP_DEBUG_ASSERT(limit <= trip); - --limit; - status = 1; - break; - } // if - } // if - } // while (1) - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* restore FPCW if necessary - AC: check fpcwSet flag first because oldFpcw can be uninitialized here - */ - if ( fpcwSet && ( oldFpcw & fpcwSet ) ) - _control87(oldFpcw,_MCW_PC); - #endif - if ( status != 0 ) { - start = pr->u.p.lb; - incr = pr->u.p.st; - if ( p_st != NULL ) - *p_st = incr; - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } - } else { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) - *p_st = 0; - } - } // case - break; - - case kmp_sch_trapezoidal: - { - UT index; - T parm2 = pr->u.p.parm2; - T parm3 = pr->u.p.parm3; - T parm4 = pr->u.p.parm4; - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n", - gtid ) ); - - index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration ); - - init = ( 
index * ( (2*parm2) - (index-1)*parm4 ) ) / 2; - trip = pr->u.p.tc - 1; - - if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) *p_st = 0; - } else { - start = pr->u.p.lb; - limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1; - incr = pr->u.p.st; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - - if ( p_st != NULL ) *p_st = incr; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - } // case - break; - default: - { - status = 0; // to avoid complaints on uninitialized variable use - __kmp_msg( - kmp_ms_fatal, // Severity - KMP_MSG( UnknownSchedTypeDetected ), // Primary message - KMP_HNT( GetNewerLibrary ), // Hint - __kmp_msg_null // Variadic argument list terminator - ); - } - break; - } // switch - } // if tc == 0; - - if ( status == 0 ) { - UT num_done; - - num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done ); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", - traits_t< UT >::spec ); - KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) ); - __kmp_str_free( &buff ); - } - #endif - - if ( (ST)num_done == team->t.t_nproc-1 ) { - /* NOTE: release this buffer to be reused */ - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - sh->u.s.num_done = 0; - sh->u.s.iteration = 0; - - /* TODO replace with general release procedure? */ - if ( pr->ordered ) { - sh->u.s.ordered_iteration = 0; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - sh -> buffer_index += KMP_MAX_DISP_BUF; - KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n", - gtid, sh->buffer_index) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - } // if - if ( __kmp_env_consistency_check ) { - if ( pr->pushed_ws != ct_none ) { - pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); - } - } - - th -> th.th_dispatch -> th_deo_fcn = NULL; - th -> th.th_dispatch -> th_dxo_fcn = NULL; - th -> th.th_dispatch -> th_dispatch_sh_current = NULL; - th -> th.th_dispatch -> th_dispatch_pr_current = NULL; - } // if (status == 0) -#if KMP_OS_WINDOWS - else if ( last ) { - pr->u.p.last_upper = pr->u.p.ub; - } -#endif /* KMP_OS_WINDOWS */ - if ( p_last != NULL && status != 0 ) - *p_last = last; - } // if - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d normal case: " \ - "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? 
*p_st : 0, p_last, status ) ); - __kmp_str_free( &buff ); - } - #endif -#if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_NEXT(); -#endif - OMPT_LOOP_END; - return status; -} - -template< typename T > -static void -__kmp_dist_get_bounds( - ident_t *loc, - kmp_int32 gtid, - kmp_int32 *plastiter, - T *plower, - T *pupper, - typename traits_t< T >::signed_t incr -) { - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; + + KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); + KMP_INIT_YIELD( spins ); + // main wait spin loop + while(!f(r = *spin, check)) + { + KMP_FSYNC_SPIN_PREPARE( obj ); + /* GEH - remove this since it was accidentally introduced when kmp_wait was split. + It causes problems with infinite recursion because of exit lock */ + /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) + __kmp_abort_thread(); */ + + // if we are oversubscribed, + // or have waited a bit (and KMP_LIBRARY=throughput, then yield + // pause is in the following code + KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); + KMP_YIELD_SPIN( spins ); + } + KMP_FSYNC_SPIN_ACQUIRED( obj ); + return r; +} + +template< typename UT > +static kmp_uint32 __kmp_eq( UT value, UT checker) { + return value == checker; +} + +template< typename UT > +static kmp_uint32 __kmp_neq( UT value, UT checker) { + return value != checker; +} + +template< typename UT > +static kmp_uint32 __kmp_lt( UT value, UT checker) { + return value < checker; +} + +template< typename UT > +static kmp_uint32 __kmp_ge( UT value, UT checker) { + return value >= checker; +} + +template< typename UT > +static kmp_uint32 __kmp_le( UT value, UT checker) { + return value <= checker; +} + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +static void +__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + kmp_info_t *th; + + KMP_DEBUG_ASSERT( gtid_ref ); + + if 
( __kmp_env_consistency_check ) { + th = __kmp_threads[*gtid_ref]; + if ( th -> th.th_root -> r.r_active + && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) { +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 ); +#else + __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL ); +#endif + } + } +} + +template< typename UT > +static void +__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + typedef typename traits_t< UT >::signed_t ST; + dispatch_private_info_template< UT > * pr; + + int gtid = *gtid_ref; +// int cid = *cid_ref; + kmp_info_t *th = __kmp_threads[ gtid ]; + KMP_DEBUG_ASSERT( th -> th.th_dispatch ); + + KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) ); + if ( __kmp_env_consistency_check ) { + pr = reinterpret_cast< dispatch_private_info_template< UT >* > + ( th -> th.th_dispatch -> th_dispatch_pr_current ); + if ( pr -> pushed_ws != ct_none ) { +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 ); +#else + __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL ); +#endif + } + } + + if ( ! th -> th.th_team -> t.t_serialized ) { + dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* > + ( th -> th.th_dispatch -> th_dispatch_sh_current ); + UT lower; + + if ( ! __kmp_env_consistency_check ) { + pr = reinterpret_cast< dispatch_private_info_template< UT >* > + ( th -> th.th_dispatch -> th_dispatch_pr_current ); + } + lower = pr->u.p.ordered_lower; + + #if ! 
defined( KMP_GOMP_COMPAT ) + if ( __kmp_env_consistency_check ) { + if ( pr->ordered_bumped ) { + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + __kmp_error_construct2( + kmp_i18n_msg_CnsMultipleNesting, + ct_ordered_in_pdo, loc_ref, + & p->stack_data[ p->w_top ] + ); + } + } + #endif /* !defined(KMP_GOMP_COMPAT) */ + + KMP_MB(); + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); + __kmp_str_free( &buff ); + } + #endif + + __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT > + USE_ITT_BUILD_ARG( NULL ) + ); + KMP_MB(); /* is this necessary? */ + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); + __kmp_str_free( &buff ); + } + #endif + } + KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) ); +} + +static void +__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + kmp_info_t *th; + + if ( __kmp_env_consistency_check ) { + th = __kmp_threads[*gtid_ref]; + if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) { + __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref ); + } + } +} + +template< typename UT > +static void +__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + typedef typename traits_t< UT >::signed_t ST; + dispatch_private_info_template< UT > * pr; + + int gtid = *gtid_ref; +// int cid = *cid_ref; + kmp_info_t *th = __kmp_threads[ gtid ]; + KMP_DEBUG_ASSERT( th -> th.th_dispatch ); + + KD_TRACE(100, ("__kmp_dispatch_dxo: 
T#%d called\n", gtid ) ); + if ( __kmp_env_consistency_check ) { + pr = reinterpret_cast< dispatch_private_info_template< UT >* > + ( th -> th.th_dispatch -> th_dispatch_pr_current ); + if ( pr -> pushed_ws != ct_none ) { + __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref ); + } + } + + if ( ! th -> th.th_team -> t.t_serialized ) { + dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* > + ( th -> th.th_dispatch -> th_dispatch_sh_current ); + + if ( ! __kmp_env_consistency_check ) { + pr = reinterpret_cast< dispatch_private_info_template< UT >* > + ( th -> th.th_dispatch -> th_dispatch_pr_current ); + } + + KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration ); + #if ! defined( KMP_GOMP_COMPAT ) + if ( __kmp_env_consistency_check ) { + if ( pr->ordered_bumped != 0 ) { + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + /* How to test it? - OM */ + __kmp_error_construct2( + kmp_i18n_msg_CnsMultipleNesting, + ct_ordered_in_pdo, loc_ref, + & p->stack_data[ p->w_top ] + ); + } + } + #endif /* !defined(KMP_GOMP_COMPAT) */ + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + pr->ordered_bumped += 1; + + KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n", + gtid, pr->ordered_bumped ) ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* TODO use general release procedure? */ + test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration ); + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + } + KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) ); +} + +/* Computes and returns x to the power of y, where y must a non-negative integer */ +template< typename UT > +static __forceinline long double +__kmp_pow(long double x, UT y) { + long double s=1.0L; + + KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0); + //KMP_DEBUG_ASSERT(y >= 0); // y is unsigned + while(y) { + if ( y & 1 ) + s *= x; + x *= x; + y >>= 1; + } + return s; +} + +/* Computes and returns the number of unassigned iterations after idx chunks have been assigned + (the total number of unassigned iterations in chunks with index greater than or equal to idx). + __forceinline seems to be broken so that if we __forceinline this function, the behavior is wrong + (one of the unit tests, sch_guided_analytical_basic.cpp, fails) +*/ +template< typename T > +static __inline typename traits_t< T >::unsigned_t +__kmp_dispatch_guided_remaining( + T tc, + typename traits_t< T >::floating_t base, + typename traits_t< T >::unsigned_t idx +) { + /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at + least for ICL 8.1, long double arithmetic may not really have + long double precision, even with /Qlong_double. Currently, we + workaround that in the caller code, by manipulating the FPCW for + Windows* OS on IA-32 architecture. The lack of precision is not + expected to be a correctness issue, though. + */ + typedef typename traits_t< T >::unsigned_t UT; + + long double x = tc * __kmp_pow< UT >(base, idx); + UT r = (UT) x; + if ( x == r ) + return r; + return r + 1; +} + +// Parameters of the guided-iterative algorithm: +// p2 = n * nproc * ( chunk + 1 ) // point of switching to dynamic +// p3 = 1 / ( n * nproc ) // remaining iterations multiplier +// by default n = 2. For example with n = 3 the chunks distribution will be more flat. +// With n = 1 first chunk is the same as for static schedule, e.g. trip / nproc. 
+static int guided_int_param = 2; +static double guided_flt_param = 0.5;// = 1.0 / guided_int_param; + +// UT - unsigned flavor of T, ST - signed flavor of T, +// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8 +template< typename T > +static void +__kmp_dispatch_init( + ident_t * loc, + int gtid, + enum sched_type schedule, + T lb, + T ub, + typename traits_t< T >::signed_t st, + typename traits_t< T >::signed_t chunk, + int push_ws +) { + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; + typedef typename traits_t< T >::floating_t DBL; + static const int ___kmp_size_type = sizeof( UT ); + + int active; + T tc; + kmp_info_t * th; + kmp_team_t * team; + kmp_uint32 my_buffer_index; + dispatch_private_info_template< T > * pr; + dispatch_shared_info_template< UT > volatile * sh; + + KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) ); + KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) ); + + if ( ! TCR_4( __kmp_init_parallel ) ) + __kmp_parallel_initialize(); + +#if INCLUDE_SSC_MARKS + SSC_MARK_DISPATCH_INIT(); +#endif + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n", + traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); + KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) ); + __kmp_str_free( &buff ); + } + #endif + /* setup data */ + th = __kmp_threads[ gtid ]; + team = th -> th.th_team; + active = ! 
team -> t.t_serialized; + th->th.th_ident = loc; + +#if USE_ITT_BUILD + kmp_uint64 cur_chunk = chunk; + int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && + KMP_MASTER_GTID(gtid) && +#if OMP_40_ENABLED + th->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1; +#endif + if ( ! active ) { + pr = reinterpret_cast< dispatch_private_info_template< T >* > + ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */ + } else { + KMP_DEBUG_ASSERT( th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); + + my_buffer_index = th->th.th_dispatch->th_disp_index ++; + + /* What happens when number of threads changes, need to resize buffer? */ + pr = reinterpret_cast< dispatch_private_info_template< T > * > + ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] ); + sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * > + ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] ); + } + + /* Pick up the nomerge/ordered bits from the scheduling type */ + if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) { + pr->nomerge = TRUE; + schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); + } else { + pr->nomerge = FALSE; + } + pr->type_size = ___kmp_size_type; // remember the size of variables + if ( kmp_ord_lower & schedule ) { + pr->ordered = TRUE; + schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); + } else { + pr->ordered = FALSE; + } + + if ( schedule == kmp_sch_static ) { + schedule = __kmp_static; + } else { + if ( schedule == kmp_sch_runtime ) { + // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if not specified) + schedule = team -> t.t_sched.r_sched_type; + // Detail the schedule if needed (global controls are differentiated appropriately) + if ( schedule == kmp_sch_guided_chunked ) { + schedule = __kmp_guided; + } else if ( schedule == 
kmp_sch_static ) { + schedule = __kmp_static; + } + // Use the chunk size specified by OMP_SCHEDULE (or default if not specified) + chunk = team -> t.t_sched.chunk; +#if USE_ITT_BUILD + cur_chunk = chunk; +#endif + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n", + traits_t< ST >::spec ); + KD_TRACE(10, ( buff, gtid, schedule, chunk ) ); + __kmp_str_free( &buff ); + } + #endif + } else { + if ( schedule == kmp_sch_guided_chunked ) { + schedule = __kmp_guided; + } + if ( chunk <= 0 ) { + chunk = KMP_DEFAULT_CHUNK; + } + } + + if ( schedule == kmp_sch_auto ) { + // mapping and differentiation: in the __kmp_do_serial_initialize() + schedule = __kmp_auto; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n", + traits_t< ST >::spec ); + KD_TRACE(10, ( buff, gtid, schedule, chunk ) ); + __kmp_str_free( &buff ); + } + #endif + } + + /* guided analytical not safe for too many threads */ + if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) { + schedule = kmp_sch_guided_iterative_chunked; + KMP_WARNING( DispatchManyThreads ); + } + pr->u.p.parm1 = chunk; + } + KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper), + "unknown scheduling type" ); + + pr->u.p.count = 0; + + if ( __kmp_env_consistency_check ) { + if ( st == 0 ) { + __kmp_error_construct( + kmp_i18n_msg_CnsLoopIncrZeroProhibited, + ( pr->ordered ? 
ct_pdo_ordered : ct_pdo ), loc + ); + } + } + + tc = ( ub - lb + st ); + if ( st != 1 ) { + if ( st < 0 ) { + if ( lb < ub ) { + tc = 0; // zero-trip + } else { // lb >= ub + tc = (ST)tc / st; // convert to signed division + } + } else { // st > 0 + if ( ub < lb ) { + tc = 0; // zero-trip + } else { // lb >= ub + tc /= st; + } + } + } else if ( ub < lb ) { // st == 1 + tc = 0; // zero-trip + } + + // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing + // when statistics are disabled. + if (schedule == __kmp_static) + { + KMP_COUNT_BLOCK(OMP_FOR_static); + KMP_COUNT_VALUE(FOR_static_iterations, tc); + } + else + { + KMP_COUNT_BLOCK(OMP_FOR_dynamic); + KMP_COUNT_VALUE(FOR_dynamic_iterations, tc); + } + + pr->u.p.lb = lb; + pr->u.p.ub = ub; + pr->u.p.st = st; + pr->u.p.tc = tc; + + #if KMP_OS_WINDOWS + pr->u.p.last_upper = ub + st; + #endif /* KMP_OS_WINDOWS */ + + /* NOTE: only the active parallel region(s) has active ordered sections */ + + if ( active ) { + if ( pr->ordered == 0 ) { + th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error; + th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error; + } else { + pr->ordered_bumped = 0; + + pr->u.p.ordered_lower = 1; + pr->u.p.ordered_upper = 0; + + th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >; + th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >; + } + } + + if ( __kmp_env_consistency_check ) { + enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo; + if ( push_ws ) { + __kmp_push_workshare( gtid, ws, loc ); + pr->pushed_ws = ws; + } else { + __kmp_check_workshare( gtid, ws, loc ); + pr->pushed_ws = ct_none; + } + } + + switch ( schedule ) { + #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) + case kmp_sch_static_steal: + { + T nproc = team->t.t_nproc; + T ntc, init; + + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) ); + + ntc = (tc % chunk ? 
1 : 0) + tc / chunk; + if ( nproc > 1 && ntc >= nproc ) { + T id = __kmp_tid_from_gtid(gtid); + T small_chunk, extras; + + small_chunk = ntc / nproc; + extras = ntc % nproc; + + init = id * small_chunk + ( id < extras ? id : extras ); + pr->u.p.count = init; + pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 ); + + pr->u.p.parm2 = lb; + //pr->pfields.parm3 = 0; // it's not used in static_steal + pr->u.p.parm4 = id; + pr->u.p.st = st; + break; + } else { + KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n", + gtid ) ); + schedule = kmp_sch_static_balanced; + /* too few iterations: fall-through to kmp_sch_static_balanced */ + } // if + /* FALL-THROUGH to static balanced */ + } // case + #endif + case kmp_sch_static_balanced: + { + T nproc = team->t.t_nproc; + T init, limit; + + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n", + gtid ) ); + + if ( nproc > 1 ) { + T id = __kmp_tid_from_gtid(gtid); + + if ( tc < nproc ) { + if ( id < tc ) { + init = id; + limit = id; + pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ + } else { + pr->u.p.count = 1; /* means no more chunks to execute */ + pr->u.p.parm1 = FALSE; + break; + } + } else { + T small_chunk = tc / nproc; + T extras = tc % nproc; + init = id * small_chunk + (id < extras ? id : extras); + limit = init + small_chunk - (id < extras ? 
0 : 1); + pr->u.p.parm1 = (id == nproc - 1); + } + } else { + if ( tc > 0 ) { + init = 0; + limit = tc - 1; + pr->u.p.parm1 = TRUE; + } else { + // zero trip count + pr->u.p.count = 1; /* means no more chunks to execute */ + pr->u.p.parm1 = FALSE; + break; + } + } +#if USE_ITT_BUILD + // Calculate chunk for metadata report + if ( itt_need_metadata_reporting ) + cur_chunk = limit - init + 1; +#endif + if ( st == 1 ) { + pr->u.p.lb = lb + init; + pr->u.p.ub = lb + limit; + } else { + T ub_tmp = lb + limit * st; // calculated upper bound, "ub" is user-defined upper bound + pr->u.p.lb = lb + init * st; + // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly + if ( st > 0 ) { + pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp ); + } else { + pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp ); + } + } + if ( pr->ordered ) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + } + break; + } // case + case kmp_sch_guided_iterative_chunked : + { + T nproc = team->t.t_nproc; + KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid)); + + if ( nproc > 1 ) { + if ( (2L * chunk + 1 ) * nproc >= tc ) { + /* chunk size too large, switch to dynamic */ + schedule = kmp_sch_dynamic_chunked; + } else { + // when remaining iters become less than parm2 - switch to dynamic + pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 ); + *(double*)&pr->u.p.parm3 = guided_flt_param / nproc; // may occupy parm3 and parm4 + } + } else { + KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid)); + schedule = kmp_sch_static_greedy; + /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ + KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid)); + pr->u.p.parm1 = tc; + } // if + } // case + break; + case kmp_sch_guided_analytical_chunked: + { + T nproc = team->t.t_nproc; + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", 
gtid)); + + if ( nproc > 1 ) { + if ( (2L * chunk + 1 ) * nproc >= tc ) { + /* chunk size too large, switch to dynamic */ + schedule = kmp_sch_dynamic_chunked; + } else { + /* commonly used term: (2 nproc - 1)/(2 nproc) */ + DBL x; + + #if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* Linux* OS already has 64-bit computation by default for + long double, and on Windows* OS on Intel(R) 64, + /Qlong_double doesn't work. On Windows* OS + on IA-32 architecture, we need to set precision to + 64-bit instead of the default 53-bit. Even though long + double doesn't work on Windows* OS on Intel(R) 64, the + resulting lack of precision is not expected to impact + the correctness of the algorithm, but this has not been + mathematically proven. + */ + // save original FPCW and set precision to 64-bit, as + // Windows* OS on IA-32 architecture defaults to 53-bit + unsigned int oldFpcw = _control87(0,0); + _control87(_PC_64,_MCW_PC); // 0,0x30000 + #endif + /* value used for comparison in solver for cross-over point */ + long double target = ((long double)chunk * 2 + 1) * nproc / tc; + + /* crossover point--chunk indexes equal to or greater than + this point switch to dynamic-style scheduling */ + UT cross; + + /* commonly used term: (2 nproc - 1)/(2 nproc) */ + x = (long double)1.0 - (long double)0.5 / nproc; + + #ifdef KMP_DEBUG + { // test natural alignment + struct _test_a { + char a; + union { + char b; + DBL d; + }; + } t; + ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; + //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long long)natural_alignment ); + KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 ); + } + #endif // KMP_DEBUG + + /* save the term in thread private dispatch structure */ + *(DBL*)&pr->u.p.parm3 = x; + + /* solve for the crossover point to the nearest integer i for which C_i <= chunk */ + { + UT left, right, mid; + long double p; + + /* estimate initial upper and lower bound */ + + /* 
doesn't matter what value right is as long as it is positive, but + it affects performance of the solver + */ + right = 229; + p = __kmp_pow< UT >(x,right); + if ( p > target ) { + do{ + p *= p; + right <<= 1; + } while(p>target && right < (1<<27)); + left = right >> 1; /* lower bound is previous (failed) estimate of upper bound */ + } else { + left = 0; + } + + /* bisection root-finding method */ + while ( left + 1 < right ) { + mid = (left + right) / 2; + if ( __kmp_pow< UT >(x,mid) > target ) { + left = mid; + } else { + right = mid; + } + } // while + cross = right; + } + /* assert sanity of computed crossover point */ + KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target); + + /* save the crossover point in thread private dispatch structure */ + pr->u.p.parm2 = cross; + + // C75803 + #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) ) + #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3) + #else + #define GUIDED_ANALYTICAL_WORKAROUND (x) + #endif + /* dynamic-style scheduling offset */ + pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk; + #if KMP_OS_WINDOWS && KMP_ARCH_X86 + // restore FPCW + _control87(oldFpcw,_MCW_PC); + #endif + } // if + } else { + KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n", + gtid ) ); + schedule = kmp_sch_static_greedy; + /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ + pr->u.p.parm1 = tc; + } // if + } // case + break; + case kmp_sch_static_greedy: + KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid)); + pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ? 
+ ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc : + tc; + break; + case kmp_sch_static_chunked : + case kmp_sch_dynamic_chunked : + if ( pr->u.p.parm1 <= 0 ) { + pr->u.p.parm1 = KMP_DEFAULT_CHUNK; + } + KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid)); + break; + case kmp_sch_trapezoidal : + { + /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ + + T parm1, parm2, parm3, parm4; + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) ); + + parm1 = chunk; + + /* F : size of the first cycle */ + parm2 = ( tc / (2 * team->t.t_nproc) ); + + if ( parm2 < 1 ) { + parm2 = 1; + } + + /* L : size of the last cycle. Make sure the last cycle + * is not larger than the first cycle. + */ + if ( parm1 < 1 ) { + parm1 = 1; + } else if ( parm1 > parm2 ) { + parm1 = parm2; + } + + /* N : number of cycles */ + parm3 = ( parm2 + parm1 ); + parm3 = ( 2 * tc + parm3 - 1) / parm3; + + if ( parm3 < 2 ) { + parm3 = 2; + } + + /* sigma : decreasing incr of the trapezoid */ + parm4 = ( parm3 - 1 ); + parm4 = ( parm2 - parm1 ) / parm4; + + // pointless check, because parm4 >= 0 always + //if ( parm4 < 0 ) { + // parm4 = 0; + //} + + pr->u.p.parm1 = parm1; + pr->u.p.parm2 = parm2; + pr->u.p.parm3 = parm3; + pr->u.p.parm4 = parm4; + } // case + break; + + default: + { + __kmp_msg( + kmp_ms_fatal, // Severity + KMP_MSG( UnknownSchedTypeDetected ), // Primary message + KMP_HNT( GetNewerLibrary ), // Hint + __kmp_msg_null // Variadic argument list terminator + ); + } + break; + } // switch + pr->schedule = schedule; + if ( active ) { + /* The name of this buffer should be my_buffer_index when it's free to use it */ + + KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n", + gtid, my_buffer_index, sh->buffer_index) ); + __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 > + USE_ITT_BUILD_ARG( NULL ) + ); + // Note: 
KMP_WAIT_YIELD() cannot be used there: buffer index and my_buffer_index are + // *always* 32-bit integers. + KMP_MB(); /* is this necessary? */ + KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n", + gtid, my_buffer_index, sh->buffer_index) ); + + th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr; + th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh; +#if USE_ITT_BUILD + if ( pr->ordered ) { + __kmp_itt_ordered_init( gtid ); + }; // if + // Report loop metadata + if ( itt_need_metadata_reporting ) { + // Only report metadata by master of active team at level 1 + kmp_uint64 schedtype = 0; + switch ( schedule ) { + case kmp_sch_static_chunked: + case kmp_sch_static_balanced:// Chunk is calculated in the switch above + break; + case kmp_sch_static_greedy: + cur_chunk = pr->u.p.parm1; + break; + case kmp_sch_dynamic_chunked: + schedtype = 1; + break; + case kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + schedtype = 2; + break; + default: +// Should we put this case under "static"? 
+// case kmp_sch_static_steal: + schedtype = 3; + break; + } + __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk); + } +#endif /* USE_ITT_BUILD */ + }; // if + + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \ + " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \ + " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n", + traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec, + traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec, + traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec, + traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec ); + KD_TRACE(10, ( buff, + gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub, + pr->u.p.st, pr->u.p.tc, pr->u.p.count, + pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1, + pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) ); + __kmp_str_free( &buff ); + } + #endif + #if ( KMP_STATIC_STEAL_ENABLED ) + if ( ___kmp_size_type < 8 ) { + // It cannot be guaranteed that after execution of a loop with some other schedule kind + // all the parm3 variables will contain the same value. + // Even if all parm3 will be the same, it still exists a bad case like using 0 and 1 + // rather than program life-time increment. + // So the dedicated variable is required. The 'static_steal_counter' is used. + if( schedule == kmp_sch_static_steal ) { + // Other threads will inspect this variable when searching for a victim. + // This is a flag showing that other threads may steal from this thread since then. 
+ volatile T * p = &pr->u.p.static_steal_counter; + *p = *p + 1; + } + } + #endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING ) + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, team_info->microtask); + } +#endif +} + +/* + * For ordered loops, either __kmp_dispatch_finish() should be called after + * every iteration, or __kmp_dispatch_finish_chunk() should be called after + * every chunk of iterations. If the ordered section(s) were not executed + * for this iteration (or every iteration in this chunk), we need to set the + * ordered iteration counters so that the next thread can proceed. + */ +template< typename UT > +static void +__kmp_dispatch_finish( int gtid, ident_t *loc ) +{ + typedef typename traits_t< UT >::signed_t ST; + kmp_info_t *th = __kmp_threads[ gtid ]; + + KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) ); + if ( ! 
th -> th.th_team -> t.t_serialized ) { + + dispatch_private_info_template< UT > * pr = + reinterpret_cast< dispatch_private_info_template< UT >* > + ( th->th.th_dispatch->th_dispatch_pr_current ); + dispatch_shared_info_template< UT > volatile * sh = + reinterpret_cast< dispatch_shared_info_template< UT >volatile* > + ( th->th.th_dispatch->th_dispatch_sh_current ); + KMP_DEBUG_ASSERT( pr ); + KMP_DEBUG_ASSERT( sh ); + KMP_DEBUG_ASSERT( th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); + + if ( pr->ordered_bumped ) { + KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", + gtid ) ); + pr->ordered_bumped = 0; + } else { + UT lower = pr->u.p.ordered_lower; + + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); + __kmp_str_free( &buff ); + } + #endif + + __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT > + USE_ITT_BUILD_ARG(NULL) + ); + KMP_MB(); /* is this necessary? 
*/ + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); + __kmp_str_free( &buff ); + } + #endif + + test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration ); + } // if + } // if + KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) ); +} + +#ifdef KMP_GOMP_COMPAT + +template< typename UT > +static void +__kmp_dispatch_finish_chunk( int gtid, ident_t *loc ) +{ + typedef typename traits_t< UT >::signed_t ST; + kmp_info_t *th = __kmp_threads[ gtid ]; + + KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) ); + if ( ! th -> th.th_team -> t.t_serialized ) { +// int cid; + dispatch_private_info_template< UT > * pr = + reinterpret_cast< dispatch_private_info_template< UT >* > + ( th->th.th_dispatch->th_dispatch_pr_current ); + dispatch_shared_info_template< UT > volatile * sh = + reinterpret_cast< dispatch_shared_info_template< UT >volatile* > + ( th->th.th_dispatch->th_dispatch_sh_current ); + KMP_DEBUG_ASSERT( pr ); + KMP_DEBUG_ASSERT( sh ); + KMP_DEBUG_ASSERT( th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); + +// for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) { + UT lower = pr->u.p.ordered_lower; + UT upper = pr->u.p.ordered_upper; + UT inc = upper - lower + 1; + + if ( pr->ordered_bumped == inc ) { + KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", + gtid ) ); + pr->ordered_bumped = 0; + } else { + inc -= pr->ordered_bumped; + + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_finish_chunk: T#%%d before wait: " \ + "ordered_iteration:%%%s lower:%%%s upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec, 
traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) ); + __kmp_str_free( &buff ); + } + #endif + + __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT > + USE_ITT_BUILD_ARG(NULL) + ); + + KMP_MB(); /* is this necessary? */ + KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n", + gtid ) ); + pr->ordered_bumped = 0; +//!!!!! TODO check if the inc should be unsigned, or signed??? + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_finish_chunk: T#%%d after wait: " \ + "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) ); + __kmp_str_free( &buff ); + } + #endif + + test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc); + } +// } + } + KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) ); +} + +#endif /* KMP_GOMP_COMPAT */ + +/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 + * (no more work), then tell OMPT the loop is over. In some cases + * kmp_dispatch_fini() is not called. 
*/ +#if OMPT_SUPPORT && OMPT_TRACE +#define OMPT_LOOP_END \ + if (status == 0) { \ + if (ompt_enabled && \ + ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \ + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \ + ompt_callbacks.ompt_callback(ompt_event_loop_end)( \ + team_info->parallel_id, task_info->task_id); \ + } \ + } +#else +#define OMPT_LOOP_END // no-op +#endif + +template< typename T > +static int +__kmp_dispatch_next( + ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st +) { + + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; + typedef typename traits_t< T >::floating_t DBL; +#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) + static const int ___kmp_size_type = sizeof( UT ); +#endif + + // This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule + // is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs + // more than a compile time choice to use static scheduling would.) + KMP_TIME_BLOCK(FOR_dynamic_scheduling); + + int status; + dispatch_private_info_template< T > * pr; + kmp_info_t * th = __kmp_threads[ gtid ]; + kmp_team_t * team = th -> th.th_team; + + KMP_DEBUG_ASSERT( p_lb && p_ub && p_st ); // AC: these cannot be NULL + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); + KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? 
*p_st : 0, p_last ) ); + __kmp_str_free( &buff ); + } + #endif + + if ( team -> t.t_serialized ) { + /* NOTE: serialize this dispatch becase we are not at the active level */ + pr = reinterpret_cast< dispatch_private_info_template< T >* > + ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */ + KMP_DEBUG_ASSERT( pr ); + + if ( (status = (pr->u.p.tc != 0)) == 0 ) { + *p_lb = 0; + *p_ub = 0; +// if ( p_last != NULL ) +// *p_last = 0; + if ( p_st != NULL ) + *p_st = 0; + if ( __kmp_env_consistency_check ) { + if ( pr->pushed_ws != ct_none ) { + pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); + } + } + } else if ( pr->nomerge ) { + kmp_int32 last; + T start; + UT limit, trip, init; + ST incr; + T chunk = pr->u.p.parm1; + + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) ); + + init = chunk * pr->u.p.count++; + trip = pr->u.p.tc - 1; + + if ( (status = (init <= trip)) == 0 ) { + *p_lb = 0; + *p_ub = 0; +// if ( p_last != NULL ) +// *p_last = 0; + if ( p_st != NULL ) + *p_st = 0; + if ( __kmp_env_consistency_check ) { + if ( pr->pushed_ws != ct_none ) { + pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); + } + } + } else { + start = pr->u.p.lb; + limit = chunk + init - 1; + incr = pr->u.p.st; + + if ( (last = (limit >= trip)) != 0 ) { + limit = trip; + #if KMP_OS_WINDOWS + pr->u.p.last_upper = pr->u.p.ub; + #endif /* KMP_OS_WINDOWS */ + } + if ( p_last != NULL ) + *p_last = last; + if ( p_st != NULL ) + *p_st = incr; + if ( incr == 1 ) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if ( pr->ordered ) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + 
KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } // if + } // if + } else { + pr->u.p.tc = 0; + *p_lb = pr->u.p.lb; + *p_ub = pr->u.p.ub; + #if KMP_OS_WINDOWS + pr->u.p.last_upper = *p_ub; + #endif /* KMP_OS_WINDOWS */ + if ( p_last != NULL ) + *p_last = TRUE; + if ( p_st != NULL ) + *p_st = pr->u.p.st; + } // if + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \ + "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); + KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) ); + __kmp_str_free( &buff ); + } + #endif +#if INCLUDE_SSC_MARKS + SSC_MARK_DISPATCH_NEXT(); +#endif + OMPT_LOOP_END; + return status; + } else { + kmp_int32 last = 0; + dispatch_shared_info_template< UT > *sh; + T start; + ST incr; + UT limit, trip, init; + + KMP_DEBUG_ASSERT( th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); + + pr = reinterpret_cast< dispatch_private_info_template< T >* > + ( th->th.th_dispatch->th_dispatch_pr_current ); + KMP_DEBUG_ASSERT( pr ); + sh = reinterpret_cast< dispatch_shared_info_template< UT >* > + ( th->th.th_dispatch->th_dispatch_sh_current ); + KMP_DEBUG_ASSERT( sh ); + + if ( pr->u.p.tc == 0 ) { + // zero trip count + status = 0; + } else { + switch (pr->schedule) { + #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) + case kmp_sch_static_steal: + { + T chunk = pr->u.p.parm1; + + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) ); + + trip = pr->u.p.tc - 1; + + if ( ___kmp_size_type > 4 ) { + // Other threads do not look into the data of this thread, + // so it's not necessary to make volatile casting. 
+ init = ( pr->u.p.count )++; + status = ( init < (UT)pr->u.p.ub ); + } else { + typedef union { + struct { + UT count; + T ub; + } p; + kmp_int64 b; + } union_i4; + // All operations on 'count' or 'ub' must be combined atomically together. + // stealing implemented only for 4-byte indexes + { + union_i4 vold, vnew; + vold.b = *( volatile kmp_int64 * )(&pr->u.p.count); + vnew = vold; + vnew.p.count++; + while( ! KMP_COMPARE_AND_STORE_ACQ64( + ( volatile kmp_int64* )&pr->u.p.count, + *VOLATILE_CAST(kmp_int64 *)&vold.b, + *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) { + KMP_CPU_PAUSE(); + vold.b = *( volatile kmp_int64 * )(&pr->u.p.count); + vnew = vold; + vnew.p.count++; + } + vnew = vold; + init = vnew.p.count; + status = ( init < (UT)vnew.p.ub ) ; + } + + if( !status ) { + kmp_info_t **other_threads = team->t.t_threads; + int while_limit = 10; + int while_index = 0; + + // TODO: algorithm of searching for a victim + // should be cleaned up and measured + while ( ( !status ) && ( while_limit != ++while_index ) ) { + union_i4 vold, vnew; + kmp_int32 remaining; // kmp_int32 because KMP_I4 only + T victimIdx = pr->u.p.parm4; + T oldVictimIdx = victimIdx; + dispatch_private_info_template< T > * victim; + + do { + if( !victimIdx ) { + victimIdx = team->t.t_nproc - 1; + } else { + --victimIdx; + } + victim = reinterpret_cast< dispatch_private_info_template< T >* > + ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current ); + } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx ); + // TODO: think about a proper place of this test + if ( ( !victim ) || + ( (*( volatile T * )&victim->u.p.static_steal_counter) != + (*( volatile T * )&pr->u.p.static_steal_counter) ) ) { + // TODO: delay would be nice + continue; + // the victim is not ready yet to participate in stealing + // because the victim is still in kmp_init_dispatch + } + if ( oldVictimIdx == victimIdx ) { + break; + } + pr->u.p.parm4 = victimIdx; + + while( 1 ) { + vold.b = *( volatile 
kmp_int64 * )( &victim->u.p.count ); + vnew = vold; + + KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip ); + if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) { + break; + } + vnew.p.ub -= (remaining >> 2); + KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip); + #pragma warning( push ) + // disable warning on pointless comparison of unsigned with 0 + #pragma warning( disable: 186 ) + KMP_DEBUG_ASSERT(vnew.p.ub >= 0); + #pragma warning( pop ) + // TODO: Should this be acquire or release? + if ( KMP_COMPARE_AND_STORE_ACQ64( + ( volatile kmp_int64 * )&victim->u.p.count, + *VOLATILE_CAST(kmp_int64 *)&vold.b, + *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) { + status = 1; + while_index = 0; + // now update own count and ub + #if KMP_ARCH_X86 + // stealing executed on non-KMP_ARCH_X86 only + // Atomic 64-bit write on ia32 is + // unavailable, so we do this in steps. + // This code is not tested. + init = vold.p.count; + pr->u.p.ub = 0; + pr->u.p.count = init + 1; + pr->u.p.ub = vnew.p.count; + #else + init = vnew.p.ub; + vold.p.count = init + 1; + // TODO: is it safe and enough? 
+ *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b; + #endif // KMP_ARCH_X86 + break; + } // if + KMP_CPU_PAUSE(); + } // while (1) + } // while + } // if + } // if + if ( !status ) { + *p_lb = 0; + *p_ub = 0; + if ( p_st != NULL ) *p_st = 0; + } else { + start = pr->u.p.parm2; + init *= chunk; + limit = chunk + init - 1; + incr = pr->u.p.st; + + KMP_DEBUG_ASSERT(init <= trip); + if ( (last = (limit >= trip)) != 0 ) + limit = trip; + if ( p_st != NULL ) *p_st = incr; + + if ( incr == 1 ) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if ( pr->ordered ) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } // if + } // if + break; + } // case + #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 ) + case kmp_sch_static_balanced: + { + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) ); + if ( (status = !pr->u.p.count) != 0 ) { /* check if thread has any iteration to do */ + pr->u.p.count = 1; + *p_lb = pr->u.p.lb; + *p_ub = pr->u.p.ub; + last = pr->u.p.parm1; + if ( p_st != NULL ) + *p_st = pr->u.p.st; + } else { /* no iterations to do */ + pr->u.p.lb = pr->u.p.ub + pr->u.p.st; + } + if ( pr->ordered ) { + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } 
// if + } // case + break; + case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was merged here */ + case kmp_sch_static_chunked: + { + T parm1; + + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n", + gtid ) ); + parm1 = pr->u.p.parm1; + + trip = pr->u.p.tc - 1; + init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid)); + + if ( (status = (init <= trip)) != 0 ) { + start = pr->u.p.lb; + incr = pr->u.p.st; + limit = parm1 + init - 1; + + if ( (last = (limit >= trip)) != 0 ) + limit = trip; + + if ( p_st != NULL ) *p_st = incr; + + pr->u.p.count += team->t.t_nproc; + + if ( incr == 1 ) { + *p_lb = start + init; + *p_ub = start + limit; + } + else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if ( pr->ordered ) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } // if + } // if + } // case + break; + + case kmp_sch_dynamic_chunked: + { + T chunk = pr->u.p.parm1; + + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", + gtid ) ); + + init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration ); + trip = pr->u.p.tc - 1; + + if ( (status = (init <= trip)) == 0 ) { + *p_lb = 0; + *p_ub = 0; + if ( p_st != NULL ) *p_st = 0; + } else { + start = pr->u.p.lb; + limit = chunk + init - 1; + incr = pr->u.p.st; + + if ( (last = (limit >= trip)) != 0 ) + limit = trip; + + if ( p_st != NULL ) *p_st = incr; + + if ( incr == 1 ) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if ( pr->ordered ) { + 
pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } // if + } // if + } // case + break; + + case kmp_sch_guided_iterative_chunked: + { + T chunkspec = pr->u.p.parm1; + KD_TRACE(100, + ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid)); + trip = pr->u.p.tc; + // Start atomic part of calculations + while(1) { + ST remaining; // signed, because can be < 0 + init = sh->u.s.iteration; // shared value + remaining = trip - init; + if ( remaining <= 0 ) { // AC: need to compare with 0 first + // nothing to do, don't try atomic op + status = 0; + break; + } + if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default + // use dynamic-style shcedule + // atomically inrement iterations, get old value + init = test_then_add( (ST*)&sh->u.s.iteration, (ST)chunkspec ); + remaining = trip - init; + if (remaining <= 0) { + status = 0; // all iterations got by other threads + } else { + // got some iterations to work on + status = 1; + if ( (T)remaining > chunkspec ) { + limit = init + chunkspec - 1; + } else { + last = 1; // the last chunk + limit = init + remaining - 1; + } // if + } // if + break; + } // if + limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc + if ( compare_and_swap( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) { + // CAS was successful, chunk obtained + status = 1; + --limit; + break; + } // if + } // while + if ( status != 0 ) { + start = pr->u.p.lb; + incr = pr->u.p.st; + if ( p_st != NULL ) + *p_st = incr; + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + if ( pr->ordered ) { + 
pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } // if + } else { + *p_lb = 0; + *p_ub = 0; + if ( p_st != NULL ) + *p_st = 0; + } // if + } // case + break; + + case kmp_sch_guided_analytical_chunked: + { + T chunkspec = pr->u.p.parm1; + UT chunkIdx; + #if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* for storing original FPCW value for Windows* OS on + IA-32 architecture 8-byte version */ + unsigned int oldFpcw; + unsigned int fpcwSet = 0; + #endif + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n", + gtid ) ); + + trip = pr->u.p.tc; + + KMP_DEBUG_ASSERT(team->t.t_nproc > 1); + KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip); + + while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */ + chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration ); + if ( chunkIdx >= (UT)pr->u.p.parm2 ) { + --trip; + /* use dynamic-style scheduling */ + init = chunkIdx * chunkspec + pr->u.p.count; + /* need to verify init > 0 in case of overflow in the above calculation */ + if ( (status = (init > 0 && init <= trip)) != 0 ) { + limit = init + chunkspec -1; + + if ( (last = (limit >= trip)) != 0 ) + limit = trip; + } + break; + } else { + /* use exponential-style scheduling */ + /* The following check is to workaround the lack of long double precision on Windows* OS. + This check works around the possible effect that init != 0 for chunkIdx == 0. 
+ */ + #if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* If we haven't already done so, save original + FPCW and set precision to 64-bit, as Windows* OS + on IA-32 architecture defaults to 53-bit */ + if ( !fpcwSet ) { + oldFpcw = _control87(0,0); + _control87(_PC_64,_MCW_PC); + fpcwSet = 0x30000; + } + #endif + if ( chunkIdx ) { + init = __kmp_dispatch_guided_remaining< T >( + trip, *( DBL * )&pr->u.p.parm3, chunkIdx ); + KMP_DEBUG_ASSERT(init); + init = trip - init; + } else + init = 0; + limit = trip - __kmp_dispatch_guided_remaining< T >( + trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 ); + KMP_ASSERT(init <= limit); + if ( init < limit ) { + KMP_DEBUG_ASSERT(limit <= trip); + --limit; + status = 1; + break; + } // if + } // if + } // while (1) + #if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* restore FPCW if necessary + AC: check fpcwSet flag first because oldFpcw can be uninitialized here + */ + if ( fpcwSet && ( oldFpcw & fpcwSet ) ) + _control87(oldFpcw,_MCW_PC); + #endif + if ( status != 0 ) { + start = pr->u.p.lb; + incr = pr->u.p.st; + if ( p_st != NULL ) + *p_st = incr; + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + if ( pr->ordered ) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } + } else { + *p_lb = 0; + *p_ub = 0; + if ( p_st != NULL ) + *p_st = 0; + } + } // case + break; + + case kmp_sch_trapezoidal: + { + UT index; + T parm2 = pr->u.p.parm2; + T parm3 = pr->u.p.parm3; + T parm4 = pr->u.p.parm4; + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n", + gtid ) ); + + index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration ); + + init = ( 
index * ( (2*parm2) - (index-1)*parm4 ) ) / 2; + trip = pr->u.p.tc - 1; + + if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) { + *p_lb = 0; + *p_ub = 0; + if ( p_st != NULL ) *p_st = 0; + } else { + start = pr->u.p.lb; + limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1; + incr = pr->u.p.st; + + if ( (last = (limit >= trip)) != 0 ) + limit = trip; + + if ( p_st != NULL ) *p_st = incr; + + if ( incr == 1 ) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if ( pr->ordered ) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t< UT >::spec, traits_t< UT >::spec ); + KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); + __kmp_str_free( &buff ); + } + #endif + } // if + } // if + } // case + break; + default: + { + status = 0; // to avoid complaints on uninitialized variable use + __kmp_msg( + kmp_ms_fatal, // Severity + KMP_MSG( UnknownSchedTypeDetected ), // Primary message + KMP_HNT( GetNewerLibrary ), // Hint + __kmp_msg_null // Variadic argument list terminator + ); + } + break; + } // switch + } // if tc == 0; + + if ( status == 0 ) { + UT num_done; + + num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done ); + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", + traits_t< UT >::spec ); + KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) ); + __kmp_str_free( &buff ); + } + #endif + + if ( (ST)num_done == team->t.t_nproc-1 ) { + /* NOTE: release this buffer to be reused */ + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + sh->u.s.num_done = 0; + sh->u.s.iteration = 0; + + /* TODO replace with general release procedure? */ + if ( pr->ordered ) { + sh->u.s.ordered_iteration = 0; + } + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + sh -> buffer_index += KMP_MAX_DISP_BUF; + KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n", + gtid, sh->buffer_index) ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + } // if + if ( __kmp_env_consistency_check ) { + if ( pr->pushed_ws != ct_none ) { + pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); + } + } + + th -> th.th_dispatch -> th_deo_fcn = NULL; + th -> th.th_dispatch -> th_dxo_fcn = NULL; + th -> th.th_dispatch -> th_dispatch_sh_current = NULL; + th -> th.th_dispatch -> th_dispatch_pr_current = NULL; + } // if (status == 0) +#if KMP_OS_WINDOWS + else if ( last ) { + pr->u.p.last_upper = pr->u.p.ub; + } +#endif /* KMP_OS_WINDOWS */ + if ( p_last != NULL && status != 0 ) + *p_last = last; + } // if + + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d normal case: " \ + "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); + KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? 
*p_st : 0, p_last, status ) ); + __kmp_str_free( &buff ); + } + #endif +#if INCLUDE_SSC_MARKS + SSC_MARK_DISPATCH_NEXT(); +#endif + OMPT_LOOP_END; + return status; +} + +template< typename T > +static void +__kmp_dist_get_bounds( + ident_t *loc, + kmp_int32 gtid, + kmp_int32 *plastiter, + T *plower, + T *pupper, + typename traits_t< T >::signed_t incr +) { + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; kmp_uint32 team_id; kmp_uint32 nteams; UT trip_count; kmp_team_t *team; - kmp_info_t * th; - - KMP_DEBUG_ASSERT( plastiter && plower && pupper ); - KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< T >::spec ); - KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) ); - __kmp_str_free( &buff ); - } - #endif - - if( __kmp_env_consistency_check ) { - if( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); - } - if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { - // The loop is illegal. 
- // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); - } - } - th = __kmp_threads[gtid]; - team = th->th.th_team; - #if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; - #endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); - - // compute global trip count - if( incr == 1 ) { - trip_count = *pupper - *plower + 1; - } else if(incr == -1) { - trip_count = *plower - *pupper + 1; - } else { - trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case - } - - if( trip_count <= nteams ) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); // Unknown static scheduling type. 
- // only some teams get single iteration, others get nothing - if( team_id < trip_count ) { - *pupper = *plower = *plower + team_id * incr; - } else { - *plower = *pupper + incr; // zero-trip loop - } - if( plastiter != NULL ) - *plastiter = ( team_id == trip_count - 1 ); - } else { - if( __kmp_static == kmp_sch_static_balanced ) { + kmp_info_t * th; + + KMP_DEBUG_ASSERT( plastiter && plower && pupper ); + KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid)); + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\ + "iter=(%%%s, %%%s, %%%s) signed?<%s>\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, + traits_t< T >::spec ); + KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) ); + __kmp_str_free( &buff ); + } + #endif + + if( __kmp_env_consistency_check ) { + if( incr == 0 ) { + __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); + } + if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { + // The loop is illegal. 
+ // Some zero-trip loops maintained by compiler, e.g.: + // for(i=10;i<0;++i) // lower >= upper - run-time check + // for(i=0;i>10;--i) // lower <= upper - run-time check + // for(i=0;i>10;++i) // incr > 0 - compile-time check + // for(i=10;i<0;--i) // incr < 0 - compile-time check + // Compiler does not check the following illegal loops: + // for(i=0;i<10;i+=incr) // where incr<0 + // for(i=10;i>0;i-=incr) // where incr<0 + __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); + } + } + th = __kmp_threads[gtid]; + team = th->th.th_team; + #if OMP_40_ENABLED + KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct + nteams = th->th.th_teams_size.nteams; + #endif + team_id = team->t.t_master_tid; + KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); + + // compute global trip count + if( incr == 1 ) { + trip_count = *pupper - *plower + 1; + } else if(incr == -1) { + trip_count = *plower - *pupper + 1; + } else { + trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case + } + + if( trip_count <= nteams ) { + KMP_DEBUG_ASSERT( + __kmp_static == kmp_sch_static_greedy || \ + __kmp_static == kmp_sch_static_balanced + ); // Unknown static scheduling type. + // only some teams get single iteration, others get nothing + if( team_id < trip_count ) { + *pupper = *plower = *plower + team_id * incr; + } else { + *plower = *pupper + incr; // zero-trip loop + } + if( plastiter != NULL ) + *plastiter = ( team_id == trip_count - 1 ); + } else { + if( __kmp_static == kmp_sch_static_balanced ) { UT chunk = trip_count / nteams; UT extras = trip_count % nteams; - *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) ); - *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr ); - if( plastiter != NULL ) - *plastiter = ( team_id == nteams - 1 ); - } else { + *plower += incr * ( team_id * chunk + ( team_id < extras ? 
team_id : extras ) ); + *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr ); + if( plastiter != NULL ) + *plastiter = ( team_id == nteams - 1 ); + } else { T chunk_inc_count = - ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr; + ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr; T upper = *pupper; - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. - *plower += team_id * chunk_inc_count; - *pupper = *plower + chunk_inc_count - incr; - // Check/correct bounds if needed - if( incr > 0 ) { - if( *pupper < *plower ) - *pupper = i_maxmin< T >::mx; - if( plastiter != NULL ) - *plastiter = *plower <= upper && *pupper > upper - incr; - if( *pupper > upper ) - *pupper = upper; // tracker C73258 - } else { - if( *pupper > *plower ) - *pupper = i_maxmin< T >::mn; - if( plastiter != NULL ) - *plastiter = *plower >= upper && *pupper < upper - incr; - if( *pupper < upper ) - *pupper = upper; // tracker C73258 - } - } - } -} - -//----------------------------------------------------------------------------------------- -// Dispatch routines -// Transfer call to template< type T > -// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule, -// T lb, T ub, ST st, ST chunk ) -extern "C" { - -/*! -@ingroup WORK_SHARING -@{ -@param loc Source location -@param gtid Global thread id -@param schedule Schedule type -@param lb Lower bound -@param ub Upper bound -@param st Step (or increment if you prefer) -@param chunk The chunk size to block with - -This function prepares the runtime to start a dynamically scheduled for loop, saving the loop arguments. -These functions are all identical apart from the types of the arguments. 
-*/ - -void -__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} -/*! -See @ref __kmpc_dispatch_init_4 -*/ -void -__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} - -/*! -See @ref __kmpc_dispatch_init_4 -*/ -void -__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int64 lb, kmp_int64 ub, - kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} - -/*! -See @ref __kmpc_dispatch_init_4 -*/ -void -__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint64 lb, kmp_uint64 ub, - kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} - -/*! -See @ref __kmpc_dispatch_init_4 - -Difference from __kmpc_dispatch_init set of functions is these functions -are called for composite distribute parallel for construct. Thus before -regular iterations dispatching we need to calc per-team iteration space. - -These functions are all identical apart from the types of the arguments. 
-*/ -void -__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} - -void -__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} - -void -__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} - -void -__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); -} - -/*! 
-@param loc Source code location -@param gtid Global thread id -@param p_last Pointer to a flag set to one if this is the last chunk or zero otherwise -@param p_lb Pointer to the lower bound for the next chunk of work -@param p_ub Pointer to the upper bound for the next chunk of work -@param p_st Pointer to the stride for the next chunk of work -@return one if there is work to be done, zero otherwise - -Get the next dynamically allocated chunk of work for this thread. -If there is no more work, then the lb,ub and stride need not be modified. -*/ -int -__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ) -{ - return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); -} - -/*! -See @ref __kmpc_dispatch_next_4 -*/ -int -__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ) -{ - return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); -} - -/*! -See @ref __kmpc_dispatch_next_4 -*/ -int -__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ) -{ - return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); -} - -/*! -See @ref __kmpc_dispatch_next_4 -*/ -int -__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ) -{ - return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); -} - -/*! -@param loc Source code location -@param gtid Global thread id - -Mark the end of a dynamic loop. -*/ -void -__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); -} - -/*! -See @ref __kmpc_dispatch_fini_4 -*/ -void -__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); -} - -/*! 
-See @ref __kmpc_dispatch_fini_4 -*/ -void -__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); -} - -/*! -See @ref __kmpc_dispatch_fini_4 -*/ -void -__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); -} -/*! @} */ - -//----------------------------------------------------------------------------------------- -//Non-template routines from kmp_dispatch.c used in other sources - -kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) { - return value == checker; -} - -kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) { - return value != checker; -} - -kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) { - return value < checker; -} - -kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) { - return value >= checker; -} - -kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) { - return value <= checker; -} -kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) { - return value == checker; -} - -kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) { - return value != checker; -} - -kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) { - return value < checker; -} - -kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) { - return value >= checker; -} - -kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) { - return value <= checker; -} - -kmp_uint32 -__kmp_wait_yield_4(volatile kmp_uint32 * spinner, - kmp_uint32 checker, - kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 ) - , void * obj // Higher-level synchronization object, or NULL. - ) -{ - // note: we may not belong to a team at this point + KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); + // Unknown static scheduling type. 
+ *plower += team_id * chunk_inc_count; + *pupper = *plower + chunk_inc_count - incr; + // Check/correct bounds if needed + if( incr > 0 ) { + if( *pupper < *plower ) + *pupper = i_maxmin< T >::mx; + if( plastiter != NULL ) + *plastiter = *plower <= upper && *pupper > upper - incr; + if( *pupper > upper ) + *pupper = upper; // tracker C73258 + } else { + if( *pupper > *plower ) + *pupper = i_maxmin< T >::mn; + if( plastiter != NULL ) + *plastiter = *plower >= upper && *pupper < upper - incr; + if( *pupper < upper ) + *pupper = upper; // tracker C73258 + } + } + } +} + +//----------------------------------------------------------------------------------------- +// Dispatch routines +// Transfer call to template< type T > +// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule, +// T lb, T ub, ST st, ST chunk ) +extern "C" { + +/*! +@ingroup WORK_SHARING +@{ +@param loc Source location +@param gtid Global thread id +@param schedule Schedule type +@param lb Lower bound +@param ub Upper bound +@param st Step (or increment if you prefer) +@param chunk The chunk size to block with + +This function prepares the runtime to start a dynamically scheduled for loop, saving the loop arguments. +These functions are all identical apart from the types of the arguments. +*/ + +void +__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} +/*! +See @ref __kmpc_dispatch_init_4 +*/ +void +__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} + +/*! 
+See @ref __kmpc_dispatch_init_4 +*/ +void +__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int64 lb, kmp_int64 ub, + kmp_int64 st, kmp_int64 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} + +/*! +See @ref __kmpc_dispatch_init_4 +*/ +void +__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_uint64 lb, kmp_uint64 ub, + kmp_int64 st, kmp_int64 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} + +/*! +See @ref __kmpc_dispatch_init_4 + +Difference from __kmpc_dispatch_init set of functions is these functions +are called for composite distribute parallel for construct. Thus before +regular iterations dispatching we need to calc per-team iteration space. + +These functions are all identical apart from the types of the arguments. +*/ +void +__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st ); + __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} + +void +__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st ); + __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} + +void +__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dist_get_bounds< 
kmp_int64 >( loc, gtid, p_last, &lb, &ub, st ); + __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} + +void +__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st ); + __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +} + +/*! +@param loc Source code location +@param gtid Global thread id +@param p_last Pointer to a flag set to one if this is the last chunk or zero otherwise +@param p_lb Pointer to the lower bound for the next chunk of work +@param p_ub Pointer to the upper bound for the next chunk of work +@param p_st Pointer to the stride for the next chunk of work +@return one if there is work to be done, zero otherwise + +Get the next dynamically allocated chunk of work for this thread. +If there is no more work, then the lb,ub and stride need not be modified. +*/ +int +__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ) +{ + return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +} + +/*! +See @ref __kmpc_dispatch_next_4 +*/ +int +__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ) +{ + return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +} + +/*! +See @ref __kmpc_dispatch_next_4 +*/ +int +__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ) +{ + return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +} + +/*! 
+See @ref __kmpc_dispatch_next_4 +*/ +int +__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ) +{ + return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +} + +/*! +@param loc Source code location +@param gtid Global thread id + +Mark the end of a dynamic loop. +*/ +void +__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); +} + +/*! +See @ref __kmpc_dispatch_fini_4 +*/ +void +__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); +} + +/*! +See @ref __kmpc_dispatch_fini_4 +*/ +void +__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); +} + +/*! +See @ref __kmpc_dispatch_fini_4 +*/ +void +__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); +} +/*! @} */ + +//----------------------------------------------------------------------------------------- +//Non-template routines from kmp_dispatch.c used in other sources + +kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) { + return value == checker; +} + +kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) { + return value != checker; +} + +kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) { + return value < checker; +} + +kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) { + return value >= checker; +} + +kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) { + return value <= checker; +} +kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) { + return value == checker; +} + +kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) { + return value != checker; +} + +kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) { + return value < checker; +} + +kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) { + return 
value >= checker; +} + +kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) { + return value <= checker; +} + +kmp_uint32 +__kmp_wait_yield_4(volatile kmp_uint32 * spinner, + kmp_uint32 checker, + kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 ) + , void * obj // Higher-level synchronization object, or NULL. + ) +{ + // note: we may not belong to a team at this point volatile kmp_uint32 * spin = spinner; kmp_uint32 check = checker; kmp_uint32 spins; kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred; kmp_uint32 r; - - KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); - KMP_INIT_YIELD( spins ); - // main wait spin loop - while(!f(r = TCR_4(*spin), check)) { - KMP_FSYNC_SPIN_PREPARE( obj ); - /* GEH - remove this since it was accidentally introduced when kmp_wait was split. - It causes problems with infinite recursion because of exit lock */ - /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) - __kmp_abort_thread(); */ - - /* if we have waited a bit, or are oversubscribed, yield */ - /* pause is in the following code */ - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } - KMP_FSYNC_SPIN_ACQUIRED( obj ); - return r; -} - -kmp_uint64 -__kmp_wait_yield_8( volatile kmp_uint64 * spinner, - kmp_uint64 checker, - kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 ) - , void * obj // Higher-level synchronization object, or NULL. - ) -{ - // note: we may not belong to a team at this point + + KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); + KMP_INIT_YIELD( spins ); + // main wait spin loop + while(!f(r = TCR_4(*spin), check)) { + KMP_FSYNC_SPIN_PREPARE( obj ); + /* GEH - remove this since it was accidentally introduced when kmp_wait was split. 
+ It causes problems with infinite recursion because of exit lock */ + /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) + __kmp_abort_thread(); */ + + /* if we have waited a bit, or are oversubscribed, yield */ + /* pause is in the following code */ + KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); + KMP_YIELD_SPIN( spins ); + } + KMP_FSYNC_SPIN_ACQUIRED( obj ); + return r; +} + +kmp_uint64 +__kmp_wait_yield_8( volatile kmp_uint64 * spinner, + kmp_uint64 checker, + kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 ) + , void * obj // Higher-level synchronization object, or NULL. + ) +{ + // note: we may not belong to a team at this point volatile kmp_uint64 * spin = spinner; kmp_uint64 check = checker; kmp_uint32 spins; kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred; kmp_uint64 r; - - KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); - KMP_INIT_YIELD( spins ); - // main wait spin loop - while(!f(r = *spin, check)) - { - KMP_FSYNC_SPIN_PREPARE( obj ); - /* GEH - remove this since it was accidentally introduced when kmp_wait was split. 
- It causes problems with infinite recursion because of exit lock */ - /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) - __kmp_abort_thread(); */ - - // if we are oversubscribed, - // or have waited a bit (and KMP_LIBARRY=throughput, then yield - // pause is in the following code - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } - KMP_FSYNC_SPIN_ACQUIRED( obj ); - return r; -} - -} // extern "C" - -#ifdef KMP_GOMP_COMPAT - -void -__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 lb, kmp_int32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); -} - -void -__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); -} - -void -__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int64 lb, kmp_int64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); -} - -void -__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); -} - -void -__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); -} - -void -__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); -} - -void -__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); -} - -void -__kmp_aux_dispatch_fini_chunk_8u( 
ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); -} - -#endif /* KMP_GOMP_COMPAT */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - + + KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); + KMP_INIT_YIELD( spins ); + // main wait spin loop + while(!f(r = *spin, check)) + { + KMP_FSYNC_SPIN_PREPARE( obj ); + /* GEH - remove this since it was accidentally introduced when kmp_wait was split. + It causes problems with infinite recursion because of exit lock */ + /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) + __kmp_abort_thread(); */ + + // if we are oversubscribed, + // or have waited a bit (and KMP_LIBARRY=throughput, then yield + // pause is in the following code + KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); + KMP_YIELD_SPIN( spins ); + } + KMP_FSYNC_SPIN_ACQUIRED( obj ); + return r; +} + +} // extern "C" + +#ifdef KMP_GOMP_COMPAT + +void +__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int32 lb, kmp_int32 ub, kmp_int32 st, + kmp_int32 chunk, int push_ws ) +{ + __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, + push_ws ); +} + +void +__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, + kmp_int32 chunk, int push_ws ) +{ + __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, + push_ws ); +} + +void +__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_int64 lb, kmp_int64 ub, kmp_int64 st, + kmp_int64 chunk, int push_ws ) +{ + __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, + push_ws ); +} + +void +__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, + kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, + kmp_int64 chunk, int push_ws ) +{ + 
__kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, + push_ws ); +} + +void +__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); +} + +void +__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); +} + +void +__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); +} + +void +__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid ) +{ + __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); +} + +#endif /* KMP_GOMP_COMPAT */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + diff --git a/contrib/libs/cxxsupp/openmp/kmp_environment.c b/contrib/libs/cxxsupp/openmp/kmp_environment.c index f1da9604911..75090d6c32c 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_environment.c +++ b/contrib/libs/cxxsupp/openmp/kmp_environment.c @@ -1,596 +1,596 @@ -/* - * kmp_environment.c -- Handle environment variables OS-independently. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -/* - ------------------------------------------------------------------------------------------------ - We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of - loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv()) - unavailable. getenv() apparently gets a clean copy of the env variables as they existed - at the start of the run. 
- JH 12/23/2002 - ------------------------------------------------------------------------------------------------ - On Windows* OS, there are two environments (at least, see below): - - 1. Environment maintained by Windows* OS on IA-32 architecture. - Accessible through GetEnvironmentVariable(), - SetEnvironmentVariable(), and GetEnvironmentStrings(). - - 2. Environment maintained by C RTL. Accessible through getenv(), putenv(). - - putenv() function updates both C and Windows* OS on IA-32 architecture. getenv() function - search for variables in C RTL environment only. Windows* OS on IA-32 architecture functions work *only* - with Windows* OS on IA-32 architecture. - - Windows* OS on IA-32 architecture maintained by OS, so there is always only one Windows* OS on - IA-32 architecture per process. Changes in Windows* OS on IA-32 architecture are process-visible. - - C environment maintained by C RTL. Multiple copies of C RTL may be present in the process, and - each C RTL maintains its own environment. :-( - - Thus, proper way to work with environment on Windows* OS is: - - 1. Set variables with putenv() function -- both C and Windows* OS on - IA-32 architecture are being updated. Windows* OS on - IA-32 architecture may be considered as primary target, - while updating C RTL environment is a free bonus. - - 2. Get variables with GetEnvironmentVariable() -- getenv() does not - search Windows* OS on IA-32 architecture, and can not see variables - set with SetEnvironmentVariable(). - - 2007-04-05 -- lev - ------------------------------------------------------------------------------------------------ -*/ - -#include "kmp_environment.h" - -#include "kmp_os.h" // KMP_OS_*. -#include "kmp.h" // -#include "kmp_str.h" // __kmp_str_*(). -#include "kmp_i18n.h" - -#if KMP_OS_UNIX - #include // getenv, setenv, unsetenv. - #include // strlen, strcpy. 
- #if KMP_OS_DARWIN - #include - #define environ (*_NSGetEnviron()) - #else - extern char * * environ; - #endif -#elif KMP_OS_WINDOWS - #include // GetEnvironmentVariable, SetEnvironmentVariable, GetLastError. -#else - #error Unknown or unsupported OS. -#endif - - -// TODO: Eliminate direct memory allocations, use string operations instead. - -static inline -void * -allocate( - size_t size -) { - void * ptr = KMP_INTERNAL_MALLOC( size ); - if ( ptr == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - return ptr; -} // allocate - - -char * -__kmp_env_get( char const * name ) { - - char * result = NULL; - - #if KMP_OS_UNIX - char const * value = getenv( name ); - if ( value != NULL ) { - size_t len = KMP_STRLEN( value ) + 1; - result = (char *) KMP_INTERNAL_MALLOC( len ); - if ( result == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - KMP_STRNCPY_S( result, len, value, len ); - }; // if - #elif KMP_OS_WINDOWS - /* - We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of - loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv()) - unavailable. getenv() apparently gets a clean copy of the env variables as they existed - at the start of the run. - JH 12/23/2002 - */ - DWORD rc; - rc = GetEnvironmentVariable( name, NULL, 0 ); - if ( ! rc ) { - DWORD error = GetLastError(); - if ( error != ERROR_ENVVAR_NOT_FOUND ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - // Variable is not found, it's ok, just continue. - } else { - DWORD len = rc; - result = (char *) KMP_INTERNAL_MALLOC( len ); - if ( result == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - rc = GetEnvironmentVariable( name, result, len ); - if ( ! rc ) { - // GetEnvironmentVariable() may return 0 if variable is empty. - // In such a case GetLastError() returns ERROR_SUCCESS. 
- DWORD error = GetLastError(); - if ( error != ERROR_SUCCESS ) { - // Unexpected error. The variable should be in the environment, - // and buffer should be large enough. - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - KMP_INTERNAL_FREE( (void *) result ); - result = NULL; - }; // if - }; // if - }; // if - #else - #error Unknown or unsupported OS. - #endif - - return result; - -} // func __kmp_env_get - - -// TODO: Find and replace all regular free() with __kmp_env_free(). - -void -__kmp_env_free( char const * * value ) { - - KMP_DEBUG_ASSERT( value != NULL ); - KMP_INTERNAL_FREE( (void *) * value ); - * value = NULL; - -} // func __kmp_env_free - - - -int -__kmp_env_exists( char const * name ) { - - #if KMP_OS_UNIX - char const * value = getenv( name ); - return ( ( value == NULL ) ? ( 0 ) : ( 1 ) ); - #elif KMP_OS_WINDOWS - DWORD rc; - rc = GetEnvironmentVariable( name, NULL, 0 ); - if ( rc == 0 ) { - DWORD error = GetLastError(); - if ( error != ERROR_ENVVAR_NOT_FOUND ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - return 0; - }; // if - return 1; - #else - #error Unknown or unsupported OS. - #endif - -} // func __kmp_env_exists - - - -void -__kmp_env_set( char const * name, char const * value, int overwrite ) { - - #if KMP_OS_UNIX - int rc = setenv( name, value, overwrite ); - if ( rc != 0 ) { - // Dead code. I tried to put too many variables into Linux* OS - // environment on IA-32 architecture. When application consumes - // more than ~2.5 GB of memory, entire system feels bad. Sometimes - // application is killed (by OS?), sometimes system stops - // responding... But this error message never appears. --ln - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEnvVar, name ), - KMP_HNT( NotEnoughMemory ), - __kmp_msg_null - ); - }; // if - #elif KMP_OS_WINDOWS - BOOL rc; - if ( ! 
overwrite ) { - rc = GetEnvironmentVariable( name, NULL, 0 ); - if ( rc ) { - // Variable exists, do not overwrite. - return; - }; // if - DWORD error = GetLastError(); - if ( error != ERROR_ENVVAR_NOT_FOUND ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - }; // if - rc = SetEnvironmentVariable( name, value ); - if ( ! rc ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - #else - #error Unknown or unsupported OS. - #endif - -} // func __kmp_env_set - - - -void -__kmp_env_unset( char const * name ) { - - #if KMP_OS_UNIX - unsetenv( name ); - #elif KMP_OS_WINDOWS - BOOL rc = SetEnvironmentVariable( name, NULL ); - if ( ! rc ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - #else - #error Unknown or unsupported OS. - #endif - -} // func __kmp_env_unset - -// ------------------------------------------------------------------------------------------------- - -/* - Intel OpenMP RTL string representation of environment: just a string of characters, variables - are separated with vertical bars, e. g.: - - "KMP_WARNINGS=0|KMP_AFFINITY=compact|" - - Empty variables are allowed and ignored: - - "||KMP_WARNINGS=1||" - -*/ - -static -void -___kmp_env_blk_parse_string( - kmp_env_blk_t * block, // M: Env block to fill. - char const * env // I: String to parse. -) { - - char const chr_delimiter = '|'; - char const str_delimiter[] = { chr_delimiter, 0 }; - - char * bulk = NULL; - kmp_env_var_t * vars = NULL; - int count = 0; // Number of used elements in vars array. - int delimiters = 0; // Number of delimiters in input string. - - // Copy original string, we will modify the copy. - bulk = __kmp_str_format( "%s", env ); - - // Loop thru all the vars in environment block. 
Count delimiters (maximum number of variables - // is number of delimiters plus one). - { - char const * ptr = bulk; - for ( ; ; ) { - ptr = strchr( ptr, chr_delimiter ); - if ( ptr == NULL ) { - break; - }; // if - ++ delimiters; - ptr += 1; - }; // forever - } - - // Allocate vars array. - vars = (kmp_env_var_t *) allocate( ( delimiters + 1 ) * sizeof( kmp_env_var_t ) ); - - // Loop thru all the variables. - { - char * var; // Pointer to variable (both name and value). - char * name; // Pointer to name of variable. - char * value; // Pointer to value. - char * buf; // Buffer for __kmp_str_token() function. - var = __kmp_str_token( bulk, str_delimiter, & buf ); // Get the first var. - while ( var != NULL ) { - // Save found variable in vars array. - __kmp_str_split( var, '=', & name, & value ); - KMP_DEBUG_ASSERT( count < delimiters + 1 ); - vars[ count ].name = name; - vars[ count ].value = value; - ++ count; - // Get the next var. - var = __kmp_str_token( NULL, str_delimiter, & buf ); - }; // while - } - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; - -}; // ___kmp_env_blk_parse_string - - - -/* - Windows* OS (actually, DOS) environment block is a piece of memory with environment variables. Each - variable is terminated with zero byte, entire block is terminated with one extra zero byte, so - we have two zero bytes at the end of environment block, e. g.: - - "HOME=C:\\users\\lev\x00OS=Windows_NT\x00\x00" - - It is not clear how empty environment is represented. "\x00\x00"? -*/ - -#if KMP_OS_WINDOWS -static -void -___kmp_env_blk_parse_windows( - kmp_env_blk_t * block, // M: Env block to fill. - char const * env // I: Pointer to Windows* OS (DOS) environment block. -) { - - char * bulk = NULL; - kmp_env_var_t * vars = NULL; - int count = 0; // Number of used elements in vars array. - int size = 0; // Size of bulk. - - char * name; // Pointer to name of variable. - char * value; // Pointer to value. 
- - if ( env != NULL ) { - - // Loop thru all the vars in environment block. Count variables, find size of block. - { - char const * var; // Pointer to beginning of var. - int len; // Length of variable. - count = 0; - var = env; // The first variable starts and beginning of environment block. - len = KMP_STRLEN( var ); - while ( len != 0 ) { - ++ count; - size = size + len + 1; - var = var + len + 1; // Move pointer to the beginning of the next variable. - len = KMP_STRLEN( var ); - }; // while - size = size + 1; // Total size of env block, including terminating zero byte. - } - - // Copy original block to bulk, we will modify bulk, not original block. - bulk = (char *) allocate( size ); - KMP_MEMCPY_S( bulk, size, env, size ); - // Allocate vars array. - vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) ); - - // Loop thru all the vars, now in bulk. - { - char * var; // Pointer to beginning of var. - int len; // Length of variable. - count = 0; - var = bulk; - len = KMP_STRLEN( var ); - while ( len != 0 ) { - // Save variable in vars array. - __kmp_str_split( var, '=', & name, & value ); - vars[ count ].name = name; - vars[ count ].value = value; - ++ count; - // Get the next var. - var = var + len + 1; - len = KMP_STRLEN( var ); - }; // while - } - - }; // if - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; - -}; // ___kmp_env_blk_parse_windows -#endif - - -/* - Unix environment block is a array of pointers to variables, last pointer in array is NULL: - - { "HOME=/home/lev", "TERM=xterm", NULL } -*/ - -static -void -___kmp_env_blk_parse_unix( - kmp_env_blk_t * block, // M: Env block to fill. - char * * env // I: Unix environment to parse. -) { - - char * bulk = NULL; - kmp_env_var_t * vars = NULL; - int count = 0; - int size = 0; // Size of bulk. - - // Count number of variables and length of required bulk. 
- { - count = 0; - size = 0; - while ( env[ count ] != NULL ) { - size += KMP_STRLEN( env[ count ] ) + 1; - ++ count; - }; // while - } - - // Allocate memory. - bulk = (char *) allocate( size ); - vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) ); - - // Loop thru all the vars. - { - char * var; // Pointer to beginning of var. - char * name; // Pointer to name of variable. - char * value; // Pointer to value. - int len; // Length of variable. - int i; - var = bulk; - for ( i = 0; i < count; ++ i ) { - // Copy variable to bulk. - len = KMP_STRLEN( env[ i ] ); - KMP_MEMCPY_S( var, size, env[ i ], len + 1 ); - // Save found variable in vars array. - __kmp_str_split( var, '=', & name, & value ); - vars[ i ].name = name; - vars[ i ].value = value; - // Move pointer. - var += len + 1; - }; // for - } - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; - -}; // ___kmp_env_blk_parse_unix - - - -void -__kmp_env_blk_init( - kmp_env_blk_t * block, // M: Block to initialize. - char const * bulk // I: Initialization string, or NULL. -) { - - if ( bulk != NULL ) { - ___kmp_env_blk_parse_string( block, bulk ); - } else { - #if KMP_OS_UNIX - ___kmp_env_blk_parse_unix( block, environ ); - #elif KMP_OS_WINDOWS - { - char * mem = GetEnvironmentStrings(); - if ( mem == NULL ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvironment ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - ___kmp_env_blk_parse_windows( block, mem ); - FreeEnvironmentStrings( mem ); - } - #else - #error Unknown or unsupported OS. - #endif - }; // if - -} // __kmp_env_blk_init - - - -static -int -___kmp_env_var_cmp( // Comparison function for qsort(). - kmp_env_var_t const * lhs, - kmp_env_var_t const * rhs -) { - return strcmp( lhs->name, rhs->name ); -} - -void -__kmp_env_blk_sort( - kmp_env_blk_t * block // M: Block of environment variables to sort. 
-) { - - qsort( - (void *) block->vars, - block->count, - sizeof( kmp_env_var_t ), - ( int ( * )( void const *, void const * ) ) & ___kmp_env_var_cmp - ); - -} // __kmp_env_block_sort - - - -void -__kmp_env_blk_free( - kmp_env_blk_t * block // M: Block of environment variables to free. -) { - - KMP_INTERNAL_FREE( (void *) block->vars ); - KMP_INTERNAL_FREE( (void *) block->bulk ); - - block->count = 0; - block->vars = NULL; - block->bulk = NULL; - -} // __kmp_env_blk_free - - - -char const * // R: Value of variable or NULL if variable does not exist. -__kmp_env_blk_var( - kmp_env_blk_t * block, // I: Block of environment variables. - char const * name // I: Name of variable to find. -) { - - int i; - for ( i = 0; i < block->count; ++ i ) { - if ( strcmp( block->vars[ i ].name, name ) == 0 ) { - return block->vars[ i ].value; - }; // if - }; // for - return NULL; - -} // __kmp_env_block_var - - -// end of file // +/* + * kmp_environment.c -- Handle environment variables OS-independently. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +/* + ------------------------------------------------------------------------------------------------ + We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of + loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv()) + unavailable. getenv() apparently gets a clean copy of the env variables as they existed + at the start of the run. + JH 12/23/2002 + ------------------------------------------------------------------------------------------------ + On Windows* OS, there are two environments (at least, see below): + + 1. 
Environment maintained by Windows* OS on IA-32 architecture. + Accessible through GetEnvironmentVariable(), + SetEnvironmentVariable(), and GetEnvironmentStrings(). + + 2. Environment maintained by C RTL. Accessible through getenv(), putenv(). + + putenv() function updates both C and Windows* OS on IA-32 architecture. getenv() function + search for variables in C RTL environment only. Windows* OS on IA-32 architecture functions work *only* + with Windows* OS on IA-32 architecture. + + Windows* OS on IA-32 architecture maintained by OS, so there is always only one Windows* OS on + IA-32 architecture per process. Changes in Windows* OS on IA-32 architecture are process-visible. + + C environment maintained by C RTL. Multiple copies of C RTL may be present in the process, and + each C RTL maintains its own environment. :-( + + Thus, proper way to work with environment on Windows* OS is: + + 1. Set variables with putenv() function -- both C and Windows* OS on + IA-32 architecture are being updated. Windows* OS on + IA-32 architecture may be considered as primary target, + while updating C RTL environment is a free bonus. + + 2. Get variables with GetEnvironmentVariable() -- getenv() does not + search Windows* OS on IA-32 architecture, and can not see variables + set with SetEnvironmentVariable(). + + 2007-04-05 -- lev + ------------------------------------------------------------------------------------------------ +*/ + +#include "kmp_environment.h" + +#include "kmp_os.h" // KMP_OS_*. +#include "kmp.h" // +#include "kmp_str.h" // __kmp_str_*(). +#include "kmp_i18n.h" + +#if KMP_OS_UNIX + #include // getenv, setenv, unsetenv. + #include // strlen, strcpy. + #if KMP_OS_DARWIN + #include + #define environ (*_NSGetEnviron()) + #else + extern char * * environ; + #endif +#elif KMP_OS_WINDOWS + #include // GetEnvironmentVariable, SetEnvironmentVariable, GetLastError. +#else + #error Unknown or unsupported OS. 
+#endif + + +// TODO: Eliminate direct memory allocations, use string operations instead. + +static inline +void * +allocate( + size_t size +) { + void * ptr = KMP_INTERNAL_MALLOC( size ); + if ( ptr == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + return ptr; +} // allocate + + +char * +__kmp_env_get( char const * name ) { + + char * result = NULL; + + #if KMP_OS_UNIX + char const * value = getenv( name ); + if ( value != NULL ) { + size_t len = KMP_STRLEN( value ) + 1; + result = (char *) KMP_INTERNAL_MALLOC( len ); + if ( result == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + KMP_STRNCPY_S( result, len, value, len ); + }; // if + #elif KMP_OS_WINDOWS + /* + We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of + loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv()) + unavailable. getenv() apparently gets a clean copy of the env variables as they existed + at the start of the run. + JH 12/23/2002 + */ + DWORD rc; + rc = GetEnvironmentVariable( name, NULL, 0 ); + if ( ! rc ) { + DWORD error = GetLastError(); + if ( error != ERROR_ENVVAR_NOT_FOUND ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantGetEnvVar, name ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if + // Variable is not found, it's ok, just continue. + } else { + DWORD len = rc; + result = (char *) KMP_INTERNAL_MALLOC( len ); + if ( result == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + rc = GetEnvironmentVariable( name, result, len ); + if ( ! rc ) { + // GetEnvironmentVariable() may return 0 if variable is empty. + // In such a case GetLastError() returns ERROR_SUCCESS. + DWORD error = GetLastError(); + if ( error != ERROR_SUCCESS ) { + // Unexpected error. The variable should be in the environment, + // and buffer should be large enough. 
+ __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantGetEnvVar, name ), + KMP_ERR( error ), + __kmp_msg_null + ); + KMP_INTERNAL_FREE( (void *) result ); + result = NULL; + }; // if + }; // if + }; // if + #else + #error Unknown or unsupported OS. + #endif + + return result; + +} // func __kmp_env_get + + +// TODO: Find and replace all regular free() with __kmp_env_free(). + +void +__kmp_env_free( char const * * value ) { + + KMP_DEBUG_ASSERT( value != NULL ); + KMP_INTERNAL_FREE( (void *) * value ); + * value = NULL; + +} // func __kmp_env_free + + + +int +__kmp_env_exists( char const * name ) { + + #if KMP_OS_UNIX + char const * value = getenv( name ); + return ( ( value == NULL ) ? ( 0 ) : ( 1 ) ); + #elif KMP_OS_WINDOWS + DWORD rc; + rc = GetEnvironmentVariable( name, NULL, 0 ); + if ( rc == 0 ) { + DWORD error = GetLastError(); + if ( error != ERROR_ENVVAR_NOT_FOUND ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantGetEnvVar, name ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if + return 0; + }; // if + return 1; + #else + #error Unknown or unsupported OS. + #endif + +} // func __kmp_env_exists + + + +void +__kmp_env_set( char const * name, char const * value, int overwrite ) { + + #if KMP_OS_UNIX + int rc = setenv( name, value, overwrite ); + if ( rc != 0 ) { + // Dead code. I tried to put too many variables into Linux* OS + // environment on IA-32 architecture. When application consumes + // more than ~2.5 GB of memory, entire system feels bad. Sometimes + // application is killed (by OS?), sometimes system stops + // responding... But this error message never appears. --ln + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetEnvVar, name ), + KMP_HNT( NotEnoughMemory ), + __kmp_msg_null + ); + }; // if + #elif KMP_OS_WINDOWS + BOOL rc; + if ( ! overwrite ) { + rc = GetEnvironmentVariable( name, NULL, 0 ); + if ( rc ) { + // Variable exists, do not overwrite. 
+ return; + }; // if + DWORD error = GetLastError(); + if ( error != ERROR_ENVVAR_NOT_FOUND ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantGetEnvVar, name ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if + }; // if + rc = SetEnvironmentVariable( name, value ); + if ( ! rc ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetEnvVar, name ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if + #else + #error Unknown or unsupported OS. + #endif + +} // func __kmp_env_set + + + +void +__kmp_env_unset( char const * name ) { + + #if KMP_OS_UNIX + unsetenv( name ); + #elif KMP_OS_WINDOWS + BOOL rc = SetEnvironmentVariable( name, NULL ); + if ( ! rc ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetEnvVar, name ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if + #else + #error Unknown or unsupported OS. + #endif + +} // func __kmp_env_unset + +// ------------------------------------------------------------------------------------------------- + +/* + Intel OpenMP RTL string representation of environment: just a string of characters, variables + are separated with vertical bars, e. g.: + + "KMP_WARNINGS=0|KMP_AFFINITY=compact|" + + Empty variables are allowed and ignored: + + "||KMP_WARNINGS=1||" + +*/ + +static +void +___kmp_env_blk_parse_string( + kmp_env_blk_t * block, // M: Env block to fill. + char const * env // I: String to parse. +) { + + char const chr_delimiter = '|'; + char const str_delimiter[] = { chr_delimiter, 0 }; + + char * bulk = NULL; + kmp_env_var_t * vars = NULL; + int count = 0; // Number of used elements in vars array. + int delimiters = 0; // Number of delimiters in input string. + + // Copy original string, we will modify the copy. + bulk = __kmp_str_format( "%s", env ); + + // Loop thru all the vars in environment block. Count delimiters (maximum number of variables + // is number of delimiters plus one). 
+ { + char const * ptr = bulk; + for ( ; ; ) { + ptr = strchr( ptr, chr_delimiter ); + if ( ptr == NULL ) { + break; + }; // if + ++ delimiters; + ptr += 1; + }; // forever + } + + // Allocate vars array. + vars = (kmp_env_var_t *) allocate( ( delimiters + 1 ) * sizeof( kmp_env_var_t ) ); + + // Loop thru all the variables. + { + char * var; // Pointer to variable (both name and value). + char * name; // Pointer to name of variable. + char * value; // Pointer to value. + char * buf; // Buffer for __kmp_str_token() function. + var = __kmp_str_token( bulk, str_delimiter, & buf ); // Get the first var. + while ( var != NULL ) { + // Save found variable in vars array. + __kmp_str_split( var, '=', & name, & value ); + KMP_DEBUG_ASSERT( count < delimiters + 1 ); + vars[ count ].name = name; + vars[ count ].value = value; + ++ count; + // Get the next var. + var = __kmp_str_token( NULL, str_delimiter, & buf ); + }; // while + } + + // Fill out result. + block->bulk = bulk; + block->vars = vars; + block->count = count; + +}; // ___kmp_env_blk_parse_string + + + +/* + Windows* OS (actually, DOS) environment block is a piece of memory with environment variables. Each + variable is terminated with zero byte, entire block is terminated with one extra zero byte, so + we have two zero bytes at the end of environment block, e. g.: + + "HOME=C:\\users\\lev\x00OS=Windows_NT\x00\x00" + + It is not clear how empty environment is represented. "\x00\x00"? +*/ + +#if KMP_OS_WINDOWS +static +void +___kmp_env_blk_parse_windows( + kmp_env_blk_t * block, // M: Env block to fill. + char const * env // I: Pointer to Windows* OS (DOS) environment block. +) { + + char * bulk = NULL; + kmp_env_var_t * vars = NULL; + int count = 0; // Number of used elements in vars array. + int size = 0; // Size of bulk. + + char * name; // Pointer to name of variable. + char * value; // Pointer to value. + + if ( env != NULL ) { + + // Loop thru all the vars in environment block. 
Count variables, find size of block. + { + char const * var; // Pointer to beginning of var. + int len; // Length of variable. + count = 0; + var = env; // The first variable starts and beginning of environment block. + len = KMP_STRLEN( var ); + while ( len != 0 ) { + ++ count; + size = size + len + 1; + var = var + len + 1; // Move pointer to the beginning of the next variable. + len = KMP_STRLEN( var ); + }; // while + size = size + 1; // Total size of env block, including terminating zero byte. + } + + // Copy original block to bulk, we will modify bulk, not original block. + bulk = (char *) allocate( size ); + KMP_MEMCPY_S( bulk, size, env, size ); + // Allocate vars array. + vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) ); + + // Loop thru all the vars, now in bulk. + { + char * var; // Pointer to beginning of var. + int len; // Length of variable. + count = 0; + var = bulk; + len = KMP_STRLEN( var ); + while ( len != 0 ) { + // Save variable in vars array. + __kmp_str_split( var, '=', & name, & value ); + vars[ count ].name = name; + vars[ count ].value = value; + ++ count; + // Get the next var. + var = var + len + 1; + len = KMP_STRLEN( var ); + }; // while + } + + }; // if + + // Fill out result. + block->bulk = bulk; + block->vars = vars; + block->count = count; + +}; // ___kmp_env_blk_parse_windows +#endif + + +/* + Unix environment block is a array of pointers to variables, last pointer in array is NULL: + + { "HOME=/home/lev", "TERM=xterm", NULL } +*/ + +static +void +___kmp_env_blk_parse_unix( + kmp_env_blk_t * block, // M: Env block to fill. + char * * env // I: Unix environment to parse. +) { + + char * bulk = NULL; + kmp_env_var_t * vars = NULL; + int count = 0; + int size = 0; // Size of bulk. + + // Count number of variables and length of required bulk. + { + count = 0; + size = 0; + while ( env[ count ] != NULL ) { + size += KMP_STRLEN( env[ count ] ) + 1; + ++ count; + }; // while + } + + // Allocate memory. 
+ bulk = (char *) allocate( size ); + vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) ); + + // Loop thru all the vars. + { + char * var; // Pointer to beginning of var. + char * name; // Pointer to name of variable. + char * value; // Pointer to value. + int len; // Length of variable. + int i; + var = bulk; + for ( i = 0; i < count; ++ i ) { + // Copy variable to bulk. + len = KMP_STRLEN( env[ i ] ); + KMP_MEMCPY_S( var, size, env[ i ], len + 1 ); + // Save found variable in vars array. + __kmp_str_split( var, '=', & name, & value ); + vars[ i ].name = name; + vars[ i ].value = value; + // Move pointer. + var += len + 1; + }; // for + } + + // Fill out result. + block->bulk = bulk; + block->vars = vars; + block->count = count; + +}; // ___kmp_env_blk_parse_unix + + + +void +__kmp_env_blk_init( + kmp_env_blk_t * block, // M: Block to initialize. + char const * bulk // I: Initialization string, or NULL. +) { + + if ( bulk != NULL ) { + ___kmp_env_blk_parse_string( block, bulk ); + } else { + #if KMP_OS_UNIX + ___kmp_env_blk_parse_unix( block, environ ); + #elif KMP_OS_WINDOWS + { + char * mem = GetEnvironmentStrings(); + if ( mem == NULL ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantGetEnvironment ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if + ___kmp_env_blk_parse_windows( block, mem ); + FreeEnvironmentStrings( mem ); + } + #else + #error Unknown or unsupported OS. + #endif + }; // if + +} // __kmp_env_blk_init + + + +static +int +___kmp_env_var_cmp( // Comparison function for qsort(). + kmp_env_var_t const * lhs, + kmp_env_var_t const * rhs +) { + return strcmp( lhs->name, rhs->name ); +} + +void +__kmp_env_blk_sort( + kmp_env_blk_t * block // M: Block of environment variables to sort. 
+) { + + qsort( + (void *) block->vars, + block->count, + sizeof( kmp_env_var_t ), + ( int ( * )( void const *, void const * ) ) & ___kmp_env_var_cmp + ); + +} // __kmp_env_block_sort + + + +void +__kmp_env_blk_free( + kmp_env_blk_t * block // M: Block of environment variables to free. +) { + + KMP_INTERNAL_FREE( (void *) block->vars ); + KMP_INTERNAL_FREE( (void *) block->bulk ); + + block->count = 0; + block->vars = NULL; + block->bulk = NULL; + +} // __kmp_env_blk_free + + + +char const * // R: Value of variable or NULL if variable does not exist. +__kmp_env_blk_var( + kmp_env_blk_t * block, // I: Block of environment variables. + char const * name // I: Name of variable to find. +) { + + int i; + for ( i = 0; i < block->count; ++ i ) { + if ( strcmp( block->vars[ i ].name, name ) == 0 ) { + return block->vars[ i ].value; + }; // if + }; // for + return NULL; + +} // __kmp_env_block_var + + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_environment.h b/contrib/libs/cxxsupp/openmp/kmp_environment.h index 52b462478f7..243b547451c 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_environment.h +++ b/contrib/libs/cxxsupp/openmp/kmp_environment.h @@ -1,81 +1,81 @@ -/* - * kmp_environment.h -- Handle environment varoiables OS-independently. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_ENVIRONMENT_H -#define KMP_ENVIRONMENT_H - -#ifdef __cplusplus -extern "C" { -#endif - -// Return a copy of the value of environment variable or NULL if the variable does not exist. -// *Note*: Returned pointed *must* be freed after use with __kmp_env_free(). 
-char * __kmp_env_get( char const * name ); -void __kmp_env_free( char const * * value ); - -// Return 1 if the environment variable exists or 0 if does not exist. -int __kmp_env_exists( char const * name ); - -// Set the environment variable. -void __kmp_env_set( char const * name, char const * value, int overwrite ); - -// Unset (remove) environment variable. -void __kmp_env_unset( char const * name ); - - -// ------------------------------------------------------------------------------------------------- -// Working with environment blocks. -// ------------------------------------------------------------------------------------------------- - -/* - kmp_env_blk_t is read-only collection of environment variables (or environment-like). Usage: - - kmp_env_blk_t block; - __kmp_env_blk_init( & block, NULL ); // Initialize block from process environment. - // or - __kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" ); // from string. - __kmp_env_blk_sort( & block ); // Optionally, sort list. - for ( i = 0; i < block.count; ++ i ) { - // Process block.vars[ i ].name and block.vars[ i ].value... - }; // for i - __kmp_env_block_free( & block ); -*/ - -struct __kmp_env_var { - char const * name; - char const * value; -}; -typedef struct __kmp_env_var kmp_env_var_t; - -struct __kmp_env_blk { - char const * bulk; - kmp_env_var_t const * vars; - int count; -}; -typedef struct __kmp_env_blk kmp_env_blk_t; - -void __kmp_env_blk_init( kmp_env_blk_t * block, char const * bulk ); -void __kmp_env_blk_free( kmp_env_blk_t * block ); -void __kmp_env_blk_sort( kmp_env_blk_t * block ); -char const * __kmp_env_blk_var( kmp_env_blk_t * block, char const * name ); - -#ifdef __cplusplus -} -#endif - -#endif // KMP_ENVIRONMENT_H - -// end of file // - +/* + * kmp_environment.h -- Handle environment varoiables OS-independently. 
+ */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_ENVIRONMENT_H +#define KMP_ENVIRONMENT_H + +#ifdef __cplusplus +extern "C" { +#endif + +// Return a copy of the value of environment variable or NULL if the variable does not exist. +// *Note*: Returned pointed *must* be freed after use with __kmp_env_free(). +char * __kmp_env_get( char const * name ); +void __kmp_env_free( char const * * value ); + +// Return 1 if the environment variable exists or 0 if does not exist. +int __kmp_env_exists( char const * name ); + +// Set the environment variable. +void __kmp_env_set( char const * name, char const * value, int overwrite ); + +// Unset (remove) environment variable. +void __kmp_env_unset( char const * name ); + + +// ------------------------------------------------------------------------------------------------- +// Working with environment blocks. +// ------------------------------------------------------------------------------------------------- + +/* + kmp_env_blk_t is read-only collection of environment variables (or environment-like). Usage: + + kmp_env_blk_t block; + __kmp_env_blk_init( & block, NULL ); // Initialize block from process environment. + // or + __kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" ); // from string. + __kmp_env_blk_sort( & block ); // Optionally, sort list. + for ( i = 0; i < block.count; ++ i ) { + // Process block.vars[ i ].name and block.vars[ i ].value... 
+ }; // for i + __kmp_env_block_free( & block ); +*/ + +struct __kmp_env_var { + char const * name; + char const * value; +}; +typedef struct __kmp_env_var kmp_env_var_t; + +struct __kmp_env_blk { + char const * bulk; + kmp_env_var_t const * vars; + int count; +}; +typedef struct __kmp_env_blk kmp_env_blk_t; + +void __kmp_env_blk_init( kmp_env_blk_t * block, char const * bulk ); +void __kmp_env_blk_free( kmp_env_blk_t * block ); +void __kmp_env_blk_sort( kmp_env_blk_t * block ); +char const * __kmp_env_blk_var( kmp_env_blk_t * block, char const * name ); + +#ifdef __cplusplus +} +#endif + +#endif // KMP_ENVIRONMENT_H + +// end of file // + diff --git a/contrib/libs/cxxsupp/openmp/kmp_error.c b/contrib/libs/cxxsupp/openmp/kmp_error.c index 5bb4bcf22fc..6866df5d496 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_error.c +++ b/contrib/libs/cxxsupp/openmp/kmp_error.c @@ -1,523 +1,523 @@ -/* - * kmp_error.c -- KPTS functions for error checking at runtime - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_str.h" -#include "kmp_error.h" - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#define MIN_STACK 100 - - -static char const * cons_text_c[] = { - "(none)", - "\"parallel\"", - "work-sharing", /* this is not called "for" because of lowering of "sections" pragmas */ - "\"ordered\" work-sharing", /* this is not called "for ordered" because of lowering of "sections" pragmas */ - "\"sections\"", - "work-sharing", /* this is not called "single" because of lowering of "sections" pragmas */ - "\"taskq\"", - "\"taskq\"", - "\"taskq ordered\"", - "\"critical\"", - "\"ordered\"", /* in PARALLEL */ - "\"ordered\"", /* in PDO */ - "\"ordered\"", /* in TASKQ */ - "\"master\"", - "\"reduce\"", - "\"barrier\"" -}; - -#define get_src( ident ) ( (ident) == NULL ? NULL : (ident)->psource ) - -#define PUSH_MSG( ct, ident ) \ - "\tpushing on stack: %s (%s)\n", cons_text_c[ (ct) ], get_src( (ident) ) -#define POP_MSG( p ) \ - "\tpopping off stack: %s (%s)\n", \ - cons_text_c[ (p)->stack_data[ tos ].type ], \ - get_src( (p)->stack_data[ tos ].ident ) - -static int const cons_text_c_num = sizeof( cons_text_c ) / sizeof( char const * ); - -/* ------------------------------------------------------------------------ */ -/* --------------- START OF STATIC LOCAL ROUTINES ------------------------- */ -/* ------------------------------------------------------------------------ */ - -static void -__kmp_check_null_func( void ) -{ - /* nothing to do */ -} - -static void -__kmp_expand_cons_stack( int gtid, struct cons_header *p ) -{ - int i; - struct cons_data *d; - - /* TODO for monitor perhaps? 
*/ - if (gtid < 0) - __kmp_check_null_func(); - - KE_TRACE( 10, ("expand cons_stack (%d %d)\n", gtid, __kmp_get_gtid() ) ); - - d = p->stack_data; - - p->stack_size = (p->stack_size * 2) + 100; - - /* TODO free the old data */ - p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (p->stack_size+1) ); - - for (i = p->stack_top; i >= 0; --i) - p->stack_data[i] = d[i]; - - /* NOTE: we do not free the old stack_data */ -} - -// NOTE: Function returns allocated memory, caller must free it! -static char const * -__kmp_pragma( - int ct, - ident_t const * ident -) { - char const * cons = NULL; // Construct name. - char * file = NULL; // File name. - char * func = NULL; // Function (routine) name. - char * line = NULL; // Line number. - kmp_str_buf_t buffer; - kmp_msg_t prgm; - __kmp_str_buf_init( & buffer ); - if ( 0 < ct && ct < cons_text_c_num ) { - cons = cons_text_c[ ct ]; - } else { - KMP_DEBUG_ASSERT( 0 ); - }; - if ( ident != NULL && ident->psource != NULL ) { - char * tail = NULL; - __kmp_str_buf_print( & buffer, "%s", ident->psource ); // Copy source to buffer. - // Split string in buffer to file, func, and line. - tail = buffer.str; - __kmp_str_split( tail, ';', NULL, & tail ); - __kmp_str_split( tail, ';', & file, & tail ); - __kmp_str_split( tail, ';', & func, & tail ); - __kmp_str_split( tail, ';', & line, & tail ); - }; // if - prgm = __kmp_msg_format( kmp_i18n_fmt_Pragma, cons, file, func, line ); - __kmp_str_buf_free( & buffer ); - return prgm.str; -} // __kmp_pragma - -/* ------------------------------------------------------------------------ */ -/* ----------------- END OF STATIC LOCAL ROUTINES ------------------------- */ -/* ------------------------------------------------------------------------ */ - - -void -__kmp_error_construct( - kmp_i18n_id_t id, // Message identifier. - enum cons_type ct, // Construct type. - ident_t const * ident // Construct ident. 
-) { - char const * construct = __kmp_pragma( ct, ident ); - __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct ), __kmp_msg_null ); - KMP_INTERNAL_FREE( (void *) construct ); -} - -void -__kmp_error_construct2( - kmp_i18n_id_t id, // Message identifier. - enum cons_type ct, // First construct type. - ident_t const * ident, // First construct ident. - struct cons_data const * cons // Second construct. -) { - char const * construct1 = __kmp_pragma( ct, ident ); - char const * construct2 = __kmp_pragma( cons->type, cons->ident ); - __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct1, construct2 ), __kmp_msg_null ); - KMP_INTERNAL_FREE( (void *) construct1 ); - KMP_INTERNAL_FREE( (void *) construct2 ); -} - - -struct cons_header * -__kmp_allocate_cons_stack( int gtid ) -{ - struct cons_header *p; - - /* TODO for monitor perhaps? */ - if ( gtid < 0 ) { - __kmp_check_null_func(); - }; // if - KE_TRACE( 10, ("allocate cons_stack (%d)\n", gtid ) ); - p = (struct cons_header *) __kmp_allocate( sizeof( struct cons_header ) ); - p->p_top = p->w_top = p->s_top = 0; - p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (MIN_STACK+1) ); - p->stack_size = MIN_STACK; - p->stack_top = 0; - p->stack_data[ 0 ].type = ct_none; - p->stack_data[ 0 ].prev = 0; - p->stack_data[ 0 ].ident = NULL; - return p; -} - -void -__kmp_free_cons_stack( void * ptr ) { - struct cons_header * p = (struct cons_header *) ptr; - if ( p != NULL ) { - if ( p->stack_data != NULL ) { - __kmp_free( p->stack_data ); - p->stack_data = NULL; - }; // if - __kmp_free( p ); - }; // if -} - - -#if KMP_DEBUG -static void -dump_cons_stack( int gtid, struct cons_header * p ) { - int i; - int tos = p->stack_top; - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" ); - __kmp_str_buf_print( & buffer, "Begin construct stack with %d items for thread %d\n", tos, gtid ); - 
__kmp_str_buf_print( & buffer, " stack_top=%d { P=%d, W=%d, S=%d }\n", tos, p->p_top, p->w_top, p->s_top ); - for ( i = tos; i > 0; i-- ) { - struct cons_data * c = & ( p->stack_data[ i ] ); - __kmp_str_buf_print( & buffer, " stack_data[%2d] = { %s (%s) %d %p }\n", i, cons_text_c[ c->type ], get_src( c->ident ), c->prev, c->name ); - }; // for i - __kmp_str_buf_print( & buffer, "End construct stack for thread %d\n", gtid ); - __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" ); - __kmp_debug_printf( "%s", buffer.str ); - __kmp_str_buf_free( & buffer ); -} -#endif - -void -__kmp_push_parallel( int gtid, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons ); - KE_TRACE( 10, ("__kmp_push_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) ); - KE_TRACE( 100, ( PUSH_MSG( ct_parallel, ident ) ) ); - if ( p->stack_top >= p->stack_size ) { - __kmp_expand_cons_stack( gtid, p ); - }; // if - tos = ++p->stack_top; - p->stack_data[ tos ].type = ct_parallel; - p->stack_data[ tos ].prev = p->p_top; - p->stack_data[ tos ].ident = ident; - p->stack_data[ tos ].name = NULL; - p->p_top = tos; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); -} - -void -__kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident ) -{ - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons ); - KE_TRACE( 10, ("__kmp_check_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); - - - if ( p->stack_top >= p->stack_size ) { - __kmp_expand_cons_stack( gtid, p ); - }; // if - if ( p->w_top > p->p_top && - !(IS_CONS_TYPE_TASKQ(p->stack_data[ p->w_top ].type) && IS_CONS_TYPE_TASKQ(ct))) { - // We are already in a WORKSHARE construct for this PARALLEL region. 
- __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->w_top ] ); - }; // if - if ( p->s_top > p->p_top ) { - // We are already in a SYNC construct for this PARALLEL region. - __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->s_top ] ); - }; // if -} - -void -__kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - KE_TRACE( 10, ("__kmp_push_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); - __kmp_check_workshare( gtid, ct, ident ); - KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) ); - tos = ++p->stack_top; - p->stack_data[ tos ].type = ct; - p->stack_data[ tos ].prev = p->w_top; - p->stack_data[ tos ].ident = ident; - p->stack_data[ tos ].name = NULL; - p->w_top = tos; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); -} - -void -#if KMP_USE_DYNAMIC_LOCK -__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq ) -#else -__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) -#endif -{ - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - KE_TRACE( 10, ("__kmp_check_sync (gtid=%d)\n", __kmp_get_gtid() ) ); - - if (p->stack_top >= p->stack_size) - __kmp_expand_cons_stack( gtid, p ); - - if (ct == ct_ordered_in_parallel || ct == ct_ordered_in_pdo || ct == ct_ordered_in_taskq ) { - if (p->w_top <= p->p_top) { - /* we are not in a worksharing construct */ - #ifdef BUILD_PARALLEL_ORDERED - /* do not report error messages for PARALLEL ORDERED */ - KMP_ASSERT( ct == ct_ordered_in_parallel ); - #else - __kmp_error_construct( kmp_i18n_msg_CnsBoundToWorksharing, ct, ident ); - #endif /* BUILD_PARALLEL_ORDERED */ - } else { - /* inside a WORKSHARING construct for this PARALLEL region */ - if (!IS_CONS_TYPE_ORDERED(p->stack_data[ p->w_top ].type)) { - if (p->stack_data[ p->w_top ].type == ct_taskq) { - __kmp_error_construct2( 
- kmp_i18n_msg_CnsNotInTaskConstruct, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } else { - __kmp_error_construct2( - kmp_i18n_msg_CnsNoOrderedClause, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } - } - } - if (p->s_top > p->p_top && p->s_top > p->w_top) { - /* inside a sync construct which is inside a worksharing construct */ - int index = p->s_top; - enum cons_type stack_type; - - stack_type = p->stack_data[ index ].type; - - if (stack_type == ct_critical || - ( ( stack_type == ct_ordered_in_parallel || - stack_type == ct_ordered_in_pdo || - stack_type == ct_ordered_in_taskq ) && /* C doesn't allow named ordered; ordered in ordered gets error */ - p->stack_data[ index ].ident != NULL && - (p->stack_data[ index ].ident->flags & KMP_IDENT_KMPC ))) { - /* we are in ORDERED which is inside an ORDERED or CRITICAL construct */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ index ] - ); - } - } - } else if ( ct == ct_critical ) { -#if KMP_USE_DYNAMIC_LOCK - if ( lck != NULL && __kmp_get_user_lock_owner( lck, seq ) == gtid ) { /* this same thread already has lock for this critical section */ -#else - if ( lck != NULL && __kmp_get_user_lock_owner( lck ) == gtid ) { /* this same thread already has lock for this critical section */ -#endif - int index = p->s_top; - struct cons_data cons = { NULL, ct_critical, 0, NULL }; - /* walk up construct stack and try to find critical with matching name */ - while ( index != 0 && p->stack_data[ index ].name != lck ) { - index = p->stack_data[ index ].prev; - } - if ( index != 0 ) { - /* found match on the stack (may not always because of interleaved critical for Fortran) */ - cons = p->stack_data[ index ]; - } - /* we are in CRITICAL which is inside a CRITICAL construct of the same name */ - __kmp_error_construct2( kmp_i18n_msg_CnsNestingSameName, ct, ident, & cons ); - } - } else if ( ct == ct_master || ct == ct_reduce ) { - if (p->w_top > p->p_top) { - /* inside a WORKSHARING 
construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } - if (ct == ct_reduce && p->s_top > p->p_top) { - /* inside a another SYNC construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->s_top ] - ); - }; // if - }; // if -} - -void -#if KMP_USE_DYNAMIC_LOCK -__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq ) -#else -__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) -#endif -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - KMP_ASSERT( gtid == __kmp_get_gtid() ); - KE_TRACE( 10, ("__kmp_push_sync (gtid=%d)\n", gtid ) ); -#if KMP_USE_DYNAMIC_LOCK - __kmp_check_sync( gtid, ct, ident, lck, seq ); -#else - __kmp_check_sync( gtid, ct, ident, lck ); -#endif - KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) ); - tos = ++ p->stack_top; - p->stack_data[ tos ].type = ct; - p->stack_data[ tos ].prev = p->s_top; - p->stack_data[ tos ].ident = ident; - p->stack_data[ tos ].name = lck; - p->s_top = tos; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); -} - -/* ------------------------------------------------------------------------ */ - -void -__kmp_pop_parallel( int gtid, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - tos = p->stack_top; - KE_TRACE( 10, ("__kmp_pop_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) ); - if ( tos == 0 || p->p_top == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct_parallel, ident ); - } - if ( tos != p->p_top || p->stack_data[ tos ].type != ct_parallel ) { - __kmp_error_construct2( - kmp_i18n_msg_CnsExpectedEnd, - ct_parallel, ident, - & p->stack_data[ tos ] - ); - } - KE_TRACE( 100, ( POP_MSG( p ) ) ); - p->p_top = p->stack_data[ tos ].prev; - p->stack_data[ tos ].type = ct_none; - p->stack_data[ tos 
].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); -} - -enum cons_type -__kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - tos = p->stack_top; - KE_TRACE( 10, ("__kmp_pop_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); - if ( tos == 0 || p->w_top == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident ); - } - - if ( tos != p->w_top || - ( p->stack_data[ tos ].type != ct && - /* below are two exceptions to the rule that construct types must match */ - ! ( p->stack_data[ tos ].type == ct_pdo_ordered && ct == ct_pdo ) && - ! ( p->stack_data[ tos ].type == ct_task_ordered && ct == ct_task ) - ) - ) { - __kmp_check_null_func(); - __kmp_error_construct2( - kmp_i18n_msg_CnsExpectedEnd, - ct, ident, - & p->stack_data[ tos ] - ); - } - KE_TRACE( 100, ( POP_MSG( p ) ) ); - p->w_top = p->stack_data[ tos ].prev; - p->stack_data[ tos ].type = ct_none; - p->stack_data[ tos ].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); - return p->stack_data[ p->w_top ].type; -} - -void -__kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - tos = p->stack_top; - KE_TRACE( 10, ("__kmp_pop_sync (%d %d)\n", gtid, __kmp_get_gtid() ) ); - if ( tos == 0 || p->s_top == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident ); - }; - if ( tos != p->s_top || p->stack_data[ tos ].type != ct ) { - __kmp_check_null_func(); - __kmp_error_construct2( - kmp_i18n_msg_CnsExpectedEnd, - ct, ident, - & p->stack_data[ tos ] - ); - }; - if ( gtid < 0 ) { - __kmp_check_null_func(); - }; - KE_TRACE( 100, ( POP_MSG( p ) ) ); - p->s_top = p->stack_data[ tos ].prev; - p->stack_data[ tos ].type = ct_none; - p->stack_data[ tos ].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); -} - 
-/* ------------------------------------------------------------------------ */ - -void -__kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident ) -{ - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - KE_TRACE( 10, ("__kmp_check_barrier (loc: %p, gtid: %d %d)\n", ident, gtid, __kmp_get_gtid() ) ); - if ( ident != 0 ) { - __kmp_check_null_func(); - } - if ( p->w_top > p->p_top ) { - /* we are already in a WORKSHARING construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } - if (p->s_top > p->p_top) { - /* we are already in a SYNC construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->s_top ] - ); - } -} - -/* ------------------------------------------------------------------------ */ - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +/* + * kmp_error.c -- KPTS functions for error checking at runtime + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_str.h" +#include "kmp_error.h" + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#define MIN_STACK 100 + + +static char const * cons_text_c[] = { + "(none)", + "\"parallel\"", + "work-sharing", /* this is not called "for" because of lowering of "sections" pragmas */ + "\"ordered\" work-sharing", /* this is not called "for ordered" because of lowering of "sections" pragmas */ + "\"sections\"", + "work-sharing", /* this is not called "single" because of lowering of "sections" pragmas */ + "\"taskq\"", + "\"taskq\"", + "\"taskq ordered\"", + "\"critical\"", + "\"ordered\"", /* in PARALLEL */ + "\"ordered\"", /* in PDO */ + "\"ordered\"", /* in TASKQ */ + "\"master\"", + "\"reduce\"", + "\"barrier\"" +}; + +#define get_src( ident ) ( (ident) == NULL ? NULL : (ident)->psource ) + +#define PUSH_MSG( ct, ident ) \ + "\tpushing on stack: %s (%s)\n", cons_text_c[ (ct) ], get_src( (ident) ) +#define POP_MSG( p ) \ + "\tpopping off stack: %s (%s)\n", \ + cons_text_c[ (p)->stack_data[ tos ].type ], \ + get_src( (p)->stack_data[ tos ].ident ) + +static int const cons_text_c_num = sizeof( cons_text_c ) / sizeof( char const * ); + +/* ------------------------------------------------------------------------ */ +/* --------------- START OF STATIC LOCAL ROUTINES ------------------------- */ +/* ------------------------------------------------------------------------ */ + +static void +__kmp_check_null_func( void ) +{ + /* nothing to do */ +} + +static void +__kmp_expand_cons_stack( int gtid, struct cons_header *p ) +{ + int i; + struct cons_data *d; + + /* TODO for monitor perhaps? 
*/ + if (gtid < 0) + __kmp_check_null_func(); + + KE_TRACE( 10, ("expand cons_stack (%d %d)\n", gtid, __kmp_get_gtid() ) ); + + d = p->stack_data; + + p->stack_size = (p->stack_size * 2) + 100; + + /* TODO free the old data */ + p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (p->stack_size+1) ); + + for (i = p->stack_top; i >= 0; --i) + p->stack_data[i] = d[i]; + + /* NOTE: we do not free the old stack_data */ +} + +// NOTE: Function returns allocated memory, caller must free it! +static char const * +__kmp_pragma( + int ct, + ident_t const * ident +) { + char const * cons = NULL; // Construct name. + char * file = NULL; // File name. + char * func = NULL; // Function (routine) name. + char * line = NULL; // Line number. + kmp_str_buf_t buffer; + kmp_msg_t prgm; + __kmp_str_buf_init( & buffer ); + if ( 0 < ct && ct < cons_text_c_num ) { + cons = cons_text_c[ ct ]; + } else { + KMP_DEBUG_ASSERT( 0 ); + }; + if ( ident != NULL && ident->psource != NULL ) { + char * tail = NULL; + __kmp_str_buf_print( & buffer, "%s", ident->psource ); // Copy source to buffer. + // Split string in buffer to file, func, and line. + tail = buffer.str; + __kmp_str_split( tail, ';', NULL, & tail ); + __kmp_str_split( tail, ';', & file, & tail ); + __kmp_str_split( tail, ';', & func, & tail ); + __kmp_str_split( tail, ';', & line, & tail ); + }; // if + prgm = __kmp_msg_format( kmp_i18n_fmt_Pragma, cons, file, func, line ); + __kmp_str_buf_free( & buffer ); + return prgm.str; +} // __kmp_pragma + +/* ------------------------------------------------------------------------ */ +/* ----------------- END OF STATIC LOCAL ROUTINES ------------------------- */ +/* ------------------------------------------------------------------------ */ + + +void +__kmp_error_construct( + kmp_i18n_id_t id, // Message identifier. + enum cons_type ct, // Construct type. + ident_t const * ident // Construct ident. 
+) { + char const * construct = __kmp_pragma( ct, ident ); + __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct ), __kmp_msg_null ); + KMP_INTERNAL_FREE( (void *) construct ); +} + +void +__kmp_error_construct2( + kmp_i18n_id_t id, // Message identifier. + enum cons_type ct, // First construct type. + ident_t const * ident, // First construct ident. + struct cons_data const * cons // Second construct. +) { + char const * construct1 = __kmp_pragma( ct, ident ); + char const * construct2 = __kmp_pragma( cons->type, cons->ident ); + __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct1, construct2 ), __kmp_msg_null ); + KMP_INTERNAL_FREE( (void *) construct1 ); + KMP_INTERNAL_FREE( (void *) construct2 ); +} + + +struct cons_header * +__kmp_allocate_cons_stack( int gtid ) +{ + struct cons_header *p; + + /* TODO for monitor perhaps? */ + if ( gtid < 0 ) { + __kmp_check_null_func(); + }; // if + KE_TRACE( 10, ("allocate cons_stack (%d)\n", gtid ) ); + p = (struct cons_header *) __kmp_allocate( sizeof( struct cons_header ) ); + p->p_top = p->w_top = p->s_top = 0; + p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (MIN_STACK+1) ); + p->stack_size = MIN_STACK; + p->stack_top = 0; + p->stack_data[ 0 ].type = ct_none; + p->stack_data[ 0 ].prev = 0; + p->stack_data[ 0 ].ident = NULL; + return p; +} + +void +__kmp_free_cons_stack( void * ptr ) { + struct cons_header * p = (struct cons_header *) ptr; + if ( p != NULL ) { + if ( p->stack_data != NULL ) { + __kmp_free( p->stack_data ); + p->stack_data = NULL; + }; // if + __kmp_free( p ); + }; // if +} + + +#if KMP_DEBUG +static void +dump_cons_stack( int gtid, struct cons_header * p ) { + int i; + int tos = p->stack_top; + kmp_str_buf_t buffer; + __kmp_str_buf_init( & buffer ); + __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" ); + __kmp_str_buf_print( & buffer, "Begin construct stack with %d items for thread %d\n", tos, gtid ); + 
__kmp_str_buf_print( & buffer, " stack_top=%d { P=%d, W=%d, S=%d }\n", tos, p->p_top, p->w_top, p->s_top ); + for ( i = tos; i > 0; i-- ) { + struct cons_data * c = & ( p->stack_data[ i ] ); + __kmp_str_buf_print( & buffer, " stack_data[%2d] = { %s (%s) %d %p }\n", i, cons_text_c[ c->type ], get_src( c->ident ), c->prev, c->name ); + }; // for i + __kmp_str_buf_print( & buffer, "End construct stack for thread %d\n", gtid ); + __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" ); + __kmp_debug_printf( "%s", buffer.str ); + __kmp_str_buf_free( & buffer ); +} +#endif + +void +__kmp_push_parallel( int gtid, ident_t const * ident ) +{ + int tos; + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + + KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons ); + KE_TRACE( 10, ("__kmp_push_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) ); + KE_TRACE( 100, ( PUSH_MSG( ct_parallel, ident ) ) ); + if ( p->stack_top >= p->stack_size ) { + __kmp_expand_cons_stack( gtid, p ); + }; // if + tos = ++p->stack_top; + p->stack_data[ tos ].type = ct_parallel; + p->stack_data[ tos ].prev = p->p_top; + p->stack_data[ tos ].ident = ident; + p->stack_data[ tos ].name = NULL; + p->p_top = tos; + KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +} + +void +__kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident ) +{ + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + + KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons ); + KE_TRACE( 10, ("__kmp_check_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); + + + if ( p->stack_top >= p->stack_size ) { + __kmp_expand_cons_stack( gtid, p ); + }; // if + if ( p->w_top > p->p_top && + !(IS_CONS_TYPE_TASKQ(p->stack_data[ p->w_top ].type) && IS_CONS_TYPE_TASKQ(ct))) { + // We are already in a WORKSHARE construct for this PARALLEL region. 
+ __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->w_top ] ); + }; // if + if ( p->s_top > p->p_top ) { + // We are already in a SYNC construct for this PARALLEL region. + __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->s_top ] ); + }; // if +} + +void +__kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident ) +{ + int tos; + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + KE_TRACE( 10, ("__kmp_push_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); + __kmp_check_workshare( gtid, ct, ident ); + KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) ); + tos = ++p->stack_top; + p->stack_data[ tos ].type = ct; + p->stack_data[ tos ].prev = p->w_top; + p->stack_data[ tos ].ident = ident; + p->stack_data[ tos ].name = NULL; + p->w_top = tos; + KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +} + +void +#if KMP_USE_DYNAMIC_LOCK +__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq ) +#else +__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) +#endif +{ + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + + KE_TRACE( 10, ("__kmp_check_sync (gtid=%d)\n", __kmp_get_gtid() ) ); + + if (p->stack_top >= p->stack_size) + __kmp_expand_cons_stack( gtid, p ); + + if (ct == ct_ordered_in_parallel || ct == ct_ordered_in_pdo || ct == ct_ordered_in_taskq ) { + if (p->w_top <= p->p_top) { + /* we are not in a worksharing construct */ + #ifdef BUILD_PARALLEL_ORDERED + /* do not report error messages for PARALLEL ORDERED */ + KMP_ASSERT( ct == ct_ordered_in_parallel ); + #else + __kmp_error_construct( kmp_i18n_msg_CnsBoundToWorksharing, ct, ident ); + #endif /* BUILD_PARALLEL_ORDERED */ + } else { + /* inside a WORKSHARING construct for this PARALLEL region */ + if (!IS_CONS_TYPE_ORDERED(p->stack_data[ p->w_top ].type)) { + if (p->stack_data[ p->w_top ].type == ct_taskq) { + __kmp_error_construct2( 
+ kmp_i18n_msg_CnsNotInTaskConstruct, + ct, ident, + & p->stack_data[ p->w_top ] + ); + } else { + __kmp_error_construct2( + kmp_i18n_msg_CnsNoOrderedClause, + ct, ident, + & p->stack_data[ p->w_top ] + ); + } + } + } + if (p->s_top > p->p_top && p->s_top > p->w_top) { + /* inside a sync construct which is inside a worksharing construct */ + int index = p->s_top; + enum cons_type stack_type; + + stack_type = p->stack_data[ index ].type; + + if (stack_type == ct_critical || + ( ( stack_type == ct_ordered_in_parallel || + stack_type == ct_ordered_in_pdo || + stack_type == ct_ordered_in_taskq ) && /* C doesn't allow named ordered; ordered in ordered gets error */ + p->stack_data[ index ].ident != NULL && + (p->stack_data[ index ].ident->flags & KMP_IDENT_KMPC ))) { + /* we are in ORDERED which is inside an ORDERED or CRITICAL construct */ + __kmp_error_construct2( + kmp_i18n_msg_CnsInvalidNesting, + ct, ident, + & p->stack_data[ index ] + ); + } + } + } else if ( ct == ct_critical ) { +#if KMP_USE_DYNAMIC_LOCK + if ( lck != NULL && __kmp_get_user_lock_owner( lck, seq ) == gtid ) { /* this same thread already has lock for this critical section */ +#else + if ( lck != NULL && __kmp_get_user_lock_owner( lck ) == gtid ) { /* this same thread already has lock for this critical section */ +#endif + int index = p->s_top; + struct cons_data cons = { NULL, ct_critical, 0, NULL }; + /* walk up construct stack and try to find critical with matching name */ + while ( index != 0 && p->stack_data[ index ].name != lck ) { + index = p->stack_data[ index ].prev; + } + if ( index != 0 ) { + /* found match on the stack (may not always because of interleaved critical for Fortran) */ + cons = p->stack_data[ index ]; + } + /* we are in CRITICAL which is inside a CRITICAL construct of the same name */ + __kmp_error_construct2( kmp_i18n_msg_CnsNestingSameName, ct, ident, & cons ); + } + } else if ( ct == ct_master || ct == ct_reduce ) { + if (p->w_top > p->p_top) { + /* inside a WORKSHARING 
construct for this PARALLEL region */ + __kmp_error_construct2( + kmp_i18n_msg_CnsInvalidNesting, + ct, ident, + & p->stack_data[ p->w_top ] + ); + } + if (ct == ct_reduce && p->s_top > p->p_top) { + /* inside a another SYNC construct for this PARALLEL region */ + __kmp_error_construct2( + kmp_i18n_msg_CnsInvalidNesting, + ct, ident, + & p->stack_data[ p->s_top ] + ); + }; // if + }; // if +} + +void +#if KMP_USE_DYNAMIC_LOCK +__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq ) +#else +__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) +#endif +{ + int tos; + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + + KMP_ASSERT( gtid == __kmp_get_gtid() ); + KE_TRACE( 10, ("__kmp_push_sync (gtid=%d)\n", gtid ) ); +#if KMP_USE_DYNAMIC_LOCK + __kmp_check_sync( gtid, ct, ident, lck, seq ); +#else + __kmp_check_sync( gtid, ct, ident, lck ); +#endif + KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) ); + tos = ++ p->stack_top; + p->stack_data[ tos ].type = ct; + p->stack_data[ tos ].prev = p->s_top; + p->stack_data[ tos ].ident = ident; + p->stack_data[ tos ].name = lck; + p->s_top = tos; + KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +} + +/* ------------------------------------------------------------------------ */ + +void +__kmp_pop_parallel( int gtid, ident_t const * ident ) +{ + int tos; + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + tos = p->stack_top; + KE_TRACE( 10, ("__kmp_pop_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) ); + if ( tos == 0 || p->p_top == 0 ) { + __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct_parallel, ident ); + } + if ( tos != p->p_top || p->stack_data[ tos ].type != ct_parallel ) { + __kmp_error_construct2( + kmp_i18n_msg_CnsExpectedEnd, + ct_parallel, ident, + & p->stack_data[ tos ] + ); + } + KE_TRACE( 100, ( POP_MSG( p ) ) ); + p->p_top = p->stack_data[ tos ].prev; + p->stack_data[ tos ].type = ct_none; + p->stack_data[ tos 
].ident = NULL; + p->stack_top = tos - 1; + KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +} + +enum cons_type +__kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident ) +{ + int tos; + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + + tos = p->stack_top; + KE_TRACE( 10, ("__kmp_pop_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); + if ( tos == 0 || p->w_top == 0 ) { + __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident ); + } + + if ( tos != p->w_top || + ( p->stack_data[ tos ].type != ct && + /* below are two exceptions to the rule that construct types must match */ + ! ( p->stack_data[ tos ].type == ct_pdo_ordered && ct == ct_pdo ) && + ! ( p->stack_data[ tos ].type == ct_task_ordered && ct == ct_task ) + ) + ) { + __kmp_check_null_func(); + __kmp_error_construct2( + kmp_i18n_msg_CnsExpectedEnd, + ct, ident, + & p->stack_data[ tos ] + ); + } + KE_TRACE( 100, ( POP_MSG( p ) ) ); + p->w_top = p->stack_data[ tos ].prev; + p->stack_data[ tos ].type = ct_none; + p->stack_data[ tos ].ident = NULL; + p->stack_top = tos - 1; + KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); + return p->stack_data[ p->w_top ].type; +} + +void +__kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident ) +{ + int tos; + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + tos = p->stack_top; + KE_TRACE( 10, ("__kmp_pop_sync (%d %d)\n", gtid, __kmp_get_gtid() ) ); + if ( tos == 0 || p->s_top == 0 ) { + __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident ); + }; + if ( tos != p->s_top || p->stack_data[ tos ].type != ct ) { + __kmp_check_null_func(); + __kmp_error_construct2( + kmp_i18n_msg_CnsExpectedEnd, + ct, ident, + & p->stack_data[ tos ] + ); + }; + if ( gtid < 0 ) { + __kmp_check_null_func(); + }; + KE_TRACE( 100, ( POP_MSG( p ) ) ); + p->s_top = p->stack_data[ tos ].prev; + p->stack_data[ tos ].type = ct_none; + p->stack_data[ tos ].ident = NULL; + p->stack_top = tos - 1; + KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +} + 
+/* ------------------------------------------------------------------------ */ + +void +__kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident ) +{ + struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + KE_TRACE( 10, ("__kmp_check_barrier (loc: %p, gtid: %d %d)\n", ident, gtid, __kmp_get_gtid() ) ); + if ( ident != 0 ) { + __kmp_check_null_func(); + } + if ( p->w_top > p->p_top ) { + /* we are already in a WORKSHARING construct for this PARALLEL region */ + __kmp_error_construct2( + kmp_i18n_msg_CnsInvalidNesting, + ct, ident, + & p->stack_data[ p->w_top ] + ); + } + if (p->s_top > p->p_top) { + /* we are already in a SYNC construct for this PARALLEL region */ + __kmp_error_construct2( + kmp_i18n_msg_CnsInvalidNesting, + ct, ident, + & p->stack_data[ p->s_top ] + ); + } +} + +/* ------------------------------------------------------------------------ */ + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_error.h b/contrib/libs/cxxsupp/openmp/kmp_error.h index 0df196f64dc..9dfe111dea1 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_error.h +++ b/contrib/libs/cxxsupp/openmp/kmp_error.h @@ -1,57 +1,57 @@ -/* - * kmp_error.h -- PTS functions for error checking at runtime. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_ERROR_H -#define KMP_ERROR_H - -#include "kmp_i18n.h" - -/* ------------------------------------------------------------------------ */ -#ifdef __cplusplus - extern "C" { -#endif - -void __kmp_error_construct( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident ); -void __kmp_error_construct2( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident, struct cons_data const * cons ); - -struct cons_header * __kmp_allocate_cons_stack( int gtid ); -void __kmp_free_cons_stack( void * ptr ); - -void __kmp_push_parallel( int gtid, ident_t const * ident ); -void __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident ); -#if KMP_USE_DYNAMIC_LOCK -void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); -#else -void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); -#endif - -void __kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident ); -#if KMP_USE_DYNAMIC_LOCK -void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); -#else -void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); -#endif - -void __kmp_pop_parallel( int gtid, ident_t const * ident ); -enum cons_type __kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident ); -void __kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident ); -void __kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident ); - -#ifdef __cplusplus - } // extern "C" -#endif - -#endif // KMP_ERROR_H - +/* + * kmp_error.h -- PTS functions for error checking at runtime. 
+ */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_ERROR_H +#define KMP_ERROR_H + +#include "kmp_i18n.h" + +/* ------------------------------------------------------------------------ */ +#ifdef __cplusplus + extern "C" { +#endif + +void __kmp_error_construct( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident ); +void __kmp_error_construct2( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident, struct cons_data const * cons ); + +struct cons_header * __kmp_allocate_cons_stack( int gtid ); +void __kmp_free_cons_stack( void * ptr ); + +void __kmp_push_parallel( int gtid, ident_t const * ident ); +void __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident ); +#if KMP_USE_DYNAMIC_LOCK +void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); +#else +void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); +#endif + +void __kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident ); +#if KMP_USE_DYNAMIC_LOCK +void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); +#else +void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); +#endif + +void __kmp_pop_parallel( int gtid, ident_t const * ident ); +enum cons_type __kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident ); +void __kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident ); +void __kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident ); + +#ifdef __cplusplus + } // extern "C" +#endif + +#endif // KMP_ERROR_H + diff 
--git a/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c b/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c index 472ec894df5..51fa1bf8954 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c +++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c @@ -1,35 +1,35 @@ -/* - * kmp_ftn_cdecl.c -- Fortran __cdecl linkage support for OpenMP. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" - -#if KMP_OS_WINDOWS -# if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB -# define KMP_FTN_ENTRIES KMP_FTN_UPPER -# endif -#elif KMP_OS_UNIX -# define KMP_FTN_ENTRIES KMP_FTN_PLAIN -#endif - -// Note: This string is not printed when KMP_VERSION=1. -char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: " -#ifdef KMP_FTN_ENTRIES - "yes"; -# define FTN_STDCALL /* no stdcall */ -# include "kmp_ftn_os.h" -# include "kmp_ftn_entry.h" -#else - "no"; -#endif /* KMP_FTN_ENTRIES */ +/* + * kmp_ftn_cdecl.c -- Fortran __cdecl linkage support for OpenMP. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" + +#if KMP_OS_WINDOWS +# if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB +# define KMP_FTN_ENTRIES KMP_FTN_UPPER +# endif +#elif KMP_OS_UNIX +# define KMP_FTN_ENTRIES KMP_FTN_PLAIN +#endif + +// Note: This string is not printed when KMP_VERSION=1. 
+char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: " +#ifdef KMP_FTN_ENTRIES + "yes"; +# define FTN_STDCALL /* no stdcall */ +# include "kmp_ftn_os.h" +# include "kmp_ftn_entry.h" +#else + "no"; +#endif /* KMP_FTN_ENTRIES */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h b/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h index b54e8765ed6..fcbaacbffa7 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h +++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h @@ -1,1253 +1,1253 @@ -/* - * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef FTN_STDCALL -# error The support file kmp_ftn_entry.h should not be compiled by itself. -#endif - -#ifdef KMP_STUB - #include "kmp_stub.h" -#endif - -#include "kmp_i18n.h" - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -/* - * For compatibility with the Gnu/MS Open MP codegen, omp_set_num_threads(), - * omp_set_nested(), and omp_set_dynamic() [in lowercase on MS, and w/o - * a trailing underscore on Linux* OS] take call by value integer arguments. - * + omp_set_max_active_levels() - * + omp_set_schedule() - * - * For backward compatibility with 9.1 and previous Intel compiler, these - * entry points take call by reference integer arguments. - */ -#ifdef KMP_GOMP_COMPAT -# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_UPPER) -# define PASS_ARGS_BY_VALUE 1 -# endif -#endif -#if KMP_OS_WINDOWS -# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_APPEND) -# define PASS_ARGS_BY_VALUE 1 -# endif -#endif - -// This macro helps to reduce code duplication. 
-#ifdef PASS_ARGS_BY_VALUE - #define KMP_DEREF -#else - #define KMP_DEREF * -#endif - -void FTN_STDCALL -FTN_SET_STACKSIZE( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_stacksize( KMP_DEREF arg ); - #else - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( (size_t) KMP_DEREF arg ); - #endif -} - -void FTN_STDCALL -FTN_SET_STACKSIZE_S( size_t KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_stacksize( KMP_DEREF arg ); - #else - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( KMP_DEREF arg ); - #endif -} - -int FTN_STDCALL -FTN_GET_STACKSIZE( void ) -{ - #ifdef KMP_STUB - return __kmps_get_stacksize(); - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - return (int)__kmp_stksize; - #endif -} - -size_t FTN_STDCALL -FTN_GET_STACKSIZE_S( void ) -{ - #ifdef KMP_STUB - return __kmps_get_stacksize(); - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - return __kmp_stksize; - #endif -} - -void FTN_STDCALL -FTN_SET_BLOCKTIME( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_blocktime( KMP_DEREF arg ); - #else - int gtid, tid; - kmp_info_t *thread; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - - __kmp_aux_set_blocktime( KMP_DEREF arg, thread, tid ); - #endif -} - -int FTN_STDCALL -FTN_GET_BLOCKTIME( void ) -{ - #ifdef KMP_STUB - return __kmps_get_blocktime(); - #else - int gtid, tid; - kmp_info_t *thread; - kmp_team_p *team; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - team = __kmp_threads[ gtid ] -> th.th_team; - - /* These must match the settings used in __kmp_wait_sleep() */ - if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { - KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", - gtid, team->t.t_id, tid, KMP_MAX_BLOCKTIME) ); - return KMP_MAX_BLOCKTIME; - } -#ifdef KMP_ADJUST_BLOCKTIME - else if ( 
__kmp_zero_bt && !get__bt_set( team, tid ) ) { - KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", - gtid, team->t.t_id, tid, 0) ); - return 0; - } -#endif /* KMP_ADJUST_BLOCKTIME */ - else { - KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", - gtid, team->t.t_id, tid, get__blocktime( team, tid ) ) ); - return get__blocktime( team, tid ); - }; - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY_SERIAL( void ) -{ - #ifdef KMP_STUB - __kmps_set_library( library_serial ); - #else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( library_serial ); - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY_TURNAROUND( void ) -{ - #ifdef KMP_STUB - __kmps_set_library( library_turnaround ); - #else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( library_turnaround ); - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY_THROUGHPUT( void ) -{ - #ifdef KMP_STUB - __kmps_set_library( library_throughput ); - #else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( library_throughput ); - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_library( KMP_DEREF arg ); - #else - enum library_type lib; - lib = (enum library_type) KMP_DEREF arg; - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( lib ); - #endif -} - -int FTN_STDCALL -FTN_GET_LIBRARY (void) -{ - #ifdef KMP_STUB - return __kmps_get_library(); - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - } - return ((int) __kmp_library); - #endif -} - -int FTN_STDCALL -FTN_SET_AFFINITY( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity( mask ); - #endif -} - -int FTN_STDCALL -FTN_GET_AFFINITY( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity( mask ); - #endif -} - -int FTN_STDCALL -FTN_GET_AFFINITY_MAX_PROC( void ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; - #else - // - // We really only NEED serial initialization here. - // - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if ( ! ( KMP_AFFINITY_CAPABLE() ) ) { - return 0; - } - - #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC - if ( __kmp_num_proc_groups > 1 ) { - return (int)KMP_CPU_SETSIZE; - } - #endif /* KMP_GROUP_AFFINITY */ - return __kmp_xproc; - #endif -} - -void FTN_STDCALL -FTN_CREATE_AFFINITY_MASK( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - *mask = NULL; - #else - // - // We really only NEED serial initialization here. - // - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - # if KMP_USE_HWLOC - *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc(); - # else - *mask = kmpc_malloc( __kmp_affin_mask_size ); - # endif - KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) ); - #endif -} - -void FTN_STDCALL -FTN_DESTROY_AFFINITY_MASK( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - // Nothing - #else - // - // We really only NEED serial initialization here. - // - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if ( __kmp_env_consistency_check ) { - if ( *mask == NULL ) { - KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" ); - } - } - # if KMP_USE_HWLOC - hwloc_bitmap_free((hwloc_cpuset_t)(*mask)); - # else - kmpc_free( *mask ); - # endif - *mask = NULL; - #endif -} - -int FTN_STDCALL -FTN_SET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity_mask_proc( KMP_DEREF proc, mask ); - #endif -} - -int FTN_STDCALL -FTN_UNSET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_unset_affinity_mask_proc( KMP_DEREF proc, mask ); - #endif -} - -int FTN_STDCALL -FTN_GET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_mask_proc( KMP_DEREF proc, mask ); - #endif -} - - -/* ------------------------------------------------------------------------ */ - -/* sets the requested number of threads for the next parallel region */ - -void FTN_STDCALL -xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - // Nothing. 
- #else - __kmp_set_num_threads( KMP_DEREF arg, __kmp_entry_gtid() ); - #endif -} - - -/* returns the number of threads in current team */ -int FTN_STDCALL -xexpand(FTN_GET_NUM_THREADS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - // __kmpc_bound_num_threads initializes the library if needed - return __kmpc_bound_num_threads(NULL); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_MAX_THREADS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - int gtid; - kmp_info_t *thread; - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - gtid = __kmp_entry_gtid(); - thread = __kmp_threads[ gtid ]; - //return thread -> th.th_team -> t.t_current_task[ thread->th.th_info.ds.ds_tid ] -> icvs.nproc; - return thread -> th.th_current_task -> td_icvs.nproc; - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_THREAD_NUM)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - int gtid; - - #if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD - gtid = __kmp_entry_gtid(); - #elif KMP_OS_WINDOWS - if (!__kmp_init_parallel || - (gtid = (int)((kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ))) == 0) { - // Either library isn't initialized or thread is not registered - // 0 is the correct TID in this case - return 0; - } - --gtid; // We keep (gtid+1) in TLS - #elif KMP_OS_LINUX - #ifdef KMP_TDATA_GTID - if ( __kmp_gtid_mode >= 3 ) { - if ((gtid = __kmp_gtid) == KMP_GTID_DNE) { - return 0; - } - } else { - #endif - if (!__kmp_init_parallel || - (gtid = (kmp_intptr_t)(pthread_getspecific( __kmp_gtid_threadprivate_key ))) == 0) { - return 0; - } - --gtid; - #ifdef KMP_TDATA_GTID - } - #endif - #else - #error Unknown or unsupported OS - #endif - - return __kmp_tid_from_gtid( gtid ); - #endif -} - -int FTN_STDCALL -FTN_GET_NUM_KNOWN_THREADS( void ) -{ - #ifdef KMP_STUB - return 1; - #else - if ( ! 
__kmp_init_serial ) { - __kmp_serial_initialize(); - } - /* NOTE: this is not syncronized, so it can change at any moment */ - /* NOTE: this number also includes threads preallocated in hot-teams */ - return TCR_4(__kmp_nth); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_NUM_PROCS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_avail_proc; - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_NESTED)( int KMP_DEREF flag ) -{ - #ifdef KMP_STUB - __kmps_set_nested( KMP_DEREF flag ); - #else - kmp_info_t *thread; - /* For the thread-private internal controls implementation */ - thread = __kmp_entry_thread(); - __kmp_save_internal_controls( thread ); - set__nested( thread, ( (KMP_DEREF flag) ? TRUE : FALSE ) ); - #endif -} - - -int FTN_STDCALL -xexpand(FTN_GET_NESTED)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_nested(); - #else - kmp_info_t *thread; - thread = __kmp_entry_thread(); - return get__nested( thread ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag ) -{ - #ifdef KMP_STUB - __kmps_set_dynamic( KMP_DEREF flag ? TRUE : FALSE ); - #else - kmp_info_t *thread; - /* For the thread-private implementation of the internal controls */ - thread = __kmp_entry_thread(); - // !!! What if foreign thread calls it? - __kmp_save_internal_controls( thread ); - set__dynamic( thread, KMP_DEREF flag ? 
TRUE : FALSE ); - #endif -} - - -int FTN_STDCALL -xexpand(FTN_GET_DYNAMIC)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_dynamic(); - #else - kmp_info_t *thread; - thread = __kmp_entry_thread(); - return get__dynamic( thread ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_IN_PARALLEL)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - kmp_info_t *th = __kmp_entry_thread(); -#if OMP_40_ENABLED - if ( th->th.th_teams_microtask ) { - // AC: r_in_parallel does not work inside teams construct - // where real parallel is inactive, but all threads have same root, - // so setting it in one team affects other teams. - // The solution is to use per-team nesting level - return ( th->th.th_team->t.t_active_level ? 1 : 0 ); - } - else -#endif /* OMP_40_ENABLED */ - return ( th->th.th_root->r.r_in_parallel ? FTN_TRUE : FTN_FALSE ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier ) -{ - #ifdef KMP_STUB - __kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier ); - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - __kmp_set_schedule( __kmp_entry_gtid(), KMP_DEREF kind, KMP_DEREF modifier ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier ) -{ - #ifdef KMP_STUB - __kmps_get_schedule( kind, modifier ); - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - __kmp_get_schedule( __kmp_entry_gtid(), kind, modifier ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - // Nothing. 
- #else - /* TO DO */ - /* We want per-task implementation of this internal control */ - __kmp_set_max_active_levels( __kmp_entry_gtid(), KMP_DEREF arg ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - /* TO DO */ - /* We want per-task implementation of this internal control */ - return __kmp_get_max_active_levels( __kmp_entry_gtid() ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_ACTIVE_LEVEL)( void ) -{ - #ifdef KMP_STUB - return 0; // returns 0 if it is called from the sequential part of the program - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - return __kmp_entry_thread() -> th.th_team -> t.t_active_level; - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_LEVEL)( void ) -{ - #ifdef KMP_STUB - return 0; // returns 0 if it is called from the sequential part of the program - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - return __kmp_entry_thread() -> th.th_team -> t.t_level; - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level ) -{ - #ifdef KMP_STUB - return ( KMP_DEREF level ) ? ( -1 ) : ( 0 ); - #else - return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), KMP_DEREF level ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level ) -{ - #ifdef KMP_STUB - return ( KMP_DEREF level ) ? ( -1 ) : ( 1 ); - #else - return __kmp_get_team_size( __kmp_entry_gtid(), KMP_DEREF level ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_THREAD_LIMIT)( void ) -{ - #ifdef KMP_STUB - return 1; // TO DO: clarify whether it returns 1 or 0? - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - /* global ICV */ - return __kmp_max_nth; - #endif -} - -int FTN_STDCALL -xexpand(FTN_IN_FINAL)( void ) -{ - #ifdef KMP_STUB - return 0; // TO DO: clarify whether it returns 1 or 0? - #else - if ( ! 
TCR_4(__kmp_init_parallel) ) { - return 0; - } - return __kmp_entry_thread() -> th.th_current_task -> td_flags.final; - #endif -} - -#if OMP_40_ENABLED - - -kmp_proc_bind_t FTN_STDCALL -xexpand(FTN_GET_PROC_BIND)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_proc_bind(); - #else - return get__proc_bind( __kmp_entry_thread() ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_NUM_TEAMS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - kmp_info_t *thr = __kmp_entry_thread(); - if ( thr->th.th_teams_microtask ) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; - int ii = team->t.t_level; // the level of the teams construct - int dd = team -> t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT( ii >= tlevel ); - while( ii > level ) - { - for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( team -> t.t_serialized && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - ii--; - } - } - if ( dd > 1 ) { - return 1; // teams region is serialized ( 1 team of 1 thread ). - } else { - return team->t.t_parent->t.t_nproc; - } - } else { - return 1; - } - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_TEAM_NUM)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - kmp_info_t *thr = __kmp_entry_thread(); - if ( thr->th.th_teams_microtask ) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; // the level of the teams construct - int ii = team->t.t_level; - int dd = team -> t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT( ii >= tlevel ); - while( ii > level ) - { - for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( team -> t.t_serialized && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - ii--; - } - } - if ( dd > 1 ) { - return 0; // teams region is serialized ( 1 team of 1 thread ). 
- } else { - return team->t.t_master_tid; - } - } else { - return 0; - } - #endif -} - -#if KMP_MIC || KMP_OS_DARWIN - -static int __kmp_default_device = 0; - -int FTN_STDCALL -FTN_GET_DEFAULT_DEVICE( void ) -{ - return __kmp_default_device; -} - -void FTN_STDCALL -FTN_SET_DEFAULT_DEVICE( int KMP_DEREF arg ) -{ - __kmp_default_device = KMP_DEREF arg; -} - -int FTN_STDCALL -FTN_GET_NUM_DEVICES( void ) -{ - return 0; -} - -#endif // KMP_MIC || KMP_OS_DARWIN - -#if ! KMP_OS_LINUX - -int FTN_STDCALL -xexpand(FTN_IS_INITIAL_DEVICE)( void ) -{ - return 1; -} - -#else - -// This internal function is used when the entry from the offload library -// is not found. -int _Offload_get_device_number( void ) __attribute__((weak)); - -int FTN_STDCALL -xexpand(FTN_IS_INITIAL_DEVICE)( void ) -{ - if( _Offload_get_device_number ) { - return _Offload_get_device_number() == -1; - } else { - return 1; - } -} - -#endif // ! KMP_OS_LINUX - -#endif // OMP_40_ENABLED - -#ifdef KMP_STUB -typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t; -#endif /* KMP_STUB */ - -#if KMP_USE_DYNAMIC_LOCK -void FTN_STDCALL -FTN_INIT_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_init_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint ); - #endif -} - -void FTN_STDCALL -FTN_INIT_NEST_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_init_nest_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint ); - #endif -} -#endif - -/* initialize the lock */ -void FTN_STDCALL -xexpand(FTN_INIT_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_init_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -/* initialize the lock */ -void FTN_STDCALL -xexpand(FTN_INIT_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - 
*((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_init_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_DESTROY_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNINIT; - #else - __kmpc_destroy_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNINIT; - #else - __kmpc_destroy_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) != UNLOCKED ) { - // TODO: Issue an error. - }; // if - *((kmp_stub_lock_t *)user_lock) = LOCKED; - #else - __kmpc_set_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - (*((int *)user_lock))++; - #else - __kmpc_set_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_UNSET_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) { - // TODO: Issue an error. - }; // if - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_unset_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) { - // TODO: Issue an error. 
- }; // if - (*((int *)user_lock))--; - #else - __kmpc_unset_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_TEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) == LOCKED ) { - return 0; - }; // if - *((kmp_stub_lock_t *)user_lock) = LOCKED; - return 1; - #else - return __kmpc_test_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_TEST_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - return ++(*((int *)user_lock)); - #else - return __kmpc_test_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -double FTN_STDCALL -xexpand(FTN_GET_WTIME)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_wtime(); - #else - double data; - #if ! KMP_OS_LINUX - // We don't need library initialization to get the time on Linux* OS. - // The routine can be used to measure library initialization time on Linux* OS now. - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - #endif - __kmp_elapsed( & data ); - return data; - #endif -} - -double FTN_STDCALL -xexpand(FTN_GET_WTICK)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_wtick(); - #else - double data; - if ( ! 
__kmp_init_serial ) { - __kmp_serial_initialize(); - }; - __kmp_elapsed_tick( & data ); - return data; - #endif -} - -/* ------------------------------------------------------------------------ */ - -void * FTN_STDCALL -FTN_MALLOC( size_t KMP_DEREF size ) -{ - // kmpc_malloc initializes the library if needed - return kmpc_malloc( KMP_DEREF size ); -} - -void * FTN_STDCALL -FTN_CALLOC( size_t KMP_DEREF nelem, size_t KMP_DEREF elsize ) -{ - // kmpc_calloc initializes the library if needed - return kmpc_calloc( KMP_DEREF nelem, KMP_DEREF elsize ); -} - -void * FTN_STDCALL -FTN_REALLOC( void * KMP_DEREF ptr, size_t KMP_DEREF size ) -{ - // kmpc_realloc initializes the library if needed - return kmpc_realloc( KMP_DEREF ptr, KMP_DEREF size ); -} - -void FTN_STDCALL -FTN_FREE( void * KMP_DEREF ptr ) -{ - // does nothing if the library is not initialized - kmpc_free( KMP_DEREF ptr ); -} - -void FTN_STDCALL -FTN_SET_WARNINGS_ON( void ) -{ - #ifndef KMP_STUB - __kmp_generate_warnings = kmp_warnings_explicit; - #endif -} - -void FTN_STDCALL -FTN_SET_WARNINGS_OFF( void ) -{ - #ifndef KMP_STUB - __kmp_generate_warnings = FALSE; - #endif -} - -void FTN_STDCALL -FTN_SET_DEFAULTS( char const * str - #ifndef PASS_ARGS_BY_VALUE - , int len - #endif -) -{ - #ifndef KMP_STUB - #ifdef PASS_ARGS_BY_VALUE - int len = (int)KMP_STRLEN( str ); - #endif - __kmp_aux_set_defaults( str, len ); - #endif -} - -/* ------------------------------------------------------------------------ */ - - -#if OMP_40_ENABLED -/* returns the status of cancellation */ -int FTN_STDCALL -xexpand(FTN_GET_CANCELLATION)(void) { -#ifdef KMP_STUB - return 0 /* false */; -#else - // initialize the library if needed - if ( ! 
__kmp_init_serial ) { - __kmp_serial_initialize(); - } - return __kmp_omp_cancellation; -#endif -} - -int FTN_STDCALL -FTN_GET_CANCELLATION_STATUS(int cancel_kind) { -#ifdef KMP_STUB - return 0 /* false */; -#else - return __kmp_get_cancellation_status(cancel_kind); -#endif -} - -#endif // OMP_40_ENABLED - -// GCC compatibility (versioned symbols) -#ifdef KMP_USE_VERSION_SYMBOLS - -/* - These following sections create function aliases (dummy symbols) for the omp_* routines. - These aliases will then be versioned according to how libgomp ``versions'' its - symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the - default version which libomp uses: VERSION (defined in exports_so.txt) - If you want to see the versioned symbols for libgomp.so.1 then just type: - - objdump -T /path/to/libgomp.so.1 | grep omp_ - - Example: - Step 1) Create __kmp_api_omp_set_num_threads_10_alias - which is alias of __kmp_api_omp_set_num_threads - Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0 - Step 2B) Set __kmp_api_omp_set_num_threads to default version : omp_set_num_threads@@VERSION -*/ - -// OMP_1.0 aliases -xaliasify(FTN_SET_NUM_THREADS, 10); -xaliasify(FTN_GET_NUM_THREADS, 10); -xaliasify(FTN_GET_MAX_THREADS, 10); -xaliasify(FTN_GET_THREAD_NUM, 10); -xaliasify(FTN_GET_NUM_PROCS, 10); -xaliasify(FTN_IN_PARALLEL, 10); -xaliasify(FTN_SET_DYNAMIC, 10); -xaliasify(FTN_GET_DYNAMIC, 10); -xaliasify(FTN_SET_NESTED, 10); -xaliasify(FTN_GET_NESTED, 10); -xaliasify(FTN_INIT_LOCK, 10); -xaliasify(FTN_INIT_NEST_LOCK, 10); -xaliasify(FTN_DESTROY_LOCK, 10); -xaliasify(FTN_DESTROY_NEST_LOCK, 10); -xaliasify(FTN_SET_LOCK, 10); -xaliasify(FTN_SET_NEST_LOCK, 10); -xaliasify(FTN_UNSET_LOCK, 10); -xaliasify(FTN_UNSET_NEST_LOCK, 10); -xaliasify(FTN_TEST_LOCK, 10); -xaliasify(FTN_TEST_NEST_LOCK, 10); - -// OMP_2.0 aliases -xaliasify(FTN_GET_WTICK, 20); -xaliasify(FTN_GET_WTIME, 20); - -// OMP_3.0 aliases -xaliasify(FTN_SET_SCHEDULE, 30); 
-xaliasify(FTN_GET_SCHEDULE, 30); -xaliasify(FTN_GET_THREAD_LIMIT, 30); -xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30); -xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30); -xaliasify(FTN_GET_LEVEL, 30); -xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30); -xaliasify(FTN_GET_TEAM_SIZE, 30); -xaliasify(FTN_GET_ACTIVE_LEVEL, 30); -xaliasify(FTN_INIT_LOCK, 30); -xaliasify(FTN_INIT_NEST_LOCK, 30); -xaliasify(FTN_DESTROY_LOCK, 30); -xaliasify(FTN_DESTROY_NEST_LOCK, 30); -xaliasify(FTN_SET_LOCK, 30); -xaliasify(FTN_SET_NEST_LOCK, 30); -xaliasify(FTN_UNSET_LOCK, 30); -xaliasify(FTN_UNSET_NEST_LOCK, 30); -xaliasify(FTN_TEST_LOCK, 30); -xaliasify(FTN_TEST_NEST_LOCK, 30); - -// OMP_3.1 aliases -xaliasify(FTN_IN_FINAL, 31); - -#if OMP_40_ENABLED -// OMP_4.0 aliases -xaliasify(FTN_GET_PROC_BIND, 40); -xaliasify(FTN_GET_NUM_TEAMS, 40); -xaliasify(FTN_GET_TEAM_NUM, 40); -xaliasify(FTN_GET_CANCELLATION, 40); -xaliasify(FTN_IS_INITIAL_DEVICE, 40); -#endif /* OMP_40_ENABLED */ - -#if OMP_41_ENABLED -// OMP_4.1 aliases -#endif - -#if OMP_50_ENABLED -// OMP_5.0 aliases -#endif - -// OMP_1.0 versioned symbols -xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0"); -xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0"); -xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0"); -xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0"); -xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0"); -xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0"); -xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0"); -xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0"); -xversionify(FTN_SET_NESTED, 10, "OMP_1.0"); -xversionify(FTN_GET_NESTED, 10, "OMP_1.0"); -xversionify(FTN_INIT_LOCK, 10, "OMP_1.0"); -xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0"); -xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_SET_LOCK, 10, "OMP_1.0"); -xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0"); -xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_TEST_LOCK, 10, "OMP_1.0"); 
-xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0"); - -// OMP_2.0 versioned symbols -xversionify(FTN_GET_WTICK, 20, "OMP_2.0"); -xversionify(FTN_GET_WTIME, 20, "OMP_2.0"); - -// OMP_3.0 versioned symbols -xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0"); -xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0"); -xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0"); -xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); -xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); -xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0"); -xversionify(FTN_GET_LEVEL, 30, "OMP_3.0"); -xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0"); -xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0"); - -// the lock routines have a 1.0 and 3.0 version -xversionify(FTN_INIT_LOCK, 30, "OMP_3.0"); -xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0"); -xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_SET_LOCK, 30, "OMP_3.0"); -xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0"); -xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_TEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0"); - -// OMP_3.1 versioned symbol -xversionify(FTN_IN_FINAL, 31, "OMP_3.1"); - -#if OMP_40_ENABLED -// OMP_4.0 versioned symbols -xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0"); -xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0"); -xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0"); -xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0"); -xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0"); -#endif /* OMP_40_ENABLED */ - -#if OMP_41_ENABLED -// OMP_4.1 versioned symbols -#endif - -#if OMP_50_ENABLED -// OMP_5.0 versioned symbols -#endif - -#endif // KMP_USE_VERSION_SYMBOLS - -#ifdef __cplusplus - } //extern "C" -#endif // __cplusplus - -// end of file // +/* + * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP. 
+ */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef FTN_STDCALL +# error The support file kmp_ftn_entry.h should not be compiled by itself. +#endif + +#ifdef KMP_STUB + #include "kmp_stub.h" +#endif + +#include "kmp_i18n.h" + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +/* + * For compatibility with the Gnu/MS Open MP codegen, omp_set_num_threads(), + * omp_set_nested(), and omp_set_dynamic() [in lowercase on MS, and w/o + * a trailing underscore on Linux* OS] take call by value integer arguments. + * + omp_set_max_active_levels() + * + omp_set_schedule() + * + * For backward compatibility with 9.1 and previous Intel compiler, these + * entry points take call by reference integer arguments. + */ +#ifdef KMP_GOMP_COMPAT +# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_UPPER) +# define PASS_ARGS_BY_VALUE 1 +# endif +#endif +#if KMP_OS_WINDOWS +# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_APPEND) +# define PASS_ARGS_BY_VALUE 1 +# endif +#endif + +// This macro helps to reduce code duplication. 
+#ifdef PASS_ARGS_BY_VALUE + #define KMP_DEREF +#else + #define KMP_DEREF * +#endif + +void FTN_STDCALL +FTN_SET_STACKSIZE( int KMP_DEREF arg ) +{ + #ifdef KMP_STUB + __kmps_set_stacksize( KMP_DEREF arg ); + #else + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize( (size_t) KMP_DEREF arg ); + #endif +} + +void FTN_STDCALL +FTN_SET_STACKSIZE_S( size_t KMP_DEREF arg ) +{ + #ifdef KMP_STUB + __kmps_set_stacksize( KMP_DEREF arg ); + #else + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize( KMP_DEREF arg ); + #endif +} + +int FTN_STDCALL +FTN_GET_STACKSIZE( void ) +{ + #ifdef KMP_STUB + return __kmps_get_stacksize(); + #else + if ( ! __kmp_init_serial ) { + __kmp_serial_initialize(); + }; + return (int)__kmp_stksize; + #endif +} + +size_t FTN_STDCALL +FTN_GET_STACKSIZE_S( void ) +{ + #ifdef KMP_STUB + return __kmps_get_stacksize(); + #else + if ( ! __kmp_init_serial ) { + __kmp_serial_initialize(); + }; + return __kmp_stksize; + #endif +} + +void FTN_STDCALL +FTN_SET_BLOCKTIME( int KMP_DEREF arg ) +{ + #ifdef KMP_STUB + __kmps_set_blocktime( KMP_DEREF arg ); + #else + int gtid, tid; + kmp_info_t *thread; + + gtid = __kmp_entry_gtid(); + tid = __kmp_tid_from_gtid(gtid); + thread = __kmp_thread_from_gtid(gtid); + + __kmp_aux_set_blocktime( KMP_DEREF arg, thread, tid ); + #endif +} + +int FTN_STDCALL +FTN_GET_BLOCKTIME( void ) +{ + #ifdef KMP_STUB + return __kmps_get_blocktime(); + #else + int gtid, tid; + kmp_info_t *thread; + kmp_team_p *team; + + gtid = __kmp_entry_gtid(); + tid = __kmp_tid_from_gtid(gtid); + thread = __kmp_thread_from_gtid(gtid); + team = __kmp_threads[ gtid ] -> th.th_team; + + /* These must match the settings used in __kmp_wait_sleep() */ + if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { + KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", + gtid, team->t.t_id, tid, KMP_MAX_BLOCKTIME) ); + return KMP_MAX_BLOCKTIME; + } +#ifdef KMP_ADJUST_BLOCKTIME + else if ( 
__kmp_zero_bt && !get__bt_set( team, tid ) ) { + KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", + gtid, team->t.t_id, tid, 0) ); + return 0; + } +#endif /* KMP_ADJUST_BLOCKTIME */ + else { + KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", + gtid, team->t.t_id, tid, get__blocktime( team, tid ) ) ); + return get__blocktime( team, tid ); + }; + #endif +} + +void FTN_STDCALL +FTN_SET_LIBRARY_SERIAL( void ) +{ + #ifdef KMP_STUB + __kmps_set_library( library_serial ); + #else + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library( library_serial ); + #endif +} + +void FTN_STDCALL +FTN_SET_LIBRARY_TURNAROUND( void ) +{ + #ifdef KMP_STUB + __kmps_set_library( library_turnaround ); + #else + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library( library_turnaround ); + #endif +} + +void FTN_STDCALL +FTN_SET_LIBRARY_THROUGHPUT( void ) +{ + #ifdef KMP_STUB + __kmps_set_library( library_throughput ); + #else + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library( library_throughput ); + #endif +} + +void FTN_STDCALL +FTN_SET_LIBRARY( int KMP_DEREF arg ) +{ + #ifdef KMP_STUB + __kmps_set_library( KMP_DEREF arg ); + #else + enum library_type lib; + lib = (enum library_type) KMP_DEREF arg; + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library( lib ); + #endif +} + +int FTN_STDCALL +FTN_GET_LIBRARY (void) +{ + #ifdef KMP_STUB + return __kmps_get_library(); + #else + if ( ! __kmp_init_serial ) { + __kmp_serial_initialize(); + } + return ((int) __kmp_library); + #endif +} + +int FTN_STDCALL +FTN_SET_AFFINITY( void **mask ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; + #else + if ( ! 
TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_set_affinity( mask ); + #endif +} + +int FTN_STDCALL +FTN_GET_AFFINITY( void **mask ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; + #else + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_get_affinity( mask ); + #endif +} + +int FTN_STDCALL +FTN_GET_AFFINITY_MAX_PROC( void ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return 0; + #else + // + // We really only NEED serial initialization here. + // + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + if ( ! ( KMP_AFFINITY_CAPABLE() ) ) { + return 0; + } + + #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC + if ( __kmp_num_proc_groups > 1 ) { + return (int)KMP_CPU_SETSIZE; + } + #endif /* KMP_GROUP_AFFINITY */ + return __kmp_xproc; + #endif +} + +void FTN_STDCALL +FTN_CREATE_AFFINITY_MASK( void **mask ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + *mask = NULL; + #else + // + // We really only NEED serial initialization here. + // + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + # if KMP_USE_HWLOC + *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc(); + # else + *mask = kmpc_malloc( __kmp_affin_mask_size ); + # endif + KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) ); + #endif +} + +void FTN_STDCALL +FTN_DESTROY_AFFINITY_MASK( void **mask ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + // Nothing + #else + // + // We really only NEED serial initialization here. + // + if ( ! 
TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + if ( __kmp_env_consistency_check ) { + if ( *mask == NULL ) { + KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" ); + } + } + # if KMP_USE_HWLOC + hwloc_bitmap_free((hwloc_cpuset_t)(*mask)); + # else + kmpc_free( *mask ); + # endif + *mask = NULL; + #endif +} + +int FTN_STDCALL +FTN_SET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; + #else + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_set_affinity_mask_proc( KMP_DEREF proc, mask ); + #endif +} + +int FTN_STDCALL +FTN_UNSET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; + #else + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_unset_affinity_mask_proc( KMP_DEREF proc, mask ); + #endif +} + +int FTN_STDCALL +FTN_GET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) +{ + #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; + #else + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_aux_get_affinity_mask_proc( KMP_DEREF proc, mask ); + #endif +} + + +/* ------------------------------------------------------------------------ */ + +/* sets the requested number of threads for the next parallel region */ + +void FTN_STDCALL +xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg ) +{ + #ifdef KMP_STUB + // Nothing. 
+ #else + __kmp_set_num_threads( KMP_DEREF arg, __kmp_entry_gtid() ); + #endif +} + + +/* returns the number of threads in current team */ +int FTN_STDCALL +xexpand(FTN_GET_NUM_THREADS)( void ) +{ + #ifdef KMP_STUB + return 1; + #else + // __kmpc_bound_num_threads initializes the library if needed + return __kmpc_bound_num_threads(NULL); + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_MAX_THREADS)( void ) +{ + #ifdef KMP_STUB + return 1; + #else + int gtid; + kmp_info_t *thread; + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + gtid = __kmp_entry_gtid(); + thread = __kmp_threads[ gtid ]; + //return thread -> th.th_team -> t.t_current_task[ thread->th.th_info.ds.ds_tid ] -> icvs.nproc; + return thread -> th.th_current_task -> td_icvs.nproc; + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_THREAD_NUM)( void ) +{ + #ifdef KMP_STUB + return 0; + #else + int gtid; + + #if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD + gtid = __kmp_entry_gtid(); + #elif KMP_OS_WINDOWS + if (!__kmp_init_parallel || + (gtid = (int)((kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ))) == 0) { + // Either library isn't initialized or thread is not registered + // 0 is the correct TID in this case + return 0; + } + --gtid; // We keep (gtid+1) in TLS + #elif KMP_OS_LINUX + #ifdef KMP_TDATA_GTID + if ( __kmp_gtid_mode >= 3 ) { + if ((gtid = __kmp_gtid) == KMP_GTID_DNE) { + return 0; + } + } else { + #endif + if (!__kmp_init_parallel || + (gtid = (kmp_intptr_t)(pthread_getspecific( __kmp_gtid_threadprivate_key ))) == 0) { + return 0; + } + --gtid; + #ifdef KMP_TDATA_GTID + } + #endif + #else + #error Unknown or unsupported OS + #endif + + return __kmp_tid_from_gtid( gtid ); + #endif +} + +int FTN_STDCALL +FTN_GET_NUM_KNOWN_THREADS( void ) +{ + #ifdef KMP_STUB + return 1; + #else + if ( ! 
__kmp_init_serial ) { + __kmp_serial_initialize(); + } + /* NOTE: this is not syncronized, so it can change at any moment */ + /* NOTE: this number also includes threads preallocated in hot-teams */ + return TCR_4(__kmp_nth); + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_NUM_PROCS)( void ) +{ + #ifdef KMP_STUB + return 1; + #else + if ( ! TCR_4(__kmp_init_middle) ) { + __kmp_middle_initialize(); + } + return __kmp_avail_proc; + #endif +} + +void FTN_STDCALL +xexpand(FTN_SET_NESTED)( int KMP_DEREF flag ) +{ + #ifdef KMP_STUB + __kmps_set_nested( KMP_DEREF flag ); + #else + kmp_info_t *thread; + /* For the thread-private internal controls implementation */ + thread = __kmp_entry_thread(); + __kmp_save_internal_controls( thread ); + set__nested( thread, ( (KMP_DEREF flag) ? TRUE : FALSE ) ); + #endif +} + + +int FTN_STDCALL +xexpand(FTN_GET_NESTED)( void ) +{ + #ifdef KMP_STUB + return __kmps_get_nested(); + #else + kmp_info_t *thread; + thread = __kmp_entry_thread(); + return get__nested( thread ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag ) +{ + #ifdef KMP_STUB + __kmps_set_dynamic( KMP_DEREF flag ? TRUE : FALSE ); + #else + kmp_info_t *thread; + /* For the thread-private implementation of the internal controls */ + thread = __kmp_entry_thread(); + // !!! What if foreign thread calls it? + __kmp_save_internal_controls( thread ); + set__dynamic( thread, KMP_DEREF flag ? 
TRUE : FALSE ); + #endif +} + + +int FTN_STDCALL +xexpand(FTN_GET_DYNAMIC)( void ) +{ + #ifdef KMP_STUB + return __kmps_get_dynamic(); + #else + kmp_info_t *thread; + thread = __kmp_entry_thread(); + return get__dynamic( thread ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_IN_PARALLEL)( void ) +{ + #ifdef KMP_STUB + return 0; + #else + kmp_info_t *th = __kmp_entry_thread(); +#if OMP_40_ENABLED + if ( th->th.th_teams_microtask ) { + // AC: r_in_parallel does not work inside teams construct + // where real parallel is inactive, but all threads have same root, + // so setting it in one team affects other teams. + // The solution is to use per-team nesting level + return ( th->th.th_team->t.t_active_level ? 1 : 0 ); + } + else +#endif /* OMP_40_ENABLED */ + return ( th->th.th_root->r.r_in_parallel ? FTN_TRUE : FTN_FALSE ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier ) +{ + #ifdef KMP_STUB + __kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier ); + #else + /* TO DO */ + /* For the per-task implementation of the internal controls */ + __kmp_set_schedule( __kmp_entry_gtid(), KMP_DEREF kind, KMP_DEREF modifier ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier ) +{ + #ifdef KMP_STUB + __kmps_get_schedule( kind, modifier ); + #else + /* TO DO */ + /* For the per-task implementation of the internal controls */ + __kmp_get_schedule( __kmp_entry_gtid(), kind, modifier ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg ) +{ + #ifdef KMP_STUB + // Nothing. 
+ #else + /* TO DO */ + /* We want per-task implementation of this internal control */ + __kmp_set_max_active_levels( __kmp_entry_gtid(), KMP_DEREF arg ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void ) +{ + #ifdef KMP_STUB + return 0; + #else + /* TO DO */ + /* We want per-task implementation of this internal control */ + return __kmp_get_max_active_levels( __kmp_entry_gtid() ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_ACTIVE_LEVEL)( void ) +{ + #ifdef KMP_STUB + return 0; // returns 0 if it is called from the sequential part of the program + #else + /* TO DO */ + /* For the per-task implementation of the internal controls */ + return __kmp_entry_thread() -> th.th_team -> t.t_active_level; + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_LEVEL)( void ) +{ + #ifdef KMP_STUB + return 0; // returns 0 if it is called from the sequential part of the program + #else + /* TO DO */ + /* For the per-task implementation of the internal controls */ + return __kmp_entry_thread() -> th.th_team -> t.t_level; + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level ) +{ + #ifdef KMP_STUB + return ( KMP_DEREF level ) ? ( -1 ) : ( 0 ); + #else + return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), KMP_DEREF level ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level ) +{ + #ifdef KMP_STUB + return ( KMP_DEREF level ) ? ( -1 ) : ( 1 ); + #else + return __kmp_get_team_size( __kmp_entry_gtid(), KMP_DEREF level ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_THREAD_LIMIT)( void ) +{ + #ifdef KMP_STUB + return 1; // TO DO: clarify whether it returns 1 or 0? + #else + if ( ! __kmp_init_serial ) { + __kmp_serial_initialize(); + }; + /* global ICV */ + return __kmp_max_nth; + #endif +} + +int FTN_STDCALL +xexpand(FTN_IN_FINAL)( void ) +{ + #ifdef KMP_STUB + return 0; // TO DO: clarify whether it returns 1 or 0? + #else + if ( ! 
TCR_4(__kmp_init_parallel) ) { + return 0; + } + return __kmp_entry_thread() -> th.th_current_task -> td_flags.final; + #endif +} + +#if OMP_40_ENABLED + + +kmp_proc_bind_t FTN_STDCALL +xexpand(FTN_GET_PROC_BIND)( void ) +{ + #ifdef KMP_STUB + return __kmps_get_proc_bind(); + #else + return get__proc_bind( __kmp_entry_thread() ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_NUM_TEAMS)( void ) +{ + #ifdef KMP_STUB + return 1; + #else + kmp_info_t *thr = __kmp_entry_thread(); + if ( thr->th.th_teams_microtask ) { + kmp_team_t *team = thr->th.th_team; + int tlevel = thr->th.th_teams_level; + int ii = team->t.t_level; // the level of the teams construct + int dd = team -> t.t_serialized; + int level = tlevel + 1; + KMP_DEBUG_ASSERT( ii >= tlevel ); + while( ii > level ) + { + for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) + { + } + if( team -> t.t_serialized && ( !dd ) ) { + team = team->t.t_parent; + continue; + } + if( ii > level ) { + team = team->t.t_parent; + ii--; + } + } + if ( dd > 1 ) { + return 1; // teams region is serialized ( 1 team of 1 thread ). + } else { + return team->t.t_parent->t.t_nproc; + } + } else { + return 1; + } + #endif +} + +int FTN_STDCALL +xexpand(FTN_GET_TEAM_NUM)( void ) +{ + #ifdef KMP_STUB + return 0; + #else + kmp_info_t *thr = __kmp_entry_thread(); + if ( thr->th.th_teams_microtask ) { + kmp_team_t *team = thr->th.th_team; + int tlevel = thr->th.th_teams_level; // the level of the teams construct + int ii = team->t.t_level; + int dd = team -> t.t_serialized; + int level = tlevel + 1; + KMP_DEBUG_ASSERT( ii >= tlevel ); + while( ii > level ) + { + for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) + { + } + if( team -> t.t_serialized && ( !dd ) ) { + team = team->t.t_parent; + continue; + } + if( ii > level ) { + team = team->t.t_parent; + ii--; + } + } + if ( dd > 1 ) { + return 0; // teams region is serialized ( 1 team of 1 thread ). 
+ } else { + return team->t.t_master_tid; + } + } else { + return 0; + } + #endif +} + +#if KMP_MIC || KMP_OS_DARWIN + +static int __kmp_default_device = 0; + +int FTN_STDCALL +FTN_GET_DEFAULT_DEVICE( void ) +{ + return __kmp_default_device; +} + +void FTN_STDCALL +FTN_SET_DEFAULT_DEVICE( int KMP_DEREF arg ) +{ + __kmp_default_device = KMP_DEREF arg; +} + +int FTN_STDCALL +FTN_GET_NUM_DEVICES( void ) +{ + return 0; +} + +#endif // KMP_MIC || KMP_OS_DARWIN + +#if ! KMP_OS_LINUX + +int FTN_STDCALL +xexpand(FTN_IS_INITIAL_DEVICE)( void ) +{ + return 1; +} + +#else + +// This internal function is used when the entry from the offload library +// is not found. +int _Offload_get_device_number( void ) __attribute__((weak)); + +int FTN_STDCALL +xexpand(FTN_IS_INITIAL_DEVICE)( void ) +{ + if( _Offload_get_device_number ) { + return _Offload_get_device_number() == -1; + } else { + return 1; + } +} + +#endif // ! KMP_OS_LINUX + +#endif // OMP_40_ENABLED + +#ifdef KMP_STUB +typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t; +#endif /* KMP_STUB */ + +#if KMP_USE_DYNAMIC_LOCK +void FTN_STDCALL +FTN_INIT_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint ) +{ + #ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; + #else + __kmpc_init_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint ); + #endif +} + +void FTN_STDCALL +FTN_INIT_NEST_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint ) +{ + #ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; + #else + __kmpc_init_nest_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint ); + #endif +} +#endif + +/* initialize the lock */ +void FTN_STDCALL +xexpand(FTN_INIT_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; + #else + __kmpc_init_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +/* initialize the lock */ +void FTN_STDCALL +xexpand(FTN_INIT_NEST_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + 
*((kmp_stub_lock_t *)user_lock) = UNLOCKED; + #else + __kmpc_init_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_DESTROY_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNINIT; + #else + __kmpc_destroy_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNINIT; + #else + __kmpc_destroy_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_SET_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { + // TODO: Issue an error. + }; // if + if ( *((kmp_stub_lock_t *)user_lock) != UNLOCKED ) { + // TODO: Issue an error. + }; // if + *((kmp_stub_lock_t *)user_lock) = LOCKED; + #else + __kmpc_set_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_SET_NEST_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { + // TODO: Issue an error. + }; // if + (*((int *)user_lock))++; + #else + __kmpc_set_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_UNSET_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { + // TODO: Issue an error. + }; // if + if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) { + // TODO: Issue an error. + }; // if + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; + #else + __kmpc_unset_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +void FTN_STDCALL +xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { + // TODO: Issue an error. + }; // if + if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) { + // TODO: Issue an error. 
+ }; // if + (*((int *)user_lock))--; + #else + __kmpc_unset_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_TEST_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { + // TODO: Issue an error. + }; // if + if ( *((kmp_stub_lock_t *)user_lock) == LOCKED ) { + return 0; + }; // if + *((kmp_stub_lock_t *)user_lock) = LOCKED; + return 1; + #else + return __kmpc_test_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +int FTN_STDCALL +xexpand(FTN_TEST_NEST_LOCK)( void **user_lock ) +{ + #ifdef KMP_STUB + if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { + // TODO: Issue an error. + }; // if + return ++(*((int *)user_lock)); + #else + return __kmpc_test_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); + #endif +} + +double FTN_STDCALL +xexpand(FTN_GET_WTIME)( void ) +{ + #ifdef KMP_STUB + return __kmps_get_wtime(); + #else + double data; + #if ! KMP_OS_LINUX + // We don't need library initialization to get the time on Linux* OS. + // The routine can be used to measure library initialization time on Linux* OS now. + if ( ! __kmp_init_serial ) { + __kmp_serial_initialize(); + }; + #endif + __kmp_elapsed( & data ); + return data; + #endif +} + +double FTN_STDCALL +xexpand(FTN_GET_WTICK)( void ) +{ + #ifdef KMP_STUB + return __kmps_get_wtick(); + #else + double data; + if ( ! 
__kmp_init_serial ) { + __kmp_serial_initialize(); + }; + __kmp_elapsed_tick( & data ); + return data; + #endif +} + +/* ------------------------------------------------------------------------ */ + +void * FTN_STDCALL +FTN_MALLOC( size_t KMP_DEREF size ) +{ + // kmpc_malloc initializes the library if needed + return kmpc_malloc( KMP_DEREF size ); +} + +void * FTN_STDCALL +FTN_CALLOC( size_t KMP_DEREF nelem, size_t KMP_DEREF elsize ) +{ + // kmpc_calloc initializes the library if needed + return kmpc_calloc( KMP_DEREF nelem, KMP_DEREF elsize ); +} + +void * FTN_STDCALL +FTN_REALLOC( void * KMP_DEREF ptr, size_t KMP_DEREF size ) +{ + // kmpc_realloc initializes the library if needed + return kmpc_realloc( KMP_DEREF ptr, KMP_DEREF size ); +} + +void FTN_STDCALL +FTN_FREE( void * KMP_DEREF ptr ) +{ + // does nothing if the library is not initialized + kmpc_free( KMP_DEREF ptr ); +} + +void FTN_STDCALL +FTN_SET_WARNINGS_ON( void ) +{ + #ifndef KMP_STUB + __kmp_generate_warnings = kmp_warnings_explicit; + #endif +} + +void FTN_STDCALL +FTN_SET_WARNINGS_OFF( void ) +{ + #ifndef KMP_STUB + __kmp_generate_warnings = FALSE; + #endif +} + +void FTN_STDCALL +FTN_SET_DEFAULTS( char const * str + #ifndef PASS_ARGS_BY_VALUE + , int len + #endif +) +{ + #ifndef KMP_STUB + #ifdef PASS_ARGS_BY_VALUE + int len = (int)KMP_STRLEN( str ); + #endif + __kmp_aux_set_defaults( str, len ); + #endif +} + +/* ------------------------------------------------------------------------ */ + + +#if OMP_40_ENABLED +/* returns the status of cancellation */ +int FTN_STDCALL +xexpand(FTN_GET_CANCELLATION)(void) { +#ifdef KMP_STUB + return 0 /* false */; +#else + // initialize the library if needed + if ( ! 
__kmp_init_serial ) { + __kmp_serial_initialize(); + } + return __kmp_omp_cancellation; +#endif +} + +int FTN_STDCALL +FTN_GET_CANCELLATION_STATUS(int cancel_kind) { +#ifdef KMP_STUB + return 0 /* false */; +#else + return __kmp_get_cancellation_status(cancel_kind); +#endif +} + +#endif // OMP_40_ENABLED + +// GCC compatibility (versioned symbols) +#ifdef KMP_USE_VERSION_SYMBOLS + +/* + These following sections create function aliases (dummy symbols) for the omp_* routines. + These aliases will then be versioned according to how libgomp ``versions'' its + symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the + default version which libomp uses: VERSION (defined in exports_so.txt) + If you want to see the versioned symbols for libgomp.so.1 then just type: + + objdump -T /path/to/libgomp.so.1 | grep omp_ + + Example: + Step 1) Create __kmp_api_omp_set_num_threads_10_alias + which is alias of __kmp_api_omp_set_num_threads + Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0 + Step 2B) Set __kmp_api_omp_set_num_threads to default version : omp_set_num_threads@@VERSION +*/ + +// OMP_1.0 aliases +xaliasify(FTN_SET_NUM_THREADS, 10); +xaliasify(FTN_GET_NUM_THREADS, 10); +xaliasify(FTN_GET_MAX_THREADS, 10); +xaliasify(FTN_GET_THREAD_NUM, 10); +xaliasify(FTN_GET_NUM_PROCS, 10); +xaliasify(FTN_IN_PARALLEL, 10); +xaliasify(FTN_SET_DYNAMIC, 10); +xaliasify(FTN_GET_DYNAMIC, 10); +xaliasify(FTN_SET_NESTED, 10); +xaliasify(FTN_GET_NESTED, 10); +xaliasify(FTN_INIT_LOCK, 10); +xaliasify(FTN_INIT_NEST_LOCK, 10); +xaliasify(FTN_DESTROY_LOCK, 10); +xaliasify(FTN_DESTROY_NEST_LOCK, 10); +xaliasify(FTN_SET_LOCK, 10); +xaliasify(FTN_SET_NEST_LOCK, 10); +xaliasify(FTN_UNSET_LOCK, 10); +xaliasify(FTN_UNSET_NEST_LOCK, 10); +xaliasify(FTN_TEST_LOCK, 10); +xaliasify(FTN_TEST_NEST_LOCK, 10); + +// OMP_2.0 aliases +xaliasify(FTN_GET_WTICK, 20); +xaliasify(FTN_GET_WTIME, 20); + +// OMP_3.0 aliases +xaliasify(FTN_SET_SCHEDULE, 30); 
+xaliasify(FTN_GET_SCHEDULE, 30); +xaliasify(FTN_GET_THREAD_LIMIT, 30); +xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30); +xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30); +xaliasify(FTN_GET_LEVEL, 30); +xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30); +xaliasify(FTN_GET_TEAM_SIZE, 30); +xaliasify(FTN_GET_ACTIVE_LEVEL, 30); +xaliasify(FTN_INIT_LOCK, 30); +xaliasify(FTN_INIT_NEST_LOCK, 30); +xaliasify(FTN_DESTROY_LOCK, 30); +xaliasify(FTN_DESTROY_NEST_LOCK, 30); +xaliasify(FTN_SET_LOCK, 30); +xaliasify(FTN_SET_NEST_LOCK, 30); +xaliasify(FTN_UNSET_LOCK, 30); +xaliasify(FTN_UNSET_NEST_LOCK, 30); +xaliasify(FTN_TEST_LOCK, 30); +xaliasify(FTN_TEST_NEST_LOCK, 30); + +// OMP_3.1 aliases +xaliasify(FTN_IN_FINAL, 31); + +#if OMP_40_ENABLED +// OMP_4.0 aliases +xaliasify(FTN_GET_PROC_BIND, 40); +xaliasify(FTN_GET_NUM_TEAMS, 40); +xaliasify(FTN_GET_TEAM_NUM, 40); +xaliasify(FTN_GET_CANCELLATION, 40); +xaliasify(FTN_IS_INITIAL_DEVICE, 40); +#endif /* OMP_40_ENABLED */ + +#if OMP_41_ENABLED +// OMP_4.1 aliases +#endif + +#if OMP_50_ENABLED +// OMP_5.0 aliases +#endif + +// OMP_1.0 versioned symbols +xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0"); +xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0"); +xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0"); +xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0"); +xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0"); +xversionify(FTN_SET_NESTED, 10, "OMP_1.0"); +xversionify(FTN_GET_NESTED, 10, "OMP_1.0"); +xversionify(FTN_INIT_LOCK, 10, "OMP_1.0"); +xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0"); +xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_SET_LOCK, 10, "OMP_1.0"); +xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0"); +xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_TEST_LOCK, 10, "OMP_1.0"); 
+xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0"); + +// OMP_2.0 versioned symbols +xversionify(FTN_GET_WTICK, 20, "OMP_2.0"); +xversionify(FTN_GET_WTIME, 20, "OMP_2.0"); + +// OMP_3.0 versioned symbols +xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0"); +xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0"); +xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0"); +xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); +xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); +xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0"); +xversionify(FTN_GET_LEVEL, 30, "OMP_3.0"); +xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0"); +xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0"); + +// the lock routines have a 1.0 and 3.0 version +xversionify(FTN_INIT_LOCK, 30, "OMP_3.0"); +xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0"); +xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_SET_LOCK, 30, "OMP_3.0"); +xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0"); +xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_TEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0"); + +// OMP_3.1 versioned symbol +xversionify(FTN_IN_FINAL, 31, "OMP_3.1"); + +#if OMP_40_ENABLED +// OMP_4.0 versioned symbols +xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0"); +xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0"); +xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0"); +xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0"); +xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0"); +#endif /* OMP_40_ENABLED */ + +#if OMP_41_ENABLED +// OMP_4.1 versioned symbols +#endif + +#if OMP_50_ENABLED +// OMP_5.0 versioned symbols +#endif + +#endif // KMP_USE_VERSION_SYMBOLS + +#ifdef __cplusplus + } //extern "C" +#endif // __cplusplus + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c b/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c index c954d2072a0..1d0fb4ca13f 100644 --- 
a/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c +++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c @@ -1,33 +1,33 @@ -/* - * kmp_ftn_extra.c -- Fortran 'extra' linkage support for OpenMP. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" - -#if KMP_OS_WINDOWS -# define KMP_FTN_ENTRIES KMP_FTN_PLAIN -#elif KMP_OS_UNIX -# define KMP_FTN_ENTRIES KMP_FTN_APPEND -#endif - -// Note: This string is not printed when KMP_VERSION=1. -char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: " -#ifdef KMP_FTN_ENTRIES - "yes"; -# define FTN_STDCALL /* nothing to do */ -# include "kmp_ftn_os.h" -# include "kmp_ftn_entry.h" -#else - "no"; -#endif /* KMP_FTN_ENTRIES */ +/* + * kmp_ftn_extra.c -- Fortran 'extra' linkage support for OpenMP. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" + +#if KMP_OS_WINDOWS +# define KMP_FTN_ENTRIES KMP_FTN_PLAIN +#elif KMP_OS_UNIX +# define KMP_FTN_ENTRIES KMP_FTN_APPEND +#endif + +// Note: This string is not printed when KMP_VERSION=1. 
+char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: " +#ifdef KMP_FTN_ENTRIES + "yes"; +# define FTN_STDCALL /* nothing to do */ +# include "kmp_ftn_os.h" +# include "kmp_ftn_entry.h" +#else + "no"; +#endif /* KMP_FTN_ENTRIES */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h b/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h index d7d30343de5..4b41260a9dd 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h +++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h @@ -1,532 +1,532 @@ -/* - * kmp_ftn_os.h -- KPTS Fortran defines header file. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_FTN_OS_H -#define KMP_FTN_OS_H - -// KMP_FNT_ENTRIES may be one of: KMP_FTN_PLAIN, KMP_FTN_UPPER, KMP_FTN_APPEND, KMP_FTN_UAPPEND. 
- - -/* -------------------------- External definitions ------------------------ */ - -#if KMP_FTN_ENTRIES == KMP_FTN_PLAIN - - #define FTN_SET_STACKSIZE kmp_set_stacksize - #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s - #define FTN_GET_STACKSIZE kmp_get_stacksize - #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s - #define FTN_SET_BLOCKTIME kmp_set_blocktime - #define FTN_GET_BLOCKTIME kmp_get_blocktime - #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial - #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround - #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput - #define FTN_SET_LIBRARY kmp_set_library - #define FTN_GET_LIBRARY kmp_get_library - #define FTN_SET_DEFAULTS kmp_set_defaults - #define FTN_SET_AFFINITY kmp_set_affinity - #define FTN_GET_AFFINITY kmp_get_affinity - #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc - #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask - #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask - #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc - #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc - #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc - - #define FTN_MALLOC kmp_malloc - #define FTN_CALLOC kmp_calloc - #define FTN_REALLOC kmp_realloc - #define FTN_FREE kmp_free - - #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads - - #define FTN_SET_NUM_THREADS omp_set_num_threads - #define FTN_GET_NUM_THREADS omp_get_num_threads - #define FTN_GET_MAX_THREADS omp_get_max_threads - #define FTN_GET_THREAD_NUM omp_get_thread_num - #define FTN_GET_NUM_PROCS omp_get_num_procs - #define FTN_SET_DYNAMIC omp_set_dynamic - #define FTN_GET_DYNAMIC omp_get_dynamic - #define FTN_SET_NESTED omp_set_nested - #define FTN_GET_NESTED omp_get_nested - #define FTN_IN_PARALLEL omp_in_parallel - #define FTN_GET_THREAD_LIMIT omp_get_thread_limit - #define FTN_SET_SCHEDULE omp_set_schedule - #define FTN_GET_SCHEDULE omp_get_schedule - #define 
FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels - #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels - #define FTN_GET_ACTIVE_LEVEL omp_get_active_level - #define FTN_GET_LEVEL omp_get_level - #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num - #define FTN_GET_TEAM_SIZE omp_get_team_size - #define FTN_IN_FINAL omp_in_final -// #define FTN_SET_PROC_BIND omp_set_proc_bind - #define FTN_GET_PROC_BIND omp_get_proc_bind -// #define FTN_CURR_PROC_BIND omp_curr_proc_bind -#if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS omp_get_num_teams - #define FTN_GET_TEAM_NUM omp_get_team_num -#endif - #define FTN_INIT_LOCK omp_init_lock -#if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint - #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint -#endif - #define FTN_DESTROY_LOCK omp_destroy_lock - #define FTN_SET_LOCK omp_set_lock - #define FTN_UNSET_LOCK omp_unset_lock - #define FTN_TEST_LOCK omp_test_lock - #define FTN_INIT_NEST_LOCK omp_init_nest_lock - #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock - #define FTN_SET_NEST_LOCK omp_set_nest_lock - #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock - #define FTN_TEST_NEST_LOCK omp_test_nest_lock - - #define FTN_SET_WARNINGS_ON kmp_set_warnings_on - #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off - - #define FTN_GET_WTIME omp_get_wtime - #define FTN_GET_WTICK omp_get_wtick - -#if OMP_40_ENABLED -#if KMP_MIC || KMP_OS_DARWIN - #define FTN_GET_DEFAULT_DEVICE omp_get_default_device - #define FTN_SET_DEFAULT_DEVICE omp_set_default_device - #define FTN_GET_NUM_DEVICES omp_get_num_devices -#endif - #define FTN_IS_INITIAL_DEVICE omp_is_initial_device -#endif - -#if OMP_40_ENABLED - #define FTN_GET_CANCELLATION omp_get_cancellation - #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status -#endif - -#endif /* KMP_FTN_PLAIN */ - -/* ------------------------------------------------------------------------ */ - -#if KMP_FTN_ENTRIES == KMP_FTN_APPEND - - #define 
FTN_SET_STACKSIZE kmp_set_stacksize_ - #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s_ - #define FTN_GET_STACKSIZE kmp_get_stacksize_ - #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s_ - #define FTN_SET_BLOCKTIME kmp_set_blocktime_ - #define FTN_GET_BLOCKTIME kmp_get_blocktime_ - #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial_ - #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround_ - #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput_ - #define FTN_SET_LIBRARY kmp_set_library_ - #define FTN_GET_LIBRARY kmp_get_library_ - #define FTN_SET_DEFAULTS kmp_set_defaults_ - #define FTN_SET_AFFINITY kmp_set_affinity_ - #define FTN_GET_AFFINITY kmp_get_affinity_ - #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc_ - #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask_ - #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask_ - #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc_ - #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc_ - #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc_ - - #define FTN_MALLOC kmp_malloc_ - #define FTN_CALLOC kmp_calloc_ - #define FTN_REALLOC kmp_realloc_ - #define FTN_FREE kmp_free_ - - #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_ - - #define FTN_SET_NUM_THREADS omp_set_num_threads_ - #define FTN_GET_NUM_THREADS omp_get_num_threads_ - #define FTN_GET_MAX_THREADS omp_get_max_threads_ - #define FTN_GET_THREAD_NUM omp_get_thread_num_ - #define FTN_GET_NUM_PROCS omp_get_num_procs_ - #define FTN_SET_DYNAMIC omp_set_dynamic_ - #define FTN_GET_DYNAMIC omp_get_dynamic_ - #define FTN_SET_NESTED omp_set_nested_ - #define FTN_GET_NESTED omp_get_nested_ - #define FTN_IN_PARALLEL omp_in_parallel_ - #define FTN_GET_THREAD_LIMIT omp_get_thread_limit_ - #define FTN_SET_SCHEDULE omp_set_schedule_ - #define FTN_GET_SCHEDULE omp_get_schedule_ - #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels_ - #define FTN_GET_MAX_ACTIVE_LEVELS 
omp_get_max_active_levels_ - #define FTN_GET_ACTIVE_LEVEL omp_get_active_level_ - #define FTN_GET_LEVEL omp_get_level_ - #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num_ - #define FTN_GET_TEAM_SIZE omp_get_team_size_ - #define FTN_IN_FINAL omp_in_final_ -// #define FTN_SET_PROC_BIND omp_set_proc_bind_ - #define FTN_GET_PROC_BIND omp_get_proc_bind_ -// #define FTN_CURR_PROC_BIND omp_curr_proc_bind_ -#if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS omp_get_num_teams_ - #define FTN_GET_TEAM_NUM omp_get_team_num_ -#endif - #define FTN_INIT_LOCK omp_init_lock_ -#if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint_ - #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint_ -#endif - #define FTN_DESTROY_LOCK omp_destroy_lock_ - #define FTN_SET_LOCK omp_set_lock_ - #define FTN_UNSET_LOCK omp_unset_lock_ - #define FTN_TEST_LOCK omp_test_lock_ - #define FTN_INIT_NEST_LOCK omp_init_nest_lock_ - #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock_ - #define FTN_SET_NEST_LOCK omp_set_nest_lock_ - #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock_ - #define FTN_TEST_NEST_LOCK omp_test_nest_lock_ - - #define FTN_SET_WARNINGS_ON kmp_set_warnings_on_ - #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off_ - - #define FTN_GET_WTIME omp_get_wtime_ - #define FTN_GET_WTICK omp_get_wtick_ - -#if OMP_40_ENABLED -#if KMP_MIC || KMP_OS_DARWIN - #define FTN_GET_DEFAULT_DEVICE omp_get_default_device_ - #define FTN_SET_DEFAULT_DEVICE omp_set_default_device_ - #define FTN_GET_NUM_DEVICES omp_get_num_devices_ -#endif - #define FTN_IS_INITIAL_DEVICE omp_is_initial_device_ -#endif - - -#if OMP_40_ENABLED - #define FTN_GET_CANCELLATION omp_get_cancellation_ - #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_ -#endif - -#endif /* KMP_FTN_APPEND */ - -/* ------------------------------------------------------------------------ */ - -#if KMP_FTN_ENTRIES == KMP_FTN_UPPER - - #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE - #define 
FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S - #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE - #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S - #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME - #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME - #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL - #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND - #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT - #define FTN_SET_LIBRARY KMP_SET_LIBRARY - #define FTN_GET_LIBRARY KMP_GET_LIBRARY - #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS - #define FTN_SET_AFFINITY KMP_SET_AFFINITY - #define FTN_GET_AFFINITY KMP_GET_AFFINITY - #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC - #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK - #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK - #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC - #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC - #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC - - #define FTN_MALLOC KMP_MALLOC - #define FTN_CALLOC KMP_CALLOC - #define FTN_REALLOC KMP_REALLOC - #define FTN_FREE KMP_FREE - - #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS - - #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS - #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS - #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS - #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM - #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS - #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC - #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC - #define FTN_SET_NESTED OMP_SET_NESTED - #define FTN_GET_NESTED OMP_GET_NESTED - #define FTN_IN_PARALLEL OMP_IN_PARALLEL - #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT - #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE - #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE - #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS - #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS - #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL - #define FTN_GET_LEVEL 
OMP_GET_LEVEL - #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM - #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE - #define FTN_IN_FINAL OMP_IN_FINAL -// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND - #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND -// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND -#if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS - #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM -#endif - #define FTN_INIT_LOCK OMP_INIT_LOCK -#if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT - #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT -#endif - #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK - #define FTN_SET_LOCK OMP_SET_LOCK - #define FTN_UNSET_LOCK OMP_UNSET_LOCK - #define FTN_TEST_LOCK OMP_TEST_LOCK - #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK - #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK - #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK - #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK - #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK - - #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON - #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF - - #define FTN_GET_WTIME OMP_GET_WTIME - #define FTN_GET_WTICK OMP_GET_WTICK - -#if OMP_40_ENABLED -#if KMP_MIC || KMP_OS_DARWIN - #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE - #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE - #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES -#endif - #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE -#endif - - -#if OMP_40_ENABLED - #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION - #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS -#endif - -#endif /* KMP_FTN_UPPER */ - -/* ------------------------------------------------------------------------ */ - -#if KMP_FTN_ENTRIES == KMP_FTN_UAPPEND - - #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE_ - #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S_ - #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE_ - #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S_ - 
#define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME_ - #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME_ - #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL_ - #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND_ - #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT_ - #define FTN_SET_LIBRARY KMP_SET_LIBRARY_ - #define FTN_GET_LIBRARY KMP_GET_LIBRARY_ - #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS_ - #define FTN_SET_AFFINITY KMP_SET_AFFINITY_ - #define FTN_GET_AFFINITY KMP_GET_AFFINITY_ - #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC_ - #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK_ - #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK_ - #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC_ - #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC_ - #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC_ - - #define FTN_MALLOC KMP_MALLOC_ - #define FTN_CALLOC KMP_CALLOC_ - #define FTN_REALLOC KMP_REALLOC_ - #define FTN_FREE KMP_FREE_ - - #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_ - - #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_ - #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_ - #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_ - #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_ - #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS_ - #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC_ - #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC_ - #define FTN_SET_NESTED OMP_SET_NESTED_ - #define FTN_GET_NESTED OMP_GET_NESTED_ - #define FTN_IN_PARALLEL OMP_IN_PARALLEL_ - #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT_ - #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE_ - #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE_ - #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS_ - #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS_ - #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL_ - #define FTN_GET_LEVEL OMP_GET_LEVEL_ - #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM_ - #define 
FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE_ - #define FTN_IN_FINAL OMP_IN_FINAL_ -// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND_ - #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND_ -// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND_ -#if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_ - #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_ -#endif - #define FTN_INIT_LOCK OMP_INIT_LOCK_ -#if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT_ - #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT_ -#endif - #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_ - #define FTN_SET_LOCK OMP_SET_LOCK_ - #define FTN_UNSET_LOCK OMP_UNSET_LOCK_ - #define FTN_TEST_LOCK OMP_TEST_LOCK_ - #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK_ - #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK_ - #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK_ - #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK_ - #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK_ - - #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON_ - #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF_ - - #define FTN_GET_WTIME OMP_GET_WTIME_ - #define FTN_GET_WTICK OMP_GET_WTICK_ - -#if OMP_40_ENABLED -#if KMP_MIC || KMP_OS_DARWIN - #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE_ - #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE_ - #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES_ -#endif - #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_ -#endif - - -#if OMP_40_ENABLED - #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_ - #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_ -#endif - -#endif /* KMP_FTN_UAPPEND */ - -/* ------------------------------------------------------------------ */ -/* -------------------------- GOMP API NAMES ------------------------ */ -// All GOMP_1.0 symbols -#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end -#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start -#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier -#define KMP_API_NAME_GOMP_CRITICAL_END 
GOMP_critical_end -#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end -#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start -#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start -#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end -#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait -#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next -#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT GOMP_loop_ordered_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START GOMP_loop_ordered_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START GOMP_loop_ordered_guided_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT GOMP_loop_ordered_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START GOMP_loop_ordered_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START GOMP_loop_ordered_static_start -#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next -#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start -#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next -#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start -#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end -#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start -#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START GOMP_parallel_loop_dynamic_start -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START GOMP_parallel_loop_guided_start -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START GOMP_parallel_loop_runtime_start -#define 
KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START GOMP_parallel_loop_static_start -#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start -#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start -#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end -#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait -#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next -#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start -#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end -#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start -#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start - -// All GOMP_2.0 symbols -#define KMP_API_NAME_GOMP_TASK GOMP_task -#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait -#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next -#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT GOMP_loop_ull_ordered_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT GOMP_loop_ull_ordered_guided_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START GOMP_loop_ull_ordered_guided_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT GOMP_loop_ull_ordered_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT GOMP_loop_ull_ordered_static_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START GOMP_loop_ull_ordered_static_start -#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start -#define 
KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next -#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start - -// All GOMP_3.0 symbols -#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield - -// All GOMP_4.0 symbols -// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libomp -#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel -#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel -#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point -#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static -#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections -#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel -#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel -#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start -#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end -/* Target functions should be taken care of by liboffload */ -#define KMP_API_NAME_GOMP_TARGET GOMP_target -#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data -#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data -#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update -#define KMP_API_NAME_GOMP_TEAMS GOMP_teams - -#ifdef KMP_USE_VERSION_SYMBOLS - #define xstr(x) str(x) - #define str(x) #x - - // If Linux, xexpand prepends __kmp_api_ to the real API name - #define xexpand(api_name) expand(api_name) - #define expand(api_name) __kmp_api_##api_name - - #define xaliasify(api_name,ver) aliasify(api_name,ver) - #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name)))) - - #define 
xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION") - #define versionify(api_name, version_num, version_str, default_ver) \ - __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \ - __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t") - -#else // KMP_USE_VERSION_SYMBOLS - #define xstr(x) /* Nothing */ - #define str(x) /* Nothing */ - - // if Windows or Mac, xexpand does no name transformation - #define xexpand(api_name) expand(api_name) - #define expand(api_name) api_name - - #define xaliasify(api_name,ver) /* Nothing */ - #define aliasify(api_name,ver) /* Nothing */ - - #define xversionify(api_name, version_num, version_str) /* Nothing */ - #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */ - -#endif // KMP_USE_VERSION_SYMBOLS - -#endif /* KMP_FTN_OS_H */ - +/* + * kmp_ftn_os.h -- KPTS Fortran defines header file. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_FTN_OS_H +#define KMP_FTN_OS_H + +// KMP_FNT_ENTRIES may be one of: KMP_FTN_PLAIN, KMP_FTN_UPPER, KMP_FTN_APPEND, KMP_FTN_UAPPEND. 
+ + +/* -------------------------- External definitions ------------------------ */ + +#if KMP_FTN_ENTRIES == KMP_FTN_PLAIN + + #define FTN_SET_STACKSIZE kmp_set_stacksize + #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s + #define FTN_GET_STACKSIZE kmp_get_stacksize + #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s + #define FTN_SET_BLOCKTIME kmp_set_blocktime + #define FTN_GET_BLOCKTIME kmp_get_blocktime + #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial + #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround + #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput + #define FTN_SET_LIBRARY kmp_set_library + #define FTN_GET_LIBRARY kmp_get_library + #define FTN_SET_DEFAULTS kmp_set_defaults + #define FTN_SET_AFFINITY kmp_set_affinity + #define FTN_GET_AFFINITY kmp_get_affinity + #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc + #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask + #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask + #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc + #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc + #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc + + #define FTN_MALLOC kmp_malloc + #define FTN_CALLOC kmp_calloc + #define FTN_REALLOC kmp_realloc + #define FTN_FREE kmp_free + + #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads + + #define FTN_SET_NUM_THREADS omp_set_num_threads + #define FTN_GET_NUM_THREADS omp_get_num_threads + #define FTN_GET_MAX_THREADS omp_get_max_threads + #define FTN_GET_THREAD_NUM omp_get_thread_num + #define FTN_GET_NUM_PROCS omp_get_num_procs + #define FTN_SET_DYNAMIC omp_set_dynamic + #define FTN_GET_DYNAMIC omp_get_dynamic + #define FTN_SET_NESTED omp_set_nested + #define FTN_GET_NESTED omp_get_nested + #define FTN_IN_PARALLEL omp_in_parallel + #define FTN_GET_THREAD_LIMIT omp_get_thread_limit + #define FTN_SET_SCHEDULE omp_set_schedule + #define FTN_GET_SCHEDULE omp_get_schedule + #define 
FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels + #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels + #define FTN_GET_ACTIVE_LEVEL omp_get_active_level + #define FTN_GET_LEVEL omp_get_level + #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num + #define FTN_GET_TEAM_SIZE omp_get_team_size + #define FTN_IN_FINAL omp_in_final +// #define FTN_SET_PROC_BIND omp_set_proc_bind + #define FTN_GET_PROC_BIND omp_get_proc_bind +// #define FTN_CURR_PROC_BIND omp_curr_proc_bind +#if OMP_40_ENABLED + #define FTN_GET_NUM_TEAMS omp_get_num_teams + #define FTN_GET_TEAM_NUM omp_get_team_num +#endif + #define FTN_INIT_LOCK omp_init_lock +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint + #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint +#endif + #define FTN_DESTROY_LOCK omp_destroy_lock + #define FTN_SET_LOCK omp_set_lock + #define FTN_UNSET_LOCK omp_unset_lock + #define FTN_TEST_LOCK omp_test_lock + #define FTN_INIT_NEST_LOCK omp_init_nest_lock + #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock + #define FTN_SET_NEST_LOCK omp_set_nest_lock + #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock + #define FTN_TEST_NEST_LOCK omp_test_nest_lock + + #define FTN_SET_WARNINGS_ON kmp_set_warnings_on + #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off + + #define FTN_GET_WTIME omp_get_wtime + #define FTN_GET_WTICK omp_get_wtick + +#if OMP_40_ENABLED +#if KMP_MIC || KMP_OS_DARWIN + #define FTN_GET_DEFAULT_DEVICE omp_get_default_device + #define FTN_SET_DEFAULT_DEVICE omp_set_default_device + #define FTN_GET_NUM_DEVICES omp_get_num_devices +#endif + #define FTN_IS_INITIAL_DEVICE omp_is_initial_device +#endif + +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION omp_get_cancellation + #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status +#endif + +#endif /* KMP_FTN_PLAIN */ + +/* ------------------------------------------------------------------------ */ + +#if KMP_FTN_ENTRIES == KMP_FTN_APPEND + + #define 
FTN_SET_STACKSIZE kmp_set_stacksize_ + #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s_ + #define FTN_GET_STACKSIZE kmp_get_stacksize_ + #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s_ + #define FTN_SET_BLOCKTIME kmp_set_blocktime_ + #define FTN_GET_BLOCKTIME kmp_get_blocktime_ + #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial_ + #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround_ + #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput_ + #define FTN_SET_LIBRARY kmp_set_library_ + #define FTN_GET_LIBRARY kmp_get_library_ + #define FTN_SET_DEFAULTS kmp_set_defaults_ + #define FTN_SET_AFFINITY kmp_set_affinity_ + #define FTN_GET_AFFINITY kmp_get_affinity_ + #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc_ + #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask_ + #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask_ + #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc_ + #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc_ + #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc_ + + #define FTN_MALLOC kmp_malloc_ + #define FTN_CALLOC kmp_calloc_ + #define FTN_REALLOC kmp_realloc_ + #define FTN_FREE kmp_free_ + + #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_ + + #define FTN_SET_NUM_THREADS omp_set_num_threads_ + #define FTN_GET_NUM_THREADS omp_get_num_threads_ + #define FTN_GET_MAX_THREADS omp_get_max_threads_ + #define FTN_GET_THREAD_NUM omp_get_thread_num_ + #define FTN_GET_NUM_PROCS omp_get_num_procs_ + #define FTN_SET_DYNAMIC omp_set_dynamic_ + #define FTN_GET_DYNAMIC omp_get_dynamic_ + #define FTN_SET_NESTED omp_set_nested_ + #define FTN_GET_NESTED omp_get_nested_ + #define FTN_IN_PARALLEL omp_in_parallel_ + #define FTN_GET_THREAD_LIMIT omp_get_thread_limit_ + #define FTN_SET_SCHEDULE omp_set_schedule_ + #define FTN_GET_SCHEDULE omp_get_schedule_ + #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels_ + #define FTN_GET_MAX_ACTIVE_LEVELS 
omp_get_max_active_levels_ + #define FTN_GET_ACTIVE_LEVEL omp_get_active_level_ + #define FTN_GET_LEVEL omp_get_level_ + #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num_ + #define FTN_GET_TEAM_SIZE omp_get_team_size_ + #define FTN_IN_FINAL omp_in_final_ +// #define FTN_SET_PROC_BIND omp_set_proc_bind_ + #define FTN_GET_PROC_BIND omp_get_proc_bind_ +// #define FTN_CURR_PROC_BIND omp_curr_proc_bind_ +#if OMP_40_ENABLED + #define FTN_GET_NUM_TEAMS omp_get_num_teams_ + #define FTN_GET_TEAM_NUM omp_get_team_num_ +#endif + #define FTN_INIT_LOCK omp_init_lock_ +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint_ + #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint_ +#endif + #define FTN_DESTROY_LOCK omp_destroy_lock_ + #define FTN_SET_LOCK omp_set_lock_ + #define FTN_UNSET_LOCK omp_unset_lock_ + #define FTN_TEST_LOCK omp_test_lock_ + #define FTN_INIT_NEST_LOCK omp_init_nest_lock_ + #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock_ + #define FTN_SET_NEST_LOCK omp_set_nest_lock_ + #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock_ + #define FTN_TEST_NEST_LOCK omp_test_nest_lock_ + + #define FTN_SET_WARNINGS_ON kmp_set_warnings_on_ + #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off_ + + #define FTN_GET_WTIME omp_get_wtime_ + #define FTN_GET_WTICK omp_get_wtick_ + +#if OMP_40_ENABLED +#if KMP_MIC || KMP_OS_DARWIN + #define FTN_GET_DEFAULT_DEVICE omp_get_default_device_ + #define FTN_SET_DEFAULT_DEVICE omp_set_default_device_ + #define FTN_GET_NUM_DEVICES omp_get_num_devices_ +#endif + #define FTN_IS_INITIAL_DEVICE omp_is_initial_device_ +#endif + + +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION omp_get_cancellation_ + #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_ +#endif + +#endif /* KMP_FTN_APPEND */ + +/* ------------------------------------------------------------------------ */ + +#if KMP_FTN_ENTRIES == KMP_FTN_UPPER + + #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE + #define 
FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S + #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE + #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S + #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME + #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME + #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL + #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND + #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT + #define FTN_SET_LIBRARY KMP_SET_LIBRARY + #define FTN_GET_LIBRARY KMP_GET_LIBRARY + #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS + #define FTN_SET_AFFINITY KMP_SET_AFFINITY + #define FTN_GET_AFFINITY KMP_GET_AFFINITY + #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC + #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK + #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK + #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC + #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC + #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC + + #define FTN_MALLOC KMP_MALLOC + #define FTN_CALLOC KMP_CALLOC + #define FTN_REALLOC KMP_REALLOC + #define FTN_FREE KMP_FREE + + #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS + + #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS + #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS + #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS + #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM + #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS + #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC + #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC + #define FTN_SET_NESTED OMP_SET_NESTED + #define FTN_GET_NESTED OMP_GET_NESTED + #define FTN_IN_PARALLEL OMP_IN_PARALLEL + #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT + #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE + #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE + #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS + #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS + #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL + #define FTN_GET_LEVEL 
OMP_GET_LEVEL + #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM + #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE + #define FTN_IN_FINAL OMP_IN_FINAL +// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND + #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND +// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND +#if OMP_40_ENABLED + #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS + #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM +#endif + #define FTN_INIT_LOCK OMP_INIT_LOCK +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT + #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT +#endif + #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK + #define FTN_SET_LOCK OMP_SET_LOCK + #define FTN_UNSET_LOCK OMP_UNSET_LOCK + #define FTN_TEST_LOCK OMP_TEST_LOCK + #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK + #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK + #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK + #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK + #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK + + #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON + #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF + + #define FTN_GET_WTIME OMP_GET_WTIME + #define FTN_GET_WTICK OMP_GET_WTICK + +#if OMP_40_ENABLED +#if KMP_MIC || KMP_OS_DARWIN + #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE + #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE + #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES +#endif + #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE +#endif + + +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION + #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS +#endif + +#endif /* KMP_FTN_UPPER */ + +/* ------------------------------------------------------------------------ */ + +#if KMP_FTN_ENTRIES == KMP_FTN_UAPPEND + + #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE_ + #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S_ + #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE_ + #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S_ + 
#define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME_ + #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME_ + #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL_ + #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND_ + #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT_ + #define FTN_SET_LIBRARY KMP_SET_LIBRARY_ + #define FTN_GET_LIBRARY KMP_GET_LIBRARY_ + #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS_ + #define FTN_SET_AFFINITY KMP_SET_AFFINITY_ + #define FTN_GET_AFFINITY KMP_GET_AFFINITY_ + #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC_ + #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK_ + #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK_ + #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC_ + #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC_ + #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC_ + + #define FTN_MALLOC KMP_MALLOC_ + #define FTN_CALLOC KMP_CALLOC_ + #define FTN_REALLOC KMP_REALLOC_ + #define FTN_FREE KMP_FREE_ + + #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_ + + #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_ + #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_ + #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_ + #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_ + #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS_ + #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC_ + #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC_ + #define FTN_SET_NESTED OMP_SET_NESTED_ + #define FTN_GET_NESTED OMP_GET_NESTED_ + #define FTN_IN_PARALLEL OMP_IN_PARALLEL_ + #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT_ + #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE_ + #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE_ + #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS_ + #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS_ + #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL_ + #define FTN_GET_LEVEL OMP_GET_LEVEL_ + #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM_ + #define 
FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE_ + #define FTN_IN_FINAL OMP_IN_FINAL_ +// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND_ + #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND_ +// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND_ +#if OMP_40_ENABLED + #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_ + #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_ +#endif + #define FTN_INIT_LOCK OMP_INIT_LOCK_ +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT_ + #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT_ +#endif + #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_ + #define FTN_SET_LOCK OMP_SET_LOCK_ + #define FTN_UNSET_LOCK OMP_UNSET_LOCK_ + #define FTN_TEST_LOCK OMP_TEST_LOCK_ + #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK_ + #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK_ + #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK_ + #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK_ + #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK_ + + #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON_ + #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF_ + + #define FTN_GET_WTIME OMP_GET_WTIME_ + #define FTN_GET_WTICK OMP_GET_WTICK_ + +#if OMP_40_ENABLED +#if KMP_MIC || KMP_OS_DARWIN + #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE_ + #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE_ + #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES_ +#endif + #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_ +#endif + + +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_ + #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_ +#endif + +#endif /* KMP_FTN_UAPPEND */ + +/* ------------------------------------------------------------------ */ +/* -------------------------- GOMP API NAMES ------------------------ */ +// All GOMP_1.0 symbols +#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end +#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start +#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier +#define KMP_API_NAME_GOMP_CRITICAL_END 
GOMP_critical_end +#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end +#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start +#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start +#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end +#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait +#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next +#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT GOMP_loop_ordered_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START GOMP_loop_ordered_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START GOMP_loop_ordered_guided_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT GOMP_loop_ordered_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START GOMP_loop_ordered_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START GOMP_loop_ordered_static_start +#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next +#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start +#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next +#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start +#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end +#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start +#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START GOMP_parallel_loop_dynamic_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START GOMP_parallel_loop_guided_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START GOMP_parallel_loop_runtime_start +#define 
KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START GOMP_parallel_loop_static_start +#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start +#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start +#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end +#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait +#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next +#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start +#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end +#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start +#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start + +// All GOMP_2.0 symbols +#define KMP_API_NAME_GOMP_TASK GOMP_task +#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait +#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next +#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT GOMP_loop_ull_ordered_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT GOMP_loop_ull_ordered_guided_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START GOMP_loop_ull_ordered_guided_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT GOMP_loop_ull_ordered_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT GOMP_loop_ull_ordered_static_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START GOMP_loop_ull_ordered_static_start +#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start +#define 
KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next +#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start + +// All GOMP_3.0 symbols +#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield + +// All GOMP_4.0 symbols +// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libomp +#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel +#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel +#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point +#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static +#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections +#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel +#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel +#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start +#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end +/* Target functions should be taken care of by liboffload */ +#define KMP_API_NAME_GOMP_TARGET GOMP_target +#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data +#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data +#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update +#define KMP_API_NAME_GOMP_TEAMS GOMP_teams + +#ifdef KMP_USE_VERSION_SYMBOLS + #define xstr(x) str(x) + #define str(x) #x + + // If Linux, xexpand prepends __kmp_api_ to the real API name + #define xexpand(api_name) expand(api_name) + #define expand(api_name) __kmp_api_##api_name + + #define xaliasify(api_name,ver) aliasify(api_name,ver) + #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name)))) + + #define 
xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION") + #define versionify(api_name, version_num, version_str, default_ver) \ + __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \ + __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t") + +#else // KMP_USE_VERSION_SYMBOLS + #define xstr(x) /* Nothing */ + #define str(x) /* Nothing */ + + // if Windows or Mac, xexpand does no name transformation + #define xexpand(api_name) expand(api_name) + #define expand(api_name) api_name + + #define xaliasify(api_name,ver) /* Nothing */ + #define aliasify(api_name,ver) /* Nothing */ + + #define xversionify(api_name, version_num, version_str) /* Nothing */ + #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */ + +#endif // KMP_USE_VERSION_SYMBOLS + +#endif /* KMP_FTN_OS_H */ + diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c b/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c index 2083eced2ee..cf70d74af55 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c +++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c @@ -1,35 +1,35 @@ -/* - * kmp_ftn_stdcall.c -- Fortran __stdcall linkage support for OpenMP. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" - -// Note: This string is not printed when KMP_VERSION=1. 
-char const __kmp_version_ftnstdcall[] = KMP_VERSION_PREFIX "Fortran __stdcall OMP support: " -#ifdef USE_FTN_STDCALL - "yes"; -#else - "no"; -#endif - -#ifdef USE_FTN_STDCALL - -#define FTN_STDCALL KMP_STDCALL -#define KMP_FTN_ENTRIES USE_FTN_STDCALL - -#include "kmp_ftn_os.h" -#include "kmp_ftn_entry.h" - -#endif /* USE_FTN_STDCALL */ - +/* + * kmp_ftn_stdcall.c -- Fortran __stdcall linkage support for OpenMP. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" + +// Note: This string is not printed when KMP_VERSION=1. +char const __kmp_version_ftnstdcall[] = KMP_VERSION_PREFIX "Fortran __stdcall OMP support: " +#ifdef USE_FTN_STDCALL + "yes"; +#else + "no"; +#endif + +#ifdef USE_FTN_STDCALL + +#define FTN_STDCALL KMP_STDCALL +#define KMP_FTN_ENTRIES USE_FTN_STDCALL + +#include "kmp_ftn_os.h" +#include "kmp_ftn_entry.h" + +#endif /* USE_FTN_STDCALL */ + diff --git a/contrib/libs/cxxsupp/openmp/kmp_global.c b/contrib/libs/cxxsupp/openmp/kmp_global.c index 7c307443571..bdac75b0058 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_global.c +++ b/contrib/libs/cxxsupp/openmp/kmp_global.c @@ -1,471 +1,471 @@ -/* - * kmp_global.c -- KPTS global variables for runtime support library - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" - -kmp_key_t __kmp_gtid_threadprivate_key; - -kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized - -#if KMP_STATS_ENABLED -#include "kmp_stats.h" -// lock for modifying the global __kmp_stats_list -kmp_tas_lock_t __kmp_stats_lock; - -// global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called. -kmp_stats_list __kmp_stats_list; - -// thread local pointer to stats node within list -__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list; - -// gives reference tick for all events (considered the 0 tick) -tsc_tick_count __kmp_stats_start_time; -#endif -#if KMP_USE_HWLOC -int __kmp_hwloc_error = FALSE; -hwloc_topology_t __kmp_hwloc_topology = NULL; -#endif - -/* ----------------------------------------------------- */ -/* INITIALIZATION VARIABLES */ -/* they are syncronized to write during init, but read anytime */ -volatile int __kmp_init_serial = FALSE; -volatile int __kmp_init_gtid = FALSE; -volatile int __kmp_init_common = FALSE; -volatile int __kmp_init_middle = FALSE; -volatile int __kmp_init_parallel = FALSE; -volatile int __kmp_init_monitor = 0; /* 1 - launched, 2 - actually started (Windows* OS only) */ -volatile int __kmp_init_user_locks = FALSE; - -/* list of address of allocated caches for commons */ -kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL; - -int __kmp_init_counter = 0; -int __kmp_root_counter = 0; -int __kmp_version = 0; - -volatile kmp_uint32 __kmp_team_counter = 0; -volatile kmp_uint32 __kmp_task_counter = 0; - -unsigned int __kmp_init_wait = KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */ -unsigned int __kmp_next_wait = KMP_DEFAULT_NEXT_WAIT; /* susequent number of spin-tests */ - -size_t __kmp_stksize = KMP_DEFAULT_STKSIZE; -size_t __kmp_monitor_stksize = 0; // auto adjust -size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET; -int 
__kmp_stkpadding = KMP_MIN_STKPADDING; - -size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR; - -/* Barrier method defaults, settings, and strings */ -/* branch factor = 2^branch_bits (only relevant for tree and hyper barrier types) */ -#if KMP_ARCH_X86_64 -kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */ -kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */ -#else -kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */ -kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */ -#endif // KMP_ARCH_X86_64 -#if KMP_ARCH_X86_64 -kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */ -kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */ -#else -kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_linear_bar; -kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_linear_bar; -#endif -kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ] = { 0 }; -kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ] = { 0 }; -kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ] = { bp_linear_bar }; -kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ] = { bp_linear_bar }; -char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ] = - { "KMP_PLAIN_BARRIER", "KMP_FORKJOIN_BARRIER" - #if KMP_FAST_REDUCTION_BARRIER - , "KMP_REDUCTION_BARRIER" - #endif // KMP_FAST_REDUCTION_BARRIER - }; -char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ] = - { "KMP_PLAIN_BARRIER_PATTERN", "KMP_FORKJOIN_BARRIER_PATTERN" - #if KMP_FAST_REDUCTION_BARRIER - , "KMP_REDUCTION_BARRIER_PATTERN" - #endif // KMP_FAST_REDUCTION_BARRIER - }; -char const *__kmp_barrier_type_name [ bs_last_barrier ] = - { "plain", "forkjoin" - #if KMP_FAST_REDUCTION_BARRIER - , "reduction" - #endif // KMP_FAST_REDUCTION_BARRIER - }; -char const 
*__kmp_barrier_pattern_name[bp_last_bar] = {"linear","tree","hyper","hierarchical"}; - -int __kmp_allThreadsSpecified = 0; -size_t __kmp_align_alloc = CACHE_LINE; - - -int __kmp_generate_warnings = kmp_warnings_low; -int __kmp_reserve_warn = 0; -int __kmp_xproc = 0; -int __kmp_avail_proc = 0; -size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE; -int __kmp_sys_max_nth = KMP_MAX_NTH; -int __kmp_max_nth = 0; -int __kmp_threads_capacity = 0; -int __kmp_dflt_team_nth = 0; -int __kmp_dflt_team_nth_ub = 0; -int __kmp_tp_capacity = 0; -int __kmp_tp_cached = 0; -int __kmp_dflt_nested = FALSE; -int __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */ -#if KMP_NESTED_HOT_TEAMS -int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ - /* 1 - keep extra threads when reduced */ -int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */ -#endif -enum library_type __kmp_library = library_none; -enum sched_type __kmp_sched = kmp_sch_default; /* scheduling method for runtime scheduling */ -enum sched_type __kmp_static = kmp_sch_static_greedy; /* default static scheduling method */ -enum sched_type __kmp_guided = kmp_sch_guided_iterative_chunked; /* default guided scheduling method */ -enum sched_type __kmp_auto = kmp_sch_guided_analytical_chunked; /* default auto scheduling method */ -int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; -int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS; -int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( KMP_DEFAULT_BLOCKTIME, KMP_MIN_MONITOR_WAKEUPS ); -#ifdef KMP_ADJUST_BLOCKTIME -int __kmp_zero_bt = FALSE; -#endif /* KMP_ADJUST_BLOCKTIME */ -#ifdef KMP_DFLT_NTH_CORES -int __kmp_ncores = 0; -#endif -int __kmp_chunk = 0; -int __kmp_abort_delay = 0; -#if KMP_OS_LINUX && defined(KMP_TDATA_GTID) -int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */ -int __kmp_adjust_gtid_mode = FALSE; -#elif KMP_OS_WINDOWS -int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */ 
-int __kmp_adjust_gtid_mode = FALSE; -#else -int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */ -int __kmp_adjust_gtid_mode = TRUE; -#endif /* KMP_OS_LINUX && defined(KMP_TDATA_GTID) */ -#ifdef KMP_TDATA_GTID -#if KMP_OS_WINDOWS -__declspec(thread) int __kmp_gtid = KMP_GTID_DNE; -#else -__thread int __kmp_gtid = KMP_GTID_DNE; -#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */ -#endif /* KMP_TDATA_GTID */ -int __kmp_tls_gtid_min = INT_MAX; -int __kmp_foreign_tp = TRUE; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -int __kmp_inherit_fp_control = TRUE; -kmp_int16 __kmp_init_x87_fpu_control_word = 0; -kmp_uint32 __kmp_init_mxcsr = 0; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#ifdef USE_LOAD_BALANCE -double __kmp_load_balance_interval = 1.0; -#endif /* USE_LOAD_BALANCE */ - -kmp_nested_nthreads_t __kmp_nested_nth = { NULL, 0, 0 }; - -#if KMP_USE_ADAPTIVE_LOCKS - -kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { 1, 1024 }; // TODO: tune it! 
- -#if KMP_DEBUG_ADAPTIVE_LOCKS -char * __kmp_speculative_statsfile = "-"; -#endif - -#endif // KMP_USE_ADAPTIVE_LOCKS - -#if OMP_40_ENABLED -int __kmp_display_env = FALSE; -int __kmp_display_env_verbose = FALSE; -int __kmp_omp_cancellation = FALSE; -#endif - -/* map OMP 3.0 schedule types with our internal schedule types */ -enum sched_type __kmp_sch_map[ kmp_sched_upper - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ] = { - kmp_sch_static_chunked, // ==> kmp_sched_static = 1 - kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2 - kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3 - kmp_sch_auto, // ==> kmp_sched_auto = 4 - kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101 - // will likely not used, introduced here just to debug the code - // of public intel extension schedules -}; - -#if KMP_OS_LINUX -enum clock_function_type __kmp_clock_function; -int __kmp_clock_function_param; -#endif /* KMP_OS_LINUX */ - -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) -enum mic_type __kmp_mic_type = non_mic; -#endif - -#if KMP_AFFINITY_SUPPORTED - -# if KMP_GROUP_AFFINITY - -int __kmp_num_proc_groups = 1; - -kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL; -kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL; -kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL; -kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL; - -# endif /* KMP_GROUP_AFFINITY */ - -size_t __kmp_affin_mask_size = 0; -enum affinity_type __kmp_affinity_type = affinity_default; -enum affinity_gran __kmp_affinity_gran = affinity_gran_default; -int __kmp_affinity_gran_levels = -1; -int __kmp_affinity_dups = TRUE; -enum affinity_top_method __kmp_affinity_top_method = affinity_top_method_default; -int __kmp_affinity_compact = 0; -int __kmp_affinity_offset = 0; -int __kmp_affinity_verbose = FALSE; -int __kmp_affinity_warnings = TRUE; -int __kmp_affinity_respect_mask = affinity_respect_mask_default; -char * 
__kmp_affinity_proclist = NULL; -kmp_affin_mask_t *__kmp_affinity_masks = NULL; -unsigned __kmp_affinity_num_masks = 0; - -char const * __kmp_cpuinfo_file = NULL; - -#endif /* KMP_AFFINITY_SUPPORTED */ - -#if OMP_40_ENABLED -kmp_nested_proc_bind_t __kmp_nested_proc_bind = { NULL, 0, 0 }; -int __kmp_affinity_num_places = 0; -#endif - -int __kmp_place_num_sockets = 0; -int __kmp_place_socket_offset = 0; -int __kmp_place_num_cores = 0; -int __kmp_place_core_offset = 0; -int __kmp_place_num_threads_per_core = 0; - -kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams; - -/* This check ensures that the compiler is passing the correct data type - * for the flags formal parameter of the function kmpc_omp_task_alloc(). - * If the type is not a 4-byte type, then give an error message about - * a non-positive length array pointing here. If that happens, the - * kmp_tasking_flags_t structure must be redefined to have exactly 32 bits. - */ -KMP_BUILD_ASSERT( sizeof(kmp_tasking_flags_t) == 4 ); - -kmp_int32 __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */ - -#ifdef DEBUG_SUSPEND -int __kmp_suspend_count = 0; -#endif - -int __kmp_settings = FALSE; -int __kmp_duplicate_library_ok = 0; -#if USE_ITT_BUILD -int __kmp_forkjoin_frames = 1; -int __kmp_forkjoin_frames_mode = 3; -#endif -PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined; -int __kmp_determ_red = FALSE; - -#ifdef KMP_DEBUG -int kmp_a_debug = 0; -int kmp_b_debug = 0; -int kmp_c_debug = 0; -int kmp_d_debug = 0; -int kmp_e_debug = 0; -int kmp_f_debug = 0; -int kmp_diag = 0; -#endif - -/* For debug information logging using rotating buffer */ -int __kmp_debug_buf = FALSE; /* TRUE means use buffer, FALSE means print to stderr */ -int __kmp_debug_buf_lines = KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */ -int __kmp_debug_buf_chars = KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */ -int __kmp_debug_buf_atomic = FALSE; /* TRUE 
means use atomic update of buffer entry pointer */ - -char *__kmp_debug_buffer = NULL; /* Debug buffer itself */ -int __kmp_debug_count = 0; /* Counter for number of lines printed in buffer so far */ -int __kmp_debug_buf_warn_chars = 0; /* Keep track of char increase recommended in warnings */ -/* end rotating debug buffer */ - -#ifdef KMP_DEBUG -int __kmp_par_range; /* +1 => only go par for constructs in range */ - /* -1 => only go par for constructs outside range */ -char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = { '\0' }; -char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = { '\0' }; -int __kmp_par_range_lb = 0; -int __kmp_par_range_ub = INT_MAX; -#endif /* KMP_DEBUG */ - -/* For printing out dynamic storage map for threads and teams */ -int __kmp_storage_map = FALSE; /* True means print storage map for threads and teams */ -int __kmp_storage_map_verbose = FALSE; /* True means storage map includes placement info */ -int __kmp_storage_map_verbose_specified = FALSE; -/* Initialize the library data structures when we fork a child process, defaults to TRUE */ -int __kmp_need_register_atfork = TRUE; /* At initialization, call pthread_atfork to install fork handler */ -int __kmp_need_register_atfork_specified = TRUE; - -int __kmp_env_chunk = FALSE; /* KMP_CHUNK specified? */ -int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */ -int __kmp_env_omp_stksize = FALSE; /* OMP_STACKSIZE specified? */ -int __kmp_env_all_threads = FALSE;/* KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ -int __kmp_env_omp_all_threads = FALSE;/* OMP_THREAD_LIMIT specified? */ -int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */ -int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */ -int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? 
*/ - -kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT; -kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT; -kmp_uint32 __kmp_yielding_on = 1; -#if KMP_OS_CNK -kmp_uint32 __kmp_yield_cycle = 0; -#else -kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */ -#endif -kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */ -kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor periods. */ -/* ----------------------------------------------------- */ - - -/* ------------------------------------------------------ */ -/* STATE mostly syncronized with global lock */ -/* data written to rarely by masters, read often by workers */ -/* - * SHALL WE EDIT THE COMMENT BELOW IN SOME WAY? - * TODO: None of this global padding stuff works consistently because - * the order of declaration is not necessarily correlated to storage order. - * To fix this, all the important globals must be put in a big structure - * instead. - */ -KMP_ALIGN_CACHE - kmp_info_t **__kmp_threads = NULL; - kmp_root_t **__kmp_root = NULL; - -/* data read/written to often by masters */ -KMP_ALIGN_CACHE -volatile int __kmp_nth = 0; -volatile int __kmp_all_nth = 0; -int __kmp_thread_pool_nth = 0; -volatile kmp_info_t *__kmp_thread_pool = NULL; -volatile kmp_team_t *__kmp_team_pool = NULL; - -KMP_ALIGN_CACHE -volatile int __kmp_thread_pool_active_nth = 0; - -/* ------------------------------------------------- - * GLOBAL/ROOT STATE */ -KMP_ALIGN_CACHE -kmp_global_t __kmp_global = {{ 0 }}; - -/* ----------------------------------------------- */ -/* GLOBAL SYNCHRONIZATION LOCKS */ -/* TODO verify the need for these locks and if they need to be global */ - -#if KMP_USE_INTERNODE_ALIGNMENT -/* Multinode systems have larger cache line granularity which can cause - * false sharing if the alignment is not large enough for these locks */ -KMP_ALIGN_CACHE_INTERNODE - -kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( 
__kmp_initz_lock ); /* Control initializations */ -KMP_ALIGN_CACHE_INTERNODE -kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ -KMP_ALIGN_CACHE_INTERNODE -kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ -KMP_ALIGN_CACHE_INTERNODE -kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ -KMP_ALIGN_CACHE_INTERNODE -kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ - -KMP_ALIGN_CACHE_INTERNODE -kmp_lock_t __kmp_global_lock; /* Control OS/global access */ -KMP_ALIGN_CACHE_INTERNODE -kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ -KMP_ALIGN_CACHE_INTERNODE -kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ -#else -KMP_ALIGN_CACHE - -kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */ -kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ -kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ -kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ -kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ - -KMP_ALIGN(128) -kmp_lock_t __kmp_global_lock; /* Control OS/global access */ -KMP_ALIGN(128) -kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ -KMP_ALIGN(128) -kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ -#endif - -/* ----------------------------------------------- */ - -#if KMP_HANDLE_SIGNALS - /* - Signal handling is disabled by default, because it confuses users: In case of sigsegv - (or other trouble) in user code signal handler catches the signal, which then "appears" in - the monitor thread (when the monitor executes raise() function). Users see signal in the - monitor thread and blame OpenMP RTL. 
- - Grant said signal handling required on some older OSes (Irix?) supported by KAI, because - bad applications hung but not aborted. Currently it is not a problem for Linux* OS, OS X* and - Windows* OS. - - Grant: Found new hangs for EL4, EL5, and a Fedora Core machine. So I'm putting - the default back for now to see if that fixes hangs on those machines. - - 2010-04013 Lev: It was a bug in Fortran RTL. Fortran RTL prints a kind of stack backtrace - when program is aborting, but the code is not signal-safe. When multiple signals raised at - the same time (which occurs in dynamic negative tests because all the worker threads detects - the same error), Fortran RTL may hang. The bug finally fixed in Fortran RTL library provided - by Steve R., and will be available soon. - */ - int __kmp_handle_signals = FALSE; -#endif - -/* ----------------------------------------------- */ -#ifdef BUILD_TV -kmp_key_t __kmp_tv_key = 0; -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#ifdef DEBUG_SUSPEND -int -get_suspend_count_( void ) { - int count = __kmp_suspend_count; - __kmp_suspend_count = 0; - return count; -} -void -set_suspend_count_( int * value ) { - __kmp_suspend_count = *value; -} -#endif - -// Symbols for MS mutual detection. -int _You_must_link_with_exactly_one_OpenMP_library = 1; -int _You_must_link_with_Intel_OpenMP_library = 1; -#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 ) - int _You_must_link_with_Microsoft_OpenMP_library = 1; -#endif - -// end of file // +/* + * kmp_global.c -- KPTS global variables for runtime support library + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" + +kmp_key_t __kmp_gtid_threadprivate_key; + +kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized + +#if KMP_STATS_ENABLED +#include "kmp_stats.h" +// lock for modifying the global __kmp_stats_list +kmp_tas_lock_t __kmp_stats_lock; + +// global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called. +kmp_stats_list __kmp_stats_list; + +// thread local pointer to stats node within list +__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list; + +// gives reference tick for all events (considered the 0 tick) +tsc_tick_count __kmp_stats_start_time; +#endif +#if KMP_USE_HWLOC +int __kmp_hwloc_error = FALSE; +hwloc_topology_t __kmp_hwloc_topology = NULL; +#endif + +/* ----------------------------------------------------- */ +/* INITIALIZATION VARIABLES */ +/* they are syncronized to write during init, but read anytime */ +volatile int __kmp_init_serial = FALSE; +volatile int __kmp_init_gtid = FALSE; +volatile int __kmp_init_common = FALSE; +volatile int __kmp_init_middle = FALSE; +volatile int __kmp_init_parallel = FALSE; +volatile int __kmp_init_monitor = 0; /* 1 - launched, 2 - actually started (Windows* OS only) */ +volatile int __kmp_init_user_locks = FALSE; + +/* list of address of allocated caches for commons */ +kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL; + +int __kmp_init_counter = 0; +int __kmp_root_counter = 0; +int __kmp_version = 0; + +volatile kmp_uint32 __kmp_team_counter = 0; +volatile kmp_uint32 __kmp_task_counter = 0; + +unsigned int __kmp_init_wait = KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */ +unsigned int __kmp_next_wait = KMP_DEFAULT_NEXT_WAIT; /* susequent number of spin-tests */ + +size_t __kmp_stksize = KMP_DEFAULT_STKSIZE; +size_t __kmp_monitor_stksize = 0; // auto adjust +size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET; +int 
__kmp_stkpadding = KMP_MIN_STKPADDING; + +size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR; + +/* Barrier method defaults, settings, and strings */ +/* branch factor = 2^branch_bits (only relevant for tree and hyper barrier types) */ +#if KMP_ARCH_X86_64 +kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */ +kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */ +#else +kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */ +kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */ +#endif // KMP_ARCH_X86_64 +#if KMP_ARCH_X86_64 +kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */ +kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */ +#else +kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_linear_bar; +kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_linear_bar; +#endif +kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ] = { 0 }; +kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ] = { 0 }; +kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ] = { bp_linear_bar }; +kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ] = { bp_linear_bar }; +char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ] = + { "KMP_PLAIN_BARRIER", "KMP_FORKJOIN_BARRIER" + #if KMP_FAST_REDUCTION_BARRIER + , "KMP_REDUCTION_BARRIER" + #endif // KMP_FAST_REDUCTION_BARRIER + }; +char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ] = + { "KMP_PLAIN_BARRIER_PATTERN", "KMP_FORKJOIN_BARRIER_PATTERN" + #if KMP_FAST_REDUCTION_BARRIER + , "KMP_REDUCTION_BARRIER_PATTERN" + #endif // KMP_FAST_REDUCTION_BARRIER + }; +char const *__kmp_barrier_type_name [ bs_last_barrier ] = + { "plain", "forkjoin" + #if KMP_FAST_REDUCTION_BARRIER + , "reduction" + #endif // KMP_FAST_REDUCTION_BARRIER + }; +char const 
*__kmp_barrier_pattern_name[bp_last_bar] = {"linear","tree","hyper","hierarchical"}; + +int __kmp_allThreadsSpecified = 0; +size_t __kmp_align_alloc = CACHE_LINE; + + +int __kmp_generate_warnings = kmp_warnings_low; +int __kmp_reserve_warn = 0; +int __kmp_xproc = 0; +int __kmp_avail_proc = 0; +size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE; +int __kmp_sys_max_nth = KMP_MAX_NTH; +int __kmp_max_nth = 0; +int __kmp_threads_capacity = 0; +int __kmp_dflt_team_nth = 0; +int __kmp_dflt_team_nth_ub = 0; +int __kmp_tp_capacity = 0; +int __kmp_tp_cached = 0; +int __kmp_dflt_nested = FALSE; +int __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */ +#if KMP_NESTED_HOT_TEAMS +int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ + /* 1 - keep extra threads when reduced */ +int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */ +#endif +enum library_type __kmp_library = library_none; +enum sched_type __kmp_sched = kmp_sch_default; /* scheduling method for runtime scheduling */ +enum sched_type __kmp_static = kmp_sch_static_greedy; /* default static scheduling method */ +enum sched_type __kmp_guided = kmp_sch_guided_iterative_chunked; /* default guided scheduling method */ +enum sched_type __kmp_auto = kmp_sch_guided_analytical_chunked; /* default auto scheduling method */ +int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; +int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS; +int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( KMP_DEFAULT_BLOCKTIME, KMP_MIN_MONITOR_WAKEUPS ); +#ifdef KMP_ADJUST_BLOCKTIME +int __kmp_zero_bt = FALSE; +#endif /* KMP_ADJUST_BLOCKTIME */ +#ifdef KMP_DFLT_NTH_CORES +int __kmp_ncores = 0; +#endif +int __kmp_chunk = 0; +int __kmp_abort_delay = 0; +#if KMP_OS_LINUX && defined(KMP_TDATA_GTID) +int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */ +int __kmp_adjust_gtid_mode = FALSE; +#elif KMP_OS_WINDOWS +int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */ 
+int __kmp_adjust_gtid_mode = FALSE; +#else +int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */ +int __kmp_adjust_gtid_mode = TRUE; +#endif /* KMP_OS_LINUX && defined(KMP_TDATA_GTID) */ +#ifdef KMP_TDATA_GTID +#if KMP_OS_WINDOWS +__declspec(thread) int __kmp_gtid = KMP_GTID_DNE; +#else +__thread int __kmp_gtid = KMP_GTID_DNE; +#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */ +#endif /* KMP_TDATA_GTID */ +int __kmp_tls_gtid_min = INT_MAX; +int __kmp_foreign_tp = TRUE; +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +int __kmp_inherit_fp_control = TRUE; +kmp_int16 __kmp_init_x87_fpu_control_word = 0; +kmp_uint32 __kmp_init_mxcsr = 0; +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +#ifdef USE_LOAD_BALANCE +double __kmp_load_balance_interval = 1.0; +#endif /* USE_LOAD_BALANCE */ + +kmp_nested_nthreads_t __kmp_nested_nth = { NULL, 0, 0 }; + +#if KMP_USE_ADAPTIVE_LOCKS + +kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { 1, 1024 }; // TODO: tune it! 
+ +#if KMP_DEBUG_ADAPTIVE_LOCKS +char * __kmp_speculative_statsfile = "-"; +#endif + +#endif // KMP_USE_ADAPTIVE_LOCKS + +#if OMP_40_ENABLED +int __kmp_display_env = FALSE; +int __kmp_display_env_verbose = FALSE; +int __kmp_omp_cancellation = FALSE; +#endif + +/* map OMP 3.0 schedule types with our internal schedule types */ +enum sched_type __kmp_sch_map[ kmp_sched_upper - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ] = { + kmp_sch_static_chunked, // ==> kmp_sched_static = 1 + kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2 + kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3 + kmp_sch_auto, // ==> kmp_sched_auto = 4 + kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101 + // will likely not used, introduced here just to debug the code + // of public intel extension schedules +}; + +#if KMP_OS_LINUX +enum clock_function_type __kmp_clock_function; +int __kmp_clock_function_param; +#endif /* KMP_OS_LINUX */ + +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) +enum mic_type __kmp_mic_type = non_mic; +#endif + +#if KMP_AFFINITY_SUPPORTED + +# if KMP_GROUP_AFFINITY + +int __kmp_num_proc_groups = 1; + +kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL; +kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL; +kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL; +kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL; + +# endif /* KMP_GROUP_AFFINITY */ + +size_t __kmp_affin_mask_size = 0; +enum affinity_type __kmp_affinity_type = affinity_default; +enum affinity_gran __kmp_affinity_gran = affinity_gran_default; +int __kmp_affinity_gran_levels = -1; +int __kmp_affinity_dups = TRUE; +enum affinity_top_method __kmp_affinity_top_method = affinity_top_method_default; +int __kmp_affinity_compact = 0; +int __kmp_affinity_offset = 0; +int __kmp_affinity_verbose = FALSE; +int __kmp_affinity_warnings = TRUE; +int __kmp_affinity_respect_mask = affinity_respect_mask_default; +char * 
__kmp_affinity_proclist = NULL; +kmp_affin_mask_t *__kmp_affinity_masks = NULL; +unsigned __kmp_affinity_num_masks = 0; + +char const * __kmp_cpuinfo_file = NULL; + +#endif /* KMP_AFFINITY_SUPPORTED */ + +#if OMP_40_ENABLED +kmp_nested_proc_bind_t __kmp_nested_proc_bind = { NULL, 0, 0 }; +int __kmp_affinity_num_places = 0; +#endif + +int __kmp_place_num_sockets = 0; +int __kmp_place_socket_offset = 0; +int __kmp_place_num_cores = 0; +int __kmp_place_core_offset = 0; +int __kmp_place_num_threads_per_core = 0; + +kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams; + +/* This check ensures that the compiler is passing the correct data type + * for the flags formal parameter of the function kmpc_omp_task_alloc(). + * If the type is not a 4-byte type, then give an error message about + * a non-positive length array pointing here. If that happens, the + * kmp_tasking_flags_t structure must be redefined to have exactly 32 bits. + */ +KMP_BUILD_ASSERT( sizeof(kmp_tasking_flags_t) == 4 ); + +kmp_int32 __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */ + +#ifdef DEBUG_SUSPEND +int __kmp_suspend_count = 0; +#endif + +int __kmp_settings = FALSE; +int __kmp_duplicate_library_ok = 0; +#if USE_ITT_BUILD +int __kmp_forkjoin_frames = 1; +int __kmp_forkjoin_frames_mode = 3; +#endif +PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined; +int __kmp_determ_red = FALSE; + +#ifdef KMP_DEBUG +int kmp_a_debug = 0; +int kmp_b_debug = 0; +int kmp_c_debug = 0; +int kmp_d_debug = 0; +int kmp_e_debug = 0; +int kmp_f_debug = 0; +int kmp_diag = 0; +#endif + +/* For debug information logging using rotating buffer */ +int __kmp_debug_buf = FALSE; /* TRUE means use buffer, FALSE means print to stderr */ +int __kmp_debug_buf_lines = KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */ +int __kmp_debug_buf_chars = KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */ +int __kmp_debug_buf_atomic = FALSE; /* TRUE 
means use atomic update of buffer entry pointer */ + +char *__kmp_debug_buffer = NULL; /* Debug buffer itself */ +int __kmp_debug_count = 0; /* Counter for number of lines printed in buffer so far */ +int __kmp_debug_buf_warn_chars = 0; /* Keep track of char increase recommended in warnings */ +/* end rotating debug buffer */ + +#ifdef KMP_DEBUG +int __kmp_par_range; /* +1 => only go par for constructs in range */ + /* -1 => only go par for constructs outside range */ +char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = { '\0' }; +char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = { '\0' }; +int __kmp_par_range_lb = 0; +int __kmp_par_range_ub = INT_MAX; +#endif /* KMP_DEBUG */ + +/* For printing out dynamic storage map for threads and teams */ +int __kmp_storage_map = FALSE; /* True means print storage map for threads and teams */ +int __kmp_storage_map_verbose = FALSE; /* True means storage map includes placement info */ +int __kmp_storage_map_verbose_specified = FALSE; +/* Initialize the library data structures when we fork a child process, defaults to TRUE */ +int __kmp_need_register_atfork = TRUE; /* At initialization, call pthread_atfork to install fork handler */ +int __kmp_need_register_atfork_specified = TRUE; + +int __kmp_env_chunk = FALSE; /* KMP_CHUNK specified? */ +int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */ +int __kmp_env_omp_stksize = FALSE; /* OMP_STACKSIZE specified? */ +int __kmp_env_all_threads = FALSE;/* KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ +int __kmp_env_omp_all_threads = FALSE;/* OMP_THREAD_LIMIT specified? */ +int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */ +int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */ +int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? 
*/ + +kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT; +kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT; +kmp_uint32 __kmp_yielding_on = 1; +#if KMP_OS_CNK +kmp_uint32 __kmp_yield_cycle = 0; +#else +kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */ +#endif +kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */ +kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor periods. */ +/* ----------------------------------------------------- */ + + +/* ------------------------------------------------------ */ +/* STATE mostly syncronized with global lock */ +/* data written to rarely by masters, read often by workers */ +/* + * SHALL WE EDIT THE COMMENT BELOW IN SOME WAY? + * TODO: None of this global padding stuff works consistently because + * the order of declaration is not necessarily correlated to storage order. + * To fix this, all the important globals must be put in a big structure + * instead. + */ +KMP_ALIGN_CACHE + kmp_info_t **__kmp_threads = NULL; + kmp_root_t **__kmp_root = NULL; + +/* data read/written to often by masters */ +KMP_ALIGN_CACHE +volatile int __kmp_nth = 0; +volatile int __kmp_all_nth = 0; +int __kmp_thread_pool_nth = 0; +volatile kmp_info_t *__kmp_thread_pool = NULL; +volatile kmp_team_t *__kmp_team_pool = NULL; + +KMP_ALIGN_CACHE +volatile int __kmp_thread_pool_active_nth = 0; + +/* ------------------------------------------------- + * GLOBAL/ROOT STATE */ +KMP_ALIGN_CACHE +kmp_global_t __kmp_global = {{ 0 }}; + +/* ----------------------------------------------- */ +/* GLOBAL SYNCHRONIZATION LOCKS */ +/* TODO verify the need for these locks and if they need to be global */ + +#if KMP_USE_INTERNODE_ALIGNMENT +/* Multinode systems have larger cache line granularity which can cause + * false sharing if the alignment is not large enough for these locks */ +KMP_ALIGN_CACHE_INTERNODE + +kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( 
__kmp_initz_lock ); /* Control initializations */ +KMP_ALIGN_CACHE_INTERNODE +kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ +KMP_ALIGN_CACHE_INTERNODE +kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ +KMP_ALIGN_CACHE_INTERNODE +kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ +KMP_ALIGN_CACHE_INTERNODE +kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ + +KMP_ALIGN_CACHE_INTERNODE +kmp_lock_t __kmp_global_lock; /* Control OS/global access */ +KMP_ALIGN_CACHE_INTERNODE +kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ +KMP_ALIGN_CACHE_INTERNODE +kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ +#else +KMP_ALIGN_CACHE + +kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */ +kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ +kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ +kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ +kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ + +KMP_ALIGN(128) +kmp_lock_t __kmp_global_lock; /* Control OS/global access */ +KMP_ALIGN(128) +kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ +KMP_ALIGN(128) +kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ +#endif + +/* ----------------------------------------------- */ + +#if KMP_HANDLE_SIGNALS + /* + Signal handling is disabled by default, because it confuses users: In case of sigsegv + (or other trouble) in user code signal handler catches the signal, which then "appears" in + the monitor thread (when the monitor executes raise() function). Users see signal in the + monitor thread and blame OpenMP RTL. 
+ + Grant said signal handling required on some older OSes (Irix?) supported by KAI, because + bad applications hung but not aborted. Currently it is not a problem for Linux* OS, OS X* and + Windows* OS. + + Grant: Found new hangs for EL4, EL5, and a Fedora Core machine. So I'm putting + the default back for now to see if that fixes hangs on those machines. + + 2010-04013 Lev: It was a bug in Fortran RTL. Fortran RTL prints a kind of stack backtrace + when program is aborting, but the code is not signal-safe. When multiple signals raised at + the same time (which occurs in dynamic negative tests because all the worker threads detects + the same error), Fortran RTL may hang. The bug finally fixed in Fortran RTL library provided + by Steve R., and will be available soon. + */ + int __kmp_handle_signals = FALSE; +#endif + +/* ----------------------------------------------- */ +#ifdef BUILD_TV +kmp_key_t __kmp_tv_key = 0; +#endif + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#ifdef DEBUG_SUSPEND +int +get_suspend_count_( void ) { + int count = __kmp_suspend_count; + __kmp_suspend_count = 0; + return count; +} +void +set_suspend_count_( int * value ) { + __kmp_suspend_count = *value; +} +#endif + +// Symbols for MS mutual detection. 
+int _You_must_link_with_exactly_one_OpenMP_library = 1; +int _You_must_link_with_Intel_OpenMP_library = 1; +#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 ) + int _You_must_link_with_Microsoft_OpenMP_library = 1; +#endif + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_gsupport.c b/contrib/libs/cxxsupp/openmp/kmp_gsupport.c index e0fb6ff0479..2a89aa2f942 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_gsupport.c +++ b/contrib/libs/cxxsupp/openmp/kmp_gsupport.c @@ -1,1605 +1,1605 @@ -/* - * kmp_gsupport.c - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#if defined(__x86_64) || defined (__powerpc64__) || defined(__aarch64__) -# define KMP_I8 -#endif -#include "kmp.h" -#include "kmp_atomic.h" - +/* + * kmp_gsupport.c + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#if defined(__x86_64) || defined (__powerpc64__) || defined(__aarch64__) +# define KMP_I8 +#endif +#include "kmp.h" +#include "kmp_atomic.h" + +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +#define MKLOC(loc,routine) \ + static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" }; + +#include "kmp_ftn_os.h" + +void +xexpand(KMP_API_NAME_GOMP_BARRIER)(void) +{ + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_barrier"); + KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); + __kmpc_barrier(&loc, gtid); +} + + +// +// Mutual exclusion +// + +// +// The symbol that icc/ifort generates for unnamed for unnamed critical +// sections - .gomp_critical_user_ - is defined using .comm in any objects +// reference it. We can't reference it directly here in C code, as the +// symbol contains a ".". +// +// The RTL contains an assembly language definition of .gomp_critical_user_ +// with another symbol __kmp_unnamed_critical_addr initialized with it's +// address. 
+// +extern kmp_critical_name *__kmp_unnamed_critical_addr; + + +void +xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) +{ + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_critical_start"); + KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); + __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); +} + + +void +xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) +{ + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_critical_end"); + KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); + __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); +} + + +void +xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) +{ + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_critical_name_start"); + KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid)); + __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr); +} + + +void +xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) +{ + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_critical_name_end"); + KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid)); + __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr); +} + + +// +// The Gnu codegen tries to use locked operations to perform atomic updates +// inline. If it can't, then it calls GOMP_atomic_start() before performing +// the update and GOMP_atomic_end() afterward, regardless of the data type. +// + +void +xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void) +{ + int gtid = __kmp_entry_gtid(); + KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); + +#if OMPT_SUPPORT + __ompt_thread_assign_wait_id(0); +#endif + + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); +} + + +void +xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void) +{ + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); +} + + +int +xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) +{ + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_single_start"); + KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid)); + + if (! 
TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); + + // + // 3rd parameter == FALSE prevents __kmp_enter_single from pushing a + // workshare when USE_CHECKS is defined. We need to avoid the push, + // as there is no corresponding GOMP_single_end() call. + // + return __kmp_enter_single(gtid, &loc, FALSE); +} + + +void * +xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) +{ + void *retval; + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_single_copy_start"); + KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid)); + + if (! TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); + + // + // If this is the first thread to enter, return NULL. The generated + // code will then call GOMP_single_copy_end() for this thread only, + // with the copyprivate data pointer as an argument. + // + if (__kmp_enter_single(gtid, &loc, FALSE)) + return NULL; + + // + // Wait for the first thread to set the copyprivate data pointer, + // and for all other threads to reach this point. + // + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + + // + // Retrieve the value of the copyprivate data pointer, and wait for all + // threads to do likewise, then return. + // + retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + return retval; +} + + +void +xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) +{ + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid)); + + // + // Set the copyprivate data pointer for the team, then hit the barrier + // so that the other threads will continue on and read it. Hit another + // barrier before continuing, so that we know that the copyprivate + // data pointer has been propagated to all threads before trying to + // reuse the t_copypriv_data field. 
+ // + __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +} + + +void +xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) +{ + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_ordered_start"); + KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); + __kmpc_ordered(&loc, gtid); +} + + +void +xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) +{ + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_ordered_end"); + KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid)); + __kmpc_end_ordered(&loc, gtid); +} + + +// +// Dispatch macro defs +// +// They come in two flavors: 64-bit unsigned, and either 32-bit signed +// (IA-32 architecture) or 64-bit signed (Intel(R) 64). +// + +#if KMP_ARCH_X86 || KMP_ARCH_ARM +# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 +# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 +# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 +#else +# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8 +# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8 +# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8 +#endif /* KMP_ARCH_X86 || KMP_ARCH_ARM */ + +# define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u +# define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u +# define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u + + +// +// The parallel construct +// + +#ifndef KMP_DEBUG +static +#endif /* KMP_DEBUG */ +void +__kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *), + void *data) +{ +#if OMPT_SUPPORT + kmp_info_t *thr; + ompt_frame_t *ompt_frame; + ompt_state_t enclosing_state; + + if (ompt_enabled) { + // get pointer to thread data structure + thr = __kmp_threads[*gtid]; + + // save enclosing task state; set current state for task + enclosing_state = thr->th.ompt_thread_info.state; + thr->th.ompt_thread_info.state = ompt_state_work_parallel; + + // set task frame + ompt_frame = 
__ompt_get_task_frame_internal(0); + ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + } +#endif + + task(data); + +#if OMPT_SUPPORT + if (ompt_enabled) { + // clear task frame + ompt_frame->exit_runtime_frame = NULL; + + // restore enclosing state + thr->th.ompt_thread_info.state = enclosing_state; + } +#endif +} + + +#ifndef KMP_DEBUG +static +#endif /* KMP_DEBUG */ +void +__kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr, + void (*task)(void *), void *data, unsigned num_threads, ident_t *loc, + enum sched_type schedule, long start, long end, long incr, long chunk_size) +{ + // + // Initialize the loop worksharing construct. + // + KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, + schedule != kmp_sch_static); + +#if OMPT_SUPPORT + kmp_info_t *thr; + ompt_frame_t *ompt_frame; + ompt_state_t enclosing_state; + + if (ompt_enabled) { + thr = __kmp_threads[*gtid]; + // save enclosing task state; set current state for task + enclosing_state = thr->th.ompt_thread_info.state; + thr->th.ompt_thread_info.state = ompt_state_work_parallel; + + // set task frame + ompt_frame = __ompt_get_task_frame_internal(0); + ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + } +#endif + + // + // Now invoke the microtask. + // + task(data); + +#if OMPT_SUPPORT + if (ompt_enabled) { + // clear task frame + ompt_frame->exit_runtime_frame = NULL; + + // reset enclosing state + thr->th.ompt_thread_info.state = enclosing_state; + } +#endif +} + + +#ifndef KMP_DEBUG +static +#endif /* KMP_DEBUG */ +void +__kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), microtask_t wrapper, int argc,...) 
+{ + int rc; + kmp_info_t *thr = __kmp_threads[gtid]; + kmp_team_t *team = thr->th.th_team; + int tid = __kmp_tid_from_gtid(gtid); + + va_list ap; + va_start(ap, argc); + + rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, +#if OMPT_SUPPORT + VOLATILE_CAST(void *) unwrapped_task, +#endif + wrapper, __kmp_invoke_task_func, +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX + &ap +#else + ap +#endif + ); + + va_end(ap); + + if (rc) { + __kmp_run_before_invoked_task(gtid, tid, thr, team); + } + #if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -#define MKLOC(loc,routine) \ - static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" }; - -#include "kmp_ftn_os.h" - -void -xexpand(KMP_API_NAME_GOMP_BARRIER)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_barrier"); - KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); - __kmpc_barrier(&loc, gtid); -} - - -// -// Mutual exclusion -// - -// -// The symbol that icc/ifort generates for unnamed for unnamed critical -// sections - .gomp_critical_user_ - is defined using .comm in any objects -// reference it. We can't reference it directly here in C code, as the -// symbol contains a ".". -// -// The RTL contains an assembly language definition of .gomp_critical_user_ -// with another symbol __kmp_unnamed_critical_addr initialized with it's -// address. 
-// -extern kmp_critical_name *__kmp_unnamed_critical_addr; - - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_critical_start"); - KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); - __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); -} - - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_critical_end"); - KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); - __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); -} - - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_critical_name_start"); - KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid)); - __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr); -} - - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_critical_name_end"); - KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid)); - __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr); -} - - -// -// The Gnu codegen tries to use locked operations to perform atomic updates -// inline. If it can't, then it calls GOMP_atomic_start() before performing -// the update and GOMP_atomic_end() afterward, regardless of the data type. -// - -void -xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void) -{ - int gtid = __kmp_entry_gtid(); - KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); - -#if OMPT_SUPPORT - __ompt_thread_assign_wait_id(0); -#endif - - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); -} - - -void -xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); -} - - -int -xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_single_start"); - KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid)); - - if (! 
TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - // - // 3rd parameter == FALSE prevents kmp_enter_single from pushing a - // workshare when USE_CHECKS is defined. We need to avoid the push, - // as there is no corresponding GOMP_single_end() call. - // - return __kmp_enter_single(gtid, &loc, FALSE); -} - - -void * -xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) -{ - void *retval; - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_single_copy_start"); - KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid)); - - if (! TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - // - // If this is the first thread to enter, return NULL. The generated - // code will then call GOMP_single_copy_end() for this thread only, - // with the copyprivate data pointer as an argument. - // - if (__kmp_enter_single(gtid, &loc, FALSE)) - return NULL; - - // - // Wait for the first thread to set the copyprivate data pointer, - // and for all other threads to reach this point. - // - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - // - // Retrieve the value of the copyprivate data point, and wait for all - // threads to do likewise, then return. - // - retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - return retval; -} - - -void -xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid)); - - // - // Set the copyprivate data pointer fo the team, then hit the barrier - // so that the other threads will continue on and read it. Hit another - // barrier before continuing, so that the know that the copyprivate - // data pointer has been propagated to all threads before trying to - // reuse the t_copypriv_data field. 
- // - __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -} - - -void -xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_ordered_start"); - KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); - __kmpc_ordered(&loc, gtid); -} - - -void -xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_ordered_end"); - KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); - __kmpc_end_ordered(&loc, gtid); -} - - -// -// Dispatch macro defs -// -// They come in two flavors: 64-bit unsigned, and either 32-bit signed -// (IA-32 architecture) or 64-bit signed (Intel(R) 64). -// - -#if KMP_ARCH_X86 || KMP_ARCH_ARM -# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 -# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 -# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 -#else -# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8 -# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8 -# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8 -#endif /* KMP_ARCH_X86 */ - -# define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u -# define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u -# define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u - - -// -// The parallel contruct -// - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ -void -__kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *), - void *data) -{ -#if OMPT_SUPPORT - kmp_info_t *thr; - ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; - - if (ompt_enabled) { - // get pointer to thread data structure - thr = __kmp_threads[*gtid]; - - // save enclosing task state; set current state for task - enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - - // set task frame - ompt_frame = 
__ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); - } -#endif - - task(data); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // clear task frame - ompt_frame->exit_runtime_frame = NULL; - - // restore enclosing state - thr->th.ompt_thread_info.state = enclosing_state; - } -#endif -} - - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ -void -__kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr, - void (*task)(void *), void *data, unsigned num_threads, ident_t *loc, - enum sched_type schedule, long start, long end, long incr, long chunk_size) -{ - // - // Intialize the loop worksharing construct. - // - KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, - schedule != kmp_sch_static); - -#if OMPT_SUPPORT - kmp_info_t *thr; - ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; - - if (ompt_enabled) { - thr = __kmp_threads[*gtid]; - // save enclosing task state; set current state for task - enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - - // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); - } -#endif - - // - // Now invoke the microtask. - // - task(data); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // clear task frame - ompt_frame->exit_runtime_frame = NULL; - - // reset enclosing state - thr->th.ompt_thread_info.state = enclosing_state; - } -#endif -} - - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ -void -__kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), microtask_t wrapper, int argc,...) 
-{ - int rc; - kmp_info_t *thr = __kmp_threads[gtid]; - kmp_team_t *team = thr->th.th_team; - int tid = __kmp_tid_from_gtid(gtid); - - va_list ap; - va_start(ap, argc); - - rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) unwrapped_task, -#endif - wrapper, __kmp_invoke_task_func, -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap -#else - ap -#endif - ); - - va_end(ap); - - if (rc) { - __kmp_run_before_invoked_task(gtid, tid, thr, team); - } - -#if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_TRACE - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - - // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - team_info->parallel_id, task_info->task_id); - } -#endif - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - } -#endif -} - -static void -__kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *)) -{ - __kmp_serialized_parallel(loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - ompt_task_id_t ompt_task_id = __ompt_get_task_id_internal(0); - ompt_frame_t *ompt_frame = __ompt_get_task_frame_internal(0); - kmp_info_t *thr = __kmp_threads[gtid]; - - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(gtid); - ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid); - - ompt_frame->exit_runtime_frame = NULL; - - // parallel region callback - if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = 1; - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - ompt_task_id, ompt_frame, ompt_parallel_id, - team_size, (void *) task, - OMPT_INVOKER(fork_context_gnu)); - } - - // set up lightweight task - ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) - __kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, thr, gtid, (void *) 
task, ompt_parallel_id); - lwt->ompt_task_info.task_id = my_ompt_task_id; - lwt->ompt_task_info.frame.exit_runtime_frame = 0; - __ompt_lw_taskteam_link(lwt, thr); - -#if OMPT_TRACE - // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_ompt_task_id); - } - thr->th.ompt_thread_info.state = ompt_state_work_parallel; -#endif - } -#endif -} - - -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads) -{ - int gtid = __kmp_entry_gtid(); - -#if OMPT_SUPPORT - ompt_frame_t *parent_frame; - - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(0); - } -#endif - - MKLOC(loc, "GOMP_parallel_start"); - KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); - } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - -#if OMPT_SUPPORT - if (ompt_enabled) { - parent_frame->reenter_runtime_frame = NULL; - } -#endif -} - - -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) -{ - int gtid = __kmp_get_gtid(); - kmp_info_t *thr; - - thr = __kmp_threads[gtid]; - - MKLOC(loc, "GOMP_parallel_end"); - KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); - - -#if OMPT_SUPPORT - ompt_parallel_id_t parallel_id; - ompt_frame_t *ompt_frame = NULL; - - if (ompt_enabled) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - parallel_id = team_info->parallel_id; - - // Record that we re-entered the runtime system in the implicit - // task frame representing the parallel region. 
- ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); - -#if OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, task_info->task_id); - } -#endif - - // unlink if necessary. no-op if there is not a lightweight task. - ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr); - // GOMP allocates/frees lwt since it can't be kept on the stack - if (lwt) { - __kmp_free(lwt); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // Since a lightweight task was destroyed, make sure that the - // remaining deepest task knows the stack frame where the runtime - // was reentered. - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); - } -#endif - } - } -#endif - - if (! thr->th.th_team->t.t_serialized) { - __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, - thr->th.th_team); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // Set reenter frame in parent task, which will become current task - // in the midst of join. This is needed before the end_parallel callback. - ompt_frame = __ompt_get_task_frame_internal(1); - ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); - } -#endif - - __kmp_join_call(&loc, gtid -#if OMPT_SUPPORT - , fork_context_gnu -#endif - ); -#if OMPT_SUPPORT - if (ompt_enabled) { - ompt_frame->reenter_runtime_frame = NULL; - } -#endif - } - else { - __kmpc_end_serialized_parallel(&loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // Record that we re-entered the runtime system in the frame that - // created the parallel region. 
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, task_info->task_id, - OMPT_INVOKER(fork_context_gnu)); - } - - ompt_frame->reenter_runtime_frame = NULL; - - thr->th.ompt_thread_info.state = - (((thr->th.th_team)->t.t_serialized) ? - ompt_state_work_serial : ompt_state_work_parallel); - } -#endif - } -} - - -// -// Loop worksharing constructs -// - -// -// The Gnu codegen passes in an exclusive upper bound for the overall range, -// but the libguide dispatch code expects an inclusive upper bound, hence the -// "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th -// argument to __kmp_GOMP_fork_call). -// -// Conversely, KMP_DISPATCH_NEXT returns and inclusive upper bound in *p_ub, -// but the Gnu codegen expects an excluside upper bound, so the adjustment -// "*p_ub += stride" compenstates for the discrepancy. -// -// Correction: the gnu codegen always adjusts the upper bound by +-1, not the -// stride value. We adjust the dispatch parameters accordingly (by +-1), but -// we still adjust p_ub by the actual stride value. -// -// The "runtime" versions do not take a chunk_sz parameter. -// -// The profile lib cannot support construct checking of unordered loops that -// are predetermined by the compiler to be statically scheduled, as the gcc -// codegen will not always emit calls to GOMP_loop_static_next() to get the -// next iteration. Instead, it emits inline code to call omp_get_thread_num() -// num and calculate the iteration space using the result. It doesn't do this -// with ordered static loop, so they can be checked. 
-// - -#define LOOP_START(func,schedule) \ - int func (long lb, long ub, long str, long chunk_sz, long *p_lb, \ - long *p_ub) \ - { \ - int status; \ - long stride; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_RUNTIME_START(func,schedule) \ - int func (long lb, long ub, long str, long *p_lb, long *p_ub) \ - { \ - int status; \ - long stride; \ - long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 
1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_NEXT(func,fini_code) \ - int func(long *p_lb, long *p_ub) \ - { \ - int status; \ - long stride; \ - int gtid = __kmp_get_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d\n", gtid)); \ - \ - fini_code \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - *p_ub += (stride > 0) ? 1 : -1; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \ - "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \ - return status; \ - } - - -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) -LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) - -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) 
-LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) - - -void -xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) - - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) -} - - -void -xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) -{ - KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid())) -} - - -// -// Unsigned long long loop worksharing constructs -// -// These are new with gcc 4.4 -// - -#define LOOP_START_ULL(func,schedule) \ - int func (int up, unsigned long long lb, unsigned long long ub, \ - unsigned long long str, unsigned long long chunk_sz, \ - unsigned long long *p_lb, unsigned long long *p_ub) \ - { \ - int status; \ - long long str2 = up ? ((long long)str) : -((long long)str); \ - long long stride; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - \ - KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ - gtid, up, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ - (schedule) != kmp_sch_static); \ - status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \ - (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str2); \ - *p_ub += (str > 0) ? 
1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_RUNTIME_START_ULL(func,schedule) \ - int func (int up, unsigned long long lb, unsigned long long ub, \ - unsigned long long str, unsigned long long *p_lb, \ - unsigned long long *p_ub) \ - { \ - int status; \ - long long str2 = up ? ((long long)str) : -((long long)str); \ - unsigned long long stride; \ - unsigned long long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - \ - KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ - gtid, up, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, TRUE); \ - status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \ - (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT((long long)stride == str2); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_NEXT_ULL(func,fini_code) \ - int func(unsigned long long *p_lb, unsigned long long *p_ub) \ - { \ - int status; \ - long long stride; \ - int gtid = __kmp_get_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d\n", gtid)); \ - \ - fini_code \ - status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ - (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - *p_ub += (stride > 0) ? 
1 : -1; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \ - "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \ - return status; \ - } - - -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {}) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {}) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) -LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) - -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) - - -// -// Combined parallel / loop worksharing constructs -// -// There are no ull versions (yet). 
-// - -#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \ - void func (void (*task) (void *), void *data, unsigned num_threads, \ - long lb, long ub, long str, long chunk_sz) \ - { \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - ompt_pre(); \ - \ - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \ - task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - } \ - else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - } \ - \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - \ - ompt_post(); \ - \ - KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \ - } - - - -#if OMPT_SUPPORT - -#define OMPT_LOOP_PRE() \ - ompt_frame_t *parent_frame; \ - if (ompt_enabled) { \ - parent_frame = __ompt_get_task_frame_internal(0); \ - parent_frame->reenter_runtime_frame = __builtin_frame_address(0); \ - } - - -#define OMPT_LOOP_POST() \ - if (ompt_enabled) { \ - parent_frame->reenter_runtime_frame = NULL; \ - } - -#else - -#define OMPT_LOOP_PRE() - -#define OMPT_LOOP_POST() - -#endif - - -PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), - kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), - kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), - kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), - kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) - - -// 
-// Tasking constructs -// - -void -xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *), - long arg_size, long arg_align, int if_cond, unsigned gomp_flags) -{ - MKLOC(loc, "GOMP_task"); - int gtid = __kmp_entry_gtid(); - kmp_int32 flags = 0; - kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags; - - KA_TRACE(20, ("GOMP_task: T#%d\n", gtid)); - - // The low-order bit is the "tied" flag - if (gomp_flags & 1) { - input_flags->tiedness = 1; - } - // The second low-order bit is the "final" flag - if (gomp_flags & 2) { - input_flags->final = 1; - } - input_flags->native = 1; - // __kmp_task_alloc() sets up all other flags - - if (! if_cond) { - arg_size = 0; - } - - kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags, - sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0, - (kmp_routine_entry_t)func); - - if (arg_size > 0) { - if (arg_align > 0) { - task->shareds = (void *)((((size_t)task->shareds) - + arg_align - 1) / arg_align * arg_align); - } - //else error?? 
- - if (copy_func) { - (*copy_func)(task->shareds, data); - } - else { - KMP_MEMCPY(task->shareds, data, arg_size); - } - } - - if (if_cond) { - __kmpc_omp_task(&loc, gtid, task); - } - else { -#if OMPT_SUPPORT - ompt_thread_info_t oldInfo; - kmp_info_t *thread; - kmp_taskdata_t *taskdata; - if (ompt_enabled) { - // Store the threads states and restore them after the task - thread = __kmp_threads[ gtid ]; - taskdata = KMP_TASK_TO_TASKDATA(task); - oldInfo = thread->th.ompt_thread_info; - thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; - taskdata->ompt_task_info.frame.exit_runtime_frame = - __builtin_frame_address(0); - } -#endif - - __kmpc_omp_task_begin_if0(&loc, gtid, task); - func(data); - __kmpc_omp_task_complete_if0(&loc, gtid, task); - -#if OMPT_SUPPORT - if (ompt_enabled) { - thread->th.ompt_thread_info = oldInfo; - taskdata->ompt_task_info.frame.exit_runtime_frame = 0; - } -#endif - } - - KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); -} - - -void -xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) -{ - MKLOC(loc, "GOMP_taskwait"); - int gtid = __kmp_entry_gtid(); - - KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); - - __kmpc_omp_taskwait(&loc, gtid); - - KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid)); -} - - -// -// Sections worksharing constructs -// - -// -// For the sections construct, we initialize a dynamically scheduled loop -// worksharing construct with lb 1 and stride 1, and use the iteration #'s -// that its returns as sections ids. -// -// There are no special entry points for ordered sections, so we always use -// the dynamically scheduled workshare, even if the sections aren't ordered. 
-// - -unsigned -xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) -{ - int status; - kmp_int lb, ub, stride; - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_sections_start"); - KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid)); - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); - if (status) { - KMP_DEBUG_ASSERT(stride == 1); - KMP_DEBUG_ASSERT(lb > 0); - KMP_ASSERT(lb == ub); - } - else { - lb = 0; - } - - KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid, - (unsigned)lb)); - return (unsigned)lb; -} - - -unsigned -xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) -{ - int status; - kmp_int lb, ub, stride; - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_sections_next"); - KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid)); - - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); - if (status) { - KMP_DEBUG_ASSERT(stride == 1); - KMP_DEBUG_ASSERT(lb > 0); - KMP_ASSERT(lb == ub); - } - else { - lb = 0; - } - - KA_TRACE(20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, - (unsigned)lb)); - return (unsigned)lb; -} - - -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data, - unsigned num_threads, unsigned count) -{ - int gtid = __kmp_entry_gtid(); - -#if OMPT_SUPPORT - ompt_frame_t *parent_frame; - - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(0); - } -#endif - - MKLOC(loc, "GOMP_parallel_sections_start"); - KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data, - num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, - (kmp_int)count, 
(kmp_int)1, (kmp_int)1); - } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - -#if OMPT_SUPPORT - if (ompt_enabled) { - parent_frame->reenter_runtime_frame = NULL; - } -#endif - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid)); -} - - -void -xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) - - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) -} - - -void -xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) -{ - KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid())) -} - -// libgomp has an empty function for GOMP_taskyield as of 2013-10-10 -void -xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void) -{ - KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid())) - return; -} - -#if OMP_40_ENABLED // these are new GOMP_4.0 entry points - -void -xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, unsigned num_threads, unsigned int flags) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_parallel"); - KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if(flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); - } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - task(data); - xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); -} - -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task) (void *), void *data, - unsigned num_threads, unsigned count, unsigned flags) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_parallel_sections"); - KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); - - if 
(__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if(flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data, - num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, - (kmp_int)count, (kmp_int)1, (kmp_int)1); - } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - task(data); - xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); - KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); -} - -#define PARALLEL_LOOP(func, schedule) \ - void func (void (*task) (void *), void *data, unsigned num_threads, \ - long lb, long ub, long str, long chunk_sz, unsigned flags) \ - { \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - if (flags != 0) { \ - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \ - task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - } \ - else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - } \ - \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - task(data); \ - xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); \ - \ - KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \ - } - -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static) -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked) -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked) -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime) - - -void -xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_taskgroup_start"); - KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); - - __kmpc_taskgroup(&loc, gtid); - - return; -} - -void -xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_taskgroup_end"); - KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); - - __kmpc_end_taskgroup(&loc, gtid); - - return; -} - -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ -kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) { - kmp_int32 cncl_kind = 0; - switch(gomp_kind) { - case 1: - cncl_kind = cancel_parallel; - break; - case 2: - cncl_kind = cancel_loop; - break; - case 4: - cncl_kind = cancel_sections; - break; - case 8: - cncl_kind = cancel_taskgroup; - break; - } - return cncl_kind; -} - -bool -xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_cancellation_point"); - KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid)); - - kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); - - return __kmpc_cancellationpoint(&loc, gtid, cncl_kind); -} - -bool -xexpand(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - KMP_FATAL(NoGompCancellation); - int gtid = __kmp_get_gtid(); - 
MKLOC(loc, "GOMP_barrier_cancel"); - KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid)); - - return __kmpc_cancel_barrier(&loc, gtid); -} - -bool -xexpand(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } else { - return FALSE; - } - - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_cancel"); - KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid)); - - kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); - - if(do_cancel == FALSE) { - return xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which); - } else { - return __kmpc_cancel(&loc, gtid, cncl_kind); - } -} - -bool -xexpand(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_sections_end_cancel"); - KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid)); - - return __kmpc_cancel_barrier(&loc, gtid); -} - -bool -xexpand(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_loop_end_cancel"); - KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid)); - - return __kmpc_cancel_barrier(&loc, gtid); -} - -// All target functions are empty as of 2014-05-29 -void -xexpand(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn) (void *), const void *openmp_target, - size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) -{ - return; -} - -void -xexpand(KMP_API_NAME_GOMP_TARGET_DATA)(int device, const void *openmp_target, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds) -{ - return; -} - -void -xexpand(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) -{ - return; -} - -void -xexpand(KMP_API_NAME_GOMP_TARGET_UPDATE)(int device, const void *openmp_target, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds) -{ - return; -} - -void -xexpand(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams, 
unsigned int thread_limit) -{ - return; -} -#endif // OMP_40_ENABLED - - -/* - The following sections of code create aliases for the GOMP_* functions, - then create versioned symbols using the assembler directive .symver. - This is only pertinent for ELF .so library - xaliasify and xversionify are defined in kmp_ftn_os.h -*/ - -#ifdef KMP_USE_VERSION_SYMBOLS - -// GOMP_1.0 aliases -xaliasify(KMP_API_NAME_GOMP_ATOMIC_END, 10); -xaliasify(KMP_API_NAME_GOMP_ATOMIC_START, 10); -xaliasify(KMP_API_NAME_GOMP_BARRIER, 10); -xaliasify(KMP_API_NAME_GOMP_CRITICAL_END, 10); -xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10); -xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10); -xaliasify(KMP_API_NAME_GOMP_CRITICAL_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_END, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10); -xaliasify(KMP_API_NAME_GOMP_ORDERED_END, 10); -xaliasify(KMP_API_NAME_GOMP_ORDERED_START, 10); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_END, 10); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 
10); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_START, 10); -xaliasify(KMP_API_NAME_GOMP_SECTIONS_END, 10); -xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10); -xaliasify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10); -xaliasify(KMP_API_NAME_GOMP_SECTIONS_START, 10); -xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10); -xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10); -xaliasify(KMP_API_NAME_GOMP_SINGLE_START, 10); - -// GOMP_2.0 aliases -xaliasify(KMP_API_NAME_GOMP_TASK, 20); -xaliasify(KMP_API_NAME_GOMP_TASKWAIT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20); -xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20); - -// GOMP_3.0 aliases -xaliasify(KMP_API_NAME_GOMP_TASKYIELD, 30); - -// GOMP_4.0 aliases -// The GOMP_parallel* entry points below aren't OpenMP 4.0 related. 
-#if OMP_40_ENABLED -xaliasify(KMP_API_NAME_GOMP_PARALLEL, 40); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40); -xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40); -xaliasify(KMP_API_NAME_GOMP_TASKGROUP_START, 40); -xaliasify(KMP_API_NAME_GOMP_TASKGROUP_END, 40); -xaliasify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40); -xaliasify(KMP_API_NAME_GOMP_CANCEL, 40); -xaliasify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40); -xaliasify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40); -xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40); -xaliasify(KMP_API_NAME_GOMP_TARGET, 40); -xaliasify(KMP_API_NAME_GOMP_TARGET_DATA, 40); -xaliasify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40); -xaliasify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40); -xaliasify(KMP_API_NAME_GOMP_TEAMS, 40); -#endif - -// GOMP_1.0 versioned symbols -xversionify(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, "GOMP_1.0"); 
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0"); -xversionify(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0"); - -// GOMP_2.0 versioned symbols -xversionify(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 
20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0"); - -// GOMP_3.0 versioned symbols -xversionify(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0"); - -// GOMP_4.0 versioned symbols -#if OMP_40_ENABLED -xversionify(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 
40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0"); -xversionify(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0"); -#endif - -#endif // KMP_USE_VERSION_SYMBOLS - -#ifdef __cplusplus - } //extern "C" -#endif // __cplusplus - - + if (ompt_enabled) { +#if OMPT_TRACE + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + + // implicit task callback + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + team_info->parallel_id, task_info->task_id); + } +#endif + thr->th.ompt_thread_info.state = ompt_state_work_parallel; + } +#endif +} + +static void +__kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *)) +{ + __kmp_serialized_parallel(loc, gtid); + +#if OMPT_SUPPORT + if (ompt_enabled) { + ompt_task_id_t ompt_task_id = __ompt_get_task_id_internal(0); + ompt_frame_t *ompt_frame = __ompt_get_task_frame_internal(0); + kmp_info_t *thr = __kmp_threads[gtid]; + + ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(gtid); + ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid); + + ompt_frame->exit_runtime_frame = NULL; + + // parallel region callback + if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { + int team_size = 1; + ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( + ompt_task_id, ompt_frame, ompt_parallel_id, + team_size, (void *) task, + OMPT_INVOKER(fork_context_gnu)); + } + + // set up lightweight task + ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) + __kmp_allocate(sizeof(ompt_lw_taskteam_t)); + __ompt_lw_taskteam_init(lwt, thr, 
gtid, (void *) task, ompt_parallel_id); + lwt->ompt_task_info.task_id = my_ompt_task_id; + lwt->ompt_task_info.frame.exit_runtime_frame = 0; + __ompt_lw_taskteam_link(lwt, thr); + +#if OMPT_TRACE + // implicit task callback + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + ompt_parallel_id, my_ompt_task_id); + } + thr->th.ompt_thread_info.state = ompt_state_work_parallel; +#endif + } +#endif +} + + +void +xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads) +{ + int gtid = __kmp_entry_gtid(); + +#if OMPT_SUPPORT + ompt_frame_t *parent_frame; + + if (ompt_enabled) { + parent_frame = __ompt_get_task_frame_internal(0); + parent_frame->reenter_runtime_frame = __builtin_frame_address(0); + } +#endif + + MKLOC(loc, "GOMP_parallel_start"); + KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid)); + + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); + } + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); + } + else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } + +#if OMPT_SUPPORT + if (ompt_enabled) { + parent_frame->reenter_runtime_frame = NULL; + } +#endif +} + + +void +xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) +{ + int gtid = __kmp_get_gtid(); + kmp_info_t *thr; + + thr = __kmp_threads[gtid]; + + MKLOC(loc, "GOMP_parallel_end"); + KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); + + +#if OMPT_SUPPORT + ompt_parallel_id_t parallel_id; + ompt_frame_t *ompt_frame = NULL; + + if (ompt_enabled) { + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + parallel_id = team_info->parallel_id; + + // Record that we re-entered the runtime system in the implicit + // task frame representing the parallel region. 
+ ompt_frame = __ompt_get_task_frame_internal(0); + ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); + +#if OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + parallel_id, task_info->task_id); + } +#endif + + // unlink if necessary. no-op if there is not a lightweight task. + ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr); + // GOMP allocates/frees lwt since it can't be kept on the stack + if (lwt) { + __kmp_free(lwt); + +#if OMPT_SUPPORT + if (ompt_enabled) { + // Since a lightweight task was destroyed, make sure that the + // remaining deepest task knows the stack frame where the runtime + // was reentered. + ompt_frame = __ompt_get_task_frame_internal(0); + ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); + } +#endif + } + } +#endif + + if (! thr->th.th_team->t.t_serialized) { + __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, + thr->th.th_team); + +#if OMPT_SUPPORT + if (ompt_enabled) { + // Set reenter frame in parent task, which will become current task + // in the midst of join. This is needed before the end_parallel callback. + ompt_frame = __ompt_get_task_frame_internal(1); + ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); + } +#endif + + __kmp_join_call(&loc, gtid +#if OMPT_SUPPORT + , fork_context_gnu +#endif + ); +#if OMPT_SUPPORT + if (ompt_enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif + } + else { + __kmpc_end_serialized_parallel(&loc, gtid); + +#if OMPT_SUPPORT + if (ompt_enabled) { + // Record that we re-entered the runtime system in the frame that + // created the parallel region. 
+ ompt_frame->reenter_runtime_frame = __builtin_frame_address(0); + + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + parallel_id, task_info->task_id, + OMPT_INVOKER(fork_context_gnu)); + } + + ompt_frame->reenter_runtime_frame = NULL; + + thr->th.ompt_thread_info.state = + (((thr->th.th_team)->t.t_serialized) ? + ompt_state_work_serial : ompt_state_work_parallel); + } +#endif + } +} + + +// +// Loop worksharing constructs +// + +// +// The Gnu codegen passes in an exclusive upper bound for the overall range, +// but the libguide dispatch code expects an inclusive upper bound, hence the +// "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th +// argument to __kmp_GOMP_fork_call). +// +// Conversely, KMP_DISPATCH_NEXT returns and inclusive upper bound in *p_ub, +// but the Gnu codegen expects an excluside upper bound, so the adjustment +// "*p_ub += stride" compenstates for the discrepancy. +// +// Correction: the gnu codegen always adjusts the upper bound by +-1, not the +// stride value. We adjust the dispatch parameters accordingly (by +-1), but +// we still adjust p_ub by the actual stride value. +// +// The "runtime" versions do not take a chunk_sz parameter. +// +// The profile lib cannot support construct checking of unordered loops that +// are predetermined by the compiler to be statically scheduled, as the gcc +// codegen will not always emit calls to GOMP_loop_static_next() to get the +// next iteration. Instead, it emits inline code to call omp_get_thread_num() +// num and calculate the iteration space using the result. It doesn't do this +// with ordered static loop, so they can be checked. 
+// + +#define LOOP_START(func,schedule) \ + int func (long lb, long ub, long str, long chunk_sz, long *p_lb, \ + long *p_ub) \ + { \ + int status; \ + long stride; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ + gtid, lb, ub, str, chunk_sz )); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT(stride == str); \ + *p_ub += (str > 0) ? 1 : -1; \ + } \ + } \ + else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + + +#define LOOP_RUNTIME_START(func,schedule) \ + int func (long lb, long ub, long str, long *p_lb, long *p_ub) \ + { \ + int status; \ + long stride; \ + long chunk_sz = 0; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ + gtid, lb, ub, str, chunk_sz )); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT(stride == str); \ + *p_ub += (str > 0) ? 
1 : -1; \ + } \ + } \ + else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + + +#define LOOP_NEXT(func,fini_code) \ + int func(long *p_lb, long *p_ub) \ + { \ + int status; \ + long stride; \ + int gtid = __kmp_get_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, ( #func ": T#%d\n", gtid)); \ + \ + fini_code \ + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + if (status) { \ + *p_ub += (stride > 0) ? 1 : -1; \ + } \ + \ + KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \ + "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \ + return status; \ + } + + +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) +LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) + +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) 
+LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) + + +void +xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) +{ + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) + + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + + KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) +} + + +void +xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) +{ + KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid())) +} + + +// +// Unsigned long long loop worksharing constructs +// +// These are new with gcc 4.4 +// + +#define LOOP_START_ULL(func,schedule) \ + int func (int up, unsigned long long lb, unsigned long long ub, \ + unsigned long long str, unsigned long long chunk_sz, \ + unsigned long long *p_lb, unsigned long long *p_ub) \ + { \ + int status; \ + long long str2 = up ? ((long long)str) : -((long long)str); \ + long long stride; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + \ + KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ + gtid, up, lb, ub, str, chunk_sz )); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ + (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ + (schedule) != kmp_sch_static); \ + status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \ + (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT(stride == str2); \ + *p_ub += (str > 0) ? 
1 : -1; \ + } \ + } \ + else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + + +#define LOOP_RUNTIME_START_ULL(func,schedule) \ + int func (int up, unsigned long long lb, unsigned long long ub, \ + unsigned long long str, unsigned long long *p_lb, \ + unsigned long long *p_ub) \ + { \ + int status; \ + long long str2 = up ? ((long long)str) : -((long long)str); \ + unsigned long long stride; \ + unsigned long long chunk_sz = 0; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + \ + KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ + gtid, up, lb, ub, str, chunk_sz )); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ + (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, TRUE); \ + status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \ + (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT((long long)stride == str2); \ + *p_ub += (str > 0) ? 1 : -1; \ + } \ + } \ + else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + + +#define LOOP_NEXT_ULL(func,fini_code) \ + int func(unsigned long long *p_lb, unsigned long long *p_ub) \ + { \ + int status; \ + long long stride; \ + int gtid = __kmp_get_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, ( #func ": T#%d\n", gtid)); \ + \ + fini_code \ + status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ + (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ + if (status) { \ + *p_ub += (stride > 0) ? 
1 : -1; \ + } \ + \ + KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \ + "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \ + return status; \ + } + + +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {}) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {}) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) +LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) + +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) +LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \ + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) + + +// +// Combined parallel / loop worksharing constructs +// +// There are no ull versions (yet). 
+// + +#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \ + void func (void (*task) (void *), void *data, unsigned num_threads, \ + long lb, long ub, long str, long chunk_sz) \ + { \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ + gtid, lb, ub, str, chunk_sz )); \ + \ + ompt_pre(); \ + \ + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ + if (num_threads != 0) { \ + __kmp_push_num_threads(&loc, gtid, num_threads); \ + } \ + __kmp_GOMP_fork_call(&loc, gtid, task, \ + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \ + task, data, num_threads, &loc, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ + } \ + else { \ + __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ + } \ + \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ + \ + ompt_post(); \ + \ + KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \ + } + + + +#if OMPT_SUPPORT + +#define OMPT_LOOP_PRE() \ + ompt_frame_t *parent_frame; \ + if (ompt_enabled) { \ + parent_frame = __ompt_get_task_frame_internal(0); \ + parent_frame->reenter_runtime_frame = __builtin_frame_address(0); \ + } + + +#define OMPT_LOOP_POST() \ + if (ompt_enabled) { \ + parent_frame->reenter_runtime_frame = NULL; \ + } + +#else + +#define OMPT_LOOP_PRE() + +#define OMPT_LOOP_POST() + +#endif + + +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), + kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), + kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), + kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), + kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) + + +// 
+// Tasking constructs +// + +void +xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *), + long arg_size, long arg_align, int if_cond, unsigned gomp_flags) +{ + MKLOC(loc, "GOMP_task"); + int gtid = __kmp_entry_gtid(); + kmp_int32 flags = 0; + kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags; + + KA_TRACE(20, ("GOMP_task: T#%d\n", gtid)); + + // The low-order bit is the "tied" flag + if (gomp_flags & 1) { + input_flags->tiedness = 1; + } + // The second low-order bit is the "final" flag + if (gomp_flags & 2) { + input_flags->final = 1; + } + input_flags->native = 1; + // __kmp_task_alloc() sets up all other flags + + if (! if_cond) { + arg_size = 0; + } + + kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags, + sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0, + (kmp_routine_entry_t)func); + + if (arg_size > 0) { + if (arg_align > 0) { + task->shareds = (void *)((((size_t)task->shareds) + + arg_align - 1) / arg_align * arg_align); + } + //else error?? 
+ + if (copy_func) { + (*copy_func)(task->shareds, data); + } + else { + KMP_MEMCPY(task->shareds, data, arg_size); + } + } + + if (if_cond) { + __kmpc_omp_task(&loc, gtid, task); + } + else { +#if OMPT_SUPPORT + ompt_thread_info_t oldInfo; + kmp_info_t *thread; + kmp_taskdata_t *taskdata; + if (ompt_enabled) { + // Store the threads states and restore them after the task + thread = __kmp_threads[ gtid ]; + taskdata = KMP_TASK_TO_TASKDATA(task); + oldInfo = thread->th.ompt_thread_info; + thread->th.ompt_thread_info.wait_id = 0; + thread->th.ompt_thread_info.state = ompt_state_work_parallel; + taskdata->ompt_task_info.frame.exit_runtime_frame = + __builtin_frame_address(0); + } +#endif + + __kmpc_omp_task_begin_if0(&loc, gtid, task); + func(data); + __kmpc_omp_task_complete_if0(&loc, gtid, task); + +#if OMPT_SUPPORT + if (ompt_enabled) { + thread->th.ompt_thread_info = oldInfo; + taskdata->ompt_task_info.frame.exit_runtime_frame = 0; + } +#endif + } + + KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); +} + + +void +xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) +{ + MKLOC(loc, "GOMP_taskwait"); + int gtid = __kmp_entry_gtid(); + + KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); + + __kmpc_omp_taskwait(&loc, gtid); + + KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid)); +} + + +// +// Sections worksharing constructs +// + +// +// For the sections construct, we initialize a dynamically scheduled loop +// worksharing construct with lb 1 and stride 1, and use the iteration #'s +// that its returns as sections ids. +// +// There are no special entry points for ordered sections, so we always use +// the dynamically scheduled workshare, even if the sections aren't ordered. 
+// + +unsigned +xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) +{ + int status; + kmp_int lb, ub, stride; + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_sections_start"); + KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid)); + + KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); + + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); + if (status) { + KMP_DEBUG_ASSERT(stride == 1); + KMP_DEBUG_ASSERT(lb > 0); + KMP_ASSERT(lb == ub); + } + else { + lb = 0; + } + + KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid, + (unsigned)lb)); + return (unsigned)lb; +} + + +unsigned +xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) +{ + int status; + kmp_int lb, ub, stride; + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_sections_next"); + KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid)); + + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); + if (status) { + KMP_DEBUG_ASSERT(stride == 1); + KMP_DEBUG_ASSERT(lb > 0); + KMP_ASSERT(lb == ub); + } + else { + lb = 0; + } + + KA_TRACE(20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, + (unsigned)lb)); + return (unsigned)lb; +} + + +void +xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data, + unsigned num_threads, unsigned count) +{ + int gtid = __kmp_entry_gtid(); + +#if OMPT_SUPPORT + ompt_frame_t *parent_frame; + + if (ompt_enabled) { + parent_frame = __ompt_get_task_frame_internal(0); + parent_frame->reenter_runtime_frame = __builtin_frame_address(0); + } +#endif + + MKLOC(loc, "GOMP_parallel_sections_start"); + KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid)); + + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); + } + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data, + num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, + (kmp_int)count, 
(kmp_int)1, (kmp_int)1); + } + else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } + +#if OMPT_SUPPORT + if (ompt_enabled) { + parent_frame->reenter_runtime_frame = NULL; + } +#endif + + KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); + + KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid)); +} + + +void +xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) +{ + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) + + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + + KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) +} + + +void +xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) +{ + KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid())) +} + +// libgomp has an empty function for GOMP_taskyield as of 2013-10-10 +void +xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void) +{ + KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid())) + return; +} + +#if OMP_40_ENABLED // these are new GOMP_4.0 entry points + +void +xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, unsigned num_threads, unsigned int flags) +{ + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_parallel"); + KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid)); + + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); + } + if(flags != 0) { + __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); + } + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); + } + else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } + task(data); + xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); +} + +void +xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task) (void *), void *data, + unsigned num_threads, unsigned count, unsigned flags) +{ + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_parallel_sections"); + KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); + + if 
(__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); + } + if(flags != 0) { + __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); + } + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data, + num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, + (kmp_int)count, (kmp_int)1, (kmp_int)1); + } + else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } + + KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); + + task(data); + xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); + KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); +} + +#define PARALLEL_LOOP(func, schedule) \ + void func (void (*task) (void *), void *data, unsigned num_threads, \ + long lb, long ub, long str, long chunk_sz, unsigned flags) \ + { \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ + gtid, lb, ub, str, chunk_sz )); \ + \ + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ + if (num_threads != 0) { \ + __kmp_push_num_threads(&loc, gtid, num_threads); \ + } \ + if (flags != 0) { \ + __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \ + } \ + __kmp_GOMP_fork_call(&loc, gtid, task, \ + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \ + task, data, num_threads, &loc, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ + } \ + else { \ + __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ + } \ + \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ + task(data); \ + xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); \ + \ + KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \ + } + +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static) +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked) +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked) +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime) + + +void +xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void) +{ + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_taskgroup_start"); + KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); + + __kmpc_taskgroup(&loc, gtid); + + return; +} + +void +xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void) +{ + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_taskgroup_end"); + KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); + + __kmpc_end_taskgroup(&loc, gtid); + + return; +} + +#ifndef KMP_DEBUG +static +#endif /* KMP_DEBUG */ +kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) { + kmp_int32 cncl_kind = 0; + switch(gomp_kind) { + case 1: + cncl_kind = cancel_parallel; + break; + case 2: + cncl_kind = cancel_loop; + break; + case 4: + cncl_kind = cancel_sections; + break; + case 8: + cncl_kind = cancel_taskgroup; + break; + } + return cncl_kind; +} + +bool +xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) +{ + if(__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_cancellation_point"); + KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid)); + + kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); + + return __kmpc_cancellationpoint(&loc, gtid, cncl_kind); +} + +bool +xexpand(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) +{ + if(__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } + KMP_FATAL(NoGompCancellation); + int gtid = __kmp_get_gtid(); + 
MKLOC(loc, "GOMP_barrier_cancel"); + KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid)); + + return __kmpc_cancel_barrier(&loc, gtid); +} + +bool +xexpand(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) +{ + if(__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } else { + return FALSE; + } + + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_cancel"); + KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid)); + + kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); + + if(do_cancel == FALSE) { + return xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which); + } else { + return __kmpc_cancel(&loc, gtid, cncl_kind); + } +} + +bool +xexpand(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) +{ + if(__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_sections_end_cancel"); + KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid)); + + return __kmpc_cancel_barrier(&loc, gtid); +} + +bool +xexpand(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) +{ + if(__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_loop_end_cancel"); + KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid)); + + return __kmpc_cancel_barrier(&loc, gtid); +} + +// All target functions are empty as of 2014-05-29 +void +xexpand(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn) (void *), const void *openmp_target, + size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) +{ + return; +} + +void +xexpand(KMP_API_NAME_GOMP_TARGET_DATA)(int device, const void *openmp_target, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) +{ + return; +} + +void +xexpand(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) +{ + return; +} + +void +xexpand(KMP_API_NAME_GOMP_TARGET_UPDATE)(int device, const void *openmp_target, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) +{ + return; +} + +void +xexpand(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams, 
unsigned int thread_limit) +{ + return; +} +#endif // OMP_40_ENABLED + + +/* + The following sections of code create aliases for the GOMP_* functions, + then create versioned symbols using the assembler directive .symver. + This is only pertinent for ELF .so library + xaliasify and xversionify are defined in kmp_ftn_os.h +*/ + +#ifdef KMP_USE_VERSION_SYMBOLS + +// GOMP_1.0 aliases +xaliasify(KMP_API_NAME_GOMP_ATOMIC_END, 10); +xaliasify(KMP_API_NAME_GOMP_ATOMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_BARRIER, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_END, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_END, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_ORDERED_END, 10); +xaliasify(KMP_API_NAME_GOMP_ORDERED_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_END, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 
10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_START, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_END, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_START, 10); +xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10); +xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10); +xaliasify(KMP_API_NAME_GOMP_SINGLE_START, 10); + +// GOMP_2.0 aliases +xaliasify(KMP_API_NAME_GOMP_TASK, 20); +xaliasify(KMP_API_NAME_GOMP_TASKWAIT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20); + +// GOMP_3.0 aliases +xaliasify(KMP_API_NAME_GOMP_TASKYIELD, 30); + +// GOMP_4.0 aliases +// The GOMP_parallel* entry points below aren't OpenMP 4.0 related. 
+#if OMP_40_ENABLED +xaliasify(KMP_API_NAME_GOMP_PARALLEL, 40); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40); +xaliasify(KMP_API_NAME_GOMP_TASKGROUP_START, 40); +xaliasify(KMP_API_NAME_GOMP_TASKGROUP_END, 40); +xaliasify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40); +xaliasify(KMP_API_NAME_GOMP_CANCEL, 40); +xaliasify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40); +xaliasify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40); +xaliasify(KMP_API_NAME_GOMP_TARGET, 40); +xaliasify(KMP_API_NAME_GOMP_TARGET_DATA, 40); +xaliasify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40); +xaliasify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40); +xaliasify(KMP_API_NAME_GOMP_TEAMS, 40); +#endif + +// GOMP_1.0 versioned symbols +xversionify(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, "GOMP_1.0"); 
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0"); + +// GOMP_2.0 versioned symbols +xversionify(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 
20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0"); + +// GOMP_3.0 versioned symbols +xversionify(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0"); + +// GOMP_4.0 versioned symbols +#if OMP_40_ENABLED +xversionify(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 
40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0"); +xversionify(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0"); +#endif + +#endif // KMP_USE_VERSION_SYMBOLS + +#ifdef __cplusplus + } //extern "C" +#endif // __cplusplus + + diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n.c b/contrib/libs/cxxsupp/openmp/kmp_i18n.c index 3296624e177..8dad2553b00 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_i18n.c +++ b/contrib/libs/cxxsupp/openmp/kmp_i18n.c @@ -1,974 +1,974 @@ -/* - * kmp_i18n.c - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - - -#include "kmp_i18n.h" - -#include "kmp_os.h" -#include "kmp_debug.h" -#include "kmp.h" -#include "kmp_lock.h" -#include "kmp_io.h" // __kmp_printf. - -#include -#include -#include -#include -#include - -#include "kmp_i18n_default.inc" -#include "kmp_str.h" -#include "kmp_environment.h" - -#undef KMP_I18N_OK - -#define get_section( id ) ( (id) >> 16 ) -#define get_number( id ) ( (id) & 0xFFFF ) - -kmp_msg_t __kmp_msg_empty = { kmp_mt_dummy, 0, "", 0 }; -kmp_msg_t __kmp_msg_null = { kmp_mt_dummy, 0, NULL, 0 }; -static char const * no_message_available = "(No message available)"; - -enum kmp_i18n_cat_status { - KMP_I18N_CLOSED, // Not yet opened or closed. - KMP_I18N_OPENED, // Opened successfully, ready to use. - KMP_I18N_ABSENT // Opening failed, message catalog should not be used. 
-}; // enum kmp_i18n_cat_status -typedef enum kmp_i18n_cat_status kmp_i18n_cat_status_t; -static volatile kmp_i18n_cat_status_t status = KMP_I18N_CLOSED; - -/* - Message catalog is opened at first usage, so we have to synchronize opening to avoid race and - multiple openings. - - Closing does not require synchronization, because catalog is closed very late at library - shutting down, when no other threads are alive. -*/ - -static void __kmp_i18n_do_catopen(); -static kmp_bootstrap_lock_t lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ); - // `lock' variable may be placed into __kmp_i18n_catopen function because it is used only by - // that function. But we afraid a (buggy) compiler may treat it wrongly. So we put it outside of - // function just in case. - -void -__kmp_i18n_catopen( -) { - if ( status == KMP_I18N_CLOSED ) { - __kmp_acquire_bootstrap_lock( & lock ); - if ( status == KMP_I18N_CLOSED ) { - __kmp_i18n_do_catopen(); - }; // if - __kmp_release_bootstrap_lock( & lock ); - }; // if -} // func __kmp_i18n_catopen - - -/* - ================================================================================================ - Linux* OS and OS X* part. - ================================================================================================ -*/ - -#if KMP_OS_UNIX -#define KMP_I18N_OK - -#include - -#define KMP_I18N_NULLCAT ((nl_catd)( -1 )) -static nl_catd cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? -static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide.cat" : "libomp.cat" ); - -/* - Useful links: - http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html#tag_08_02 - http://www.opengroup.org/onlinepubs/000095399/functions/catopen.html - http://www.opengroup.org/onlinepubs/000095399/functions/setlocale.html -*/ - -void -__kmp_i18n_do_catopen( -) { - int english = 0; - char * lang = __kmp_env_get( "LANG" ); - // TODO: What about LC_ALL or LC_MESSAGES? 
- - KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED ); - KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT ); - - english = - lang == NULL || // In all these cases English language is used. - strcmp( lang, "" ) == 0 || - strcmp( lang, " " ) == 0 || - // Workaround for Fortran RTL bug DPD200137873 "Fortran runtime resets LANG env var - // to space if it is not set". - strcmp( lang, "C" ) == 0 || - strcmp( lang, "POSIX" ) == 0; - - if ( ! english ) { // English language is not yet detected, let us continue. - // Format of LANG is: [language[_territory][.codeset][@modifier]] - // Strip all parts except language. - char * tail = NULL; - __kmp_str_split( lang, '@', & lang, & tail ); - __kmp_str_split( lang, '.', & lang, & tail ); - __kmp_str_split( lang, '_', & lang, & tail ); - english = ( strcmp( lang, "en" ) == 0 ); - }; // if - - KMP_INTERNAL_FREE( lang ); - - // Do not try to open English catalog because internal messages are - // exact copy of messages in English catalog. - if ( english ) { - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. - return; - } - - cat = catopen( name, 0 ); - // TODO: Why do we pass 0 in flags? - status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED ); - - if ( status == KMP_I18N_ABSENT ) { - if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to - int error = errno; // Save errno immediately. - char * nlspath = __kmp_env_get( "NLSPATH" ); - char * lang = __kmp_env_get( "LANG" ); - - // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so - // __kmp_i18n_catgets() will not try to open catalog, but will return default message. 
- __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantOpenMessageCatalog, name ), - KMP_ERR( error ), - KMP_HNT( CheckEnvVar, "NLSPATH", nlspath ), - KMP_HNT( CheckEnvVar, "LANG", lang ), - __kmp_msg_null - ); - KMP_INFORM( WillUseDefaultMessages ); - KMP_INTERNAL_FREE( nlspath ); - KMP_INTERNAL_FREE( lang ); - } - } else { // status == KMP_I18N_OPENED - - int section = get_section( kmp_i18n_prp_Version ); - int number = get_number( kmp_i18n_prp_Version ); - char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ]; - // Expected version of the catalog. - kmp_str_buf_t version; // Actual version of the catalog. - __kmp_str_buf_init( & version ); - __kmp_str_buf_print( & version, "%s", catgets( cat, section, number, NULL ) ); - - // String returned by catgets is invalid after closing the catalog, so copy it. - if ( strcmp( version.str, expected ) != 0 ) { - __kmp_i18n_catclose(); // Close bad catalog. - status = KMP_I18N_ABSENT; // And mark it as absent. - if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to - // And now print a warning using default messages. 
- char const * name = "NLSPATH"; - char const * nlspath = __kmp_env_get( name ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( WrongMessageCatalog, name, version.str, expected ), - KMP_HNT( CheckEnvVar, name, nlspath ), - __kmp_msg_null - ); - KMP_INFORM( WillUseDefaultMessages ); - KMP_INTERNAL_FREE( (void *) nlspath ); - } // __kmp_generate_warnings - }; // if - __kmp_str_buf_free( & version ); - - }; // if - -} // func __kmp_i18n_do_catopen - - -void -__kmp_i18n_catclose( -) { - if ( status == KMP_I18N_OPENED ) { - KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); - catclose( cat ); - cat = KMP_I18N_NULLCAT; - }; // if - status = KMP_I18N_CLOSED; -} // func __kmp_i18n_catclose - - -char const * -__kmp_i18n_catgets( - kmp_i18n_id_t id -) { - - int section = get_section( id ); - int number = get_number( id ); - char const * message = NULL; - - if ( 1 <= section && section <= __kmp_i18n_default_table.size ) { - if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) { - if ( status == KMP_I18N_CLOSED ) { - __kmp_i18n_catopen(); - }; // if - if ( status == KMP_I18N_OPENED ) { - message = - catgets( - cat, - section, number, - __kmp_i18n_default_table.sect[ section ].str[ number ] - ); - }; // if - if ( message == NULL ) { - message = __kmp_i18n_default_table.sect[ section ].str[ number ]; - }; // if - }; // if - }; // if - if ( message == NULL ) { - message = no_message_available; - }; // if - return message; - -} // func __kmp_i18n_catgets - - -#endif // KMP_OS_UNIX - -/* - ================================================================================================ - Windows* OS part. - ================================================================================================ -*/ - -#if KMP_OS_WINDOWS -#define KMP_I18N_OK - -#include "kmp_environment.h" -#include - -#define KMP_I18N_NULLCAT NULL -static HMODULE cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? -static char const * name = ( KMP_VERSION_MAJOR == 4 ? 
"libguide40ui.dll" : "libompui.dll" ); - -static kmp_i18n_table_t table = { 0, NULL }; - // Messages formatted by FormatMessage() should be freed, but catgets() interface assumes - // user will not free messages. So we cache all the retrieved messages in the table, which - // are freed at catclose(). -static UINT const default_code_page = CP_OEMCP; -static UINT code_page = default_code_page; - -static char const * ___catgets( kmp_i18n_id_t id ); -static UINT get_code_page(); -static void kmp_i18n_table_free( kmp_i18n_table_t * table ); - - -static UINT -get_code_page( -) { - - UINT cp = default_code_page; - char const * value = __kmp_env_get( "KMP_CODEPAGE" ); - if ( value != NULL ) { - if ( _stricmp( value, "ANSI" ) == 0 ) { - cp = CP_ACP; - } else if ( _stricmp( value, "OEM" ) == 0 ) { - cp = CP_OEMCP; - } else if ( _stricmp( value, "UTF-8" ) == 0 || _stricmp( value, "UTF8" ) == 0 ) { - cp = CP_UTF8; - } else if ( _stricmp( value, "UTF-7" ) == 0 || _stricmp( value, "UTF7" ) == 0 ) { - cp = CP_UTF7; - } else { - // !!! TODO: Issue a warning? - }; // if - }; // if - KMP_INTERNAL_FREE( (void *) value ); - return cp; - -} // func get_code_page - - -static void -kmp_i18n_table_free( - kmp_i18n_table_t * table -) { - int s; - int m; - for ( s = 0; s < table->size; ++ s ) { - for ( m = 0; m < table->sect[ s ].size; ++ m ) { - // Free message. - KMP_INTERNAL_FREE( (void *) table->sect[ s ].str[ m ] ); - table->sect[ s ].str[ m ] = NULL; - }; // for m - table->sect[ s ].size = 0; - // Free section itself. 
- KMP_INTERNAL_FREE ( (void *) table->sect[ s ].str ); - table->sect[ s ].str = NULL; - }; // for s - table->size = 0; - KMP_INTERNAL_FREE( (void *) table->sect ); - table->sect = NULL; -} // kmp_i8n_table_free - - -void -__kmp_i18n_do_catopen( -) { - - LCID locale_id = GetThreadLocale(); - WORD lang_id = LANGIDFROMLCID( locale_id ); - WORD primary_lang_id = PRIMARYLANGID( lang_id ); - kmp_str_buf_t path; - - KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED ); - KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT ); - - __kmp_str_buf_init( & path ); - - // Do not try to open English catalog because internal messages are - // exact copy of messages in English catalog. - if ( primary_lang_id == LANG_ENGLISH ) { - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. - goto end; - }; // if - - // Construct resource DLL name. - /* - Simple - LoadLibrary( name ) - is not suitable due to security issue (see - http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full - path to the message catalog. - */ - { - - // Get handle of our DLL first. - HMODULE handle; - BOOL brc = - GetModuleHandleEx( - GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - reinterpret_cast< LPCSTR >( & __kmp_i18n_do_catopen ), - & handle - ); - if ( ! brc ) { // Error occurred. - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. - goto end; - // TODO: Enable multiple messages (KMP_MSG) to be passed to __kmp_msg; and print - // a proper warning. - }; // if - - // Now get path to the our DLL. - for ( ; ; ) { - DWORD drc = GetModuleFileName( handle, path.str, path.size ); - if ( drc == 0 ) { // Error occurred. - status = KMP_I18N_ABSENT; - goto end; - }; // if - if ( drc < path.size ) { - path.used = drc; - break; - }; // if - __kmp_str_buf_reserve( & path, path.size * 2 ); - }; // forever - - // Now construct the name of message catalog. 
- kmp_str_fname fname; - __kmp_str_fname_init( & fname, path.str ); - __kmp_str_buf_clear( & path ); - __kmp_str_buf_print( & path, "%s%lu/%s", fname.dir, (unsigned long)( locale_id ), name ); - __kmp_str_fname_free( & fname ); - - } - - // For security reasons, use LoadLibraryEx() and load message catalog as a data file. - cat = LoadLibraryEx( path.str, NULL, LOAD_LIBRARY_AS_DATAFILE ); - status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED ); - - if ( status == KMP_I18N_ABSENT ) { - if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to - DWORD error = GetLastError(); - // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so - // __kmp_i18n_catgets() will not try to open catalog but will return default message. - /* - If message catalog for another architecture found (e.g. OpenMP RTL - for IA-32 architecture opens libompui.dll for Intel(R) 64) - Windows* OS returns error 193 (ERROR_BAD_EXE_FORMAT). However, - FormatMessage fails to return a message for this error, so user - will see: - - OMP: Warning #2: Cannot open message catalog "1041\libompui.dll": - OMP: System error #193: (No system error message available) - OMP: Info #3: Default messages will be used. - - Issue a hint in this case to let cause of trouble more understandable. - */ - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantOpenMessageCatalog, path.str ), - KMP_SYSERRCODE( error ), - ( error == ERROR_BAD_EXE_FORMAT ? KMP_HNT( BadExeFormat, path.str, KMP_ARCH_STR ) : __kmp_msg_null ), - __kmp_msg_null - ); - KMP_INFORM( WillUseDefaultMessages ); - } - } else { // status == KMP_I18N_OPENED - - int section = get_section( kmp_i18n_prp_Version ); - int number = get_number( kmp_i18n_prp_Version ); - char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ]; - kmp_str_buf_t version; // Actual version of the catalog. 
- __kmp_str_buf_init( & version ); - __kmp_str_buf_print( & version, "%s", ___catgets( kmp_i18n_prp_Version ) ); - // String returned by catgets is invalid after closing the catalog, so copy it. - if ( strcmp( version.str, expected ) != 0 ) { - // Close bad catalog. - __kmp_i18n_catclose(); - status = KMP_I18N_ABSENT; // And mark it as absent. - if (__kmp_generate_warnings > kmp_warnings_low) { - // And now print a warning using default messages. - __kmp_msg( - kmp_ms_warning, - KMP_MSG( WrongMessageCatalog, path.str, version.str, expected ), - __kmp_msg_null - ); - KMP_INFORM( WillUseDefaultMessages ); - } // __kmp_generate_warnings - }; // if - __kmp_str_buf_free( & version ); - - }; // if - code_page = get_code_page(); - - end: - __kmp_str_buf_free( & path ); - return; - -} // func __kmp_i18n_do_catopen - - -void -__kmp_i18n_catclose( -) { - if ( status == KMP_I18N_OPENED ) { - KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); - kmp_i18n_table_free( & table ); - FreeLibrary( cat ); - cat = KMP_I18N_NULLCAT; - }; // if - code_page = default_code_page; - status = KMP_I18N_CLOSED; -} // func __kmp_i18n_catclose - -/* - We use FormatMessage() to get strings from catalog, get system error messages, etc. - FormatMessage() tends to return Windows* OS-style end-of-lines, "\r\n". When string is printed, - printf() also replaces all the occurrences of "\n" with "\r\n" (again!), so sequences like - "\r\r\r\n" appear in output. It is not too good. - - Additional mess comes from message catalog: Our catalog source en_US.mc file (generated by - message-converter.pl) contains only "\n" characters, but en_US_msg_1033.bin file (produced by - mc.exe) may contain "\r\n" or just "\n". This mess goes from en_US_msg_1033.bin file to - message catalog, libompui.dll. For example, message - - Error - - (there is "\n" at the end) is compiled by mc.exe to "Error\r\n", while - - OMP: Error %1!d!: %2!s!\n - - (there is "\n" at the end as well) is compiled to "OMP: Error %1!d!: %2!s!\r\n\n". 
- - Thus, stripping all "\r" normalizes string and returns it to canonical form, so printf() will - produce correct end-of-line sequences. - - ___strip_crs() serves for this purpose: it removes all the occurrences of "\r" in-place and - returns new length of string. -*/ -static -int -___strip_crs( - char * str -) { - int in = 0; // Input character index. - int out = 0; // Output character index. - for ( ; ; ) { - if ( str[ in ] != '\r' ) { - str[ out ] = str[ in ]; - ++ out; - }; // if - if ( str[ in ] == 0 ) { - break; - }; // if - ++ in; - }; // forever - return out - 1; -} // func __strip_crs - - -static -char const * -___catgets( - kmp_i18n_id_t id -) { - - char * result = NULL; - PVOID addr = NULL; - wchar_t * wmsg = NULL; - DWORD wlen = 0; - char * msg = NULL; - int len = 0; - int rc; - - KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); - wlen = // wlen does *not* include terminating null. - FormatMessageW( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE | - FORMAT_MESSAGE_IGNORE_INSERTS, - cat, - id, - 0, // LangId - (LPWSTR) & addr, - 0, // Size in elements, not in bytes. - NULL - ); - if ( wlen <= 0 ) { - goto end; - }; // if - wmsg = (wchar_t *) addr; // Warning: wmsg may be not nul-terminated! - - // Calculate length of multibyte message. - len = // Since wlen does not include terminating null, len does not include it also. - WideCharToMultiByte( - code_page, - 0, // Flags. - wmsg, wlen, // Wide buffer and size. - NULL, 0, // Buffer and size. - NULL, NULL // Default char and used default char. - ); - if ( len <= 0 ) { - goto end; - }; // if - - // Allocate memory. - msg = (char *) KMP_INTERNAL_MALLOC( len + 1 ); - - // Convert wide message to multibyte one. - rc = - WideCharToMultiByte( - code_page, - 0, // Flags. - wmsg, wlen, // Wide buffer and size. - msg, len, // Buffer and size. - NULL, NULL // Default char and used default char. 
- ); - if ( rc <= 0 || rc > len ) { - goto end; - }; // if - KMP_DEBUG_ASSERT( rc == len ); - len = rc; - msg[ len ] = 0; // Put terminating null to the end. - - // Stripping all "\r" before stripping last end-of-line simplifies the task. - len = ___strip_crs( msg ); - - // Every message in catalog is terminated with "\n". Strip it. - if ( len >= 1 && msg[ len - 1 ] == '\n' ) { - -- len; - msg[ len ] = 0; - }; // if - - // Everything looks ok. - result = msg; - msg = NULL; - - end: - - if ( msg != NULL ) { - KMP_INTERNAL_FREE( msg ); - }; // if - if ( wmsg != NULL ) { - LocalFree( wmsg ); - }; // if - - return result; - -} // ___catgets - - -char const * -__kmp_i18n_catgets( - kmp_i18n_id_t id -) { - - int section = get_section( id ); - int number = get_number( id ); - char const * message = NULL; - - if ( 1 <= section && section <= __kmp_i18n_default_table.size ) { - if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) { - if ( status == KMP_I18N_CLOSED ) { - __kmp_i18n_catopen(); - }; // if - if ( cat != KMP_I18N_NULLCAT ) { - if ( table.size == 0 ) { - table.sect = (kmp_i18n_section_t *) - KMP_INTERNAL_CALLOC( - ( __kmp_i18n_default_table.size + 2 ), - sizeof( kmp_i18n_section_t ) - ); - table.size = __kmp_i18n_default_table.size; - }; // if - if ( table.sect[ section ].size == 0 ) { - table.sect[ section ].str = (const char **) - KMP_INTERNAL_CALLOC( - __kmp_i18n_default_table.sect[ section ].size + 2, - sizeof( char const * ) - ); - table.sect[ section ].size = __kmp_i18n_default_table.sect[ section ].size; - }; // if - if ( table.sect[ section ].str[ number ] == NULL ) { - table.sect[ section ].str[ number ] = ___catgets( id ); - }; // if - message = table.sect[ section ].str[ number ]; - }; // if - if ( message == NULL ) { - // Catalog is not opened or message is not found, return default message. 
- message = __kmp_i18n_default_table.sect[ section ].str[ number ]; - }; // if - }; // if - }; // if - if ( message == NULL ) { - message = no_message_available; - }; // if - return message; - -} // func __kmp_i18n_catgets - - -#endif // KMP_OS_WINDOWS - -// ------------------------------------------------------------------------------------------------- - -#ifndef KMP_I18N_OK - #error I18n support is not implemented for this OS. -#endif // KMP_I18N_OK - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_i18n_dump_catalog( - kmp_str_buf_t * buffer -) { - - struct kmp_i18n_id_range_t { - kmp_i18n_id_t first; - kmp_i18n_id_t last; - }; // struct kmp_i18n_id_range_t - - static struct kmp_i18n_id_range_t ranges[] = { - { kmp_i18n_prp_first, kmp_i18n_prp_last }, - { kmp_i18n_str_first, kmp_i18n_str_last }, - { kmp_i18n_fmt_first, kmp_i18n_fmt_last }, - { kmp_i18n_msg_first, kmp_i18n_msg_last }, - { kmp_i18n_hnt_first, kmp_i18n_hnt_last } - }; // ranges - - int num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t ); - int range; - kmp_i18n_id_t id; - - for ( range = 0; range < num_of_ranges; ++ range ) { - __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 ); - for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 ); - id < ranges[ range ].last; - id = (kmp_i18n_id_t)( id + 1 ) ) { - __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) ); - }; // for id - }; // for range - - __kmp_printf( "%s", buffer->str ); - -} // __kmp_i18n_dump_catalog - -// ------------------------------------------------------------------------------------------------- - -kmp_msg_t -__kmp_msg_format( - kmp_i18n_id_t id, - ... -) { - - kmp_msg_t msg; - va_list args; - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - - va_start( args, id ); - #if KMP_OS_UNIX - // On Linux* OS and OS X*, printf() family functions process parameter numbers, for example: - // "%2$s %1$s". 
- __kmp_str_buf_vprint( & buffer, __kmp_i18n_catgets( id ), args ); - #elif KMP_OS_WINDOWS - // On Winodws, printf() family functions does not recognize GNU style parameter numbers, - // so we have to use FormatMessage() instead. It recognizes parameter numbers, e. g.: - // "%2!s! "%1!s!". - { - LPTSTR str = NULL; - int len; - FormatMessage( - FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER, - __kmp_i18n_catgets( id ), - 0, 0, - (LPTSTR)( & str ), - 0, - & args - ); - len = ___strip_crs( str ); - __kmp_str_buf_cat( & buffer, str, len ); - LocalFree( str ); - } - #else - #error - #endif - va_end( args ); - __kmp_str_buf_detach( & buffer ); - - msg.type = (kmp_msg_type_t)( id >> 16 ); - msg.num = id & 0xFFFF; - msg.str = buffer.str; - msg.len = buffer.used; - - return msg; - -} // __kmp_msg_format - -// ------------------------------------------------------------------------------------------------- - -static -char * -sys_error( - int err -) { - - char * message = NULL; - - #if KMP_OS_WINDOWS - - LPVOID buffer = NULL; - int len; - DWORD rc; - rc = - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, - err, - MAKELANGID( LANG_NEUTRAL, SUBLANG_DEFAULT ), // Default language. - (LPTSTR) & buffer, - 0, - NULL - ); - if ( rc > 0 ) { - // Message formatted. Copy it (so we can free it later with normal free(). - message = __kmp_str_format( "%s", (char *) buffer ); - len = ___strip_crs( message ); // Delete carriage returns if any. - // Strip trailing newlines. - while ( len > 0 && message[ len - 1 ] == '\n' ) { - -- len; - }; // while - message[ len ] = 0; - } else { - // FormatMessage() failed to format system error message. GetLastError() would give us - // error code, which we would convert to message... this it dangerous recursion, which - // cannot clarify original error, so we will not even start it. 
- }; // if - if ( buffer != NULL ) { - LocalFree( buffer ); - }; // if - - #else // Non-Windows* OS: Linux* OS or OS X* - - /* - There are 2 incompatible versions of strerror_r: - - char * strerror_r( int, char *, size_t ); // GNU version - int strerror_r( int, char *, size_t ); // XSI version - */ - - #if KMP_OS_LINUX - - // GNU version of strerror_r. - - char buffer[ 2048 ]; - char * const err_msg = strerror_r( err, buffer, sizeof( buffer ) ); - // Do not eliminate this assignment to temporary variable, otherwise compiler would - // not issue warning if strerror_r() returns `int' instead of expected `char *'. - message = __kmp_str_format( "%s", err_msg ); - - #else // OS X*, FreeBSD* etc. - - // XSI version of strerror_r. - - int size = 2048; - // TODO: Add checking result of malloc(). - char * buffer = (char *) KMP_INTERNAL_MALLOC( size ); - int rc; - rc = strerror_r( err, buffer, size ); - if ( rc == -1 ) { - rc = errno; // XSI version sets errno. - }; // if - while ( rc == ERANGE ) { // ERANGE means the buffer is too small. - KMP_INTERNAL_FREE( buffer ); - size *= 2; - buffer = (char *) KMP_INTERNAL_MALLOC( size ); - rc = strerror_r( err, buffer, size ); - if ( rc == -1 ) { - rc = errno; // XSI version sets errno. - }; // if - }; // while - if ( rc == 0 ) { - message = buffer; - } else { - // Buffer is unused. Free it. - KMP_INTERNAL_FREE( buffer ); - }; // if - - #endif - - #endif /* KMP_OS_WINDOWS */ - - if ( message == NULL ) { - // TODO: I18n this message. 
- message = __kmp_str_format( "%s", "(No system error message available)" ); - }; // if - return message; - -} // sys_error - -// ------------------------------------------------------------------------------------------------- - -kmp_msg_t -__kmp_msg_error_code( - int code -) { - - kmp_msg_t msg; - msg.type = kmp_mt_syserr; - msg.num = code; - msg.str = sys_error( code ); - msg.len = KMP_STRLEN( msg.str ); - return msg; - -} // __kmp_msg_error_code - -// ------------------------------------------------------------------------------------------------- - -kmp_msg_t -__kmp_msg_error_mesg( - char const * mesg -) { - - kmp_msg_t msg; - msg.type = kmp_mt_syserr; - msg.num = 0; - msg.str = __kmp_str_format( "%s", mesg ); - msg.len = KMP_STRLEN( msg.str ); - return msg; - -} // __kmp_msg_error_mesg - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_msg( - kmp_msg_severity_t severity, - kmp_msg_t message, - ... -) { - - va_list args; - kmp_i18n_id_t format; // format identifier - kmp_msg_t fmsg; // formatted message - kmp_str_buf_t buffer; - - if ( severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off ) - return; // no reason to form a string in order to not print it - - __kmp_str_buf_init( & buffer ); - - // Format the primary message. - switch ( severity ) { - case kmp_ms_inform : { - format = kmp_i18n_fmt_Info; - } break; - case kmp_ms_warning : { - format = kmp_i18n_fmt_Warning; - } break; - case kmp_ms_fatal : { - format = kmp_i18n_fmt_Fatal; - } break; - default : { - KMP_DEBUG_ASSERT( 0 ); - }; - }; // switch - fmsg = __kmp_msg_format( format, message.num, message.str ); - KMP_INTERNAL_FREE( (void *) message.str ); - __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len ); - KMP_INTERNAL_FREE( (void *) fmsg.str ); - - // Format other messages. 
- va_start( args, message ); - for ( ; ; ) { - message = va_arg( args, kmp_msg_t ); - if ( message.type == kmp_mt_dummy && message.str == NULL ) { - break; - }; // if - if ( message.type == kmp_mt_dummy && message.str == __kmp_msg_empty.str ) { - continue; - }; // if - switch ( message.type ) { - case kmp_mt_hint : { - format = kmp_i18n_fmt_Hint; - } break; - case kmp_mt_syserr : { - format = kmp_i18n_fmt_SysErr; - } break; - default : { - KMP_DEBUG_ASSERT( 0 ); - }; - }; // switch - fmsg = __kmp_msg_format( format, message.num, message.str ); - KMP_INTERNAL_FREE( (void *) message.str ); - __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len ); - KMP_INTERNAL_FREE( (void *) fmsg.str ); - }; // forever - va_end( args ); - - // Print formatted messages. - // This lock prevents multiple fatal errors on the same problem. - // __kmp_acquire_bootstrap_lock( & lock ); // GEH - This lock causing tests to hang on OS X*. - __kmp_printf( "%s", buffer.str ); - __kmp_str_buf_free( & buffer ); - - if ( severity == kmp_ms_fatal ) { - #if KMP_OS_WINDOWS - __kmp_thread_sleep( 500 ); /* Delay to give message a chance to appear before reaping */ - #endif - __kmp_abort_process(); - }; // if - - // __kmp_release_bootstrap_lock( & lock ); // GEH - this lock causing tests to hang on OS X*. - -} // __kmp_msg - -// ------------------------------------------------------------------------------------------------- - -// end of file // +/* + * kmp_i18n.c + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + + +#include "kmp_i18n.h" + +#include "kmp_os.h" +#include "kmp_debug.h" +#include "kmp.h" +#include "kmp_lock.h" +#include "kmp_io.h" // __kmp_printf. 
+ +#include +#include +#include +#include +#include + +#include "kmp_i18n_default.inc" +#include "kmp_str.h" +#include "kmp_environment.h" + +#undef KMP_I18N_OK + +#define get_section( id ) ( (id) >> 16 ) +#define get_number( id ) ( (id) & 0xFFFF ) + +kmp_msg_t __kmp_msg_empty = { kmp_mt_dummy, 0, "", 0 }; +kmp_msg_t __kmp_msg_null = { kmp_mt_dummy, 0, NULL, 0 }; +static char const * no_message_available = "(No message available)"; + +enum kmp_i18n_cat_status { + KMP_I18N_CLOSED, // Not yet opened or closed. + KMP_I18N_OPENED, // Opened successfully, ready to use. + KMP_I18N_ABSENT // Opening failed, message catalog should not be used. +}; // enum kmp_i18n_cat_status +typedef enum kmp_i18n_cat_status kmp_i18n_cat_status_t; +static volatile kmp_i18n_cat_status_t status = KMP_I18N_CLOSED; + +/* + Message catalog is opened at first usage, so we have to synchronize opening to avoid race and + multiple openings. + + Closing does not require synchronization, because catalog is closed very late at library + shutting down, when no other threads are alive. +*/ + +static void __kmp_i18n_do_catopen(); +static kmp_bootstrap_lock_t lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ); + // `lock' variable may be placed into __kmp_i18n_catopen function because it is used only by + // that function. But we afraid a (buggy) compiler may treat it wrongly. So we put it outside of + // function just in case. + +void +__kmp_i18n_catopen( +) { + if ( status == KMP_I18N_CLOSED ) { + __kmp_acquire_bootstrap_lock( & lock ); + if ( status == KMP_I18N_CLOSED ) { + __kmp_i18n_do_catopen(); + }; // if + __kmp_release_bootstrap_lock( & lock ); + }; // if +} // func __kmp_i18n_catopen + + +/* + ================================================================================================ + Linux* OS and OS X* part. 
+ ================================================================================================ +*/ + +#if KMP_OS_UNIX +#define KMP_I18N_OK + +#include + +#define KMP_I18N_NULLCAT ((nl_catd)( -1 )) +static nl_catd cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? +static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide.cat" : "libomp.cat" ); + +/* + Useful links: + http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html#tag_08_02 + http://www.opengroup.org/onlinepubs/000095399/functions/catopen.html + http://www.opengroup.org/onlinepubs/000095399/functions/setlocale.html +*/ + +void +__kmp_i18n_do_catopen( +) { + int english = 0; + char * lang = __kmp_env_get( "LANG" ); + // TODO: What about LC_ALL or LC_MESSAGES? + + KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED ); + KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT ); + + english = + lang == NULL || // In all these cases English language is used. + strcmp( lang, "" ) == 0 || + strcmp( lang, " " ) == 0 || + // Workaround for Fortran RTL bug DPD200137873 "Fortran runtime resets LANG env var + // to space if it is not set". + strcmp( lang, "C" ) == 0 || + strcmp( lang, "POSIX" ) == 0; + + if ( ! english ) { // English language is not yet detected, let us continue. + // Format of LANG is: [language[_territory][.codeset][@modifier]] + // Strip all parts except language. + char * tail = NULL; + __kmp_str_split( lang, '@', & lang, & tail ); + __kmp_str_split( lang, '.', & lang, & tail ); + __kmp_str_split( lang, '_', & lang, & tail ); + english = ( strcmp( lang, "en" ) == 0 ); + }; // if + + KMP_INTERNAL_FREE( lang ); + + // Do not try to open English catalog because internal messages are + // exact copy of messages in English catalog. + if ( english ) { + status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. + return; + } + + cat = catopen( name, 0 ); + // TODO: Why do we pass 0 in flags? + status = ( cat == KMP_I18N_NULLCAT ? 
KMP_I18N_ABSENT : KMP_I18N_OPENED ); + + if ( status == KMP_I18N_ABSENT ) { + if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to + int error = errno; // Save errno immediately. + char * nlspath = __kmp_env_get( "NLSPATH" ); + char * lang = __kmp_env_get( "LANG" ); + + // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so + // __kmp_i18n_catgets() will not try to open catalog, but will return default message. + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantOpenMessageCatalog, name ), + KMP_ERR( error ), + KMP_HNT( CheckEnvVar, "NLSPATH", nlspath ), + KMP_HNT( CheckEnvVar, "LANG", lang ), + __kmp_msg_null + ); + KMP_INFORM( WillUseDefaultMessages ); + KMP_INTERNAL_FREE( nlspath ); + KMP_INTERNAL_FREE( lang ); + } + } else { // status == KMP_I18N_OPENED + + int section = get_section( kmp_i18n_prp_Version ); + int number = get_number( kmp_i18n_prp_Version ); + char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ]; + // Expected version of the catalog. + kmp_str_buf_t version; // Actual version of the catalog. + __kmp_str_buf_init( & version ); + __kmp_str_buf_print( & version, "%s", catgets( cat, section, number, NULL ) ); + + // String returned by catgets is invalid after closing the catalog, so copy it. + if ( strcmp( version.str, expected ) != 0 ) { + __kmp_i18n_catclose(); // Close bad catalog. + status = KMP_I18N_ABSENT; // And mark it as absent. + if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to + // And now print a warning using default messages. 
+ char const * name = "NLSPATH"; + char const * nlspath = __kmp_env_get( name ); + __kmp_msg( + kmp_ms_warning, + KMP_MSG( WrongMessageCatalog, name, version.str, expected ), + KMP_HNT( CheckEnvVar, name, nlspath ), + __kmp_msg_null + ); + KMP_INFORM( WillUseDefaultMessages ); + KMP_INTERNAL_FREE( (void *) nlspath ); + } // __kmp_generate_warnings + }; // if + __kmp_str_buf_free( & version ); + + }; // if + +} // func __kmp_i18n_do_catopen + + +void +__kmp_i18n_catclose( +) { + if ( status == KMP_I18N_OPENED ) { + KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); + catclose( cat ); + cat = KMP_I18N_NULLCAT; + }; // if + status = KMP_I18N_CLOSED; +} // func __kmp_i18n_catclose + + +char const * +__kmp_i18n_catgets( + kmp_i18n_id_t id +) { + + int section = get_section( id ); + int number = get_number( id ); + char const * message = NULL; + + if ( 1 <= section && section <= __kmp_i18n_default_table.size ) { + if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) { + if ( status == KMP_I18N_CLOSED ) { + __kmp_i18n_catopen(); + }; // if + if ( status == KMP_I18N_OPENED ) { + message = + catgets( + cat, + section, number, + __kmp_i18n_default_table.sect[ section ].str[ number ] + ); + }; // if + if ( message == NULL ) { + message = __kmp_i18n_default_table.sect[ section ].str[ number ]; + }; // if + }; // if + }; // if + if ( message == NULL ) { + message = no_message_available; + }; // if + return message; + +} // func __kmp_i18n_catgets + + +#endif // KMP_OS_UNIX + +/* + ================================================================================================ + Windows* OS part. + ================================================================================================ +*/ + +#if KMP_OS_WINDOWS +#define KMP_I18N_OK + +#include "kmp_environment.h" +#include + +#define KMP_I18N_NULLCAT NULL +static HMODULE cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? +static char const * name = ( KMP_VERSION_MAJOR == 4 ? 
"libguide40ui.dll" : "libompui.dll" ); + +static kmp_i18n_table_t table = { 0, NULL }; + // Messages formatted by FormatMessage() should be freed, but catgets() interface assumes + // user will not free messages. So we cache all the retrieved messages in the table, which + // are freed at catclose(). +static UINT const default_code_page = CP_OEMCP; +static UINT code_page = default_code_page; + +static char const * ___catgets( kmp_i18n_id_t id ); +static UINT get_code_page(); +static void kmp_i18n_table_free( kmp_i18n_table_t * table ); + + +static UINT +get_code_page( +) { + + UINT cp = default_code_page; + char const * value = __kmp_env_get( "KMP_CODEPAGE" ); + if ( value != NULL ) { + if ( _stricmp( value, "ANSI" ) == 0 ) { + cp = CP_ACP; + } else if ( _stricmp( value, "OEM" ) == 0 ) { + cp = CP_OEMCP; + } else if ( _stricmp( value, "UTF-8" ) == 0 || _stricmp( value, "UTF8" ) == 0 ) { + cp = CP_UTF8; + } else if ( _stricmp( value, "UTF-7" ) == 0 || _stricmp( value, "UTF7" ) == 0 ) { + cp = CP_UTF7; + } else { + // !!! TODO: Issue a warning? + }; // if + }; // if + KMP_INTERNAL_FREE( (void *) value ); + return cp; + +} // func get_code_page + + +static void +kmp_i18n_table_free( + kmp_i18n_table_t * table +) { + int s; + int m; + for ( s = 0; s < table->size; ++ s ) { + for ( m = 0; m < table->sect[ s ].size; ++ m ) { + // Free message. + KMP_INTERNAL_FREE( (void *) table->sect[ s ].str[ m ] ); + table->sect[ s ].str[ m ] = NULL; + }; // for m + table->sect[ s ].size = 0; + // Free section itself. 
+ KMP_INTERNAL_FREE ( (void *) table->sect[ s ].str ); + table->sect[ s ].str = NULL; + }; // for s + table->size = 0; + KMP_INTERNAL_FREE( (void *) table->sect ); + table->sect = NULL; +} // kmp_i8n_table_free + + +void +__kmp_i18n_do_catopen( +) { + + LCID locale_id = GetThreadLocale(); + WORD lang_id = LANGIDFROMLCID( locale_id ); + WORD primary_lang_id = PRIMARYLANGID( lang_id ); + kmp_str_buf_t path; + + KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED ); + KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT ); + + __kmp_str_buf_init( & path ); + + // Do not try to open English catalog because internal messages are + // exact copy of messages in English catalog. + if ( primary_lang_id == LANG_ENGLISH ) { + status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. + goto end; + }; // if + + // Construct resource DLL name. + /* + Simple + LoadLibrary( name ) + is not suitable due to security issue (see + http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full + path to the message catalog. + */ + { + + // Get handle of our DLL first. + HMODULE handle; + BOOL brc = + GetModuleHandleEx( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + reinterpret_cast< LPCSTR >( & __kmp_i18n_do_catopen ), + & handle + ); + if ( ! brc ) { // Error occurred. + status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. + goto end; + // TODO: Enable multiple messages (KMP_MSG) to be passed to __kmp_msg; and print + // a proper warning. + }; // if + + // Now get path to the our DLL. + for ( ; ; ) { + DWORD drc = GetModuleFileName( handle, path.str, path.size ); + if ( drc == 0 ) { // Error occurred. + status = KMP_I18N_ABSENT; + goto end; + }; // if + if ( drc < path.size ) { + path.used = drc; + break; + }; // if + __kmp_str_buf_reserve( & path, path.size * 2 ); + }; // forever + + // Now construct the name of message catalog. 
+ kmp_str_fname fname; + __kmp_str_fname_init( & fname, path.str ); + __kmp_str_buf_clear( & path ); + __kmp_str_buf_print( & path, "%s%lu/%s", fname.dir, (unsigned long)( locale_id ), name ); + __kmp_str_fname_free( & fname ); + + } + + // For security reasons, use LoadLibraryEx() and load message catalog as a data file. + cat = LoadLibraryEx( path.str, NULL, LOAD_LIBRARY_AS_DATAFILE ); + status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED ); + + if ( status == KMP_I18N_ABSENT ) { + if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to + DWORD error = GetLastError(); + // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so + // __kmp_i18n_catgets() will not try to open catalog but will return default message. + /* + If message catalog for another architecture found (e.g. OpenMP RTL + for IA-32 architecture opens libompui.dll for Intel(R) 64) + Windows* OS returns error 193 (ERROR_BAD_EXE_FORMAT). However, + FormatMessage fails to return a message for this error, so user + will see: + + OMP: Warning #2: Cannot open message catalog "1041\libompui.dll": + OMP: System error #193: (No system error message available) + OMP: Info #3: Default messages will be used. + + Issue a hint in this case to let cause of trouble more understandable. + */ + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantOpenMessageCatalog, path.str ), + KMP_SYSERRCODE( error ), + ( error == ERROR_BAD_EXE_FORMAT ? KMP_HNT( BadExeFormat, path.str, KMP_ARCH_STR ) : __kmp_msg_null ), + __kmp_msg_null + ); + KMP_INFORM( WillUseDefaultMessages ); + } + } else { // status == KMP_I18N_OPENED + + int section = get_section( kmp_i18n_prp_Version ); + int number = get_number( kmp_i18n_prp_Version ); + char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ]; + kmp_str_buf_t version; // Actual version of the catalog. 
+ __kmp_str_buf_init( & version ); + __kmp_str_buf_print( & version, "%s", ___catgets( kmp_i18n_prp_Version ) ); + // String returned by catgets is invalid after closing the catalog, so copy it. + if ( strcmp( version.str, expected ) != 0 ) { + // Close bad catalog. + __kmp_i18n_catclose(); + status = KMP_I18N_ABSENT; // And mark it as absent. + if (__kmp_generate_warnings > kmp_warnings_low) { + // And now print a warning using default messages. + __kmp_msg( + kmp_ms_warning, + KMP_MSG( WrongMessageCatalog, path.str, version.str, expected ), + __kmp_msg_null + ); + KMP_INFORM( WillUseDefaultMessages ); + } // __kmp_generate_warnings + }; // if + __kmp_str_buf_free( & version ); + + }; // if + code_page = get_code_page(); + + end: + __kmp_str_buf_free( & path ); + return; + +} // func __kmp_i18n_do_catopen + + +void +__kmp_i18n_catclose( +) { + if ( status == KMP_I18N_OPENED ) { + KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); + kmp_i18n_table_free( & table ); + FreeLibrary( cat ); + cat = KMP_I18N_NULLCAT; + }; // if + code_page = default_code_page; + status = KMP_I18N_CLOSED; +} // func __kmp_i18n_catclose + +/* + We use FormatMessage() to get strings from catalog, get system error messages, etc. + FormatMessage() tends to return Windows* OS-style end-of-lines, "\r\n". When string is printed, + printf() also replaces all the occurrences of "\n" with "\r\n" (again!), so sequences like + "\r\r\r\n" appear in output. It is not too good. + + Additional mess comes from message catalog: Our catalog source en_US.mc file (generated by + message-converter.pl) contains only "\n" characters, but en_US_msg_1033.bin file (produced by + mc.exe) may contain "\r\n" or just "\n". This mess goes from en_US_msg_1033.bin file to + message catalog, libompui.dll. For example, message + + Error + + (there is "\n" at the end) is compiled by mc.exe to "Error\r\n", while + + OMP: Error %1!d!: %2!s!\n + + (there is "\n" at the end as well) is compiled to "OMP: Error %1!d!: %2!s!\r\n\n". 
+ + Thus, stripping all "\r" normalizes string and returns it to canonical form, so printf() will + produce correct end-of-line sequences. + + ___strip_crs() serves for this purpose: it removes all the occurrences of "\r" in-place and + returns new length of string. +*/ +static +int +___strip_crs( + char * str +) { + int in = 0; // Input character index. + int out = 0; // Output character index. + for ( ; ; ) { + if ( str[ in ] != '\r' ) { + str[ out ] = str[ in ]; + ++ out; + }; // if + if ( str[ in ] == 0 ) { + break; + }; // if + ++ in; + }; // forever + return out - 1; +} // func __strip_crs + + +static +char const * +___catgets( + kmp_i18n_id_t id +) { + + char * result = NULL; + PVOID addr = NULL; + wchar_t * wmsg = NULL; + DWORD wlen = 0; + char * msg = NULL; + int len = 0; + int rc; + + KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); + wlen = // wlen does *not* include terminating null. + FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE | + FORMAT_MESSAGE_IGNORE_INSERTS, + cat, + id, + 0, // LangId + (LPWSTR) & addr, + 0, // Size in elements, not in bytes. + NULL + ); + if ( wlen <= 0 ) { + goto end; + }; // if + wmsg = (wchar_t *) addr; // Warning: wmsg may be not nul-terminated! + + // Calculate length of multibyte message. + len = // Since wlen does not include terminating null, len does not include it also. + WideCharToMultiByte( + code_page, + 0, // Flags. + wmsg, wlen, // Wide buffer and size. + NULL, 0, // Buffer and size. + NULL, NULL // Default char and used default char. + ); + if ( len <= 0 ) { + goto end; + }; // if + + // Allocate memory. + msg = (char *) KMP_INTERNAL_MALLOC( len + 1 ); + + // Convert wide message to multibyte one. + rc = + WideCharToMultiByte( + code_page, + 0, // Flags. + wmsg, wlen, // Wide buffer and size. + msg, len, // Buffer and size. + NULL, NULL // Default char and used default char. 
+ ); + if ( rc <= 0 || rc > len ) { + goto end; + }; // if + KMP_DEBUG_ASSERT( rc == len ); + len = rc; + msg[ len ] = 0; // Put terminating null to the end. + + // Stripping all "\r" before stripping last end-of-line simplifies the task. + len = ___strip_crs( msg ); + + // Every message in catalog is terminated with "\n". Strip it. + if ( len >= 1 && msg[ len - 1 ] == '\n' ) { + -- len; + msg[ len ] = 0; + }; // if + + // Everything looks ok. + result = msg; + msg = NULL; + + end: + + if ( msg != NULL ) { + KMP_INTERNAL_FREE( msg ); + }; // if + if ( wmsg != NULL ) { + LocalFree( wmsg ); + }; // if + + return result; + +} // ___catgets + + +char const * +__kmp_i18n_catgets( + kmp_i18n_id_t id +) { + + int section = get_section( id ); + int number = get_number( id ); + char const * message = NULL; + + if ( 1 <= section && section <= __kmp_i18n_default_table.size ) { + if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) { + if ( status == KMP_I18N_CLOSED ) { + __kmp_i18n_catopen(); + }; // if + if ( cat != KMP_I18N_NULLCAT ) { + if ( table.size == 0 ) { + table.sect = (kmp_i18n_section_t *) + KMP_INTERNAL_CALLOC( + ( __kmp_i18n_default_table.size + 2 ), + sizeof( kmp_i18n_section_t ) + ); + table.size = __kmp_i18n_default_table.size; + }; // if + if ( table.sect[ section ].size == 0 ) { + table.sect[ section ].str = (const char **) + KMP_INTERNAL_CALLOC( + __kmp_i18n_default_table.sect[ section ].size + 2, + sizeof( char const * ) + ); + table.sect[ section ].size = __kmp_i18n_default_table.sect[ section ].size; + }; // if + if ( table.sect[ section ].str[ number ] == NULL ) { + table.sect[ section ].str[ number ] = ___catgets( id ); + }; // if + message = table.sect[ section ].str[ number ]; + }; // if + if ( message == NULL ) { + // Catalog is not opened or message is not found, return default message. 
+ message = __kmp_i18n_default_table.sect[ section ].str[ number ]; + }; // if + }; // if + }; // if + if ( message == NULL ) { + message = no_message_available; + }; // if + return message; + +} // func __kmp_i18n_catgets + + +#endif // KMP_OS_WINDOWS + +// ------------------------------------------------------------------------------------------------- + +#ifndef KMP_I18N_OK + #error I18n support is not implemented for this OS. +#endif // KMP_I18N_OK + +// ------------------------------------------------------------------------------------------------- + +void +__kmp_i18n_dump_catalog( + kmp_str_buf_t * buffer +) { + + struct kmp_i18n_id_range_t { + kmp_i18n_id_t first; + kmp_i18n_id_t last; + }; // struct kmp_i18n_id_range_t + + static struct kmp_i18n_id_range_t ranges[] = { + { kmp_i18n_prp_first, kmp_i18n_prp_last }, + { kmp_i18n_str_first, kmp_i18n_str_last }, + { kmp_i18n_fmt_first, kmp_i18n_fmt_last }, + { kmp_i18n_msg_first, kmp_i18n_msg_last }, + { kmp_i18n_hnt_first, kmp_i18n_hnt_last } + }; // ranges + + int num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t ); + int range; + kmp_i18n_id_t id; + + for ( range = 0; range < num_of_ranges; ++ range ) { + __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 ); + for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 ); + id < ranges[ range ].last; + id = (kmp_i18n_id_t)( id + 1 ) ) { + __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) ); + }; // for id + }; // for range + + __kmp_printf( "%s", buffer->str ); + +} // __kmp_i18n_dump_catalog + +// ------------------------------------------------------------------------------------------------- + +kmp_msg_t +__kmp_msg_format( + kmp_i18n_id_t id, + ... +) { + + kmp_msg_t msg; + va_list args; + kmp_str_buf_t buffer; + __kmp_str_buf_init( & buffer ); + + va_start( args, id ); + #if KMP_OS_UNIX + // On Linux* OS and OS X*, printf() family functions process parameter numbers, for example: + // "%2$s %1$s". 
+ __kmp_str_buf_vprint( & buffer, __kmp_i18n_catgets( id ), args ); + #elif KMP_OS_WINDOWS + // On Winodws, printf() family functions does not recognize GNU style parameter numbers, + // so we have to use FormatMessage() instead. It recognizes parameter numbers, e. g.: + // "%2!s! "%1!s!". + { + LPTSTR str = NULL; + int len; + FormatMessage( + FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER, + __kmp_i18n_catgets( id ), + 0, 0, + (LPTSTR)( & str ), + 0, + & args + ); + len = ___strip_crs( str ); + __kmp_str_buf_cat( & buffer, str, len ); + LocalFree( str ); + } + #else + #error + #endif + va_end( args ); + __kmp_str_buf_detach( & buffer ); + + msg.type = (kmp_msg_type_t)( id >> 16 ); + msg.num = id & 0xFFFF; + msg.str = buffer.str; + msg.len = buffer.used; + + return msg; + +} // __kmp_msg_format + +// ------------------------------------------------------------------------------------------------- + +static +char * +sys_error( + int err +) { + + char * message = NULL; + + #if KMP_OS_WINDOWS + + LPVOID buffer = NULL; + int len; + DWORD rc; + rc = + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, + err, + MAKELANGID( LANG_NEUTRAL, SUBLANG_DEFAULT ), // Default language. + (LPTSTR) & buffer, + 0, + NULL + ); + if ( rc > 0 ) { + // Message formatted. Copy it (so we can free it later with normal free(). + message = __kmp_str_format( "%s", (char *) buffer ); + len = ___strip_crs( message ); // Delete carriage returns if any. + // Strip trailing newlines. + while ( len > 0 && message[ len - 1 ] == '\n' ) { + -- len; + }; // while + message[ len ] = 0; + } else { + // FormatMessage() failed to format system error message. GetLastError() would give us + // error code, which we would convert to message... this it dangerous recursion, which + // cannot clarify original error, so we will not even start it. 
+ }; // if + if ( buffer != NULL ) { + LocalFree( buffer ); + }; // if + + #else // Non-Windows* OS: Linux* OS or OS X* + + /* + There are 2 incompatible versions of strerror_r: + + char * strerror_r( int, char *, size_t ); // GNU version + int strerror_r( int, char *, size_t ); // XSI version + */ + + #if KMP_OS_LINUX + + // GNU version of strerror_r. + + char buffer[ 2048 ]; + char * const err_msg = strerror_r( err, buffer, sizeof( buffer ) ); + // Do not eliminate this assignment to temporary variable, otherwise compiler would + // not issue warning if strerror_r() returns `int' instead of expected `char *'. + message = __kmp_str_format( "%s", err_msg ); + + #else // OS X*, FreeBSD* etc. + + // XSI version of strerror_r. + + int size = 2048; + // TODO: Add checking result of malloc(). + char * buffer = (char *) KMP_INTERNAL_MALLOC( size ); + int rc; + rc = strerror_r( err, buffer, size ); + if ( rc == -1 ) { + rc = errno; // XSI version sets errno. + }; // if + while ( rc == ERANGE ) { // ERANGE means the buffer is too small. + KMP_INTERNAL_FREE( buffer ); + size *= 2; + buffer = (char *) KMP_INTERNAL_MALLOC( size ); + rc = strerror_r( err, buffer, size ); + if ( rc == -1 ) { + rc = errno; // XSI version sets errno. + }; // if + }; // while + if ( rc == 0 ) { + message = buffer; + } else { + // Buffer is unused. Free it. + KMP_INTERNAL_FREE( buffer ); + }; // if + + #endif + + #endif /* KMP_OS_WINDOWS */ + + if ( message == NULL ) { + // TODO: I18n this message. 
+ message = __kmp_str_format( "%s", "(No system error message available)" ); + }; // if + return message; + +} // sys_error + +// ------------------------------------------------------------------------------------------------- + +kmp_msg_t +__kmp_msg_error_code( + int code +) { + + kmp_msg_t msg; + msg.type = kmp_mt_syserr; + msg.num = code; + msg.str = sys_error( code ); + msg.len = KMP_STRLEN( msg.str ); + return msg; + +} // __kmp_msg_error_code + +// ------------------------------------------------------------------------------------------------- + +kmp_msg_t +__kmp_msg_error_mesg( + char const * mesg +) { + + kmp_msg_t msg; + msg.type = kmp_mt_syserr; + msg.num = 0; + msg.str = __kmp_str_format( "%s", mesg ); + msg.len = KMP_STRLEN( msg.str ); + return msg; + +} // __kmp_msg_error_mesg + +// ------------------------------------------------------------------------------------------------- + +void +__kmp_msg( + kmp_msg_severity_t severity, + kmp_msg_t message, + ... +) { + + va_list args; + kmp_i18n_id_t format; // format identifier + kmp_msg_t fmsg; // formatted message + kmp_str_buf_t buffer; + + if ( severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off ) + return; // no reason to form a string in order to not print it + + __kmp_str_buf_init( & buffer ); + + // Format the primary message. + switch ( severity ) { + case kmp_ms_inform : { + format = kmp_i18n_fmt_Info; + } break; + case kmp_ms_warning : { + format = kmp_i18n_fmt_Warning; + } break; + case kmp_ms_fatal : { + format = kmp_i18n_fmt_Fatal; + } break; + default : { + KMP_DEBUG_ASSERT( 0 ); + }; + }; // switch + fmsg = __kmp_msg_format( format, message.num, message.str ); + KMP_INTERNAL_FREE( (void *) message.str ); + __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len ); + KMP_INTERNAL_FREE( (void *) fmsg.str ); + + // Format other messages. 
+ va_start( args, message ); + for ( ; ; ) { + message = va_arg( args, kmp_msg_t ); + if ( message.type == kmp_mt_dummy && message.str == NULL ) { + break; + }; // if + if ( message.type == kmp_mt_dummy && message.str == __kmp_msg_empty.str ) { + continue; + }; // if + switch ( message.type ) { + case kmp_mt_hint : { + format = kmp_i18n_fmt_Hint; + } break; + case kmp_mt_syserr : { + format = kmp_i18n_fmt_SysErr; + } break; + default : { + KMP_DEBUG_ASSERT( 0 ); + }; + }; // switch + fmsg = __kmp_msg_format( format, message.num, message.str ); + KMP_INTERNAL_FREE( (void *) message.str ); + __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len ); + KMP_INTERNAL_FREE( (void *) fmsg.str ); + }; // forever + va_end( args ); + + // Print formatted messages. + // This lock prevents multiple fatal errors on the same problem. + // __kmp_acquire_bootstrap_lock( & lock ); // GEH - This lock causing tests to hang on OS X*. + __kmp_printf( "%s", buffer.str ); + __kmp_str_buf_free( & buffer ); + + if ( severity == kmp_ms_fatal ) { + #if KMP_OS_WINDOWS + __kmp_thread_sleep( 500 ); /* Delay to give message a chance to appear before reaping */ + #endif + __kmp_abort_process(); + }; // if + + // __kmp_release_bootstrap_lock( & lock ); // GEH - this lock causing tests to hang on OS X*. + +} // __kmp_msg + +// ------------------------------------------------------------------------------------------------- + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n.h b/contrib/libs/cxxsupp/openmp/kmp_i18n.h index 37e59300d12..82ec51b2441 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_i18n.h +++ b/contrib/libs/cxxsupp/openmp/kmp_i18n.h @@ -1,193 +1,193 @@ -/* - * kmp_i18n.h - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_I18N_H -#define KMP_I18N_H - -#include "kmp_str.h" - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -/* - kmp_i18n_id.inc defines kmp_i18n_id_t type. It is an enumeration with identifiers of all the - messages in the catalog. There is one special identifier: kmp_i18n_null, which denotes absence - of message. -*/ -#include "kmp_i18n_id.inc" // Generated file. Do not edit it manually. - -/* - Low-level functions handling message catalog. __kmp_i18n_open() opens message catalog, - __kmp_i18n_closes() it. Explicit opening is not required: if message catalog is not yet open, - __kmp_i18n_catgets() will open it implicitly. However, catalog should be explicitly closed, - otherwise resources (mamory, handles) may leak. - - __kmp_i18n_catgets() returns read-only string. It should not be freed. - - KMP_I18N_STR macro simplifies acces to strings in message catalog a bit. Following two lines are - equivalent: - - __kmp_i18n_catgets( kmp_i18n_str_Warning ) - KMP_I18N_STR( Warning ) -*/ - -void __kmp_i18n_catopen(); -void __kmp_i18n_catclose(); -char const * __kmp_i18n_catgets( kmp_i18n_id_t id ); - -#define KMP_I18N_STR( id ) __kmp_i18n_catgets( kmp_i18n_str_ ## id ) - - -/* - ------------------------------------------------------------------------------------------------ - - High-level interface for printing strings targeted to the user. - - All the strings are divided into 3 types: - - * messages, - * hints, - * system errors. - - There are 3 kind of message severities: - - * informational messages, - * warnings (non-fatal errors), - * fatal errors. - - For example: - - OMP: Warning #2: Cannot open message catalog "libguide.cat": (1) - OMP: System error #2: No such file or directory (2) - OMP: Hint: Please check NLSPATH environment variable. (3) - OMP: Info #3: Default messages will be used. 
(4) - - where - - (1) is a message of warning severity, - (2) is a system error caused the previous warning, - (3) is a hint for the user how to fix the problem, - (4) is a message of informational severity. - - Usage in complex cases (message is accompanied with hints and system errors): - - int error = errno; // We need save errno immediately, because it may be changed. - __kmp_msg( - kmp_ms_warning, // Severity - KMP_MSG( CantOpenMessageCatalog, name ), // Primary message - KMP_ERR( error ), // System error - KMP_HNT( CheckNLSPATH ), // Hint - __kmp_msg_null // Variadic argument list finisher - ); - - Usage in simple cases (just a message, no system errors or hints): - - KMP_INFORM( WillUseDefaultMessages ); - KMP_WARNING( CantOpenMessageCatalog, name ); - KMP_FATAL( StackOverlap ); - KMP_SYSFAIL( "pthread_create", status ); - KMP_CHECK_SYSFAIL( "pthread_create", status ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - - ------------------------------------------------------------------------------------------------ -*/ - -enum kmp_msg_type { - kmp_mt_dummy = 0, // Special type for internal purposes. - kmp_mt_mesg = 4, // Primary OpenMP message, could be information, warning, or fatal. - kmp_mt_hint = 5, // Hint to the user. - kmp_mt_syserr = -1 // System error message. -}; // enum kmp_msg_type -typedef enum kmp_msg_type kmp_msg_type_t; - -struct kmp_msg { - kmp_msg_type_t type; - int num; - char const * str; - int len; -}; // struct kmp_message -typedef struct kmp_msg kmp_msg_t; - -// Two special messages. -extern kmp_msg_t __kmp_msg_empty; // Can be used in place where message is required syntactically. -extern kmp_msg_t __kmp_msg_null; // Denotes the end of variadic list of arguments. - -// Helper functions. Creates messages either from message catalog or from system. Note: these -// functions allocate memory. You should pass created messages to __kmp_msg() function, it will -// print messages and destroy them. 
-kmp_msg_t __kmp_msg_format( kmp_i18n_id_t id, ... ); -kmp_msg_t __kmp_msg_error_code( int code ); -kmp_msg_t __kmp_msg_error_mesg( char const * mesg ); - -// Helper macros to make calls shorter. -#define KMP_MSG( ... ) __kmp_msg_format( kmp_i18n_msg_ ## __VA_ARGS__ ) -#define KMP_HNT( ... ) __kmp_msg_format( kmp_i18n_hnt_ ## __VA_ARGS__ ) -#define KMP_SYSERRCODE( code ) __kmp_msg_error_code( code ) -#define KMP_SYSERRMESG( mesg ) __kmp_msg_error_mesg( mesg ) -#define KMP_ERR KMP_SYSERRCODE - -// Message severity. -enum kmp_msg_severity { - kmp_ms_inform, // Just information for the user. - kmp_ms_warning, // Non-fatal error, execution continues. - kmp_ms_fatal // Fatal error, program aborts. -}; // enum kmp_msg_severity -typedef enum kmp_msg_severity kmp_msg_severity_t; - -// Primary function for printing messages for the user. The first message is mandatory. Any number -// of system errors and hints may be specified. Argument list must be finished with __kmp_msg_null. -void __kmp_msg( kmp_msg_severity_t severity, kmp_msg_t message, ... ); - -// Helper macros to make calls shorter in simple cases. -#define KMP_INFORM( ... ) __kmp_msg( kmp_ms_inform, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) -#define KMP_WARNING( ... ) __kmp_msg( kmp_ms_warning, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) -#define KMP_FATAL( ... ) __kmp_msg( kmp_ms_fatal, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) -#define KMP_SYSFAIL( func, error ) \ - __kmp_msg( \ - kmp_ms_fatal, \ - KMP_MSG( FunctionError, func ), \ - KMP_SYSERRCODE( error ), \ - __kmp_msg_null \ - ) - -// Check error, if not zero, generate fatal error message. -#define KMP_CHECK_SYSFAIL( func, error ) \ - { \ - if ( error ) { \ - KMP_SYSFAIL( func, error ); \ - }; \ - } - -// Check status, if not zero, generate fatal error message using errno. 
-#define KMP_CHECK_SYSFAIL_ERRNO( func, status ) \ - { \ - if ( status != 0 ) { \ - int error = errno; \ - KMP_SYSFAIL( func, error ); \ - }; \ - } - -#ifdef KMP_DEBUG - void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer ); -#endif // KMP_DEBUG - -#ifdef __cplusplus - }; // extern "C" -#endif // __cplusplus - -#endif // KMP_I18N_H - -// end of file // +/* + * kmp_i18n.h + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_I18N_H +#define KMP_I18N_H + +#include "kmp_str.h" + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +/* + kmp_i18n_id.inc defines kmp_i18n_id_t type. It is an enumeration with identifiers of all the + messages in the catalog. There is one special identifier: kmp_i18n_null, which denotes absence + of message. +*/ +#include "kmp_i18n_id.inc" // Generated file. Do not edit it manually. + +/* + Low-level functions handling message catalog. __kmp_i18n_open() opens message catalog, + __kmp_i18n_closes() it. Explicit opening is not required: if message catalog is not yet open, + __kmp_i18n_catgets() will open it implicitly. However, catalog should be explicitly closed, + otherwise resources (mamory, handles) may leak. + + __kmp_i18n_catgets() returns read-only string. It should not be freed. + + KMP_I18N_STR macro simplifies acces to strings in message catalog a bit. 
Following two lines are + equivalent: + + __kmp_i18n_catgets( kmp_i18n_str_Warning ) + KMP_I18N_STR( Warning ) +*/ + +void __kmp_i18n_catopen(); +void __kmp_i18n_catclose(); +char const * __kmp_i18n_catgets( kmp_i18n_id_t id ); + +#define KMP_I18N_STR( id ) __kmp_i18n_catgets( kmp_i18n_str_ ## id ) + + +/* + ------------------------------------------------------------------------------------------------ + + High-level interface for printing strings targeted to the user. + + All the strings are divided into 3 types: + + * messages, + * hints, + * system errors. + + There are 3 kind of message severities: + + * informational messages, + * warnings (non-fatal errors), + * fatal errors. + + For example: + + OMP: Warning #2: Cannot open message catalog "libguide.cat": (1) + OMP: System error #2: No such file or directory (2) + OMP: Hint: Please check NLSPATH environment variable. (3) + OMP: Info #3: Default messages will be used. (4) + + where + + (1) is a message of warning severity, + (2) is a system error caused the previous warning, + (3) is a hint for the user how to fix the problem, + (4) is a message of informational severity. + + Usage in complex cases (message is accompanied with hints and system errors): + + int error = errno; // We need save errno immediately, because it may be changed. 
+ __kmp_msg( + kmp_ms_warning, // Severity + KMP_MSG( CantOpenMessageCatalog, name ), // Primary message + KMP_ERR( error ), // System error + KMP_HNT( CheckNLSPATH ), // Hint + __kmp_msg_null // Variadic argument list finisher + ); + + Usage in simple cases (just a message, no system errors or hints): + + KMP_INFORM( WillUseDefaultMessages ); + KMP_WARNING( CantOpenMessageCatalog, name ); + KMP_FATAL( StackOverlap ); + KMP_SYSFAIL( "pthread_create", status ); + KMP_CHECK_SYSFAIL( "pthread_create", status ); + KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); + + ------------------------------------------------------------------------------------------------ +*/ + +enum kmp_msg_type { + kmp_mt_dummy = 0, // Special type for internal purposes. + kmp_mt_mesg = 4, // Primary OpenMP message, could be information, warning, or fatal. + kmp_mt_hint = 5, // Hint to the user. + kmp_mt_syserr = -1 // System error message. +}; // enum kmp_msg_type +typedef enum kmp_msg_type kmp_msg_type_t; + +struct kmp_msg { + kmp_msg_type_t type; + int num; + char const * str; + int len; +}; // struct kmp_message +typedef struct kmp_msg kmp_msg_t; + +// Two special messages. +extern kmp_msg_t __kmp_msg_empty; // Can be used in place where message is required syntactically. +extern kmp_msg_t __kmp_msg_null; // Denotes the end of variadic list of arguments. + +// Helper functions. Creates messages either from message catalog or from system. Note: these +// functions allocate memory. You should pass created messages to __kmp_msg() function, it will +// print messages and destroy them. +kmp_msg_t __kmp_msg_format( kmp_i18n_id_t id, ... ); +kmp_msg_t __kmp_msg_error_code( int code ); +kmp_msg_t __kmp_msg_error_mesg( char const * mesg ); + +// Helper macros to make calls shorter. +#define KMP_MSG( ... ) __kmp_msg_format( kmp_i18n_msg_ ## __VA_ARGS__ ) +#define KMP_HNT( ... 
) __kmp_msg_format( kmp_i18n_hnt_ ## __VA_ARGS__ ) +#define KMP_SYSERRCODE( code ) __kmp_msg_error_code( code ) +#define KMP_SYSERRMESG( mesg ) __kmp_msg_error_mesg( mesg ) +#define KMP_ERR KMP_SYSERRCODE + +// Message severity. +enum kmp_msg_severity { + kmp_ms_inform, // Just information for the user. + kmp_ms_warning, // Non-fatal error, execution continues. + kmp_ms_fatal // Fatal error, program aborts. +}; // enum kmp_msg_severity +typedef enum kmp_msg_severity kmp_msg_severity_t; + +// Primary function for printing messages for the user. The first message is mandatory. Any number +// of system errors and hints may be specified. Argument list must be finished with __kmp_msg_null. +void __kmp_msg( kmp_msg_severity_t severity, kmp_msg_t message, ... ); + +// Helper macros to make calls shorter in simple cases. +#define KMP_INFORM( ... ) __kmp_msg( kmp_ms_inform, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) +#define KMP_WARNING( ... ) __kmp_msg( kmp_ms_warning, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) +#define KMP_FATAL( ... ) __kmp_msg( kmp_ms_fatal, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) +#define KMP_SYSFAIL( func, error ) \ + __kmp_msg( \ + kmp_ms_fatal, \ + KMP_MSG( FunctionError, func ), \ + KMP_SYSERRCODE( error ), \ + __kmp_msg_null \ + ) + +// Check error, if not zero, generate fatal error message. +#define KMP_CHECK_SYSFAIL( func, error ) \ + { \ + if ( error ) { \ + KMP_SYSFAIL( func, error ); \ + }; \ + } + +// Check status, if not zero, generate fatal error message using errno. 
+#define KMP_CHECK_SYSFAIL_ERRNO( func, status ) \ + { \ + if ( status != 0 ) { \ + int error = errno; \ + KMP_SYSFAIL( func, error ); \ + }; \ + } + +#ifdef KMP_DEBUG + void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer ); +#endif // KMP_DEBUG + +#ifdef __cplusplus + }; // extern "C" +#endif // __cplusplus + +#endif // KMP_I18N_H + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc b/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc index f975b83794c..0a1aa74adfb 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc +++ b/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc @@ -1,414 +1,414 @@ -// Do not edit this file! // -// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. // - -static char const * -__kmp_i18n_default_meta[] = - { - NULL, - "English", - "USA", - "1033", - "2", - "20140827", - NULL - }; - -static char const * -__kmp_i18n_default_strings[] = - { - NULL, - "Error", - "(unknown file)", - "not a number", - "bad unit", - "illegal characters", - "value too large", - "value too small", - "value is not a multiple of 4k", - "Unknown processor topology", - "Cannot open /proc/cpuinfo", - "/proc/cpuinfo", - "cpuinfo file invalid (No processor records)", - "cpuinfo file invalid (Too many processor records)", - "Cannot rewind cpuinfo file", - "cpuinfo file invalid (long line)", - "cpuinfo file contains too many entries", - "cpuinfo file missing processor field", - "cpuinfo file missing physical id field", - "cpuinfo file invalid (missing val)", - "cpuinfo file invalid (duplicate field)", - "Physical node/pkg/core/thread ids not unique", - "APIC not present", - "Invalid cpuid info", - "APIC ids not unique", - "Inconsistent cpuid info", - "Out of heap memory", - "Memory allocation failed", - "core", - "thread", - "package", - "node", - "", - "decoding legacy APIC ids", - "parsing /proc/cpuinfo", - "value is not defined", - "Effective settings:", - "User settings:", - "warning: pointers 
or size don't make sense", - "CPU", - "TPU", - "TPUs per package", - "HT enabled", - "HT disabled", - "decoding x2APIC ids", - "cpuid leaf 11 not supported", - "cpuid leaf 4 not supported", - "thread ids not unique", - "using pthread info", - "legacy APIC ids not unique", - "x2APIC ids not unique", - "OPENMP DISPLAY ENVIRONMENT BEGIN", - "OPENMP DISPLAY ENVIRONMENT END", - "[device]", - "[host]", - NULL - }; - -static char const * -__kmp_i18n_default_formats[] = - { - NULL, - "OMP: Info #%1$d: %2$s\n", - "OMP: Warning #%1$d: %2$s\n", - "OMP: Error #%1$d: %2$s\n", - "OMP: System error #%1$d: %2$s\n", - "OMP: Hint: %2$s\n", - "%1$s pragma (at %2$s:%3$s():%4$s)", - NULL - }; - -static char const * -__kmp_i18n_default_messages[] = - { - NULL, - "Library is \"serial\".", - "Cannot open message catalog \"%1$s\":", - "Default messages will be used.", - "%1$s: Lock is uninitialized", - "%1$s: Lock was initialized as simple, but used as nestable", - "%1$s: Lock was initialized as nestable, but used as simple", - "%1$s: Lock is already owned by requesting thread", - "%1$s: Lock is still owned by a thread", - "%1$s: Attempt to release a lock not owned by any thread", - "%1$s: Attempt to release a lock owned by another thread", - "Stack overflow detected for OpenMP thread #%1$d", - "Stack overlap detected. ", - "Assertion failure at %1$s(%2$d).", - "Unable to register a new user thread.", - "Initializing %1$s, but found %2$s already initialized.", - "Cannot open file \"%1$s\" for reading:", - "Getting environment variable \"%1$s\" failed:", - "Setting environment variable \"%1$s\" failed:", - "Getting environment failed:", - "%1$s=\"%2$s\": Wrong value, boolean expected.", - "No Helper Thread support built in this OMP library.", - "Helper thread failed to soft terminate.", - "Buffer overflow detected.", - "Real-time scheduling policy is not supported.", - "OMP application is running at maximum priority with real-time scheduling policy. 
", - "Changing priority of the monitor thread failed:", - "Deadlocks are highly possible due to monitor thread starvation.", - "Unable to set monitor thread stack size to %1$lu bytes:", - "Unable to set OMP thread stack size to %1$lu bytes:", - "Thread attribute initialization failed:", - "Thread attribute destroying failed:", - "OMP thread joinable state setting failed:", - "Monitor thread joinable state setting failed:", - "System unable to allocate necessary resources for OMP thread:", - "System unable to allocate necessary resources for the monitor thread:", - "Unable to terminate OMP thread:", - "Wrong schedule type %1$d, see or file for the list of values supported.", - "Unknown scheduling type \"%1$d\".", - "%1$s value \"%2$s\" is invalid.", - "%1$s value \"%2$s\" is too small.", - "%1$s value \"%2$s\" is too large.", - "%1$s: \"%2$s\" is an invalid value; ignored.", - "%1$s release value \"%2$s\" is invalid.", - "%1$s gather value \"%2$s\" is invalid.", - "%1$s supported only on debug builds; ignored.", - "Syntax error: Usage: %1$s=[ routine= | filename= | range=: | excl_range=: ],...", - "Unbalanced quotes in %1$s.", - "Empty string specified for %1$s; ignored.", - "%1$s value is too long; ignored.", - "%1$s: Invalid clause in \"%2$s\".", - "Empty clause in %1$s.", - "%1$s value \"%2$s\" is invalid chunk size.", - "%1$s value \"%2$s\" is to large chunk size.", - "%1$s value \"%2$s\" is ignored.", - "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY.", - "%1$s must be set prior to first parallel region; ignored.", - "%1$s: parameter has been specified already, ignoring \"%2$s\".", - "%1$s: parameter invalid, ignoring \"%2$s\".", - "%1$s: too many integer parameters specified, ignoring \"%2$s\".", - "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\".", - "%1$s: '%2$s' type does not take any integer parameters, ignoring them.", - "%1$s: proclist not specified with explicit affinity type, using 
\"none\".", - "%1$s: proclist specified, setting affinity type to \"explicit\".", - "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored.", - "%1$s: syntax error, not using affinity.", - "%1$s: range error (zero stride), not using affinity.", - "%1$s: range error (%2$d > %3$d), not using affinity.", - "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity.", - "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity.", - "%1$s: %2$s is defined. %3$s will be ignored.", - "%1$s: affinity not supported, using \"disabled\".", - "%1$s: affinity only supported for Intel(R) processors.", - "%1$s: getaffinity system call not supported.", - "%1$s: setaffinity system call not supported.", - "%1$s: pthread_aff_set_np call not found.", - "%1$s: pthread_get_num_resources_np call not found.", - "%1$s: the OS kernel does not support affinity.", - "%1$s: pthread_get_num_resources_np returned %2$d.", - "%1$s: cannot determine proper affinity mask size.", - "%1$s=\"%2$s\": %3$s.", - "%1$s: extra trailing characters ignored: \"%2$s\".", - "%1$s: unknown method \"%2$s\".", - "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday.", - "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 'clock_gettime,2'. 
Using gettimeofday.", - "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday.", - "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday.", - "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\").", - "Unknown scheduling type detected.", - "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling.", - "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed.", - "ittnotify: Loading \"%1$s\" library failed.", - "ittnotify: All itt notifications disabled.", - "ittnotify: Object state itt notifications disabled.", - "ittnotify: Mark itt notifications disabled.", - "ittnotify: Unloading \"%1$s\" library failed.", - "Cannot form a team with %1$d threads, using %2$d instead.", - "Requested number of active parallel levels \"%1$d\" is negative; ignored.", - "Requested number of active parallel levels \"%1$d\" exceeds supported limit; the following limit value will be used: \"%1$d\".", - "kmp_set_library must only be called from the top level serial thread; ignored.", - "Fatal system error detected.", - "Out of heap memory.", - "Clearing __KMP_REGISTERED_LIB env var failed.", - "Registering library with env var failed.", - "%1$s value \"%2$d\" will be used.", - "%1$s value \"%2$u\" will be used.", - "%1$s value \"%2$s\" will be used.", - "%1$s value \"%2$s\" will be used.", - "%1$s maximum value \"%2$d\" will be used.", - "%1$s minimum value \"%2$d\" will be used.", - "Memory allocation failed.", - "File name too long.", - "Lock table overflow.", - "Too many threads to use threadprivate directive.", - "%1$s: invalid mask.", - "Wrong definition.", - "Windows* OS: TLS Set Value failed.", - "Windows* OS: TLS out of indexes.", - "PDONE directive must be nested within a DO directive.", - "Cannot get number of available CPUs.", - "Assumed number of CPUs is 2.", - "Error initializing affinity - not using affinity.", - "Threads may migrate across all available OS procs (granularity 
setting too coarse).", - "Ignoring invalid OS proc ID %1$d.", - "No valid OS proc IDs specified - not using affinity.", - "%1$s - using \"flat\" OS <-> physical proc mapping.", - "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping.", - "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping.", - "%1$s: %2$s - exiting.", - "%1$s, line %2$d: %3$s - exiting.", - "Construct identifier invalid.", - "Thread identifier invalid.", - "runtime library not initialized.", - "Inconsistent THREADPRIVATE common block declarations are non-conforming and are unsupported. Either all threadprivate common blocks must be declared identically, or the largest instance of each threadprivate common block must be referenced first during the run.", - "Cannot set thread affinity mask.", - "Cannot set thread priority.", - "Cannot create thread.", - "Cannot create event.", - "Cannot set event.", - "Cannot close handle.", - "Unknown library type: %1$d.", - "Monitor did not reap properly.", - "Worker thread failed to join.", - "Cannot change thread affinity mask.", - "%1$s: Threads may migrate across %2$d innermost levels of machine", - "%1$s: decrease to %2$d threads", - "%1$s: increase to %2$d threads", - "%1$s: Internal thread %2$d bound to OS proc set %3$s", - "%1$s: Affinity capable, using cpuinfo file", - "%1$s: Affinity capable, using global cpuid info", - "%1$s: Affinity capable, using default \"flat\" topology", - "%1$s: Affinity not capable, using local cpuid info", - "%1$s: Affinity not capable, using cpuinfo file", - "%1$s: Affinity not capable, assumming \"flat\" topology", - "%1$s: Initial OS proc set respected: %2$s", - "%1$s: Initial OS proc set not respected: %2$s", - "%1$s: %2$d available OS procs", - "%1$s: Uniform topology", - "%1$s: Nonuniform topology", - "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)", - "%1$s: OS proc to physical thread map ([] => level not in map):", - "%1$s: OS proc maps to th package core 0", - "%1$s: OS 
proc %2$d maps to package %3$d [core %4$d] [thread %5$d]", - "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]", - "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d", - "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]", - "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]", - "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d", - "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]", - "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d", - "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d", - "%1$s: OS proc %2$d maps to %3$s", - "%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s", - "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d", - "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d", - "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package", - "%1$s: HT disabled; %2$d packages", - "Threads encountered barriers in different order. 
", - "Function %1$s failed:", - "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)", - "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected.", - "%1$s: ignored because %2$s has been defined", - "%1$s: overrides %3$s specified before", - "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause", - "Detected end of %1$s without first executing a corresponding beginning.", - "Iteration range too large in %1$s.", - "%1$s must not have a loop increment that evaluates to zero.", - "Expected end of %1$s; %2$s, however, has most recently begun execution.", - "%1$s is incorrectly nested within %2$s", - "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s", - "%1$s is incorrectly nested within %2$s of the same name", - "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause", - "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs", - "One thread at %1$s while another thread is at %2$s.", - "Cannot connect to %1$s", - "Cannot connect to %1$s - Using %2$s", - "%1$s does not support %2$s. Continuing without using %2$s.", - "%1$s does not support %2$s for %3$s. Continuing without using %2$s.", - "Static %1$s does not support %2$s. 
Continuing without using %2$s.", - "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0", - "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\".", - "ittnotify: Environment variable \"%1$s\" too long: Actual lengths is %2$lu, max allowed length is %3$lu.", - "%1$s: Affinity capable, using global cpuid leaf 11 info", - "%1$s: Affinity not capable, using local cpuid leaf 11 info", - "%1$s: %2$s.", - "%1$s: %2$s - %3$s.", - "%1$s: OS proc to physical thread map:", - "%1$s: using \"flat\" OS <-> physical proc mapping.", - "%1$s: parsing %2$s.", - "%1$s - exiting.", - "Incompatible %1$s library with version %2$s found.", - "ittnotify: Function %1$s failed:", - "ittnofify: Error #%1$d.", - "%1$s must be set prior to first parallel region or certain API calls; ignored.", - "Lock initialized at %1$s(%2$d) was not destroyed", - "Cannot determine machine load balance - Using %1$s", - "%1$s: Affinity not capable, using pthread info", - "%1$s: Affinity capable, using pthread info", - "Loading \"%1$s\" library failed:", - "Lookup of \"%1$s\" function failed:", - "Buffer too small.", - "Error #%1$d.", - "%1$s: Invalid symbols found. 
Check the value \"%2$s\".", - "%1$s: Spaces between digits are not allowed \"%2$s\".", - "%1$s: %2$s - parsing %3$s.", - "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group.", - "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".", - "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".", - "%1$s: Mapping Windows* OS processor group proc to OS proc 64*+.", - "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d", - "%1$s: Affinity balanced is not available.", - "%1$s: granularity=core will be used.", - "%1$s must be set prior to first OMP lock call or critical section; ignored.", - "futex system call not supported; %1$s=%2$s ignored.", - "%1$s: granularity=%2$s will be used.", - "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt (nSockets@offset, nCores@offset, nTthreads per core)\".", - "KMP_PLACE_THREADS ignored: unsupported architecture.", - "KMP_PLACE_THREADS ignored: too many cores requested.", - "%1$s: syntax error, using %2$s.", - "%1$s: Adaptive locks are not supported; using queuing.", - "%1$s: Invalid symbols found. Check the value \"%2$s\".", - "%1$s: Spaces between digits are not allowed \"%2$s\".", - "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s", - "%1$s error: parallel loop increment and condition are inconsistent.", - "libgomp cancellation is not currently supported.", - "KMP_PLACE_THREADS ignored: non-uniform topology.", - "KMP_PLACE_THREADS ignored: only three-level topology is supported.", - "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\".", - "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. 
Using \"granularity=core\".", - "KMP_PLACE_THREADS ignored: too many sockets requested.", - "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value.", - "%1$s: Affinity capable, using hwloc.", - "%1$s: Ignoring hwloc mechanism.", - "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms.", - NULL - }; - -static char const * -__kmp_i18n_default_hints[] = - { - NULL, - "Please submit a bug report with this message, compile and run commands used, and machine configuration info including native compiler and operating system versions. Faster response will be obtained by including all program sources. For information on submitting this issue, please see http://www.intel.com/software/products/support/.", - "Check NLSPATH environment variable, its value is \"%1$s\".", - "Please try changing the shell stack limit or adjusting the OMP_STACKSIZE environment variable.", - "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set).", - "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d.", - "This could also be due to a system-related limit on the number of threads.", - "This means that multiple copies of the OpenMP runtime have been linked into the program. That is dangerous, since it can degrade performance or cause incorrect results. The best thing to do is to ensure that only a single OpenMP runtime is linked into the process, e.g. by avoiding static linking of the OpenMP runtime in any library. As an unsafe, unsupported, undocumented workaround you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow the program to continue to execute, but that may cause crashes or silently produce incorrect results. For more information, please see http://www.intel.com/software/products/support/.", - "This name is specified in environment variable KMP_CPUINFO_FILE.", - "Seems application required too much memory.", - "Use \"0\", \"FALSE\". 
\".F.\", \"off\", \"no\" as false values, \"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values.", - "Perhaps too many threads.", - "Decrease priority of application. This will allow the monitor thread run at higher priority than other threads.", - "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit.", - "Try changing OMP_STACKSIZE and/or the shell stack limit.", - "Try increasing OMP_STACKSIZE or the shell stack limit.", - "Try decreasing OMP_STACKSIZE.", - "Try decreasing the value of OMP_NUM_THREADS.", - "Try increasing KMP_MONITOR_STACKSIZE.", - "Try decreasing KMP_MONITOR_STACKSIZE.", - "Try decreasing the number of threads in use simultaneously.", - "Will use default schedule type (%1$s).", - "It could be a result of using an older OMP library with a newer compiler or memory corruption. You may check the proper OMP library is linked to the application.", - "Check %1$s environment variable, its value is \"%2$s\".", - "You may want to use an %1$s library that supports %2$s interface with version %3$s.", - "You may want to use an %1$s library with version %2$s.", - "System error #193 is \"Bad format of EXE or DLL file\". Usually it means the file is found, but it is corrupted or a file for another architecture. 
Check whether \"%1$s\" is a file for %2$s architecture.", - "System-related limit on the number of threads.", - NULL - }; - -struct kmp_i18n_section { - int size; - char const ** str; -}; // struct kmp_i18n_section -typedef struct kmp_i18n_section kmp_i18n_section_t; - -static kmp_i18n_section_t -__kmp_i18n_sections[] = - { - { 0, NULL }, - { 5, __kmp_i18n_default_meta }, - { 54, __kmp_i18n_default_strings }, - { 6, __kmp_i18n_default_formats }, - { 253, __kmp_i18n_default_messages }, - { 27, __kmp_i18n_default_hints }, - { 0, NULL } - }; - -struct kmp_i18n_table { - int size; - kmp_i18n_section_t * sect; -}; // struct kmp_i18n_table -typedef struct kmp_i18n_table kmp_i18n_table_t; - -static kmp_i18n_table_t __kmp_i18n_default_table = - { - 5, - __kmp_i18n_sections - }; - -// end of file // +// Do not edit this file! // +// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. // + +static char const * +__kmp_i18n_default_meta[] = + { + NULL, + "English", + "USA", + "1033", + "2", + "20140827", + NULL + }; + +static char const * +__kmp_i18n_default_strings[] = + { + NULL, + "Error", + "(unknown file)", + "not a number", + "bad unit", + "illegal characters", + "value too large", + "value too small", + "value is not a multiple of 4k", + "Unknown processor topology", + "Cannot open /proc/cpuinfo", + "/proc/cpuinfo", + "cpuinfo file invalid (No processor records)", + "cpuinfo file invalid (Too many processor records)", + "Cannot rewind cpuinfo file", + "cpuinfo file invalid (long line)", + "cpuinfo file contains too many entries", + "cpuinfo file missing processor field", + "cpuinfo file missing physical id field", + "cpuinfo file invalid (missing val)", + "cpuinfo file invalid (duplicate field)", + "Physical node/pkg/core/thread ids not unique", + "APIC not present", + "Invalid cpuid info", + "APIC ids not unique", + "Inconsistent cpuid info", + "Out of heap memory", + "Memory allocation failed", + "core", + "thread", + "package", 
+ "node", + "", + "decoding legacy APIC ids", + "parsing /proc/cpuinfo", + "value is not defined", + "Effective settings:", + "User settings:", + "warning: pointers or size don't make sense", + "CPU", + "TPU", + "TPUs per package", + "HT enabled", + "HT disabled", + "decoding x2APIC ids", + "cpuid leaf 11 not supported", + "cpuid leaf 4 not supported", + "thread ids not unique", + "using pthread info", + "legacy APIC ids not unique", + "x2APIC ids not unique", + "OPENMP DISPLAY ENVIRONMENT BEGIN", + "OPENMP DISPLAY ENVIRONMENT END", + "[device]", + "[host]", + NULL + }; + +static char const * +__kmp_i18n_default_formats[] = + { + NULL, + "OMP: Info #%1$d: %2$s\n", + "OMP: Warning #%1$d: %2$s\n", + "OMP: Error #%1$d: %2$s\n", + "OMP: System error #%1$d: %2$s\n", + "OMP: Hint: %2$s\n", + "%1$s pragma (at %2$s:%3$s():%4$s)", + NULL + }; + +static char const * +__kmp_i18n_default_messages[] = + { + NULL, + "Library is \"serial\".", + "Cannot open message catalog \"%1$s\":", + "Default messages will be used.", + "%1$s: Lock is uninitialized", + "%1$s: Lock was initialized as simple, but used as nestable", + "%1$s: Lock was initialized as nestable, but used as simple", + "%1$s: Lock is already owned by requesting thread", + "%1$s: Lock is still owned by a thread", + "%1$s: Attempt to release a lock not owned by any thread", + "%1$s: Attempt to release a lock owned by another thread", + "Stack overflow detected for OpenMP thread #%1$d", + "Stack overlap detected. 
", + "Assertion failure at %1$s(%2$d).", + "Unable to register a new user thread.", + "Initializing %1$s, but found %2$s already initialized.", + "Cannot open file \"%1$s\" for reading:", + "Getting environment variable \"%1$s\" failed:", + "Setting environment variable \"%1$s\" failed:", + "Getting environment failed:", + "%1$s=\"%2$s\": Wrong value, boolean expected.", + "No Helper Thread support built in this OMP library.", + "Helper thread failed to soft terminate.", + "Buffer overflow detected.", + "Real-time scheduling policy is not supported.", + "OMP application is running at maximum priority with real-time scheduling policy. ", + "Changing priority of the monitor thread failed:", + "Deadlocks are highly possible due to monitor thread starvation.", + "Unable to set monitor thread stack size to %1$lu bytes:", + "Unable to set OMP thread stack size to %1$lu bytes:", + "Thread attribute initialization failed:", + "Thread attribute destroying failed:", + "OMP thread joinable state setting failed:", + "Monitor thread joinable state setting failed:", + "System unable to allocate necessary resources for OMP thread:", + "System unable to allocate necessary resources for the monitor thread:", + "Unable to terminate OMP thread:", + "Wrong schedule type %1$d, see or file for the list of values supported.", + "Unknown scheduling type \"%1$d\".", + "%1$s value \"%2$s\" is invalid.", + "%1$s value \"%2$s\" is too small.", + "%1$s value \"%2$s\" is too large.", + "%1$s: \"%2$s\" is an invalid value; ignored.", + "%1$s release value \"%2$s\" is invalid.", + "%1$s gather value \"%2$s\" is invalid.", + "%1$s supported only on debug builds; ignored.", + "Syntax error: Usage: %1$s=[ routine= | filename= | range=: | excl_range=: ],...", + "Unbalanced quotes in %1$s.", + "Empty string specified for %1$s; ignored.", + "%1$s value is too long; ignored.", + "%1$s: Invalid clause in \"%2$s\".", + "Empty clause in %1$s.", + "%1$s value \"%2$s\" is invalid chunk size.", + "%1$s value 
\"%2$s\" is to large chunk size.", + "%1$s value \"%2$s\" is ignored.", + "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY.", + "%1$s must be set prior to first parallel region; ignored.", + "%1$s: parameter has been specified already, ignoring \"%2$s\".", + "%1$s: parameter invalid, ignoring \"%2$s\".", + "%1$s: too many integer parameters specified, ignoring \"%2$s\".", + "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\".", + "%1$s: '%2$s' type does not take any integer parameters, ignoring them.", + "%1$s: proclist not specified with explicit affinity type, using \"none\".", + "%1$s: proclist specified, setting affinity type to \"explicit\".", + "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored.", + "%1$s: syntax error, not using affinity.", + "%1$s: range error (zero stride), not using affinity.", + "%1$s: range error (%2$d > %3$d), not using affinity.", + "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity.", + "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity.", + "%1$s: %2$s is defined. %3$s will be ignored.", + "%1$s: affinity not supported, using \"disabled\".", + "%1$s: affinity only supported for Intel(R) processors.", + "%1$s: getaffinity system call not supported.", + "%1$s: setaffinity system call not supported.", + "%1$s: pthread_aff_set_np call not found.", + "%1$s: pthread_get_num_resources_np call not found.", + "%1$s: the OS kernel does not support affinity.", + "%1$s: pthread_get_num_resources_np returned %2$d.", + "%1$s: cannot determine proper affinity mask size.", + "%1$s=\"%2$s\": %3$s.", + "%1$s: extra trailing characters ignored: \"%2$s\".", + "%1$s: unknown method \"%2$s\".", + "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday.", + "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 'clock_gettime,2'. 
Using gettimeofday.", + "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday.", + "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday.", + "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\").", + "Unknown scheduling type detected.", + "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling.", + "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed.", + "ittnotify: Loading \"%1$s\" library failed.", + "ittnotify: All itt notifications disabled.", + "ittnotify: Object state itt notifications disabled.", + "ittnotify: Mark itt notifications disabled.", + "ittnotify: Unloading \"%1$s\" library failed.", + "Cannot form a team with %1$d threads, using %2$d instead.", + "Requested number of active parallel levels \"%1$d\" is negative; ignored.", + "Requested number of active parallel levels \"%1$d\" exceeds supported limit; the following limit value will be used: \"%1$d\".", + "kmp_set_library must only be called from the top level serial thread; ignored.", + "Fatal system error detected.", + "Out of heap memory.", + "Clearing __KMP_REGISTERED_LIB env var failed.", + "Registering library with env var failed.", + "%1$s value \"%2$d\" will be used.", + "%1$s value \"%2$u\" will be used.", + "%1$s value \"%2$s\" will be used.", + "%1$s value \"%2$s\" will be used.", + "%1$s maximum value \"%2$d\" will be used.", + "%1$s minimum value \"%2$d\" will be used.", + "Memory allocation failed.", + "File name too long.", + "Lock table overflow.", + "Too many threads to use threadprivate directive.", + "%1$s: invalid mask.", + "Wrong definition.", + "Windows* OS: TLS Set Value failed.", + "Windows* OS: TLS out of indexes.", + "PDONE directive must be nested within a DO directive.", + "Cannot get number of available CPUs.", + "Assumed number of CPUs is 2.", + "Error initializing affinity - not using affinity.", + "Threads may migrate across all available OS procs (granularity 
setting too coarse).", + "Ignoring invalid OS proc ID %1$d.", + "No valid OS proc IDs specified - not using affinity.", + "%1$s - using \"flat\" OS <-> physical proc mapping.", + "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping.", + "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping.", + "%1$s: %2$s - exiting.", + "%1$s, line %2$d: %3$s - exiting.", + "Construct identifier invalid.", + "Thread identifier invalid.", + "runtime library not initialized.", + "Inconsistent THREADPRIVATE common block declarations are non-conforming and are unsupported. Either all threadprivate common blocks must be declared identically, or the largest instance of each threadprivate common block must be referenced first during the run.", + "Cannot set thread affinity mask.", + "Cannot set thread priority.", + "Cannot create thread.", + "Cannot create event.", + "Cannot set event.", + "Cannot close handle.", + "Unknown library type: %1$d.", + "Monitor did not reap properly.", + "Worker thread failed to join.", + "Cannot change thread affinity mask.", + "%1$s: Threads may migrate across %2$d innermost levels of machine", + "%1$s: decrease to %2$d threads", + "%1$s: increase to %2$d threads", + "%1$s: Internal thread %2$d bound to OS proc set %3$s", + "%1$s: Affinity capable, using cpuinfo file", + "%1$s: Affinity capable, using global cpuid info", + "%1$s: Affinity capable, using default \"flat\" topology", + "%1$s: Affinity not capable, using local cpuid info", + "%1$s: Affinity not capable, using cpuinfo file", + "%1$s: Affinity not capable, assumming \"flat\" topology", + "%1$s: Initial OS proc set respected: %2$s", + "%1$s: Initial OS proc set not respected: %2$s", + "%1$s: %2$d available OS procs", + "%1$s: Uniform topology", + "%1$s: Nonuniform topology", + "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)", + "%1$s: OS proc to physical thread map ([] => level not in map):", + "%1$s: OS proc maps to th package core 0", + "%1$s: OS 
proc %2$d maps to package %3$d [core %4$d] [thread %5$d]", + "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]", + "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d", + "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]", + "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]", + "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d", + "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]", + "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d", + "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d", + "%1$s: OS proc %2$d maps to %3$s", + "%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s", + "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d", + "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d", + "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package", + "%1$s: HT disabled; %2$d packages", + "Threads encountered barriers in different order. 
", + "Function %1$s failed:", + "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)", + "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected.", + "%1$s: ignored because %2$s has been defined", + "%1$s: overrides %3$s specified before", + "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause", + "Detected end of %1$s without first executing a corresponding beginning.", + "Iteration range too large in %1$s.", + "%1$s must not have a loop increment that evaluates to zero.", + "Expected end of %1$s; %2$s, however, has most recently begun execution.", + "%1$s is incorrectly nested within %2$s", + "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s", + "%1$s is incorrectly nested within %2$s of the same name", + "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause", + "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs", + "One thread at %1$s while another thread is at %2$s.", + "Cannot connect to %1$s", + "Cannot connect to %1$s - Using %2$s", + "%1$s does not support %2$s. Continuing without using %2$s.", + "%1$s does not support %2$s for %3$s. Continuing without using %2$s.", + "Static %1$s does not support %2$s. 
Continuing without using %2$s.", + "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0", + "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\".", + "ittnotify: Environment variable \"%1$s\" too long: Actual lengths is %2$lu, max allowed length is %3$lu.", + "%1$s: Affinity capable, using global cpuid leaf 11 info", + "%1$s: Affinity not capable, using local cpuid leaf 11 info", + "%1$s: %2$s.", + "%1$s: %2$s - %3$s.", + "%1$s: OS proc to physical thread map:", + "%1$s: using \"flat\" OS <-> physical proc mapping.", + "%1$s: parsing %2$s.", + "%1$s - exiting.", + "Incompatible %1$s library with version %2$s found.", + "ittnotify: Function %1$s failed:", + "ittnofify: Error #%1$d.", + "%1$s must be set prior to first parallel region or certain API calls; ignored.", + "Lock initialized at %1$s(%2$d) was not destroyed", + "Cannot determine machine load balance - Using %1$s", + "%1$s: Affinity not capable, using pthread info", + "%1$s: Affinity capable, using pthread info", + "Loading \"%1$s\" library failed:", + "Lookup of \"%1$s\" function failed:", + "Buffer too small.", + "Error #%1$d.", + "%1$s: Invalid symbols found. 
Check the value \"%2$s\".", + "%1$s: Spaces between digits are not allowed \"%2$s\".", + "%1$s: %2$s - parsing %3$s.", + "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group.", + "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".", + "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".", + "%1$s: Mapping Windows* OS processor group proc to OS proc 64*+.", + "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d", + "%1$s: Affinity balanced is not available.", + "%1$s: granularity=core will be used.", + "%1$s must be set prior to first OMP lock call or critical section; ignored.", + "futex system call not supported; %1$s=%2$s ignored.", + "%1$s: granularity=%2$s will be used.", + "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt (nSockets@offset, nCores@offset, nTthreads per core)\".", + "KMP_PLACE_THREADS ignored: unsupported architecture.", + "KMP_PLACE_THREADS ignored: too many cores requested.", + "%1$s: syntax error, using %2$s.", + "%1$s: Adaptive locks are not supported; using queuing.", + "%1$s: Invalid symbols found. Check the value \"%2$s\".", + "%1$s: Spaces between digits are not allowed \"%2$s\".", + "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s", + "%1$s error: parallel loop increment and condition are inconsistent.", + "libgomp cancellation is not currently supported.", + "KMP_PLACE_THREADS ignored: non-uniform topology.", + "KMP_PLACE_THREADS ignored: only three-level topology is supported.", + "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\".", + "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. 
Using \"granularity=core\".", + "KMP_PLACE_THREADS ignored: too many sockets requested.", + "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value.", + "%1$s: Affinity capable, using hwloc.", + "%1$s: Ignoring hwloc mechanism.", + "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms.", + NULL + }; + +static char const * +__kmp_i18n_default_hints[] = + { + NULL, + "Please submit a bug report with this message, compile and run commands used, and machine configuration info including native compiler and operating system versions. Faster response will be obtained by including all program sources. For information on submitting this issue, please see http://www.intel.com/software/products/support/.", + "Check NLSPATH environment variable, its value is \"%1$s\".", + "Please try changing the shell stack limit or adjusting the OMP_STACKSIZE environment variable.", + "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set).", + "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d.", + "This could also be due to a system-related limit on the number of threads.", + "This means that multiple copies of the OpenMP runtime have been linked into the program. That is dangerous, since it can degrade performance or cause incorrect results. The best thing to do is to ensure that only a single OpenMP runtime is linked into the process, e.g. by avoiding static linking of the OpenMP runtime in any library. As an unsafe, unsupported, undocumented workaround you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow the program to continue to execute, but that may cause crashes or silently produce incorrect results. For more information, please see http://www.intel.com/software/products/support/.", + "This name is specified in environment variable KMP_CPUINFO_FILE.", + "Seems application required too much memory.", + "Use \"0\", \"FALSE\". 
\".F.\", \"off\", \"no\" as false values, \"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values.", + "Perhaps too many threads.", + "Decrease priority of application. This will allow the monitor thread run at higher priority than other threads.", + "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit.", + "Try changing OMP_STACKSIZE and/or the shell stack limit.", + "Try increasing OMP_STACKSIZE or the shell stack limit.", + "Try decreasing OMP_STACKSIZE.", + "Try decreasing the value of OMP_NUM_THREADS.", + "Try increasing KMP_MONITOR_STACKSIZE.", + "Try decreasing KMP_MONITOR_STACKSIZE.", + "Try decreasing the number of threads in use simultaneously.", + "Will use default schedule type (%1$s).", + "It could be a result of using an older OMP library with a newer compiler or memory corruption. You may check the proper OMP library is linked to the application.", + "Check %1$s environment variable, its value is \"%2$s\".", + "You may want to use an %1$s library that supports %2$s interface with version %3$s.", + "You may want to use an %1$s library with version %2$s.", + "System error #193 is \"Bad format of EXE or DLL file\". Usually it means the file is found, but it is corrupted or a file for another architecture. 
Check whether \"%1$s\" is a file for %2$s architecture.", + "System-related limit on the number of threads.", + NULL + }; + +struct kmp_i18n_section { + int size; + char const ** str; +}; // struct kmp_i18n_section +typedef struct kmp_i18n_section kmp_i18n_section_t; + +static kmp_i18n_section_t +__kmp_i18n_sections[] = + { + { 0, NULL }, + { 5, __kmp_i18n_default_meta }, + { 54, __kmp_i18n_default_strings }, + { 6, __kmp_i18n_default_formats }, + { 253, __kmp_i18n_default_messages }, + { 27, __kmp_i18n_default_hints }, + { 0, NULL } + }; + +struct kmp_i18n_table { + int size; + kmp_i18n_section_t * sect; +}; // struct kmp_i18n_table +typedef struct kmp_i18n_table kmp_i18n_table_t; + +static kmp_i18n_table_t __kmp_i18n_default_table = + { + 5, + __kmp_i18n_sections + }; + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc b/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc index c57f3efe6c9..5de276377c7 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc +++ b/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc @@ -1,381 +1,381 @@ -// Do not edit this file! // -// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. // - -enum kmp_i18n_id { - - // A special id for absence of message. - kmp_i18n_null = 0, - - // Set #1, meta. - kmp_i18n_prp_first = 65536, - kmp_i18n_prp_Language, - kmp_i18n_prp_Country, - kmp_i18n_prp_LangId, - kmp_i18n_prp_Version, - kmp_i18n_prp_Revision, - kmp_i18n_prp_last, - - // Set #2, strings. 
- kmp_i18n_str_first = 131072, - kmp_i18n_str_Error, - kmp_i18n_str_UnknownFile, - kmp_i18n_str_NotANumber, - kmp_i18n_str_BadUnit, - kmp_i18n_str_IllegalCharacters, - kmp_i18n_str_ValueTooLarge, - kmp_i18n_str_ValueTooSmall, - kmp_i18n_str_NotMultiple4K, - kmp_i18n_str_UnknownTopology, - kmp_i18n_str_CantOpenCpuinfo, - kmp_i18n_str_ProcCpuinfo, - kmp_i18n_str_NoProcRecords, - kmp_i18n_str_TooManyProcRecords, - kmp_i18n_str_CantRewindCpuinfo, - kmp_i18n_str_LongLineCpuinfo, - kmp_i18n_str_TooManyEntries, - kmp_i18n_str_MissingProcField, - kmp_i18n_str_MissingPhysicalIDField, - kmp_i18n_str_MissingValCpuinfo, - kmp_i18n_str_DuplicateFieldCpuinfo, - kmp_i18n_str_PhysicalIDsNotUnique, - kmp_i18n_str_ApicNotPresent, - kmp_i18n_str_InvalidCpuidInfo, - kmp_i18n_str_OBSOLETE1, - kmp_i18n_str_InconsistentCpuidInfo, - kmp_i18n_str_OutOfHeapMemory, - kmp_i18n_str_MemoryAllocFailed, - kmp_i18n_str_Core, - kmp_i18n_str_Thread, - kmp_i18n_str_Package, - kmp_i18n_str_Node, - kmp_i18n_str_OBSOLETE2, - kmp_i18n_str_DecodingLegacyAPIC, - kmp_i18n_str_OBSOLETE3, - kmp_i18n_str_NotDefined, - kmp_i18n_str_EffectiveSettings, - kmp_i18n_str_UserSettings, - kmp_i18n_str_StorageMapWarning, - kmp_i18n_str_OBSOLETE4, - kmp_i18n_str_OBSOLETE5, - kmp_i18n_str_OBSOLETE6, - kmp_i18n_str_OBSOLETE7, - kmp_i18n_str_OBSOLETE8, - kmp_i18n_str_Decodingx2APIC, - kmp_i18n_str_NoLeaf11Support, - kmp_i18n_str_NoLeaf4Support, - kmp_i18n_str_ThreadIDsNotUnique, - kmp_i18n_str_UsingPthread, - kmp_i18n_str_LegacyApicIDsNotUnique, - kmp_i18n_str_x2ApicIDsNotUnique, - kmp_i18n_str_DisplayEnvBegin, - kmp_i18n_str_DisplayEnvEnd, - kmp_i18n_str_Device, - kmp_i18n_str_Host, - kmp_i18n_str_last, - - // Set #3, formats. - kmp_i18n_fmt_first = 196608, - kmp_i18n_fmt_Info, - kmp_i18n_fmt_Warning, - kmp_i18n_fmt_Fatal, - kmp_i18n_fmt_SysErr, - kmp_i18n_fmt_Hint, - kmp_i18n_fmt_Pragma, - kmp_i18n_fmt_last, - - // Set #4, messages. 
- kmp_i18n_msg_first = 262144, - kmp_i18n_msg_LibraryIsSerial, - kmp_i18n_msg_CantOpenMessageCatalog, - kmp_i18n_msg_WillUseDefaultMessages, - kmp_i18n_msg_LockIsUninitialized, - kmp_i18n_msg_LockSimpleUsedAsNestable, - kmp_i18n_msg_LockNestableUsedAsSimple, - kmp_i18n_msg_LockIsAlreadyOwned, - kmp_i18n_msg_LockStillOwned, - kmp_i18n_msg_LockUnsettingFree, - kmp_i18n_msg_LockUnsettingSetByAnother, - kmp_i18n_msg_StackOverflow, - kmp_i18n_msg_StackOverlap, - kmp_i18n_msg_AssertionFailure, - kmp_i18n_msg_CantRegisterNewThread, - kmp_i18n_msg_DuplicateLibrary, - kmp_i18n_msg_CantOpenFileForReading, - kmp_i18n_msg_CantGetEnvVar, - kmp_i18n_msg_CantSetEnvVar, - kmp_i18n_msg_CantGetEnvironment, - kmp_i18n_msg_BadBoolValue, - kmp_i18n_msg_SSPNotBuiltIn, - kmp_i18n_msg_SPPSotfTerminateFailed, - kmp_i18n_msg_BufferOverflow, - kmp_i18n_msg_RealTimeSchedNotSupported, - kmp_i18n_msg_RunningAtMaxPriority, - kmp_i18n_msg_CantChangeMonitorPriority, - kmp_i18n_msg_MonitorWillStarve, - kmp_i18n_msg_CantSetMonitorStackSize, - kmp_i18n_msg_CantSetWorkerStackSize, - kmp_i18n_msg_CantInitThreadAttrs, - kmp_i18n_msg_CantDestroyThreadAttrs, - kmp_i18n_msg_CantSetWorkerState, - kmp_i18n_msg_CantSetMonitorState, - kmp_i18n_msg_NoResourcesForWorkerThread, - kmp_i18n_msg_NoResourcesForMonitorThread, - kmp_i18n_msg_CantTerminateWorkerThread, - kmp_i18n_msg_ScheduleKindOutOfRange, - kmp_i18n_msg_UnknownSchedulingType, - kmp_i18n_msg_InvalidValue, - kmp_i18n_msg_SmallValue, - kmp_i18n_msg_LargeValue, - kmp_i18n_msg_StgInvalidValue, - kmp_i18n_msg_BarrReleaseValueInvalid, - kmp_i18n_msg_BarrGatherValueInvalid, - kmp_i18n_msg_OBSOLETE9, - kmp_i18n_msg_ParRangeSyntax, - kmp_i18n_msg_UnbalancedQuotes, - kmp_i18n_msg_EmptyString, - kmp_i18n_msg_LongValue, - kmp_i18n_msg_InvalidClause, - kmp_i18n_msg_EmptyClause, - kmp_i18n_msg_InvalidChunk, - kmp_i18n_msg_LargeChunk, - kmp_i18n_msg_IgnoreChunk, - kmp_i18n_msg_CantGetProcFreq, - kmp_i18n_msg_EnvParallelWarn, - kmp_i18n_msg_AffParamDefined, - 
kmp_i18n_msg_AffInvalidParam, - kmp_i18n_msg_AffManyParams, - kmp_i18n_msg_AffManyParamsForLogic, - kmp_i18n_msg_AffNoParam, - kmp_i18n_msg_AffNoProcList, - kmp_i18n_msg_AffProcListNoType, - kmp_i18n_msg_AffProcListNotExplicit, - kmp_i18n_msg_AffSyntaxError, - kmp_i18n_msg_AffZeroStride, - kmp_i18n_msg_AffStartGreaterEnd, - kmp_i18n_msg_AffStrideLessZero, - kmp_i18n_msg_AffRangeTooBig, - kmp_i18n_msg_OBSOLETE10, - kmp_i18n_msg_AffNotSupported, - kmp_i18n_msg_OBSOLETE11, - kmp_i18n_msg_GetAffSysCallNotSupported, - kmp_i18n_msg_SetAffSysCallNotSupported, - kmp_i18n_msg_OBSOLETE12, - kmp_i18n_msg_OBSOLETE13, - kmp_i18n_msg_OBSOLETE14, - kmp_i18n_msg_OBSOLETE15, - kmp_i18n_msg_AffCantGetMaskSize, - kmp_i18n_msg_ParseSizeIntWarn, - kmp_i18n_msg_ParseExtraCharsWarn, - kmp_i18n_msg_UnknownForceReduction, - kmp_i18n_msg_TimerUseGettimeofday, - kmp_i18n_msg_TimerNeedMoreParam, - kmp_i18n_msg_TimerInvalidParam, - kmp_i18n_msg_TimerGettimeFailed, - kmp_i18n_msg_TimerUnknownFunction, - kmp_i18n_msg_UnknownSchedTypeDetected, - kmp_i18n_msg_DispatchManyThreads, - kmp_i18n_msg_IttLookupFailed, - kmp_i18n_msg_IttLoadLibFailed, - kmp_i18n_msg_IttAllNotifDisabled, - kmp_i18n_msg_IttObjNotifDisabled, - kmp_i18n_msg_IttMarkNotifDisabled, - kmp_i18n_msg_IttUnloadLibFailed, - kmp_i18n_msg_CantFormThrTeam, - kmp_i18n_msg_ActiveLevelsNegative, - kmp_i18n_msg_ActiveLevelsExceedLimit, - kmp_i18n_msg_SetLibraryIncorrectCall, - kmp_i18n_msg_FatalSysError, - kmp_i18n_msg_OutOfHeapMemory, - kmp_i18n_msg_OBSOLETE16, - kmp_i18n_msg_OBSOLETE17, - kmp_i18n_msg_Using_int_Value, - kmp_i18n_msg_Using_uint_Value, - kmp_i18n_msg_Using_uint64_Value, - kmp_i18n_msg_Using_str_Value, - kmp_i18n_msg_MaxValueUsing, - kmp_i18n_msg_MinValueUsing, - kmp_i18n_msg_MemoryAllocFailed, - kmp_i18n_msg_FileNameTooLong, - kmp_i18n_msg_OBSOLETE18, - kmp_i18n_msg_ManyThreadsForTPDirective, - kmp_i18n_msg_AffinityInvalidMask, - kmp_i18n_msg_WrongDefinition, - kmp_i18n_msg_TLSSetValueFailed, - kmp_i18n_msg_TLSOutOfIndexes, 
- kmp_i18n_msg_OBSOLETE19, - kmp_i18n_msg_CantGetNumAvailCPU, - kmp_i18n_msg_AssumedNumCPU, - kmp_i18n_msg_ErrorInitializeAffinity, - kmp_i18n_msg_AffThreadsMayMigrate, - kmp_i18n_msg_AffIgnoreInvalidProcID, - kmp_i18n_msg_AffNoValidProcID, - kmp_i18n_msg_UsingFlatOS, - kmp_i18n_msg_UsingFlatOSFile, - kmp_i18n_msg_UsingFlatOSFileLine, - kmp_i18n_msg_FileMsgExiting, - kmp_i18n_msg_FileLineMsgExiting, - kmp_i18n_msg_ConstructIdentInvalid, - kmp_i18n_msg_ThreadIdentInvalid, - kmp_i18n_msg_RTLNotInitialized, - kmp_i18n_msg_TPCommonBlocksInconsist, - kmp_i18n_msg_CantSetThreadAffMask, - kmp_i18n_msg_CantSetThreadPriority, - kmp_i18n_msg_CantCreateThread, - kmp_i18n_msg_CantCreateEvent, - kmp_i18n_msg_CantSetEvent, - kmp_i18n_msg_CantCloseHandle, - kmp_i18n_msg_UnknownLibraryType, - kmp_i18n_msg_ReapMonitorError, - kmp_i18n_msg_ReapWorkerError, - kmp_i18n_msg_ChangeThreadAffMaskError, - kmp_i18n_msg_ThreadsMigrate, - kmp_i18n_msg_DecreaseToThreads, - kmp_i18n_msg_IncreaseToThreads, - kmp_i18n_msg_OBSOLETE20, - kmp_i18n_msg_AffCapableUseCpuinfo, - kmp_i18n_msg_AffUseGlobCpuid, - kmp_i18n_msg_AffCapableUseFlat, - kmp_i18n_msg_AffNotCapableUseLocCpuid, - kmp_i18n_msg_AffNotCapableUseCpuinfo, - kmp_i18n_msg_AffFlatTopology, - kmp_i18n_msg_InitOSProcSetRespect, - kmp_i18n_msg_InitOSProcSetNotRespect, - kmp_i18n_msg_AvailableOSProc, - kmp_i18n_msg_Uniform, - kmp_i18n_msg_NonUniform, - kmp_i18n_msg_Topology, - kmp_i18n_msg_OBSOLETE21, - kmp_i18n_msg_OSProcToPackage, - kmp_i18n_msg_OBSOLETE22, - kmp_i18n_msg_OBSOLETE23, - kmp_i18n_msg_OBSOLETE24, - kmp_i18n_msg_OBSOLETE25, - kmp_i18n_msg_OBSOLETE26, - kmp_i18n_msg_OBSOLETE27, - kmp_i18n_msg_OBSOLETE28, - kmp_i18n_msg_OBSOLETE29, - kmp_i18n_msg_OBSOLETE30, - kmp_i18n_msg_OSProcMapToPack, - kmp_i18n_msg_OBSOLETE31, - kmp_i18n_msg_OBSOLETE32, - kmp_i18n_msg_OBSOLETE33, - kmp_i18n_msg_OBSOLETE34, - kmp_i18n_msg_OBSOLETE35, - kmp_i18n_msg_BarriersInDifferentOrder, - kmp_i18n_msg_FunctionError, - kmp_i18n_msg_TopologyExtra, - 
kmp_i18n_msg_WrongMessageCatalog, - kmp_i18n_msg_StgIgnored, - kmp_i18n_msg_OBSOLETE36, - kmp_i18n_msg_CnsBoundToWorksharing, - kmp_i18n_msg_CnsDetectedEnd, - kmp_i18n_msg_CnsIterationRangeTooLarge, - kmp_i18n_msg_CnsLoopIncrZeroProhibited, - kmp_i18n_msg_CnsExpectedEnd, - kmp_i18n_msg_CnsInvalidNesting, - kmp_i18n_msg_CnsMultipleNesting, - kmp_i18n_msg_CnsNestingSameName, - kmp_i18n_msg_CnsNoOrderedClause, - kmp_i18n_msg_CnsNotInTaskConstruct, - kmp_i18n_msg_CnsThreadsAtBarrier, - kmp_i18n_msg_CantConnect, - kmp_i18n_msg_CantConnectUsing, - kmp_i18n_msg_LibNotSupport, - kmp_i18n_msg_LibNotSupportFor, - kmp_i18n_msg_StaticLibNotSupport, - kmp_i18n_msg_OBSOLETE37, - kmp_i18n_msg_IttUnknownGroup, - kmp_i18n_msg_IttEnvVarTooLong, - kmp_i18n_msg_AffUseGlobCpuidL11, - kmp_i18n_msg_AffNotCapableUseLocCpuidL11, - kmp_i18n_msg_AffInfoStr, - kmp_i18n_msg_AffInfoStrStr, - kmp_i18n_msg_OSProcToPhysicalThreadMap, - kmp_i18n_msg_AffUsingFlatOS, - kmp_i18n_msg_AffParseFilename, - kmp_i18n_msg_MsgExiting, - kmp_i18n_msg_IncompatibleLibrary, - kmp_i18n_msg_IttFunctionError, - kmp_i18n_msg_IttUnknownError, - kmp_i18n_msg_EnvMiddleWarn, - kmp_i18n_msg_CnsLockNotDestroyed, - kmp_i18n_msg_CantLoadBalUsing, - kmp_i18n_msg_AffNotCapableUsePthread, - kmp_i18n_msg_AffUsePthread, - kmp_i18n_msg_OBSOLETE38, - kmp_i18n_msg_OBSOLETE39, - kmp_i18n_msg_OBSOLETE40, - kmp_i18n_msg_OBSOLETE41, - kmp_i18n_msg_NthSyntaxError, - kmp_i18n_msg_NthSpacesNotAllowed, - kmp_i18n_msg_AffStrParseFilename, - kmp_i18n_msg_OBSOLETE42, - kmp_i18n_msg_AffTypeCantUseMultGroups, - kmp_i18n_msg_AffGranCantUseMultGroups, - kmp_i18n_msg_AffWindowsProcGroupMap, - kmp_i18n_msg_AffOSProcToGroup, - kmp_i18n_msg_AffBalancedNotAvail, - kmp_i18n_msg_OBSOLETE43, - kmp_i18n_msg_EnvLockWarn, - kmp_i18n_msg_FutexNotSupported, - kmp_i18n_msg_AffGranUsing, - kmp_i18n_msg_AffThrPlaceInvalid, - kmp_i18n_msg_AffThrPlaceUnsupported, - kmp_i18n_msg_AffThrPlaceManyCores, - kmp_i18n_msg_SyntaxErrorUsing, - 
kmp_i18n_msg_AdaptiveNotSupported, - kmp_i18n_msg_EnvSyntaxError, - kmp_i18n_msg_EnvSpacesNotAllowed, - kmp_i18n_msg_BoundToOSProcSet, - kmp_i18n_msg_CnsLoopIncrIllegal, - kmp_i18n_msg_NoGompCancellation, - kmp_i18n_msg_AffThrPlaceNonUniform, - kmp_i18n_msg_AffThrPlaceNonThreeLevel, - kmp_i18n_msg_AffGranTopGroup, - kmp_i18n_msg_AffGranGroupType, - kmp_i18n_msg_AffThrPlaceManySockets, - kmp_i18n_msg_AffThrPlaceDeprecated, - kmp_i18n_msg_AffUsingHwloc, - kmp_i18n_msg_AffIgnoringHwloc, - kmp_i18n_msg_AffHwlocErrorOccurred, - kmp_i18n_msg_last, - - // Set #5, hints. - kmp_i18n_hnt_first = 327680, - kmp_i18n_hnt_SubmitBugReport, - kmp_i18n_hnt_OBSOLETE44, - kmp_i18n_hnt_ChangeStackLimit, - kmp_i18n_hnt_Unset_ALL_THREADS, - kmp_i18n_hnt_Set_ALL_THREADPRIVATE, - kmp_i18n_hnt_PossibleSystemLimitOnThreads, - kmp_i18n_hnt_DuplicateLibrary, - kmp_i18n_hnt_NameComesFrom_CPUINFO_FILE, - kmp_i18n_hnt_NotEnoughMemory, - kmp_i18n_hnt_ValidBoolValues, - kmp_i18n_hnt_BufferOverflow, - kmp_i18n_hnt_RunningAtMaxPriority, - kmp_i18n_hnt_ChangeMonitorStackSize, - kmp_i18n_hnt_ChangeWorkerStackSize, - kmp_i18n_hnt_IncreaseWorkerStackSize, - kmp_i18n_hnt_DecreaseWorkerStackSize, - kmp_i18n_hnt_Decrease_NUM_THREADS, - kmp_i18n_hnt_IncreaseMonitorStackSize, - kmp_i18n_hnt_DecreaseMonitorStackSize, - kmp_i18n_hnt_DecreaseNumberOfThreadsInUse, - kmp_i18n_hnt_DefaultScheduleKindUsed, - kmp_i18n_hnt_GetNewerLibrary, - kmp_i18n_hnt_CheckEnvVar, - kmp_i18n_hnt_OBSOLETE45, - kmp_i18n_hnt_OBSOLETE46, - kmp_i18n_hnt_BadExeFormat, - kmp_i18n_hnt_SystemLimitOnThreads, - kmp_i18n_hnt_last, - - kmp_i18n_xxx_lastest - -}; // enum kmp_i18n_id - -typedef enum kmp_i18n_id kmp_i18n_id_t; - - -// end of file // +// Do not edit this file! // +// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. // + +enum kmp_i18n_id { + + // A special id for absence of message. + kmp_i18n_null = 0, + + // Set #1, meta. 
+ kmp_i18n_prp_first = 65536, + kmp_i18n_prp_Language, + kmp_i18n_prp_Country, + kmp_i18n_prp_LangId, + kmp_i18n_prp_Version, + kmp_i18n_prp_Revision, + kmp_i18n_prp_last, + + // Set #2, strings. + kmp_i18n_str_first = 131072, + kmp_i18n_str_Error, + kmp_i18n_str_UnknownFile, + kmp_i18n_str_NotANumber, + kmp_i18n_str_BadUnit, + kmp_i18n_str_IllegalCharacters, + kmp_i18n_str_ValueTooLarge, + kmp_i18n_str_ValueTooSmall, + kmp_i18n_str_NotMultiple4K, + kmp_i18n_str_UnknownTopology, + kmp_i18n_str_CantOpenCpuinfo, + kmp_i18n_str_ProcCpuinfo, + kmp_i18n_str_NoProcRecords, + kmp_i18n_str_TooManyProcRecords, + kmp_i18n_str_CantRewindCpuinfo, + kmp_i18n_str_LongLineCpuinfo, + kmp_i18n_str_TooManyEntries, + kmp_i18n_str_MissingProcField, + kmp_i18n_str_MissingPhysicalIDField, + kmp_i18n_str_MissingValCpuinfo, + kmp_i18n_str_DuplicateFieldCpuinfo, + kmp_i18n_str_PhysicalIDsNotUnique, + kmp_i18n_str_ApicNotPresent, + kmp_i18n_str_InvalidCpuidInfo, + kmp_i18n_str_OBSOLETE1, + kmp_i18n_str_InconsistentCpuidInfo, + kmp_i18n_str_OutOfHeapMemory, + kmp_i18n_str_MemoryAllocFailed, + kmp_i18n_str_Core, + kmp_i18n_str_Thread, + kmp_i18n_str_Package, + kmp_i18n_str_Node, + kmp_i18n_str_OBSOLETE2, + kmp_i18n_str_DecodingLegacyAPIC, + kmp_i18n_str_OBSOLETE3, + kmp_i18n_str_NotDefined, + kmp_i18n_str_EffectiveSettings, + kmp_i18n_str_UserSettings, + kmp_i18n_str_StorageMapWarning, + kmp_i18n_str_OBSOLETE4, + kmp_i18n_str_OBSOLETE5, + kmp_i18n_str_OBSOLETE6, + kmp_i18n_str_OBSOLETE7, + kmp_i18n_str_OBSOLETE8, + kmp_i18n_str_Decodingx2APIC, + kmp_i18n_str_NoLeaf11Support, + kmp_i18n_str_NoLeaf4Support, + kmp_i18n_str_ThreadIDsNotUnique, + kmp_i18n_str_UsingPthread, + kmp_i18n_str_LegacyApicIDsNotUnique, + kmp_i18n_str_x2ApicIDsNotUnique, + kmp_i18n_str_DisplayEnvBegin, + kmp_i18n_str_DisplayEnvEnd, + kmp_i18n_str_Device, + kmp_i18n_str_Host, + kmp_i18n_str_last, + + // Set #3, formats. 
+ kmp_i18n_fmt_first = 196608, + kmp_i18n_fmt_Info, + kmp_i18n_fmt_Warning, + kmp_i18n_fmt_Fatal, + kmp_i18n_fmt_SysErr, + kmp_i18n_fmt_Hint, + kmp_i18n_fmt_Pragma, + kmp_i18n_fmt_last, + + // Set #4, messages. + kmp_i18n_msg_first = 262144, + kmp_i18n_msg_LibraryIsSerial, + kmp_i18n_msg_CantOpenMessageCatalog, + kmp_i18n_msg_WillUseDefaultMessages, + kmp_i18n_msg_LockIsUninitialized, + kmp_i18n_msg_LockSimpleUsedAsNestable, + kmp_i18n_msg_LockNestableUsedAsSimple, + kmp_i18n_msg_LockIsAlreadyOwned, + kmp_i18n_msg_LockStillOwned, + kmp_i18n_msg_LockUnsettingFree, + kmp_i18n_msg_LockUnsettingSetByAnother, + kmp_i18n_msg_StackOverflow, + kmp_i18n_msg_StackOverlap, + kmp_i18n_msg_AssertionFailure, + kmp_i18n_msg_CantRegisterNewThread, + kmp_i18n_msg_DuplicateLibrary, + kmp_i18n_msg_CantOpenFileForReading, + kmp_i18n_msg_CantGetEnvVar, + kmp_i18n_msg_CantSetEnvVar, + kmp_i18n_msg_CantGetEnvironment, + kmp_i18n_msg_BadBoolValue, + kmp_i18n_msg_SSPNotBuiltIn, + kmp_i18n_msg_SPPSotfTerminateFailed, + kmp_i18n_msg_BufferOverflow, + kmp_i18n_msg_RealTimeSchedNotSupported, + kmp_i18n_msg_RunningAtMaxPriority, + kmp_i18n_msg_CantChangeMonitorPriority, + kmp_i18n_msg_MonitorWillStarve, + kmp_i18n_msg_CantSetMonitorStackSize, + kmp_i18n_msg_CantSetWorkerStackSize, + kmp_i18n_msg_CantInitThreadAttrs, + kmp_i18n_msg_CantDestroyThreadAttrs, + kmp_i18n_msg_CantSetWorkerState, + kmp_i18n_msg_CantSetMonitorState, + kmp_i18n_msg_NoResourcesForWorkerThread, + kmp_i18n_msg_NoResourcesForMonitorThread, + kmp_i18n_msg_CantTerminateWorkerThread, + kmp_i18n_msg_ScheduleKindOutOfRange, + kmp_i18n_msg_UnknownSchedulingType, + kmp_i18n_msg_InvalidValue, + kmp_i18n_msg_SmallValue, + kmp_i18n_msg_LargeValue, + kmp_i18n_msg_StgInvalidValue, + kmp_i18n_msg_BarrReleaseValueInvalid, + kmp_i18n_msg_BarrGatherValueInvalid, + kmp_i18n_msg_OBSOLETE9, + kmp_i18n_msg_ParRangeSyntax, + kmp_i18n_msg_UnbalancedQuotes, + kmp_i18n_msg_EmptyString, + kmp_i18n_msg_LongValue, + kmp_i18n_msg_InvalidClause, + 
kmp_i18n_msg_EmptyClause, + kmp_i18n_msg_InvalidChunk, + kmp_i18n_msg_LargeChunk, + kmp_i18n_msg_IgnoreChunk, + kmp_i18n_msg_CantGetProcFreq, + kmp_i18n_msg_EnvParallelWarn, + kmp_i18n_msg_AffParamDefined, + kmp_i18n_msg_AffInvalidParam, + kmp_i18n_msg_AffManyParams, + kmp_i18n_msg_AffManyParamsForLogic, + kmp_i18n_msg_AffNoParam, + kmp_i18n_msg_AffNoProcList, + kmp_i18n_msg_AffProcListNoType, + kmp_i18n_msg_AffProcListNotExplicit, + kmp_i18n_msg_AffSyntaxError, + kmp_i18n_msg_AffZeroStride, + kmp_i18n_msg_AffStartGreaterEnd, + kmp_i18n_msg_AffStrideLessZero, + kmp_i18n_msg_AffRangeTooBig, + kmp_i18n_msg_OBSOLETE10, + kmp_i18n_msg_AffNotSupported, + kmp_i18n_msg_OBSOLETE11, + kmp_i18n_msg_GetAffSysCallNotSupported, + kmp_i18n_msg_SetAffSysCallNotSupported, + kmp_i18n_msg_OBSOLETE12, + kmp_i18n_msg_OBSOLETE13, + kmp_i18n_msg_OBSOLETE14, + kmp_i18n_msg_OBSOLETE15, + kmp_i18n_msg_AffCantGetMaskSize, + kmp_i18n_msg_ParseSizeIntWarn, + kmp_i18n_msg_ParseExtraCharsWarn, + kmp_i18n_msg_UnknownForceReduction, + kmp_i18n_msg_TimerUseGettimeofday, + kmp_i18n_msg_TimerNeedMoreParam, + kmp_i18n_msg_TimerInvalidParam, + kmp_i18n_msg_TimerGettimeFailed, + kmp_i18n_msg_TimerUnknownFunction, + kmp_i18n_msg_UnknownSchedTypeDetected, + kmp_i18n_msg_DispatchManyThreads, + kmp_i18n_msg_IttLookupFailed, + kmp_i18n_msg_IttLoadLibFailed, + kmp_i18n_msg_IttAllNotifDisabled, + kmp_i18n_msg_IttObjNotifDisabled, + kmp_i18n_msg_IttMarkNotifDisabled, + kmp_i18n_msg_IttUnloadLibFailed, + kmp_i18n_msg_CantFormThrTeam, + kmp_i18n_msg_ActiveLevelsNegative, + kmp_i18n_msg_ActiveLevelsExceedLimit, + kmp_i18n_msg_SetLibraryIncorrectCall, + kmp_i18n_msg_FatalSysError, + kmp_i18n_msg_OutOfHeapMemory, + kmp_i18n_msg_OBSOLETE16, + kmp_i18n_msg_OBSOLETE17, + kmp_i18n_msg_Using_int_Value, + kmp_i18n_msg_Using_uint_Value, + kmp_i18n_msg_Using_uint64_Value, + kmp_i18n_msg_Using_str_Value, + kmp_i18n_msg_MaxValueUsing, + kmp_i18n_msg_MinValueUsing, + kmp_i18n_msg_MemoryAllocFailed, + 
kmp_i18n_msg_FileNameTooLong, + kmp_i18n_msg_OBSOLETE18, + kmp_i18n_msg_ManyThreadsForTPDirective, + kmp_i18n_msg_AffinityInvalidMask, + kmp_i18n_msg_WrongDefinition, + kmp_i18n_msg_TLSSetValueFailed, + kmp_i18n_msg_TLSOutOfIndexes, + kmp_i18n_msg_OBSOLETE19, + kmp_i18n_msg_CantGetNumAvailCPU, + kmp_i18n_msg_AssumedNumCPU, + kmp_i18n_msg_ErrorInitializeAffinity, + kmp_i18n_msg_AffThreadsMayMigrate, + kmp_i18n_msg_AffIgnoreInvalidProcID, + kmp_i18n_msg_AffNoValidProcID, + kmp_i18n_msg_UsingFlatOS, + kmp_i18n_msg_UsingFlatOSFile, + kmp_i18n_msg_UsingFlatOSFileLine, + kmp_i18n_msg_FileMsgExiting, + kmp_i18n_msg_FileLineMsgExiting, + kmp_i18n_msg_ConstructIdentInvalid, + kmp_i18n_msg_ThreadIdentInvalid, + kmp_i18n_msg_RTLNotInitialized, + kmp_i18n_msg_TPCommonBlocksInconsist, + kmp_i18n_msg_CantSetThreadAffMask, + kmp_i18n_msg_CantSetThreadPriority, + kmp_i18n_msg_CantCreateThread, + kmp_i18n_msg_CantCreateEvent, + kmp_i18n_msg_CantSetEvent, + kmp_i18n_msg_CantCloseHandle, + kmp_i18n_msg_UnknownLibraryType, + kmp_i18n_msg_ReapMonitorError, + kmp_i18n_msg_ReapWorkerError, + kmp_i18n_msg_ChangeThreadAffMaskError, + kmp_i18n_msg_ThreadsMigrate, + kmp_i18n_msg_DecreaseToThreads, + kmp_i18n_msg_IncreaseToThreads, + kmp_i18n_msg_OBSOLETE20, + kmp_i18n_msg_AffCapableUseCpuinfo, + kmp_i18n_msg_AffUseGlobCpuid, + kmp_i18n_msg_AffCapableUseFlat, + kmp_i18n_msg_AffNotCapableUseLocCpuid, + kmp_i18n_msg_AffNotCapableUseCpuinfo, + kmp_i18n_msg_AffFlatTopology, + kmp_i18n_msg_InitOSProcSetRespect, + kmp_i18n_msg_InitOSProcSetNotRespect, + kmp_i18n_msg_AvailableOSProc, + kmp_i18n_msg_Uniform, + kmp_i18n_msg_NonUniform, + kmp_i18n_msg_Topology, + kmp_i18n_msg_OBSOLETE21, + kmp_i18n_msg_OSProcToPackage, + kmp_i18n_msg_OBSOLETE22, + kmp_i18n_msg_OBSOLETE23, + kmp_i18n_msg_OBSOLETE24, + kmp_i18n_msg_OBSOLETE25, + kmp_i18n_msg_OBSOLETE26, + kmp_i18n_msg_OBSOLETE27, + kmp_i18n_msg_OBSOLETE28, + kmp_i18n_msg_OBSOLETE29, + kmp_i18n_msg_OBSOLETE30, + kmp_i18n_msg_OSProcMapToPack, + 
kmp_i18n_msg_OBSOLETE31, + kmp_i18n_msg_OBSOLETE32, + kmp_i18n_msg_OBSOLETE33, + kmp_i18n_msg_OBSOLETE34, + kmp_i18n_msg_OBSOLETE35, + kmp_i18n_msg_BarriersInDifferentOrder, + kmp_i18n_msg_FunctionError, + kmp_i18n_msg_TopologyExtra, + kmp_i18n_msg_WrongMessageCatalog, + kmp_i18n_msg_StgIgnored, + kmp_i18n_msg_OBSOLETE36, + kmp_i18n_msg_CnsBoundToWorksharing, + kmp_i18n_msg_CnsDetectedEnd, + kmp_i18n_msg_CnsIterationRangeTooLarge, + kmp_i18n_msg_CnsLoopIncrZeroProhibited, + kmp_i18n_msg_CnsExpectedEnd, + kmp_i18n_msg_CnsInvalidNesting, + kmp_i18n_msg_CnsMultipleNesting, + kmp_i18n_msg_CnsNestingSameName, + kmp_i18n_msg_CnsNoOrderedClause, + kmp_i18n_msg_CnsNotInTaskConstruct, + kmp_i18n_msg_CnsThreadsAtBarrier, + kmp_i18n_msg_CantConnect, + kmp_i18n_msg_CantConnectUsing, + kmp_i18n_msg_LibNotSupport, + kmp_i18n_msg_LibNotSupportFor, + kmp_i18n_msg_StaticLibNotSupport, + kmp_i18n_msg_OBSOLETE37, + kmp_i18n_msg_IttUnknownGroup, + kmp_i18n_msg_IttEnvVarTooLong, + kmp_i18n_msg_AffUseGlobCpuidL11, + kmp_i18n_msg_AffNotCapableUseLocCpuidL11, + kmp_i18n_msg_AffInfoStr, + kmp_i18n_msg_AffInfoStrStr, + kmp_i18n_msg_OSProcToPhysicalThreadMap, + kmp_i18n_msg_AffUsingFlatOS, + kmp_i18n_msg_AffParseFilename, + kmp_i18n_msg_MsgExiting, + kmp_i18n_msg_IncompatibleLibrary, + kmp_i18n_msg_IttFunctionError, + kmp_i18n_msg_IttUnknownError, + kmp_i18n_msg_EnvMiddleWarn, + kmp_i18n_msg_CnsLockNotDestroyed, + kmp_i18n_msg_CantLoadBalUsing, + kmp_i18n_msg_AffNotCapableUsePthread, + kmp_i18n_msg_AffUsePthread, + kmp_i18n_msg_OBSOLETE38, + kmp_i18n_msg_OBSOLETE39, + kmp_i18n_msg_OBSOLETE40, + kmp_i18n_msg_OBSOLETE41, + kmp_i18n_msg_NthSyntaxError, + kmp_i18n_msg_NthSpacesNotAllowed, + kmp_i18n_msg_AffStrParseFilename, + kmp_i18n_msg_OBSOLETE42, + kmp_i18n_msg_AffTypeCantUseMultGroups, + kmp_i18n_msg_AffGranCantUseMultGroups, + kmp_i18n_msg_AffWindowsProcGroupMap, + kmp_i18n_msg_AffOSProcToGroup, + kmp_i18n_msg_AffBalancedNotAvail, + kmp_i18n_msg_OBSOLETE43, + kmp_i18n_msg_EnvLockWarn, + 
kmp_i18n_msg_FutexNotSupported, + kmp_i18n_msg_AffGranUsing, + kmp_i18n_msg_AffThrPlaceInvalid, + kmp_i18n_msg_AffThrPlaceUnsupported, + kmp_i18n_msg_AffThrPlaceManyCores, + kmp_i18n_msg_SyntaxErrorUsing, + kmp_i18n_msg_AdaptiveNotSupported, + kmp_i18n_msg_EnvSyntaxError, + kmp_i18n_msg_EnvSpacesNotAllowed, + kmp_i18n_msg_BoundToOSProcSet, + kmp_i18n_msg_CnsLoopIncrIllegal, + kmp_i18n_msg_NoGompCancellation, + kmp_i18n_msg_AffThrPlaceNonUniform, + kmp_i18n_msg_AffThrPlaceNonThreeLevel, + kmp_i18n_msg_AffGranTopGroup, + kmp_i18n_msg_AffGranGroupType, + kmp_i18n_msg_AffThrPlaceManySockets, + kmp_i18n_msg_AffThrPlaceDeprecated, + kmp_i18n_msg_AffUsingHwloc, + kmp_i18n_msg_AffIgnoringHwloc, + kmp_i18n_msg_AffHwlocErrorOccurred, + kmp_i18n_msg_last, + + // Set #5, hints. + kmp_i18n_hnt_first = 327680, + kmp_i18n_hnt_SubmitBugReport, + kmp_i18n_hnt_OBSOLETE44, + kmp_i18n_hnt_ChangeStackLimit, + kmp_i18n_hnt_Unset_ALL_THREADS, + kmp_i18n_hnt_Set_ALL_THREADPRIVATE, + kmp_i18n_hnt_PossibleSystemLimitOnThreads, + kmp_i18n_hnt_DuplicateLibrary, + kmp_i18n_hnt_NameComesFrom_CPUINFO_FILE, + kmp_i18n_hnt_NotEnoughMemory, + kmp_i18n_hnt_ValidBoolValues, + kmp_i18n_hnt_BufferOverflow, + kmp_i18n_hnt_RunningAtMaxPriority, + kmp_i18n_hnt_ChangeMonitorStackSize, + kmp_i18n_hnt_ChangeWorkerStackSize, + kmp_i18n_hnt_IncreaseWorkerStackSize, + kmp_i18n_hnt_DecreaseWorkerStackSize, + kmp_i18n_hnt_Decrease_NUM_THREADS, + kmp_i18n_hnt_IncreaseMonitorStackSize, + kmp_i18n_hnt_DecreaseMonitorStackSize, + kmp_i18n_hnt_DecreaseNumberOfThreadsInUse, + kmp_i18n_hnt_DefaultScheduleKindUsed, + kmp_i18n_hnt_GetNewerLibrary, + kmp_i18n_hnt_CheckEnvVar, + kmp_i18n_hnt_OBSOLETE45, + kmp_i18n_hnt_OBSOLETE46, + kmp_i18n_hnt_BadExeFormat, + kmp_i18n_hnt_SystemLimitOnThreads, + kmp_i18n_hnt_last, + + kmp_i18n_xxx_lastest + +}; // enum kmp_i18n_id + +typedef enum kmp_i18n_id kmp_i18n_id_t; + + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_import.c 
b/contrib/libs/cxxsupp/openmp/kmp_import.c index 6f0105602fd..42fba412c1f 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_import.c +++ b/contrib/libs/cxxsupp/openmp/kmp_import.c @@ -1,42 +1,42 @@ -/* - * kmp_import.c - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -/* - ------------------------------------------------------------------------------------------------ - Object generated from this source file is linked to Windows* OS DLL import library (libompmd.lib) - only! It is not a part of regular static or dynamic OpenMP RTL. Any code that just needs to go - in the libompmd.lib (but not in libompmt.lib and libompmd.dll) should be placed in this - file. - ------------------------------------------------------------------------------------------------ -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - These symbols are required for mutual exclusion with Microsoft OpenMP RTL (and compatibility - with MS Compiler). -*/ - -int _You_must_link_with_exactly_one_OpenMP_library = 1; -int _You_must_link_with_Intel_OpenMP_library = 1; -int _You_must_link_with_Microsoft_OpenMP_library = 1; - -#ifdef __cplusplus -} -#endif - -// end of file // +/* + * kmp_import.c + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +/* + ------------------------------------------------------------------------------------------------ + Object generated from this source file is linked to Windows* OS DLL import library (libompmd.lib) + only! It is not a part of regular static or dynamic OpenMP RTL. Any code that just needs to go + in the libompmd.lib (but not in libompmt.lib and libompmd.dll) should be placed in this + file. + ------------------------------------------------------------------------------------------------ +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + These symbols are required for mutual exclusion with Microsoft OpenMP RTL (and compatibility + with MS Compiler). +*/ + +int _You_must_link_with_exactly_one_OpenMP_library = 1; +int _You_must_link_with_Intel_OpenMP_library = 1; +int _You_must_link_with_Microsoft_OpenMP_library = 1; + +#ifdef __cplusplus +} +#endif + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_io.c b/contrib/libs/cxxsupp/openmp/kmp_io.c index bd16a970f88..ef808af8fb8 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_io.c +++ b/contrib/libs/cxxsupp/openmp/kmp_io.c @@ -1,248 +1,248 @@ -/* - * KMP_IO.c -- RTL IO - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include -#include -#include -#include -#include -#ifndef __ABSOFT_WIN -# include -#endif - -#include "kmp_os.h" -#include "kmp_lock.h" -#include "kmp_str.h" -#include "kmp_io.h" -#include "kmp.h" // KMP_GTID_DNE, __kmp_debug_buf, etc - -#if KMP_OS_WINDOWS -# pragma warning( push ) -# pragma warning( disable: 271 310 ) -# include -# pragma warning( pop ) -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -kmp_bootstrap_lock_t __kmp_stdio_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_stdio_lock ); /* Control stdio functions */ -kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_console_lock ); /* Control console initialization */ - -#if KMP_OS_WINDOWS - - # ifdef KMP_DEBUG - /* __kmp_stdout is used only for dev build */ - static HANDLE __kmp_stdout = NULL; - # endif - static HANDLE __kmp_stderr = NULL; - static int __kmp_console_exists = FALSE; - static kmp_str_buf_t __kmp_console_buf; - - static int - is_console( void ) - { - char buffer[ 128 ]; - DWORD rc = 0; - DWORD err = 0; - // Try to get console title. - SetLastError( 0 ); - // GetConsoleTitle does not reset last error in case of success or short buffer, - // so we need to clear it explicitly. - rc = GetConsoleTitle( buffer, sizeof( buffer ) ); - if ( rc == 0 ) { - // rc == 0 means getting console title failed. Let us find out why. - err = GetLastError(); - // err == 0 means buffer too short (we suppose console exists). - // In Window applications we usually have err == 6 (invalid handle). 
- }; // if - return rc > 0 || err == 0; - } - - void - __kmp_close_console( void ) - { - /* wait until user presses return before closing window */ - /* TODO only close if a window was opened */ - if( __kmp_console_exists ) { - #ifdef KMP_DEBUG - /* standard out is used only in dev build */ - __kmp_stdout = NULL; - #endif - __kmp_stderr = NULL; - __kmp_str_buf_free( &__kmp_console_buf ); - __kmp_console_exists = FALSE; - } - } - - /* For windows, call this before stdout, stderr, or stdin are used. - * It opens a console window and starts processing */ - static void - __kmp_redirect_output( void ) - { - __kmp_acquire_bootstrap_lock( &__kmp_console_lock ); - - if( ! __kmp_console_exists ) { - #ifdef KMP_DEBUG - /* standard out is used only in dev build */ - HANDLE ho; - #endif - HANDLE he; - - __kmp_str_buf_init( &__kmp_console_buf ); - - AllocConsole(); - // We do not check the result of AllocConsole because - // 1. the call is harmless - // 2. it is not clear how to communicate failue - // 3. we will detect failure later when we get handle(s) - +/* + * KMP_IO.c -- RTL IO + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include +#include +#include +#include +#include +#ifndef __ABSOFT_WIN +# include +#endif + +#include "kmp_os.h" +#include "kmp_lock.h" +#include "kmp_str.h" +#include "kmp_io.h" +#include "kmp.h" // KMP_GTID_DNE, __kmp_debug_buf, etc + +#if KMP_OS_WINDOWS +# pragma warning( push ) +# pragma warning( disable: 271 310 ) +# include +# pragma warning( pop ) +#endif + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +kmp_bootstrap_lock_t __kmp_stdio_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_stdio_lock ); /* Control stdio functions */ +kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_console_lock ); /* Control console initialization */ + +#if KMP_OS_WINDOWS + + # ifdef KMP_DEBUG + /* __kmp_stdout is used only for dev build */ + static HANDLE __kmp_stdout = NULL; + # endif + static HANDLE __kmp_stderr = NULL; + static int __kmp_console_exists = FALSE; + static kmp_str_buf_t __kmp_console_buf; + + static int + is_console( void ) + { + char buffer[ 128 ]; + DWORD rc = 0; + DWORD err = 0; + // Try to get console title. + SetLastError( 0 ); + // GetConsoleTitle does not reset last error in case of success or short buffer, + // so we need to clear it explicitly. + rc = GetConsoleTitle( buffer, sizeof( buffer ) ); + if ( rc == 0 ) { + // rc == 0 means getting console title failed. Let us find out why. + err = GetLastError(); + // err == 0 means buffer too short (we suppose console exists). + // In Window applications we usually have err == 6 (invalid handle). 
+ }; // if + return rc > 0 || err == 0; + } + + void + __kmp_close_console( void ) + { + /* wait until user presses return before closing window */ + /* TODO only close if a window was opened */ + if( __kmp_console_exists ) { #ifdef KMP_DEBUG - ho = GetStdHandle( STD_OUTPUT_HANDLE ); - if ( ho == INVALID_HANDLE_VALUE || ho == NULL ) { - - DWORD err = GetLastError(); - // TODO: output error somehow (maybe message box) - __kmp_stdout = NULL; - - } else { - - __kmp_stdout = ho; // temporary code, need new global for ho - - } - #endif - he = GetStdHandle( STD_ERROR_HANDLE ); - if ( he == INVALID_HANDLE_VALUE || he == NULL ) { - - DWORD err = GetLastError(); - // TODO: output error somehow (maybe message box) - __kmp_stderr = NULL; - - } else { - - __kmp_stderr = he; // temporary code, need new global - } - __kmp_console_exists = TRUE; - } - __kmp_release_bootstrap_lock( &__kmp_console_lock ); - } - -#else - #define __kmp_stderr (stderr) -#endif /* KMP_OS_WINDOWS */ - -void -__kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap ) -{ - #if KMP_OS_WINDOWS - if( !__kmp_console_exists ) { - __kmp_redirect_output(); - } - if( ! __kmp_stderr && __kmp_io == kmp_err ) { - return; - } - #ifdef KMP_DEBUG - if( ! __kmp_stdout && __kmp_io == kmp_out ) { - return; - } - #endif - #endif /* KMP_OS_WINDOWS */ - - if ( __kmp_debug_buf && __kmp_debug_buffer != NULL ) { - - int dc = ( __kmp_debug_buf_atomic ? 
- KMP_TEST_THEN_INC32( & __kmp_debug_count) : __kmp_debug_count++ ) - % __kmp_debug_buf_lines; - char *db = & __kmp_debug_buffer[ dc * __kmp_debug_buf_chars ]; - int chars = 0; - - #ifdef KMP_DEBUG_PIDS - chars = KMP_SNPRINTF( db, __kmp_debug_buf_chars, "pid=%d: ", (kmp_int32)getpid() ); - #endif - chars += KMP_VSNPRINTF( db, __kmp_debug_buf_chars, format, ap ); - - if ( chars + 1 > __kmp_debug_buf_chars ) { - if ( chars + 1 > __kmp_debug_buf_warn_chars ) { - #if KMP_OS_WINDOWS - DWORD count; - __kmp_str_buf_print( &__kmp_console_buf, - "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n", - chars + 1 ); - WriteFile( __kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, &count, NULL ); - __kmp_str_buf_clear( &__kmp_console_buf ); - #else - fprintf( __kmp_stderr, - "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n", - chars + 1 ); - fflush( __kmp_stderr ); - #endif - __kmp_debug_buf_warn_chars = chars + 1; - } - /* terminate string if overflow occurred */ - db[ __kmp_debug_buf_chars - 2 ] = '\n'; - db[ __kmp_debug_buf_chars - 1 ] = '\0'; - } - } else { - #if KMP_OS_WINDOWS - DWORD count; - #ifdef KMP_DEBUG_PIDS - __kmp_str_buf_print( &__kmp_console_buf, "pid=%d: ", - (kmp_int32)getpid() ); - #endif - __kmp_str_buf_vprint( &__kmp_console_buf, format, ap ); - WriteFile( - __kmp_stderr, - __kmp_console_buf.str, - __kmp_console_buf.used, - &count, - NULL - ); - __kmp_str_buf_clear( &__kmp_console_buf ); - #else - #ifdef KMP_DEBUG_PIDS - fprintf( __kmp_stderr, "pid=%d: ", (kmp_int32)getpid() ); - #endif - vfprintf( __kmp_stderr, format, ap ); - fflush( __kmp_stderr ); - #endif - } -} - -void -__kmp_printf( char const * format, ... ) -{ - va_list ap; - va_start( ap, format ); - - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_vprintf( kmp_err, format, ap ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); - - va_end( ap ); -} - -void -__kmp_printf_no_lock( char const * format, ... 
) -{ - va_list ap; - va_start( ap, format ); - - __kmp_vprintf( kmp_err, format, ap ); - - va_end( ap ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ + /* standard out is used only in dev build */ + __kmp_stdout = NULL; + #endif + __kmp_stderr = NULL; + __kmp_str_buf_free( &__kmp_console_buf ); + __kmp_console_exists = FALSE; + } + } + + /* For windows, call this before stdout, stderr, or stdin are used. + * It opens a console window and starts processing */ + static void + __kmp_redirect_output( void ) + { + __kmp_acquire_bootstrap_lock( &__kmp_console_lock ); + + if( ! __kmp_console_exists ) { + #ifdef KMP_DEBUG + /* standard out is used only in dev build */ + HANDLE ho; + #endif + HANDLE he; + + __kmp_str_buf_init( &__kmp_console_buf ); + + AllocConsole(); + // We do not check the result of AllocConsole because + // 1. the call is harmless + // 2. it is not clear how to communicate failue + // 3. 
we will detect failure later when we get handle(s) + + #ifdef KMP_DEBUG + ho = GetStdHandle( STD_OUTPUT_HANDLE ); + if ( ho == INVALID_HANDLE_VALUE || ho == NULL ) { + + DWORD err = GetLastError(); + // TODO: output error somehow (maybe message box) + __kmp_stdout = NULL; + + } else { + + __kmp_stdout = ho; // temporary code, need new global for ho + + } + #endif + he = GetStdHandle( STD_ERROR_HANDLE ); + if ( he == INVALID_HANDLE_VALUE || he == NULL ) { + + DWORD err = GetLastError(); + // TODO: output error somehow (maybe message box) + __kmp_stderr = NULL; + + } else { + + __kmp_stderr = he; // temporary code, need new global + } + __kmp_console_exists = TRUE; + } + __kmp_release_bootstrap_lock( &__kmp_console_lock ); + } + +#else + #define __kmp_stderr (stderr) +#endif /* KMP_OS_WINDOWS */ + +void +__kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap ) +{ + #if KMP_OS_WINDOWS + if( !__kmp_console_exists ) { + __kmp_redirect_output(); + } + if( ! __kmp_stderr && __kmp_io == kmp_err ) { + return; + } + #ifdef KMP_DEBUG + if( ! __kmp_stdout && __kmp_io == kmp_out ) { + return; + } + #endif + #endif /* KMP_OS_WINDOWS */ + + if ( __kmp_debug_buf && __kmp_debug_buffer != NULL ) { + + int dc = ( __kmp_debug_buf_atomic ? 
+ KMP_TEST_THEN_INC32( & __kmp_debug_count) : __kmp_debug_count++ ) + % __kmp_debug_buf_lines; + char *db = & __kmp_debug_buffer[ dc * __kmp_debug_buf_chars ]; + int chars = 0; + + #ifdef KMP_DEBUG_PIDS + chars = KMP_SNPRINTF( db, __kmp_debug_buf_chars, "pid=%d: ", (kmp_int32)getpid() ); + #endif + chars += KMP_VSNPRINTF( db, __kmp_debug_buf_chars, format, ap ); + + if ( chars + 1 > __kmp_debug_buf_chars ) { + if ( chars + 1 > __kmp_debug_buf_warn_chars ) { + #if KMP_OS_WINDOWS + DWORD count; + __kmp_str_buf_print( &__kmp_console_buf, + "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n", + chars + 1 ); + WriteFile( __kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, &count, NULL ); + __kmp_str_buf_clear( &__kmp_console_buf ); + #else + fprintf( __kmp_stderr, + "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n", + chars + 1 ); + fflush( __kmp_stderr ); + #endif + __kmp_debug_buf_warn_chars = chars + 1; + } + /* terminate string if overflow occurred */ + db[ __kmp_debug_buf_chars - 2 ] = '\n'; + db[ __kmp_debug_buf_chars - 1 ] = '\0'; + } + } else { + #if KMP_OS_WINDOWS + DWORD count; + #ifdef KMP_DEBUG_PIDS + __kmp_str_buf_print( &__kmp_console_buf, "pid=%d: ", + (kmp_int32)getpid() ); + #endif + __kmp_str_buf_vprint( &__kmp_console_buf, format, ap ); + WriteFile( + __kmp_stderr, + __kmp_console_buf.str, + __kmp_console_buf.used, + &count, + NULL + ); + __kmp_str_buf_clear( &__kmp_console_buf ); + #else + #ifdef KMP_DEBUG_PIDS + fprintf( __kmp_stderr, "pid=%d: ", (kmp_int32)getpid() ); + #endif + vfprintf( __kmp_stderr, format, ap ); + fflush( __kmp_stderr ); + #endif + } +} + +void +__kmp_printf( char const * format, ... ) +{ + va_list ap; + va_start( ap, format ); + + __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_vprintf( kmp_err, format, ap ); + __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + + va_end( ap ); +} + +void +__kmp_printf_no_lock( char const * format, ... 
) +{ + va_list ap; + va_start( ap, format ); + + __kmp_vprintf( kmp_err, format, ap ); + + va_end( ap ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_io.h b/contrib/libs/cxxsupp/openmp/kmp_io.h index cbc74027c5c..a0caa644eed 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_io.h +++ b/contrib/libs/cxxsupp/openmp/kmp_io.h @@ -1,44 +1,44 @@ -/* - * kmp_io.h -- RTL IO header file. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_IO_H -#define KMP_IO_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -enum kmp_io { - kmp_out = 0, - kmp_err -}; - -extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */ -extern kmp_bootstrap_lock_t __kmp_console_lock; /* Control console initialization */ - -extern void __kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap ); -extern void __kmp_printf( char const * format, ... ); -extern void __kmp_printf_no_lock( char const * format, ... ); -extern void __kmp_close_console( void ); - -#ifdef __cplusplus -} -#endif - -#endif /* KMP_IO_H */ - +/* + * kmp_io.h -- RTL IO header file. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_IO_H +#define KMP_IO_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +enum kmp_io { + kmp_out = 0, + kmp_err +}; + +extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */ +extern kmp_bootstrap_lock_t __kmp_console_lock; /* Control console initialization */ + +extern void __kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap ); +extern void __kmp_printf( char const * format, ... ); +extern void __kmp_printf_no_lock( char const * format, ... ); +extern void __kmp_close_console( void ); + +#ifdef __cplusplus +} +#endif + +#endif /* KMP_IO_H */ + diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.c b/contrib/libs/cxxsupp/openmp/kmp_itt.c index 89d665b8b60..486d63550ed 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_itt.c +++ b/contrib/libs/cxxsupp/openmp/kmp_itt.c @@ -1,144 +1,144 @@ -#include "kmp_config.h" - -#if USE_ITT_BUILD -/* - * kmp_itt.c -- ITT Notify interface. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include "kmp_itt.h" - -#if KMP_DEBUG - #include "kmp_itt.inl" -#endif - - -#if USE_ITT_NOTIFY - - kmp_int32 __kmp_barrier_domain_count; - kmp_int32 __kmp_region_domain_count; - __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; - __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; - __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; - kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; - __itt_domain * metadata_domain = NULL; - - #include "kmp_version.h" - #include "kmp_i18n.h" - #include "kmp_str.h" - - KMP_BUILD_ASSERT( sizeof( kmp_itt_mark_t ) == sizeof( __itt_mark_type ) ); - - /* - Previously used warnings: - - KMP_WARNING( IttAllNotifDisabled ); - KMP_WARNING( IttObjNotifDisabled ); - KMP_WARNING( IttMarkNotifDisabled ); - KMP_WARNING( IttUnloadLibFailed, libittnotify ); - */ - - - kmp_int32 __kmp_itt_prepare_delay = 0; - kmp_bootstrap_lock_t __kmp_itt_debug_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_itt_debug_lock ); - -#endif // USE_ITT_NOTIFY - -void __kmp_itt_initialize() { - - // ITTNotify library is loaded and initialized at first call to any ittnotify function, - // so we do not need to explicitly load it any more. - // Jusr report OMP RTL version to ITTNotify. - - #if USE_ITT_NOTIFY - // Report OpenMP RTL version. 
- kmp_str_buf_t buf; - __itt_mark_type version; - __kmp_str_buf_init( & buf ); - __kmp_str_buf_print( - & buf, - "OMP RTL Version %d.%d.%d", - __kmp_version_major, - __kmp_version_minor, - __kmp_version_build - ); - if ( __itt_api_version_ptr != NULL ) { - __kmp_str_buf_print( & buf, ":%s", __itt_api_version() ); - }; // if - version = __itt_mark_create( buf.str ); - __itt_mark( version, NULL ); - __kmp_str_buf_free( & buf ); - #endif - -} // __kmp_itt_initialize - - -void __kmp_itt_destroy() { - #if USE_ITT_NOTIFY - __kmp_itt_fini_ittlib(); - #endif -} // __kmp_itt_destroy - - -extern "C" -void -__itt_error_handler( - __itt_error_code err, - va_list args -) { - - switch ( err ) { - case __itt_error_no_module : { - char const * library = va_arg( args, char const * ); - #if KMP_OS_WINDOWS - int sys_err = va_arg( args, int ); - __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null ); - #else - char const * sys_err = va_arg( args, char const * ); - __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRMESG( sys_err ), __kmp_msg_null ); - #endif - } break; - case __itt_error_no_symbol : { - char const * library = va_arg( args, char const * ); - char const * symbol = va_arg( args, char const * ); - KMP_WARNING( IttLookupFailed, symbol, library ); - } break; - case __itt_error_unknown_group : { - char const * var = va_arg( args, char const * ); - char const * group = va_arg( args, char const * ); - KMP_WARNING( IttUnknownGroup, var, group ); - } break; - case __itt_error_env_too_long : { - char const * var = va_arg( args, char const * ); - size_t act_len = va_arg( args, size_t ); - size_t max_len = va_arg( args, size_t ); - KMP_WARNING( IttEnvVarTooLong, var, (unsigned long) act_len, (unsigned long) max_len ); - } break; - case __itt_error_cant_read_env : { - char const * var = va_arg( args, char const * ); - int sys_err = va_arg( args, int ); - __kmp_msg( kmp_ms_warning, KMP_MSG( CantGetEnvVar, 
var ), KMP_ERR( sys_err ), __kmp_msg_null ); - } break; - case __itt_error_system : { - char const * func = va_arg( args, char const * ); - int sys_err = va_arg( args, int ); - __kmp_msg( kmp_ms_warning, KMP_MSG( IttFunctionError, func ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null ); - } break; - default : { - KMP_WARNING( IttUnknownError, err ); - }; - }; // switch - -} // __itt_error_handler - -#endif /* USE_ITT_BUILD */ +#include "kmp_config.h" + +#if USE_ITT_BUILD +/* + * kmp_itt.c -- ITT Notify interface. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp_itt.h" + +#if KMP_DEBUG + #include "kmp_itt.inl" +#endif + + +#if USE_ITT_NOTIFY + + kmp_int32 __kmp_barrier_domain_count; + kmp_int32 __kmp_region_domain_count; + __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; + __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; + __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; + kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; + __itt_domain * metadata_domain = NULL; + + #include "kmp_version.h" + #include "kmp_i18n.h" + #include "kmp_str.h" + + KMP_BUILD_ASSERT( sizeof( kmp_itt_mark_t ) == sizeof( __itt_mark_type ) ); + + /* + Previously used warnings: + + KMP_WARNING( IttAllNotifDisabled ); + KMP_WARNING( IttObjNotifDisabled ); + KMP_WARNING( IttMarkNotifDisabled ); + KMP_WARNING( IttUnloadLibFailed, libittnotify ); + */ + + + kmp_int32 __kmp_itt_prepare_delay = 0; + kmp_bootstrap_lock_t __kmp_itt_debug_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_itt_debug_lock ); + +#endif // USE_ITT_NOTIFY + +void __kmp_itt_initialize() { + + // ITTNotify library is loaded and initialized at first call to any 
ittnotify function, + // so we do not need to explicitly load it any more. + // Jusr report OMP RTL version to ITTNotify. + + #if USE_ITT_NOTIFY + // Report OpenMP RTL version. + kmp_str_buf_t buf; + __itt_mark_type version; + __kmp_str_buf_init( & buf ); + __kmp_str_buf_print( + & buf, + "OMP RTL Version %d.%d.%d", + __kmp_version_major, + __kmp_version_minor, + __kmp_version_build + ); + if ( __itt_api_version_ptr != NULL ) { + __kmp_str_buf_print( & buf, ":%s", __itt_api_version() ); + }; // if + version = __itt_mark_create( buf.str ); + __itt_mark( version, NULL ); + __kmp_str_buf_free( & buf ); + #endif + +} // __kmp_itt_initialize + + +void __kmp_itt_destroy() { + #if USE_ITT_NOTIFY + __kmp_itt_fini_ittlib(); + #endif +} // __kmp_itt_destroy + + +extern "C" +void +__itt_error_handler( + __itt_error_code err, + va_list args +) { + + switch ( err ) { + case __itt_error_no_module : { + char const * library = va_arg( args, char const * ); + #if KMP_OS_WINDOWS + int sys_err = va_arg( args, int ); + __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null ); + #else + char const * sys_err = va_arg( args, char const * ); + __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRMESG( sys_err ), __kmp_msg_null ); + #endif + } break; + case __itt_error_no_symbol : { + char const * library = va_arg( args, char const * ); + char const * symbol = va_arg( args, char const * ); + KMP_WARNING( IttLookupFailed, symbol, library ); + } break; + case __itt_error_unknown_group : { + char const * var = va_arg( args, char const * ); + char const * group = va_arg( args, char const * ); + KMP_WARNING( IttUnknownGroup, var, group ); + } break; + case __itt_error_env_too_long : { + char const * var = va_arg( args, char const * ); + size_t act_len = va_arg( args, size_t ); + size_t max_len = va_arg( args, size_t ); + KMP_WARNING( IttEnvVarTooLong, var, (unsigned long) act_len, (unsigned long) max_len ); + } break; + 
case __itt_error_cant_read_env : { + char const * var = va_arg( args, char const * ); + int sys_err = va_arg( args, int ); + __kmp_msg( kmp_ms_warning, KMP_MSG( CantGetEnvVar, var ), KMP_ERR( sys_err ), __kmp_msg_null ); + } break; + case __itt_error_system : { + char const * func = va_arg( args, char const * ); + int sys_err = va_arg( args, int ); + __kmp_msg( kmp_ms_warning, KMP_MSG( IttFunctionError, func ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null ); + } break; + default : { + KMP_WARNING( IttUnknownError, err ); + }; + }; // switch + +} // __itt_error_handler + +#endif /* USE_ITT_BUILD */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.h b/contrib/libs/cxxsupp/openmp/kmp_itt.h index 8797c57802a..925a4f04ca2 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_itt.h +++ b/contrib/libs/cxxsupp/openmp/kmp_itt.h @@ -1,309 +1,309 @@ -#if USE_ITT_BUILD -/* - * kmp_itt.h -- ITT Notify interface. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_ITT_H -#define KMP_ITT_H - -#include "kmp_lock.h" - -#define INTEL_ITTNOTIFY_API_PRIVATE -#include "ittnotify.h" -#include "legacy/ittnotify.h" - -#if KMP_DEBUG - #define __kmp_inline // Turn off inlining in debug mode. -#else - #define __kmp_inline static inline -#endif - -#if USE_ITT_NOTIFY - extern kmp_int32 __kmp_itt_prepare_delay; -# ifdef __cplusplus - extern "C" void __kmp_itt_fini_ittlib(void); -# else - extern void __kmp_itt_fini_ittlib(void); -# endif -#endif - -// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled. 
-#define USE_ITT_BUILD_ARG(x) ,x - -void __kmp_itt_initialize(); -void __kmp_itt_destroy(); - -// ------------------------------------------------------------------------------------------------- -// New stuff for reporting high-level constructs. -// ------------------------------------------------------------------------------------------------- - -// Note the naming convention: -// __kmp_itt_xxxing() function should be called before action, while -// __kmp_itt_xxxed() function should be called after action. - -// --- Parallel region reporting --- -__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 ); // Master only, before forking threads. -__kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads. - // (*) Note: A thread may execute tasks after this point, though. - -// --- Frame reporting --- -// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel -__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 ); - -// --- Metadata reporting --- -// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction -if this is a reduction barrier -__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ); -// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size -__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ); -__kmp_inline void __kmp_itt_metadata_single( ident_t * loc ); - -// --- Barrier reporting --- -__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 ); -__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object ); 
-__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object ); -__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object ); - -// --- Taskwait reporting --- -__kmp_inline void * __kmp_itt_taskwait_object( int gtid ); -__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object ); -__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object ); - -// --- Task reporting --- -__kmp_inline void __kmp_itt_task_starting( void * object ); -__kmp_inline void __kmp_itt_task_finished( void * object ); - -// --- Lock reporting --- -#if KMP_USE_DYNAMIC_LOCK -__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * ); -#else -__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock ); -#endif -__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock ); - -// --- Critical reporting --- -#if KMP_USE_DYNAMIC_LOCK -__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * ); -#else -__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock ); -#endif -__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock ); - -// --- Single reporting --- -__kmp_inline void __kmp_itt_single_start( int gtid ); -__kmp_inline void __kmp_itt_single_end( int gtid ); - -// --- Ordered reporting --- -__kmp_inline void __kmp_itt_ordered_init( int gtid ); -__kmp_inline void __kmp_itt_ordered_prep( int gtid ); -__kmp_inline void __kmp_itt_ordered_start( int gtid ); -__kmp_inline void 
__kmp_itt_ordered_end( int gtid ); - -// --- Threads reporting --- -__kmp_inline void __kmp_itt_thread_ignore(); -__kmp_inline void __kmp_itt_thread_name( int gtid ); - -// --- System objects --- -__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name ); - -// --- Stack stitching --- -__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void); -__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller); -__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller); -__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller); - -// ------------------------------------------------------------------------------------------------- -// Old stuff for reporting low-level internal synchronization. -// ------------------------------------------------------------------------------------------------- - -#if USE_ITT_NOTIFY - - /* - * Support for SSC marks, which are used by SDE - * http://software.intel.com/en-us/articles/intel-software-development-emulator - * to mark points in instruction traces that represent spin-loops and are - * therefore uninteresting when collecting traces for architecture simulation. - */ - #ifndef INCLUDE_SSC_MARKS - # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64) - #endif - - /* Linux 64 only for now */ - #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64) - // Portable (at least for gcc and icc) code to insert the necessary instructions - // to set %ebx and execute the unlikely no-op. 
- #if defined( __INTEL_COMPILER ) - # define INSERT_SSC_MARK(tag) __SSC_MARK(tag) - #else - # define INSERT_SSC_MARK(tag) \ - __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx") - #endif - #else - # define INSERT_SSC_MARK(tag) ((void)0) - #endif - - /* Markers for the start and end of regions that represent polling and - * are therefore uninteresting to architectural simulations 0x4376 and - * 0x4377 are arbitrary numbers that should be unique in the space of - * SSC tags, but there is no central issuing authority rather - * randomness is expected to work. - */ - #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376) - #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377) - - // Markers for architecture simulation. - // FORKING : Before the master thread forks. - // JOINING : At the start of the join. - // INVOKING : Before the threads invoke microtasks. - // DISPATCH_INIT: At the start of dynamically scheduled loop. - // DISPATCH_NEXT: After claming next iteration of dynamically scheduled loop. - #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693) - #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694) - #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695) - #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696) - #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697) - - // The object is an address that associates a specific set of the prepare, acquire, release, - // and cancel operations. - - /* Sync prepare indicates a thread is going to start waiting for another thread - to send a release event. This operation should be done just before the thread - begins checking for the existence of the release event */ - - /* Sync cancel indicates a thread is cancelling a wait on another thread anc - continuing execution without waiting for the other thread to release it */ - - /* Sync acquired indicates a thread has received a release event from another - thread and has stopped waiting. 
This operation must occur only after the release - event is received. */ - - /* Sync release indicates a thread is going to send a release event to another thread - so it will stop waiting and continue execution. This operation must just happen before - the release event. */ - - #define KMP_FSYNC_PREPARE( obj ) __itt_fsync_prepare( (void *)( obj ) ) - #define KMP_FSYNC_CANCEL( obj ) __itt_fsync_cancel( (void *)( obj ) ) - #define KMP_FSYNC_ACQUIRED( obj ) __itt_fsync_acquired( (void *)( obj ) ) - #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) ) - - /* - In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay - (and not called at all if waiting time is small). So, in spin loops, do not use - KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before spin loop), - KMP_FSYNC_SPIN_PREPARE() (whithin the spin loop), and KMP_FSYNC_SPIN_ACQUIRED(). - See KMP_WAIT_YIELD() for example. - */ - - #undef KMP_FSYNC_SPIN_INIT - #define KMP_FSYNC_SPIN_INIT( obj, spin ) \ - int sync_iters = 0; \ - if ( __itt_fsync_prepare_ptr ) { \ - if ( obj == NULL ) { \ - obj = spin; \ - } /* if */ \ - } /* if */ \ - SSC_MARK_SPIN_START() - - #undef KMP_FSYNC_SPIN_PREPARE - #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \ - if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \ - ++ sync_iters; \ - if ( sync_iters >= __kmp_itt_prepare_delay ) { \ - KMP_FSYNC_PREPARE( (void*) obj ); \ - } /* if */ \ - } /* if */ \ - } while (0) - #undef KMP_FSYNC_SPIN_ACQUIRED - #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \ - SSC_MARK_SPIN_END(); \ - if ( sync_iters >= __kmp_itt_prepare_delay ) { \ - KMP_FSYNC_ACQUIRED( (void*) obj ); \ - } /* if */ \ - } while (0) - - /* ITT will not report objects created within KMP_ITT_IGNORE(), e. 
g.: - KMP_ITT_IGNORE( - ptr = malloc( size ); - ); - */ - #define KMP_ITT_IGNORE( statement ) do { \ - __itt_state_t __itt_state_; \ - if ( __itt_state_get_ptr ) { \ - __itt_state_ = __itt_state_get(); \ - __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \ - } /* if */ \ - { statement } \ - if ( __itt_state_get_ptr ) { \ - __itt_state_set( __itt_state_ ); \ - } /* if */ \ - } while (0) - - const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to - // different OpenMP regions in the user source code). - extern kmp_int32 __kmp_barrier_domain_count; - extern kmp_int32 __kmp_region_domain_count; - extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; - extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; - extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; - extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; - extern __itt_domain * metadata_domain; - -#else - -// Null definitions of the synchronization tracing functions. -# define KMP_FSYNC_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_CANCEL( obj ) ((void)0) -# define KMP_FSYNC_ACQUIRED( obj ) ((void)0) -# define KMP_FSYNC_RELEASING( obj ) ((void)0) - -# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0) -# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0) - -# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0) - -#endif // USE_ITT_NOTIFY - -#if ! KMP_DEBUG - // In release mode include definitions of inline functions. - #include "kmp_itt.inl" -#endif - -#endif // KMP_ITT_H - -#else /* USE_ITT_BUILD */ - -// Null definitions of the synchronization tracing functions. -// If USE_ITT_BULID is not enabled, USE_ITT_NOTIFY cannot be either. -// By defining these we avoid unpleasant ifdef tests in many places. 
-# define KMP_FSYNC_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_CANCEL( obj ) ((void)0) -# define KMP_FSYNC_ACQUIRED( obj ) ((void)0) -# define KMP_FSYNC_RELEASING( obj ) ((void)0) - -# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0) -# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0) - -# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0) - -# define USE_ITT_BUILD_ARG(x) - -#endif /* USE_ITT_BUILD */ +#if USE_ITT_BUILD +/* + * kmp_itt.h -- ITT Notify interface. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_ITT_H +#define KMP_ITT_H + +#include "kmp_lock.h" + +#define INTEL_ITTNOTIFY_API_PRIVATE +#include "ittnotify.h" +#include "legacy/ittnotify.h" + +#if KMP_DEBUG + #define __kmp_inline // Turn off inlining in debug mode. +#else + #define __kmp_inline static inline +#endif + +#if USE_ITT_NOTIFY + extern kmp_int32 __kmp_itt_prepare_delay; +# ifdef __cplusplus + extern "C" void __kmp_itt_fini_ittlib(void); +# else + extern void __kmp_itt_fini_ittlib(void); +# endif +#endif + +// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled. +#define USE_ITT_BUILD_ARG(x) ,x + +void __kmp_itt_initialize(); +void __kmp_itt_destroy(); + +// ------------------------------------------------------------------------------------------------- +// New stuff for reporting high-level constructs. +// ------------------------------------------------------------------------------------------------- + +// Note the naming convention: +// __kmp_itt_xxxing() function should be called before action, while +// __kmp_itt_xxxed() function should be called after action. 
+ +// --- Parallel region reporting --- +__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 ); // Master only, before forking threads. +__kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads. + // (*) Note: A thread may execute tasks after this point, though. + +// --- Frame reporting --- +// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel +__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 ); + +// --- Metadata reporting --- +// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction -if this is a reduction barrier +__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ); +// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size +__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ); +__kmp_inline void __kmp_itt_metadata_single( ident_t * loc ); + +// --- Barrier reporting --- +__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 ); +__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object ); +__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object ); +__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object ); + +// --- Taskwait reporting --- +__kmp_inline void * __kmp_itt_taskwait_object( int gtid ); +__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object ); +__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object ); + +// --- Task reporting --- +__kmp_inline void __kmp_itt_task_starting( void * object ); +__kmp_inline void __kmp_itt_task_finished( 
void * object ); + +// --- Lock reporting --- +#if KMP_USE_DYNAMIC_LOCK +__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * ); +#else +__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock ); +#endif +__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock ); + +// --- Critical reporting --- +#if KMP_USE_DYNAMIC_LOCK +__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * ); +#else +__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock ); +#endif +__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock ); + +// --- Single reporting --- +__kmp_inline void __kmp_itt_single_start( int gtid ); +__kmp_inline void __kmp_itt_single_end( int gtid ); + +// --- Ordered reporting --- +__kmp_inline void __kmp_itt_ordered_init( int gtid ); +__kmp_inline void __kmp_itt_ordered_prep( int gtid ); +__kmp_inline void __kmp_itt_ordered_start( int gtid ); +__kmp_inline void __kmp_itt_ordered_end( int gtid ); + +// --- Threads reporting --- +__kmp_inline void __kmp_itt_thread_ignore(); +__kmp_inline void __kmp_itt_thread_name( int gtid ); + +// --- System objects --- +__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name ); + +// --- Stack stitching --- +__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void); +__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller); +__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller); +__kmp_inline void 
__kmp_itt_stack_callee_leave(__itt_caller); + +// ------------------------------------------------------------------------------------------------- +// Old stuff for reporting low-level internal synchronization. +// ------------------------------------------------------------------------------------------------- + +#if USE_ITT_NOTIFY + + /* + * Support for SSC marks, which are used by SDE + * http://software.intel.com/en-us/articles/intel-software-development-emulator + * to mark points in instruction traces that represent spin-loops and are + * therefore uninteresting when collecting traces for architecture simulation. + */ + #ifndef INCLUDE_SSC_MARKS + # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64) + #endif + + /* Linux 64 only for now */ + #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64) + // Portable (at least for gcc and icc) code to insert the necessary instructions + // to set %ebx and execute the unlikely no-op. + #if defined( __INTEL_COMPILER ) + # define INSERT_SSC_MARK(tag) __SSC_MARK(tag) + #else + # define INSERT_SSC_MARK(tag) \ + __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx") + #endif + #else + # define INSERT_SSC_MARK(tag) ((void)0) + #endif + + /* Markers for the start and end of regions that represent polling and + * are therefore uninteresting to architectural simulations 0x4376 and + * 0x4377 are arbitrary numbers that should be unique in the space of + * SSC tags, but there is no central issuing authority rather + * randomness is expected to work. + */ + #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376) + #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377) + + // Markers for architecture simulation. + // FORKING : Before the master thread forks. + // JOINING : At the start of the join. + // INVOKING : Before the threads invoke microtasks. + // DISPATCH_INIT: At the start of dynamically scheduled loop. + // DISPATCH_NEXT: After claming next iteration of dynamically scheduled loop. 
+ #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693) + #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694) + #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695) + #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696) + #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697) + + // The object is an address that associates a specific set of the prepare, acquire, release, + // and cancel operations. + + /* Sync prepare indicates a thread is going to start waiting for another thread + to send a release event. This operation should be done just before the thread + begins checking for the existence of the release event */ + + /* Sync cancel indicates a thread is cancelling a wait on another thread anc + continuing execution without waiting for the other thread to release it */ + + /* Sync acquired indicates a thread has received a release event from another + thread and has stopped waiting. This operation must occur only after the release + event is received. */ + + /* Sync release indicates a thread is going to send a release event to another thread + so it will stop waiting and continue execution. This operation must just happen before + the release event. */ + + #define KMP_FSYNC_PREPARE( obj ) __itt_fsync_prepare( (void *)( obj ) ) + #define KMP_FSYNC_CANCEL( obj ) __itt_fsync_cancel( (void *)( obj ) ) + #define KMP_FSYNC_ACQUIRED( obj ) __itt_fsync_acquired( (void *)( obj ) ) + #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) ) + + /* + In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay + (and not called at all if waiting time is small). So, in spin loops, do not use + KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before spin loop), + KMP_FSYNC_SPIN_PREPARE() (whithin the spin loop), and KMP_FSYNC_SPIN_ACQUIRED(). + See KMP_WAIT_YIELD() for example. 
+ */ + + #undef KMP_FSYNC_SPIN_INIT + #define KMP_FSYNC_SPIN_INIT( obj, spin ) \ + int sync_iters = 0; \ + if ( __itt_fsync_prepare_ptr ) { \ + if ( obj == NULL ) { \ + obj = spin; \ + } /* if */ \ + } /* if */ \ + SSC_MARK_SPIN_START() + + #undef KMP_FSYNC_SPIN_PREPARE + #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \ + if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \ + ++ sync_iters; \ + if ( sync_iters >= __kmp_itt_prepare_delay ) { \ + KMP_FSYNC_PREPARE( (void*) obj ); \ + } /* if */ \ + } /* if */ \ + } while (0) + #undef KMP_FSYNC_SPIN_ACQUIRED + #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \ + SSC_MARK_SPIN_END(); \ + if ( sync_iters >= __kmp_itt_prepare_delay ) { \ + KMP_FSYNC_ACQUIRED( (void*) obj ); \ + } /* if */ \ + } while (0) + + /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.: + KMP_ITT_IGNORE( + ptr = malloc( size ); + ); + */ + #define KMP_ITT_IGNORE( statement ) do { \ + __itt_state_t __itt_state_; \ + if ( __itt_state_get_ptr ) { \ + __itt_state_ = __itt_state_get(); \ + __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \ + } /* if */ \ + { statement } \ + if ( __itt_state_get_ptr ) { \ + __itt_state_set( __itt_state_ ); \ + } /* if */ \ + } while (0) + + const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to + // different OpenMP regions in the user source code). + extern kmp_int32 __kmp_barrier_domain_count; + extern kmp_int32 __kmp_region_domain_count; + extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; + extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; + extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; + extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; + extern __itt_domain * metadata_domain; + +#else + +// Null definitions of the synchronization tracing functions. 
+# define KMP_FSYNC_PREPARE( obj ) ((void)0) +# define KMP_FSYNC_CANCEL( obj ) ((void)0) +# define KMP_FSYNC_ACQUIRED( obj ) ((void)0) +# define KMP_FSYNC_RELEASING( obj ) ((void)0) + +# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0) +# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0) +# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0) + +# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0) + +#endif // USE_ITT_NOTIFY + +#if ! KMP_DEBUG + // In release mode include definitions of inline functions. + #include "kmp_itt.inl" +#endif + +#endif // KMP_ITT_H + +#else /* USE_ITT_BUILD */ + +// Null definitions of the synchronization tracing functions. +// If USE_ITT_BULID is not enabled, USE_ITT_NOTIFY cannot be either. +// By defining these we avoid unpleasant ifdef tests in many places. +# define KMP_FSYNC_PREPARE( obj ) ((void)0) +# define KMP_FSYNC_CANCEL( obj ) ((void)0) +# define KMP_FSYNC_ACQUIRED( obj ) ((void)0) +# define KMP_FSYNC_RELEASING( obj ) ((void)0) + +# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0) +# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0) +# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0) + +# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0) + +# define USE_ITT_BUILD_ARG(x) + +#endif /* USE_ITT_BUILD */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.inl b/contrib/libs/cxxsupp/openmp/kmp_itt.inl index 625d8798405..6dafa6c16e1 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_itt.inl +++ b/contrib/libs/cxxsupp/openmp/kmp_itt.inl @@ -1,1130 +1,1130 @@ -#if USE_ITT_BUILD -/* - * kmp_itt.inl -- Inline functions of ITT Notify. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -// Inline function definitions. 
This file should be included into kmp_itt.h file for prodiction -// build (to let compliler inline functions) or into kmp_itt.c file for debug build (to reduce -// the number of files to recompile and save build time). - - -#include "kmp.h" -#include "kmp_str.h" - -#if KMP_ITT_DEBUG - extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; - #define KMP_ITT_DEBUG_LOCK() { \ - __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \ - } - #define KMP_ITT_DEBUG_PRINT( ... ) { \ - fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \ - fprintf( stderr, __VA_ARGS__ ); \ - fflush( stderr ); \ - __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \ - } -#else - #define KMP_ITT_DEBUG_LOCK() - #define KMP_ITT_DEBUG_PRINT( ... ) -#endif // KMP_ITT_DEBUG - -// Ensure that the functions are static if they're supposed to be -// being inlined. Otherwise they cannot be used in more than one file, -// since there will be multiple definitions. -#if KMP_DEBUG -# define LINKAGE -#else -# define LINKAGE static inline -#endif - -// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this -// API to support user-defined synchronization primitives, but does not use ZCA; -// it would be safe to turn this off until wider support becomes available. -#if USE_ITT_ZCA -#ifdef __INTEL_COMPILER -# if __INTEL_COMPILER >= 1200 -# undef __itt_sync_acquired -# undef __itt_sync_releasing -# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr) -# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr) -# endif -#endif -#endif - -static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock ); - -/* - ------------------------------------------------------------------------------------------------ - Parallel region reporting. - - * __kmp_itt_region_forking should be called by master thread of a team. 
Exact moment of - call does not matter, but it should be completed before any thread of this team calls - __kmp_itt_region_starting. - * __kmp_itt_region_starting should be called by each thread of a team just before entering - parallel region body. - * __kmp_itt_region_finished should be called by each thread of a team right after returning - from parallel region body. - * __kmp_itt_region_joined should be called by master thread of a team, after all threads - called __kmp_itt_region_finished. - - Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more - user code -- such a thread can execute tasks. - - Note: The overhead of logging region_starting and region_finished in each thread is too large, - so these calls are not used. - - ------------------------------------------------------------------------------------------------ -*/ - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) { -#if USE_ITT_NOTIFY - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - if (team->t.t_active_level + serialized > 1) - { - // The frame notifications are only supported for the outermost teams. - return; - } - ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; - if (loc) { - // Use the reserved_2 field to store the index to the region domain. - // Assume that reserved_2 contains zero initially. Since zero is special - // value here, store the index into domain array increased by 1. 
- if (loc->reserved_2 == 0) { - if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) { - // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc - //} // AC: this block is to replace next unsynchronized line - - // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 - // field but put region index to the low two bytes and barrier indexes to the high - // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. - loc->reserved_2 |= (frm + 1); // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "$omp$parallel@[file:][:]" - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - if( barriers ) { - if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - const char * buff = NULL; - buff = __kmp_str_format("%s$omp$barrier@%s:%d", - str_loc.func, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - __kmp_str_free( &buff ); - // 
Save the barrier frame index to the high two bytes. - loc->reserved_2 |= (frm + 1) << 16; - } - } - __kmp_str_loc_free( &str_loc ); - __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL); - } - } else { // Region domain exists for this location - // Check if team size was changed. Then create new region domain for this location - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if( __kmp_itt_region_team_size[frm] != team_size ) { - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); - } else { // Team size was not changed. Use existing domain. - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n", - gtid, loc->reserved_2, serialized, loc ); - } -#endif -} // __kmp_itt_region_forking - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) { -#if USE_ITT_NOTIFY - if( region ) { - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - int serialized = ( region == 2 ? 1 : 0 ); - if (team->t.t_active_level + serialized > 1) - { - // The frame notifications are only supported for the outermost teams. - return; - } - //Check region domain has not been created before. It's index is saved in the low two bytes. 
- if ((loc->reserved_2 & 0x0000FFFF) == 0) { - if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - - // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 - // field but put region index to the low two bytes and barrier indexes to the high - // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. - loc->reserved_2 |= (frm + 1); // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "$omp$parallel:team_size@[file:][:]" - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } - } else { // Region domain exists for this location - // Check if team size was changed. 
Then create new region domain for this location - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if( __kmp_itt_region_team_size[frm] != team_size ) { - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } else { // Team size was not changed. Use existing domain. - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n", - gtid, loc->reserved_2, region, loc, begin, end ); - return; - } else { // called for barrier reporting - if (loc) { - if ((loc->reserved_2 & 0xFFFF0000) == 0) { - if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - // Save the barrier frame index to the high two bytes. 
- loc->reserved_2 |= (frm + 1) << 16; // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "$omp$frame@[file:][:]" - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - if( imbalance ) { - const char * buff_imb = NULL; - buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", - str_loc.func, team_size, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb ); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end ); - __kmp_str_free( &buff_imb ); - } else { - const char * buff = NULL; - buff = __kmp_str_format("%s$omp$barrier@%s:%d", - str_loc.func, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end ); - __kmp_str_free( &buff ); - } - __kmp_str_loc_free( &str_loc ); - } - } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS - if( imbalance ) { - __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end ); - } else { - __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end ); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", - gtid, loc->reserved_2, loc, begin, end ); - } - } -#endif -} // __kmp_itt_frame_submit - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) { -#if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( 
metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); - } - - __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance"); - - kmp_uint64 imbalance_data[ 4 ]; - imbalance_data[ 0 ] = begin; - imbalance_data[ 1 ] = end; - imbalance_data[ 2 ] = imbalance; - imbalance_data[ 3 ] = reduction; - - __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data); -#endif -} // __kmp_itt_metadata_imbalance - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) { -#if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); - } - - __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop"); - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - - kmp_uint64 loop_data[ 5 ]; - loop_data[ 0 ] = str_loc.line; - loop_data[ 1 ] = str_loc.col; - loop_data[ 2 ] = sched_type; - loop_data[ 3 ] = iterations; - loop_data[ 4 ] = chunk; - - __kmp_str_loc_free( &str_loc ); - - __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data); -#endif -} // __kmp_itt_metadata_loop - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_single( ident_t * loc ) { -#if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( 
metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); - } - - __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single"); - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - kmp_uint64 single_data[ 2 ]; - single_data[ 0 ] = str_loc.line; - single_data[ 1 ] = str_loc.col; - - __kmp_str_loc_free( &str_loc ); - - __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data); -#endif -} // __kmp_itt_metadata_single - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_starting( int gtid ) { -#if USE_ITT_NOTIFY -#endif -} // __kmp_itt_region_starting - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_finished( int gtid ) { -#if USE_ITT_NOTIFY -#endif -} // __kmp_itt_region_finished - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_joined( int gtid, int serialized ) { -#if USE_ITT_NOTIFY - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - if (team->t.t_active_level + serialized > 1) - { - // The frame notifications are only supported for the outermost teams. 
- return; - } - ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; - if (loc && loc->reserved_2) - { - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if(frm < KMP_MAX_FRAME_DOMAINS) { - KMP_ITT_DEBUG_LOCK(); - __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL); - KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n", - gtid, loc->reserved_2, serialized, loc ); - } - } -#endif -} // __kmp_itt_region_joined - -/* - ------------------------------------------------------------------------------------------------ - Barriers reporting. - - A barrier consists of two phases: - - 1. Gather -- master waits for arriving of all the worker threads; each worker thread - registers arrival and goes further. - 2. Release -- each worker threads waits until master lets it go; master lets worker threads - go. - - Function should be called by each thread: - - * __kmp_itt_barrier_starting() -- before arriving to the gather phase. - * __kmp_itt_barrier_middle() -- between gather and release phases. - * __kmp_itt_barrier_finished() -- after release phase. - - Note: Call __kmp_itt_barrier_object() before call to __kmp_itt_barrier_starting() and save - result in local variable. __kmp_itt_barrier_object(), being called too late (e. g. after gather - phase) would return itt sync object for the next barrier! - - ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have - barrier object or barrier data structure. Barrier is just a counter in team and thread - structures. We could use an address of team structure as an barrier sync object, but ITT wants - different objects for different barriers (even whithin the same team). So let us use - team address as barrier sync object for the first barrier, then increase it by one for the next - barrier, and so on (but wrap it not to use addresses outside of team structure). 
- - ------------------------------------------------------------------------------------------------ -*/ - -void * -__kmp_itt_barrier_object( - int gtid, - int bt, - int set_name, - int delta // 0 (current barrier) is default value; specify -1 to get previous barrier. -) { - void * object = NULL; -#if USE_ITT_NOTIFY - kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); - kmp_team_t * team = thr->th.th_team; - - // NOTE: - // If the function is called from __kmp_fork_barrier, team pointer can be NULL. This "if" - // helps to avoid crash. However, this is not complete solution, and reporting fork/join - // barriers to ITT should be revisited. - - if ( team != NULL ) { - - // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived - // by KMP_BARRIER_STATE_BUMP to get plain barrier counter. - kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta; - // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of - // different types do not have the same ids. - KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier ); - // This conditon is a must (we would have zero divide otherwise). - KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier ); - // More strong condition: make sure we have room at least for for two differtent ids - // (for each barrier type). - object = - reinterpret_cast< void * >( - kmp_uintptr_t( team ) - + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier - + bt - ); - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object ); - - if ( set_name ) { - ident_t const * loc = NULL; - char const * src = NULL; - char const * type = "OMP Barrier"; - switch ( bt ) { - case bs_plain_barrier : { - // For plain barrier compiler calls __kmpc_barrier() function, which saves - // location in thr->th.th_ident. - loc = thr->th.th_ident; - // Get the barrier type from flags provided by compiler. 
- kmp_int32 expl = 0; - kmp_uint32 impl = 0; - if ( loc != NULL ) { - src = loc->psource; - expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0; - impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0; - }; // if - if ( impl ) { - switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) { - case KMP_IDENT_BARRIER_IMPL_FOR : { - type = "OMP For Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_SECTIONS : { - type = "OMP Sections Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_SINGLE : { - type = "OMP Single Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_WORKSHARE : { - type = "OMP Workshare Barrier"; - } break; - default : { - type = "OMP Implicit Barrier"; - KMP_DEBUG_ASSERT( 0 ); - }; - }; /* switch */ - } else if ( expl ) { - type = "OMP Explicit Barrier"; - }; /* if */ - } break; - case bs_forkjoin_barrier : { - // In case of fork/join barrier we can read thr->th.th_ident, because it - // contains location of last passed construct (while join barrier is not - // such one). Use th_ident of master thread instead -- __kmp_join_call() - // called by the master thread saves location. - // - // AC: cannot read from master because __kmp_join_call may be not called - // yet, so we read the location from team. This is the same location. - // And team is valid at the enter to join barrier where this happens. 
- loc = team->t.t_ident; - if ( loc != NULL ) { - src = loc->psource; - }; // if - type = "OMP Join Barrier"; - } break; - }; // switch - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, type, src, __itt_attr_barrier ); - KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src ); - }; // if - - }; // if -#endif - return object; -} // __kmp_itt_barrier_object - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_starting( int gtid, void * object ) { -#if USE_ITT_NOTIFY - if ( !KMP_MASTER_GTID( gtid ) ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_releasing( object ); - KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object ); - }; // if - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object ); -#endif -} // __kmp_itt_barrier_starting - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_middle( int gtid, void * object ) { -#if USE_ITT_NOTIFY - if ( KMP_MASTER_GTID( gtid ) ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_releasing( object ); - KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object ); - } else { - }; // if -#endif -} // __kmp_itt_barrier_middle - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_finished( int gtid, void * object ) { -#if USE_ITT_NOTIFY - if ( KMP_MASTER_GTID( gtid ) ) { - } else { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object ); - }; // if -#endif -} // __kmp_itt_barrier_finished - -/* - ------------------------------------------------------------------------------------------------ - Taskwait reporting. 
- - ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have taskwait - structure, so we need to construct something. - -*/ - -void * -__kmp_itt_taskwait_object( int gtid ) { - void * object = NULL; -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); - kmp_taskdata_t * taskdata = thread -> th.th_current_task; - object = - reinterpret_cast< void * >( - kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t ) - ); - }; // if -#endif - return object; -} // __kmp_itt_taskwait_object - -void -__kmp_itt_taskwait_starting( - int gtid, - void * object -) { -#if USE_ITT_NOTIFY - kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); - kmp_taskdata_t * taskdata = thread -> th.th_current_task; - ident_t const * loc = taskdata->td_taskwait_ident; - char const * src = ( loc == NULL? NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, "OMP Taskwait", src, 0 ); - KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object ); -#endif -} // __kmp_itt_taskwait_starting - -void -__kmp_itt_taskwait_finished( - int gtid, - void * object -) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_destroy( object ); - KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object ); -#endif -} // __kmp_itt_taskwait_finished - -/* - ------------------------------------------------------------------------------------------------ - Task reporting. - - Only those tasks are reported which are executed by a thread spinning at barrier (or taskwait). - Synch object passed to the function must be barrier of taskwait the threads waiting at. 
- ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_task_starting( - void * object // ITT sync object: barrier or taskwait. -) { -#if USE_ITT_NOTIFY - if ( object != NULL ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_cancel( object ); - KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object ); - }; // if -#endif -} // __kmp_itt_task_starting - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_task_finished( - void * object // ITT sync object: barrier or taskwait. -) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object ); -#endif -} // __kmp_itt_task_finished - -// ------------------------------------------------------------------------------------------------- - -/* - ------------------------------------------------------------------------------------------------ - Lock reporting. - - * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation - (set/unset). It is not a real event shown to the user but just setting a name for - synchronization object. `lock' is an address of sync object, the same address should be - used in all subsequent calls. - - * __kmp_itt_lock_acquiring() should be called before setting the lock. - - * __kmp_itt_lock_acquired() should be called after setting the lock. - - * __kmp_itt_lock_realeasing() should be called before unsetting the lock. - - * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting for the lock. - - * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After - __kmp_itt_lock_destroyed() all the references to the same address will be considered - as another sync object, not related with the original one. 
- ------------------------------------------------------------------------------------------------ -*/ - -// ------------------------------------------------------------------------------------------------- - -#if KMP_USE_DYNAMIC_LOCK -// Takes location information directly -__kmp_inline -void -___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - char const * src = ( loc == NULL ? NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( lock, type, src, 0 ); - KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); - } -#endif -} -#else // KMP_USE_DYNAMIC_LOCK -// Internal guts -- common code for locks and critical sections, do not call directly. -__kmp_inline -void -___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - ident_t const * loc = NULL; - if ( __kmp_get_user_lock_location_ != NULL ) - loc = __kmp_get_user_lock_location_( (lock) ); - char const * src = ( loc == NULL ? NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( lock, type, src, 0 ); - KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); - }; // if -#endif -} // ___kmp_itt_lock_init -#endif // KMP_USE_DYNAMIC_LOCK - -// Internal guts -- common code for locks and critical sections, do not call directly. 
-__kmp_inline -void -___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_destroy( lock ); - KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock ); -#endif -} // ___kmp_itt_lock_fini - - -// ------------------------------------------------------------------------------------------------- - -#if KMP_USE_DYNAMIC_LOCK -void -__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) { - ___kmp_itt_lock_init( lock, "OMP Lock", loc ); -} -#else -void -__kmp_itt_lock_creating( kmp_user_lock_p lock ) { - ___kmp_itt_lock_init( lock, "OMP Lock" ); -} // __kmp_itt_lock_creating -#endif - -void -__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if ( __itt_sync_prepare_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_prepare( ilk->lock ); - } else { - __itt_sync_prepare( lock ); - } - } -#else - __itt_sync_prepare( lock ); -#endif -} // __kmp_itt_lock_acquiring - -void -__kmp_itt_lock_acquired( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if ( __itt_sync_acquired_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_acquired( ilk->lock ); - } else { - __itt_sync_acquired( lock ); - } - } -#else - __itt_sync_acquired( lock ); -#endif -} // __kmp_itt_lock_acquired - -void -__kmp_itt_lock_releasing( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - if ( __itt_sync_releasing_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_releasing( ilk->lock ); - } else { - __itt_sync_releasing( lock ); - } - } -#else - __itt_sync_releasing( lock ); -#endif -} // __kmp_itt_lock_releasing - -void -__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK 
&& USE_ITT_NOTIFY - if ( __itt_sync_cancel_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_cancel( ilk->lock ); - } else { - __itt_sync_cancel( lock ); - } - } -#else - __itt_sync_cancel( lock ); -#endif -} // __kmp_itt_lock_cancelled - -void -__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) { - ___kmp_itt_lock_fini( lock, "OMP Lock" ); -} // __kmp_itt_lock_destroyed - -/* - ------------------------------------------------------------------------------------------------ - Critical reporting. - - Critical sections are treated exactly as locks (but have different object type). - ------------------------------------------------------------------------------------------------ -*/ -#if KMP_USE_DYNAMIC_LOCK -void -__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) { - ___kmp_itt_lock_init( lock, "OMP Critical", loc); -} -#else -void -__kmp_itt_critical_creating( kmp_user_lock_p lock ) { - ___kmp_itt_lock_init( lock, "OMP Critical" ); -} // __kmp_itt_critical_creating -#endif - -void -__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) { - __itt_sync_prepare( lock ); -} // __kmp_itt_critical_acquiring - -void -__kmp_itt_critical_acquired( kmp_user_lock_p lock ) { - __itt_sync_acquired( lock ); -} // __kmp_itt_critical_acquired - -void -__kmp_itt_critical_releasing( kmp_user_lock_p lock ) { - __itt_sync_releasing( lock ); -} // __kmp_itt_critical_releasing - -void -__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) { - ___kmp_itt_lock_fini( lock, "OMP Critical" ); -} // __kmp_itt_critical_destroyed - -/* - ------------------------------------------------------------------------------------------------ - Single reporting. 
- ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_single_start( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) { - kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) ); - ident_t * loc = thr->th.th_ident; - char const * src = ( loc == NULL ? NULL : loc->psource ); - kmp_str_buf_t name; - __kmp_str_buf_init( & name ); - __kmp_str_buf_print( & name, "OMP Single-%s", src ); - KMP_ITT_DEBUG_LOCK(); - thr->th.th_itt_mark_single = __itt_mark_create( name.str ); - KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single ); - __kmp_str_buf_free( & name ); - KMP_ITT_DEBUG_LOCK(); - __itt_mark( thr->th.th_itt_mark_single, NULL ); - KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single ); - }; // if -#endif -} // __kmp_itt_single_start - -void -__kmp_itt_single_end( int gtid ) { -#if USE_ITT_NOTIFY - __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single; - KMP_ITT_DEBUG_LOCK(); - __itt_mark_off( mark ); - KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark ); -#endif -} // __kmp_itt_single_end - -/* - ------------------------------------------------------------------------------------------------ - Ordered reporting. - - __kmp_itt_ordered_init is called by each thread *before* first using sync - object. ITT team would like it to be called once, but it requires extra synchronization. - - __kmp_itt_ordered_prep is called when thread is going to enter ordered section - (before synchronization). - - __kmp_itt_ordered_start is called just before entering user code (after - synchronization). - - __kmp_itt_ordered_end is called after returning from user code. - - Sync object is th->th.th_dispatch->th_dispatch_sh_current. - - Events are not generated in case of serialized team. 
- ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_ordered_init( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); - ident_t const * loc = thr->th.th_ident; - char const * src = ( loc == NULL ? NULL : loc->psource ); - __itt_sync_create( - thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0 - ); - }; // if -#endif -} // __kmp_itt_ordered_init - -void -__kmp_itt_ordered_prep( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if - }; // if -#endif -} // __kmp_itt_ordered_prep - -void -__kmp_itt_ordered_start( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if - }; // if -#endif -} // __kmp_itt_ordered_start - -void -__kmp_itt_ordered_end( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if - }; // if -#endif -} // __kmp_itt_ordered_end - - -/* - ------------------------------------------------------------------------------------------------ - Threads reporting. 
- ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_thread_ignore() { - __itt_thr_ignore(); -} // __kmp_itt_thread_ignore - -void -__kmp_itt_thread_name( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_thr_name_set_ptr ) { - kmp_str_buf_t name; - __kmp_str_buf_init( & name ); - if( KMP_MASTER_GTID(gtid) ) { - __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid ); - } else { - __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid ); - } - KMP_ITT_DEBUG_LOCK(); - __itt_thr_name_set( name.str, name.used ); - KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str ); - __kmp_str_buf_free( & name ); - }; // if -#endif -} // __kmp_itt_thread_name - - -/* - -------------------------------------------------------------------------- - System object reporting. - - ITT catches operations with system sync objects (like Windows* OS on IA-32 - architecture API critical sections and events). We only need to specify - name ("OMP Scheduler") for the object to let ITT know it is an object used - by OpenMP RTL for internal purposes. - -------------------------------------------------------------------------- -*/ - -void -__kmp_itt_system_object_created( void * object, char const * name ) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, "OMP Scheduler", name, 0 ); - KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name ); -#endif -} // __kmp_itt_system_object_created - - -/* - ------------------------------------------------------------------------------------------------ - Stack stitching api. - - Master calls "create" and put the stitching id into team structure. - Workers read the stitching id and call "enter" / "leave" api. - Master calls "destroy" at the end of the parallel region. 
- ------------------------------------------------------------------------------------------------ -*/ - -__itt_caller -__kmp_itt_stack_caller_create() -{ -#if USE_ITT_NOTIFY - if ( !__itt_stack_caller_create_ptr ) - return NULL; - KMP_ITT_DEBUG_LOCK(); - __itt_caller id = __itt_stack_caller_create(); - KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id ); - return id; -#endif - return NULL; -} - -void -__kmp_itt_stack_caller_destroy( __itt_caller id ) -{ -#if USE_ITT_NOTIFY - if ( __itt_stack_caller_destroy_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_caller_destroy( id ); - KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id ); - } -#endif -} - -void -__kmp_itt_stack_callee_enter( __itt_caller id ) -{ -#if USE_ITT_NOTIFY - if ( __itt_stack_callee_enter_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_enter( id ); - KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id ); - } -#endif -} - -void -__kmp_itt_stack_callee_leave( __itt_caller id ) -{ -#if USE_ITT_NOTIFY - if ( __itt_stack_callee_leave_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_leave( id ); - KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id ); - } -#endif -} - -#endif /* USE_ITT_BUILD */ +#if USE_ITT_BUILD +/* + * kmp_itt.inl -- Inline functions of ITT Notify. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +// Inline function definitions. This file should be included into kmp_itt.h file for prodiction +// build (to let compliler inline functions) or into kmp_itt.c file for debug build (to reduce +// the number of files to recompile and save build time). 
+ + +#include "kmp.h" +#include "kmp_str.h" + +#if KMP_ITT_DEBUG + extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; + #define KMP_ITT_DEBUG_LOCK() { \ + __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \ + } + #define KMP_ITT_DEBUG_PRINT( ... ) { \ + fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \ + fprintf( stderr, __VA_ARGS__ ); \ + fflush( stderr ); \ + __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \ + } +#else + #define KMP_ITT_DEBUG_LOCK() + #define KMP_ITT_DEBUG_PRINT( ... ) +#endif // KMP_ITT_DEBUG + +// Ensure that the functions are static if they're supposed to be +// being inlined. Otherwise they cannot be used in more than one file, +// since there will be multiple definitions. +#if KMP_DEBUG +# define LINKAGE +#else +# define LINKAGE static inline +#endif + +// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this +// API to support user-defined synchronization primitives, but does not use ZCA; +// it would be safe to turn this off until wider support becomes available. +#if USE_ITT_ZCA +#ifdef __INTEL_COMPILER +# if __INTEL_COMPILER >= 1200 +# undef __itt_sync_acquired +# undef __itt_sync_releasing +# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr) +# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr) +# endif +#endif +#endif + +static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock ); + +/* + ------------------------------------------------------------------------------------------------ + Parallel region reporting. + + * __kmp_itt_region_forking should be called by master thread of a team. Exact moment of + call does not matter, but it should be completed before any thread of this team calls + __kmp_itt_region_starting. + * __kmp_itt_region_starting should be called by each thread of a team just before entering + parallel region body. 
+ * __kmp_itt_region_finished should be called by each thread of a team right after returning + from parallel region body. + * __kmp_itt_region_joined should be called by master thread of a team, after all threads + called __kmp_itt_region_finished. + + Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more + user code -- such a thread can execute tasks. + + Note: The overhead of logging region_starting and region_finished in each thread is too large, + so these calls are not used. + + ------------------------------------------------------------------------------------------------ +*/ + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) { +#if USE_ITT_NOTIFY + kmp_team_t * team = __kmp_team_from_gtid( gtid ); + if (team->t.t_active_level + serialized > 1) + { + // The frame notifications are only supported for the outermost teams. + return; + } + ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; + if (loc) { + // Use the reserved_2 field to store the index to the region domain. + // Assume that reserved_2 contains zero initially. Since zero is special + // value here, store the index into domain array increased by 1. + if (loc->reserved_2 == 0) { + if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) { + // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc + //} // AC: this block is to replace next unsynchronized line + + // We need to save indexes for both region and barrier frames. 
We'll use loc->reserved_2 + // field but put region index to the low two bytes and barrier indexes to the high + // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. + loc->reserved_2 |= (frm + 1); // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "$omp$parallel@[file:][:]" + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + if( barriers ) { + if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + const char * buff = NULL; + buff = __kmp_str_format("%s$omp$barrier@%s:%d", + str_loc.func, str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + __kmp_str_free( &buff ); + // Save the barrier frame index to the high two bytes. + loc->reserved_2 |= (frm + 1) << 16; + } + } + __kmp_str_loc_free( &str_loc ); + __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL); + } + } else { // Region domain exists for this location + // Check if team size was changed. 
Then create new region domain for this location + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if( __kmp_itt_region_team_size[frm] != team_size ) { + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + __kmp_str_loc_free( &str_loc ); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } else { // Team size was not changed. Use existing domain. + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n", + gtid, loc->reserved_2, serialized, loc ); + } +#endif +} // __kmp_itt_region_forking + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) { +#if USE_ITT_NOTIFY + if( region ) { + kmp_team_t * team = __kmp_team_from_gtid( gtid ); + int serialized = ( region == 2 ? 1 : 0 ); + if (team->t.t_active_level + serialized > 1) + { + // The frame notifications are only supported for the outermost teams. + return; + } + //Check region domain has not been created before. It's index is saved in the low two bytes. 
+ if ((loc->reserved_2 & 0x0000FFFF) == 0) { + if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + + // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 + // field but put region index to the low two bytes and barrier indexes to the high + // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. + loc->reserved_2 |= (frm + 1); // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "$omp$parallel:team_size@[file:][:]" + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + __kmp_str_loc_free( &str_loc ); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); + } + } else { // Region domain exists for this location + // Check if team size was changed. 
Then create new region domain for this location + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if( __kmp_itt_region_team_size[frm] != team_size ) { + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + __kmp_str_loc_free( &str_loc ); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); + } else { // Team size was not changed. Use existing domain. + __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n", + gtid, loc->reserved_2, region, loc, begin, end ); + return; + } else { // called for barrier reporting + if (loc) { + if ((loc->reserved_2 & 0xFFFF0000) == 0) { + if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + // Save the barrier frame index to the high two bytes. 
+ loc->reserved_2 |= (frm + 1) << 16; // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "$omp$frame@[file:][:]" + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + if( imbalance ) { + const char * buff_imb = NULL; + buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", + str_loc.func, team_size, str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb ); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end ); + __kmp_str_free( &buff_imb ); + } else { + const char * buff = NULL; + buff = __kmp_str_format("%s$omp$barrier@%s:%d", + str_loc.func, str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end ); + __kmp_str_free( &buff ); + } + __kmp_str_loc_free( &str_loc ); + } + } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS + if( imbalance ) { + __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end ); + } else { + __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end ); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", + gtid, loc->reserved_2, loc, begin, end ); + } + } +#endif +} // __kmp_itt_frame_submit + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) { +#if USE_ITT_NOTIFY + if( metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock( & metadata_lock ); + if( 
metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create( "OMP Metadata" ); + __itt_suppress_pop(); + } + __kmp_release_bootstrap_lock( & metadata_lock ); + } + + __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance"); + + kmp_uint64 imbalance_data[ 4 ]; + imbalance_data[ 0 ] = begin; + imbalance_data[ 1 ] = end; + imbalance_data[ 2 ] = imbalance; + imbalance_data[ 3 ] = reduction; + + __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data); +#endif +} // __kmp_itt_metadata_imbalance + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) { +#if USE_ITT_NOTIFY + if( metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock( & metadata_lock ); + if( metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create( "OMP Metadata" ); + __itt_suppress_pop(); + } + __kmp_release_bootstrap_lock( & metadata_lock ); + } + + __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop"); + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + + kmp_uint64 loop_data[ 5 ]; + loop_data[ 0 ] = str_loc.line; + loop_data[ 1 ] = str_loc.col; + loop_data[ 2 ] = sched_type; + loop_data[ 3 ] = iterations; + loop_data[ 4 ] = chunk; + + __kmp_str_loc_free( &str_loc ); + + __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data); +#endif +} // __kmp_itt_metadata_loop + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_metadata_single( ident_t * loc ) { +#if USE_ITT_NOTIFY + if( metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock( & metadata_lock ); + if( 
metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create( "OMP Metadata" ); + __itt_suppress_pop(); + } + __kmp_release_bootstrap_lock( & metadata_lock ); + } + + __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single"); + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + kmp_uint64 single_data[ 2 ]; + single_data[ 0 ] = str_loc.line; + single_data[ 1 ] = str_loc.col; + + __kmp_str_loc_free( &str_loc ); + + __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data); +#endif +} // __kmp_itt_metadata_single + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_starting( int gtid ) { +#if USE_ITT_NOTIFY +#endif +} // __kmp_itt_region_starting + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_finished( int gtid ) { +#if USE_ITT_NOTIFY +#endif +} // __kmp_itt_region_finished + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_joined( int gtid, int serialized ) { +#if USE_ITT_NOTIFY + kmp_team_t * team = __kmp_team_from_gtid( gtid ); + if (team->t.t_active_level + serialized > 1) + { + // The frame notifications are only supported for the outermost teams. 
+ return; + } + ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; + if (loc && loc->reserved_2) + { + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if(frm < KMP_MAX_FRAME_DOMAINS) { + KMP_ITT_DEBUG_LOCK(); + __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL); + KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n", + gtid, loc->reserved_2, serialized, loc ); + } + } +#endif +} // __kmp_itt_region_joined + +/* + ------------------------------------------------------------------------------------------------ + Barriers reporting. + + A barrier consists of two phases: + + 1. Gather -- master waits for arriving of all the worker threads; each worker thread + registers arrival and goes further. + 2. Release -- each worker threads waits until master lets it go; master lets worker threads + go. + + Function should be called by each thread: + + * __kmp_itt_barrier_starting() -- before arriving to the gather phase. + * __kmp_itt_barrier_middle() -- between gather and release phases. + * __kmp_itt_barrier_finished() -- after release phase. + + Note: Call __kmp_itt_barrier_object() before call to __kmp_itt_barrier_starting() and save + result in local variable. __kmp_itt_barrier_object(), being called too late (e. g. after gather + phase) would return itt sync object for the next barrier! + + ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have + barrier object or barrier data structure. Barrier is just a counter in team and thread + structures. We could use an address of team structure as an barrier sync object, but ITT wants + different objects for different barriers (even whithin the same team). So let us use + team address as barrier sync object for the first barrier, then increase it by one for the next + barrier, and so on (but wrap it not to use addresses outside of team structure). 
+ + ------------------------------------------------------------------------------------------------ +*/ + +void * +__kmp_itt_barrier_object( + int gtid, + int bt, + int set_name, + int delta // 0 (current barrier) is default value; specify -1 to get previous barrier. +) { + void * object = NULL; +#if USE_ITT_NOTIFY + kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); + kmp_team_t * team = thr->th.th_team; + + // NOTE: + // If the function is called from __kmp_fork_barrier, team pointer can be NULL. This "if" + // helps to avoid crash. However, this is not complete solution, and reporting fork/join + // barriers to ITT should be revisited. + + if ( team != NULL ) { + + // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived + // by KMP_BARRIER_STATE_BUMP to get plain barrier counter. + kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta; + // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of + // different types do not have the same ids. + KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier ); + // This conditon is a must (we would have zero divide otherwise). + KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier ); + // More strong condition: make sure we have room at least for for two differtent ids + // (for each barrier type). + object = + reinterpret_cast< void * >( + kmp_uintptr_t( team ) + + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier + + bt + ); + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object ); + + if ( set_name ) { + ident_t const * loc = NULL; + char const * src = NULL; + char const * type = "OMP Barrier"; + switch ( bt ) { + case bs_plain_barrier : { + // For plain barrier compiler calls __kmpc_barrier() function, which saves + // location in thr->th.th_ident. + loc = thr->th.th_ident; + // Get the barrier type from flags provided by compiler. 
+ kmp_int32 expl = 0; + kmp_uint32 impl = 0; + if ( loc != NULL ) { + src = loc->psource; + expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0; + impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0; + }; // if + if ( impl ) { + switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) { + case KMP_IDENT_BARRIER_IMPL_FOR : { + type = "OMP For Barrier"; + } break; + case KMP_IDENT_BARRIER_IMPL_SECTIONS : { + type = "OMP Sections Barrier"; + } break; + case KMP_IDENT_BARRIER_IMPL_SINGLE : { + type = "OMP Single Barrier"; + } break; + case KMP_IDENT_BARRIER_IMPL_WORKSHARE : { + type = "OMP Workshare Barrier"; + } break; + default : { + type = "OMP Implicit Barrier"; + KMP_DEBUG_ASSERT( 0 ); + }; + }; /* switch */ + } else if ( expl ) { + type = "OMP Explicit Barrier"; + }; /* if */ + } break; + case bs_forkjoin_barrier : { + // In case of fork/join barrier we can read thr->th.th_ident, because it + // contains location of last passed construct (while join barrier is not + // such one). Use th_ident of master thread instead -- __kmp_join_call() + // called by the master thread saves location. + // + // AC: cannot read from master because __kmp_join_call may be not called + // yet, so we read the location from team. This is the same location. + // And team is valid at the enter to join barrier where this happens. 
+ loc = team->t.t_ident; + if ( loc != NULL ) { + src = loc->psource; + }; // if + type = "OMP Join Barrier"; + } break; + }; // switch + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( object, type, src, __itt_attr_barrier ); + KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src ); + }; // if + + }; // if +#endif + return object; +} // __kmp_itt_barrier_object + +// ------------------------------------------------------------------------------------------------- + +void +__kmp_itt_barrier_starting( int gtid, void * object ) { +#if USE_ITT_NOTIFY + if ( !KMP_MASTER_GTID( gtid ) ) { + KMP_ITT_DEBUG_LOCK(); + __itt_sync_releasing( object ); + KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object ); + }; // if + KMP_ITT_DEBUG_LOCK(); + __itt_sync_prepare( object ); + KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object ); +#endif +} // __kmp_itt_barrier_starting + +// ------------------------------------------------------------------------------------------------- + +void +__kmp_itt_barrier_middle( int gtid, void * object ) { +#if USE_ITT_NOTIFY + if ( KMP_MASTER_GTID( gtid ) ) { + KMP_ITT_DEBUG_LOCK(); + __itt_sync_acquired( object ); + KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_releasing( object ); + KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object ); + } else { + }; // if +#endif +} // __kmp_itt_barrier_middle + +// ------------------------------------------------------------------------------------------------- + +void +__kmp_itt_barrier_finished( int gtid, void * object ) { +#if USE_ITT_NOTIFY + if ( KMP_MASTER_GTID( gtid ) ) { + } else { + KMP_ITT_DEBUG_LOCK(); + __itt_sync_acquired( object ); + KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object ); + }; // if +#endif +} // __kmp_itt_barrier_finished + +/* + ------------------------------------------------------------------------------------------------ + Taskwait reporting. 
+ + ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have taskwait + structure, so we need to construct something. + +*/ + +void * +__kmp_itt_taskwait_object( int gtid ) { + void * object = NULL; +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); + kmp_taskdata_t * taskdata = thread -> th.th_current_task; + object = + reinterpret_cast< void * >( + kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t ) + ); + }; // if +#endif + return object; +} // __kmp_itt_taskwait_object + +void +__kmp_itt_taskwait_starting( + int gtid, + void * object +) { +#if USE_ITT_NOTIFY + kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); + kmp_taskdata_t * taskdata = thread -> th.th_current_task; + ident_t const * loc = taskdata->td_taskwait_ident; + char const * src = ( loc == NULL? NULL : loc->psource ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( object, "OMP Taskwait", src, 0 ); + KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_prepare( object ); + KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object ); +#endif +} // __kmp_itt_taskwait_starting + +void +__kmp_itt_taskwait_finished( + int gtid, + void * object +) { +#if USE_ITT_NOTIFY + KMP_ITT_DEBUG_LOCK(); + __itt_sync_acquired( object ); + KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_destroy( object ); + KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object ); +#endif +} // __kmp_itt_taskwait_finished + +/* + ------------------------------------------------------------------------------------------------ + Task reporting. + + Only those tasks are reported which are executed by a thread spinning at barrier (or taskwait). + Synch object passed to the function must be barrier of taskwait the threads waiting at. 
+ ------------------------------------------------------------------------------------------------ +*/ + +void +__kmp_itt_task_starting( + void * object // ITT sync object: barrier or taskwait. +) { +#if USE_ITT_NOTIFY + if ( object != NULL ) { + KMP_ITT_DEBUG_LOCK(); + __itt_sync_cancel( object ); + KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object ); + }; // if +#endif +} // __kmp_itt_task_starting + +// ------------------------------------------------------------------------------------------------- + +void +__kmp_itt_task_finished( + void * object // ITT sync object: barrier or taskwait. +) { +#if USE_ITT_NOTIFY + KMP_ITT_DEBUG_LOCK(); + __itt_sync_prepare( object ); + KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object ); +#endif +} // __kmp_itt_task_finished + +// ------------------------------------------------------------------------------------------------- + +/* + ------------------------------------------------------------------------------------------------ + Lock reporting. + + * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation + (set/unset). It is not a real event shown to the user but just setting a name for + synchronization object. `lock' is an address of sync object, the same address should be + used in all subsequent calls. + + * __kmp_itt_lock_acquiring() should be called before setting the lock. + + * __kmp_itt_lock_acquired() should be called after setting the lock. + + * __kmp_itt_lock_realeasing() should be called before unsetting the lock. + + * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting for the lock. + + * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After + __kmp_itt_lock_destroyed() all the references to the same address will be considered + as another sync object, not related with the original one. 
+ ------------------------------------------------------------------------------------------------ +*/ + +// ------------------------------------------------------------------------------------------------- + +#if KMP_USE_DYNAMIC_LOCK +// Takes location information directly +__kmp_inline +void +___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + char const * src = ( loc == NULL ? NULL : loc->psource ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( lock, type, src, 0 ); + KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); + } +#endif +} +#else // KMP_USE_DYNAMIC_LOCK +// Internal guts -- common code for locks and critical sections, do not call directly. +__kmp_inline +void +___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + ident_t const * loc = NULL; + if ( __kmp_get_user_lock_location_ != NULL ) + loc = __kmp_get_user_lock_location_( (lock) ); + char const * src = ( loc == NULL ? NULL : loc->psource ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( lock, type, src, 0 ); + KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); + }; // if +#endif +} // ___kmp_itt_lock_init +#endif // KMP_USE_DYNAMIC_LOCK + +// Internal guts -- common code for locks and critical sections, do not call directly. 
+__kmp_inline +void +___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) { +#if USE_ITT_NOTIFY + KMP_ITT_DEBUG_LOCK(); + __itt_sync_destroy( lock ); + KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock ); +#endif +} // ___kmp_itt_lock_fini + + +// ------------------------------------------------------------------------------------------------- + +#if KMP_USE_DYNAMIC_LOCK +void +__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) { + ___kmp_itt_lock_init( lock, "OMP Lock", loc ); +} +#else +void +__kmp_itt_lock_creating( kmp_user_lock_p lock ) { + ___kmp_itt_lock_init( lock, "OMP Lock" ); +} // __kmp_itt_lock_creating +#endif + +void +__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + // postpone lock object access + if ( __itt_sync_prepare_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_prepare( ilk->lock ); + } else { + __itt_sync_prepare( lock ); + } + } +#else + __itt_sync_prepare( lock ); +#endif +} // __kmp_itt_lock_acquiring + +void +__kmp_itt_lock_acquired( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + // postpone lock object access + if ( __itt_sync_acquired_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_acquired( ilk->lock ); + } else { + __itt_sync_acquired( lock ); + } + } +#else + __itt_sync_acquired( lock ); +#endif +} // __kmp_itt_lock_acquired + +void +__kmp_itt_lock_releasing( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + if ( __itt_sync_releasing_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_releasing( ilk->lock ); + } else { + __itt_sync_releasing( lock ); + } + } +#else + __itt_sync_releasing( lock ); +#endif +} // __kmp_itt_lock_releasing + +void +__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK 
&& USE_ITT_NOTIFY + if ( __itt_sync_cancel_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_cancel( ilk->lock ); + } else { + __itt_sync_cancel( lock ); + } + } +#else + __itt_sync_cancel( lock ); +#endif +} // __kmp_itt_lock_cancelled + +void +__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) { + ___kmp_itt_lock_fini( lock, "OMP Lock" ); +} // __kmp_itt_lock_destroyed + +/* + ------------------------------------------------------------------------------------------------ + Critical reporting. + + Critical sections are treated exactly as locks (but have different object type). + ------------------------------------------------------------------------------------------------ +*/ +#if KMP_USE_DYNAMIC_LOCK +void +__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) { + ___kmp_itt_lock_init( lock, "OMP Critical", loc); +} +#else +void +__kmp_itt_critical_creating( kmp_user_lock_p lock ) { + ___kmp_itt_lock_init( lock, "OMP Critical" ); +} // __kmp_itt_critical_creating +#endif + +void +__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) { + __itt_sync_prepare( lock ); +} // __kmp_itt_critical_acquiring + +void +__kmp_itt_critical_acquired( kmp_user_lock_p lock ) { + __itt_sync_acquired( lock ); +} // __kmp_itt_critical_acquired + +void +__kmp_itt_critical_releasing( kmp_user_lock_p lock ) { + __itt_sync_releasing( lock ); +} // __kmp_itt_critical_releasing + +void +__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) { + ___kmp_itt_lock_fini( lock, "OMP Critical" ); +} // __kmp_itt_critical_destroyed + +/* + ------------------------------------------------------------------------------------------------ + Single reporting. 
+ ------------------------------------------------------------------------------------------------ +*/ + +void +__kmp_itt_single_start( int gtid ) { +#if USE_ITT_NOTIFY + if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) { + kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) ); + ident_t * loc = thr->th.th_ident; + char const * src = ( loc == NULL ? NULL : loc->psource ); + kmp_str_buf_t name; + __kmp_str_buf_init( & name ); + __kmp_str_buf_print( & name, "OMP Single-%s", src ); + KMP_ITT_DEBUG_LOCK(); + thr->th.th_itt_mark_single = __itt_mark_create( name.str ); + KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single ); + __kmp_str_buf_free( & name ); + KMP_ITT_DEBUG_LOCK(); + __itt_mark( thr->th.th_itt_mark_single, NULL ); + KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single ); + }; // if +#endif +} // __kmp_itt_single_start + +void +__kmp_itt_single_end( int gtid ) { +#if USE_ITT_NOTIFY + __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single; + KMP_ITT_DEBUG_LOCK(); + __itt_mark_off( mark ); + KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark ); +#endif +} // __kmp_itt_single_end + +/* + ------------------------------------------------------------------------------------------------ + Ordered reporting. + + __kmp_itt_ordered_init is called by each thread *before* first using sync + object. ITT team would like it to be called once, but it requires extra synchronization. + + __kmp_itt_ordered_prep is called when thread is going to enter ordered section + (before synchronization). + + __kmp_itt_ordered_start is called just before entering user code (after + synchronization). + + __kmp_itt_ordered_end is called after returning from user code. + + Sync object is th->th.th_dispatch->th_dispatch_sh_current. + + Events are not generated in case of serialized team. 
+ ------------------------------------------------------------------------------------------------ +*/ + +void +__kmp_itt_ordered_init( int gtid ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); + ident_t const * loc = thr->th.th_ident; + char const * src = ( loc == NULL ? NULL : loc->psource ); + __itt_sync_create( + thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0 + ); + }; // if +#endif +} // __kmp_itt_ordered_init + +void +__kmp_itt_ordered_prep( int gtid ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + kmp_team_t * t = __kmp_team_from_gtid( gtid ); + if ( ! t->t.t_serialized ) { + kmp_info_t * th = __kmp_thread_from_gtid( gtid ); + __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current ); + }; // if + }; // if +#endif +} // __kmp_itt_ordered_prep + +void +__kmp_itt_ordered_start( int gtid ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + kmp_team_t * t = __kmp_team_from_gtid( gtid ); + if ( ! t->t.t_serialized ) { + kmp_info_t * th = __kmp_thread_from_gtid( gtid ); + __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current ); + }; // if + }; // if +#endif +} // __kmp_itt_ordered_start + +void +__kmp_itt_ordered_end( int gtid ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + kmp_team_t * t = __kmp_team_from_gtid( gtid ); + if ( ! t->t.t_serialized ) { + kmp_info_t * th = __kmp_thread_from_gtid( gtid ); + __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current ); + }; // if + }; // if +#endif +} // __kmp_itt_ordered_end + + +/* + ------------------------------------------------------------------------------------------------ + Threads reporting. 
+ ------------------------------------------------------------------------------------------------ +*/ + +void +__kmp_itt_thread_ignore() { + __itt_thr_ignore(); +} // __kmp_itt_thread_ignore + +void +__kmp_itt_thread_name( int gtid ) { +#if USE_ITT_NOTIFY + if ( __itt_thr_name_set_ptr ) { + kmp_str_buf_t name; + __kmp_str_buf_init( & name ); + if( KMP_MASTER_GTID(gtid) ) { + __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid ); + } else { + __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid ); + } + KMP_ITT_DEBUG_LOCK(); + __itt_thr_name_set( name.str, name.used ); + KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str ); + __kmp_str_buf_free( & name ); + }; // if +#endif +} // __kmp_itt_thread_name + + +/* + -------------------------------------------------------------------------- + System object reporting. + + ITT catches operations with system sync objects (like Windows* OS on IA-32 + architecture API critical sections and events). We only need to specify + name ("OMP Scheduler") for the object to let ITT know it is an object used + by OpenMP RTL for internal purposes. + -------------------------------------------------------------------------- +*/ + +void +__kmp_itt_system_object_created( void * object, char const * name ) { +#if USE_ITT_NOTIFY + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( object, "OMP Scheduler", name, 0 ); + KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name ); +#endif +} // __kmp_itt_system_object_created + + +/* + ------------------------------------------------------------------------------------------------ + Stack stitching api. + + Master calls "create" and put the stitching id into team structure. + Workers read the stitching id and call "enter" / "leave" api. + Master calls "destroy" at the end of the parallel region. 
+ ------------------------------------------------------------------------------------------------ +*/ + +__itt_caller +__kmp_itt_stack_caller_create() +{ +#if USE_ITT_NOTIFY + if ( !__itt_stack_caller_create_ptr ) + return NULL; + KMP_ITT_DEBUG_LOCK(); + __itt_caller id = __itt_stack_caller_create(); + KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id ); + return id; +#endif + return NULL; +} + +void +__kmp_itt_stack_caller_destroy( __itt_caller id ) +{ +#if USE_ITT_NOTIFY + if ( __itt_stack_caller_destroy_ptr ) { + KMP_ITT_DEBUG_LOCK(); + __itt_stack_caller_destroy( id ); + KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id ); + } +#endif +} + +void +__kmp_itt_stack_callee_enter( __itt_caller id ) +{ +#if USE_ITT_NOTIFY + if ( __itt_stack_callee_enter_ptr ) { + KMP_ITT_DEBUG_LOCK(); + __itt_stack_callee_enter( id ); + KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id ); + } +#endif +} + +void +__kmp_itt_stack_callee_leave( __itt_caller id ) +{ +#if USE_ITT_NOTIFY + if ( __itt_stack_callee_leave_ptr ) { + KMP_ITT_DEBUG_LOCK(); + __itt_stack_callee_leave( id ); + KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id ); + } +#endif +} + +#endif /* USE_ITT_BUILD */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_lock.cpp b/contrib/libs/cxxsupp/openmp/kmp_lock.cpp index ec884f6a51b..becf7eddf61 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_lock.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_lock.cpp @@ -1,4207 +1,4207 @@ -/* - * kmp_lock.cpp -- lock-related functions - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include - -#include "kmp.h" -#include "kmp_itt.h" -#include "kmp_i18n.h" -#include "kmp_lock.h" -#include "kmp_io.h" - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) -# include -# include -// We should really include , but that causes compatibility problems on different -// Linux* OS distributions that either require that you include (or break when you try to include) -// . -// Since all we need is the two macros below (which are part of the kernel ABI, so can't change) -// we just define the constants here and don't include -# ifndef FUTEX_WAIT -# define FUTEX_WAIT 0 -# endif -# ifndef FUTEX_WAKE -# define FUTEX_WAKE 1 -# endif -#endif - -/* Implement spin locks for internal library use. */ -/* The algorithm implemented is Lamport's bakery lock [1974]. */ - -void -__kmp_validate_locks( void ) -{ - int i; - kmp_uint32 x, y; - - /* Check to make sure unsigned arithmetic does wraps properly */ - x = ~((kmp_uint32) 0) - 2; - y = x - 2; - - for (i = 0; i < 8; ++i, ++x, ++y) { - kmp_uint32 z = (x - y); - KMP_ASSERT( z == 2 ); - } - - KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 ); -} - - -/* ------------------------------------------------------------------------ */ -/* test and set locks */ - -// -// For the non-nested locks, we can only assume that the first 4 bytes were -// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel -// compiler only allocates a 4 byte pointer on IA-32 architecture. On -// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. -// -// gcc reserves >= 8 bytes for nested locks, so we can assume that the -// entire 8 bytes were allocated for nested locks on all 64-bit platforms. 
-// - -static kmp_int32 -__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck ) -{ - return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1; -} - -static inline bool -__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; -} - -__forceinline static int -__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_MB(); - -#ifdef USE_LOCK_PROFILE - kmp_uint32 curr = TCR_4( lck->lk.poll ); - if ( ( curr != 0 ) && ( curr != gtid + 1 ) ) - __kmp_printf( "LOCK CONTENTION: %p\n", lck ); - /* else __kmp_printf( "." );*/ -#endif /* USE_LOCK_PROFILE */ - - if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) ) - && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) { - KMP_FSYNC_ACQUIRED(lck); - return KMP_LOCK_ACQUIRED_FIRST; - } - - kmp_uint32 spins; - KMP_FSYNC_PREPARE( lck ); - KMP_INIT_YIELD( spins ); - if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : - __kmp_xproc ) ) { - KMP_YIELD( TRUE ); - } - else { - KMP_YIELD_SPIN( spins ); - } - - while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) || - ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) { - // - // FIXME - use exponential backoff here - // - if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : - __kmp_xproc ) ) { - KMP_YIELD( TRUE ); - } - else { - KMP_YIELD_SPIN( spins ); - } - } - KMP_FSYNC_ACQUIRED( lck ); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_acquire_tas_lock_timed_template( lck, gtid ); -} - -static int -__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - return __kmp_acquire_tas_lock( lck, gtid ); -} - -int -__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) ) - && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) { - KMP_FSYNC_ACQUIRED( lck ); - return TRUE; - } - return FALSE; -} - -static int -__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - return __kmp_test_tas_lock( lck, gtid ); -} - -int -__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KMP_FSYNC_RELEASING(lck); - KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) ); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : - __kmp_xproc ) ); - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) - && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_tas_lock( lck, gtid ); -} - -void -__kmp_init_tas_lock( kmp_tas_lock_t * lck ) -{ - TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) ); -} - -static void -__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) -{ - __kmp_init_tas_lock( lck ); -} - -void -__kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) -{ - lck->lk.poll = 0; -} - -static void -__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_tas_lock( lck ); -} - - -// -// nested test and set locks -// - -int -__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_tas_lock_timed_template( lck, gtid ); - lck->lk.depth_locked = 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int -__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_tas_lock( lck, gtid ); -} - -int -__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_tas_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - } - return retval; -} - -static int -__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( ! __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_tas_lock( lck, gtid ); -} - -int -__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - __kmp_release_tas_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int -__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_tas_lock( lck, gtid ); -} - -void -__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) -{ - __kmp_init_tas_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -static void -__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) -{ - __kmp_init_nested_tas_lock( lck ); -} - -void -__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) -{ - __kmp_destroy_tas_lock( lck ); - lck->lk.depth_locked = 0; -} - -static void -__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( ! __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_tas_lock( lck ); -} - - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -/* ------------------------------------------------------------------------ */ -/* futex locks */ - -// futex locks are really just test and set locks, with a different method -// of handling contention. They take the same amount of space as test and -// set locks, and are allocated the same way (i.e. use the area allocated by -// the compiler for non-nested locks / allocate nested locks on the heap). 
- -static kmp_int32 -__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) -{ - return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; -} - -static inline bool -__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; -} - -__forceinline static int -__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - kmp_int32 gtid_code = ( gtid + 1 ) << 1; - - KMP_MB(); - -#ifdef USE_LOCK_PROFILE - kmp_uint32 curr = TCR_4( lck->lk.poll ); - if ( ( curr != 0 ) && ( curr != gtid_code ) ) - __kmp_printf( "LOCK CONTENTION: %p\n", lck ); - /* else __kmp_printf( "." );*/ -#endif /* USE_LOCK_PROFILE */ - - KMP_FSYNC_PREPARE( lck ); - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", - lck, lck->lk.poll, gtid ) ); - - kmp_int32 poll_val; - - while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), - KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) { - - kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", - lck, gtid, poll_val, cond ) ); - - // - // NOTE: if you try to use the following condition for this branch - // - // if ( poll_val & 1 == 0 ) - // - // Then the 12.0 compiler has a bug where the following block will - // always be skipped, regardless of the value of the LSB of poll_val. - // - if ( ! cond ) { - // - // Try to set the lsb in the poll to indicate to the owner - // thread that they need to wake this thread up. - // - if ( ! 
KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) { - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", - lck, lck->lk.poll, gtid ) ); - continue; - } - poll_val |= KMP_LOCK_BUSY(1, futex); - - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", - lck, lck->lk.poll, gtid ) ); - } - - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", - lck, gtid, poll_val ) ); - - kmp_int32 rc; - if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, - poll_val, NULL, NULL, 0 ) ) != 0 ) { - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", - lck, gtid, poll_val, rc, errno ) ); - continue; - } - - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", - lck, gtid, poll_val ) ); - // - // This thread has now done a successful futex wait call and was - // entered on the OS futex queue. We must now perform a futex - // wake call when releasing the lock, as we have no idea how many - // other threads are in the queue. 
- // - gtid_code |= 1; - } - - KMP_FSYNC_ACQUIRED( lck ); - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", - lck, lck->lk.poll, gtid ) ); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_acquire_futex_lock_timed_template( lck, gtid ); -} - -static int -__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - return __kmp_acquire_futex_lock( lck, gtid ); -} - -int -__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) { - KMP_FSYNC_ACQUIRED( lck ); - return TRUE; - } - return FALSE; -} - -static int -__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - return __kmp_test_futex_lock( lck, gtid ); -} - -int -__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", - lck, lck->lk.poll, gtid ) ); - - KMP_FSYNC_RELEASING(lck); - - kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) ); - - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", - lck, gtid, poll_val ) ); - - if ( KMP_LOCK_STRIP(poll_val) & 1 ) { - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", - lck, gtid ) ); - syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 ); - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", - lck, lck->lk.poll, gtid ) ); - - KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : - __kmp_xproc ) ); - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) - && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_futex_lock( lck, gtid ); -} - -void -__kmp_init_futex_lock( kmp_futex_lock_t * lck ) -{ - TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) ); -} - -static void -__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) -{ - __kmp_init_futex_lock( lck ); -} - -void -__kmp_destroy_futex_lock( kmp_futex_lock_t *lck ) -{ - lck->lk.poll = 0; -} - -static void -__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) -{ - char const * const func = 
"omp_destroy_lock"; - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_futex_lock( lck ); -} - - -// -// nested futex locks -// - -int -__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_futex_lock_timed_template( lck, gtid ); - lck->lk.depth_locked = 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int -__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( ! __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_futex_lock( lck, gtid ); -} - -int -__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_futex_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - } - return retval; -} - -static int -__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_futex_lock( lck, gtid ); -} - -int -__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - __kmp_release_futex_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int -__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ! __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_futex_lock( lck, gtid ); -} - -void -__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) -{ - __kmp_init_futex_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -static void -__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) -{ - __kmp_init_nested_futex_lock( lck ); -} - -void -__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) -{ - __kmp_destroy_futex_lock( lck ); - lck->lk.depth_locked = 0; -} - -static void -__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_futex_lock( lck ); -} - -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) - - -/* ------------------------------------------------------------------------ */ -/* ticket (bakery) locks */ - -static kmp_int32 -__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) -{ - return TCR_4( lck->lk.owner_id ) - 1; -} - -static inline bool -__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; -} - -static kmp_uint32 -__kmp_bakery_check(kmp_uint value, kmp_uint checker) -{ +/* + * kmp_lock.cpp -- lock-related functions + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include + +#include "kmp.h" +#include "kmp_itt.h" +#include "kmp_i18n.h" +#include "kmp_lock.h" +#include "kmp_io.h" + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) +# include +# include +// We should really include , but that causes compatibility problems on different +// Linux* OS distributions that either require that you include (or break when you try to include) +// . +// Since all we need is the two macros below (which are part of the kernel ABI, so can't change) +// we just define the constants here and don't include +# ifndef FUTEX_WAIT +# define FUTEX_WAIT 0 +# endif +# ifndef FUTEX_WAKE +# define FUTEX_WAKE 1 +# endif +#endif + +/* Implement spin locks for internal library use. */ +/* The algorithm implemented is Lamport's bakery lock [1974]. 
*/ + +void +__kmp_validate_locks( void ) +{ + int i; + kmp_uint32 x, y; + + /* Check to make sure unsigned arithmetic does wraps properly */ + x = ~((kmp_uint32) 0) - 2; + y = x - 2; + + for (i = 0; i < 8; ++i, ++x, ++y) { + kmp_uint32 z = (x - y); + KMP_ASSERT( z == 2 ); + } + + KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 ); +} + + +/* ------------------------------------------------------------------------ */ +/* test and set locks */ + +// +// For the non-nested locks, we can only assume that the first 4 bytes were +// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel +// compiler only allocates a 4 byte pointer on IA-32 architecture. On +// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. +// +// gcc reserves >= 8 bytes for nested locks, so we can assume that the +// entire 8 bytes were allocated for nested locks on all 64-bit platforms. +// + +static kmp_int32 +__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck ) +{ + return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1; +} + +static inline bool +__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck ) +{ + return lck->lk.depth_locked != -1; +} + +__forceinline static int +__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + KMP_MB(); + +#ifdef USE_LOCK_PROFILE + kmp_uint32 curr = TCR_4( lck->lk.poll ); + if ( ( curr != 0 ) && ( curr != gtid + 1 ) ) + __kmp_printf( "LOCK CONTENTION: %p\n", lck ); + /* else __kmp_printf( "." );*/ +#endif /* USE_LOCK_PROFILE */ + + if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) ) + && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) { + KMP_FSYNC_ACQUIRED(lck); + return KMP_LOCK_ACQUIRED_FIRST; + } + + kmp_uint32 spins; + KMP_FSYNC_PREPARE( lck ); + KMP_INIT_YIELD( spins ); + if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : + __kmp_xproc ) ) { + KMP_YIELD( TRUE ); + } + else { + KMP_YIELD_SPIN( spins ); + } + + while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) || + ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) { + // + // FIXME - use exponential backoff here + // + if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : + __kmp_xproc ) ) { + KMP_YIELD( TRUE ); + } + else { + KMP_YIELD_SPIN( spins ); + } + } + KMP_FSYNC_ACQUIRED( lck ); + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_acquire_tas_lock_timed_template( lck, gtid ); +} + +static int +__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_lock"; + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) { + KMP_FATAL( LockIsAlreadyOwned, func ); + } + return __kmp_acquire_tas_lock( lck, gtid ); +} + +int +__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) ) + && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) { + KMP_FSYNC_ACQUIRED( lck ); + return TRUE; + } + return FALSE; +} + +static int +__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_lock"; + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + return __kmp_test_tas_lock( lck, gtid ); +} + +int +__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + KMP_FSYNC_RELEASING(lck); + KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) ); + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : + __kmp_xproc ) ); + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) + && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + return __kmp_release_tas_lock( lck, gtid ); +} + +void +__kmp_init_tas_lock( kmp_tas_lock_t * lck ) +{ + TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) ); +} + +static void +__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) +{ + __kmp_init_tas_lock( lck ); +} + +void +__kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) +{ + lck->lk.poll = 0; +} + +static void +__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) +{ + char const * const func = "omp_destroy_lock"; + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_tas_lock( lck ); +} + + +// +// nested test and set locks +// + +int +__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } + else { + __kmp_acquire_tas_lock_timed_template( lck, gtid ); + lck->lk.depth_locked = 1; + 
return KMP_LOCK_ACQUIRED_FIRST; + } +} + +static int +__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_nest_lock"; + if ( ! __kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_acquire_nested_tas_lock( lck, gtid ); +} + +int +__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + int retval; + + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { + retval = ++lck->lk.depth_locked; + } + else if ( !__kmp_test_tas_lock( lck, gtid ) ) { + retval = 0; + } + else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + } + return retval; +} + +static int +__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_nest_lock"; + if ( ! __kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_test_nested_tas_lock( lck, gtid ); +} + +int +__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + KMP_MB(); + if ( --(lck->lk.depth_locked) == 0 ) { + __kmp_release_tas_lock( lck, gtid ); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; +} + +static int +__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + return __kmp_release_nested_tas_lock( lck, gtid ); +} + +void +__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) +{ + __kmp_init_tas_lock( lck ); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +} + +static void +__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) +{ + __kmp_init_nested_tas_lock( lck ); +} + +void +__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) +{ + __kmp_destroy_tas_lock( lck ); + lck->lk.depth_locked = 0; +} + +static void +__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) +{ + char const * const func = "omp_destroy_nest_lock"; + if ( ! __kmp_is_tas_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_nested_tas_lock( lck ); +} + + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +/* ------------------------------------------------------------------------ */ +/* futex locks */ + +// futex locks are really just test and set locks, with a different method +// of handling contention. They take the same amount of space as test and +// set locks, and are allocated the same way (i.e. use the area allocated by +// the compiler for non-nested locks / allocate nested locks on the heap). 
+ +static kmp_int32 +__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) +{ + return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; +} + +static inline bool +__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) +{ + return lck->lk.depth_locked != -1; +} + +__forceinline static int +__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + kmp_int32 gtid_code = ( gtid + 1 ) << 1; + + KMP_MB(); + +#ifdef USE_LOCK_PROFILE + kmp_uint32 curr = TCR_4( lck->lk.poll ); + if ( ( curr != 0 ) && ( curr != gtid_code ) ) + __kmp_printf( "LOCK CONTENTION: %p\n", lck ); + /* else __kmp_printf( "." );*/ +#endif /* USE_LOCK_PROFILE */ + + KMP_FSYNC_PREPARE( lck ); + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", + lck, lck->lk.poll, gtid ) ); + + kmp_int32 poll_val; + + while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), + KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) { + + kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", + lck, gtid, poll_val, cond ) ); + + // + // NOTE: if you try to use the following condition for this branch + // + // if ( poll_val & 1 == 0 ) + // + // Then the 12.0 compiler has a bug where the following block will + // always be skipped, regardless of the value of the LSB of poll_val. + // + if ( ! cond ) { + // + // Try to set the lsb in the poll to indicate to the owner + // thread that they need to wake this thread up. + // + if ( ! 
KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) { + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", + lck, lck->lk.poll, gtid ) ); + continue; + } + poll_val |= KMP_LOCK_BUSY(1, futex); + + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", + lck, lck->lk.poll, gtid ) ); + } + + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", + lck, gtid, poll_val ) ); + + kmp_int32 rc; + if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, + poll_val, NULL, NULL, 0 ) ) != 0 ) { + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", + lck, gtid, poll_val, rc, errno ) ); + continue; + } + + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", + lck, gtid, poll_val ) ); + // + // This thread has now done a successful futex wait call and was + // entered on the OS futex queue. We must now perform a futex + // wake call when releasing the lock, as we have no idea how many + // other threads are in the queue. 
+ // + gtid_code |= 1; + } + + KMP_FSYNC_ACQUIRED( lck ); + KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", + lck, lck->lk.poll, gtid ) ); + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_acquire_futex_lock_timed_template( lck, gtid ); +} + +static int +__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_lock"; + if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { + KMP_FATAL( LockIsAlreadyOwned, func ); + } + return __kmp_acquire_futex_lock( lck, gtid ); +} + +int +__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) { + KMP_FSYNC_ACQUIRED( lck ); + return TRUE; + } + return FALSE; +} + +static int +__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_lock"; + if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + return __kmp_test_futex_lock( lck, gtid ); +} + +int +__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", + lck, lck->lk.poll, gtid ) ); + + KMP_FSYNC_RELEASING(lck); + + kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) ); + + KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", + lck, gtid, poll_val ) ); + + if ( KMP_LOCK_STRIP(poll_val) & 1 ) { + KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", + lck, gtid ) ); + syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 ); + } + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", + lck, lck->lk.poll, gtid ) ); + + KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : + __kmp_xproc ) ); + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) + && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + return __kmp_release_futex_lock( lck, gtid ); +} + +void +__kmp_init_futex_lock( kmp_futex_lock_t * lck ) +{ + TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) ); +} + +static void +__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) +{ + __kmp_init_futex_lock( lck ); +} + +void +__kmp_destroy_futex_lock( kmp_futex_lock_t *lck ) +{ + lck->lk.poll = 0; +} + +static void +__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) +{ + char const * const func = 
"omp_destroy_lock"; + if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) + && __kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_futex_lock( lck ); +} + + +// +// nested futex locks +// + +int +__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } + else { + __kmp_acquire_futex_lock_timed_template( lck, gtid ); + lck->lk.depth_locked = 1; + return KMP_LOCK_ACQUIRED_FIRST; + } +} + +static int +__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_nest_lock"; + if ( ! __kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_acquire_nested_futex_lock( lck, gtid ); +} + +int +__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + int retval; + + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { + retval = ++lck->lk.depth_locked; + } + else if ( !__kmp_test_futex_lock( lck, gtid ) ) { + retval = 0; + } + else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + } + return retval; +} + +static int +__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_nest_lock"; + if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_test_nested_futex_lock( lck, gtid ); +} + +int +__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + KMP_MB(); + if ( --(lck->lk.depth_locked) == 0 ) { + __kmp_release_futex_lock( lck, gtid ); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; +} + +static int +__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( ! __kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + return __kmp_release_nested_futex_lock( lck, gtid ); +} + +void +__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) +{ + __kmp_init_futex_lock( lck ); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +} + +static void +__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) +{ + __kmp_init_nested_futex_lock( lck ); +} + +void +__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) +{ + __kmp_destroy_futex_lock( lck ); + lck->lk.depth_locked = 0; +} + +static void +__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) +{ + char const * const func = "omp_destroy_nest_lock"; + if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_nested_futex_lock( lck ); +} + +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) + + +/* ------------------------------------------------------------------------ */ +/* ticket (bakery) locks */ + +static kmp_int32 +__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) +{ + return TCR_4( lck->lk.owner_id ) - 1; +} + +static inline bool +__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) +{ + return lck->lk.depth_locked != -1; +} + +static kmp_uint32 +__kmp_bakery_check(kmp_uint value, kmp_uint checker) +{ kmp_uint32 pause; - - if (value == checker) { - return TRUE; - } - for (pause = checker - value; pause != 0; --pause); - return FALSE; -} - -__forceinline static int -__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 my_ticket; - KMP_MB(); - - my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket ); - -#ifdef USE_LOCK_PROFILE - if ( TCR_4( lck->lk.now_serving ) != my_ticket ) - __kmp_printf( "LOCK CONTENTION: %p\n", lck ); - /* else __kmp_printf( "." 
);*/ -#endif /* USE_LOCK_PROFILE */ - - if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { - KMP_FSYNC_ACQUIRED(lck); - return KMP_LOCK_ACQUIRED_FIRST; - } - KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); - KMP_FSYNC_ACQUIRED(lck); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_acquire_ticket_lock_timed_template( lck, gtid ); -} - -static int -__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - - __kmp_acquire_ticket_lock( lck, gtid ); - - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket ); - if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { - kmp_uint32 next_ticket = my_ticket + 1; - if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket, - my_ticket, next_ticket ) ) { - KMP_FSYNC_ACQUIRED( lck ); - return TRUE; - } - } - return FALSE; -} - -static int -__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - - int retval = __kmp_test_ticket_lock( lck, gtid ); - - if ( retval ) { - lck->lk.owner_id = gtid + 1; - } - return retval; -} - -int -__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 distance; - - KMP_MB(); /* Flush all pending memory 
write invalidates. */ - - KMP_FSYNC_RELEASING(lck); - distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) ); - - KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KMP_YIELD( distance - > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ); - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) - && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - lck->lk.owner_id = 0; - return __kmp_release_ticket_lock( lck, gtid ); -} - -void -__kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) -{ - lck->lk.location = NULL; - TCW_4( lck->lk.next_ticket, 0 ); - TCW_4( lck->lk.now_serving, 0 ); - lck->lk.owner_id = 0; // no thread owns the lock. - lck->lk.depth_locked = -1; // -1 => not a nested lock. 
- lck->lk.initialized = (kmp_ticket_lock *)lck; -} - -static void -__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) -{ - __kmp_init_ticket_lock( lck ); -} - -void -__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) -{ - lck->lk.initialized = NULL; - lck->lk.location = NULL; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; - lck->lk.depth_locked = -1; -} - -static void -__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_ticket_lock( lck ); -} - - -// -// nested ticket locks -// - -int -__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_ticket_lock_timed_template( lck, gtid ); - KMP_MB(); - lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int -__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_ticket_lock( lck, gtid ); -} - -int -__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - } - return retval; -} - -static int -__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_ticket_lock( lck, gtid ); -} - -int -__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - KMP_MB(); - lck->lk.owner_id = 0; - __kmp_release_ticket_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int -__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_ticket_lock( lck, gtid ); -} - -void -__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck ) -{ - __kmp_init_ticket_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -static void -__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) -{ - __kmp_init_nested_ticket_lock( lck ); -} - -void -__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); - lck->lk.depth_locked = 0; -} - -static void -__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_ticket_lock( lck ); -} - - -// -// access functions to fields which don't exist for all lock kinds. 
-// - -static int -__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) -{ - return lck == lck->lk.initialized; -} - -static const ident_t * -__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) -{ - return lck->lk.location; -} - -static void -__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) -{ - lck->lk.location = loc; -} - -static kmp_lock_flags_t -__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) -{ - return lck->lk.flags; -} - -static void -__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) -{ - lck->lk.flags = flags; -} - -/* ------------------------------------------------------------------------ */ -/* queuing locks */ - -/* - * First the states - * (head,tail) = 0, 0 means lock is unheld, nobody on queue - * UINT_MAX or -1, 0 means lock is held, nobody on queue - * h, h means lock is held or about to transition, 1 element on queue - * h, t h <> t, means lock is held or about to transition, >1 elements on queue - * - * Now the transitions - * Acquire(0,0) = -1 ,0 - * Release(0,0) = Error - * Acquire(-1,0) = h ,h h > 0 - * Release(-1,0) = 0 ,0 - * Acquire(h,h) = h ,t h > 0, t > 0, h <> t - * Release(h,h) = -1 ,0 h > 0 - * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' - * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t - * - * And pictorially - * - * - * +-----+ - * | 0, 0|------- release -------> Error - * +-----+ - * | ^ - * acquire| |release - * | | - * | | - * v | - * +-----+ - * |-1, 0| - * +-----+ - * | ^ - * acquire| |release - * | | - * | | - * v | - * +-----+ - * | h, h| - * +-----+ - * | ^ - * acquire| |release - * | | - * | | - * v | - * +-----+ - * | h, t|----- acquire, release loopback ---+ - * +-----+ | - * ^ | - * | | - * +------------------------------------+ - * - */ - -#ifdef DEBUG_QUEUING_LOCKS - -/* Stuff for circular trace buffer */ -#define TRACE_BUF_ELE 1024 -static char traces[TRACE_BUF_ELE][128] = { 0 } -static int tc = 0; 
-#define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y ); -#define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z ); -#define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q ); - -static void -__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid, - kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id ) -{ - kmp_int32 t, i; - - __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" ); - - i = tc % TRACE_BUF_ELE; - __kmp_printf_no_lock( "%s\n", traces[i] ); - i = (i+1) % TRACE_BUF_ELE; - while ( i != (tc % TRACE_BUF_ELE) ) { - __kmp_printf_no_lock( "%s", traces[i] ); - i = (i+1) % TRACE_BUF_ELE; - } - __kmp_printf_no_lock( "\n" ); - - __kmp_printf_no_lock( - "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", - gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, - head_id, tail_id ); - - __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); - - if ( lck->lk.head_id >= 1 ) { - t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; - while (t > 0) { - __kmp_printf_no_lock( "-> %d ", t ); - t = __kmp_threads[t-1]->th.th_next_waiting; - } - } - __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); - __kmp_printf_no_lock( "\n\n" ); -} - -#endif /* DEBUG_QUEUING_LOCKS */ - -static kmp_int32 -__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) -{ - return TCR_4( lck->lk.owner_id ) - 1; -} - -static inline bool -__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; -} - -/* Acquire a lock using a the queuing lock implementation */ -template -/* [TLW] The unused template above is left behind because of what BEB believes is a - potential compiler problem with __forceinline. 
*/ -__forceinline static int -__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ + + if (value == checker) { + return TRUE; + } + for (pause = checker - value; pause != 0; --pause); + return FALSE; +} + +__forceinline static int +__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + kmp_uint32 my_ticket; + KMP_MB(); + + my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket ); + +#ifdef USE_LOCK_PROFILE + if ( TCR_4( lck->lk.now_serving ) != my_ticket ) + __kmp_printf( "LOCK CONTENTION: %p\n", lck ); + /* else __kmp_printf( "." );*/ +#endif /* USE_LOCK_PROFILE */ + + if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { + KMP_FSYNC_ACQUIRED(lck); + return KMP_LOCK_ACQUIRED_FIRST; + } + KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); + KMP_FSYNC_ACQUIRED(lck); + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_acquire_ticket_lock_timed_template( lck, gtid ); +} + +static int +__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { + KMP_FATAL( LockIsAlreadyOwned, func ); + } + + __kmp_acquire_ticket_lock( lck, gtid ); + + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket ); + if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { + kmp_uint32 next_ticket = my_ticket + 1; + if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket, + my_ticket, next_ticket ) ) { + KMP_FSYNC_ACQUIRED( lck ); + return TRUE; + } 
+ } + return FALSE; +} + +static int +__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + + int retval = __kmp_test_ticket_lock( lck, gtid ); + + if ( retval ) { + lck->lk.owner_id = gtid + 1; + } + return retval; +} + +int +__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + kmp_uint32 distance; + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KMP_FSYNC_RELEASING(lck); + distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) ); + + KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KMP_YIELD( distance + > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ); + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) + && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + lck->lk.owner_id = 0; + return __kmp_release_ticket_lock( lck, gtid ); +} + +void +__kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) +{ + lck->lk.location = NULL; + TCW_4( lck->lk.next_ticket, 0 ); + TCW_4( lck->lk.now_serving, 0 ); + lck->lk.owner_id = 0; // no thread owns the lock. 
+ lck->lk.depth_locked = -1; // -1 => not a nested lock. + lck->lk.initialized = (kmp_ticket_lock *)lck; +} + +static void +__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) +{ + __kmp_init_ticket_lock( lck ); +} + +void +__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) +{ + lck->lk.initialized = NULL; + lck->lk.location = NULL; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; + lck->lk.depth_locked = -1; +} + +static void +__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) +{ + char const * const func = "omp_destroy_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_ticket_lock( lck ); +} + + +// +// nested ticket locks +// + +int +__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } + else { + __kmp_acquire_ticket_lock_timed_template( lck, gtid ); + KMP_MB(); + lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; + } +} + +static int +__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_acquire_nested_ticket_lock( lck, gtid ); +} + +int +__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + int retval; + + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { + retval = ++lck->lk.depth_locked; + } + else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { + retval = 0; + } + else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + } + return retval; +} + +static int +__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, + kmp_int32 gtid ) +{ + char const * const func = "omp_test_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_test_nested_ticket_lock( lck, gtid ); +} + +int +__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + KMP_MB(); + if ( --(lck->lk.depth_locked) == 0 ) { + KMP_MB(); + lck->lk.owner_id = 0; + __kmp_release_ticket_lock( lck, gtid ); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; +} + +static int +__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + return __kmp_release_nested_ticket_lock( lck, gtid ); +} + +void +__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck ) +{ + __kmp_init_ticket_lock( lck ); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +} + +static void +__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) +{ + __kmp_init_nested_ticket_lock( lck ); +} + +void +__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ) +{ + __kmp_destroy_ticket_lock( lck ); + lck->lk.depth_locked = 0; +} + +static void +__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) +{ + char const * const func = "omp_destroy_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_nested_ticket_lock( lck ); +} + + +// +// access functions to fields which don't exist for all lock kinds. 
+// + +static int +__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) +{ + return lck == lck->lk.initialized; +} + +static const ident_t * +__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) +{ + return lck->lk.location; +} + +static void +__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) +{ + lck->lk.location = loc; +} + +static kmp_lock_flags_t +__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) +{ + return lck->lk.flags; +} + +static void +__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) +{ + lck->lk.flags = flags; +} + +/* ------------------------------------------------------------------------ */ +/* queuing locks */ + +/* + * First the states + * (head,tail) = 0, 0 means lock is unheld, nobody on queue + * UINT_MAX or -1, 0 means lock is held, nobody on queue + * h, h means lock is held or about to transition, 1 element on queue + * h, t h <> t, means lock is held or about to transition, >1 elements on queue + * + * Now the transitions + * Acquire(0,0) = -1 ,0 + * Release(0,0) = Error + * Acquire(-1,0) = h ,h h > 0 + * Release(-1,0) = 0 ,0 + * Acquire(h,h) = h ,t h > 0, t > 0, h <> t + * Release(h,h) = -1 ,0 h > 0 + * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' + * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t + * + * And pictorially + * + * + * +-----+ + * | 0, 0|------- release -------> Error + * +-----+ + * | ^ + * acquire| |release + * | | + * | | + * v | + * +-----+ + * |-1, 0| + * +-----+ + * | ^ + * acquire| |release + * | | + * | | + * v | + * +-----+ + * | h, h| + * +-----+ + * | ^ + * acquire| |release + * | | + * | | + * v | + * +-----+ + * | h, t|----- acquire, release loopback ---+ + * +-----+ | + * ^ | + * | | + * +------------------------------------+ + * + */ + +#ifdef DEBUG_QUEUING_LOCKS + +/* Stuff for circular trace buffer */ +#define TRACE_BUF_ELE 1024 +static char traces[TRACE_BUF_ELE][128] = { 0 } +static int tc = 0; 
+#define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y ); +#define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z ); +#define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q ); + +static void +__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid, + kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id ) +{ + kmp_int32 t, i; + + __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" ); + + i = tc % TRACE_BUF_ELE; + __kmp_printf_no_lock( "%s\n", traces[i] ); + i = (i+1) % TRACE_BUF_ELE; + while ( i != (tc % TRACE_BUF_ELE) ) { + __kmp_printf_no_lock( "%s", traces[i] ); + i = (i+1) % TRACE_BUF_ELE; + } + __kmp_printf_no_lock( "\n" ); + + __kmp_printf_no_lock( + "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", + gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, + head_id, tail_id ); + + __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); + + if ( lck->lk.head_id >= 1 ) { + t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; + while (t > 0) { + __kmp_printf_no_lock( "-> %d ", t ); + t = __kmp_threads[t-1]->th.th_next_waiting; + } + } + __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); + __kmp_printf_no_lock( "\n\n" ); +} + +#endif /* DEBUG_QUEUING_LOCKS */ + +static kmp_int32 +__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) +{ + return TCR_4( lck->lk.owner_id ) - 1; +} + +static inline bool +__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck ) +{ + return lck->lk.depth_locked != -1; +} + +/* Acquire a lock using a the queuing lock implementation */ +template +/* [TLW] The unused template above is left behind because of what BEB believes is a + potential compiler problem with __forceinline. 
*/ +__forceinline static int +__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, + kmp_int32 gtid ) +{ kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); - volatile kmp_int32 *head_id_p = & lck->lk.head_id; - volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; - volatile kmp_uint32 *spin_here_p; - kmp_int32 need_mf = 1; - -#if OMPT_SUPPORT - ompt_state_t prev_state = ompt_state_undefined; -#endif - - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); - - KMP_FSYNC_PREPARE( lck ); - KMP_DEBUG_ASSERT( this_thr != NULL ); - spin_here_p = & this_thr->th.th_spin_here; - -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq ent" ); - if ( *spin_here_p ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); - if ( this_thr->th.th_next_waiting != 0 ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); -#endif - KMP_DEBUG_ASSERT( !*spin_here_p ); - KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); - - - /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p - that may follow, not just in execution order, but also in visibility order. This way, - when a releasing thread observes the changes to the queue by this thread, it can - rightly assume that spin_here_p has already been set to TRUE, so that when it sets - spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p - to FALSE before this thread sets it to TRUE, this thread will hang. 
- */ - *spin_here_p = TRUE; /* before enqueuing to prevent race */ - - while( 1 ) { - kmp_int32 enqueued; - kmp_int32 head; - kmp_int32 tail; - - head = *head_id_p; - - switch ( head ) { - - case -1: - { -#ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); -#endif - tail = 0; /* to make sure next link asynchronously read is not set accidentally; - this assignment prevents us from entering the if ( t > 0 ) - condition in the enqueued case below, which is not necessary for - this state transition */ - - need_mf = 0; - /* try (-1,0)->(tid,tid) */ - enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, - KMP_PACK_64( -1, 0 ), - KMP_PACK_64( gtid+1, gtid+1 ) ); -#ifdef DEBUG_QUEUING_LOCKS - if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); -#endif - } - break; - - default: - { - tail = *tail_id_p; - KMP_DEBUG_ASSERT( tail != gtid + 1 ); - -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); -#endif - - if ( tail == 0 ) { - enqueued = FALSE; - } - else { - need_mf = 0; - /* try (h,t) or (h,h)->(h,tid) */ - enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); - -#ifdef DEBUG_QUEUING_LOCKS - if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); -#endif - } - } - break; - - case 0: /* empty queue */ - { - kmp_int32 grabbed_lock; - -#ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); -#endif - /* try (0,0)->(-1,0) */ - - /* only legal transition out of head = 0 is head = -1 with no change to tail */ - grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); - - if ( grabbed_lock ) { - - *spin_here_p = FALSE; - - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", - lck, gtid )); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); -#endif - -#if OMPT_SUPPORT - if (ompt_enabled && prev_state != ompt_state_undefined) { - /* 
change the state before clearing wait_id */ - this_thr->th.ompt_thread_info.state = prev_state; - this_thr->th.ompt_thread_info.wait_id = 0; - } -#endif - - KMP_FSYNC_ACQUIRED( lck ); - return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ - } - enqueued = FALSE; - } - break; - } - -#if OMPT_SUPPORT - if (ompt_enabled && prev_state == ompt_state_undefined) { - /* this thread will spin; set wait_id before entering wait state */ - prev_state = this_thr->th.ompt_thread_info.state; - this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck; - this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; - } -#endif - - if ( enqueued ) { - if ( tail > 0 ) { - kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); - KMP_ASSERT( tail_thr != NULL ); - tail_thr->th.th_next_waiting = gtid+1; - /* corresponding wait for this write in release code */ - } - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); - - - /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for - * throughput only here. 
- */ - KMP_MB(); - KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); - -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq spin" ); - - if ( this_thr->th.th_next_waiting != 0 ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); -#endif - KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", - lck, gtid )); - -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq exit 2" ); -#endif - -#if OMPT_SUPPORT - /* change the state before clearing wait_id */ - this_thr->th.ompt_thread_info.state = prev_state; - this_thr->th.ompt_thread_info.wait_id = 0; -#endif - - /* got lock, we were dequeued by the thread that released lock */ - return KMP_LOCK_ACQUIRED_FIRST; - } - - /* Yield if number of threads > number of logical processors */ - /* ToDo: Not sure why this should only be in oversubscription case, - maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ - KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? 
__kmp_avail_proc : - __kmp_xproc ) ); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq retry" ); -#endif - - } - KMP_ASSERT2( 0, "should not get here" ); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - return __kmp_acquire_queuing_lock_timed_template( lck, gtid ); -} - -static int -__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - - __kmp_acquire_queuing_lock( lck, gtid ); - - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - volatile kmp_int32 *head_id_p = & lck->lk.head_id; - kmp_int32 head; -#ifdef KMP_DEBUG - kmp_info_t *this_thr; -#endif - - KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); - KMP_DEBUG_ASSERT( gtid >= 0 ); -#ifdef KMP_DEBUG - this_thr = __kmp_thread_from_gtid( gtid ); - KMP_DEBUG_ASSERT( this_thr != NULL ); - KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); -#endif - - head = *head_id_p; - - if ( head == 0 ) { /* nobody on queue, nobody holding */ - - /* try (0,0)->(-1,0) */ - - if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { - KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); - KMP_FSYNC_ACQUIRED(lck); - return TRUE; - } - } - - KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); - return FALSE; -} - -static int -__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( 
LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - - int retval = __kmp_test_queuing_lock( lck, gtid ); - - if ( retval ) { - lck->lk.owner_id = gtid + 1; - } - return retval; -} - -int -__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ + volatile kmp_int32 *head_id_p = & lck->lk.head_id; + volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; + volatile kmp_uint32 *spin_here_p; + kmp_int32 need_mf = 1; + +#if OMPT_SUPPORT + ompt_state_t prev_state = ompt_state_undefined; +#endif + + KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); + + KMP_FSYNC_PREPARE( lck ); + KMP_DEBUG_ASSERT( this_thr != NULL ); + spin_here_p = & this_thr->th.th_spin_here; + +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "acq ent" ); + if ( *spin_here_p ) + __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); + if ( this_thr->th.th_next_waiting != 0 ) + __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); +#endif + KMP_DEBUG_ASSERT( !*spin_here_p ); + KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); + + + /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p + that may follow, not just in execution order, but also in visibility order. This way, + when a releasing thread observes the changes to the queue by this thread, it can + rightly assume that spin_here_p has already been set to TRUE, so that when it sets + spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p + to FALSE before this thread sets it to TRUE, this thread will hang. 
+ */ + *spin_here_p = TRUE; /* before enqueuing to prevent race */ + + while( 1 ) { + kmp_int32 enqueued; + kmp_int32 head; + kmp_int32 tail; + + head = *head_id_p; + + switch ( head ) { + + case -1: + { +#ifdef DEBUG_QUEUING_LOCKS + tail = *tail_id_p; + TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); +#endif + tail = 0; /* to make sure next link asynchronously read is not set accidentally; + this assignment prevents us from entering the if ( t > 0 ) + condition in the enqueued case below, which is not necessary for + this state transition */ + + need_mf = 0; + /* try (-1,0)->(tid,tid) */ + enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, + KMP_PACK_64( -1, 0 ), + KMP_PACK_64( gtid+1, gtid+1 ) ); +#ifdef DEBUG_QUEUING_LOCKS + if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); +#endif + } + break; + + default: + { + tail = *tail_id_p; + KMP_DEBUG_ASSERT( tail != gtid + 1 ); + +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); +#endif + + if ( tail == 0 ) { + enqueued = FALSE; + } + else { + need_mf = 0; + /* try (h,t) or (h,h)->(h,tid) */ + enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); + +#ifdef DEBUG_QUEUING_LOCKS + if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); +#endif + } + } + break; + + case 0: /* empty queue */ + { + kmp_int32 grabbed_lock; + +#ifdef DEBUG_QUEUING_LOCKS + tail = *tail_id_p; + TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); +#endif + /* try (0,0)->(-1,0) */ + + /* only legal transition out of head = 0 is head = -1 with no change to tail */ + grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); + + if ( grabbed_lock ) { + + *spin_here_p = FALSE; + + KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", + lck, gtid )); +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); +#endif + +#if OMPT_SUPPORT + if (ompt_enabled && prev_state != ompt_state_undefined) { + /* 
change the state before clearing wait_id */ + this_thr->th.ompt_thread_info.state = prev_state; + this_thr->th.ompt_thread_info.wait_id = 0; + } +#endif + + KMP_FSYNC_ACQUIRED( lck ); + return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ + } + enqueued = FALSE; + } + break; + } + +#if OMPT_SUPPORT + if (ompt_enabled && prev_state == ompt_state_undefined) { + /* this thread will spin; set wait_id before entering wait state */ + prev_state = this_thr->th.ompt_thread_info.state; + this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck; + this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; + } +#endif + + if ( enqueued ) { + if ( tail > 0 ) { + kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); + KMP_ASSERT( tail_thr != NULL ); + tail_thr->th.th_next_waiting = gtid+1; + /* corresponding wait for this write in release code */ + } + KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); + + + /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for + * throughput only here. 
+ */ + KMP_MB(); + KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); + +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "acq spin" ); + + if ( this_thr->th.th_next_waiting != 0 ) + __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); +#endif + KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); + KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", + lck, gtid )); + +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "acq exit 2" ); +#endif + +#if OMPT_SUPPORT + /* change the state before clearing wait_id */ + this_thr->th.ompt_thread_info.state = prev_state; + this_thr->th.ompt_thread_info.wait_id = 0; +#endif + + /* got lock, we were dequeued by the thread that released lock */ + return KMP_LOCK_ACQUIRED_FIRST; + } + + /* Yield if number of threads > number of logical processors */ + /* ToDo: Not sure why this should only be in oversubscription case, + maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ + KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? 
__kmp_avail_proc : + __kmp_xproc ) ); +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "acq retry" ); +#endif + + } + KMP_ASSERT2( 0, "should not get here" ); + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + return __kmp_acquire_queuing_lock_timed_template( lck, gtid ); +} + +static int +__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, + kmp_int32 gtid ) +{ + char const * const func = "omp_set_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { + KMP_FATAL( LockIsAlreadyOwned, func ); + } + + __kmp_acquire_queuing_lock( lck, gtid ); + + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + volatile kmp_int32 *head_id_p = & lck->lk.head_id; + kmp_int32 head; +#ifdef KMP_DEBUG kmp_info_t *this_thr; - volatile kmp_int32 *head_id_p = & lck->lk.head_id; - volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; - - KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); - KMP_DEBUG_ASSERT( gtid >= 0 ); - this_thr = __kmp_thread_from_gtid( gtid ); - KMP_DEBUG_ASSERT( this_thr != NULL ); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel ent" ); - - if ( this_thr->th.th_spin_here ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); - if ( this_thr->th.th_next_waiting != 0 ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); -#endif - KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); - KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); - - KMP_FSYNC_RELEASING(lck); - - while( 1 ) { - kmp_int32 dequeued; - kmp_int32 head; - kmp_int32 tail; - - head = *head_id_p; - -#ifdef DEBUG_QUEUING_LOCKS - tail 
= *tail_id_p; - TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); - if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); -#endif - KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ - - if ( head == -1 ) { /* nobody on queue */ - - /* try (-1,0)->(0,0) */ - if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { - KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", - lck, gtid )); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); -#endif - -#if OMPT_SUPPORT - /* nothing to do - no other thread is trying to shift blame */ -#endif - - return KMP_LOCK_RELEASED; - } - dequeued = FALSE; - - } - else { - - tail = *tail_id_p; - if ( head == tail ) { /* only one thread on the queue */ - -#ifdef DEBUG_QUEUING_LOCKS - if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); -#endif - KMP_DEBUG_ASSERT( head > 0 ); - - /* try (h,h)->(-1,0) */ - dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, - KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); -#endif - - } - else { - volatile kmp_int32 *waiting_id_p; - kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); - KMP_DEBUG_ASSERT( head_thr != NULL ); - waiting_id_p = & head_thr->th.th_next_waiting; - - /* Does this require synchronous reads? 
*/ -#ifdef DEBUG_QUEUING_LOCKS - if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); -#endif - KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); - - /* try (h,t)->(h',t) or (t,t) */ - - KMP_MB(); - /* make sure enqueuing thread has time to update next waiting thread field */ - *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); -#endif - dequeued = TRUE; - } - } - - if ( dequeued ) { - kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); - KMP_DEBUG_ASSERT( head_thr != NULL ); - - /* Does this require synchronous reads? */ -#ifdef DEBUG_QUEUING_LOCKS - if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); -#endif - KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); - - /* For clean code only. - * Thread not released until next statement prevents race with acquire code. - */ - head_thr->th.th_next_waiting = 0; -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); -#endif - - KMP_MB(); - /* reset spin value */ - head_thr->th.th_spin_here = FALSE; - - KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", - lck, gtid )); -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel exit 2" ); -#endif - return KMP_LOCK_RELEASED; - } - /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ - -#ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel retry" ); -#endif - - } /* while */ - KMP_ASSERT2( 0, "should not get here" ); - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( 
LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - lck->lk.owner_id = 0; - return __kmp_release_queuing_lock( lck, gtid ); -} - -void -__kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) -{ - lck->lk.location = NULL; - lck->lk.head_id = 0; - lck->lk.tail_id = 0; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; // no thread owns the lock. - lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. - lck->lk.initialized = lck; - - KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); -} - -static void -__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) -{ - __kmp_init_queuing_lock( lck ); -} - -void -__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) -{ - lck->lk.initialized = NULL; - lck->lk.location = NULL; - lck->lk.head_id = 0; - lck->lk.tail_id = 0; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; - lck->lk.depth_locked = -1; -} - -static void -__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_queuing_lock( lck ); -} - - -// -// nested queuing locks -// - -int -__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_queuing_lock_timed_template( lck, gtid ); - KMP_MB(); - lck->lk.depth_locked = 1; - KMP_MB(); - 
lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int -__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_queuing_lock( lck, gtid ); -} - -int -__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - } - return retval; -} - -static int -__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_queuing_lock( lck, gtid ); -} - -int -__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - KMP_MB(); - lck->lk.owner_id = 0; - __kmp_release_queuing_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int -__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_queuing_lock( lck, gtid ); -} - -void -__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) -{ - __kmp_init_queuing_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -static void -__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) -{ - __kmp_init_nested_queuing_lock( lck ); -} - -void -__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) -{ - __kmp_destroy_queuing_lock( lck ); - lck->lk.depth_locked = 0; -} - -static void -__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_queuing_lock( lck ); -} - - -// -// access functions to fields which don't exist for all lock kinds. 
-// - -static int -__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) -{ - return lck == lck->lk.initialized; -} - -static const ident_t * -__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) -{ - return lck->lk.location; -} - -static void -__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) -{ - lck->lk.location = loc; -} - -static kmp_lock_flags_t -__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) -{ - return lck->lk.flags; -} - -static void -__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) -{ - lck->lk.flags = flags; -} - -#if KMP_USE_ADAPTIVE_LOCKS - -/* - RTM Adaptive locks -*/ - -#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 - -#include -#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) - -#else - -// Values from the status register after failed speculation. -#define _XBEGIN_STARTED (~0u) -#define _XABORT_EXPLICIT (1 << 0) -#define _XABORT_RETRY (1 << 1) -#define _XABORT_CONFLICT (1 << 2) -#define _XABORT_CAPACITY (1 << 3) -#define _XABORT_DEBUG (1 << 4) -#define _XABORT_NESTED (1 << 5) -#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) - -// Aborts for which it's worth trying again immediately -#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) - -#define STRINGIZE_INTERNAL(arg) #arg -#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) - -// Access to RTM instructions - -/* - A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. - This is the same definition as the compiler intrinsic that will be supported at some point. 
-*/ -static __inline int _xbegin() -{ - int res = -1; - -#if KMP_OS_WINDOWS -#if KMP_ARCH_X86_64 - _asm { - _emit 0xC7 - _emit 0xF8 - _emit 2 - _emit 0 - _emit 0 - _emit 0 - jmp L2 - mov res, eax - L2: - } -#else /* IA32 */ - _asm { - _emit 0xC7 - _emit 0xF8 - _emit 2 - _emit 0 - _emit 0 - _emit 0 - jmp L2 - mov res, eax - L2: - } -#endif // KMP_ARCH_X86_64 -#else - /* Note that %eax must be noted as killed (clobbered), because - * the XSR is returned in %eax(%rax) on abort. Other register - * values are restored, so don't need to be killed. - * - * We must also mark 'res' as an input and an output, since otherwise - * 'res=-1' may be dropped as being dead, whereas we do need the - * assignment on the successful (i.e., non-abort) path. - */ - __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" - " .long 1f-1b-6\n" - " jmp 2f\n" - "1: movl %%eax,%0\n" - "2:" - :"+r"(res)::"memory","%eax"); -#endif // KMP_OS_WINDOWS - return res; -} - -/* - Transaction end -*/ -static __inline void _xend() -{ -#if KMP_OS_WINDOWS - __asm { - _emit 0x0f - _emit 0x01 - _emit 0xd5 - } -#else - __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); -#endif -} - -/* - This is a macro, the argument must be a single byte constant which - can be evaluated by the inline assembler, since it is emitted as a - byte into the assembly code. -*/ -#if KMP_OS_WINDOWS -#define _xabort(ARG) \ - _asm _emit 0xc6 \ - _asm _emit 0xf8 \ - _asm _emit ARG -#else -#define _xabort(ARG) \ - __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); -#endif - -#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 - -// -// Statistics is collected for testing purpose -// -#if KMP_DEBUG_ADAPTIVE_LOCKS - -// We accumulate speculative lock statistics when the lock is destroyed. -// We keep locks that haven't been destroyed in the liveLocks list -// so that we can grab their statistics too. -static kmp_adaptive_lock_statistics_t destroyedStats; - -// To hold the list of live locks. 
-static kmp_adaptive_lock_info_t liveLocks; - -// A lock so we can safely update the list of locks. -static kmp_bootstrap_lock_t chain_lock; - -// Initialize the list of stats. -void -__kmp_init_speculative_stats() -{ - kmp_adaptive_lock_info_t *lck = &liveLocks; - - memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); - lck->stats.next = lck; - lck->stats.prev = lck; - - KMP_ASSERT( lck->stats.next->stats.prev == lck ); - KMP_ASSERT( lck->stats.prev->stats.next == lck ); - - __kmp_init_bootstrap_lock( &chain_lock ); - -} - -// Insert the lock into the circular list -static void -__kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) -{ - __kmp_acquire_bootstrap_lock( &chain_lock ); - - lck->stats.next = liveLocks.stats.next; - lck->stats.prev = &liveLocks; - - liveLocks.stats.next = lck; - lck->stats.next->stats.prev = lck; - - KMP_ASSERT( lck->stats.next->stats.prev == lck ); - KMP_ASSERT( lck->stats.prev->stats.next == lck ); - - __kmp_release_bootstrap_lock( &chain_lock ); -} - -static void -__kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) -{ - KMP_ASSERT( lck->stats.next->stats.prev == lck ); - KMP_ASSERT( lck->stats.prev->stats.next == lck ); - - kmp_adaptive_lock_info_t * n = lck->stats.next; - kmp_adaptive_lock_info_t * p = lck->stats.prev; - - n->stats.prev = p; - p->stats.next = n; -} - -static void -__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) -{ - memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); - __kmp_remember_lock( lck ); -} - -static void -__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) -{ - kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; - - t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; - t->successfulSpeculations += s->successfulSpeculations; - t->hardFailedSpeculations += s->hardFailedSpeculations; - t->softFailedSpeculations += s->softFailedSpeculations; - t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; - t->lemmingYields += 
s->lemmingYields; -} - -static void -__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) -{ - kmp_adaptive_lock_statistics_t *t = &destroyedStats; - - __kmp_acquire_bootstrap_lock( &chain_lock ); - - __kmp_add_stats( &destroyedStats, lck ); - __kmp_forget_lock( lck ); - - __kmp_release_bootstrap_lock( &chain_lock ); -} - -static float -percent (kmp_uint32 count, kmp_uint32 total) -{ - return (total == 0) ? 0.0: (100.0 * count)/total; -} - -static -FILE * __kmp_open_stats_file() -{ - if (strcmp (__kmp_speculative_statsfile, "-") == 0) - return stdout; - - size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; - char buffer[buffLen]; - KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, - (kmp_int32)getpid()); - FILE * result = fopen(&buffer[0], "w"); - - // Maybe we should issue a warning here... - return result ? result : stdout; -} - -void -__kmp_print_speculative_stats() -{ - if (__kmp_user_lock_kind != lk_adaptive) - return; - - FILE * statsFile = __kmp_open_stats_file(); - - kmp_adaptive_lock_statistics_t total = destroyedStats; - kmp_adaptive_lock_info_t *lck; - - for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { - __kmp_add_stats( &total, lck ); - } - kmp_adaptive_lock_statistics_t *t = &total; - kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; - kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + - t->softFailedSpeculations; - - fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); - fprintf ( statsFile, " Lock parameters: \n" - " max_soft_retries : %10d\n" - " max_badness : %10d\n", - __kmp_adaptive_backoff_params.max_soft_retries, - __kmp_adaptive_backoff_params.max_badness); - fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); - fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); - fprintf( statsFile, " Successful speculations : %10d 
(%5.1f%%)\n", - t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); - fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", - t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); - fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); - - fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); - fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", - t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); - fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", - t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); - fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", - t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); - - if (statsFile != stdout) - fclose( statsFile ); -} - -# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) -#else -# define KMP_INC_STAT(lck,stat) - -#endif // KMP_DEBUG_ADAPTIVE_LOCKS - -static inline bool -__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) -{ - // It is enough to check that the head_id is zero. - // We don't also need to check the tail. - bool res = lck->lk.head_id == 0; - - // We need a fence here, since we must ensure that no memory operations - // from later in this thread float above that read. -#if KMP_COMPILER_ICC - _mm_mfence(); -#else - __sync_synchronize(); -#endif - - return res; -} - -// Functions for manipulating the badness -static __inline void -__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) -{ - // Reset the badness to zero so we eagerly try to speculate again - lck->lk.adaptive.badness = 0; - KMP_INC_STAT(lck,successfulSpeculations); -} - -// Create a bit mask with one more set bit. 
-static __inline void -__kmp_step_badness( kmp_adaptive_lock_t *lck ) -{ - kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; - if ( newBadness > lck->lk.adaptive.max_badness) { - return; - } else { - lck->lk.adaptive.badness = newBadness; - } -} - -// Check whether speculation should be attempted. -static __inline int -__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 badness = lck->lk.adaptive.badness; - kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; - int res = (attempts & badness) == 0; - return res; -} - -// Attempt to acquire only the speculative lock. -// Does not back off to the non-speculative lock. -// -static int -__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) -{ - int retries = lck->lk.adaptive.max_soft_retries; - - // We don't explicitly count the start of speculation, rather we record - // the results (success, hard fail, soft fail). The sum of all of those - // is the total number of times we started speculation since all - // speculations must end one of those ways. - do - { - kmp_uint32 status = _xbegin(); - // Switch this in to disable actual speculation but exercise - // at least some of the rest of the code. Useful for debugging... - // kmp_uint32 status = _XABORT_NESTED; - - if (status == _XBEGIN_STARTED ) - { /* We have successfully started speculation - * Check that no-one acquired the lock for real between when we last looked - * and now. This also gets the lock cache line into our read-set, - * which we need so that we'll abort if anyone later claims it for real. - */ - if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { - // Lock is now visibly acquired, so someone beat us to it. - // Abort the transaction so we'll restart from _xbegin with the - // failure status. 
- _xabort(0x01); - KMP_ASSERT2( 0, "should not get here" ); - } - return 1; // Lock has been acquired (speculatively) - } else { - // We have aborted, update the statistics - if ( status & SOFT_ABORT_MASK) - { - KMP_INC_STAT(lck,softFailedSpeculations); - // and loop round to retry. - } - else - { - KMP_INC_STAT(lck,hardFailedSpeculations); - // Give up if we had a hard failure. - break; - } - } - } while( retries-- ); // Loop while we have retries, and didn't fail hard. - - // Either we had a hard failure or we didn't succeed softly after - // the full set of attempts, so back off the badness. - __kmp_step_badness( lck ); - return 0; -} - -// Attempt to acquire the speculative lock, or back off to the non-speculative one -// if the speculative lock cannot be acquired. -// We can succeed speculatively, non-speculatively, or fail. -static int -__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - // First try to acquire the lock speculatively - if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) - return 1; - - // Speculative acquisition failed, so try to acquire it non-speculatively. - // Count the non-speculative acquire attempt - lck->lk.adaptive.acquire_attempts++; - - // Use base, non-speculative lock. - if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) - { - KMP_INC_STAT(lck,nonSpeculativeAcquires); - return 1; // Lock is acquired (non-speculatively) - } - else - { - return 0; // Failed to acquire the lock, it's already visibly locked. - } -} - -static int -__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - - int retval = __kmp_test_adaptive_lock( lck, gtid ); - - if ( retval ) { - lck->lk.qlk.owner_id = gtid + 1; - } - return retval; -} - -// Block until we can acquire a speculative, adaptive lock. 
-// We check whether we should be trying to speculate. -// If we should be, we check the real lock to see if it is free, -// and, if not, pause without attempting to acquire it until it is. -// Then we try the speculative acquire. -// This means that although we suffer from lemmings a little ( -// because all we can't acquire the lock speculatively until -// the queue of threads waiting has cleared), we don't get into a -// state where we can never acquire the lock speculatively (because we -// force the queue to clear by preventing new arrivals from entering the -// queue). -// This does mean that when we're trying to break lemmings, the lock -// is no longer fair. However OpenMP makes no guarantee that its -// locks are fair, so this isn't a real problem. -static void -__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) -{ - if ( __kmp_should_speculate( lck, gtid ) ) - { - if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { - if ( __kmp_test_adaptive_lock_only( lck , gtid ) ) - return; - // We tried speculation and failed, so give up. - } - else - { - // We can't try speculation until the lock is free, so we - // pause here (without suspending on the queueing lock, - // to allow it to drain, then try again. - // All other threads will also see the same result for - // shouldSpeculate, so will be doing the same if they - // try to claim the lock from now on. - while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { - KMP_INC_STAT(lck,lemmingYields); - __kmp_yield (TRUE); - } - - if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) - return; - } - } - - // Speculative acquisition failed, so acquire it non-speculatively. - // Count the non-speculative acquire attempt - lck->lk.adaptive.acquire_attempts++; - - __kmp_acquire_queuing_lock_timed_template( GET_QLK_PTR(lck), gtid ); - // We have acquired the base lock, so count that. 
- KMP_INC_STAT(lck,nonSpeculativeAcquires ); -} - -static void -__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - - __kmp_acquire_adaptive_lock( lck, gtid ); - - lck->lk.qlk.owner_id = gtid + 1; -} - -static int -__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { // If the lock doesn't look claimed we must be speculating. - // (Or the user's code is buggy and they're releasing without locking; - // if we had XTEST we'd be able to check that case...) - _xend(); // Exit speculation - __kmp_update_badness_after_success( lck ); - } - else - { // Since the lock *is* visibly locked we're not speculating, - // so should use the underlying lock's release scheme. 
- __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); - } - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - lck->lk.qlk.owner_id = 0; - __kmp_release_adaptive_lock( lck, gtid ); - return KMP_LOCK_RELEASED; -} - -static void -__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) -{ - __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); - lck->lk.adaptive.badness = 0; - lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; - lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; - lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; -#if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_zero_speculative_stats( &lck->lk.adaptive ); -#endif - KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); -} - -static void -__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) -{ - __kmp_init_adaptive_lock( lck ); -} - -static void -__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) -{ -#if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); -#endif - __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); - // Nothing needed for the speculative part. 
-} - -static void -__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_adaptive_lock( lck ); -} - - -#endif // KMP_USE_ADAPTIVE_LOCKS - - -/* ------------------------------------------------------------------------ */ -/* DRDPA ticket locks */ -/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ - -static kmp_int32 -__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) -{ - return TCR_4( lck->lk.owner_id ) - 1; -} - -static inline bool -__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; -} - -__forceinline static int -__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); - kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls - = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load - -#ifdef USE_LOCK_PROFILE - if (TCR_8(polls[ticket & mask].poll) != ticket) - __kmp_printf("LOCK CONTENTION: %p\n", lck); - /* else __kmp_printf( "." );*/ -#endif /* USE_LOCK_PROFILE */ - - // - // Now spin-wait, but reload the polls pointer and mask, in case the - // polling area has been reconfigured. Unless it is reconfigured, the - // reloads stay in L1 cache and are cheap. - // - // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! - // - // The current implementation of KMP_WAIT_YIELD doesn't allow for mask - // and poll to be re-read every spin iteration. 
- // - kmp_uint32 spins; - - KMP_FSYNC_PREPARE(lck); - KMP_INIT_YIELD(spins); - while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load - // If we are oversubscribed, - // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. - // CPU Pause is in the macros for yield. - // - KMP_YIELD(TCR_4(__kmp_nth) - > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); - KMP_YIELD_SPIN(spins); - - // Re-read the mask and the poll pointer from the lock structure. - // - // Make certain that "mask" is read before "polls" !!! - // - // If another thread picks reconfigures the polling area and updates - // their values, and we get the new value of mask and the old polls - // pointer, we could access memory beyond the end of the old polling - // area. - // - mask = TCR_8(lck->lk.mask); // volatile load - polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load - } - - // - // Critical section starts here - // - KMP_FSYNC_ACQUIRED(lck); - KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", - ticket, lck)); - lck->lk.now_serving = ticket; // non-volatile store - - // - // Deallocate a garbage polling area if we know that we are the last - // thread that could possibly access it. - // - // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup - // ticket. - // - if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { - __kmp_free((void *)lck->lk.old_polls); - lck->lk.old_polls = NULL; - lck->lk.cleanup_ticket = 0; - } - - // - // Check to see if we should reconfigure the polling area. - // If there is still a garbage polling area to be deallocated from a - // previous reconfiguration, let a later thread reconfigure it. - // - if (lck->lk.old_polls == NULL) { - bool reconfigure = false; - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; - kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); - - if (TCR_4(__kmp_nth) - > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { - // - // We are in oversubscription mode. Contract the polling area - // down to a single location, if that hasn't been done already. - // - if (num_polls > 1) { - reconfigure = true; - num_polls = TCR_4(lck->lk.num_polls); - mask = 0; - num_polls = 1; - polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - __kmp_allocate(num_polls * sizeof(*polls)); - polls[0].poll = ticket; - } - } - else { - // - // We are in under/fully subscribed mode. Check the number of - // threads waiting on the lock. The size of the polling area - // should be at least the number of threads waiting. - // - kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; - if (num_waiting > num_polls) { - kmp_uint32 old_num_polls = num_polls; - reconfigure = true; - do { - mask = (mask << 1) | 1; - num_polls *= 2; - } while (num_polls <= num_waiting); - - // - // Allocate the new polling area, and copy the relevant portion - // of the old polling area to the new area. __kmp_allocate() - // zeroes the memory it allocates, and most of the old area is - // just zero padding, so we only copy the release counters. - // - polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - __kmp_allocate(num_polls * sizeof(*polls)); - kmp_uint32 i; - for (i = 0; i < old_num_polls; i++) { - polls[i].poll = old_polls[i].poll; - } - } - } - - if (reconfigure) { - // - // Now write the updated fields back to the lock structure. - // - // Make certain that "polls" is written before "mask" !!! - // - // If another thread picks up the new value of mask and the old - // polls pointer , it could access memory beyond the end of the - // old polling area. - // - // On x86, we need memory fences. 
- // - KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", - ticket, lck, num_polls)); - - lck->lk.old_polls = old_polls; // non-volatile store - lck->lk.polls = polls; // volatile store - - KMP_MB(); - - lck->lk.num_polls = num_polls; // non-volatile store - lck->lk.mask = mask; // volatile store - - KMP_MB(); - - // - // Only after the new polling area and mask have been flushed - // to main memory can we update the cleanup ticket field. - // - // volatile load / non-volatile store - // - lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); - } - } - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); -} - -static int -__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - - __kmp_acquire_drdpa_lock( lck, gtid ); - - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - // - // First get a ticket, then read the polls pointer and the mask. - // The polls pointer must be read before the mask!!! 
(See above) - // - kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls - = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load - kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load - if (TCR_8(polls[ticket & mask].poll) == ticket) { - kmp_uint64 next_ticket = ticket + 1; - if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, - ticket, next_ticket)) { - KMP_FSYNC_ACQUIRED(lck); - KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", - ticket, lck)); - lck->lk.now_serving = ticket; // non-volatile store - - // - // Since no threads are waiting, there is no possibility that - // we would want to reconfigure the polling area. We might - // have the cleanup ticket value (which says that it is now - // safe to deallocate old_polls), but we'll let a later thread - // which calls __kmp_acquire_lock do that - this routine - // isn't supposed to block, and we would risk blocks if we - // called __kmp_free() to do the deallocation. - // - return TRUE; - } - } - return FALSE; -} - -static int -__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - - int retval = __kmp_test_drdpa_lock( lck, gtid ); - - if ( retval ) { - lck->lk.owner_id = gtid + 1; - } - return retval; -} - -int -__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - // - // Read the ticket value from the lock data struct, then the polls - // pointer and the mask. The polls pointer must be read before the - // mask!!! 
(See above) - // - kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls - = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load - kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load - KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", - ticket - 1, lck)); - KMP_FSYNC_RELEASING(lck); - KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) - && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - lck->lk.owner_id = 0; - return __kmp_release_drdpa_lock( lck, gtid ); -} - -void -__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) -{ - lck->lk.location = NULL; - lck->lk.mask = 0; - lck->lk.num_polls = 1; - lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); - lck->lk.cleanup_ticket = 0; - lck->lk.old_polls = NULL; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; // no thread owns the lock. - lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
- lck->lk.initialized = lck; - - KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); -} - -static void -__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) -{ - __kmp_init_drdpa_lock( lck ); -} - -void -__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) -{ - lck->lk.initialized = NULL; - lck->lk.location = NULL; - if (lck->lk.polls != NULL) { - __kmp_free((void *)lck->lk.polls); - lck->lk.polls = NULL; - } - if (lck->lk.old_polls != NULL) { - __kmp_free((void *)lck->lk.old_polls); - lck->lk.old_polls = NULL; - } - lck->lk.mask = 0; - lck->lk.num_polls = 0; - lck->lk.cleanup_ticket = 0; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; - lck->lk.depth_locked = -1; -} - -static void -__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_drdpa_lock( lck ); -} - - -// -// nested drdpa ticket locks -// - -int -__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); - KMP_MB(); - lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static void -__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - __kmp_acquire_nested_drdpa_lock( lck, gtid ); -} - -int -__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - } - return retval; -} - -static int -__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_drdpa_lock( lck, gtid ); -} - -int -__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - KMP_MB(); - lck->lk.owner_id = 0; - __kmp_release_drdpa_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; -} - -static int -__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_drdpa_lock( lck, gtid ); -} - -void -__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) -{ - __kmp_init_drdpa_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -static void -__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) -{ - __kmp_init_nested_drdpa_lock( lck ); -} - -void -__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) -{ - __kmp_destroy_drdpa_lock( lck ); - lck->lk.depth_locked = 0; -} - -static void -__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_drdpa_lock( lck ); -} - - -// -// access functions to fields which don't exist for all lock kinds. -// - -static int -__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) -{ - return lck == lck->lk.initialized; -} - -static const ident_t * -__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) -{ - return lck->lk.location; -} - -static void -__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) -{ - lck->lk.location = loc; -} - -static kmp_lock_flags_t -__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) -{ - return lck->lk.flags; -} - -static void -__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) -{ - lck->lk.flags = flags; -} - -#if KMP_USE_DYNAMIC_LOCK - -// Direct lock initializers. 
It simply writes a tag to the low 8 bits of the lock word. -static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) -{ - TCW_4(*lck, KMP_GET_D_TAG(seq)); - KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); -} - -#if KMP_USE_TSX - -// HLE lock functions - imported from the testbed runtime. -#define HLE_ACQUIRE ".byte 0xf2;" -#define HLE_RELEASE ".byte 0xf3;" - -static inline kmp_uint32 -swap4(kmp_uint32 volatile *p, kmp_uint32 v) -{ - __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" - : "+r"(v), "+m"(*p) - : - : "memory"); - return v; -} - -static void -__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) -{ - TCW_4(*lck, 0); -} - -static void -__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - // Use gtid for KMP_LOCK_BUSY if necessary - if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { - int delay = 1; - do { - while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { - for (int i = delay; i != 0; --i) - KMP_CPU_PAUSE(); - delay = ((delay << 1) | 1) & 7; - } - } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); - } -} - -static void -__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks -} - -static int -__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - __asm__ volatile(HLE_RELEASE "movl %1,%0" - : "=m"(*lck) - : "r"(KMP_LOCK_FREE(hle)) - : "memory"); - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - return __kmp_release_hle_lock(lck, gtid); // TODO: add checks -} - -static int -__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); -} - -static int -__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - return __kmp_test_hle_lock(lck, gtid); // TODO: add checks -} - -static void 
-__kmp_init_rtm_lock(kmp_queuing_lock_t *lck) -{ - __kmp_init_queuing_lock(lck); -} - -static void -__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) -{ - __kmp_destroy_queuing_lock(lck); -} - -static void -__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - unsigned retries=3, status; - do { - status = _xbegin(); - if (status == _XBEGIN_STARTED) { - if (__kmp_is_unlocked_queuing_lock(lck)) - return; - _xabort(0xff); - } - if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { - // Wait until lock becomes free - while (! __kmp_is_unlocked_queuing_lock(lck)) - __kmp_yield(TRUE); - } - else if (!(status & _XABORT_RETRY)) - break; - } while (retries--); - - // Fall-back non-speculative lock (xchg) - __kmp_acquire_queuing_lock(lck, gtid); -} - -static void -__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - __kmp_acquire_rtm_lock(lck, gtid); -} - -static int -__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - if (__kmp_is_unlocked_queuing_lock(lck)) { - // Releasing from speculation - _xend(); - } - else { - // Releasing from a real lock - __kmp_release_queuing_lock(lck, gtid); - } - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - return __kmp_release_rtm_lock(lck, gtid); -} - -static int -__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - unsigned retries=3, status; - do { - status = _xbegin(); - if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { - return 1; - } - if (!(status & _XABORT_RETRY)) - break; - } while (retries--); - - return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0; -} - -static int -__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - return __kmp_test_rtm_lock(lck, gtid); -} - -#endif // KMP_USE_TSX - -// Entry functions for indirect locks (first element of direct lock jump tables). 
-static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); -static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); -static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); -static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); - -// -// Jump tables for the indirect lock functions. -// Only fill in the odd entries, that avoids the need to shift out the low bit. -// - -// init functions -#define expand(l, op) 0,__kmp_init_direct_lock, -void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) - = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) }; -#undef expand - -// destroy functions -#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, -void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) - = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) }; -#undef expand - -// set/acquire functions -#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; -#undef expand -#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; -#undef expand - -// unset/release and test functions -#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_unset_indirect_lock, 0, 
KMP_FOREACH_D_LOCK(expand, release) }; -static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) }; -#undef expand -#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) }; -static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) }; -#undef expand - -// Exposes only one set of jump tables (*lock or *lock_with_checks). -void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; -int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; -int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; - -// -// Jump tables for the indirect lock functions. -// -#define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, -void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) }; -void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) }; -#undef expand - -// set/acquire functions -#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, -static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; -#undef expand -#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, -static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; -#undef expand - -// unset/release and test functions -#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, -static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; -static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; -#undef expand 
-#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, -static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; -static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; -#undef expand - -// Exposes only one jump tables (*lock or *lock_with_checks). -void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; -int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; -int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; - -// Lock index table. -kmp_indirect_lock_table_t __kmp_i_lock_table; - -// Size of indirect locks. -static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 }; - -// Jump tables for lock accessor/modifier. -void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; -void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; -const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; -kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; - -// Use different lock pools for different lock types. -static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 }; - -// User lock allocator for dynamically dispatched indirect locks. -// Every entry of the indirect lock table holds the address and type of the allocated indrect lock -// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock -// object is returned to the reusable pool of locks, unique to each lock type. 
-kmp_indirect_lock_t * -__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) -{ - kmp_indirect_lock_t *lck; - kmp_lock_index_t idx; - - __kmp_acquire_lock(&__kmp_global_lock, gtid); - - if (__kmp_indirect_lock_pool[tag] != NULL) { - // Reuse the allocated and destroyed lock object - lck = __kmp_indirect_lock_pool[tag]; - if (OMP_LOCK_T_SIZE < sizeof(void *)) - idx = lck->lock->pool.index; - __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; - KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck)); - } else { - idx = __kmp_i_lock_table.next; - // Check capacity and double the size if it is full - if (idx == __kmp_i_lock_table.size) { - // Double up the space for block pointers - int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK; - kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; - __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *)); - KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *)); - __kmp_free(old_table); - // Allocate new objects in the new blocks - for (int i = row; i < 2*row; ++i) - *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *) - __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); - __kmp_i_lock_table.size = 2*idx; - } - __kmp_i_lock_table.next++; - lck = KMP_GET_I_LOCK(idx); - // Allocate a new base lock object - lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); - KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); - } - - __kmp_release_lock(&__kmp_global_lock, gtid); - - lck->type = tag; - - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. - } else { - *((kmp_indirect_lock_t **)user_lock) = lck; - } - - return lck; -} - -// User lock lookup for dynamically dispatched locks. 
-static __forceinline -kmp_indirect_lock_t * -__kmp_lookup_indirect_lock(void **user_lock, const char *func) -{ - if (__kmp_env_consistency_check) { - kmp_indirect_lock_t *lck = NULL; - if (user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); - if (idx >= __kmp_i_lock_table.size) { - KMP_FATAL(LockIsUninitialized, func); - } - lck = KMP_GET_I_LOCK(idx); - } else { - lck = *((kmp_indirect_lock_t **)user_lock); - } - if (lck == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - return lck; - } else { - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); - } else { - return *((kmp_indirect_lock_t **)user_lock); - } - } -} - -static void -__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) -{ -#if KMP_USE_ADAPTIVE_LOCKS - if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { - KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); - seq = lockseq_queuing; - } -#endif -#if KMP_USE_TSX - if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { - seq = lockseq_queuing; - } -#endif - kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); - kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); - KMP_I_LOCK_FUNC(l, init)(l->lock); - KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq)); -} - -static void -__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) -{ - kmp_uint32 gtid = __kmp_entry_gtid(); - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); - KMP_I_LOCK_FUNC(l, destroy)(l->lock); - kmp_indirect_locktag_t tag = l->type; - - __kmp_acquire_lock(&__kmp_global_lock, gtid); - - // Use the base lock's space to keep the pool chain. 
- l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); - } - __kmp_indirect_lock_pool[tag] = l; - - __kmp_release_lock(&__kmp_global_lock, gtid); -} - -static void -__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); -} - -static int -__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); -} - -static int -__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); -} - -static void -__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); -} - -static int -__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); - return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); -} - -static int -__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); - return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); -} - -kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; - -// This is used only in kmp_error.c when consistency checking is on. 
-kmp_int32 -__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) -{ - switch (seq) { - case lockseq_tas: - case lockseq_nested_tas: - return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); -#if KMP_HAS_FUTEX - case lockseq_futex: - case lockseq_nested_futex: - return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); -#endif - case lockseq_ticket: - case lockseq_nested_ticket: - return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); - case lockseq_queuing: - case lockseq_nested_queuing: -#if KMP_USE_ADAPTIVE_LOCKS - case lockseq_adaptive: - return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); -#endif - case lockseq_drdpa: - case lockseq_nested_drdpa: - return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); - default: - return 0; - } -} - -// Initializes data for dynamic user locks. -void -__kmp_init_dynamic_user_locks() -{ - // Initialize jump table for the lock functions - if (__kmp_env_consistency_check) { - __kmp_direct_set = direct_set_check; - __kmp_direct_unset = direct_unset_check; - __kmp_direct_test = direct_test_check; - __kmp_indirect_set = indirect_set_check; - __kmp_indirect_unset = indirect_unset_check; - __kmp_indirect_test = indirect_test_check; - } - else { - __kmp_direct_set = direct_set; - __kmp_direct_unset = direct_unset; - __kmp_direct_test = direct_test; - __kmp_indirect_set = indirect_set; - __kmp_indirect_unset = indirect_unset; - __kmp_indirect_test = indirect_test; - } - - // Initialize lock index table - __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; - __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); - *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *) - __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); - __kmp_i_lock_table.next = 0; - - // Indirect lock size - __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); - __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); -#if KMP_USE_ADAPTIVE_LOCKS - 
__kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); -#endif - __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); -#if KMP_USE_TSX - __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); -#endif - __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); -#if KMP_USE_FUTEX - __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); -#endif - __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); - __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); - __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); - - // Initialize lock accessor/modifier -#define fill_jumps(table, expand, sep) { \ - table[locktag##sep##ticket] = expand(ticket); \ - table[locktag##sep##queuing] = expand(queuing); \ - table[locktag##sep##drdpa] = expand(drdpa); \ -} - -#if KMP_USE_ADAPTIVE_LOCKS -# define fill_table(table, expand) { \ - fill_jumps(table, expand, _); \ - table[locktag_adaptive] = expand(queuing); \ - fill_jumps(table, expand, _nested_); \ -} -#else -# define fill_table(table, expand) { \ - fill_jumps(table, expand, _); \ - fill_jumps(table, expand, _nested_); \ -} -#endif // KMP_USE_ADAPTIVE_LOCKS - -#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location - fill_table(__kmp_indirect_set_location, expand); -#undef expand -#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags - fill_table(__kmp_indirect_set_flags, expand); -#undef expand -#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location - fill_table(__kmp_indirect_get_location, expand); -#undef expand -#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags - fill_table(__kmp_indirect_get_flags, expand); -#undef expand - - __kmp_init_user_locks = TRUE; -} - -// Clean up the lock table. 
-void -__kmp_cleanup_indirect_user_locks() -{ - kmp_lock_index_t i; - int k; - - // Clean up locks in the pools first (they were already destroyed before going into the pools). - for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { - kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; - while (l != NULL) { - kmp_indirect_lock_t *ll = l; - l = (kmp_indirect_lock_t *)l->lock->pool.next; - KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll)); - __kmp_free(ll->lock); - ll->lock = NULL; - } - } - // Clean up the remaining undestroyed locks. - for (i = 0; i < __kmp_i_lock_table.next; i++) { - kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); - if (l->lock != NULL) { - // Locks not destroyed explicitly need to be destroyed here. - KMP_I_LOCK_FUNC(l, destroy)(l->lock); - KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l)); - __kmp_free(l->lock); - } - } - // Free the table - for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) - __kmp_free(__kmp_i_lock_table.table[i]); - __kmp_free(__kmp_i_lock_table.table); - - __kmp_init_user_locks = FALSE; -} - -enum kmp_lock_kind __kmp_user_lock_kind = lk_default; -int __kmp_num_locks_in_block = 1; // FIXME - tune this value - -#else // KMP_USE_DYNAMIC_LOCK - -/* ------------------------------------------------------------------------ */ -/* user locks - * - * They are implemented as a table of function pointers which are set to the - * lock functions of the appropriate kind, once that has been determined. 
- */ - -enum kmp_lock_kind __kmp_user_lock_kind = lk_default; - -size_t __kmp_base_user_lock_size = 0; -size_t __kmp_user_lock_size = 0; - -kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; -int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; - -int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; -int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; - -int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; - -int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; -const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; -kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; - -void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) -{ - switch ( user_lock_kind ) { - case lk_default: - default: - KMP_ASSERT( 0 ); - - case lk_tas: { - __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); - __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_tas_lock_owner ); 
- - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(tas); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); - } - else { - KMP_BIND_USER_LOCK(tas); - KMP_BIND_NESTED_USER_LOCK(tas); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_tas_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; - } - break; - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) - - case lk_futex: { - __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); - __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_futex_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(futex); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); - } - else { - KMP_BIND_USER_LOCK(futex); - KMP_BIND_NESTED_USER_LOCK(futex); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_futex_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; - } - break; - -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) - - case lk_ticket: { - __kmp_base_user_lock_size = sizeof( 
kmp_base_ticket_lock_t ); - __kmp_user_lock_size = sizeof( kmp_ticket_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_ticket_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); - } - else { - KMP_BIND_USER_LOCK(ticket); - KMP_BIND_NESTED_USER_LOCK(ticket); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_ticket_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) - ( &__kmp_is_ticket_lock_initialized ); - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_ticket_lock_location ); - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_ticket_lock_location ); - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_ticket_lock_flags ); - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_ticket_lock_flags ); - } - break; - - case lk_queuing: { - __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t ); - __kmp_user_lock_size = sizeof( kmp_queuing_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); - } - else { - KMP_BIND_USER_LOCK(queuing); - KMP_BIND_NESTED_USER_LOCK(queuing); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_queuing_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) - ( &__kmp_is_queuing_lock_initialized ); - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_location ); - - __kmp_set_user_lock_location_ = - ( void ( * )( 
kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_queuing_lock_location ); - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_flags ); - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_queuing_lock_flags ); - } - break; - -#if KMP_USE_ADAPTIVE_LOCKS - case lk_adaptive: { - __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t ); - __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); - } - else { - KMP_BIND_USER_LOCK(adaptive); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_adaptive_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) - ( &__kmp_is_queuing_lock_initialized ); - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_location ); - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_queuing_lock_location ); - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_flags ); - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_queuing_lock_flags ); - - } - break; -#endif // KMP_USE_ADAPTIVE_LOCKS - - case lk_drdpa: { - __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t ); - __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_drdpa_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); - } - else { - KMP_BIND_USER_LOCK(drdpa); - KMP_BIND_NESTED_USER_LOCK(drdpa); - } - - __kmp_destroy_user_lock_ 
= - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_drdpa_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) - ( &__kmp_is_drdpa_lock_initialized ); - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_drdpa_lock_location ); - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_drdpa_lock_location ); - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_drdpa_lock_flags ); - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_drdpa_lock_flags ); - } - break; - } -} - - -// ---------------------------------------------------------------------------- -// User lock table & lock allocation - -kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL }; -kmp_user_lock_p __kmp_lock_pool = NULL; - -// Lock block-allocation support. -kmp_block_of_locks* __kmp_lock_blocks = NULL; -int __kmp_num_locks_in_block = 1; // FIXME - tune this value - -static kmp_lock_index_t -__kmp_lock_table_insert( kmp_user_lock_p lck ) -{ - // Assume that kmp_global_lock is held upon entry/exit. - kmp_lock_index_t index; - if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) { - kmp_lock_index_t size; - kmp_user_lock_p *table; - // Reallocate lock table. - if ( __kmp_user_lock_table.allocated == 0 ) { - size = 1024; - } - else { - size = __kmp_user_lock_table.allocated * 2; - } - table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size ); - KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) ); - table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table; - // We cannot free the previous table now, since it may be in use by other - // threads. So save the pointer to the previous table in in the first element of the - // new table. 
All the tables will be organized into a list, and could be freed when - // library shutting down. - __kmp_user_lock_table.table = table; - __kmp_user_lock_table.allocated = size; - } - KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated ); - index = __kmp_user_lock_table.used; - __kmp_user_lock_table.table[ index ] = lck; - ++ __kmp_user_lock_table.used; - return index; -} - -static kmp_user_lock_p -__kmp_lock_block_allocate() -{ - // Assume that kmp_global_lock is held upon entry/exit. - static int last_index = 0; - if ( ( last_index >= __kmp_num_locks_in_block ) - || ( __kmp_lock_blocks == NULL ) ) { - // Restart the index. - last_index = 0; - // Need to allocate a new block. - KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); - size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; - char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) ); - // Set up the new block. - kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]); - new_block->next_block = __kmp_lock_blocks; - new_block->locks = (void *)buffer; - // Publish the new block. - KMP_MB(); - __kmp_lock_blocks = new_block; - } - kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) ) - [ last_index * __kmp_user_lock_size ] ) ); - last_index++; - return ret; -} - -// -// Get memory for a lock. It may be freshly allocated memory or reused memory -// from lock pool. -// -kmp_user_lock_p -__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, - kmp_lock_flags_t flags ) -{ - kmp_user_lock_p lck; - kmp_lock_index_t index; - KMP_DEBUG_ASSERT( user_lock ); - - __kmp_acquire_lock( &__kmp_global_lock, gtid ); - - if ( __kmp_lock_pool == NULL ) { - // Lock pool is empty. Allocate new memory. - if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point. 
- lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size ); - } - else { - lck = __kmp_lock_block_allocate(); - } - - // Insert lock in the table so that it can be freed in __kmp_cleanup, - // and debugger has info on all allocated locks. - index = __kmp_lock_table_insert( lck ); - } - else { - // Pick up lock from pool. - lck = __kmp_lock_pool; - index = __kmp_lock_pool->pool.index; - __kmp_lock_pool = __kmp_lock_pool->pool.next; - } - - // - // We could potentially differentiate between nested and regular locks - // here, and do the lock table lookup for regular locks only. - // - if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { - * ( (kmp_lock_index_t *) user_lock ) = index; - } - else { - * ( (kmp_user_lock_p *) user_lock ) = lck; - } - - // mark the lock if it is critical section lock. - __kmp_set_user_lock_flags( lck, flags ); - - __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper - - return lck; -} - -// Put lock's memory to pool for reusing. -void -__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( user_lock != NULL ); - KMP_DEBUG_ASSERT( lck != NULL ); - - __kmp_acquire_lock( & __kmp_global_lock, gtid ); - - lck->pool.next = __kmp_lock_pool; - __kmp_lock_pool = lck; - if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { - kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock ); - KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used ); - lck->pool.index = index; - } - - __kmp_release_lock( & __kmp_global_lock, gtid ); -} - -kmp_user_lock_p -__kmp_lookup_user_lock( void **user_lock, char const *func ) -{ - kmp_user_lock_p lck = NULL; - - if ( __kmp_env_consistency_check ) { - if ( user_lock == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } - - if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { - kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock ); - if ( __kmp_env_consistency_check ) { - if ( ! 
( 0 < index && index < __kmp_user_lock_table.used ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } - KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used ); - KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); - lck = __kmp_user_lock_table.table[index]; - } - else { - lck = *( (kmp_user_lock_p *)user_lock ); - } - - if ( __kmp_env_consistency_check ) { - if ( lck == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } - - return lck; -} - -void -__kmp_cleanup_user_locks( void ) -{ - // - // Reset lock pool. Do not worry about lock in the pool -- we will free - // them when iterating through lock table (it includes all the locks, - // dead or alive). - // - __kmp_lock_pool = NULL; - -#define IS_CRITICAL(lck) \ - ( ( __kmp_get_user_lock_flags_ != NULL ) && \ - ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) ) - - // - // Loop through lock table, free all locks. - // - // Do not free item [0], it is reserved for lock tables list. - // - // FIXME - we are iterating through a list of (pointers to) objects of - // type union kmp_user_lock, but we have no way of knowing whether the - // base type is currently "pool" or whatever the global user lock type - // is. - // - // We are relying on the fact that for all of the user lock types - // (except "tas"), the first field in the lock struct is the "initialized" - // field, which is set to the address of the lock object itself when - // the lock is initialized. When the union is of type "pool", the - // first field is a pointer to the next object in the free list, which - // will not be the same address as the object itself. - // - // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck ) - // will fail for "pool" objects on the free list. This must happen as - // the "location" field of real user locks overlaps the "index" field - // of "pool" objects. 
- // - // It would be better to run through the free list, and remove all "pool" - // objects from the lock table before executing this loop. However, - // "pool" objects do not always have their index field set (only on - // lin_32e), and I don't want to search the lock table for the address - // of every "pool" object on the free list. - // - while ( __kmp_user_lock_table.used > 1 ) { - const ident *loc; - - // - // reduce __kmp_user_lock_table.used before freeing the lock, - // so that state of locks is consistent - // - kmp_user_lock_p lck = __kmp_user_lock_table.table[ - --__kmp_user_lock_table.used ]; - - if ( ( __kmp_is_user_lock_initialized_ != NULL ) && - ( *__kmp_is_user_lock_initialized_ )( lck ) ) { - // - // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is - // initialized AND it is NOT a critical section (user is not - // responsible for destroying criticals) AND we know source - // location to report. - // - if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) && - ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) && - ( loc->psource != NULL ) ) { - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 ); - KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line ); - __kmp_str_loc_free( &str_loc); - } - -#ifdef KMP_DEBUG - if ( IS_CRITICAL( lck ) ) { - KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) ); - } - else { - KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) ); - } -#endif // KMP_DEBUG - - // - // Cleanup internal lock dynamic resources - // (for drdpa locks particularly). - // - __kmp_destroy_user_lock( lck ); - } - - // - // Free the lock if block allocation of locks is not used. - // - if ( __kmp_lock_blocks == NULL ) { - __kmp_free( lck ); - } - } - -#undef IS_CRITICAL - - // - // delete lock table(s). 
- // - kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; - __kmp_user_lock_table.table = NULL; - __kmp_user_lock_table.allocated = 0; - - while ( table_ptr != NULL ) { - // - // In the first element we saved the pointer to the previous - // (smaller) lock table. - // - kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] ); - __kmp_free( table_ptr ); - table_ptr = next; - } - - // - // Free buffers allocated for blocks of locks. - // - kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; - __kmp_lock_blocks = NULL; - - while ( block_ptr != NULL ) { - kmp_block_of_locks_t *next = block_ptr->next_block; - __kmp_free( block_ptr->locks ); - // - // *block_ptr itself was allocated at the end of the locks vector. - // - block_ptr = next; - } - - TCW_4(__kmp_init_user_locks, FALSE); -} - -#endif // KMP_USE_DYNAMIC_LOCK +#endif + + KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); + KMP_DEBUG_ASSERT( gtid >= 0 ); +#ifdef KMP_DEBUG + this_thr = __kmp_thread_from_gtid( gtid ); + KMP_DEBUG_ASSERT( this_thr != NULL ); + KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); +#endif + + head = *head_id_p; + + if ( head == 0 ) { /* nobody on queue, nobody holding */ + + /* try (0,0)->(-1,0) */ + + if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { + KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); + KMP_FSYNC_ACQUIRED(lck); + return TRUE; + } + } + + KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); + return FALSE; +} + +static int +__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + + int retval = __kmp_test_queuing_lock( lck, gtid ); + + if ( retval ) { + lck->lk.owner_id = gtid + 1; + } + return retval; +} + +int 
+__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + kmp_info_t *this_thr; + volatile kmp_int32 *head_id_p = & lck->lk.head_id; + volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; + + KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); + KMP_DEBUG_ASSERT( gtid >= 0 ); + this_thr = __kmp_thread_from_gtid( gtid ); + KMP_DEBUG_ASSERT( this_thr != NULL ); +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "rel ent" ); + + if ( this_thr->th.th_spin_here ) + __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); + if ( this_thr->th.th_next_waiting != 0 ) + __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); +#endif + KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); + KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); + + KMP_FSYNC_RELEASING(lck); + + while( 1 ) { + kmp_int32 dequeued; + kmp_int32 head; + kmp_int32 tail; + + head = *head_id_p; + +#ifdef DEBUG_QUEUING_LOCKS + tail = *tail_id_p; + TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); + if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); +#endif + KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ + + if ( head == -1 ) { /* nobody on queue */ + + /* try (-1,0)->(0,0) */ + if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { + KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", + lck, gtid )); +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); +#endif + +#if OMPT_SUPPORT + /* nothing to do - no other thread is trying to shift blame */ +#endif + + return KMP_LOCK_RELEASED; + } + dequeued = FALSE; + + } + else { + + tail = *tail_id_p; + if ( head == tail ) { /* only one thread on the queue */ + +#ifdef DEBUG_QUEUING_LOCKS + if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); +#endif + KMP_DEBUG_ASSERT( head > 0 ); + + /* try (h,h)->(-1,0) */ + dequeued = 
KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, + KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); +#endif + + } + else { + volatile kmp_int32 *waiting_id_p; + kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); + KMP_DEBUG_ASSERT( head_thr != NULL ); + waiting_id_p = & head_thr->th.th_next_waiting; + + /* Does this require synchronous reads? */ +#ifdef DEBUG_QUEUING_LOCKS + if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); +#endif + KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); + + /* try (h,t)->(h',t) or (t,t) */ + + KMP_MB(); + /* make sure enqueuing thread has time to update next waiting thread field */ + *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL); +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); +#endif + dequeued = TRUE; + } + } + + if ( dequeued ) { + kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); + KMP_DEBUG_ASSERT( head_thr != NULL ); + + /* Does this require synchronous reads? */ +#ifdef DEBUG_QUEUING_LOCKS + if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); +#endif + KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); + + /* For clean code only. + * Thread not released until next statement prevents race with acquire code. 
+ */ + head_thr->th.th_next_waiting = 0; +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); +#endif + + KMP_MB(); + /* reset spin value */ + head_thr->th.th_spin_here = FALSE; + + KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", + lck, gtid )); +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "rel exit 2" ); +#endif + return KMP_LOCK_RELEASED; + } + /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ + +#ifdef DEBUG_QUEUING_LOCKS + TRACE_LOCK( gtid+1, "rel retry" ); +#endif + + } /* while */ + KMP_ASSERT2( 0, "should not get here" ); + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, + kmp_int32 gtid ) +{ + char const * const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + lck->lk.owner_id = 0; + return __kmp_release_queuing_lock( lck, gtid ); +} + +void +__kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) +{ + lck->lk.location = NULL; + lck->lk.head_id = 0; + lck->lk.tail_id = 0; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; // no thread owns the lock. + lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
+ lck->lk.initialized = lck; + + KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); +} + +static void +__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) +{ + __kmp_init_queuing_lock( lck ); +} + +void +__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) +{ + lck->lk.initialized = NULL; + lck->lk.location = NULL; + lck->lk.head_id = 0; + lck->lk.tail_id = 0; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; + lck->lk.depth_locked = -1; +} + +static void +__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) +{ + char const * const func = "omp_destroy_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_queuing_lock( lck ); +} + + +// +// nested queuing locks +// + +int +__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } + else { + __kmp_acquire_queuing_lock_timed_template( lck, gtid ); + KMP_MB(); + lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; + } +} + +static int +__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_acquire_nested_queuing_lock( lck, gtid ); +} + +int +__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + int retval; + + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { + retval = ++lck->lk.depth_locked; + } + else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { + retval = 0; + } + else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + } + return retval; +} + +static int +__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, + kmp_int32 gtid ) +{ + char const * const func = "omp_test_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_test_nested_queuing_lock( lck, gtid ); +} + +int +__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + KMP_MB(); + if ( --(lck->lk.depth_locked) == 0 ) { + KMP_MB(); + lck->lk.owner_id = 0; + __kmp_release_queuing_lock( lck, gtid ); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; +} + +static int +__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + return __kmp_release_nested_queuing_lock( lck, gtid ); +} + +void +__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) +{ + __kmp_init_queuing_lock( lck ); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +} + +static void +__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) +{ + __kmp_init_nested_queuing_lock( lck ); +} + +void +__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) +{ + __kmp_destroy_queuing_lock( lck ); + lck->lk.depth_locked = 0; +} + +static void +__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) +{ + char const * const func = "omp_destroy_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_nested_queuing_lock( lck ); +} + + +// +// access functions to fields which don't exist for all lock kinds. 
+// + +static int +__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) +{ + return lck == lck->lk.initialized; +} + +static const ident_t * +__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) +{ + return lck->lk.location; +} + +static void +__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) +{ + lck->lk.location = loc; +} + +static kmp_lock_flags_t +__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) +{ + return lck->lk.flags; +} + +static void +__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) +{ + lck->lk.flags = flags; +} + +#if KMP_USE_ADAPTIVE_LOCKS + +/* + RTM Adaptive locks +*/ + +#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 + +#include +#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) + +#else + +// Values from the status register after failed speculation. +#define _XBEGIN_STARTED (~0u) +#define _XABORT_EXPLICIT (1 << 0) +#define _XABORT_RETRY (1 << 1) +#define _XABORT_CONFLICT (1 << 2) +#define _XABORT_CAPACITY (1 << 3) +#define _XABORT_DEBUG (1 << 4) +#define _XABORT_NESTED (1 << 5) +#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) + +// Aborts for which it's worth trying again immediately +#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) + +#define STRINGIZE_INTERNAL(arg) #arg +#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) + +// Access to RTM instructions + +/* + A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. + This is the same definition as the compiler intrinsic that will be supported at some point. 
+*/ +static __inline int _xbegin() +{ + int res = -1; + +#if KMP_OS_WINDOWS +#if KMP_ARCH_X86_64 + _asm { + _emit 0xC7 + _emit 0xF8 + _emit 2 + _emit 0 + _emit 0 + _emit 0 + jmp L2 + mov res, eax + L2: + } +#else /* IA32 */ + _asm { + _emit 0xC7 + _emit 0xF8 + _emit 2 + _emit 0 + _emit 0 + _emit 0 + jmp L2 + mov res, eax + L2: + } +#endif // KMP_ARCH_X86_64 +#else + /* Note that %eax must be noted as killed (clobbered), because + * the XSR is returned in %eax(%rax) on abort. Other register + * values are restored, so don't need to be killed. + * + * We must also mark 'res' as an input and an output, since otherwise + * 'res=-1' may be dropped as being dead, whereas we do need the + * assignment on the successful (i.e., non-abort) path. + */ + __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" + " .long 1f-1b-6\n" + " jmp 2f\n" + "1: movl %%eax,%0\n" + "2:" + :"+r"(res)::"memory","%eax"); +#endif // KMP_OS_WINDOWS + return res; +} + +/* + Transaction end +*/ +static __inline void _xend() +{ +#if KMP_OS_WINDOWS + __asm { + _emit 0x0f + _emit 0x01 + _emit 0xd5 + } +#else + __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); +#endif +} + +/* + This is a macro, the argument must be a single byte constant which + can be evaluated by the inline assembler, since it is emitted as a + byte into the assembly code. +*/ +#if KMP_OS_WINDOWS +#define _xabort(ARG) \ + _asm _emit 0xc6 \ + _asm _emit 0xf8 \ + _asm _emit ARG +#else +#define _xabort(ARG) \ + __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); +#endif + +#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 + +// +// Statistics is collected for testing purpose +// +#if KMP_DEBUG_ADAPTIVE_LOCKS + +// We accumulate speculative lock statistics when the lock is destroyed. +// We keep locks that haven't been destroyed in the liveLocks list +// so that we can grab their statistics too. +static kmp_adaptive_lock_statistics_t destroyedStats; + +// To hold the list of live locks. 
+static kmp_adaptive_lock_info_t liveLocks; + +// A lock so we can safely update the list of locks. +static kmp_bootstrap_lock_t chain_lock; + +// Initialize the list of stats. +void +__kmp_init_speculative_stats() +{ + kmp_adaptive_lock_info_t *lck = &liveLocks; + + memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); + lck->stats.next = lck; + lck->stats.prev = lck; + + KMP_ASSERT( lck->stats.next->stats.prev == lck ); + KMP_ASSERT( lck->stats.prev->stats.next == lck ); + + __kmp_init_bootstrap_lock( &chain_lock ); + +} + +// Insert the lock into the circular list +static void +__kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) +{ + __kmp_acquire_bootstrap_lock( &chain_lock ); + + lck->stats.next = liveLocks.stats.next; + lck->stats.prev = &liveLocks; + + liveLocks.stats.next = lck; + lck->stats.next->stats.prev = lck; + + KMP_ASSERT( lck->stats.next->stats.prev == lck ); + KMP_ASSERT( lck->stats.prev->stats.next == lck ); + + __kmp_release_bootstrap_lock( &chain_lock ); +} + +static void +__kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) +{ + KMP_ASSERT( lck->stats.next->stats.prev == lck ); + KMP_ASSERT( lck->stats.prev->stats.next == lck ); + + kmp_adaptive_lock_info_t * n = lck->stats.next; + kmp_adaptive_lock_info_t * p = lck->stats.prev; + + n->stats.prev = p; + p->stats.next = n; +} + +static void +__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) +{ + memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); + __kmp_remember_lock( lck ); +} + +static void +__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) +{ + kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; + + t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; + t->successfulSpeculations += s->successfulSpeculations; + t->hardFailedSpeculations += s->hardFailedSpeculations; + t->softFailedSpeculations += s->softFailedSpeculations; + t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; + t->lemmingYields += 
s->lemmingYields; +} + +static void +__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) +{ + kmp_adaptive_lock_statistics_t *t = &destroyedStats; + + __kmp_acquire_bootstrap_lock( &chain_lock ); + + __kmp_add_stats( &destroyedStats, lck ); + __kmp_forget_lock( lck ); + + __kmp_release_bootstrap_lock( &chain_lock ); +} + +static float +percent (kmp_uint32 count, kmp_uint32 total) +{ + return (total == 0) ? 0.0: (100.0 * count)/total; +} + +static +FILE * __kmp_open_stats_file() +{ + if (strcmp (__kmp_speculative_statsfile, "-") == 0) + return stdout; + + size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; + char buffer[buffLen]; + KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, + (kmp_int32)getpid()); + FILE * result = fopen(&buffer[0], "w"); + + // Maybe we should issue a warning here... + return result ? result : stdout; +} + +void +__kmp_print_speculative_stats() +{ + if (__kmp_user_lock_kind != lk_adaptive) + return; + + FILE * statsFile = __kmp_open_stats_file(); + + kmp_adaptive_lock_statistics_t total = destroyedStats; + kmp_adaptive_lock_info_t *lck; + + for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { + __kmp_add_stats( &total, lck ); + } + kmp_adaptive_lock_statistics_t *t = &total; + kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; + kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + + t->softFailedSpeculations; + + fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); + fprintf ( statsFile, " Lock parameters: \n" + " max_soft_retries : %10d\n" + " max_badness : %10d\n", + __kmp_adaptive_backoff_params.max_soft_retries, + __kmp_adaptive_backoff_params.max_badness); + fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); + fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); + fprintf( statsFile, " Successful speculations : %10d 
(%5.1f%%)\n", + t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); + fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", + t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); + fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); + + fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); + fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", + t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); + fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", + t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); + fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", + t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); + + if (statsFile != stdout) + fclose( statsFile ); +} + +# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) +#else +# define KMP_INC_STAT(lck,stat) + +#endif // KMP_DEBUG_ADAPTIVE_LOCKS + +static inline bool +__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) +{ + // It is enough to check that the head_id is zero. + // We don't also need to check the tail. + bool res = lck->lk.head_id == 0; + + // We need a fence here, since we must ensure that no memory operations + // from later in this thread float above that read. +#if KMP_COMPILER_ICC + _mm_mfence(); +#else + __sync_synchronize(); +#endif + + return res; +} + +// Functions for manipulating the badness +static __inline void +__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) +{ + // Reset the badness to zero so we eagerly try to speculate again + lck->lk.adaptive.badness = 0; + KMP_INC_STAT(lck,successfulSpeculations); +} + +// Create a bit mask with one more set bit. 
+static __inline void +__kmp_step_badness( kmp_adaptive_lock_t *lck ) +{ + kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; + if ( newBadness > lck->lk.adaptive.max_badness) { + return; + } else { + lck->lk.adaptive.badness = newBadness; + } +} + +// Check whether speculation should be attempted. +static __inline int +__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) +{ + kmp_uint32 badness = lck->lk.adaptive.badness; + kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; + int res = (attempts & badness) == 0; + return res; +} + +// Attempt to acquire only the speculative lock. +// Does not back off to the non-speculative lock. +// +static int +__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) +{ + int retries = lck->lk.adaptive.max_soft_retries; + + // We don't explicitly count the start of speculation, rather we record + // the results (success, hard fail, soft fail). The sum of all of those + // is the total number of times we started speculation since all + // speculations must end one of those ways. + do + { + kmp_uint32 status = _xbegin(); + // Switch this in to disable actual speculation but exercise + // at least some of the rest of the code. Useful for debugging... + // kmp_uint32 status = _XABORT_NESTED; + + if (status == _XBEGIN_STARTED ) + { /* We have successfully started speculation + * Check that no-one acquired the lock for real between when we last looked + * and now. This also gets the lock cache line into our read-set, + * which we need so that we'll abort if anyone later claims it for real. + */ + if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) + { + // Lock is now visibly acquired, so someone beat us to it. + // Abort the transaction so we'll restart from _xbegin with the + // failure status. 
+ _xabort(0x01); + KMP_ASSERT2( 0, "should not get here" ); + } + return 1; // Lock has been acquired (speculatively) + } else { + // We have aborted, update the statistics + if ( status & SOFT_ABORT_MASK) + { + KMP_INC_STAT(lck,softFailedSpeculations); + // and loop round to retry. + } + else + { + KMP_INC_STAT(lck,hardFailedSpeculations); + // Give up if we had a hard failure. + break; + } + } + } while( retries-- ); // Loop while we have retries, and didn't fail hard. + + // Either we had a hard failure or we didn't succeed softly after + // the full set of attempts, so back off the badness. + __kmp_step_badness( lck ); + return 0; +} + +// Attempt to acquire the speculative lock, or back off to the non-speculative one +// if the speculative lock cannot be acquired. +// We can succeed speculatively, non-speculatively, or fail. +static int +__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) +{ + // First try to acquire the lock speculatively + if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) + return 1; + + // Speculative acquisition failed, so try to acquire it non-speculatively. + // Count the non-speculative acquire attempt + lck->lk.adaptive.acquire_attempts++; + + // Use base, non-speculative lock. + if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) + { + KMP_INC_STAT(lck,nonSpeculativeAcquires); + return 1; // Lock is acquired (non-speculatively) + } + else + { + return 0; // Failed to acquire the lock, it's already visibly locked. + } +} + +static int +__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_lock"; + if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { + KMP_FATAL( LockIsUninitialized, func ); + } + + int retval = __kmp_test_adaptive_lock( lck, gtid ); + + if ( retval ) { + lck->lk.qlk.owner_id = gtid + 1; + } + return retval; +} + +// Block until we can acquire a speculative, adaptive lock. 
+// We check whether we should be trying to speculate. +// If we should be, we check the real lock to see if it is free, +// and, if not, pause without attempting to acquire it until it is. +// Then we try the speculative acquire. +// This means that although we suffer from lemmings a little ( +// because all we can't acquire the lock speculatively until +// the queue of threads waiting has cleared), we don't get into a +// state where we can never acquire the lock speculatively (because we +// force the queue to clear by preventing new arrivals from entering the +// queue). +// This does mean that when we're trying to break lemmings, the lock +// is no longer fair. However OpenMP makes no guarantee that its +// locks are fair, so this isn't a real problem. +static void +__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) +{ + if ( __kmp_should_speculate( lck, gtid ) ) + { + if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) + { + if ( __kmp_test_adaptive_lock_only( lck , gtid ) ) + return; + // We tried speculation and failed, so give up. + } + else + { + // We can't try speculation until the lock is free, so we + // pause here (without suspending on the queueing lock, + // to allow it to drain, then try again. + // All other threads will also see the same result for + // shouldSpeculate, so will be doing the same if they + // try to claim the lock from now on. + while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) + { + KMP_INC_STAT(lck,lemmingYields); + __kmp_yield (TRUE); + } + + if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) + return; + } + } + + // Speculative acquisition failed, so acquire it non-speculatively. + // Count the non-speculative acquire attempt + lck->lk.adaptive.acquire_attempts++; + + __kmp_acquire_queuing_lock_timed_template( GET_QLK_PTR(lck), gtid ); + // We have acquired the base lock, so count that. 
+ KMP_INC_STAT(lck,nonSpeculativeAcquires ); +} + +static void +__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_lock"; + if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { + KMP_FATAL( LockIsAlreadyOwned, func ); + } + + __kmp_acquire_adaptive_lock( lck, gtid ); + + lck->lk.qlk.owner_id = gtid + 1; +} + +static int +__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) +{ + if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) + { // If the lock doesn't look claimed we must be speculating. + // (Or the user's code is buggy and they're releasing without locking; + // if we had XTEST we'd be able to check that case...) + _xend(); // Exit speculation + __kmp_update_badness_after_success( lck ); + } + else + { // Since the lock *is* visibly locked we're not speculating, + // so should use the underlying lock's release scheme. 
+ __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); + } + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + lck->lk.qlk.owner_id = 0; + __kmp_release_adaptive_lock( lck, gtid ); + return KMP_LOCK_RELEASED; +} + +static void +__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) +{ + __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); + lck->lk.adaptive.badness = 0; + lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; + lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; + lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; +#if KMP_DEBUG_ADAPTIVE_LOCKS + __kmp_zero_speculative_stats( &lck->lk.adaptive ); +#endif + KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); +} + +static void +__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) +{ + __kmp_init_adaptive_lock( lck ); +} + +static void +__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) +{ +#if KMP_DEBUG_ADAPTIVE_LOCKS + __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); +#endif + __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); + // Nothing needed for the speculative part. 
+} + +static void +__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) +{ + char const * const func = "omp_destroy_lock"; + if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_adaptive_lock( lck ); +} + + +#endif // KMP_USE_ADAPTIVE_LOCKS + + +/* ------------------------------------------------------------------------ */ +/* DRDPA ticket locks */ +/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ + +static kmp_int32 +__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) +{ + return TCR_4( lck->lk.owner_id ) - 1; +} + +static inline bool +__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) +{ + return lck->lk.depth_locked != -1; +} + +__forceinline static int +__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); + kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls + = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + TCR_PTR(lck->lk.polls); // volatile load + +#ifdef USE_LOCK_PROFILE + if (TCR_8(polls[ticket & mask].poll) != ticket) + __kmp_printf("LOCK CONTENTION: %p\n", lck); + /* else __kmp_printf( "." );*/ +#endif /* USE_LOCK_PROFILE */ + + // + // Now spin-wait, but reload the polls pointer and mask, in case the + // polling area has been reconfigured. Unless it is reconfigured, the + // reloads stay in L1 cache and are cheap. + // + // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! + // + // The current implementation of KMP_WAIT_YIELD doesn't allow for mask + // and poll to be re-read every spin iteration. 
+ // + kmp_uint32 spins; + + KMP_FSYNC_PREPARE(lck); + KMP_INIT_YIELD(spins); + while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load + // If we are oversubscribed, + // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. + // CPU Pause is in the macros for yield. + // + KMP_YIELD(TCR_4(__kmp_nth) + > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); + KMP_YIELD_SPIN(spins); + + // Re-read the mask and the poll pointer from the lock structure. + // + // Make certain that "mask" is read before "polls" !!! + // + // If another thread picks reconfigures the polling area and updates + // their values, and we get the new value of mask and the old polls + // pointer, we could access memory beyond the end of the old polling + // area. + // + mask = TCR_8(lck->lk.mask); // volatile load + polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + TCR_PTR(lck->lk.polls); // volatile load + } + + // + // Critical section starts here + // + KMP_FSYNC_ACQUIRED(lck); + KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", + ticket, lck)); + lck->lk.now_serving = ticket; // non-volatile store + + // + // Deallocate a garbage polling area if we know that we are the last + // thread that could possibly access it. + // + // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup + // ticket. + // + if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { + __kmp_free((void *)lck->lk.old_polls); + lck->lk.old_polls = NULL; + lck->lk.cleanup_ticket = 0; + } + + // + // Check to see if we should reconfigure the polling area. + // If there is still a garbage polling area to be deallocated from a + // previous reconfiguration, let a later thread reconfigure it. + // + if (lck->lk.old_polls == NULL) { + bool reconfigure = false; + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; + kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); + + if (TCR_4(__kmp_nth) + > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { + // + // We are in oversubscription mode. Contract the polling area + // down to a single location, if that hasn't been done already. + // + if (num_polls > 1) { + reconfigure = true; + num_polls = TCR_4(lck->lk.num_polls); + mask = 0; + num_polls = 1; + polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + __kmp_allocate(num_polls * sizeof(*polls)); + polls[0].poll = ticket; + } + } + else { + // + // We are in under/fully subscribed mode. Check the number of + // threads waiting on the lock. The size of the polling area + // should be at least the number of threads waiting. + // + kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; + if (num_waiting > num_polls) { + kmp_uint32 old_num_polls = num_polls; + reconfigure = true; + do { + mask = (mask << 1) | 1; + num_polls *= 2; + } while (num_polls <= num_waiting); + + // + // Allocate the new polling area, and copy the relevant portion + // of the old polling area to the new area. __kmp_allocate() + // zeroes the memory it allocates, and most of the old area is + // just zero padding, so we only copy the release counters. + // + polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + __kmp_allocate(num_polls * sizeof(*polls)); + kmp_uint32 i; + for (i = 0; i < old_num_polls; i++) { + polls[i].poll = old_polls[i].poll; + } + } + } + + if (reconfigure) { + // + // Now write the updated fields back to the lock structure. + // + // Make certain that "polls" is written before "mask" !!! + // + // If another thread picks up the new value of mask and the old + // polls pointer , it could access memory beyond the end of the + // old polling area. + // + // On x86, we need memory fences. 
+ // + KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", + ticket, lck, num_polls)); + + lck->lk.old_polls = old_polls; // non-volatile store + lck->lk.polls = polls; // volatile store + + KMP_MB(); + + lck->lk.num_polls = num_polls; // non-volatile store + lck->lk.mask = mask; // volatile store + + KMP_MB(); + + // + // Only after the new polling area and mask have been flushed + // to main memory can we update the cleanup ticket field. + // + // volatile load / non-volatile store + // + lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); + } + } + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); +} + +static int +__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { + KMP_FATAL( LockIsAlreadyOwned, func ); + } + + __kmp_acquire_drdpa_lock( lck, gtid ); + + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; +} + +int +__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + // + // First get a ticket, then read the polls pointer and the mask. + // The polls pointer must be read before the mask!!! 
(See above) + // + kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls + = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + TCR_PTR(lck->lk.polls); // volatile load + kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load + if (TCR_8(polls[ticket & mask].poll) == ticket) { + kmp_uint64 next_ticket = ticket + 1; + if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, + ticket, next_ticket)) { + KMP_FSYNC_ACQUIRED(lck); + KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", + ticket, lck)); + lck->lk.now_serving = ticket; // non-volatile store + + // + // Since no threads are waiting, there is no possibility that + // we would want to reconfigure the polling area. We might + // have the cleanup ticket value (which says that it is now + // safe to deallocate old_polls), but we'll let a later thread + // which calls __kmp_acquire_lock do that - this routine + // isn't supposed to block, and we would risk blocks if we + // called __kmp_free() to do the deallocation. + // + return TRUE; + } + } + return FALSE; +} + +static int +__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + + int retval = __kmp_test_drdpa_lock( lck, gtid ); + + if ( retval ) { + lck->lk.owner_id = gtid + 1; + } + return retval; +} + +int +__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + // + // Read the ticket value from the lock data struct, then the polls + // pointer and the mask. The polls pointer must be read before the + // mask!!! 
(See above) + // + kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls + = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + TCR_PTR(lck->lk.polls); // volatile load + kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load + KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", + ticket - 1, lck)); + KMP_FSYNC_RELEASING(lck); + KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) + && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + lck->lk.owner_id = 0; + return __kmp_release_drdpa_lock( lck, gtid ); +} + +void +__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) +{ + lck->lk.location = NULL; + lck->lk.mask = 0; + lck->lk.num_polls = 1; + lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); + lck->lk.cleanup_ticket = 0; + lck->lk.old_polls = NULL; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; // no thread owns the lock. + lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
+ lck->lk.initialized = lck; + + KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); +} + +static void +__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) +{ + __kmp_init_drdpa_lock( lck ); +} + +void +__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) +{ + lck->lk.initialized = NULL; + lck->lk.location = NULL; + if (lck->lk.polls != NULL) { + __kmp_free((void *)lck->lk.polls); + lck->lk.polls = NULL; + } + if (lck->lk.old_polls != NULL) { + __kmp_free((void *)lck->lk.old_polls); + lck->lk.old_polls = NULL; + } + lck->lk.mask = 0; + lck->lk.num_polls = 0; + lck->lk.cleanup_ticket = 0; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; + lck->lk.depth_locked = -1; +} + +static void +__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) +{ + char const * const func = "omp_destroy_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( __kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_drdpa_lock( lck ); +} + + +// +// nested drdpa ticket locks +// + +int +__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } + else { + __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); + KMP_MB(); + lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; + } +} + +static void +__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_set_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + __kmp_acquire_nested_drdpa_lock( lck, gtid ); +} + +int +__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + int retval; + + KMP_DEBUG_ASSERT( gtid >= 0 ); + + if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { + retval = ++lck->lk.depth_locked; + } + else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { + retval = 0; + } + else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + } + return retval; +} + +static int +__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_test_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + return __kmp_test_nested_drdpa_lock( lck, gtid ); +} + +int +__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( gtid >= 0 ); + + KMP_MB(); + if ( --(lck->lk.depth_locked) == 0 ) { + KMP_MB(); + lck->lk.owner_id = 0; + __kmp_release_drdpa_lock( lck, gtid ); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; +} + +static int +__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) +{ + char const * const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { + KMP_FATAL( LockUnsettingFree, func ); + } + if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { + KMP_FATAL( LockUnsettingSetByAnother, func ); + } + return __kmp_release_nested_drdpa_lock( lck, gtid ); +} + +void +__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) +{ + __kmp_init_drdpa_lock( lck ); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +} + +static void +__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) +{ + __kmp_init_nested_drdpa_lock( lck ); +} + +void +__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) +{ + __kmp_destroy_drdpa_lock( lck ); + lck->lk.depth_locked = 0; +} + +static void +__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) +{ + char const * const func = "omp_destroy_nest_lock"; + if ( lck->lk.initialized != lck ) { + KMP_FATAL( LockIsUninitialized, func ); + } + if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { + KMP_FATAL( LockStillOwned, func ); + } + __kmp_destroy_nested_drdpa_lock( lck ); +} + + +// +// access functions to fields which don't exist for all lock kinds. +// + +static int +__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) +{ + return lck == lck->lk.initialized; +} + +static const ident_t * +__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) +{ + return lck->lk.location; +} + +static void +__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) +{ + lck->lk.location = loc; +} + +static kmp_lock_flags_t +__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) +{ + return lck->lk.flags; +} + +static void +__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) +{ + lck->lk.flags = flags; +} + +#if KMP_USE_DYNAMIC_LOCK + +// Direct lock initializers. 
It simply writes a tag to the low 8 bits of the lock word. +static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) +{ + TCW_4(*lck, KMP_GET_D_TAG(seq)); + KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); +} + +#if KMP_USE_TSX + +// HLE lock functions - imported from the testbed runtime. +#define HLE_ACQUIRE ".byte 0xf2;" +#define HLE_RELEASE ".byte 0xf3;" + +static inline kmp_uint32 +swap4(kmp_uint32 volatile *p, kmp_uint32 v) +{ + __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" + : "+r"(v), "+m"(*p) + : + : "memory"); + return v; +} + +static void +__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) +{ + TCW_4(*lck, 0); +} + +static void +__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + // Use gtid for KMP_LOCK_BUSY if necessary + if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { + int delay = 1; + do { + while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { + for (int i = delay; i != 0; --i) + KMP_CPU_PAUSE(); + delay = ((delay << 1) | 1) & 7; + } + } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); + } +} + +static void +__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks +} + +static int +__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + __asm__ volatile(HLE_RELEASE "movl %1,%0" + : "=m"(*lck) + : "r"(KMP_LOCK_FREE(hle)) + : "memory"); + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + return __kmp_release_hle_lock(lck, gtid); // TODO: add checks +} + +static int +__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); +} + +static int +__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + return __kmp_test_hle_lock(lck, gtid); // TODO: add checks +} + +static void 
+__kmp_init_rtm_lock(kmp_queuing_lock_t *lck) +{ + __kmp_init_queuing_lock(lck); +} + +static void +__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) +{ + __kmp_destroy_queuing_lock(lck); +} + +static void +__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) +{ + unsigned retries=3, status; + do { + status = _xbegin(); + if (status == _XBEGIN_STARTED) { + if (__kmp_is_unlocked_queuing_lock(lck)) + return; + _xabort(0xff); + } + if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { + // Wait until lock becomes free + while (! __kmp_is_unlocked_queuing_lock(lck)) + __kmp_yield(TRUE); + } + else if (!(status & _XABORT_RETRY)) + break; + } while (retries--); + + // Fall-back non-speculative lock (xchg) + __kmp_acquire_queuing_lock(lck, gtid); +} + +static void +__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) +{ + __kmp_acquire_rtm_lock(lck, gtid); +} + +static int +__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) +{ + if (__kmp_is_unlocked_queuing_lock(lck)) { + // Releasing from speculation + _xend(); + } + else { + // Releasing from a real lock + __kmp_release_queuing_lock(lck, gtid); + } + return KMP_LOCK_RELEASED; +} + +static int +__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) +{ + return __kmp_release_rtm_lock(lck, gtid); +} + +static int +__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) +{ + unsigned retries=3, status; + do { + status = _xbegin(); + if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { + return 1; + } + if (!(status & _XABORT_RETRY)) + break; + } while (retries--); + + return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0; +} + +static int +__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) +{ + return __kmp_test_rtm_lock(lck, gtid); +} + +#endif // KMP_USE_TSX + +// Entry functions for indirect locks (first element of direct lock jump tables). 
+// Forward declarations of the indirect-lock entry points. They are defined
+// further down but are needed here as element 0 of each direct jump table.
+static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
+static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
+static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
+static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
+
+//
+// Jump tables for the direct lock functions.
+// Only fill in the odd entries, that avoids the need to shift out the low bit.
+//
+// Layout of every table below: element 0 is the indirect-lock entry function,
+// element 1 is 0, and each lock kind listed by KMP_FOREACH_D_LOCK then
+// contributes the pair "0, function" through the local expand() macro.
+//
+
+// init functions
+// Every direct lock kind shares the same initializer, so expand() ignores
+// both of its arguments here.
+#define expand(l, op) 0,__kmp_init_direct_lock,
+void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
+    = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
+#undef expand
+
+// destroy functions
+#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
+void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
+    = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
+#undef expand
+
+// set/acquire functions
+// Two variants are built: the plain functions and the *_with_checks ones.
+#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
+static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
+    = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
+#undef expand
+#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
+static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
+    = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
+#undef expand
+
+// unset/release and test functions
+#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
+static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
+    = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
+static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
+    = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
+#undef expand
+#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
+static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
+    = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
+static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
+    = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
+#undef expand
+
+// Exposes only one set of jump tables (*lock or *lock_with_checks).
+// These start out as 0; __kmp_init_dynamic_user_locks() points them at the
+// plain or the checking tables.
+void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
+
+//
+// Jump tables for the indirect lock functions.
+//
+// Unlike the direct tables there is no padding: each lock kind listed by
+// KMP_FOREACH_I_LOCK contributes exactly one element.
+//
+#define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
+void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
+void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
+#undef expand
+
+// set/acquire functions
+#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
+static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
+#undef expand
+#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
+static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
+#undef expand
+
+// unset/release and test functions
+#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
+static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
+static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
+#undef expand
+#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
+static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
+static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
+#undef expand
+
+// Exposes only one set of jump tables (*lock or *lock_with_checks).
+void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
+int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
+int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
+
+// Lock index table.
+kmp_indirect_lock_table_t __kmp_i_lock_table;
+
+// Size of indirect locks.
+// Indexed by indirect lock tag; the entries are filled in by
+// __kmp_init_dynamic_user_locks().
+static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
+
+// Jump tables for lock accessor/modifier.
+// Entries that __kmp_init_dynamic_user_locks() does not fill stay 0.
+void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
+void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
+const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
+kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
+
+// Use different lock pools for different lock types.
+// Each element is the head of a singly linked chain of destroyed locks.
+static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
+
+// User lock allocator for dynamically dispatched indirect locks.
+// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
+// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
+// object is returned to the reusable pool of locks, unique to each lock type.
+kmp_indirect_lock_t *
+__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
+{
+    // Hands out one indirect lock of kind "tag". The entry comes from the
+    // per-tag pool of destroyed locks when that pool is non-empty, otherwise
+    // from the next unused slot of the lock table. The resulting handle (a
+    // table index or the entry pointer, depending on OMP_LOCK_T_SIZE) is
+    // stored through user_lock, and the entry itself is returned.
+    kmp_indirect_lock_t *entry;
+    kmp_lock_index_t slot;
+
+    __kmp_acquire_lock(&__kmp_global_lock, gtid);
+
+    if (__kmp_indirect_lock_pool[tag] == NULL) {
+        // Nothing to recycle: take the next never-used table slot.
+        slot = __kmp_i_lock_table.next;
+        if (slot == __kmp_i_lock_table.size) {
+            // The table is full. Double the array of chunk pointers, carry the
+            // existing chunks over, and back the new upper half with fresh chunks.
+            int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
+            kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
+            __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
+            KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
+            __kmp_free(old_table);
+            for (int i = row; i < 2*row; ++i) {
+                __kmp_i_lock_table.table[i] = (kmp_indirect_lock_t *)
+                    __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
+            }
+            __kmp_i_lock_table.size = 2*slot;
+        }
+        __kmp_i_lock_table.next++;
+        entry = KMP_GET_I_LOCK(slot);
+        // The table entry only describes the lock; the base lock object is a
+        // separate allocation sized for this tag.
+        entry->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
+        KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", entry));
+    } else {
+        // Pop the head of this tag's pool of destroyed locks.
+        entry = __kmp_indirect_lock_pool[tag];
+        if (OMP_LOCK_T_SIZE < sizeof(void *)) {
+            // Index-based handles need the slot number that was stored in the
+            // pooled lock.
+            slot = entry->lock->pool.index;
+        }
+        __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)entry->lock->pool.next;
+        KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", entry));
+    }
+
+    __kmp_release_lock(&__kmp_global_lock, gtid);
+
+    entry->type = tag;
+
+    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
+        *((kmp_lock_index_t *)user_lock) = slot << 1; // indirect lock word must be even.
+    } else {
+        *((kmp_indirect_lock_t **)user_lock) = entry;
+    }
+
+    return entry;
+}
+
+// User lock lookup for dynamically dispatched locks.
+static __forceinline +kmp_indirect_lock_t * +__kmp_lookup_indirect_lock(void **user_lock, const char *func) +{ + if (__kmp_env_consistency_check) { + kmp_indirect_lock_t *lck = NULL; + if (user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, func); + } + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); + if (idx >= __kmp_i_lock_table.size) { + KMP_FATAL(LockIsUninitialized, func); + } + lck = KMP_GET_I_LOCK(idx); + } else { + lck = *((kmp_indirect_lock_t **)user_lock); + } + if (lck == NULL) { + KMP_FATAL(LockIsUninitialized, func); + } + return lck; + } else { + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); + } else { + return *((kmp_indirect_lock_t **)user_lock); + } + } +} + +static void +__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) +{ +#if KMP_USE_ADAPTIVE_LOCKS + if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { + KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); + seq = lockseq_queuing; + } +#endif +#if KMP_USE_TSX + if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { + seq = lockseq_queuing; + } +#endif + kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); + kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); + KMP_I_LOCK_FUNC(l, init)(l->lock); + KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq)); +} + +static void +__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) +{ + kmp_uint32 gtid = __kmp_entry_gtid(); + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); + KMP_I_LOCK_FUNC(l, destroy)(l->lock); + kmp_indirect_locktag_t tag = l->type; + + __kmp_acquire_lock(&__kmp_global_lock, gtid); + + // Use the base lock's space to keep the pool chain. 
+ l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); + } + __kmp_indirect_lock_pool[tag] = l; + + __kmp_release_lock(&__kmp_global_lock, gtid); +} + +static void +__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); + KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); +} + +static int +__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); + return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); +} + +static int +__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); + return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); +} + +static void +__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); + KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); +} + +static int +__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); + return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); +} + +static int +__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); + return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); +} + +kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; + +// This is used only in kmp_error.c when consistency checking is on. 
+kmp_int32 +__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) +{ + switch (seq) { + case lockseq_tas: + case lockseq_nested_tas: + return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); +#if KMP_HAS_FUTEX + case lockseq_futex: + case lockseq_nested_futex: + return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); +#endif + case lockseq_ticket: + case lockseq_nested_ticket: + return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); + case lockseq_queuing: + case lockseq_nested_queuing: +#if KMP_USE_ADAPTIVE_LOCKS + case lockseq_adaptive: + return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); +#endif + case lockseq_drdpa: + case lockseq_nested_drdpa: + return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); + default: + return 0; + } +} + +// Initializes data for dynamic user locks. +void +__kmp_init_dynamic_user_locks() +{ + // Initialize jump table for the lock functions + if (__kmp_env_consistency_check) { + __kmp_direct_set = direct_set_check; + __kmp_direct_unset = direct_unset_check; + __kmp_direct_test = direct_test_check; + __kmp_indirect_set = indirect_set_check; + __kmp_indirect_unset = indirect_unset_check; + __kmp_indirect_test = indirect_test_check; + } + else { + __kmp_direct_set = direct_set; + __kmp_direct_unset = direct_unset; + __kmp_direct_test = direct_test; + __kmp_indirect_set = indirect_set; + __kmp_indirect_unset = indirect_unset; + __kmp_indirect_test = indirect_test; + } + + // Initialize lock index table + __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; + __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); + *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *) + __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); + __kmp_i_lock_table.next = 0; + + // Indirect lock size + __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); + __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); +#if KMP_USE_ADAPTIVE_LOCKS + 
__kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); +#endif + __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); +#if KMP_USE_TSX + __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); +#endif + __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); +#if KMP_USE_FUTEX + __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); +#endif + __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); + __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); + __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); + + // Initialize lock accessor/modifier +#define fill_jumps(table, expand, sep) { \ + table[locktag##sep##ticket] = expand(ticket); \ + table[locktag##sep##queuing] = expand(queuing); \ + table[locktag##sep##drdpa] = expand(drdpa); \ +} + +#if KMP_USE_ADAPTIVE_LOCKS +# define fill_table(table, expand) { \ + fill_jumps(table, expand, _); \ + table[locktag_adaptive] = expand(queuing); \ + fill_jumps(table, expand, _nested_); \ +} +#else +# define fill_table(table, expand) { \ + fill_jumps(table, expand, _); \ + fill_jumps(table, expand, _nested_); \ +} +#endif // KMP_USE_ADAPTIVE_LOCKS + +#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location + fill_table(__kmp_indirect_set_location, expand); +#undef expand +#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags + fill_table(__kmp_indirect_set_flags, expand); +#undef expand +#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location + fill_table(__kmp_indirect_get_location, expand); +#undef expand +#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags + fill_table(__kmp_indirect_get_flags, expand); +#undef expand + + __kmp_init_user_locks = TRUE; +} + +// Clean up the lock table. 
+void +__kmp_cleanup_indirect_user_locks() +{ + kmp_lock_index_t i; + int k; + + // Clean up locks in the pools first (they were already destroyed before going into the pools). + for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { + kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; + while (l != NULL) { + kmp_indirect_lock_t *ll = l; + l = (kmp_indirect_lock_t *)l->lock->pool.next; + KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll)); + __kmp_free(ll->lock); + ll->lock = NULL; + } + } + // Clean up the remaining undestroyed locks. + for (i = 0; i < __kmp_i_lock_table.next; i++) { + kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); + if (l->lock != NULL) { + // Locks not destroyed explicitly need to be destroyed here. + KMP_I_LOCK_FUNC(l, destroy)(l->lock); + KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l)); + __kmp_free(l->lock); + } + } + // Free the table + for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) + __kmp_free(__kmp_i_lock_table.table[i]); + __kmp_free(__kmp_i_lock_table.table); + + __kmp_init_user_locks = FALSE; +} + +enum kmp_lock_kind __kmp_user_lock_kind = lk_default; +int __kmp_num_locks_in_block = 1; // FIXME - tune this value + +#else // KMP_USE_DYNAMIC_LOCK + +/* ------------------------------------------------------------------------ */ +/* user locks + * + * They are implemented as a table of function pointers which are set to the + * lock functions of the appropriate kind, once that has been determined. 
+ */ + +enum kmp_lock_kind __kmp_user_lock_kind = lk_default; + +size_t __kmp_base_user_lock_size = 0; +size_t __kmp_user_lock_size = 0; + +kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; +int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; + +int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; +int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; +void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; +void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; +void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; +int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; + +int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; +int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; +void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; +void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; + +int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; +const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; +void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; +kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; +void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; + +void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) +{ + switch ( user_lock_kind ) { + case lk_default: + default: + KMP_ASSERT( 0 ); + + case lk_tas: { + __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); + __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); + + __kmp_get_user_lock_owner_ = + ( kmp_int32 ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_tas_lock_owner ); 
+ + if ( __kmp_env_consistency_check ) { + KMP_BIND_USER_LOCK_WITH_CHECKS(tas); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); + } + else { + KMP_BIND_USER_LOCK(tas); + KMP_BIND_NESTED_USER_LOCK(tas); + } + + __kmp_destroy_user_lock_ = + ( void ( * )( kmp_user_lock_p ) ) + ( &__kmp_destroy_tas_lock ); + + __kmp_is_user_lock_initialized_ = + ( int ( * )( kmp_user_lock_p ) ) NULL; + + __kmp_get_user_lock_location_ = + ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; + + __kmp_set_user_lock_location_ = + ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; + + __kmp_get_user_lock_flags_ = + ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; + + __kmp_set_user_lock_flags_ = + ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; + } + break; + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) + + case lk_futex: { + __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); + __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); + + __kmp_get_user_lock_owner_ = + ( kmp_int32 ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_futex_lock_owner ); + + if ( __kmp_env_consistency_check ) { + KMP_BIND_USER_LOCK_WITH_CHECKS(futex); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); + } + else { + KMP_BIND_USER_LOCK(futex); + KMP_BIND_NESTED_USER_LOCK(futex); + } + + __kmp_destroy_user_lock_ = + ( void ( * )( kmp_user_lock_p ) ) + ( &__kmp_destroy_futex_lock ); + + __kmp_is_user_lock_initialized_ = + ( int ( * )( kmp_user_lock_p ) ) NULL; + + __kmp_get_user_lock_location_ = + ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; + + __kmp_set_user_lock_location_ = + ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; + + __kmp_get_user_lock_flags_ = + ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; + + __kmp_set_user_lock_flags_ = + ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; + } + break; + +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) + + case lk_ticket: { + __kmp_base_user_lock_size = sizeof( 
kmp_base_ticket_lock_t ); + __kmp_user_lock_size = sizeof( kmp_ticket_lock_t ); + + __kmp_get_user_lock_owner_ = + ( kmp_int32 ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_ticket_lock_owner ); + + if ( __kmp_env_consistency_check ) { + KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); + } + else { + KMP_BIND_USER_LOCK(ticket); + KMP_BIND_NESTED_USER_LOCK(ticket); + } + + __kmp_destroy_user_lock_ = + ( void ( * )( kmp_user_lock_p ) ) + ( &__kmp_destroy_ticket_lock ); + + __kmp_is_user_lock_initialized_ = + ( int ( * )( kmp_user_lock_p ) ) + ( &__kmp_is_ticket_lock_initialized ); + + __kmp_get_user_lock_location_ = + ( const ident_t * ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_ticket_lock_location ); + + __kmp_set_user_lock_location_ = + ( void ( * )( kmp_user_lock_p, const ident_t * ) ) + ( &__kmp_set_ticket_lock_location ); + + __kmp_get_user_lock_flags_ = + ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_ticket_lock_flags ); + + __kmp_set_user_lock_flags_ = + ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) + ( &__kmp_set_ticket_lock_flags ); + } + break; + + case lk_queuing: { + __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t ); + __kmp_user_lock_size = sizeof( kmp_queuing_lock_t ); + + __kmp_get_user_lock_owner_ = + ( kmp_int32 ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_queuing_lock_owner ); + + if ( __kmp_env_consistency_check ) { + KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); + } + else { + KMP_BIND_USER_LOCK(queuing); + KMP_BIND_NESTED_USER_LOCK(queuing); + } + + __kmp_destroy_user_lock_ = + ( void ( * )( kmp_user_lock_p ) ) + ( &__kmp_destroy_queuing_lock ); + + __kmp_is_user_lock_initialized_ = + ( int ( * )( kmp_user_lock_p ) ) + ( &__kmp_is_queuing_lock_initialized ); + + __kmp_get_user_lock_location_ = + ( const ident_t * ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_queuing_lock_location ); + + __kmp_set_user_lock_location_ = + ( void ( * )( 
kmp_user_lock_p, const ident_t * ) ) + ( &__kmp_set_queuing_lock_location ); + + __kmp_get_user_lock_flags_ = + ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_queuing_lock_flags ); + + __kmp_set_user_lock_flags_ = + ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) + ( &__kmp_set_queuing_lock_flags ); + } + break; + +#if KMP_USE_ADAPTIVE_LOCKS + case lk_adaptive: { + __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t ); + __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t ); + + __kmp_get_user_lock_owner_ = + ( kmp_int32 ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_queuing_lock_owner ); + + if ( __kmp_env_consistency_check ) { + KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); + } + else { + KMP_BIND_USER_LOCK(adaptive); + } + + __kmp_destroy_user_lock_ = + ( void ( * )( kmp_user_lock_p ) ) + ( &__kmp_destroy_adaptive_lock ); + + __kmp_is_user_lock_initialized_ = + ( int ( * )( kmp_user_lock_p ) ) + ( &__kmp_is_queuing_lock_initialized ); + + __kmp_get_user_lock_location_ = + ( const ident_t * ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_queuing_lock_location ); + + __kmp_set_user_lock_location_ = + ( void ( * )( kmp_user_lock_p, const ident_t * ) ) + ( &__kmp_set_queuing_lock_location ); + + __kmp_get_user_lock_flags_ = + ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_queuing_lock_flags ); + + __kmp_set_user_lock_flags_ = + ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) + ( &__kmp_set_queuing_lock_flags ); + + } + break; +#endif // KMP_USE_ADAPTIVE_LOCKS + + case lk_drdpa: { + __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t ); + __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t ); + + __kmp_get_user_lock_owner_ = + ( kmp_int32 ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_drdpa_lock_owner ); + + if ( __kmp_env_consistency_check ) { + KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); + } + else { + KMP_BIND_USER_LOCK(drdpa); + KMP_BIND_NESTED_USER_LOCK(drdpa); + } + + __kmp_destroy_user_lock_ 
= + ( void ( * )( kmp_user_lock_p ) ) + ( &__kmp_destroy_drdpa_lock ); + + __kmp_is_user_lock_initialized_ = + ( int ( * )( kmp_user_lock_p ) ) + ( &__kmp_is_drdpa_lock_initialized ); + + __kmp_get_user_lock_location_ = + ( const ident_t * ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_drdpa_lock_location ); + + __kmp_set_user_lock_location_ = + ( void ( * )( kmp_user_lock_p, const ident_t * ) ) + ( &__kmp_set_drdpa_lock_location ); + + __kmp_get_user_lock_flags_ = + ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) + ( &__kmp_get_drdpa_lock_flags ); + + __kmp_set_user_lock_flags_ = + ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) + ( &__kmp_set_drdpa_lock_flags ); + } + break; + } +} + + +// ---------------------------------------------------------------------------- +// User lock table & lock allocation + +kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL }; +kmp_user_lock_p __kmp_lock_pool = NULL; + +// Lock block-allocation support. +kmp_block_of_locks* __kmp_lock_blocks = NULL; +int __kmp_num_locks_in_block = 1; // FIXME - tune this value + +static kmp_lock_index_t +__kmp_lock_table_insert( kmp_user_lock_p lck ) +{ + // Assume that kmp_global_lock is held upon entry/exit. + kmp_lock_index_t index; + if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) { + kmp_lock_index_t size; + kmp_user_lock_p *table; + // Reallocate lock table. + if ( __kmp_user_lock_table.allocated == 0 ) { + size = 1024; + } + else { + size = __kmp_user_lock_table.allocated * 2; + } + table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size ); + KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) ); + table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table; + // We cannot free the previous table now, since it may be in use by other + // threads. So save the pointer to the previous table in in the first element of the + // new table. 
All the tables will be organized into a list, and could be freed when + // library shutting down. + __kmp_user_lock_table.table = table; + __kmp_user_lock_table.allocated = size; + } + KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated ); + index = __kmp_user_lock_table.used; + __kmp_user_lock_table.table[ index ] = lck; + ++ __kmp_user_lock_table.used; + return index; +} + +static kmp_user_lock_p +__kmp_lock_block_allocate() +{ + // Assume that kmp_global_lock is held upon entry/exit. + static int last_index = 0; + if ( ( last_index >= __kmp_num_locks_in_block ) + || ( __kmp_lock_blocks == NULL ) ) { + // Restart the index. + last_index = 0; + // Need to allocate a new block. + KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); + size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; + char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) ); + // Set up the new block. + kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]); + new_block->next_block = __kmp_lock_blocks; + new_block->locks = (void *)buffer; + // Publish the new block. + KMP_MB(); + __kmp_lock_blocks = new_block; + } + kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) ) + [ last_index * __kmp_user_lock_size ] ) ); + last_index++; + return ret; +} + +// +// Get memory for a lock. It may be freshly allocated memory or reused memory +// from lock pool. +// +kmp_user_lock_p +__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, + kmp_lock_flags_t flags ) +{ + kmp_user_lock_p lck; + kmp_lock_index_t index; + KMP_DEBUG_ASSERT( user_lock ); + + __kmp_acquire_lock( &__kmp_global_lock, gtid ); + + if ( __kmp_lock_pool == NULL ) { + // Lock pool is empty. Allocate new memory. + if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point. 
+ lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size ); + } + else { + lck = __kmp_lock_block_allocate(); + } + + // Insert lock in the table so that it can be freed in __kmp_cleanup, + // and debugger has info on all allocated locks. + index = __kmp_lock_table_insert( lck ); + } + else { + // Pick up lock from pool. + lck = __kmp_lock_pool; + index = __kmp_lock_pool->pool.index; + __kmp_lock_pool = __kmp_lock_pool->pool.next; + } + + // + // We could potentially differentiate between nested and regular locks + // here, and do the lock table lookup for regular locks only. + // + if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { + * ( (kmp_lock_index_t *) user_lock ) = index; + } + else { + * ( (kmp_user_lock_p *) user_lock ) = lck; + } + + // mark the lock if it is critical section lock. + __kmp_set_user_lock_flags( lck, flags ); + + __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper + + return lck; +} + +// Put lock's memory to pool for reusing. +void +__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( user_lock != NULL ); + KMP_DEBUG_ASSERT( lck != NULL ); + + __kmp_acquire_lock( & __kmp_global_lock, gtid ); + + lck->pool.next = __kmp_lock_pool; + __kmp_lock_pool = lck; + if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { + kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock ); + KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used ); + lck->pool.index = index; + } + + __kmp_release_lock( & __kmp_global_lock, gtid ); +} + +kmp_user_lock_p +__kmp_lookup_user_lock( void **user_lock, char const *func ) +{ + kmp_user_lock_p lck = NULL; + + if ( __kmp_env_consistency_check ) { + if ( user_lock == NULL ) { + KMP_FATAL( LockIsUninitialized, func ); + } + } + + if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { + kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock ); + if ( __kmp_env_consistency_check ) { + if ( ! 
( 0 < index && index < __kmp_user_lock_table.used ) ) { + KMP_FATAL( LockIsUninitialized, func ); + } + } + KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used ); + KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); + lck = __kmp_user_lock_table.table[index]; + } + else { + lck = *( (kmp_user_lock_p *)user_lock ); + } + + if ( __kmp_env_consistency_check ) { + if ( lck == NULL ) { + KMP_FATAL( LockIsUninitialized, func ); + } + } + + return lck; +} + +void +__kmp_cleanup_user_locks( void ) +{ + // + // Reset lock pool. Do not worry about lock in the pool -- we will free + // them when iterating through lock table (it includes all the locks, + // dead or alive). + // + __kmp_lock_pool = NULL; + +#define IS_CRITICAL(lck) \ + ( ( __kmp_get_user_lock_flags_ != NULL ) && \ + ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) ) + + // + // Loop through lock table, free all locks. + // + // Do not free item [0], it is reserved for lock tables list. + // + // FIXME - we are iterating through a list of (pointers to) objects of + // type union kmp_user_lock, but we have no way of knowing whether the + // base type is currently "pool" or whatever the global user lock type + // is. + // + // We are relying on the fact that for all of the user lock types + // (except "tas"), the first field in the lock struct is the "initialized" + // field, which is set to the address of the lock object itself when + // the lock is initialized. When the union is of type "pool", the + // first field is a pointer to the next object in the free list, which + // will not be the same address as the object itself. + // + // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck ) + // will fail for "pool" objects on the free list. This must happen as + // the "location" field of real user locks overlaps the "index" field + // of "pool" objects. 
+ // + // It would be better to run through the free list, and remove all "pool" + // objects from the lock table before executing this loop. However, + // "pool" objects do not always have their index field set (only on + // lin_32e), and I don't want to search the lock table for the address + // of every "pool" object on the free list. + // + while ( __kmp_user_lock_table.used > 1 ) { + const ident *loc; + + // + // reduce __kmp_user_lock_table.used before freeing the lock, + // so that state of locks is consistent + // + kmp_user_lock_p lck = __kmp_user_lock_table.table[ + --__kmp_user_lock_table.used ]; + + if ( ( __kmp_is_user_lock_initialized_ != NULL ) && + ( *__kmp_is_user_lock_initialized_ )( lck ) ) { + // + // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is + // initialized AND it is NOT a critical section (user is not + // responsible for destroying criticals) AND we know source + // location to report. + // + if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) && + ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) && + ( loc->psource != NULL ) ) { + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 ); + KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line ); + __kmp_str_loc_free( &str_loc); + } + +#ifdef KMP_DEBUG + if ( IS_CRITICAL( lck ) ) { + KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) ); + } + else { + KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) ); + } +#endif // KMP_DEBUG + + // + // Cleanup internal lock dynamic resources + // (for drdpa locks particularly). + // + __kmp_destroy_user_lock( lck ); + } + + // + // Free the lock if block allocation of locks is not used. + // + if ( __kmp_lock_blocks == NULL ) { + __kmp_free( lck ); + } + } + +#undef IS_CRITICAL + + // + // delete lock table(s). 
+ // + kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; + __kmp_user_lock_table.table = NULL; + __kmp_user_lock_table.allocated = 0; + + while ( table_ptr != NULL ) { + // + // In the first element we saved the pointer to the previous + // (smaller) lock table. + // + kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] ); + __kmp_free( table_ptr ); + table_ptr = next; + } + + // + // Free buffers allocated for blocks of locks. + // + kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; + __kmp_lock_blocks = NULL; + + while ( block_ptr != NULL ) { + kmp_block_of_locks_t *next = block_ptr->next_block; + __kmp_free( block_ptr->locks ); + // + // *block_ptr itself was allocated at the end of the locks vector. + // + block_ptr = next; + } + + TCW_4(__kmp_init_user_locks, FALSE); +} + +#endif // KMP_USE_DYNAMIC_LOCK diff --git a/contrib/libs/cxxsupp/openmp/kmp_lock.h b/contrib/libs/cxxsupp/openmp/kmp_lock.h index d79db4ae961..8cd01d39812 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_lock.h +++ b/contrib/libs/cxxsupp/openmp/kmp_lock.h @@ -1,1273 +1,1273 @@ -/* - * kmp_lock.h -- lock header file - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_LOCK_H -#define KMP_LOCK_H - -#include // CHAR_BIT -#include // offsetof - -#include "kmp_os.h" -#include "kmp_debug.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// ---------------------------------------------------------------------------- -// Have to copy these definitions from kmp.h because kmp.h cannot be included -// due to circular dependencies. Will undef these at end of file. 
- -#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) -#define KMP_GTID_DNE (-2) - -// Forward declaration of ident and ident_t - -struct ident; -typedef struct ident ident_t; - -// End of copied code. -// ---------------------------------------------------------------------------- - -// -// We need to know the size of the area we can assume that the compiler(s) -// allocated for obects of type omp_lock_t and omp_nest_lock_t. The Intel -// compiler always allocates a pointer-sized area, as does visual studio. -// -// gcc however, only allocates 4 bytes for regular locks, even on 64-bit -// intel archs. It allocates at least 8 bytes for nested lock (more on -// recent versions), but we are bounded by the pointer-sized chunks that -// the Intel compiler allocates. -// - -#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) -# define OMP_LOCK_T_SIZE sizeof(int) -# define OMP_NEST_LOCK_T_SIZE sizeof(void *) -#else -# define OMP_LOCK_T_SIZE sizeof(void *) -# define OMP_NEST_LOCK_T_SIZE sizeof(void *) -#endif - -// -// The Intel compiler allocates a 32-byte chunk for a critical section. -// Both gcc and visual studio only allocate enough space for a pointer. -// Sometimes we know that the space was allocated by the Intel compiler. -// -#define OMP_CRITICAL_SIZE sizeof(void *) -#define INTEL_CRITICAL_SIZE 32 - -// -// lock flags -// -typedef kmp_uint32 kmp_lock_flags_t; - -#define kmp_lf_critical_section 1 - -// -// When a lock table is used, the indices are of kmp_lock_index_t -// -typedef kmp_uint32 kmp_lock_index_t; - -// -// When memory allocated for locks are on the lock pool (free list), -// it is treated as structs of this type. 
-// -struct kmp_lock_pool { - union kmp_user_lock *next; - kmp_lock_index_t index; -}; - -typedef struct kmp_lock_pool kmp_lock_pool_t; - - -extern void __kmp_validate_locks( void ); - - -// ---------------------------------------------------------------------------- -// -// There are 5 lock implementations: -// -// 1. Test and set locks. -// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture) -// 3. Ticket (Lamport bakery) locks. -// 4. Queuing locks (with separate spin fields). -// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks -// -// and 3 lock purposes: -// -// 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time. -// These do not require non-negative global thread ID's. -// 2. Internal RTL locks -- Used everywhere else in the RTL -// 3. User locks (includes critical sections) -// -// ---------------------------------------------------------------------------- - - -// ============================================================================ -// Lock implementations. -// ============================================================================ - - -// ---------------------------------------------------------------------------- -// Test and set locks. -// -// Non-nested test and set locks differ from the other lock kinds (except -// futex) in that we use the memory allocated by the compiler for the lock, -// rather than a pointer to it. -// -// On lin32, lin_32e, and win_32, the space allocated may be as small as 4 -// bytes, so we have to use a lock table for nested locks, and avoid accessing -// the depth_locked field for non-nested locks. -// -// Information normally available to the tools, such as lock location, -// lock usage (normal lock vs. critical section), etc. is not available with -// test and set locks. 
-// ---------------------------------------------------------------------------- - -struct kmp_base_tas_lock { - volatile kmp_int32 poll; // 0 => unlocked - // locked: (gtid+1) of owning thread - kmp_int32 depth_locked; // depth locked, for nested locks only -}; - -typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; - -union kmp_tas_lock { - kmp_base_tas_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment - // no cache line padding -}; - -typedef union kmp_tas_lock kmp_tas_lock_t; - -// -// Static initializer for test and set lock variables. Usage: -// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); -// -#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } } - -extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck ); -extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ); - -extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck ); -extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); - -#define KMP_LOCK_RELEASED 1 -#define KMP_LOCK_STILL_HELD 0 -#define KMP_LOCK_ACQUIRED_FIRST 1 -#define KMP_LOCK_ACQUIRED_NEXT 0 - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -// ---------------------------------------------------------------------------- -// futex locks. futex locks are only available on Linux* OS. -// -// Like non-nested test and set lock, non-nested futex locks use the memory -// allocated by the compiler for the lock, rather than a pointer to it. 
-// -// Information normally available to the tools, such as lock location, -// lock usage (normal lock vs. critical section), etc. is not available with -// test and set locks. With non-nested futex locks, the lock owner is not -// even available. -// ---------------------------------------------------------------------------- - -struct kmp_base_futex_lock { - volatile kmp_int32 poll; // 0 => unlocked - // 2*(gtid+1) of owning thread, 0 if unlocked - // locked: (gtid+1) of owning thread - kmp_int32 depth_locked; // depth locked, for nested locks only -}; - -typedef struct kmp_base_futex_lock kmp_base_futex_lock_t; - -union kmp_futex_lock { - kmp_base_futex_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment - // no cache line padding -}; - -typedef union kmp_futex_lock kmp_futex_lock_t; - -// -// Static initializer for futex lock variables. Usage: -// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock ); -// -#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } } - -extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck ); -extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ); - -extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck ); -extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ); - -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - - -// ---------------------------------------------------------------------------- 
-// Ticket locks. -// ---------------------------------------------------------------------------- - -struct kmp_base_ticket_lock { - // `initialized' must be the first entry in the lock data structure! - volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state - ident_t const * location; // Source code location of omp_init_lock(). - volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires - volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock - volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked, for nested locks only - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t; - -union KMP_ALIGN_CACHE kmp_ticket_lock { - kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing. - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ]; -}; - -typedef union kmp_ticket_lock kmp_ticket_lock_t; - -// -// Static initializer for simple ticket lock variables. Usage: -// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); -// Note the macro argument. It is important to make var properly initialized. 
-// -#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } } - -extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck ); -extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ); - -extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck ); -extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ); - - -// ---------------------------------------------------------------------------- -// Queuing locks. -// ---------------------------------------------------------------------------- - -#if KMP_USE_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_info; - -typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; - -#if KMP_DEBUG_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_statistics { - /* So we can get stats from locks that haven't been destroyed. 
*/ - kmp_adaptive_lock_info_t * next; - kmp_adaptive_lock_info_t * prev; - - /* Other statistics */ - kmp_uint32 successfulSpeculations; - kmp_uint32 hardFailedSpeculations; - kmp_uint32 softFailedSpeculations; - kmp_uint32 nonSpeculativeAcquires; - kmp_uint32 nonSpeculativeAcquireAttempts; - kmp_uint32 lemmingYields; -}; - -typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; - -extern void __kmp_print_speculative_stats(); -extern void __kmp_init_speculative_stats(); - -#endif // KMP_DEBUG_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_info -{ - /* Values used for adaptivity. - * Although these are accessed from multiple threads we don't access them atomically, - * because if we miss updates it probably doesn't matter much. (It just affects our - * decision about whether to try speculation on the lock). - */ - kmp_uint32 volatile badness; - kmp_uint32 volatile acquire_attempts; - /* Parameters of the lock. */ - kmp_uint32 max_badness; - kmp_uint32 max_soft_retries; - -#if KMP_DEBUG_ADAPTIVE_LOCKS - kmp_adaptive_lock_statistics_t volatile stats; -#endif -}; - -#endif // KMP_USE_ADAPTIVE_LOCKS - - -struct kmp_base_queuing_lock { - - // `initialized' must be the first entry in the lock data structure! - volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state. - - ident_t const * location; // Source code location of omp_init_lock(). - - KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned! 
- - volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty - // Must be no padding here since head/tail used in 8-byte CAS - volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty - // Decl order assumes little endian - // bakery-style lock - volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires - volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock - volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked, for nested locks only - - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; - -KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 ); - -union KMP_ALIGN_CACHE kmp_queuing_lock { - kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing. - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ]; -}; - -typedef union kmp_queuing_lock kmp_queuing_lock_t; - -extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ); -extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ); - -extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck ); -extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ); - -#if KMP_USE_ADAPTIVE_LOCKS - -// 
---------------------------------------------------------------------------- -// Adaptive locks. -// ---------------------------------------------------------------------------- -struct kmp_base_adaptive_lock { - kmp_base_queuing_lock qlk; - KMP_ALIGN(CACHE_LINE) - kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock -}; - -typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; - -union KMP_ALIGN_CACHE kmp_adaptive_lock { - kmp_base_adaptive_lock_t lk; - kmp_lock_pool_t pool; - double lk_align; - char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ]; -}; -typedef union kmp_adaptive_lock kmp_adaptive_lock_t; - -# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk) - -#endif // KMP_USE_ADAPTIVE_LOCKS - -// ---------------------------------------------------------------------------- -// DRDPA ticket locks. -// ---------------------------------------------------------------------------- - -struct kmp_base_drdpa_lock { - // - // All of the fields on the first cache line are only written when - // initializing or reconfiguring the lock. These are relatively rare - // operations, so data from the first cache line will usually stay - // resident in the cache of each thread trying to acquire the lock. - // - // initialized must be the first entry in the lock data structure! - // - KMP_ALIGN_CACHE - - volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state - ident_t const * location; // Source code location of omp_init_lock(). - volatile struct kmp_lock_poll { - kmp_uint64 poll; - } * volatile polls; - volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op - kmp_uint64 cleanup_ticket; // thread with cleanup ticket - volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls - kmp_uint32 num_polls; // must be power of 2 - - // - // next_ticket it needs to exist in a separate cache line, as it is - // invalidated every time a thread takes a new ticket. 
- // - KMP_ALIGN_CACHE - - volatile kmp_uint64 next_ticket; - - // - // now_serving is used to store our ticket value while we hold the lock. - // It has a slightly different meaning in the DRDPA ticket locks (where - // it is written by the acquiring thread) than it does in the simple - // ticket locks (where it is written by the releasing thread). - // - // Since now_serving is only read an written in the critical section, - // it is non-volatile, but it needs to exist on a separate cache line, - // as it is invalidated at every lock acquire. - // - // Likewise, the vars used for nested locks (owner_id and depth_locked) - // are only written by the thread owning the lock, so they are put in - // this cache line. owner_id is read by other threads, so it must be - // declared volatile. - // - KMP_ALIGN_CACHE - - kmp_uint64 now_serving; // doesn't have to be volatile - volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t; - -union KMP_ALIGN_CACHE kmp_drdpa_lock { - kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing. 
*/ - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ]; -}; - -typedef union kmp_drdpa_lock kmp_drdpa_lock_t; - -extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ); -extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ); - -extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); -extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); - - -// ============================================================================ -// Lock purposes. -// ============================================================================ - - -// ---------------------------------------------------------------------------- -// Bootstrap locks. -// ---------------------------------------------------------------------------- - -// Bootstrap locks -- very few locks used at library initialization time. -// Bootstrap locks are currently implemented as ticket locks. -// They could also be implemented as test and set lock, but cannot be -// implemented with other lock kinds as they require gtids which are not -// available at initialization time. 
- -typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; - -#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) ) - -static inline int -__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE ); -} - -static inline int -__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - return __kmp_test_ticket_lock( lck, KMP_GTID_DNE ); -} - -static inline void -__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_release_ticket_lock( lck, KMP_GTID_DNE ); -} - -static inline void -__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_init_ticket_lock( lck ); -} - -static inline void -__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); -} - - -// ---------------------------------------------------------------------------- -// Internal RTL locks. -// ---------------------------------------------------------------------------- - -// -// Internal RTL locks are also implemented as ticket locks, for now. -// -// FIXME - We should go through and figure out which lock kind works best for -// each internal lock, and use the type declaration and function calls for -// that explicit lock kind (and get rid of this section). 
-// - -typedef kmp_ticket_lock_t kmp_lock_t; - -static inline int -__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_acquire_ticket_lock( lck, gtid ); -} - -static inline int -__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_test_ticket_lock( lck, gtid ); -} - -static inline void -__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - __kmp_release_ticket_lock( lck, gtid ); -} - -static inline void -__kmp_init_lock( kmp_lock_t *lck ) -{ - __kmp_init_ticket_lock( lck ); -} - -static inline void -__kmp_destroy_lock( kmp_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); -} - - -// ---------------------------------------------------------------------------- -// User locks. -// ---------------------------------------------------------------------------- - -// -// Do not allocate objects of type union kmp_user_lock!!! -// This will waste space unless __kmp_user_lock_kind == lk_drdpa. -// Instead, check the value of __kmp_user_lock_kind and allocate objects of -// the type of the appropriate union member, and cast their addresses to -// kmp_user_lock_p. -// - -enum kmp_lock_kind { - lk_default = 0, - lk_tas, -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - lk_futex, -#endif -#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - lk_hle, - lk_rtm, -#endif - lk_ticket, - lk_queuing, - lk_drdpa, -#if KMP_USE_ADAPTIVE_LOCKS - lk_adaptive -#endif // KMP_USE_ADAPTIVE_LOCKS -}; - -typedef enum kmp_lock_kind kmp_lock_kind_t; - -extern kmp_lock_kind_t __kmp_user_lock_kind; - -union kmp_user_lock { - kmp_tas_lock_t tas; -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - kmp_futex_lock_t futex; -#endif - kmp_ticket_lock_t ticket; - kmp_queuing_lock_t queuing; - kmp_drdpa_lock_t drdpa; -#if KMP_USE_ADAPTIVE_LOCKS - kmp_adaptive_lock_t adaptive; -#endif // KMP_USE_ADAPTIVE_LOCKS - kmp_lock_pool_t pool; -}; - -typedef union kmp_user_lock *kmp_user_lock_p; - -#if ! 
KMP_USE_DYNAMIC_LOCK - -extern size_t __kmp_base_user_lock_size; -extern size_t __kmp_user_lock_size; - -extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ); - -static inline kmp_int32 -__kmp_get_user_lock_owner( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL ); - return ( *__kmp_get_user_lock_owner_ )( lck ); -} - -extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#define __kmp_acquire_user_lock_with_checks(lck,gtid) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if ( __kmp_env_consistency_check ) { \ - char const * const func = "omp_set_lock"; \ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \ - && lck->tas.lk.depth_locked != -1 ) { \ - KMP_FATAL( LockNestableUsedAsSimple, func ); \ - } \ - if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \ - KMP_FATAL( LockIsAlreadyOwned, func ); \ - } \ - } \ - if ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE( lck ); \ - KMP_INIT_YIELD( spins ); \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - while ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - } \ - } \ - KMP_FSYNC_ACQUIRED( lck ); \ - } else { \ - KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \ - ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \ - } - -#else -static inline int -__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); - return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ -extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ -static inline int -__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - if ( __kmp_user_lock_kind == lk_tas ) { - if ( __kmp_env_consistency_check ) { - char const * const func = "omp_test_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && lck->tas.lk.depth_locked != -1 ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - } - return ( ( lck->tas.lk.poll == 0 ) && - KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); - } else { - KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); - } -} -#else -static inline int -__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -static inline void -__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL ); - ( 
*__kmp_release_user_lock_with_checks_ ) ( lck, gtid ); -} - -extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_init_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL ); - ( *__kmp_init_user_lock_with_checks_ )( lck ); -} - -// -// We need a non-checking version of destroy lock for when the RTL is -// doing the cleanup as it can't always tell if the lock is nested or not. -// -extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_destroy_user_lock( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL ); - ( *__kmp_destroy_user_lock_ )( lck ); -} - -extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL ); - ( *__kmp_destroy_user_lock_with_checks_ )( lck ); -} - -extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) - -#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if ( __kmp_env_consistency_check ) { \ - char const * const func = "omp_set_nest_lock"; \ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \ - && lck->tas.lk.depth_locked == -1 ) { \ - KMP_FATAL( LockSimpleUsedAsNestable, func ); \ - } \ - } \ - if ( lck->tas.lk.poll - 1 == gtid ) { \ - lck->tas.lk.depth_locked += 1; \ - *depth = KMP_LOCK_ACQUIRED_NEXT; \ - } else { \ - if ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE( lck ); \ - KMP_INIT_YIELD( spins ); \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - while ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - } \ - } \ - lck->tas.lk.depth_locked = 1; \ - *depth = KMP_LOCK_ACQUIRED_FIRST; \ - } \ - KMP_FSYNC_ACQUIRED( lck ); \ - } else { \ - KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \ - *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \ - } - -#else -static inline void -__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth ) -{ - KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); - *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) -static inline int -__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - if ( __kmp_user_lock_kind == lk_tas ) { - int retval; - if ( __kmp_env_consistency_check ) { - char const * const func = "omp_test_nest_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) - && lck->tas.lk.depth_locked == -1 ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - } - KMP_DEBUG_ASSERT( gtid >= 0 ); - if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ - return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ - } - retval = ( ( lck->tas.lk.poll == 0 ) && - KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); - if ( retval ) { - KMP_MB(); - lck->tas.lk.depth_locked = 1; - } - return retval; - } else { - KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); - 
return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); - } -} -#else -static inline int -__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -static inline int -__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid ); -} - -extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL ); - ( *__kmp_init_nested_user_lock_with_checks_ )( lck ); -} - -extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL ); - ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck ); -} - -// -// user lock functions which do not necessarily exist for all lock kinds. -// -// The "set" functions usually have wrapper routines that check for a NULL set -// function pointer and call it if non-NULL. -// -// In some cases, it makes sense to have a "get" wrapper function check for a -// NULL get function pointer and return NULL / invalid value / error code if -// the function pointer is NULL. -// -// In other cases, the calling code really should differentiate between an -// unimplemented function and one that is implemented but returning NULL / -// invalied value. If this is the case, no get function wrapper exists. 
-// - -extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ); - -// no set function; fields set durining local allocation - -extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ); - -static inline const ident_t * -__kmp_get_user_lock_location( kmp_user_lock_p lck ) -{ - if ( __kmp_get_user_lock_location_ != NULL ) { - return ( *__kmp_get_user_lock_location_ )( lck ); - } - else { - return NULL; - } -} - -extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ); - -static inline void -__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc ) -{ - if ( __kmp_set_user_lock_location_ != NULL ) { - ( *__kmp_set_user_lock_location_ )( lck, loc ); - } -} - -extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ); - -extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ); - -static inline void -__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags ) -{ - if ( __kmp_set_user_lock_flags_ != NULL ) { - ( *__kmp_set_user_lock_flags_ )( lck, flags ); - } -} - -// -// The fuction which sets up all of the vtbl pointers for kmp_user_lock_t. -// -extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ); - -// -// Macros for binding user lock functions. 
-// -#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \ - __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_acquire##nest##kind##_##suffix; \ - __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_release##nest##kind##_##suffix; \ - __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_test##nest##kind##_##suffix; \ - __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ - __kmp_init##nest##kind##_##suffix; \ - __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ - __kmp_destroy##nest##kind##_##suffix; \ -} - -#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock) -#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks) -#define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock) -#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks) - -// ---------------------------------------------------------------------------- -// User lock table & lock allocation -// ---------------------------------------------------------------------------- - -/* - On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytems memory for lock variable, which - is not enough to store a pointer, so we have to use lock indexes instead of pointers and - maintain lock table to map indexes to pointers. - - - Note: The first element of the table is not a pointer to lock! It is a pointer to previously - allocated table (or NULL if it is the first table). - - Usage: - - if ( OMP_LOCK_T_SIZE < sizeof( ) ) { // or OMP_NEST_LOCK_T_SIZE - Lock table is fully utilized. User locks are indexes, so table is - used on user lock operation. 
- Note: it may be the case (lin_32) that we don't need to use a lock - table for regular locks, but do need the table for nested locks. - } - else { - Lock table initialized but not actually used. - } -*/ - -struct kmp_lock_table { - kmp_lock_index_t used; // Number of used elements - kmp_lock_index_t allocated; // Number of allocated elements - kmp_user_lock_p * table; // Lock table. -}; - -typedef struct kmp_lock_table kmp_lock_table_t; - -extern kmp_lock_table_t __kmp_user_lock_table; -extern kmp_user_lock_p __kmp_lock_pool; - -struct kmp_block_of_locks { - struct kmp_block_of_locks * next_block; - void * locks; -}; - -typedef struct kmp_block_of_locks kmp_block_of_locks_t; - -extern kmp_block_of_locks_t *__kmp_lock_blocks; -extern int __kmp_num_locks_in_block; - -extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags ); -extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ); -extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func ); -extern void __kmp_cleanup_user_locks(); - -#define KMP_CHECK_USER_LOCK_INIT() \ - { \ - if ( ! TCR_4( __kmp_init_user_locks ) ) { \ - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \ - if ( ! TCR_4( __kmp_init_user_locks ) ) { \ - TCW_4( __kmp_init_user_locks, TRUE ); \ - } \ - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \ - } \ - } - -#endif // KMP_USE_DYNAMIC_LOCK - -#undef KMP_PAD -#undef KMP_GTID_DNE - -#if KMP_USE_DYNAMIC_LOCK - -// -// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current -// compatibility. Essential functionality of this new code is dynamic dispatch, but it also -// implements (or enables implementation of) hinted user lock and critical section which will be -// part of OMP 4.1 soon. 
-// -// Lock type can be decided at creation time (i.e., lock initialization), and subsequent lock -// function call on the created lock object requires type extraction and call through jump table -// using the extracted type. This type information is stored in two different ways depending on -// the size of the lock object, and we differentiate lock types by this size requirement - direct -// and indirect locks. -// -// Direct locks: -// A direct lock object fits into the space created by the compiler for an omp_lock_t object, and -// TAS/Futex lock falls into this category. We use low one byte of the lock object as the storage -// for the lock type, and appropriate bit operation is required to access the data meaningful to -// the lock algorithms. Also, to differentiate direct lock from indirect lock, 1 is written to LSB -// of the lock object. The newly introduced "hle" lock is also a direct lock. -// -// Indirect locks: -// An indirect lock object requires more space than the compiler-generated space, and it should be -// allocated from heap. Depending on the size of the compiler-generated space for the lock (i.e., -// size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated -// indirect lock (void * fits in the object) or an index to the indirect lock table entry that -// holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this category, and the newly -// introduced "rtm" lock is also an indirect lock which was implemented on top of the Queuing lock. -// When the omp_lock_t object holds an index (not lock address), 0 is written to LSB to -// differentiate the lock from a direct lock, and the remaining part is the actual index to the -// indirect lock table. 
-// - -#include // for uintptr_t - -// Shortcuts -#define KMP_USE_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) -#define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 -#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0 - -// List of lock definitions; all nested locks are indirect locks. -// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE. -// All nested locks are indirect lock types. -#if KMP_USE_TSX -# if KMP_USE_FUTEX -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ - m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# else -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ - m(nested_tas, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# endif // KMP_USE_FUTEX -# define KMP_LAST_D_LOCK lockseq_hle -#else -# if KMP_USE_FUTEX -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ - m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# define KMP_LAST_D_LOCK lockseq_futex -# else -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ - m(nested_tas, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# define KMP_LAST_D_LOCK lockseq_tas -# endif // KMP_USE_FUTEX -#endif // KMP_USE_TSX - -// Information used in dynamic dispatch -#define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks -#define KMP_FIRST_D_LOCK lockseq_tas -#define KMP_FIRST_I_LOCK lockseq_ticket -#define KMP_LAST_I_LOCK lockseq_nested_drdpa -#define KMP_NUM_I_LOCKS 
(locktag_nested_drdpa+1) // number of indirect lock types - -// Base type for dynamic locks. -typedef kmp_uint32 kmp_dyna_lock_t; - -// Lock sequence that enumerates all lock kinds. -// Always make this enumeration consistent with kmp_lockseq_t in the include directory. -typedef enum { - lockseq_indirect = 0, -#define expand_seq(l,a) lockseq_##l, - KMP_FOREACH_D_LOCK(expand_seq, 0) - KMP_FOREACH_I_LOCK(expand_seq, 0) -#undef expand_seq -} kmp_dyna_lockseq_t; - -// Enumerates indirect lock tags. -typedef enum { -#define expand_tag(l,a) locktag_##l, - KMP_FOREACH_I_LOCK(expand_tag, 0) -#undef expand_tag -} kmp_indirect_locktag_t; - -// Utility macros that extract information from lock sequences. -#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK) -#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK) -#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK) -#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1) - -// Enumerates direct lock tags starting from indirect tag. -typedef enum { -#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l), - KMP_FOREACH_D_LOCK(expand_tag, 0) -#undef expand_tag -} kmp_direct_locktag_t; - -// Indirect lock type -typedef struct { - kmp_user_lock_p lock; - kmp_indirect_locktag_t type; -} kmp_indirect_lock_t; - -// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking. -extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); -extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *); -extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); - -// Function tables for indirect locks. Set/unset/test differentiate functions with/withuot consistency checking. 
-extern void (*__kmp_indirect_init[])(kmp_user_lock_p); -extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p); -extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); - -// Extracts direct lock tag from a user lock pointer -#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<> 1) - -// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type). -#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)] - -// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type). -#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type] - -// Initializes a direct lock with the given lock pointer and lock sequence. -#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq) - -// Initializes an indirect lock with the given lock pointer and lock sequence. -#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq) - -// Returns "free" lock value for the given lock type. -#define KMP_LOCK_FREE(type) (locktag_##type) - -// Returns "busy" lock value for the given lock teyp. -#define KMP_LOCK_BUSY(v, type) ((v)<>KMP_LOCK_SHIFT) - -// Initializes global states and data structures for managing dynamic user locks. -extern void __kmp_init_dynamic_user_locks(); - -// Allocates and returns an indirect lock with the given indirect lock tag. -extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t); - -// Cleans up global states and data structures for managing dynamic user locks. -extern void __kmp_cleanup_indirect_user_locks(); - -// Default user lock sequence when not using hinted locks. 
-extern kmp_dyna_lockseq_t __kmp_user_lock_seq; - -// Jump table for "set lock location", available only for indirect locks. -extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *); -#define KMP_SET_I_LOCK_LOCATION(lck, loc) { \ - if (__kmp_indirect_set_location[(lck)->type] != NULL) \ - __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \ -} - -// Jump table for "set lock flags", available only for indirect locks. -extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t); -#define KMP_SET_I_LOCK_FLAGS(lck, flag) { \ - if (__kmp_indirect_set_flags[(lck)->type] != NULL) \ - __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \ -} - -// Jump table for "get lock location", available only for indirect locks. -extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p); -#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \ - ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \ - : NULL ) - -// Jump table for "get lock flags", available only for indirect locks. -extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p); -#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \ - ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \ - : NULL ) - -#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together - -// Lock table for indirect locks. -typedef struct kmp_indirect_lock_table { - kmp_indirect_lock_t **table; // blocks of indirect locks allocated - kmp_lock_index_t size; // size of the indirect lock table - kmp_lock_index_t next; // index to the next lock to be allocated -} kmp_indirect_lock_table_t; - -extern kmp_indirect_lock_table_t __kmp_i_lock_table; - -// Returns the indirect lock associated with the given index. 
-#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK) - -// Number of locks in a lock block, which is fixed to "1" now. -// TODO: No lock block implementation now. If we do support, we need to manage lock block data -// structure for each indirect lock type. -extern int __kmp_num_locks_in_block; - -// Fast lock table lookup without consistency checking -#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \ - ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \ - : *((kmp_indirect_lock_t **)(l)) ) - -// Used once in kmp_error.c -extern kmp_int32 -__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); - -#else // KMP_USE_DYNAMIC_LOCK - -# define KMP_LOCK_BUSY(v, type) (v) -# define KMP_LOCK_FREE(type) 0 -# define KMP_LOCK_STRIP(v) (v) - -#endif // KMP_USE_DYNAMIC_LOCK - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif /* KMP_LOCK_H */ - +/* + * kmp_lock.h -- lock header file + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_LOCK_H +#define KMP_LOCK_H + +#include // CHAR_BIT +#include // offsetof + +#include "kmp_os.h" +#include "kmp_debug.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// ---------------------------------------------------------------------------- +// Have to copy these definitions from kmp.h because kmp.h cannot be included +// due to circular dependencies. Will undef these at end of file. + +#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) +#define KMP_GTID_DNE (-2) + +// Forward declaration of ident and ident_t + +struct ident; +typedef struct ident ident_t; + +// End of copied code. 
+// ---------------------------------------------------------------------------- + +// +// We need to know the size of the area we can assume that the compiler(s) +// allocated for obects of type omp_lock_t and omp_nest_lock_t. The Intel +// compiler always allocates a pointer-sized area, as does visual studio. +// +// gcc however, only allocates 4 bytes for regular locks, even on 64-bit +// intel archs. It allocates at least 8 bytes for nested lock (more on +// recent versions), but we are bounded by the pointer-sized chunks that +// the Intel compiler allocates. +// + +#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) +# define OMP_LOCK_T_SIZE sizeof(int) +# define OMP_NEST_LOCK_T_SIZE sizeof(void *) +#else +# define OMP_LOCK_T_SIZE sizeof(void *) +# define OMP_NEST_LOCK_T_SIZE sizeof(void *) +#endif + +// +// The Intel compiler allocates a 32-byte chunk for a critical section. +// Both gcc and visual studio only allocate enough space for a pointer. +// Sometimes we know that the space was allocated by the Intel compiler. +// +#define OMP_CRITICAL_SIZE sizeof(void *) +#define INTEL_CRITICAL_SIZE 32 + +// +// lock flags +// +typedef kmp_uint32 kmp_lock_flags_t; + +#define kmp_lf_critical_section 1 + +// +// When a lock table is used, the indices are of kmp_lock_index_t +// +typedef kmp_uint32 kmp_lock_index_t; + +// +// When memory allocated for locks are on the lock pool (free list), +// it is treated as structs of this type. +// +struct kmp_lock_pool { + union kmp_user_lock *next; + kmp_lock_index_t index; +}; + +typedef struct kmp_lock_pool kmp_lock_pool_t; + + +extern void __kmp_validate_locks( void ); + + +// ---------------------------------------------------------------------------- +// +// There are 5 lock implementations: +// +// 1. Test and set locks. +// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture) +// 3. Ticket (Lamport bakery) locks. +// 4. Queuing locks (with separate spin fields). +// 5. 
DRPA (Dynamically Reconfigurable Distributed Polling Area) locks +// +// and 3 lock purposes: +// +// 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time. +// These do not require non-negative global thread ID's. +// 2. Internal RTL locks -- Used everywhere else in the RTL +// 3. User locks (includes critical sections) +// +// ---------------------------------------------------------------------------- + + +// ============================================================================ +// Lock implementations. +// ============================================================================ + + +// ---------------------------------------------------------------------------- +// Test and set locks. +// +// Non-nested test and set locks differ from the other lock kinds (except +// futex) in that we use the memory allocated by the compiler for the lock, +// rather than a pointer to it. +// +// On lin32, lin_32e, and win_32, the space allocated may be as small as 4 +// bytes, so we have to use a lock table for nested locks, and avoid accessing +// the depth_locked field for non-nested locks. +// +// Information normally available to the tools, such as lock location, +// lock usage (normal lock vs. critical section), etc. is not available with +// test and set locks. +// ---------------------------------------------------------------------------- + +struct kmp_base_tas_lock { + volatile kmp_int32 poll; // 0 => unlocked + // locked: (gtid+1) of owning thread + kmp_int32 depth_locked; // depth locked, for nested locks only +}; + +typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; + +union kmp_tas_lock { + kmp_base_tas_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment + // no cache line padding +}; + +typedef union kmp_tas_lock kmp_tas_lock_t; + +// +// Static initializer for test and set lock variables. 
Usage: +// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); +// +#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } } + +extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck ); +extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ); + +extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck ); +extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); + +#define KMP_LOCK_RELEASED 1 +#define KMP_LOCK_STILL_HELD 0 +#define KMP_LOCK_ACQUIRED_FIRST 1 +#define KMP_LOCK_ACQUIRED_NEXT 0 + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +// ---------------------------------------------------------------------------- +// futex locks. futex locks are only available on Linux* OS. +// +// Like non-nested test and set lock, non-nested futex locks use the memory +// allocated by the compiler for the lock, rather than a pointer to it. +// +// Information normally available to the tools, such as lock location, +// lock usage (normal lock vs. critical section), etc. is not available with +// test and set locks. With non-nested futex locks, the lock owner is not +// even available. 
+// ---------------------------------------------------------------------------- + +struct kmp_base_futex_lock { + volatile kmp_int32 poll; // 0 => unlocked + // 2*(gtid+1) of owning thread, 0 if unlocked + // locked: (gtid+1) of owning thread + kmp_int32 depth_locked; // depth locked, for nested locks only +}; + +typedef struct kmp_base_futex_lock kmp_base_futex_lock_t; + +union kmp_futex_lock { + kmp_base_futex_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment + // no cache line padding +}; + +typedef union kmp_futex_lock kmp_futex_lock_t; + +// +// Static initializer for futex lock variables. Usage: +// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock ); +// +#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } } + +extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck ); +extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ); + +extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck ); +extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ); + +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + + +// ---------------------------------------------------------------------------- +// Ticket locks. +// ---------------------------------------------------------------------------- + +struct kmp_base_ticket_lock { + // `initialized' must be the first entry in the lock data structure! 
+ volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state + ident_t const * location; // Source code location of omp_init_lock(). + volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires + volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock + volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked, for nested locks only + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; + +typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t; + +union KMP_ALIGN_CACHE kmp_ticket_lock { + kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing. + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ]; +}; + +typedef union kmp_ticket_lock kmp_ticket_lock_t; + +// +// Static initializer for simple ticket lock variables. Usage: +// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); +// Note the macro argument. It is important to make var properly initialized. 
+// +#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } } + +extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck ); +extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ); + +extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck ); +extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ); + + +// ---------------------------------------------------------------------------- +// Queuing locks. +// ---------------------------------------------------------------------------- + +#if KMP_USE_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_info; + +typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; + +#if KMP_DEBUG_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_statistics { + /* So we can get stats from locks that haven't been destroyed. 
*/ + kmp_adaptive_lock_info_t * next; + kmp_adaptive_lock_info_t * prev; + + /* Other statistics */ + kmp_uint32 successfulSpeculations; + kmp_uint32 hardFailedSpeculations; + kmp_uint32 softFailedSpeculations; + kmp_uint32 nonSpeculativeAcquires; + kmp_uint32 nonSpeculativeAcquireAttempts; + kmp_uint32 lemmingYields; +}; + +typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; + +extern void __kmp_print_speculative_stats(); +extern void __kmp_init_speculative_stats(); + +#endif // KMP_DEBUG_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_info +{ + /* Values used for adaptivity. + * Although these are accessed from multiple threads we don't access them atomically, + * because if we miss updates it probably doesn't matter much. (It just affects our + * decision about whether to try speculation on the lock). + */ + kmp_uint32 volatile badness; + kmp_uint32 volatile acquire_attempts; + /* Parameters of the lock. */ + kmp_uint32 max_badness; + kmp_uint32 max_soft_retries; + +#if KMP_DEBUG_ADAPTIVE_LOCKS + kmp_adaptive_lock_statistics_t volatile stats; +#endif +}; + +#endif // KMP_USE_ADAPTIVE_LOCKS + + +struct kmp_base_queuing_lock { + + // `initialized' must be the first entry in the lock data structure! + volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state. + + ident_t const * location; // Source code location of omp_init_lock(). + + KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned! 
+ + volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty + // Must be no padding here since head/tail used in 8-byte CAS + volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty + // Decl order assumes little endian + // bakery-style lock + volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires + volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock + volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked, for nested locks only + + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; + +typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; + +KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 ); + +union KMP_ALIGN_CACHE kmp_queuing_lock { + kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing. + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ]; +}; + +typedef union kmp_queuing_lock kmp_queuing_lock_t; + +extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ); +extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ); + +extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck ); +extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ); + +#if KMP_USE_ADAPTIVE_LOCKS + +// 
---------------------------------------------------------------------------- +// Adaptive locks. +// ---------------------------------------------------------------------------- +struct kmp_base_adaptive_lock { + kmp_base_queuing_lock qlk; + KMP_ALIGN(CACHE_LINE) + kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock +}; + +typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; + +union KMP_ALIGN_CACHE kmp_adaptive_lock { + kmp_base_adaptive_lock_t lk; + kmp_lock_pool_t pool; + double lk_align; + char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ]; +}; +typedef union kmp_adaptive_lock kmp_adaptive_lock_t; + +# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk) + +#endif // KMP_USE_ADAPTIVE_LOCKS + +// ---------------------------------------------------------------------------- +// DRDPA ticket locks. +// ---------------------------------------------------------------------------- + +struct kmp_base_drdpa_lock { + // + // All of the fields on the first cache line are only written when + // initializing or reconfiguring the lock. These are relatively rare + // operations, so data from the first cache line will usually stay + // resident in the cache of each thread trying to acquire the lock. + // + // initialized must be the first entry in the lock data structure! + // + KMP_ALIGN_CACHE + + volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state + ident_t const * location; // Source code location of omp_init_lock(). + volatile struct kmp_lock_poll { + kmp_uint64 poll; + } * volatile polls; + volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op + kmp_uint64 cleanup_ticket; // thread with cleanup ticket + volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls + kmp_uint32 num_polls; // must be power of 2 + + // + // next_ticket it needs to exist in a separate cache line, as it is + // invalidated every time a thread takes a new ticket. 
+ // + KMP_ALIGN_CACHE + + volatile kmp_uint64 next_ticket; + + // + // now_serving is used to store our ticket value while we hold the lock. + // It has a slightly different meaning in the DRDPA ticket locks (where + // it is written by the acquiring thread) than it does in the simple + // ticket locks (where it is written by the releasing thread). + // + // Since now_serving is only read an written in the critical section, + // it is non-volatile, but it needs to exist on a separate cache line, + // as it is invalidated at every lock acquire. + // + // Likewise, the vars used for nested locks (owner_id and depth_locked) + // are only written by the thread owning the lock, so they are put in + // this cache line. owner_id is read by other threads, so it must be + // declared volatile. + // + KMP_ALIGN_CACHE + + kmp_uint64 now_serving; // doesn't have to be volatile + volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; + +typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t; + +union KMP_ALIGN_CACHE kmp_drdpa_lock { + kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing. 
*/ + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ]; +}; + +typedef union kmp_drdpa_lock kmp_drdpa_lock_t; + +extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ); +extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ); + +extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); +extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); + + +// ============================================================================ +// Lock purposes. +// ============================================================================ + + +// ---------------------------------------------------------------------------- +// Bootstrap locks. +// ---------------------------------------------------------------------------- + +// Bootstrap locks -- very few locks used at library initialization time. +// Bootstrap locks are currently implemented as ticket locks. +// They could also be implemented as test and set lock, but cannot be +// implemented with other lock kinds as they require gtids which are not +// available at initialization time. 
+ +typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; + +#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) ) + +static inline int +__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE ); +} + +static inline int +__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + return __kmp_test_ticket_lock( lck, KMP_GTID_DNE ); +} + +static inline void +__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + __kmp_release_ticket_lock( lck, KMP_GTID_DNE ); +} + +static inline void +__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + __kmp_init_ticket_lock( lck ); +} + +static inline void +__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + __kmp_destroy_ticket_lock( lck ); +} + + +// ---------------------------------------------------------------------------- +// Internal RTL locks. +// ---------------------------------------------------------------------------- + +// +// Internal RTL locks are also implemented as ticket locks, for now. +// +// FIXME - We should go through and figure out which lock kind works best for +// each internal lock, and use the type declaration and function calls for +// that explicit lock kind (and get rid of this section). 
+// + +typedef kmp_ticket_lock_t kmp_lock_t; + +static inline int +__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_acquire_ticket_lock( lck, gtid ); +} + +static inline int +__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_test_ticket_lock( lck, gtid ); +} + +static inline void +__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid ) +{ + __kmp_release_ticket_lock( lck, gtid ); +} + +static inline void +__kmp_init_lock( kmp_lock_t *lck ) +{ + __kmp_init_ticket_lock( lck ); +} + +static inline void +__kmp_destroy_lock( kmp_lock_t *lck ) +{ + __kmp_destroy_ticket_lock( lck ); +} + + +// ---------------------------------------------------------------------------- +// User locks. +// ---------------------------------------------------------------------------- + +// +// Do not allocate objects of type union kmp_user_lock!!! +// This will waste space unless __kmp_user_lock_kind == lk_drdpa. +// Instead, check the value of __kmp_user_lock_kind and allocate objects of +// the type of the appropriate union member, and cast their addresses to +// kmp_user_lock_p. +// + +enum kmp_lock_kind { + lk_default = 0, + lk_tas, +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + lk_futex, +#endif +#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX + lk_hle, + lk_rtm, +#endif + lk_ticket, + lk_queuing, + lk_drdpa, +#if KMP_USE_ADAPTIVE_LOCKS + lk_adaptive +#endif // KMP_USE_ADAPTIVE_LOCKS +}; + +typedef enum kmp_lock_kind kmp_lock_kind_t; + +extern kmp_lock_kind_t __kmp_user_lock_kind; + +union kmp_user_lock { + kmp_tas_lock_t tas; +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + kmp_futex_lock_t futex; +#endif + kmp_ticket_lock_t ticket; + kmp_queuing_lock_t queuing; + kmp_drdpa_lock_t drdpa; +#if KMP_USE_ADAPTIVE_LOCKS + kmp_adaptive_lock_t adaptive; +#endif // KMP_USE_ADAPTIVE_LOCKS + kmp_lock_pool_t pool; +}; + +typedef union kmp_user_lock *kmp_user_lock_p; + +#if ! 
KMP_USE_DYNAMIC_LOCK + +extern size_t __kmp_base_user_lock_size; +extern size_t __kmp_user_lock_size; + +extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ); + +static inline kmp_int32 +__kmp_get_user_lock_owner( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL ); + return ( *__kmp_get_user_lock_owner_ )( lck ); +} + +extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#define __kmp_acquire_user_lock_with_checks(lck,gtid) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if ( __kmp_env_consistency_check ) { \ + char const * const func = "omp_set_lock"; \ + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \ + && lck->tas.lk.depth_locked != -1 ) { \ + KMP_FATAL( LockNestableUsedAsSimple, func ); \ + } \ + if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \ + KMP_FATAL( LockIsAlreadyOwned, func ); \ + } \ + } \ + if ( ( lck->tas.lk.poll != 0 ) || \ + ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE( lck ); \ + KMP_INIT_YIELD( spins ); \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + while ( ( lck->tas.lk.poll != 0 ) || \ + ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + } \ + } \ + KMP_FSYNC_ACQUIRED( lck ); \ + } else { \ + KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \ + ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \ + } + +#else +static inline int +__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); + return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ +extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ +static inline int +__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + if ( __kmp_user_lock_kind == lk_tas ) { + if ( __kmp_env_consistency_check ) { + char const * const func = "omp_test_lock"; + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) + && lck->tas.lk.depth_locked != -1 ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + } + return ( ( lck->tas.lk.poll == 0 ) && + KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); + } else { + KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); + return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); + } +} +#else +static inline int +__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); + return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +static inline void +__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL ); + ( 
*__kmp_release_user_lock_with_checks_ ) ( lck, gtid ); +} + +extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_init_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL ); + ( *__kmp_init_user_lock_with_checks_ )( lck ); +} + +// +// We need a non-checking version of destroy lock for when the RTL is +// doing the cleanup as it can't always tell if the lock is nested or not. +// +extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_destroy_user_lock( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL ); + ( *__kmp_destroy_user_lock_ )( lck ); +} + +extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL ); + ( *__kmp_destroy_user_lock_with_checks_ )( lck ); +} + +extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) + +#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if ( __kmp_env_consistency_check ) { \ + char const * const func = "omp_set_nest_lock"; \ + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \ + && lck->tas.lk.depth_locked == -1 ) { \ + KMP_FATAL( LockSimpleUsedAsNestable, func ); \ + } \ + } \ + if ( lck->tas.lk.poll - 1 == gtid ) { \ + lck->tas.lk.depth_locked += 1; \ + *depth = KMP_LOCK_ACQUIRED_NEXT; \ + } else { \ + if ( ( lck->tas.lk.poll != 0 ) || \ + ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE( lck ); \ + KMP_INIT_YIELD( spins ); \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + while ( ( lck->tas.lk.poll != 0 ) || \ + ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + } \ + } \ + lck->tas.lk.depth_locked = 1; \ + *depth = KMP_LOCK_ACQUIRED_FIRST; \ + } \ + KMP_FSYNC_ACQUIRED( lck ); \ + } else { \ + KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \ + *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \ + } + +#else +static inline void +__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth ) +{ + KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); + *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +static inline int +__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + if ( __kmp_user_lock_kind == lk_tas ) { + int retval; + if ( __kmp_env_consistency_check ) { + char const * const func = "omp_test_nest_lock"; + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) + && lck->tas.lk.depth_locked == -1 ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + } + KMP_DEBUG_ASSERT( gtid >= 0 ); + if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ + return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ + } + retval = ( ( lck->tas.lk.poll == 0 ) && + KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); + if ( retval ) { + KMP_MB(); + lck->tas.lk.depth_locked = 1; + } + return retval; + } else { + KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); + 
return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); + } +} +#else +static inline int +__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); + return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +static inline int +__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL ); + return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid ); +} + +extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL ); + ( *__kmp_init_nested_user_lock_with_checks_ )( lck ); +} + +extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL ); + ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck ); +} + +// +// user lock functions which do not necessarily exist for all lock kinds. +// +// The "set" functions usually have wrapper routines that check for a NULL set +// function pointer and call it if non-NULL. +// +// In some cases, it makes sense to have a "get" wrapper function check for a +// NULL get function pointer and return NULL / invalid value / error code if +// the function pointer is NULL. +// +// In other cases, the calling code really should differentiate between an +// unimplemented function and one that is implemented but returning NULL / +// invalied value. If this is the case, no get function wrapper exists. 
+// + +extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ); + +// no set function; fields set durining local allocation + +extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ); + +static inline const ident_t * +__kmp_get_user_lock_location( kmp_user_lock_p lck ) +{ + if ( __kmp_get_user_lock_location_ != NULL ) { + return ( *__kmp_get_user_lock_location_ )( lck ); + } + else { + return NULL; + } +} + +extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ); + +static inline void +__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc ) +{ + if ( __kmp_set_user_lock_location_ != NULL ) { + ( *__kmp_set_user_lock_location_ )( lck, loc ); + } +} + +extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ); + +extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ); + +static inline void +__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags ) +{ + if ( __kmp_set_user_lock_flags_ != NULL ) { + ( *__kmp_set_user_lock_flags_ )( lck, flags ); + } +} + +// +// The fuction which sets up all of the vtbl pointers for kmp_user_lock_t. +// +extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ); + +// +// Macros for binding user lock functions. 
+// +#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \ + __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ + __kmp_acquire##nest##kind##_##suffix; \ + __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ + __kmp_release##nest##kind##_##suffix; \ + __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ + __kmp_test##nest##kind##_##suffix; \ + __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ + __kmp_init##nest##kind##_##suffix; \ + __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ + __kmp_destroy##nest##kind##_##suffix; \ +} + +#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock) +#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks) +#define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock) +#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks) + +// ---------------------------------------------------------------------------- +// User lock table & lock allocation +// ---------------------------------------------------------------------------- + +/* + On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytems memory for lock variable, which + is not enough to store a pointer, so we have to use lock indexes instead of pointers and + maintain lock table to map indexes to pointers. + + + Note: The first element of the table is not a pointer to lock! It is a pointer to previously + allocated table (or NULL if it is the first table). + + Usage: + + if ( OMP_LOCK_T_SIZE < sizeof( ) ) { // or OMP_NEST_LOCK_T_SIZE + Lock table is fully utilized. User locks are indexes, so table is + used on user lock operation. 
+ Note: it may be the case (lin_32) that we don't need to use a lock + table for regular locks, but do need the table for nested locks. + } + else { + Lock table initialized but not actually used. + } +*/ + +struct kmp_lock_table { + kmp_lock_index_t used; // Number of used elements + kmp_lock_index_t allocated; // Number of allocated elements + kmp_user_lock_p * table; // Lock table. +}; + +typedef struct kmp_lock_table kmp_lock_table_t; + +extern kmp_lock_table_t __kmp_user_lock_table; +extern kmp_user_lock_p __kmp_lock_pool; + +struct kmp_block_of_locks { + struct kmp_block_of_locks * next_block; + void * locks; +}; + +typedef struct kmp_block_of_locks kmp_block_of_locks_t; + +extern kmp_block_of_locks_t *__kmp_lock_blocks; +extern int __kmp_num_locks_in_block; + +extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags ); +extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ); +extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func ); +extern void __kmp_cleanup_user_locks(); + +#define KMP_CHECK_USER_LOCK_INIT() \ + { \ + if ( ! TCR_4( __kmp_init_user_locks ) ) { \ + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \ + if ( ! TCR_4( __kmp_init_user_locks ) ) { \ + TCW_4( __kmp_init_user_locks, TRUE ); \ + } \ + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \ + } \ + } + +#endif // KMP_USE_DYNAMIC_LOCK + +#undef KMP_PAD +#undef KMP_GTID_DNE + +#if KMP_USE_DYNAMIC_LOCK + +// +// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current +// compatibility. Essential functionality of this new code is dynamic dispatch, but it also +// implements (or enables implementation of) hinted user lock and critical section which will be +// part of OMP 4.1 soon. 
+// +// Lock type can be decided at creation time (i.e., lock initialization), and subsequent lock +// function call on the created lock object requires type extraction and call through jump table +// using the extracted type. This type information is stored in two different ways depending on +// the size of the lock object, and we differentiate lock types by this size requirement - direct +// and indirect locks. +// +// Direct locks: +// A direct lock object fits into the space created by the compiler for an omp_lock_t object, and +// TAS/Futex lock falls into this category. We use low one byte of the lock object as the storage +// for the lock type, and appropriate bit operation is required to access the data meaningful to +// the lock algorithms. Also, to differentiate direct lock from indirect lock, 1 is written to LSB +// of the lock object. The newly introduced "hle" lock is also a direct lock. +// +// Indirect locks: +// An indirect lock object requires more space than the compiler-generated space, and it should be +// allocated from heap. Depending on the size of the compiler-generated space for the lock (i.e., +// size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated +// indirect lock (void * fits in the object) or an index to the indirect lock table entry that +// holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this category, and the newly +// introduced "rtm" lock is also an indirect lock which was implemented on top of the Queuing lock. +// When the omp_lock_t object holds an index (not lock address), 0 is written to LSB to +// differentiate the lock from a direct lock, and the remaining part is the actual index to the +// indirect lock table. 
+// + +#include // for uintptr_t + +// Shortcuts +#define KMP_USE_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) +#define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 +#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0 + +// List of lock definitions; all nested locks are indirect locks. +// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE. +// All nested locks are indirect lock types. +#if KMP_USE_TSX +# if KMP_USE_FUTEX +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# else +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# endif // KMP_USE_FUTEX +# define KMP_LAST_D_LOCK lockseq_hle +#else +# if KMP_USE_FUTEX +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# define KMP_LAST_D_LOCK lockseq_futex +# else +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# define KMP_LAST_D_LOCK lockseq_tas +# endif // KMP_USE_FUTEX +#endif // KMP_USE_TSX + +// Information used in dynamic dispatch +#define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks +#define KMP_FIRST_D_LOCK lockseq_tas +#define KMP_FIRST_I_LOCK lockseq_ticket +#define KMP_LAST_I_LOCK lockseq_nested_drdpa +#define KMP_NUM_I_LOCKS 
(locktag_nested_drdpa+1) // number of indirect lock types + +// Base type for dynamic locks. +typedef kmp_uint32 kmp_dyna_lock_t; + +// Lock sequence that enumerates all lock kinds. +// Always make this enumeration consistent with kmp_lockseq_t in the include directory. +typedef enum { + lockseq_indirect = 0, +#define expand_seq(l,a) lockseq_##l, + KMP_FOREACH_D_LOCK(expand_seq, 0) + KMP_FOREACH_I_LOCK(expand_seq, 0) +#undef expand_seq +} kmp_dyna_lockseq_t; + +// Enumerates indirect lock tags. +typedef enum { +#define expand_tag(l,a) locktag_##l, + KMP_FOREACH_I_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_indirect_locktag_t; + +// Utility macros that extract information from lock sequences. +#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK) +#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK) +#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK) +#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1) + +// Enumerates direct lock tags starting from indirect tag. +typedef enum { +#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l), + KMP_FOREACH_D_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_direct_locktag_t; + +// Indirect lock type +typedef struct { + kmp_user_lock_p lock; + kmp_indirect_locktag_t type; +} kmp_indirect_lock_t; + +// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking. +extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); +extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *); +extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); + +// Function tables for indirect locks. Set/unset/test differentiate functions with/withuot consistency checking. 
+extern void (*__kmp_indirect_init[])(kmp_user_lock_p); +extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p); +extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); +extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); +extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); + +// Extracts direct lock tag from a user lock pointer +#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<> 1) + +// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type). +#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)] + +// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type). +#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type] + +// Initializes a direct lock with the given lock pointer and lock sequence. +#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq) + +// Initializes an indirect lock with the given lock pointer and lock sequence. +#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq) + +// Returns "free" lock value for the given lock type. +#define KMP_LOCK_FREE(type) (locktag_##type) + +// Returns "busy" lock value for the given lock teyp. +#define KMP_LOCK_BUSY(v, type) ((v)<>KMP_LOCK_SHIFT) + +// Initializes global states and data structures for managing dynamic user locks. +extern void __kmp_init_dynamic_user_locks(); + +// Allocates and returns an indirect lock with the given indirect lock tag. +extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t); + +// Cleans up global states and data structures for managing dynamic user locks. +extern void __kmp_cleanup_indirect_user_locks(); + +// Default user lock sequence when not using hinted locks. 
+extern kmp_dyna_lockseq_t __kmp_user_lock_seq; + +// Jump table for "set lock location", available only for indirect locks. +extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *); +#define KMP_SET_I_LOCK_LOCATION(lck, loc) { \ + if (__kmp_indirect_set_location[(lck)->type] != NULL) \ + __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \ +} + +// Jump table for "set lock flags", available only for indirect locks. +extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t); +#define KMP_SET_I_LOCK_FLAGS(lck, flag) { \ + if (__kmp_indirect_set_flags[(lck)->type] != NULL) \ + __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \ +} + +// Jump table for "get lock location", available only for indirect locks. +extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p); +#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \ + ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \ + : NULL ) + +// Jump table for "get lock flags", available only for indirect locks. +extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p); +#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \ + ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \ + : NULL ) + +#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together + +// Lock table for indirect locks. +typedef struct kmp_indirect_lock_table { + kmp_indirect_lock_t **table; // blocks of indirect locks allocated + kmp_lock_index_t size; // size of the indirect lock table + kmp_lock_index_t next; // index to the next lock to be allocated +} kmp_indirect_lock_table_t; + +extern kmp_indirect_lock_table_t __kmp_i_lock_table; + +// Returns the indirect lock associated with the given index. 
+#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK) + +// Number of locks in a lock block, which is fixed to "1" now. +// TODO: No lock block implementation now. If we do support, we need to manage lock block data +// structure for each indirect lock type. +extern int __kmp_num_locks_in_block; + +// Fast lock table lookup without consistency checking +#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \ + ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \ + : *((kmp_indirect_lock_t **)(l)) ) + +// Used once in kmp_error.c +extern kmp_int32 +__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); + +#else // KMP_USE_DYNAMIC_LOCK + +# define KMP_LOCK_BUSY(v, type) (v) +# define KMP_LOCK_FREE(type) 0 +# define KMP_LOCK_STRIP(v) (v) + +#endif // KMP_USE_DYNAMIC_LOCK + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif /* KMP_LOCK_H */ + diff --git a/contrib/libs/cxxsupp/openmp/kmp_omp.h b/contrib/libs/cxxsupp/openmp/kmp_omp.h index 311d7e67aef..fc4de0f2360 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_omp.h +++ b/contrib/libs/cxxsupp/openmp/kmp_omp.h @@ -1,233 +1,233 @@ -#if USE_DEBUGGER -/* - * kmp_omp.h -- OpenMP definition for kmp_omp_struct_info_t. - * This is for information about runtime library structures. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -/* THIS FILE SHOULD NOT BE MODIFIED IN IDB INTERFACE LIBRARY CODE - * It should instead be modified in the OpenMP runtime and copied - * to the interface library code. This way we can minimize the - * problems that this is sure to cause having two copies of the - * same file. 
- * - * files live in libomp and libomp_db/src/include - */ - -/* CHANGE THIS WHEN STRUCTURES BELOW CHANGE - * Before we release this to a customer, please don't change this value. After it is released and - * stable, then any new updates to the structures or data structure traversal algorithms need to - * change this value. - */ -#define KMP_OMP_VERSION 9 - -typedef struct { - kmp_int32 offset; - kmp_int32 size; -} offset_and_size_t; - -typedef struct { - kmp_uint64 addr; - kmp_int32 size; - kmp_int32 padding; -} addr_and_size_t; - -typedef struct { - kmp_uint64 flags; // Flags for future extensions. - kmp_uint64 file; // Pointer to name of source file where the parallel region is. - kmp_uint64 func; // Pointer to name of routine where the parallel region is. - kmp_int32 begin; // Beginning of source line range. - kmp_int32 end; // End of source line range. - kmp_int32 num_threads; // Specified number of threads. -} kmp_omp_nthr_item_t; - -typedef struct { - kmp_int32 num; // Number of items in the arrray. - kmp_uint64 array; // Address of array of kmp_omp_num_threads_item_t. -} kmp_omp_nthr_info_t; - - -/* This structure is known to the idb interface library */ -typedef struct { - - /* Change this only if you make a fundamental data structure change here */ - kmp_int32 lib_version; - - /* sanity check. Only should be checked if versions are identical - * This is also used for backward compatibility to get the runtime - * structure size if it the runtime is older than the interface */ - kmp_int32 sizeof_this_structure; - - /* OpenMP RTL version info. */ - addr_and_size_t major; - addr_and_size_t minor; - addr_and_size_t build; - addr_and_size_t openmp_version; - addr_and_size_t banner; - - /* Various globals. */ - addr_and_size_t threads; // Pointer to __kmp_threads. - addr_and_size_t roots; // Pointer to __kmp_root. - addr_and_size_t capacity; // Pointer to __kmp_threads_capacity. - addr_and_size_t monitor; // Pointer to __kmp_monitor. -#if ! 
KMP_USE_DYNAMIC_LOCK - addr_and_size_t lock_table; // Pointer to __kmp_lock_table. -#endif - addr_and_size_t func_microtask; - addr_and_size_t func_fork; - addr_and_size_t func_fork_teams; - addr_and_size_t team_counter; - addr_and_size_t task_counter; - addr_and_size_t nthr_info; - kmp_int32 address_width; - kmp_int32 indexed_locks; - kmp_int32 last_barrier; // The end in enum barrier_type - kmp_int32 deque_size; // TASK_DEQUE_SIZE - - /* thread structure information. */ - kmp_int32 th_sizeof_struct; - offset_and_size_t th_info; // descriptor for thread - offset_and_size_t th_team; // team for this thread - offset_and_size_t th_root; // root for this thread - offset_and_size_t th_serial_team; // serial team under this thread - offset_and_size_t th_ident; // location for this thread (if available) - offset_and_size_t th_spin_here; // is thread waiting for lock (if available) - offset_and_size_t th_next_waiting; // next thread waiting for lock (if available) - offset_and_size_t th_task_team; // task team struct - offset_and_size_t th_current_task; // innermost task being executed - offset_and_size_t th_task_state; // alternating 0/1 for task team identification - offset_and_size_t th_bar; - offset_and_size_t th_b_worker_arrived; // the worker increases it by 1 when it arrives to the barrier - -#if OMP_40_ENABLED - /* teams information */ - offset_and_size_t th_teams_microtask;// entry address for teams construct - offset_and_size_t th_teams_level; // initial level of teams construct - offset_and_size_t th_teams_nteams; // number of teams in a league - offset_and_size_t th_teams_nth; // number of threads in each team of the league -#endif - - /* kmp_desc structure (for info field above) */ - kmp_int32 ds_sizeof_struct; - offset_and_size_t ds_tid; // team thread id - offset_and_size_t ds_gtid; // global thread id - offset_and_size_t ds_thread; // native thread id - - /* team structure information */ - kmp_int32 t_sizeof_struct; - offset_and_size_t t_master_tid; // tid 
of master in parent team - offset_and_size_t t_ident; // location of parallel region - offset_and_size_t t_parent; // parent team - offset_and_size_t t_nproc; // # team threads - offset_and_size_t t_threads; // array of threads - offset_and_size_t t_serialized; // # levels of serialized teams - offset_and_size_t t_id; // unique team id - offset_and_size_t t_pkfn; - offset_and_size_t t_task_team; // task team structure - offset_and_size_t t_implicit_task; // taskdata for the thread's implicit task -#if OMP_40_ENABLED - offset_and_size_t t_cancel_request; -#endif - offset_and_size_t t_bar; - offset_and_size_t t_b_master_arrived; // increased by 1 when master arrives to a barrier - offset_and_size_t t_b_team_arrived; // increased by one when all the threads arrived - - /* root structure information */ - kmp_int32 r_sizeof_struct; - offset_and_size_t r_root_team; // team at root - offset_and_size_t r_hot_team; // hot team for this root - offset_and_size_t r_uber_thread; // root thread - offset_and_size_t r_root_id; // unique root id (if available) - - /* ident structure information */ - kmp_int32 id_sizeof_struct; - offset_and_size_t id_psource; /* address of string ";file;func;line1;line2;;". */ - offset_and_size_t id_flags; - - /* lock structure information */ - kmp_int32 lk_sizeof_struct; - offset_and_size_t lk_initialized; - offset_and_size_t lk_location; - offset_and_size_t lk_tail_id; - offset_and_size_t lk_head_id; - offset_and_size_t lk_next_ticket; - offset_and_size_t lk_now_serving; - offset_and_size_t lk_owner_id; - offset_and_size_t lk_depth_locked; - offset_and_size_t lk_lock_flags; - -#if ! KMP_USE_DYNAMIC_LOCK - /* lock_table_t */ - kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. 
*/ - offset_and_size_t lt_used; - offset_and_size_t lt_allocated; - offset_and_size_t lt_table; -#endif - - /* task_team_t */ - kmp_int32 tt_sizeof_struct; - offset_and_size_t tt_threads_data; - offset_and_size_t tt_found_tasks; - offset_and_size_t tt_nproc; - offset_and_size_t tt_unfinished_threads; - offset_and_size_t tt_active; - - /* kmp_taskdata_t */ - kmp_int32 td_sizeof_struct; - offset_and_size_t td_task_id; // task id - offset_and_size_t td_flags; // task flags - offset_and_size_t td_team; // team for this task - offset_and_size_t td_parent; // parent task - offset_and_size_t td_level; // task testing level - offset_and_size_t td_ident; // task identifier - offset_and_size_t td_allocated_child_tasks; // child tasks (+ current task) not yet deallocated - offset_and_size_t td_incomplete_child_tasks; // child tasks not yet complete - - /* Taskwait */ - offset_and_size_t td_taskwait_ident; - offset_and_size_t td_taskwait_counter; - offset_and_size_t td_taskwait_thread; // gtid + 1 of thread encountered taskwait - -#if OMP_40_ENABLED - /* Taskgroup */ - offset_and_size_t td_taskgroup; // pointer to the current taskgroup - offset_and_size_t td_task_count; // number of allocated and not yet complete tasks - offset_and_size_t td_cancel; // request for cancellation of this taskgroup - - /* Task dependency */ - offset_and_size_t td_depnode; // pointer to graph node if the task has dependencies - offset_and_size_t dn_node; - offset_and_size_t dn_next; - offset_and_size_t dn_successors; - offset_and_size_t dn_task; - offset_and_size_t dn_npredecessors; - offset_and_size_t dn_nrefs; -#endif - offset_and_size_t dn_routine; - - /* kmp_thread_data_t */ - kmp_int32 hd_sizeof_struct; - offset_and_size_t hd_deque; - offset_and_size_t hd_deque_head; - offset_and_size_t hd_deque_tail; - offset_and_size_t hd_deque_ntasks; - offset_and_size_t hd_deque_last_stolen; - - // The last field of stable version. 
- kmp_uint64 last_field; - -} kmp_omp_struct_info_t; - -#endif /* USE_DEBUGGER */ - -/* end of file */ +#if USE_DEBUGGER +/* + * kmp_omp.h -- OpenMP definition for kmp_omp_struct_info_t. + * This is for information about runtime library structures. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +/* THIS FILE SHOULD NOT BE MODIFIED IN IDB INTERFACE LIBRARY CODE + * It should instead be modified in the OpenMP runtime and copied + * to the interface library code. This way we can minimize the + * problems that this is sure to cause having two copies of the + * same file. + * + * files live in libomp and libomp_db/src/include + */ + +/* CHANGE THIS WHEN STRUCTURES BELOW CHANGE + * Before we release this to a customer, please don't change this value. After it is released and + * stable, then any new updates to the structures or data structure traversal algorithms need to + * change this value. + */ +#define KMP_OMP_VERSION 9 + +typedef struct { + kmp_int32 offset; + kmp_int32 size; +} offset_and_size_t; + +typedef struct { + kmp_uint64 addr; + kmp_int32 size; + kmp_int32 padding; +} addr_and_size_t; + +typedef struct { + kmp_uint64 flags; // Flags for future extensions. + kmp_uint64 file; // Pointer to name of source file where the parallel region is. + kmp_uint64 func; // Pointer to name of routine where the parallel region is. + kmp_int32 begin; // Beginning of source line range. + kmp_int32 end; // End of source line range. + kmp_int32 num_threads; // Specified number of threads. +} kmp_omp_nthr_item_t; + +typedef struct { + kmp_int32 num; // Number of items in the arrray. + kmp_uint64 array; // Address of array of kmp_omp_num_threads_item_t. 
+} kmp_omp_nthr_info_t; + + +/* This structure is known to the idb interface library */ +typedef struct { + + /* Change this only if you make a fundamental data structure change here */ + kmp_int32 lib_version; + + /* sanity check. Only should be checked if versions are identical + * This is also used for backward compatibility to get the runtime + * structure size if it the runtime is older than the interface */ + kmp_int32 sizeof_this_structure; + + /* OpenMP RTL version info. */ + addr_and_size_t major; + addr_and_size_t minor; + addr_and_size_t build; + addr_and_size_t openmp_version; + addr_and_size_t banner; + + /* Various globals. */ + addr_and_size_t threads; // Pointer to __kmp_threads. + addr_and_size_t roots; // Pointer to __kmp_root. + addr_and_size_t capacity; // Pointer to __kmp_threads_capacity. + addr_and_size_t monitor; // Pointer to __kmp_monitor. +#if ! KMP_USE_DYNAMIC_LOCK + addr_and_size_t lock_table; // Pointer to __kmp_lock_table. +#endif + addr_and_size_t func_microtask; + addr_and_size_t func_fork; + addr_and_size_t func_fork_teams; + addr_and_size_t team_counter; + addr_and_size_t task_counter; + addr_and_size_t nthr_info; + kmp_int32 address_width; + kmp_int32 indexed_locks; + kmp_int32 last_barrier; // The end in enum barrier_type + kmp_int32 deque_size; // TASK_DEQUE_SIZE + + /* thread structure information. 
*/ + kmp_int32 th_sizeof_struct; + offset_and_size_t th_info; // descriptor for thread + offset_and_size_t th_team; // team for this thread + offset_and_size_t th_root; // root for this thread + offset_and_size_t th_serial_team; // serial team under this thread + offset_and_size_t th_ident; // location for this thread (if available) + offset_and_size_t th_spin_here; // is thread waiting for lock (if available) + offset_and_size_t th_next_waiting; // next thread waiting for lock (if available) + offset_and_size_t th_task_team; // task team struct + offset_and_size_t th_current_task; // innermost task being executed + offset_and_size_t th_task_state; // alternating 0/1 for task team identification + offset_and_size_t th_bar; + offset_and_size_t th_b_worker_arrived; // the worker increases it by 1 when it arrives to the barrier + +#if OMP_40_ENABLED + /* teams information */ + offset_and_size_t th_teams_microtask;// entry address for teams construct + offset_and_size_t th_teams_level; // initial level of teams construct + offset_and_size_t th_teams_nteams; // number of teams in a league + offset_and_size_t th_teams_nth; // number of threads in each team of the league +#endif + + /* kmp_desc structure (for info field above) */ + kmp_int32 ds_sizeof_struct; + offset_and_size_t ds_tid; // team thread id + offset_and_size_t ds_gtid; // global thread id + offset_and_size_t ds_thread; // native thread id + + /* team structure information */ + kmp_int32 t_sizeof_struct; + offset_and_size_t t_master_tid; // tid of master in parent team + offset_and_size_t t_ident; // location of parallel region + offset_and_size_t t_parent; // parent team + offset_and_size_t t_nproc; // # team threads + offset_and_size_t t_threads; // array of threads + offset_and_size_t t_serialized; // # levels of serialized teams + offset_and_size_t t_id; // unique team id + offset_and_size_t t_pkfn; + offset_and_size_t t_task_team; // task team structure + offset_and_size_t t_implicit_task; // taskdata 
for the thread's implicit task +#if OMP_40_ENABLED + offset_and_size_t t_cancel_request; +#endif + offset_and_size_t t_bar; + offset_and_size_t t_b_master_arrived; // increased by 1 when master arrives to a barrier + offset_and_size_t t_b_team_arrived; // increased by one when all the threads arrived + + /* root structure information */ + kmp_int32 r_sizeof_struct; + offset_and_size_t r_root_team; // team at root + offset_and_size_t r_hot_team; // hot team for this root + offset_and_size_t r_uber_thread; // root thread + offset_and_size_t r_root_id; // unique root id (if available) + + /* ident structure information */ + kmp_int32 id_sizeof_struct; + offset_and_size_t id_psource; /* address of string ";file;func;line1;line2;;". */ + offset_and_size_t id_flags; + + /* lock structure information */ + kmp_int32 lk_sizeof_struct; + offset_and_size_t lk_initialized; + offset_and_size_t lk_location; + offset_and_size_t lk_tail_id; + offset_and_size_t lk_head_id; + offset_and_size_t lk_next_ticket; + offset_and_size_t lk_now_serving; + offset_and_size_t lk_owner_id; + offset_and_size_t lk_depth_locked; + offset_and_size_t lk_lock_flags; + +#if ! KMP_USE_DYNAMIC_LOCK + /* lock_table_t */ + kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. 
*/ + offset_and_size_t lt_used; + offset_and_size_t lt_allocated; + offset_and_size_t lt_table; +#endif + + /* task_team_t */ + kmp_int32 tt_sizeof_struct; + offset_and_size_t tt_threads_data; + offset_and_size_t tt_found_tasks; + offset_and_size_t tt_nproc; + offset_and_size_t tt_unfinished_threads; + offset_and_size_t tt_active; + + /* kmp_taskdata_t */ + kmp_int32 td_sizeof_struct; + offset_and_size_t td_task_id; // task id + offset_and_size_t td_flags; // task flags + offset_and_size_t td_team; // team for this task + offset_and_size_t td_parent; // parent task + offset_and_size_t td_level; // task testing level + offset_and_size_t td_ident; // task identifier + offset_and_size_t td_allocated_child_tasks; // child tasks (+ current task) not yet deallocated + offset_and_size_t td_incomplete_child_tasks; // child tasks not yet complete + + /* Taskwait */ + offset_and_size_t td_taskwait_ident; + offset_and_size_t td_taskwait_counter; + offset_and_size_t td_taskwait_thread; // gtid + 1 of thread encountered taskwait + +#if OMP_40_ENABLED + /* Taskgroup */ + offset_and_size_t td_taskgroup; // pointer to the current taskgroup + offset_and_size_t td_task_count; // number of allocated and not yet complete tasks + offset_and_size_t td_cancel; // request for cancellation of this taskgroup + + /* Task dependency */ + offset_and_size_t td_depnode; // pointer to graph node if the task has dependencies + offset_and_size_t dn_node; + offset_and_size_t dn_next; + offset_and_size_t dn_successors; + offset_and_size_t dn_task; + offset_and_size_t dn_npredecessors; + offset_and_size_t dn_nrefs; +#endif + offset_and_size_t dn_routine; + + /* kmp_thread_data_t */ + kmp_int32 hd_sizeof_struct; + offset_and_size_t hd_deque; + offset_and_size_t hd_deque_head; + offset_and_size_t hd_deque_tail; + offset_and_size_t hd_deque_ntasks; + offset_and_size_t hd_deque_last_stolen; + + // The last field of stable version. 
+ kmp_uint64 last_field; + +} kmp_omp_struct_info_t; + +#endif /* USE_DEBUGGER */ + +/* end of file */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_os.h b/contrib/libs/cxxsupp/openmp/kmp_os.h index 90b26d05678..4f89c7379c4 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_os.h +++ b/contrib/libs/cxxsupp/openmp/kmp_os.h @@ -1,726 +1,726 @@ -/* - * kmp_os.h -- KPTS runtime header file. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_OS_H -#define KMP_OS_H - -#include "kmp_config.h" -#include - -#define KMP_FTN_PLAIN 1 -#define KMP_FTN_APPEND 2 -#define KMP_FTN_UPPER 3 -/* -#define KMP_FTN_PREPEND 4 -#define KMP_FTN_UAPPEND 5 -*/ - -#define KMP_PTR_SKIP (sizeof(void*)) - -/* -------------------------- Compiler variations ------------------------ */ - -#define KMP_OFF 0 -#define KMP_ON 1 - -#define KMP_MEM_CONS_VOLATILE 0 -#define KMP_MEM_CONS_FENCE 1 - -#ifndef KMP_MEM_CONS_MODEL -# define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE -#endif - -/* ------------------------- Compiler recognition ---------------------- */ -#define KMP_COMPILER_ICC 0 -#define KMP_COMPILER_GCC 0 -#define KMP_COMPILER_CLANG 0 -#define KMP_COMPILER_MSVC 0 - -#if defined( __INTEL_COMPILER ) -# undef KMP_COMPILER_ICC -# define KMP_COMPILER_ICC 1 -#elif defined( __clang__ ) -# undef KMP_COMPILER_CLANG -# define KMP_COMPILER_CLANG 1 -#elif defined( __GNUC__ ) -# undef KMP_COMPILER_GCC -# define KMP_COMPILER_GCC 1 -#elif defined( _MSC_VER ) -# undef KMP_COMPILER_MSVC -# define KMP_COMPILER_MSVC 1 -#else -# error Unknown compiler -#endif - -#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64 -# define KMP_AFFINITY_SUPPORTED 1 -# if KMP_OS_WINDOWS && KMP_ARCH_X86_64 -# 
define KMP_GROUP_AFFINITY 1 -# else -# define KMP_GROUP_AFFINITY 0 -# endif -#else -# define KMP_AFFINITY_SUPPORTED 0 -# define KMP_GROUP_AFFINITY 0 -#endif - -/* Check for quad-precision extension. */ -#define KMP_HAVE_QUAD 0 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# if KMP_COMPILER_ICC - /* _Quad is already defined for icc */ -# undef KMP_HAVE_QUAD -# define KMP_HAVE_QUAD 1 -# elif KMP_COMPILER_CLANG - /* Clang doesn't support a software-implemented - 128-bit extended precision type yet */ - typedef long double _Quad; -# elif KMP_COMPILER_GCC - typedef __float128 _Quad; -# undef KMP_HAVE_QUAD -# define KMP_HAVE_QUAD 1 -# elif KMP_COMPILER_MSVC - typedef long double _Quad; -# endif -#else -# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC - typedef long double _Quad; -# undef KMP_HAVE_QUAD -# define KMP_HAVE_QUAD 1 -# endif -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#if KMP_OS_WINDOWS - typedef char kmp_int8; - typedef unsigned char kmp_uint8; - typedef short kmp_int16; - typedef unsigned short kmp_uint16; - typedef int kmp_int32; - typedef unsigned int kmp_uint32; -# define KMP_INT32_SPEC "d" -# define KMP_UINT32_SPEC "u" -# ifndef KMP_STRUCT64 - typedef __int64 kmp_int64; - typedef unsigned __int64 kmp_uint64; - #define KMP_INT64_SPEC "I64d" - #define KMP_UINT64_SPEC "I64u" -# else - struct kmp_struct64 { - kmp_int32 a,b; - }; - typedef struct kmp_struct64 kmp_int64; - typedef struct kmp_struct64 kmp_uint64; - /* Not sure what to use for KMP_[U]INT64_SPEC here */ -# endif -# if KMP_ARCH_X86_64 -# define KMP_INTPTR 1 - typedef __int64 kmp_intptr_t; - typedef unsigned __int64 kmp_uintptr_t; -# define KMP_INTPTR_SPEC "I64d" -# define KMP_UINTPTR_SPEC "I64u" -# endif -#endif /* KMP_OS_WINDOWS */ - -#if KMP_OS_UNIX - typedef char kmp_int8; - typedef unsigned char kmp_uint8; - typedef short kmp_int16; - typedef unsigned short kmp_uint16; - typedef int kmp_int32; - typedef unsigned int kmp_uint32; - typedef long long kmp_int64; - typedef unsigned long long 
kmp_uint64; -# define KMP_INT32_SPEC "d" -# define KMP_UINT32_SPEC "u" -# define KMP_INT64_SPEC "lld" -# define KMP_UINT64_SPEC "llu" -#endif /* KMP_OS_UNIX */ - -#if KMP_ARCH_X86 || KMP_ARCH_ARM -# define KMP_SIZE_T_SPEC KMP_UINT32_SPEC -#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 -# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC -#else -# error "Can't determine size_t printf format specifier." -#endif - -#if KMP_ARCH_X86 -# define KMP_SIZE_T_MAX (0xFFFFFFFF) -#else -# define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) -#endif - -typedef size_t kmp_size_t; -typedef float kmp_real32; -typedef double kmp_real64; - -#ifndef KMP_INTPTR -# define KMP_INTPTR 1 - typedef long kmp_intptr_t; - typedef unsigned long kmp_uintptr_t; -# define KMP_INTPTR_SPEC "ld" -# define KMP_UINTPTR_SPEC "lu" -#endif - -#ifdef KMP_I8 - typedef kmp_int64 kmp_int; - typedef kmp_uint64 kmp_uint; -# define KMP_INT_SPEC KMP_INT64_SPEC -# define KMP_UINT_SPEC KMP_UINT64_SPEC -# define KMP_INT_MAX ((kmp_int64)0x7FFFFFFFFFFFFFFFLL) -# define KMP_INT_MIN ((kmp_int64)0x8000000000000000LL) -#else - typedef kmp_int32 kmp_int; - typedef kmp_uint32 kmp_uint; -# define KMP_INT_SPEC KMP_INT32_SPEC -# define KMP_UINT_SPEC KMP_UINT32_SPEC -# define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF) -# define KMP_INT_MIN ((kmp_int32)0x80000000) -#endif /* KMP_I8 */ - -#ifdef __cplusplus - //------------------------------------------------------------------------- - // template for debug prints specification ( d, u, lld, llu ), and to obtain - // signed/unsigned flavors of a type - template< typename T > - struct traits_t { - typedef T signed_t; - typedef T unsigned_t; - typedef T floating_t; - static char const * spec; - }; - // int - template<> - struct traits_t< signed int > { - typedef signed int signed_t; - typedef unsigned int unsigned_t; - typedef double floating_t; - static char const * spec; - }; - // unsigned int - template<> - struct traits_t< unsigned int > { - typedef signed int signed_t; - typedef unsigned int 
unsigned_t; - typedef double floating_t; - static char const * spec; - }; - // long long - template<> - struct traits_t< signed long long > { - typedef signed long long signed_t; - typedef unsigned long long unsigned_t; - typedef long double floating_t; - static char const * spec; - }; - // unsigned long long - template<> - struct traits_t< unsigned long long > { - typedef signed long long signed_t; - typedef unsigned long long unsigned_t; - typedef long double floating_t; - static char const * spec; - }; - //------------------------------------------------------------------------- -#endif // __cplusplus - -#define KMP_EXPORT extern /* export declaration in guide libraries */ - -#if __GNUC__ >= 4 - #define __forceinline __inline -#endif - -#define PAGE_SIZE (0x4000) -#define PAGE_ALIGNED(_addr) ( ! ((size_t) _addr & \ - (size_t)(PAGE_SIZE - 1))) -#define ALIGN_TO_PAGE(x) (void *)(((size_t)(x)) & ~((size_t)(PAGE_SIZE - 1))) - -/* ---------------------- Support for cache alignment, padding, etc. -----------------*/ - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -#define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */ - -/* Define the default size of the cache line */ -#ifndef CACHE_LINE - #define CACHE_LINE 128 /* cache line size in bytes */ -#else - #if ( CACHE_LINE < 64 ) && ! defined( KMP_OS_DARWIN ) - // 2006-02-13: This produces too many warnings on OS X*. Disable it for a while... - #warning CACHE_LINE is too small. - #endif -#endif /* CACHE_LINE */ - -#define KMP_CACHE_PREFETCH(ADDR) /* nothing */ - -/* Temporary note: if performance testing of this passes, we can remove - all references to KMP_DO_ALIGN and replace with KMP_ALIGN. 
*/ -#if KMP_OS_UNIX && defined(__GNUC__) -# define KMP_DO_ALIGN(bytes) __attribute__((aligned(bytes))) -# define KMP_ALIGN_CACHE __attribute__((aligned(CACHE_LINE))) -# define KMP_ALIGN_CACHE_INTERNODE __attribute__((aligned(INTERNODE_CACHE_LINE))) -# define KMP_ALIGN(bytes) __attribute__((aligned(bytes))) -#else -# define KMP_DO_ALIGN(bytes) __declspec( align(bytes) ) -# define KMP_ALIGN_CACHE __declspec( align(CACHE_LINE) ) -# define KMP_ALIGN_CACHE_INTERNODE __declspec( align(INTERNODE_CACHE_LINE) ) -# define KMP_ALIGN(bytes) __declspec( align(bytes) ) -#endif - -/* General purpose fence types for memory operations */ -enum kmp_mem_fence_type { - kmp_no_fence, /* No memory fence */ - kmp_acquire_fence, /* Acquire (read) memory fence */ - kmp_release_fence, /* Release (write) memory fence */ - kmp_full_fence /* Full (read+write) memory fence */ -}; - - -// -// Synchronization primitives -// - -#if KMP_ASM_INTRINS && KMP_OS_WINDOWS - -#include - -#pragma intrinsic(InterlockedExchangeAdd) -#pragma intrinsic(InterlockedCompareExchange) -#pragma intrinsic(InterlockedExchange) -#pragma intrinsic(InterlockedExchange64) - -// -// Using InterlockedIncrement / InterlockedDecrement causes a library loading -// ordering problem, so we use InterlockedExchangeAdd instead. 
-// -# define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) -# define KMP_TEST_THEN_INC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) -# define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) -# define KMP_TEST_THEN_ADD4_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) -# define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) -# define KMP_TEST_THEN_DEC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) -# define KMP_TEST_THEN_ADD32(p, v) InterlockedExchangeAdd( (volatile long *)(p), (v) ) - -extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v ); -# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) ) - -# define KMP_XCHG_FIXED32(p, v) InterlockedExchange( (volatile long *)(p), (long)(v) ) -# define KMP_XCHG_FIXED64(p, v) InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) ) - -inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) -{ - kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v); - return *(kmp_real32*)&tmp; -} - -// -// Routines that we still need to implement in assembly. 
-// -extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v ); - -extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); - -extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v ); -extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v ); -extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); -# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) ) - -//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) ) -# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) ) 
-//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL ) -# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL ) -//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 ) -//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 ) -# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL ) -# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL ) -//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 ) -//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 ) -# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL ) -# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL ) -//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) ) -# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) ) - -# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) ) -# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) ) -# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) ) -# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) ) - -# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) - -# if KMP_ARCH_X86 -# define 
KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) ) -# else /* 64 bit pointers */ -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) ) -# endif /* KMP_ARCH_X86 */ - -# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) ) -//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) ) - -# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) ); -# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) ); -//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) ); -//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) ); -//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); -# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) ); - - -#elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) -# define KMP_TEST_THEN_ADD8(p, v) __sync_fetch_and_add( (kmp_int8 *)(p), (v) ) - -/* cast p to correct type so that proper intrinsic will be used */ -# define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_OR8(p, v) __sync_fetch_and_or( (kmp_int8 *)(p), (v) ) -# define KMP_TEST_THEN_AND8(p, v) __sync_fetch_and_and( (kmp_int8 *)(p), (v) ) -# define KMP_TEST_THEN_INC_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_INC64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_INC_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_ADD4_32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 ) -# define KMP_TEST_THEN_ADD4_ACQ32(p) __sync_fetch_and_add( (kmp_int32 
*)(p), 4 ) -# define KMP_TEST_THEN_ADD4_64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL ) -# define KMP_TEST_THEN_ADD4_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL ) -# define KMP_TEST_THEN_DEC32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_DEC_ACQ32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_DEC64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_DEC_ACQ64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_ADD32(p, v) __sync_fetch_and_add( (kmp_int32 *)(p), (v) ) -# define KMP_TEST_THEN_ADD64(p, v) __sync_fetch_and_add( (kmp_int64 *)(p), (v) ) - -# define KMP_TEST_THEN_OR32(p, v) __sync_fetch_and_or( (kmp_int32 *)(p), (v) ) -# define KMP_TEST_THEN_AND32(p, v) __sync_fetch_and_and( (kmp_int32 *)(p), (v) ) -# define KMP_TEST_THEN_OR64(p, v) __sync_fetch_and_or( (kmp_int64 *)(p), (v) ) -# define KMP_TEST_THEN_AND64(p, v) __sync_fetch_and_and( (kmp_int64 *)(p), (v) ) - -# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) -# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) -# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) -# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) -# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) -# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) -# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) -# define 
KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __sync_bool_compare_and_swap( (volatile void **)(p),(void *)(cv),(void *)(sv) ) - -# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) -# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) -# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) -# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) - -#define KMP_XCHG_FIXED8(p, v) __sync_lock_test_and_set( (volatile kmp_uint8 *)(p), (kmp_uint8)(v) ) -#define KMP_XCHG_FIXED16(p, v) __sync_lock_test_and_set( (volatile kmp_uint16 *)(p), (kmp_uint16)(v) ) -#define KMP_XCHG_FIXED32(p, v) __sync_lock_test_and_set( (volatile kmp_uint32 *)(p), (kmp_uint32)(v) ) -#define KMP_XCHG_FIXED64(p, v) __sync_lock_test_and_set( (volatile kmp_uint64 *)(p), (kmp_uint64)(v) ) - -extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v ); -inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) -{ - kmp_int32 tmp = __sync_lock_test_and_set( (kmp_int32*)p, *(kmp_int32*)&v); - return *(kmp_real32*)&tmp; -} - -inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v) -{ - kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v); - return *(kmp_real64*)&tmp; -} - -#else - -extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 
v ); -extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v ); - -extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); - -extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v ); -extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v ); -# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) ) -extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); - -# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) ) -# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) ) -# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL ) -# define 
KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL ) -# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 ) -# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 ) -# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL ) -# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL ) -# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 ) -# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 ) -# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL ) -# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL ) -# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) ) -# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) ) - -# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) ) -# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) ) -# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) ) -# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) ) - -# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) - -# if KMP_ARCH_X86 -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) ) -# else /* 64 bit pointers */ -# define 
KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) ) -# endif /* KMP_ARCH_X86 */ - -# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) ) - -# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) ); -# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) ); -# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) ); -# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) ); -# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); -# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) ); - -#endif /* KMP_ASM_INTRINS */ - - -/* ------------- relaxed consistency memory model stuff ------------------ */ - -#if KMP_OS_WINDOWS -# ifdef __ABSOFT_WIN -# define KMP_MB() asm ("nop") -# define KMP_IMB() asm ("nop") -# else -# define KMP_MB() /* _asm{ nop } */ -# define KMP_IMB() /* _asm{ nop } */ -# endif -#endif /* KMP_OS_WINDOWS */ - -#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 -# define KMP_MB() __sync_synchronize() -#endif - -#ifndef KMP_MB -# define KMP_MB() /* nothing to do */ -#endif - -#ifndef KMP_IMB -# define KMP_IMB() /* nothing to do */ -#endif - -#ifndef KMP_ST_REL32 -# define KMP_ST_REL32(A,D) ( *(A) = (D) ) -#endif - -#ifndef KMP_ST_REL64 -# define KMP_ST_REL64(A,D) ( *(A) = (D) ) -#endif - -#ifndef KMP_LD_ACQ32 -# define KMP_LD_ACQ32(A) ( *(A) ) -#endif - -#ifndef KMP_LD_ACQ64 -# define KMP_LD_ACQ64(A) ( *(A) ) -#endif - -#define TCR_1(a) (a) -#define TCW_1(a,b) (a) = (b) -/* ------------------------------------------------------------------------ */ -// -// FIXME - maybe this should this 
be -// -// #define TCR_4(a) (*(volatile kmp_int32 *)(&a)) -// #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b)) -// -// #define TCR_8(a) (*(volatile kmp_int64 *)(a)) -// #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b)) -// -// I'm fairly certain this is the correct thing to do, but I'm afraid -// of performance regressions. -// - -#define TCR_4(a) (a) -#define TCW_4(a,b) (a) = (b) -#define TCR_8(a) (a) -#define TCW_8(a,b) (a) = (b) -#define TCR_SYNC_4(a) (a) -#define TCW_SYNC_4(a,b) (a) = (b) -#define TCX_SYNC_4(a,b,c) KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), (kmp_int32)(b), (kmp_int32)(c)) -#define TCR_SYNC_8(a) (a) -#define TCW_SYNC_8(a,b) (a) = (b) -#define TCX_SYNC_8(a,b,c) KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), (kmp_int64)(b), (kmp_int64)(c)) - -#if KMP_ARCH_X86 -// What about ARM? - #define TCR_PTR(a) ((void *)TCR_4(a)) - #define TCW_PTR(a,b) TCW_4((a),(b)) - #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a)) - #define TCW_SYNC_PTR(a,b) TCW_SYNC_4((a),(b)) - #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_4((a),(b),(c))) - -#else /* 64 bit pointers */ - - #define TCR_PTR(a) ((void *)TCR_8(a)) - #define TCW_PTR(a,b) TCW_8((a),(b)) - #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a)) - #define TCW_SYNC_PTR(a,b) TCW_SYNC_8((a),(b)) - #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_8((a),(b),(c))) - -#endif /* KMP_ARCH_X86 */ - -/* - * If these FTN_{TRUE,FALSE} values change, may need to - * change several places where they are used to check that - * language is Fortran, not C. - */ - -#ifndef FTN_TRUE -# define FTN_TRUE TRUE -#endif - -#ifndef FTN_FALSE -# define FTN_FALSE FALSE -#endif - -typedef void (*microtask_t)( int *gtid, int *npr, ... 
); - -#ifdef USE_VOLATILE_CAST -# define VOLATILE_CAST(x) (volatile x) -#else -# define VOLATILE_CAST(x) (x) -#endif - -#ifdef KMP_I8 -# define KMP_WAIT_YIELD __kmp_wait_yield_8 -# define KMP_EQ __kmp_eq_8 -# define KMP_NEQ __kmp_neq_8 -# define KMP_LT __kmp_lt_8 -# define KMP_GE __kmp_ge_8 -# define KMP_LE __kmp_le_8 -#else -# define KMP_WAIT_YIELD __kmp_wait_yield_4 -# define KMP_EQ __kmp_eq_4 -# define KMP_NEQ __kmp_neq_4 -# define KMP_LT __kmp_lt_4 -# define KMP_GE __kmp_ge_4 -# define KMP_LE __kmp_le_4 -#endif /* KMP_I8 */ - -/* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX -# define STATIC_EFI2_WORKAROUND -#else -# define STATIC_EFI2_WORKAROUND static -#endif - -// Support of BGET usage -#ifndef KMP_USE_BGET -#define KMP_USE_BGET 1 -#endif - - -// Switches for OSS builds -#ifndef USE_SYSFS_INFO -# define USE_SYSFS_INFO 0 -#endif -#ifndef USE_CMPXCHG_FIX -# define USE_CMPXCHG_FIX 1 -#endif - -// Enable dynamic user lock -#if OMP_41_ENABLED -# define KMP_USE_DYNAMIC_LOCK 1 -#endif - -// Enable TSX if dynamic user lock is turned on -#if KMP_USE_DYNAMIC_LOCK -// Visual studio can't handle the asm sections in this code -# define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC -# ifdef KMP_USE_ADAPTIVE_LOCKS -# undef KMP_USE_ADAPTIVE_LOCKS -# endif -# define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX -#endif - -// Enable tick time conversion of ticks to seconds -#if KMP_STATS_ENABLED -# define KMP_HAVE_TICK_TIME (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64)) -#endif - -// Warning levels -enum kmp_warnings_level { - kmp_warnings_off = 0, /* No warnings */ - kmp_warnings_low, /* Minimal warnings (default) */ - kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */ - kmp_warnings_verbose /* reserved */ -}; - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif /* KMP_OS_H */ -// Safe C API 
-#include "kmp_safe_c_api.h" - +/* + * kmp_os.h -- KPTS runtime header file. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_OS_H +#define KMP_OS_H + +#include "kmp_config.h" +#include + +#define KMP_FTN_PLAIN 1 +#define KMP_FTN_APPEND 2 +#define KMP_FTN_UPPER 3 +/* +#define KMP_FTN_PREPEND 4 +#define KMP_FTN_UAPPEND 5 +*/ + +#define KMP_PTR_SKIP (sizeof(void*)) + +/* -------------------------- Compiler variations ------------------------ */ + +#define KMP_OFF 0 +#define KMP_ON 1 + +#define KMP_MEM_CONS_VOLATILE 0 +#define KMP_MEM_CONS_FENCE 1 + +#ifndef KMP_MEM_CONS_MODEL +# define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE +#endif + +/* ------------------------- Compiler recognition ---------------------- */ +#define KMP_COMPILER_ICC 0 +#define KMP_COMPILER_GCC 0 +#define KMP_COMPILER_CLANG 0 +#define KMP_COMPILER_MSVC 0 + +#if defined( __INTEL_COMPILER ) +# undef KMP_COMPILER_ICC +# define KMP_COMPILER_ICC 1 +#elif defined( __clang__ ) +# undef KMP_COMPILER_CLANG +# define KMP_COMPILER_CLANG 1 +#elif defined( __GNUC__ ) +# undef KMP_COMPILER_GCC +# define KMP_COMPILER_GCC 1 +#elif defined( _MSC_VER ) +# undef KMP_COMPILER_MSVC +# define KMP_COMPILER_MSVC 1 +#else +# error Unknown compiler +#endif + +#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64 +# define KMP_AFFINITY_SUPPORTED 1 +# if KMP_OS_WINDOWS && KMP_ARCH_X86_64 +# define KMP_GROUP_AFFINITY 1 +# else +# define KMP_GROUP_AFFINITY 0 +# endif +#else +# define KMP_AFFINITY_SUPPORTED 0 +# define KMP_GROUP_AFFINITY 0 +#endif + +/* Check for quad-precision extension. 
*/ +#define KMP_HAVE_QUAD 0 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +# if KMP_COMPILER_ICC + /* _Quad is already defined for icc */ +# undef KMP_HAVE_QUAD +# define KMP_HAVE_QUAD 1 +# elif KMP_COMPILER_CLANG + /* Clang doesn't support a software-implemented + 128-bit extended precision type yet */ + typedef long double _Quad; +# elif KMP_COMPILER_GCC + typedef __float128 _Quad; +# undef KMP_HAVE_QUAD +# define KMP_HAVE_QUAD 1 +# elif KMP_COMPILER_MSVC + typedef long double _Quad; +# endif +#else +# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC + typedef long double _Quad; +# undef KMP_HAVE_QUAD +# define KMP_HAVE_QUAD 1 +# endif +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +#if KMP_OS_WINDOWS + typedef char kmp_int8; + typedef unsigned char kmp_uint8; + typedef short kmp_int16; + typedef unsigned short kmp_uint16; + typedef int kmp_int32; + typedef unsigned int kmp_uint32; +# define KMP_INT32_SPEC "d" +# define KMP_UINT32_SPEC "u" +# ifndef KMP_STRUCT64 + typedef __int64 kmp_int64; + typedef unsigned __int64 kmp_uint64; + #define KMP_INT64_SPEC "I64d" + #define KMP_UINT64_SPEC "I64u" +# else + struct kmp_struct64 { + kmp_int32 a,b; + }; + typedef struct kmp_struct64 kmp_int64; + typedef struct kmp_struct64 kmp_uint64; + /* Not sure what to use for KMP_[U]INT64_SPEC here */ +# endif +# if KMP_ARCH_X86_64 +# define KMP_INTPTR 1 + typedef __int64 kmp_intptr_t; + typedef unsigned __int64 kmp_uintptr_t; +# define KMP_INTPTR_SPEC "I64d" +# define KMP_UINTPTR_SPEC "I64u" +# endif +#endif /* KMP_OS_WINDOWS */ + +#if KMP_OS_UNIX + typedef char kmp_int8; + typedef unsigned char kmp_uint8; + typedef short kmp_int16; + typedef unsigned short kmp_uint16; + typedef int kmp_int32; + typedef unsigned int kmp_uint32; + typedef long long kmp_int64; + typedef unsigned long long kmp_uint64; +# define KMP_INT32_SPEC "d" +# define KMP_UINT32_SPEC "u" +# define KMP_INT64_SPEC "lld" +# define KMP_UINT64_SPEC "llu" +#endif /* KMP_OS_UNIX */ + +#if KMP_ARCH_X86 || KMP_ARCH_ARM +# define 
KMP_SIZE_T_SPEC KMP_UINT32_SPEC +#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 +# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC +#else +# error "Can't determine size_t printf format specifier." +#endif + +#if KMP_ARCH_X86 +# define KMP_SIZE_T_MAX (0xFFFFFFFF) +#else +# define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) +#endif + +typedef size_t kmp_size_t; +typedef float kmp_real32; +typedef double kmp_real64; + +#ifndef KMP_INTPTR +# define KMP_INTPTR 1 + typedef long kmp_intptr_t; + typedef unsigned long kmp_uintptr_t; +# define KMP_INTPTR_SPEC "ld" +# define KMP_UINTPTR_SPEC "lu" +#endif + +#ifdef KMP_I8 + typedef kmp_int64 kmp_int; + typedef kmp_uint64 kmp_uint; +# define KMP_INT_SPEC KMP_INT64_SPEC +# define KMP_UINT_SPEC KMP_UINT64_SPEC +# define KMP_INT_MAX ((kmp_int64)0x7FFFFFFFFFFFFFFFLL) +# define KMP_INT_MIN ((kmp_int64)0x8000000000000000LL) +#else + typedef kmp_int32 kmp_int; + typedef kmp_uint32 kmp_uint; +# define KMP_INT_SPEC KMP_INT32_SPEC +# define KMP_UINT_SPEC KMP_UINT32_SPEC +# define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF) +# define KMP_INT_MIN ((kmp_int32)0x80000000) +#endif /* KMP_I8 */ + +#ifdef __cplusplus + //------------------------------------------------------------------------- + // template for debug prints specification ( d, u, lld, llu ), and to obtain + // signed/unsigned flavors of a type + template< typename T > + struct traits_t { + typedef T signed_t; + typedef T unsigned_t; + typedef T floating_t; + static char const * spec; + }; + // int + template<> + struct traits_t< signed int > { + typedef signed int signed_t; + typedef unsigned int unsigned_t; + typedef double floating_t; + static char const * spec; + }; + // unsigned int + template<> + struct traits_t< unsigned int > { + typedef signed int signed_t; + typedef unsigned int unsigned_t; + typedef double floating_t; + static char const * spec; + }; + // long long + template<> + struct traits_t< signed long long > { + typedef signed long long signed_t; + typedef unsigned long 
long unsigned_t; + typedef long double floating_t; + static char const * spec; + }; + // unsigned long long + template<> + struct traits_t< unsigned long long > { + typedef signed long long signed_t; + typedef unsigned long long unsigned_t; + typedef long double floating_t; + static char const * spec; + }; + //------------------------------------------------------------------------- +#endif // __cplusplus + +#define KMP_EXPORT extern /* export declaration in guide libraries */ + +#if __GNUC__ >= 4 + #define __forceinline __inline +#endif + +#define PAGE_SIZE (0x4000) +#define PAGE_ALIGNED(_addr) ( ! ((size_t) _addr & \ + (size_t)(PAGE_SIZE - 1))) +#define ALIGN_TO_PAGE(x) (void *)(((size_t)(x)) & ~((size_t)(PAGE_SIZE - 1))) + +/* ---------------------- Support for cache alignment, padding, etc. -----------------*/ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */ + +/* Define the default size of the cache line */ +#ifndef CACHE_LINE + #define CACHE_LINE 128 /* cache line size in bytes */ +#else + #if ( CACHE_LINE < 64 ) && ! defined( KMP_OS_DARWIN ) + // 2006-02-13: This produces too many warnings on OS X*. Disable it for a while... + #warning CACHE_LINE is too small. + #endif +#endif /* CACHE_LINE */ + +#define KMP_CACHE_PREFETCH(ADDR) /* nothing */ + +/* Temporary note: if performance testing of this passes, we can remove + all references to KMP_DO_ALIGN and replace with KMP_ALIGN. 
*/ +#if KMP_OS_UNIX && defined(__GNUC__) +# define KMP_DO_ALIGN(bytes) __attribute__((aligned(bytes))) +# define KMP_ALIGN_CACHE __attribute__((aligned(CACHE_LINE))) +# define KMP_ALIGN_CACHE_INTERNODE __attribute__((aligned(INTERNODE_CACHE_LINE))) +# define KMP_ALIGN(bytes) __attribute__((aligned(bytes))) +#else +# define KMP_DO_ALIGN(bytes) __declspec( align(bytes) ) +# define KMP_ALIGN_CACHE __declspec( align(CACHE_LINE) ) +# define KMP_ALIGN_CACHE_INTERNODE __declspec( align(INTERNODE_CACHE_LINE) ) +# define KMP_ALIGN(bytes) __declspec( align(bytes) ) +#endif + +/* General purpose fence types for memory operations */ +enum kmp_mem_fence_type { + kmp_no_fence, /* No memory fence */ + kmp_acquire_fence, /* Acquire (read) memory fence */ + kmp_release_fence, /* Release (write) memory fence */ + kmp_full_fence /* Full (read+write) memory fence */ +}; + + +// +// Synchronization primitives +// + +#if KMP_ASM_INTRINS && KMP_OS_WINDOWS + +#include + +#pragma intrinsic(InterlockedExchangeAdd) +#pragma intrinsic(InterlockedCompareExchange) +#pragma intrinsic(InterlockedExchange) +#pragma intrinsic(InterlockedExchange64) + +// +// Using InterlockedIncrement / InterlockedDecrement causes a library loading +// ordering problem, so we use InterlockedExchangeAdd instead. 
+// +# define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) +# define KMP_TEST_THEN_INC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) +# define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) +# define KMP_TEST_THEN_ADD4_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) +# define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) +# define KMP_TEST_THEN_DEC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) +# define KMP_TEST_THEN_ADD32(p, v) InterlockedExchangeAdd( (volatile long *)(p), (v) ) + +extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v ); +extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v ); +extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v ); +# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) ) + +# define KMP_XCHG_FIXED32(p, v) InterlockedExchange( (volatile long *)(p), (long)(v) ) +# define KMP_XCHG_FIXED64(p, v) InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) ) + +inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) +{ + kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v); + return *(kmp_real32*)&tmp; +} + +// +// Routines that we still need to implement in assembly. 
+// +extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v ); + +extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); + +extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v ); +extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v ); +extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v ); +extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); +# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) ) + +//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 ) +# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) ) +# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) ) 
+//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 ) +# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL ) +# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL ) +//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 ) +//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 ) +# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL ) +# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL ) +//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 ) +//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 ) +# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL ) +# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL ) +//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) ) +# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) ) + +# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) ) +# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) ) +# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) ) +# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) ) + +# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) + +# if KMP_ARCH_X86 +# define 
KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) ) +# else /* 64 bit pointers */ +# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) ) +# endif /* KMP_ARCH_X86 */ + +# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) ) +//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) ) + +# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) ); +# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) ); +//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) ); +//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) ); +//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); +# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) ); + + +#elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) +# define KMP_TEST_THEN_ADD8(p, v) __sync_fetch_and_add( (kmp_int8 *)(p), (v) ) + +/* cast p to correct type so that proper intrinsic will be used */ +# define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 ) +# define KMP_TEST_THEN_OR8(p, v) __sync_fetch_and_or( (kmp_int8 *)(p), (v) ) +# define KMP_TEST_THEN_AND8(p, v) __sync_fetch_and_and( (kmp_int8 *)(p), (v) ) +# define KMP_TEST_THEN_INC_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 ) +# define KMP_TEST_THEN_INC64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL ) +# define KMP_TEST_THEN_INC_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL ) +# define KMP_TEST_THEN_ADD4_32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 ) +# define KMP_TEST_THEN_ADD4_ACQ32(p) __sync_fetch_and_add( (kmp_int32 
*)(p), 4 ) +# define KMP_TEST_THEN_ADD4_64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL ) +# define KMP_TEST_THEN_ADD4_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL ) +# define KMP_TEST_THEN_DEC32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 ) +# define KMP_TEST_THEN_DEC_ACQ32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 ) +# define KMP_TEST_THEN_DEC64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL ) +# define KMP_TEST_THEN_DEC_ACQ64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL ) +# define KMP_TEST_THEN_ADD32(p, v) __sync_fetch_and_add( (kmp_int32 *)(p), (v) ) +# define KMP_TEST_THEN_ADD64(p, v) __sync_fetch_and_add( (kmp_int64 *)(p), (v) ) + +# define KMP_TEST_THEN_OR32(p, v) __sync_fetch_and_or( (kmp_int32 *)(p), (v) ) +# define KMP_TEST_THEN_AND32(p, v) __sync_fetch_and_and( (kmp_int32 *)(p), (v) ) +# define KMP_TEST_THEN_OR64(p, v) __sync_fetch_and_or( (kmp_int64 *)(p), (v) ) +# define KMP_TEST_THEN_AND64(p, v) __sync_fetch_and_and( (kmp_int64 *)(p), (v) ) + +# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) +# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) +# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) +# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) +# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) +# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) +# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) +# define 
KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) +# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __sync_bool_compare_and_swap( (volatile void **)(p),(void *)(cv),(void *)(sv) ) + +# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) +# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) +# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) +# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) + +#define KMP_XCHG_FIXED8(p, v) __sync_lock_test_and_set( (volatile kmp_uint8 *)(p), (kmp_uint8)(v) ) +#define KMP_XCHG_FIXED16(p, v) __sync_lock_test_and_set( (volatile kmp_uint16 *)(p), (kmp_uint16)(v) ) +#define KMP_XCHG_FIXED32(p, v) __sync_lock_test_and_set( (volatile kmp_uint32 *)(p), (kmp_uint32)(v) ) +#define KMP_XCHG_FIXED64(p, v) __sync_lock_test_and_set( (volatile kmp_uint64 *)(p), (kmp_uint64)(v) ) + +extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v ); +extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v ); +extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v ); +inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) +{ + kmp_int32 tmp = __sync_lock_test_and_set( (kmp_int32*)p, *(kmp_int32*)&v); + return *(kmp_real32*)&tmp; +} + +inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v) +{ + kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v); + return *(kmp_real64*)&tmp; +} + +#else + +extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 
v ); +extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v ); + +extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); + +extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v ); +extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v ); +extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v ); +# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) ) +extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); + +# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 ) +# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) ) +# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) ) +# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 ) +# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL ) +# define 
KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL ) +# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 ) +# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 ) +# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL ) +# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL ) +# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 ) +# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 ) +# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL ) +# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL ) +# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) ) +# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) ) + +# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) ) +# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) ) +# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) ) +# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) ) + +# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) + +# if KMP_ARCH_X86 +# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) ) +# else /* 64 bit pointers */ +# define 
KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) ) +# endif /* KMP_ARCH_X86 */ + +# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) ) + +# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) ); +# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) ); +# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) ); +# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) ); +# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); +# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) ); + +#endif /* KMP_ASM_INTRINS */ + + +/* ------------- relaxed consistency memory model stuff ------------------ */ + +#if KMP_OS_WINDOWS +# ifdef __ABSOFT_WIN +# define KMP_MB() asm ("nop") +# define KMP_IMB() asm ("nop") +# else +# define KMP_MB() /* _asm{ nop } */ +# define KMP_IMB() /* _asm{ nop } */ +# endif +#endif /* KMP_OS_WINDOWS */ + +#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 +# define KMP_MB() __sync_synchronize() +#endif + +#ifndef KMP_MB +# define KMP_MB() /* nothing to do */ +#endif + +#ifndef KMP_IMB +# define KMP_IMB() /* nothing to do */ +#endif + +#ifndef KMP_ST_REL32 +# define KMP_ST_REL32(A,D) ( *(A) = (D) ) +#endif + +#ifndef KMP_ST_REL64 +# define KMP_ST_REL64(A,D) ( *(A) = (D) ) +#endif + +#ifndef KMP_LD_ACQ32 +# define KMP_LD_ACQ32(A) ( *(A) ) +#endif + +#ifndef KMP_LD_ACQ64 +# define KMP_LD_ACQ64(A) ( *(A) ) +#endif + +#define TCR_1(a) (a) +#define TCW_1(a,b) (a) = (b) +/* ------------------------------------------------------------------------ */ +// +// FIXME - maybe this should this 
be +// +// #define TCR_4(a) (*(volatile kmp_int32 *)(&a)) +// #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b)) +// +// #define TCR_8(a) (*(volatile kmp_int64 *)(a)) +// #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b)) +// +// I'm fairly certain this is the correct thing to do, but I'm afraid +// of performance regressions. +// + +#define TCR_4(a) (a) +#define TCW_4(a,b) (a) = (b) +#define TCR_8(a) (a) +#define TCW_8(a,b) (a) = (b) +#define TCR_SYNC_4(a) (a) +#define TCW_SYNC_4(a,b) (a) = (b) +#define TCX_SYNC_4(a,b,c) KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), (kmp_int32)(b), (kmp_int32)(c)) +#define TCR_SYNC_8(a) (a) +#define TCW_SYNC_8(a,b) (a) = (b) +#define TCX_SYNC_8(a,b,c) KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), (kmp_int64)(b), (kmp_int64)(c)) + +#if KMP_ARCH_X86 +// What about ARM? + #define TCR_PTR(a) ((void *)TCR_4(a)) + #define TCW_PTR(a,b) TCW_4((a),(b)) + #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a)) + #define TCW_SYNC_PTR(a,b) TCW_SYNC_4((a),(b)) + #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_4((a),(b),(c))) + +#else /* 64 bit pointers */ + + #define TCR_PTR(a) ((void *)TCR_8(a)) + #define TCW_PTR(a,b) TCW_8((a),(b)) + #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a)) + #define TCW_SYNC_PTR(a,b) TCW_SYNC_8((a),(b)) + #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_8((a),(b),(c))) + +#endif /* KMP_ARCH_X86 */ + +/* + * If these FTN_{TRUE,FALSE} values change, may need to + * change several places where they are used to check that + * language is Fortran, not C. + */ + +#ifndef FTN_TRUE +# define FTN_TRUE TRUE +#endif + +#ifndef FTN_FALSE +# define FTN_FALSE FALSE +#endif + +typedef void (*microtask_t)( int *gtid, int *npr, ... 
); + +#ifdef USE_VOLATILE_CAST +# define VOLATILE_CAST(x) (volatile x) +#else +# define VOLATILE_CAST(x) (x) +#endif + +#ifdef KMP_I8 +# define KMP_WAIT_YIELD __kmp_wait_yield_8 +# define KMP_EQ __kmp_eq_8 +# define KMP_NEQ __kmp_neq_8 +# define KMP_LT __kmp_lt_8 +# define KMP_GE __kmp_ge_8 +# define KMP_LE __kmp_le_8 +#else +# define KMP_WAIT_YIELD __kmp_wait_yield_4 +# define KMP_EQ __kmp_eq_4 +# define KMP_NEQ __kmp_neq_4 +# define KMP_LT __kmp_lt_4 +# define KMP_GE __kmp_ge_4 +# define KMP_LE __kmp_le_4 +#endif /* KMP_I8 */ + +/* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */ +#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX +# define STATIC_EFI2_WORKAROUND +#else +# define STATIC_EFI2_WORKAROUND static +#endif + +// Support of BGET usage +#ifndef KMP_USE_BGET +#define KMP_USE_BGET 1 +#endif + + +// Switches for OSS builds +#ifndef USE_SYSFS_INFO +# define USE_SYSFS_INFO 0 +#endif +#ifndef USE_CMPXCHG_FIX +# define USE_CMPXCHG_FIX 1 +#endif + +// Enable dynamic user lock +#if OMP_41_ENABLED +# define KMP_USE_DYNAMIC_LOCK 1 +#endif + +// Enable TSX if dynamic user lock is turned on +#if KMP_USE_DYNAMIC_LOCK +// Visual studio can't handle the asm sections in this code +# define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC +# ifdef KMP_USE_ADAPTIVE_LOCKS +# undef KMP_USE_ADAPTIVE_LOCKS +# endif +# define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX +#endif + +// Enable tick time conversion of ticks to seconds +#if KMP_STATS_ENABLED +# define KMP_HAVE_TICK_TIME (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64)) +#endif + +// Warning levels +enum kmp_warnings_level { + kmp_warnings_off = 0, /* No warnings */ + kmp_warnings_low, /* Minimal warnings (default) */ + kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */ + kmp_warnings_verbose /* reserved */ +}; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif /* KMP_OS_H */ +// Safe C API 
+#include "kmp_safe_c_api.h" + diff --git a/contrib/libs/cxxsupp/openmp/kmp_platform.h b/contrib/libs/cxxsupp/openmp/kmp_platform.h index a7c734cab98..0707f2b40ae 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_platform.h +++ b/contrib/libs/cxxsupp/openmp/kmp_platform.h @@ -1,168 +1,168 @@ -/* - * kmp_platform.h -- header for determining operating system and architecture - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_PLATFORM_H -#define KMP_PLATFORM_H - -/* ---------------------- Operating system recognition ------------------- */ - -#define KMP_OS_LINUX 0 -#define KMP_OS_FREEBSD 0 -#define KMP_OS_NETBSD 0 -#define KMP_OS_DARWIN 0 -#define KMP_OS_WINDOWS 0 -#define KMP_OS_CNK 0 -#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. 
*/ - - -#ifdef _WIN32 -# undef KMP_OS_WINDOWS -# define KMP_OS_WINDOWS 1 -#endif - -#if ( defined __APPLE__ && defined __MACH__ ) -# undef KMP_OS_DARWIN -# define KMP_OS_DARWIN 1 -#endif - -// in some ppc64 linux installations, only the second condition is met -#if ( defined __linux ) -# undef KMP_OS_LINUX -# define KMP_OS_LINUX 1 -#elif ( defined __linux__) -# undef KMP_OS_LINUX -# define KMP_OS_LINUX 1 -#else -#endif - -#if ( defined __FreeBSD__ ) -# undef KMP_OS_FREEBSD -# define KMP_OS_FREEBSD 1 -#endif - -#if ( defined __NetBSD__ ) -# undef KMP_OS_NETBSD -# define KMP_OS_NETBSD 1 -#endif - -#if ( defined __bgq__ ) -# undef KMP_OS_CNK -# define KMP_OS_CNK 1 -#endif - -#if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS) -# error Unknown OS -#endif - -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN -# undef KMP_OS_UNIX -# define KMP_OS_UNIX 1 -#endif - -/* ---------------------- Architecture recognition ------------------- */ - -#define KMP_ARCH_X86 0 -#define KMP_ARCH_X86_64 0 -#define KMP_ARCH_AARCH64 0 -#define KMP_ARCH_PPC64_BE 0 -#define KMP_ARCH_PPC64_LE 0 -#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE) - -#if KMP_OS_WINDOWS -# if defined _M_AMD64 -# undef KMP_ARCH_X86_64 -# define KMP_ARCH_X86_64 1 -# else -# undef KMP_ARCH_X86 -# define KMP_ARCH_X86 1 -# endif -#endif - -#if KMP_OS_UNIX -# if defined __x86_64 -# undef KMP_ARCH_X86_64 -# define KMP_ARCH_X86_64 1 -# elif defined __i386 -# undef KMP_ARCH_X86 -# define KMP_ARCH_X86 1 -# elif defined __powerpc64__ -# if defined __LITTLE_ENDIAN__ -# undef KMP_ARCH_PPC64_LE -# define KMP_ARCH_PPC64_LE 1 -# else -# undef KMP_ARCH_PPC64_BE -# define KMP_ARCH_PPC64_BE 1 -# endif -# elif defined __aarch64__ -# undef KMP_ARCH_AARCH64 -# define KMP_ARCH_AARCH64 1 -# endif -#endif - -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7A__) -# define KMP_ARCH_ARMV7 1 -#endif - -#if defined(KMP_ARCH_ARMV7) || 
defined(__ARM_ARCH_6__) || \ - defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6ZK__) -# define KMP_ARCH_ARMV6 1 -#endif - -#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \ - defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_5TEJ__) -# define KMP_ARCH_ARMV5 1 -#endif - -#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \ - defined(__ARM_ARCH_4T__) -# define KMP_ARCH_ARMV4 1 -#endif - -#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \ - defined(__ARM_ARCH_3M__) -# define KMP_ARCH_ARMV3 1 -#endif - -#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__) -# define KMP_ARCH_ARMV2 1 -#endif - -#if defined(KMP_ARCH_ARMV2) -# define KMP_ARCH_ARM 1 -#endif - -#if defined(__MIC__) || defined(__MIC2__) -# define KMP_MIC 1 -# if __MIC2__ || __KNC__ -# define KMP_MIC1 0 -# define KMP_MIC2 1 -# else -# define KMP_MIC1 1 -# define KMP_MIC2 0 -# endif -#else -# define KMP_MIC 0 -# define KMP_MIC1 0 -# define KMP_MIC2 0 -#endif - -// TODO: Fixme - This is clever, but really fugly -#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + KMP_ARCH_AARCH64) -# error Unknown or unsupported architecture -#endif - -#endif // KMP_PLATFORM_H +/* + * kmp_platform.h -- header for determining operating system and architecture + */ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef KMP_PLATFORM_H +#define KMP_PLATFORM_H + +/* ---------------------- Operating system recognition ------------------- */ + +#define KMP_OS_LINUX 0 +#define KMP_OS_FREEBSD 0 +#define KMP_OS_NETBSD 0 +#define KMP_OS_DARWIN 0 +#define KMP_OS_WINDOWS 0 +#define KMP_OS_CNK 0 +#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */ + + +#ifdef _WIN32 +# undef KMP_OS_WINDOWS +# define KMP_OS_WINDOWS 1 +#endif + +#if ( defined __APPLE__ && defined __MACH__ ) +# undef KMP_OS_DARWIN +# define KMP_OS_DARWIN 1 +#endif + +// in some ppc64 linux installations, only the second condition is met +#if ( defined __linux ) +# undef KMP_OS_LINUX +# define KMP_OS_LINUX 1 +#elif ( defined __linux__) +# undef KMP_OS_LINUX +# define KMP_OS_LINUX 1 +#else +#endif + +#if ( defined __FreeBSD__ ) +# undef KMP_OS_FREEBSD +# define KMP_OS_FREEBSD 1 +#endif + +#if ( defined __NetBSD__ ) +# undef KMP_OS_NETBSD +# define KMP_OS_NETBSD 1 +#endif + +#if ( defined __bgq__ ) +# undef KMP_OS_CNK +# define KMP_OS_CNK 1 +#endif + +#if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS) +# error Unknown OS +#endif + +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN +# undef KMP_OS_UNIX +# define KMP_OS_UNIX 1 +#endif + +/* ---------------------- Architecture recognition ------------------- */ + +#define KMP_ARCH_X86 0 +#define KMP_ARCH_X86_64 0 +#define KMP_ARCH_AARCH64 0 +#define KMP_ARCH_PPC64_BE 0 +#define KMP_ARCH_PPC64_LE 0 +#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE) + +#if KMP_OS_WINDOWS +# if defined _M_AMD64 +# undef KMP_ARCH_X86_64 +# define KMP_ARCH_X86_64 1 +# else +# undef KMP_ARCH_X86 +# define KMP_ARCH_X86 1 +# endif +#endif + +#if KMP_OS_UNIX +# if defined __x86_64 +# undef KMP_ARCH_X86_64 +# define KMP_ARCH_X86_64 1 +# elif defined __i386 +# undef KMP_ARCH_X86 +# define KMP_ARCH_X86 1 +# elif defined 
__powerpc64__ +# if defined __LITTLE_ENDIAN__ +# undef KMP_ARCH_PPC64_LE +# define KMP_ARCH_PPC64_LE 1 +# else +# undef KMP_ARCH_PPC64_BE +# define KMP_ARCH_PPC64_BE 1 +# endif +# elif defined __aarch64__ +# undef KMP_ARCH_AARCH64 +# define KMP_ARCH_AARCH64 1 +# endif +#endif + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7A__) +# define KMP_ARCH_ARMV7 1 +#endif + +#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +# define KMP_ARCH_ARMV6 1 +#endif + +#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define KMP_ARCH_ARMV5 1 +#endif + +#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \ + defined(__ARM_ARCH_4T__) +# define KMP_ARCH_ARMV4 1 +#endif + +#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \ + defined(__ARM_ARCH_3M__) +# define KMP_ARCH_ARMV3 1 +#endif + +#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__) +# define KMP_ARCH_ARMV2 1 +#endif + +#if defined(KMP_ARCH_ARMV2) +# define KMP_ARCH_ARM 1 +#endif + +#if defined(__MIC__) || defined(__MIC2__) +# define KMP_MIC 1 +# if __MIC2__ || __KNC__ +# define KMP_MIC1 0 +# define KMP_MIC2 1 +# else +# define KMP_MIC1 1 +# define KMP_MIC2 0 +# endif +#else +# define KMP_MIC 0 +# define KMP_MIC1 0 +# define KMP_MIC2 0 +#endif + +// TODO: Fixme - This is clever, but really fugly +#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + KMP_ARCH_AARCH64) +# error Unknown or unsupported architecture +#endif + +#endif // KMP_PLATFORM_H diff --git a/contrib/libs/cxxsupp/openmp/kmp_runtime.c b/contrib/libs/cxxsupp/openmp/kmp_runtime.c index 015278a1cac..47499348084 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_runtime.c +++ b/contrib/libs/cxxsupp/openmp/kmp_runtime.c @@ -1,7654 +1,7654 @@ 
-/* - * kmp_runtime.c -- KPTS runtime support library - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_atomic.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_environment.h" -#include "kmp_itt.h" -#include "kmp_str.h" -#include "kmp_settings.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_error.h" -#include "kmp_stats.h" -#include "kmp_wait_release.h" - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -/* these are temporary issues to be dealt with */ -#define KMP_USE_PRCTL 0 -#define KMP_USE_POOLED_ALLOC 0 - -#if KMP_OS_WINDOWS -#include -#endif - -#if defined(KMP_GOMP_COMPAT) -char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes"; -#endif /* defined(KMP_GOMP_COMPAT) */ - -char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: " -#if OMP_40_ENABLED - "4.0 (201307)"; -#else - "3.1 (201107)"; -#endif - -#ifdef KMP_DEBUG -char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable"; -#endif /* KMP_DEBUG */ - - -#define KMP_MIN( x, y ) ( (x) < (y) ? 
(x) : (y) ) - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -kmp_info_t __kmp_monitor; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* Forward declarations */ - -void __kmp_cleanup( void ); - -static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid ); -static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc ); -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED -static void __kmp_partition_places( kmp_team_t *team ); -#endif -static void __kmp_do_serial_initialize( void ); -void __kmp_fork_barrier( int gtid, int tid ); -void __kmp_join_barrier( int gtid ); -void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc ); - -#ifdef USE_LOAD_BALANCE -static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc ); -#endif - -static int __kmp_expand_threads(int nWish, int nNeed); -#if KMP_OS_WINDOWS -static int __kmp_unregister_root_other_thread( int gtid ); -#endif -static void __kmp_unregister_library( void ); // called by __kmp_internal_end() -static void __kmp_reap_thread( kmp_info_t * thread, int is_root ); -static kmp_info_t *__kmp_thread_pool_insert_pt = NULL; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* Calculate the identifier of the current thread */ -/* fast (and somewhat portable) way to get unique */ -/* identifier of executing thread. 
*/ -/* returns KMP_GTID_DNE if we haven't been assigned a gtid */ - -int -__kmp_get_global_thread_id( ) -{ - int i; - kmp_info_t **other_threads; - size_t stack_data; - char *stack_addr; - size_t stack_size; - char *stack_base; - - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", - __kmp_nth, __kmp_all_nth )); - - /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a - parallel region, made it return KMP_GTID_DNE to force serial_initialize by - caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee - __kmp_init_gtid for this to work. */ - - if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE; - -#ifdef KMP_TDATA_GTID - if ( TCR_4(__kmp_gtid_mode) >= 3) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" )); - return __kmp_gtid; - } -#endif - if ( TCR_4(__kmp_gtid_mode) >= 2) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" )); - return __kmp_gtid_get_specific(); - } - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" )); - - stack_addr = (char*) & stack_data; - other_threads = __kmp_threads; - - /* - ATT: The code below is a source of potential bugs due to unsynchronized access to - __kmp_threads array. For example: - 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL. - 2. Current thread is suspended by OS. - 3. Another thread unregisters and finishes (debug versions of free() may fill memory - with something like 0xEF). - 4. Current thread is resumed. - 5. Current thread reads junk from *thr. - TODO: Fix it. 
- --ln - */ - - for( i = 0 ; i < __kmp_threads_capacity ; i++ ) { - - kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); - if( !thr ) continue; - - stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); - stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); - - /* stack grows down -- search through all of the active threads */ - - if( stack_addr <= stack_base ) { - size_t stack_diff = stack_base - stack_addr; - - if( stack_diff <= stack_size ) { - /* The only way we can be closer than the allocated */ - /* stack size is if we are running on this thread. */ - KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i ); - return i; - } - } - } - - /* get specific to try and determine our gtid */ - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find " - "thread, using TLS\n" )); - i = __kmp_gtid_get_specific(); - - /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ - - /* if we havn't been assigned a gtid, then return code */ - if( i<0 ) return i; - - /* dynamically updated stack window for uber threads to avoid get_specific call */ - if( ! 
TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) { - KMP_FATAL( StackOverflow, i ); - } - - stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; - if( stack_addr > stack_base ) { - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, - other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base); - } else { - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr); - } - - /* Reprint stack bounds for ubermaster since they have been refined */ - if ( __kmp_storage_map ) { - char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; - char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; - __kmp_print_storage_map_gtid( i, stack_beg, stack_end, - other_threads[i]->th.th_info.ds.ds_stacksize, - "th_%d stack (refinement)", i ); - } - return i; -} - -int -__kmp_get_global_thread_id_reg( ) -{ - int gtid; - - if ( !__kmp_init_serial ) { - gtid = KMP_GTID_DNE; - } else -#ifdef KMP_TDATA_GTID - if ( TCR_4(__kmp_gtid_mode) >= 3 ) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" )); - gtid = __kmp_gtid; - } else -#endif - if ( TCR_4(__kmp_gtid_mode) >= 2 ) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" )); - gtid = __kmp_gtid_get_specific(); - } else { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" )); - gtid = __kmp_get_global_thread_id(); - } - - /* we must be a new uber master sibling thread */ - if( gtid == KMP_GTID_DNE ) { - KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. 
" - "Registering a new gtid.\n" )); - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - if( !__kmp_init_serial ) { - __kmp_do_serial_initialize(); - gtid = __kmp_gtid_get_specific(); - } else { - gtid = __kmp_register_root(FALSE); - } - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ - } - - KMP_DEBUG_ASSERT( gtid >=0 ); - - return gtid; -} - -/* caller must hold forkjoin_lock */ -void -__kmp_check_stack_overlap( kmp_info_t *th ) -{ - int f; - char *stack_beg = NULL; - char *stack_end = NULL; - int gtid; - - KA_TRACE(10,("__kmp_check_stack_overlap: called\n")); - if ( __kmp_storage_map ) { - stack_end = (char *) th->th.th_info.ds.ds_stackbase; - stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; - - gtid = __kmp_gtid_from_thread( th ); - - if (gtid == KMP_GTID_MONITOR) { - __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, - "th_%s stack (%s)", "mon", - ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" ); - } else { - __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, - "th_%d stack (%s)", gtid, - ( th->th.th_info.ds.ds_stackgrow ) ? 
"initial" : "actual" ); - } - } - - /* No point in checking ubermaster threads since they use refinement and cannot overlap */ - gtid = __kmp_gtid_from_thread( th ); - if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) - { - KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n")); - if ( stack_beg == NULL ) { - stack_end = (char *) th->th.th_info.ds.ds_stackbase; - stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; - } - - for( f=0 ; f < __kmp_threads_capacity ; f++ ) { - kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); - - if( f_th && f_th != th ) { - char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); - char *other_stack_beg = other_stack_end - - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); - if((stack_beg > other_stack_beg && stack_beg < other_stack_end) || - (stack_end > other_stack_beg && stack_end < other_stack_end)) { - - /* Print the other stack values before the abort */ - if ( __kmp_storage_map ) - __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end, - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), - "th_%d stack (overlapped)", - __kmp_gtid_from_thread( f_th ) ); - - __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null ); - } - } - } - } - KA_TRACE(10,("__kmp_check_stack_overlap: returning\n")); -} - - -/* ------------------------------------------------------------------------ */ - -/* ------------------------------------------------------------------------ */ - -void -__kmp_infinite_loop( void ) -{ - static int done = FALSE; - - while (! done) { - KMP_YIELD( 1 ); - } -} - -#define MAX_MESSAGE 512 - -void -__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) 
{ - char buffer[MAX_MESSAGE]; - va_list ap; - - va_start( ap, format); - KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format ); - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_vprintf( kmp_err, buffer, ap ); -#if KMP_PRINT_DATA_PLACEMENT - int node; - if(gtid >= 0) { - if(p1 <= p2 && (char*)p2 - (char*)p1 == size) { - if( __kmp_storage_map_verbose ) { - node = __kmp_get_host_node(p1); - if(node < 0) /* doesn't work, so don't try this next time */ - __kmp_storage_map_verbose = FALSE; - else { - char *last; - int lastNode; - int localProc = __kmp_get_cpu_from_gtid(gtid); - - p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) ); - p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) ); - if(localProc >= 0) - __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1); - else - __kmp_printf_no_lock(" GTID %d\n", gtid); -# if KMP_USE_PRCTL -/* The more elaborate format is disabled for now because of the prctl hanging bug. */ - do { - last = p1; - lastNode = node; - /* This loop collates adjacent pages with the same host node. */ - do { - (char*)p1 += PAGE_SIZE; - } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); - __kmp_printf_no_lock(" %p-%p memNode %d\n", last, - (char*)p1 - 1, lastNode); - } while(p1 <= p2); -# else - __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, - (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1)); - if(p1 < p2) { - __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, - (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2)); - } -# endif - } - } - } else - __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) ); - } -#endif /* KMP_PRINT_DATA_PLACEMENT */ - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); -} - -void -__kmp_warn( char const * format, ... 
) -{ - char buffer[MAX_MESSAGE]; - va_list ap; - - if ( __kmp_generate_warnings == kmp_warnings_off ) { - return; - } - - va_start( ap, format ); - - KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format ); - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_vprintf( kmp_err, buffer, ap ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); - - va_end( ap ); -} - -void -__kmp_abort_process() -{ - - // Later threads may stall here, but that's ok because abort() will kill them. - __kmp_acquire_bootstrap_lock( & __kmp_exit_lock ); - - if ( __kmp_debug_buf ) { - __kmp_dump_debug_buffer(); - }; // if - - if ( KMP_OS_WINDOWS ) { - // Let other threads know of abnormal termination and prevent deadlock - // if abort happened during library initialization or shutdown - __kmp_global.g.g_abort = SIGABRT; - - /* - On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing. - Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior() - works well, but this function is not available in VS7 (this is not problem for DLL, but - it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does - not help, at least in some versions of MS C RTL. - - It seems following sequence is the only way to simulate abort() and avoid pop-up error - box. - */ - raise( SIGABRT ); - _exit( 3 ); // Just in case, if signal ignored, exit anyway. - } else { - abort(); - }; // if - - __kmp_infinite_loop(); - __kmp_release_bootstrap_lock( & __kmp_exit_lock ); - -} // __kmp_abort_process - -void -__kmp_abort_thread( void ) -{ - // TODO: Eliminate g_abort global variable and this function. - // In case of abort just call abort(), it will kill all the threads. - __kmp_infinite_loop(); -} // __kmp_abort_thread - -/* ------------------------------------------------------------------------ */ - -/* - * Print out the storage map for the major kmp_info_t thread data structures - * that are allocated together. 
- */ - -static void -__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid ) -{ - __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid ); - - __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t), - "th_%d.th_info", gtid ); - - __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t), - "th_%d.th_local", gtid ); - - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], - sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid ); - - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier], - &thr->th.th_bar[bs_plain_barrier+1], - sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid); - - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier], - &thr->th.th_bar[bs_forkjoin_barrier+1], - sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid); - - #if KMP_FAST_REDUCTION_BARRIER - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier], - &thr->th.th_bar[bs_reduction_barrier+1], - sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid); - #endif // KMP_FAST_REDUCTION_BARRIER -} - -/* - * Print out the storage map for the major kmp_team_t team data structures - * that are allocated together. - */ - -static void -__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr ) -{ - int num_disp_buff = team->t.t_max_nproc > 1 ? 
KMP_MAX_DISP_BUF : 2; - __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d", - header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier], - sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id ); - - - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1], - sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1], - sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id ); - - #if KMP_FAST_REDUCTION_BARRIER - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1], - sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id ); - #endif // KMP_FAST_REDUCTION_BARRIER - - __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], - sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], - sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff], - sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer", - header, team_id ); - - /* - __kmp_print_storage_map_gtid( -1, &team->t.t_set_nproc[0], &team->t.t_set_nproc[num_thr], - sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_set_dynamic[0], &team->t.t_set_dynamic[num_thr], - sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_set_nested[0], &team->t.t_set_nested[num_thr], - sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id ); - - __kmp_print_storage_map_gtid( -1, 
&team->t.t_set_blocktime[0], &team->t.t_set_blocktime[num_thr], - sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_intervals[0], &team->t.t_set_bt_intervals[num_thr], - sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_set[0], &team->t.t_set_bt_set[num_thr], - sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id ); - - //__kmp_print_storage_map_gtid( -1, &team->t.t_set_max_active_levels[0], &team->t.t_set_max_active_levels[num_thr], - // sizeof(int) * num_thr, "%s_%d.t_set_max_active_levels", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_set_sched[0], &team->t.t_set_sched[num_thr], - sizeof(kmp_r_sched_t) * num_thr, "%s_%d.t_set_sched", header, team_id ); -#if OMP_40_ENABLED - __kmp_print_storage_map_gtid( -1, &team->t.t_set_proc_bind[0], &team->t.t_set_proc_bind[num_thr], - sizeof(kmp_proc_bind_t) * num_thr, "%s_%d.t_set_proc_bind", header, team_id ); -#endif - */ - - __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data, - sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id ); -} - -static void __kmp_init_allocator() {} -static void __kmp_fini_allocator() {} - -/* ------------------------------------------------------------------------ */ - -#ifdef KMP_DYNAMIC_LIB -# if KMP_OS_WINDOWS - - -static void -__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) { - // TODO: Change to __kmp_break_bootstrap_lock(). - __kmp_init_bootstrap_lock( lck ); // make the lock released -} - -static void -__kmp_reset_locks_on_process_detach( int gtid_req ) { - int i; - int thread_count; - - // PROCESS_DETACH is expected to be called by a thread - // that executes ProcessExit() or FreeLibrary(). - // OS terminates other threads (except the one calling ProcessExit or FreeLibrary). - // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock. 
- // However, in fact, some threads can be still alive here, although being about to be terminated. - // The threads in the array with ds_thread==0 are most suspicious. - // Actually, it can be not safe to access the __kmp_threads[]. - - // TODO: does it make sense to check __kmp_roots[] ? - - // Let's check that there are no other alive threads registered with the OMP lib. - while( 1 ) { - thread_count = 0; - for( i = 0; i < __kmp_threads_capacity; ++i ) { - if( !__kmp_threads ) continue; - kmp_info_t* th = __kmp_threads[ i ]; - if( th == NULL ) continue; - int gtid = th->th.th_info.ds.ds_gtid; - if( gtid == gtid_req ) continue; - if( gtid < 0 ) continue; - DWORD exit_val; - int alive = __kmp_is_thread_alive( th, &exit_val ); - if( alive ) { - ++thread_count; - } - } - if( thread_count == 0 ) break; // success - } - - // Assume that I'm alone. - - // Now it might be probably safe to check and reset locks. - // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. - __kmp_reset_lock( &__kmp_forkjoin_lock ); - #ifdef KMP_DEBUG - __kmp_reset_lock( &__kmp_stdio_lock ); - #endif // KMP_DEBUG - - -} - -BOOL WINAPI -DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) { - //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - switch( fdwReason ) { - - case DLL_PROCESS_ATTACH: - KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" )); - - return TRUE; - - case DLL_PROCESS_DETACH: - KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n", - __kmp_gtid_get_specific() )); - - if( lpReserved != NULL ) - { - // lpReserved is used for telling the difference: - // lpReserved == NULL when FreeLibrary() was called, - // lpReserved != NULL when the process terminates. - // When FreeLibrary() is called, worker threads remain alive. - // So they will release the forkjoin lock by themselves. - // When the process terminates, worker threads disappear triggering - // the problem of unreleased forkjoin lock as described below. 
- - // A worker thread can take the forkjoin lock - // in __kmp_suspend_template()->__kmp_rml_decrease_load_before_sleep(). - // The problem comes up if that worker thread becomes dead - // before it releases the forkjoin lock. - // The forkjoin lock remains taken, while the thread - // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below - // will try to take the forkjoin lock and will always fail, - // so that the application will never finish [normally]. - // This scenario is possible if __kmpc_end() has not been executed. - // It looks like it's not a corner case, but common cases: - // - the main function was compiled by an alternative compiler; - // - the main function was compiled by icl but without /Qopenmp (application with plugins); - // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP. - // - alive foreign thread prevented __kmpc_end from doing cleanup. - - // This is a hack to work around the problem. - // TODO: !!! to figure out something better. 
- __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() ); - } - - __kmp_internal_end_library( __kmp_gtid_get_specific() ); - - return TRUE; - - case DLL_THREAD_ATTACH: - KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" )); - - /* if we wanted to register new siblings all the time here call - * __kmp_get_gtid(); */ - return TRUE; - - case DLL_THREAD_DETACH: - KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n", - __kmp_gtid_get_specific() )); - - __kmp_internal_end_thread( __kmp_gtid_get_specific() ); - return TRUE; - } - - return TRUE; -} - -# endif /* KMP_OS_WINDOWS */ -#endif /* KMP_DYNAMIC_LIB */ - - -/* ------------------------------------------------------------------------ */ - -/* Change the library type to "status" and return the old type */ -/* called from within initialization routines where __kmp_initz_lock is held */ -int -__kmp_change_library( int status ) -{ - int old_status; - - old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count) - - if (status) { - __kmp_yield_init |= 1; // throughput => turnaround (odd init count) - } - else { - __kmp_yield_init &= ~1; // turnaround => throughput (even init count) - } - - return old_status; // return previous setting of whether KMP_LIBRARY=throughput -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* __kmp_parallel_deo -- - * Wait until it's our turn. 
- */ -void -__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; -#ifdef BUILD_PARALLEL_ORDERED - kmp_team_t *team = __kmp_team_from_gtid( gtid ); -#endif /* BUILD_PARALLEL_ORDERED */ - - if( __kmp_env_consistency_check ) { - if( __kmp_threads[gtid]->th.th_root->r.r_active ) -#if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 ); -#else - __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL ); -#endif - } -#ifdef BUILD_PARALLEL_ORDERED - if( !team->t.t_serialized ) { - KMP_MB(); - KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL); - KMP_MB(); - } -#endif /* BUILD_PARALLEL_ORDERED */ -} - -/* __kmp_parallel_dxo -- - * Signal the next task. - */ - -void -__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; -#ifdef BUILD_PARALLEL_ORDERED - int tid = __kmp_tid_from_gtid( gtid ); - kmp_team_t *team = __kmp_team_from_gtid( gtid ); -#endif /* BUILD_PARALLEL_ORDERED */ - - if( __kmp_env_consistency_check ) { - if( __kmp_threads[gtid]->th.th_root->r.r_active ) - __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref ); - } -#ifdef BUILD_PARALLEL_ORDERED - if ( ! team->t.t_serialized ) { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* use the tid of the next thread in this team */ - /* TODO repleace with general release procedure */ - team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc ); - -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - /* accept blame for "ordered" waiting */ - kmp_info_t *this_thread = __kmp_threads[gtid]; - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - this_thread->th.ompt_thread_info.wait_id); - } -#endif - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - } -#endif /* BUILD_PARALLEL_ORDERED */ -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* The BARRIER for a SINGLE process section is always explicit */ - -int -__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ) -{ - int status; - kmp_info_t *th; - kmp_team_t *team; - - if( ! TCR_4(__kmp_init_parallel) ) - __kmp_parallel_initialize(); - - th = __kmp_threads[ gtid ]; - team = th->th.th_team; - status = 0; - - th->th.th_ident = id_ref; - - if ( team->t.t_serialized ) { - status = 1; - } else { - kmp_int32 old_this = th->th.th_local.this_construct; - - ++th->th.th_local.this_construct; - /* try to set team count to thread count--success means thread got the - single block - */ - /* TODO: Should this be acquire or release? 
*/ - status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this, - th->th.th_local.this_construct); -#if USE_ITT_BUILD - if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) && -#if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1 ) - { // Only report metadata by master of active team at level 1 - __kmp_itt_metadata_single( id_ref ); - } -#endif /* USE_ITT_BUILD */ - } - - if( __kmp_env_consistency_check ) { - if (status && push_ws) { - __kmp_push_workshare( gtid, ct_psingle, id_ref ); - } else { - __kmp_check_workshare( gtid, ct_psingle, id_ref ); - } - } -#if USE_ITT_BUILD - if ( status ) { - __kmp_itt_single_start( gtid ); - } -#endif /* USE_ITT_BUILD */ - return status; -} - -void -__kmp_exit_single( int gtid ) -{ -#if USE_ITT_BUILD - __kmp_itt_single_end( gtid ); -#endif /* USE_ITT_BUILD */ - if( __kmp_env_consistency_check ) - __kmp_pop_workshare( gtid, ct_psingle, NULL ); -} - - -/* - * determine if we can go parallel or must use a serialized parallel region and - * how many threads we can use - * set_nproc is the number of threads requested for the team - * returns 0 if we should serialize or only use one thread, - * otherwise the number of threads to use - * The forkjoin lock is held by the caller. - */ -static int -__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team, - int master_tid, int set_nthreads -#if OMP_40_ENABLED - , int enter_teams -#endif /* OMP_40_ENABLED */ -) -{ - int capacity; - int new_nthreads; - KMP_DEBUG_ASSERT( __kmp_init_serial ); - KMP_DEBUG_ASSERT( root && parent_team ); - - // - // If dyn-var is set, dynamically adjust the number of desired threads, - // according to the method specified by dynamic_mode. - // - new_nthreads = set_nthreads; - if ( ! 
get__dynamic_2( parent_team, master_tid ) ) { - ; - } -#ifdef USE_LOAD_BALANCE - else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { - new_nthreads = __kmp_load_balance_nproc( root, set_nthreads ); - if ( new_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - if ( new_nthreads < set_nthreads ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n", - master_tid, new_nthreads )); - } - } -#endif /* USE_LOAD_BALANCE */ - else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { - new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1 - : root->r.r_hot_team->t.t_nproc); - if ( new_nthreads <= 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - if ( new_nthreads < set_nthreads ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n", - master_tid, new_nthreads )); - } - else { - new_nthreads = set_nthreads; - } - } - else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { - if ( set_nthreads > 2 ) { - new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] ); - new_nthreads = ( new_nthreads % set_nthreads ) + 1; - if ( new_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - if ( new_nthreads < set_nthreads ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n", - master_tid, new_nthreads )); - } - } - } - else { - KMP_ASSERT( 0 ); - } - - // - // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. - // - if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : - root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) { - int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 
1 : - root->r.r_hot_team->t.t_nproc ); - if ( tl_nthreads <= 0 ) { - tl_nthreads = 1; - } - - // - // If dyn-var is false, emit a 1-time warning. - // - if ( ! get__dynamic_2( parent_team, master_tid ) - && ( ! __kmp_reserve_warn ) ) { - __kmp_reserve_warn = 1; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ), - KMP_HNT( Unset_ALL_THREADS ), - __kmp_msg_null - ); - } - if ( tl_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n", - master_tid, tl_nthreads )); - new_nthreads = tl_nthreads; - } - - - // - // Check if the threads array is large enough, or needs expanding. - // - // See comment in __kmp_register_root() about the adjustment if - // __kmp_threads[0] == NULL. - // - capacity = __kmp_threads_capacity; - if ( TCR_PTR(__kmp_threads[0]) == NULL ) { - --capacity; - } - if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : - root->r.r_hot_team->t.t_nproc ) > capacity ) { - // - // Expand the threads array. - // - int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : - root->r.r_hot_team->t.t_nproc ) - capacity; - int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired); - if ( slotsAdded < slotsRequired ) { - // - // The threads array was not expanded enough. - // - new_nthreads -= ( slotsRequired - slotsAdded ); - KMP_ASSERT( new_nthreads >= 1 ); - - // - // If dyn-var is false, emit a 1-time warning. - // - if ( ! get__dynamic_2( parent_team, master_tid ) - && ( ! 
__kmp_reserve_warn ) ) { - __kmp_reserve_warn = 1; - if ( __kmp_tp_cached ) { - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), - KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), - KMP_HNT( PossibleSystemLimitOnThreads ), - __kmp_msg_null - ); - } - else { - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), - KMP_HNT( SystemLimitOnThreads ), - __kmp_msg_null - ); - } - } - } - } - - if ( new_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n", - __kmp_get_gtid(), set_nthreads ) ); - return 1; - } - - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n", - __kmp_get_gtid(), new_nthreads, set_nthreads )); - return new_nthreads; -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* allocate threads from the thread pool and assign them to the new team */ -/* we are assured that there are enough threads available, because we - * checked on that earlier within critical section forkjoin */ - -static void -__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team, - kmp_info_t *master_th, int master_gtid ) -{ - int i; - int use_hot_team; - - KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) ); - KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() ); - KMP_MB(); - - /* first, let's setup the master thread */ - master_th->th.th_info.ds.ds_tid = 0; - master_th->th.th_team = team; - master_th->th.th_team_nproc = team->t.t_nproc; - master_th->th.th_team_master = master_th; - master_th->th.th_team_serialized = FALSE; - master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ]; - - /* make sure we are not the optimized hot team */ -#if KMP_NESTED_HOT_TEAMS - use_hot_team = 0; - kmp_hot_team_ptr_t *hot_teams = 
master_th->th.th_hot_teams; - if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0 - int level = team->t.t_active_level - 1; // index in array of hot teams - if( master_th->th.th_teams_microtask ) { // are we inside the teams? - if( master_th->th.th_teams_size.nteams > 1 ) { - ++level; // level was not increased in teams construct for team_of_masters - } - if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && - master_th->th.th_teams_level == team->t.t_level ) { - ++level; // level was not increased in teams construct for team_of_workers before the parallel - } // team->t.t_level will be increased inside parallel - } - if( level < __kmp_hot_teams_max_level ) { - if( hot_teams[level].hot_team ) { - // hot team has already been allocated for given level - KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); - use_hot_team = 1; // the team is ready to use - } else { - use_hot_team = 0; // AC: threads are not allocated yet - hot_teams[level].hot_team = team; // remember new hot team - hot_teams[level].hot_team_nth = team->t.t_nproc; - } - } else { - use_hot_team = 0; - } - } -#else - use_hot_team = team == root->r.r_hot_team; -#endif - if ( !use_hot_team ) { - - /* install the master thread */ - team->t.t_threads[ 0 ] = master_th; - __kmp_initialize_info( master_th, team, 0, master_gtid ); - - /* now, install the worker threads */ - for ( i=1 ; i < team->t.t_nproc ; i++ ) { - - /* fork or reallocate a new thread and install it in team */ - kmp_info_t *thr = __kmp_allocate_thread( root, team, i ); - team->t.t_threads[ i ] = thr; - KMP_DEBUG_ASSERT( thr ); - KMP_DEBUG_ASSERT( thr->th.th_team == team ); - /* align team and thread arrived states */ - KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n", - __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0, - __kmp_gtid_from_tid( i, team ), team->t.t_id, i, - team->t.t_bar[ bs_forkjoin_barrier ].b_arrived, - team->t.t_bar[ bs_plain_barrier 
].b_arrived ) ); -#if OMP_40_ENABLED - thr->th.th_teams_microtask = master_th->th.th_teams_microtask; - thr->th.th_teams_level = master_th->th.th_teams_level; - thr->th.th_teams_size = master_th->th.th_teams_size; -#endif - { // Initialize threads' barrier data. - int b; - kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar; - for ( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; -#endif - }; // for b - } - } - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - __kmp_partition_places( team ); -#endif - - } - - KMP_MB(); -} - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// -// Propagate any changes to the floating point control registers out to the team -// We try to avoid unnecessary writes to the relevant cache line in the team structure, -// so we don't make changes unless they are needed. -// -inline static void -propagateFPControl(kmp_team_t * team) -{ - if ( __kmp_inherit_fp_control ) { - kmp_int16 x87_fpu_control_word; - kmp_uint32 mxcsr; - - // Get master values of FPU control flags (both X87 and vector) - __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); - __kmp_store_mxcsr( &mxcsr ); - mxcsr &= KMP_X86_MXCSR_MASK; - - // There is no point looking at t_fp_control_saved here. - // If it is TRUE, we still have to update the values if they are different from those we now have. - // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure - // that the values in the team are the same as those we have. - // So, this code achieves what we need whether or not t_fp_control_saved is true. - // By checking whether the value needs updating we avoid unnecessary writes that would put the - // cache-line into a written state, causing all threads in the team to have to read it again. 
- if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) { - team->t.t_x87_fpu_control_word = x87_fpu_control_word; - } - if ( team->t.t_mxcsr != mxcsr ) { - team->t.t_mxcsr = mxcsr; - } - // Although we don't use this value, other code in the runtime wants to know whether it should restore them. - // So we must ensure it is correct. - if (!team->t.t_fp_control_saved) { - team->t.t_fp_control_saved = TRUE; - } - } - else { - // Similarly here. Don't write to this cache-line in the team structure unless we have to. - if (team->t.t_fp_control_saved) - team->t.t_fp_control_saved = FALSE; - } -} - -// Do the opposite, setting the hardware registers to the updated values from the team. -inline static void -updateHWFPControl(kmp_team_t * team) -{ - if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) { - // - // Only reset the fp control regs if they have been changed in the team. - // the parallel region that we are exiting. - // - kmp_int16 x87_fpu_control_word; - kmp_uint32 mxcsr; - __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); - __kmp_store_mxcsr( &mxcsr ); - mxcsr &= KMP_X86_MXCSR_MASK; - - if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) { - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word ); - } - - if ( team->t.t_mxcsr != mxcsr ) { - __kmp_load_mxcsr( &team->t.t_mxcsr ); - } - } -} -#else -# define propagateFPControl(x) ((void)0) -# define updateHWFPControl(x) ((void)0) -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -static void -__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration - -/* - * Run a parallel region that has been serialized, so runs only in a team of the single master thread. 
- */ -void -__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) -{ - kmp_info_t *this_thr; - kmp_team_t *serial_team; - - KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) ); - - /* Skip all this code for autopar serialized loops since it results in - unacceptable overhead */ - if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) ) - return; - - if( ! TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); - - this_thr = __kmp_threads[ global_tid ]; - serial_team = this_thr->th.th_serial_team; - - /* utilize the serialized team held by this thread */ - KMP_DEBUG_ASSERT( serial_team ); - KMP_MB(); - - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); - KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL ); - KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n", - global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) ); - this_thr->th.th_task_team = NULL; - } - -#if OMP_40_ENABLED - kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; - if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { - proc_bind = proc_bind_false; - } - else if ( proc_bind == proc_bind_default ) { - // - // No proc_bind clause was specified, so use the current value - // of proc-bind-var for this parallel region. 
- // - proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; - } - // - // Reset for next parallel region - // - this_thr->th.th_set_proc_bind = proc_bind_default; -#endif /* OMP_40_ENABLED */ - - if( this_thr->th.th_team != serial_team ) { - // Nested level will be an index in the nested nthreads array - int level = this_thr->th.th_team->t.t_level; - - if( serial_team->t.t_serialized ) { - /* this serial team was already used - * TODO increase performance by making this locks more specific */ - kmp_team_t *new_team; - - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); -#endif - - new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, -#if OMPT_SUPPORT - ompt_parallel_id, -#endif -#if OMP_40_ENABLED - proc_bind, -#endif - & this_thr->th.th_current_task->td_icvs, - 0 USE_NESTED_HOT_ARG(NULL) ); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - KMP_ASSERT( new_team ); - - /* setup new serialized team and install it */ - new_team->t.t_threads[0] = this_thr; - new_team->t.t_parent = this_thr->th.th_team; - serial_team = new_team; - this_thr->th.th_serial_team = serial_team; - - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", - global_tid, serial_team ) ); - - - /* TODO the above breaks the requirement that if we run out of - * resources, then we can still guarantee that serialized teams - * are ok, since we may need to allocate a new one */ - } else { - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", - global_tid, serial_team ) ); - } - - /* we have to initialize this serial team */ - KMP_DEBUG_ASSERT( serial_team->t.t_threads ); - KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); - KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team ); - serial_team->t.t_ident = loc; - serial_team->t.t_serialized = 1; - serial_team->t.t_nproc = 1; - serial_team->t.t_parent = 
this_thr->th.th_team; - serial_team->t.t_sched = this_thr->th.th_team->t.t_sched; - this_thr->th.th_team = serial_team; - serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; - - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n", - global_tid, this_thr->th.th_current_task ) ); - KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 ); - this_thr->th.th_current_task->td_flags.executing = 0; - - __kmp_push_current_task_to_thread( this_thr, serial_team, 0 ); - - /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for - each serialized task represented by team->t.t_serialized? */ - copy_icvs( - & this_thr->th.th_current_task->td_icvs, - & this_thr->th.th_current_task->td_parent->td_icvs ); - - // Thread value exists in the nested nthreads array for the next nested level - if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { - this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; - } - -#if OMP_40_ENABLED - if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) { - this_thr->th.th_current_task->td_icvs.proc_bind - = __kmp_nested_proc_bind.bind_types[ level + 1 ]; - } -#endif /* OMP_40_ENABLED */ - -#if USE_DEBUGGER - serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger. 
-#endif - this_thr->th.th_info.ds.ds_tid = 0; - - /* set thread cache values */ - this_thr->th.th_team_nproc = 1; - this_thr->th.th_team_master = this_thr; - this_thr->th.th_team_serialized = 1; - - serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; - serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; - - propagateFPControl (serial_team); - - /* check if we need to allocate dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); - if ( !serial_team->t.t_dispatch->th_disp_buffer ) { - serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *) - __kmp_allocate( sizeof( dispatch_private_info_t ) ); - } - this_thr->th.th_dispatch = serial_team->t.t_dispatch; - -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); - __ompt_team_assign_id(serial_team, ompt_parallel_id); -#endif - - KMP_MB(); - - } else { - /* this serialized team is already being used, - * that's fine, just add another nested level */ - KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team ); - KMP_DEBUG_ASSERT( serial_team->t.t_threads ); - KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); - ++ serial_team->t.t_serialized; - this_thr->th.th_team_serialized = serial_team->t.t_serialized; - - // Nested level will be an index in the nested nthreads array - int level = this_thr->th.th_team->t.t_level; - // Thread value exists in the nested nthreads array for the next nested level - if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { - this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; - } - serial_team->t.t_level++; - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n", - global_tid, serial_team, serial_team->t.t_level ) ); - - /* allocate/push dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); - { - dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *) - 
__kmp_allocate( sizeof( dispatch_private_info_t ) ); - disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; - serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; - } - this_thr->th.th_dispatch = serial_team->t.t_dispatch; - - KMP_MB(); - } - - if ( __kmp_env_consistency_check ) - __kmp_push_parallel( global_tid, NULL ); - -#if USE_ITT_BUILD - // Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment - if ( serial_team->t.t_level == 1 -#if OMP_40_ENABLED - && this_thr->th.th_teams_microtask == NULL -#endif - ) { -#if USE_ITT_NOTIFY - // Save the start of the "parallel" region for VTune. This is the frame begin at the same time. - if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) && - ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) - { - serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp(); - } else // only one notification scheme (either "submit" or "forking/joined", not both) -#endif - if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && - __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) - { - this_thr->th.th_ident = loc; - // 0 - no barriers; 1 - serialized parallel - __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 ); - } - } -#endif /* USE_ITT_BUILD */ -} - -/* most of the work for a fork */ -/* return true if we really went parallel, false if serialized */ -int -__kmp_fork_call( - ident_t * loc, - int gtid, - enum fork_context_e call_context, // Intel, GNU, ... 
- kmp_int32 argc, -#if OMPT_SUPPORT - void *unwrapped_task, -#endif - microtask_t microtask, - launch_t invoker, -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - va_list * ap -#else - va_list ap -#endif - ) -{ - void **argv; - int i; - int master_tid; - int master_this_cons; - kmp_team_t *team; - kmp_team_t *parent_team; - kmp_info_t *master_th; - kmp_root_t *root; - int nthreads; - int master_active; - int master_set_numthreads; - int level; -#if OMP_40_ENABLED - int active_level; - int teams_level; -#endif -#if KMP_NESTED_HOT_TEAMS - kmp_hot_team_ptr_t **p_hot_teams; -#endif - { // KMP_TIME_BLOCK - KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call); - KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); - - KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid )); - if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) { - /* Some systems prefer the stack for the root thread(s) to start with */ - /* some gap from the parent stack to prevent false sharing. */ - void *dummy = KMP_ALLOCA(__kmp_stkpadding); - /* These 2 lines below are so this does not get optimized out */ - if ( __kmp_stkpadding > KMP_MAX_STKPADDING ) - __kmp_stkpadding += (short)((kmp_int64)dummy); - } - - /* initialize if needed */ - KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown - if( ! 
TCR_4(__kmp_init_parallel) ) - __kmp_parallel_initialize(); - - /* setup current data */ - master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown - parent_team = master_th->th.th_team; - master_tid = master_th->th.th_info.ds.ds_tid; - master_this_cons = master_th->th.th_local.this_construct; - root = master_th->th.th_root; - master_active = root->r.r_active; - master_set_numthreads = master_th->th.th_set_nproc; - -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id; - ompt_task_id_t ompt_task_id; - ompt_frame_t *ompt_frame; - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; - - if (ompt_enabled) { - ompt_parallel_id = __ompt_parallel_id_new(gtid); - ompt_task_id = __ompt_get_task_id_internal(0); - ompt_frame = __ompt_get_task_frame_internal(0); - } -#endif - - // Nested level will be an index in the nested nthreads array - level = parent_team->t.t_level; -#if OMP_40_ENABLED - active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed - teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams -#endif -#if KMP_NESTED_HOT_TEAMS - p_hot_teams = &master_th->th.th_hot_teams; - if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) { - *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate( - sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); - (*p_hot_teams)[0].hot_team = root->r.r_hot_team; - (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0) - } -#endif - -#if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = master_set_numthreads; - - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - ompt_task_id, ompt_frame, ompt_parallel_id, - team_size, unwrapped_task, OMPT_INVOKER(call_context)); - } -#endif - - master_th->th.th_ident = loc; - -#if OMP_40_ENABLED - if ( master_th->th.th_teams_microtask && - ap && microtask 
!= (microtask_t)__kmp_teams_master && level == teams_level ) { - // AC: This is start of parallel that is nested inside teams construct. - // The team is actual (hot), all workers are ready at the fork barrier. - // No lock needed to initialize the team a bit, then free workers. - parent_team->t.t_ident = loc; - parent_team->t.t_argc = argc; - argv = (void**)parent_team->t.t_argv; - for( i=argc-1; i >= 0; --i ) -/* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * ); -#else - *argv++ = va_arg( ap, void * ); -#endif - /* Increment our nested depth levels, but not increase the serialization */ - if ( parent_team == master_th->th.th_serial_team ) { - // AC: we are in serialized parallel - __kmpc_serialized_parallel(loc, gtid); - KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 ); - parent_team->t.t_serialized--; // AC: need this in order enquiry functions - // work correctly, will restore at join time - -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - - ompt_lw_taskteam_t lw_taskteam; - - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th); - -#if OMPT_TRACE - /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = parent_team->t.ompt_team_info.parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); - } -#endif - - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } -#endif - - { - KMP_TIME_BLOCK(OMP_work); - __kmp_invoke_microtask( microtask, gtid, 0, argc, 
parent_team->t.t_argv -#if OMPT_SUPPORT - , exit_runtime_p -#endif - ); - } - -#if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0; - - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); - } - - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; -#endif - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, - OMPT_INVOKER(call_context)); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - return TRUE; - } - - parent_team->t.t_pkfn = microtask; -#if OMPT_SUPPORT - parent_team->t.ompt_team_info.microtask = unwrapped_task; -#endif - parent_team->t.t_invoke = invoker; - KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); - parent_team->t.t_active_level ++; - parent_team->t.t_level ++; - - /* Change number of threads in the team if requested */ - if ( master_set_numthreads ) { // The parallel has num_threads clause - if ( master_set_numthreads < master_th->th.th_teams_size.nth ) { - // AC: only can reduce the number of threads dynamically, cannot increase - kmp_info_t **other_threads = parent_team->t.t_threads; - parent_team->t.t_nproc = master_set_numthreads; - for ( i = 0; i < master_set_numthreads; ++i ) { - other_threads[i]->th.th_team_nproc = master_set_numthreads; - } - // Keep extra threads hot in the team for possible next parallels - } - master_th->th.th_set_nproc = 0; - } - -#if USE_DEBUGGER - if ( __kmp_debugging ) { // Let debugger override number of threads. - int nth = __kmp_omp_num_threads( loc ); - if ( nth > 0 ) { // 0 means debugger does not want to change number of threads. 
- master_set_numthreads = nth; - }; // if - }; // if -#endif - - KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); - __kmp_internal_fork( loc, gtid, parent_team ); - KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); - - /* Invoke microtask for MASTER thread */ - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", - gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); - - { - KMP_TIME_BLOCK(OMP_work); - if (! parent_team->t.t_invoke( gtid )) { - KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); - } - } - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", - gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); - - return TRUE; - } // Parallel closely nested in teams construct -#endif /* OMP_40_ENABLED */ - -#if KMP_DEBUG - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); - } -#endif - - if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) { - nthreads = 1; - } else { -#if OMP_40_ENABLED - int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level)); -#endif - nthreads = master_set_numthreads ? - master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task - - // Check if we need to take forkjoin lock? (no need for serialized parallel out of teams construct). - // This code moved here from __kmp_reserve_threads() to speedup nested serialized parallels. 
- if (nthreads > 1) { - if ( ( !get__nested(master_th) && (root->r.r_in_parallel -#if OMP_40_ENABLED - && !enter_teams -#endif /* OMP_40_ENABLED */ - ) ) || ( __kmp_library == library_serial ) ) { - KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n", - gtid, nthreads )); - nthreads = 1; - } - } - if ( nthreads > 1 ) { - /* determine how many new threads we can use */ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - - nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads -#if OMP_40_ENABLED -/* AC: If we execute teams from parallel region (on host), then teams should be created - but each can only have 1 thread if nesting is disabled. If teams called from serial region, - then teams and their threads should be created regardless of the nesting setting. */ - , enter_teams -#endif /* OMP_40_ENABLED */ - ); - if ( nthreads == 1 ) { - // Free lock for single thread execution here; - // for multi-thread execution it will be freed later - // after team of threads created and initialized - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - } - } - } - KMP_DEBUG_ASSERT( nthreads > 0 ); - - /* If we temporarily changed the set number of threads then restore it now */ - master_th->th.th_set_nproc = 0; - - /* create a serialized parallel region? */ - if ( nthreads == 1 ) { - /* josh todo: hypothetical question: what do we do for OS X*? */ -#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - void * args[ argc ]; -#else - void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) ); -#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */ - - KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid )); - - __kmpc_serialized_parallel(loc, gtid); - - if ( call_context == fork_context_intel ) { - /* TODO this sucks, use the compiler itself to pass args! 
:) */ - master_th->th.th_serial_team->t.t_ident = loc; -#if OMP_40_ENABLED - if ( !ap ) { - // revert change made in __kmpc_serialized_parallel() - master_th->th.th_serial_team->t.t_level--; - // Get args from parent team for teams construct - -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - - ompt_lw_taskteam_t lw_taskteam; - - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th); - -#if OMPT_TRACE - my_task_id = lw_taskteam.ompt_task_info.task_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_task_id); - } -#endif - - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } -#endif - - { - KMP_TIME_BLOCK(OMP_work); - __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv -#if OMPT_SUPPORT - , exit_runtime_p -#endif - ); - } - -#if OMPT_SUPPORT - if (ompt_enabled) { - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0; - -#if OMPT_TRACE - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); - } -#endif - - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, - OMPT_INVOKER(call_context)); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - } else if ( microtask == (microtask_t)__kmp_teams_master ) { - 
KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team ); - team = master_th->th.th_team; - //team->t.t_pkfn = microtask; - team->t.t_invoke = invoker; - __kmp_alloc_argv_entries( argc, team, TRUE ); - team->t.t_argc = argc; - argv = (void**) team->t.t_argv; - if ( ap ) { - for( i=argc-1; i >= 0; --i ) -// TODO: revert workaround for Intel(R) 64 tracker #96 -# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * ); -# else - *argv++ = va_arg( ap, void * ); -# endif - } else { - for( i=0; i < argc; ++i ) - // Get args from parent team for teams construct - argv[i] = parent_team->t.t_argv[i]; - } - // AC: revert change made in __kmpc_serialized_parallel() - // because initial code in teams should have level=0 - team->t.t_level--; - // AC: call special invoker for outer "parallel" of the teams construct - { - KMP_TIME_BLOCK(OMP_work); - invoker(gtid); - } - } else { -#endif /* OMP_40_ENABLED */ - argv = args; - for( i=argc-1; i >= 0; --i ) -// TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * ); -#else - *argv++ = va_arg( ap, void * ); -#endif - KMP_MB(); - -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - - ompt_lw_taskteam_t lw_taskteam; - - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th); - -#if OMPT_TRACE - /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = ompt_parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); - } -#endif - - /* OMPT state */ - 
master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } -#endif - - { - KMP_TIME_BLOCK(OMP_work); - __kmp_invoke_microtask( microtask, gtid, 0, argc, args -#if OMPT_SUPPORT - , exit_runtime_p -#endif - ); - } - -#if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0; - - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, my_task_id); - } -#endif - - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, - OMPT_INVOKER(call_context)); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif -#if OMP_40_ENABLED - } -#endif /* OMP_40_ENABLED */ - } - else if ( call_context == fork_context_gnu ) { -#if OMPT_SUPPORT - ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) - __kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, master_th, gtid, - unwrapped_task, ompt_parallel_id); - - lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); - lwt->ompt_task_info.frame.exit_runtime_frame = 0; - __ompt_lw_taskteam_link(lwt, master_th); -#endif - - // we were called from GNU native code - KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); - return FALSE; - } - else { - KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" ); - } - - - KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); - KMP_MB(); - return FALSE; - } - - // GEH: only modify the executing flag in the case when not serialized - // serialized case is handled in kmpc_serialized_parallel - KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, 
curtask=%p, curtask_max_aclevel=%d\n", - parent_team->t.t_active_level, master_th, master_th->th.th_current_task, - master_th->th.th_current_task->td_icvs.max_active_levels ) ); - // TODO: GEH - cannot do this assertion because root thread not set up as executing - // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); - master_th->th.th_current_task->td_flags.executing = 0; - -#if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask || level > teams_level ) -#endif /* OMP_40_ENABLED */ - { - /* Increment our nested depth level */ - KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); - } - - // See if we need to make a copy of the ICVs. - int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; - if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) { - nthreads_icv = __kmp_nested_nth.nth[level+1]; - } - else { - nthreads_icv = 0; // don't update - } - -#if OMP_40_ENABLED - // Figure out the proc_bind_policy for the new team. - kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; - kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update - if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { - proc_bind = proc_bind_false; - } - else { - if (proc_bind == proc_bind_default) { - // No proc_bind clause specified; use current proc-bind-var for this parallel region - proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; - } - /* else: The proc_bind policy was specified explicitly on parallel clause. This - overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */ - // Figure the value of proc-bind-var for the child threads. 
- if ((level+1 < __kmp_nested_proc_bind.used) - && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) { - proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1]; - } - } - - // Reset for next parallel region - master_th->th.th_set_proc_bind = proc_bind_default; -#endif /* OMP_40_ENABLED */ - - if ((nthreads_icv > 0) -#if OMP_40_ENABLED - || (proc_bind_icv != proc_bind_default) -#endif /* OMP_40_ENABLED */ - ) { - kmp_internal_control_t new_icvs; - copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); - new_icvs.next = NULL; - if (nthreads_icv > 0) { - new_icvs.nproc = nthreads_icv; - } - -#if OMP_40_ENABLED - if (proc_bind_icv != proc_bind_default) { - new_icvs.proc_bind = proc_bind_icv; - } -#endif /* OMP_40_ENABLED */ - - /* allocate a new parallel team */ - KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); - team = __kmp_allocate_team(root, nthreads, nthreads, -#if OMPT_SUPPORT - ompt_parallel_id, -#endif -#if OMP_40_ENABLED - proc_bind, -#endif - &new_icvs, argc USE_NESTED_HOT_ARG(master_th) ); - } else { - /* allocate a new parallel team */ - KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); - team = __kmp_allocate_team(root, nthreads, nthreads, -#if OMPT_SUPPORT - ompt_parallel_id, -#endif -#if OMP_40_ENABLED - proc_bind, -#endif - &master_th->th.th_current_task->td_icvs, argc - USE_NESTED_HOT_ARG(master_th) ); - } - KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) ); - - /* setup the new team */ - team->t.t_master_tid = master_tid; - team->t.t_master_this_cons = master_this_cons; - team->t.t_ident = loc; - team->t.t_parent = parent_team; - TCW_SYNC_PTR(team->t.t_pkfn, microtask); -#if OMPT_SUPPORT - TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task); -#endif - team->t.t_invoke = invoker; /* TODO move this to root, maybe */ - // TODO: parent_team->t.t_level == INT_MAX ??? 
-#if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask || level > teams_level ) { -#endif /* OMP_40_ENABLED */ - team->t.t_level = parent_team->t.t_level + 1; - team->t.t_active_level = parent_team->t.t_active_level + 1; -#if OMP_40_ENABLED - } else { - // AC: Do not increase parallel level at start of the teams construct - team->t.t_level = parent_team->t.t_level; - team->t.t_active_level = parent_team->t.t_active_level; - } -#endif /* OMP_40_ENABLED */ - team->t.t_sched = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule - -#if OMP_40_ENABLED - team->t.t_cancel_request = cancel_noreq; -#endif - - // Update the floating point rounding in the team if required. - propagateFPControl(team); - - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Set master's task team to team's task team. Unless this is hot team, it should be NULL. -#if 0 - // Patch out an assertion that trips while the runtime seems to operate correctly. - // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch. 
- KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); -#endif - KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n", - __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, - parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) ); - if (level) { - // Take a memo of master's task_state - KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); - if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size - kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz; - kmp_uint8 *old_stack, *new_stack; - kmp_uint32 i; - new_stack = (kmp_uint8 *)__kmp_allocate(new_size); - for (i=0; ith.th_task_state_stack_sz; ++i) { - new_stack[i] = master_th->th.th_task_state_memo_stack[i]; - } - for (i=master_th->th.th_task_state_stack_sz; ith.th_task_state_memo_stack; - master_th->th.th_task_state_memo_stack = new_stack; - master_th->th.th_task_state_stack_sz = new_size; - __kmp_free(old_stack); - } - // Store master's task_state on stack - master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; - master_th->th.th_task_state_top++; -#if KMP_NESTED_HOT_TEAMS - if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team - master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; - } - else { -#endif - master_th->th.th_task_state = 0; -#if KMP_NESTED_HOT_TEAMS - } -#endif - } -#if !KMP_NESTED_HOT_TEAMS - KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); -#endif - } - - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", - gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc )); - KMP_DEBUG_ASSERT( team != root->r.r_hot_team || - ( team->t.t_master_tid == 0 && - ( 
team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) )); - KMP_MB(); - - /* now, setup the arguments */ - argv = (void**)team->t.t_argv; -#if OMP_40_ENABLED - if ( ap ) { -#endif /* OMP_40_ENABLED */ - for ( i=argc-1; i >= 0; --i ) -// TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * ); -#else - *argv++ = va_arg( ap, void * ); -#endif -#if OMP_40_ENABLED - } else { - for ( i=0; i < argc; ++i ) - // Get args from parent team for teams construct - argv[i] = team->t.t_parent->t.t_argv[i]; - } -#endif /* OMP_40_ENABLED */ - - /* now actually fork the threads */ - team->t.t_master_active = master_active; - if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong - root->r.r_active = TRUE; - - __kmp_fork_team_threads( root, team, master_th, gtid ); - __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc ); - -#if OMPT_SUPPORT - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; -#endif - - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - - -#if USE_ITT_BUILD - if ( team->t.t_active_level == 1 // only report frames at level 1 -# if OMP_40_ENABLED - && !master_th->th.th_teams_microtask // not in teams construct -# endif /* OMP_40_ENABLED */ - ) { -#if USE_ITT_NOTIFY - if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && - ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) - { - kmp_uint64 tmp_time = 0; - if ( __itt_get_timestamp_ptr ) - tmp_time = __itt_get_timestamp(); - // Internal fork - report frame begin - master_th->th.th_frame_time = tmp_time; - if ( __kmp_forkjoin_frames_mode == 3 ) - team->t.t_region_time = tmp_time; - } else // only one notification scheme (either "submit" or "forking/joined", not both) -#endif /* USE_ITT_NOTIFY */ - if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && - __kmp_forkjoin_frames && 
!__kmp_forkjoin_frames_mode ) - { // Mark start of "parallel" region for VTune. - __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); - } - } -#endif /* USE_ITT_BUILD */ - - /* now go on and do the work */ - KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team ); - KMP_MB(); - KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", - root, team, master_th, gtid)); - -#if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier - } -#endif /* USE_ITT_BUILD */ - -#if OMP_40_ENABLED - if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute -#endif /* OMP_40_ENABLED */ - { - __kmp_internal_fork( loc, gtid, team ); - KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", - root, team, master_th, gtid)); - } - - if (call_context == fork_context_gnu) { - KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); - return TRUE; - } - - /* Invoke microtask for MASTER thread */ - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", - gtid, team->t.t_id, team->t.t_pkfn ) ); - } // END of timer KMP_fork_call block - - { - KMP_TIME_BLOCK(OMP_work); - // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke); - if (! team->t.t_invoke( gtid )) { - KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); - } - } - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", - gtid, team->t.t_id, team->t.t_pkfn ) ); - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); - -#if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - - return TRUE; -} - -#if OMPT_SUPPORT -static inline void -__kmp_join_restore_state( - kmp_info_t *thread, - kmp_team_t *team) -{ - // restore state outside the region - thread->th.ompt_thread_info.state = ((team->t.t_serialized) ? - ompt_state_work_serial : ompt_state_work_parallel); -} - -static inline void -__kmp_join_ompt( - kmp_info_t *thread, - kmp_team_t *team, - ompt_parallel_id_t parallel_id, - fork_context_e fork_context) -{ - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); - } - - __kmp_join_restore_state(thread,team); -} -#endif - -void -__kmp_join_call(ident_t *loc, int gtid -#if OMPT_SUPPORT - , enum fork_context_e fork_context -#endif -#if OMP_40_ENABLED - , int exit_teams -#endif /* OMP_40_ENABLED */ -) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_join_call); - kmp_team_t *team; - kmp_team_t *parent_team; - kmp_info_t *master_th; - kmp_root_t *root; - int master_active; - int i; - - KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid )); - - /* setup current data */ - master_th = __kmp_threads[ gtid ]; - root = master_th->th.th_root; - team = master_th->th.th_team; - parent_team = team->t.t_parent; - - master_th->th.th_ident = loc; - -#if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - -#if KMP_DEBUG - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n", - __kmp_gtid_from_thread( master_th ), team, - team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) ); - KMP_DEBUG_ASSERT( master_th->th.th_task_team == 
team->t.t_task_team[master_th->th.th_task_state] ); - } -#endif - - if( team->t.t_serialized ) { -#if OMP_40_ENABLED - if ( master_th->th.th_teams_microtask ) { - // We are in teams construct - int level = team->t.t_level; - int tlevel = master_th->th.th_teams_level; - if ( level == tlevel ) { - // AC: we haven't incremented it earlier at start of teams construct, - // so do it here - at the end of teams construct - team->t.t_level++; - } else if ( level == tlevel + 1 ) { - // AC: we are exiting parallel inside teams, need to increment serialization - // in order to restore it in the next call to __kmpc_end_serialized_parallel - team->t.t_serialized++; - } - } -#endif /* OMP_40_ENABLED */ - __kmpc_end_serialized_parallel( loc, gtid ); - -#if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_restore_state(master_th, parent_team); - } -#endif - - return; - } - - master_active = team->t.t_master_active; - -#if OMP_40_ENABLED - if (!exit_teams) -#endif /* OMP_40_ENABLED */ - { - // AC: No barrier for internal teams at exit from teams construct. - // But there is barrier for external team (league). - __kmp_internal_join( loc, gtid, team ); - } -#if OMP_40_ENABLED - else { - master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel) - } -#endif /* OMP_40_ENABLED */ - - KMP_MB(); - -#if OMPT_SUPPORT - ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; -#endif - -#if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier - } - - // Mark end of "parallel" region for VTune. 
- if ( team->t.t_active_level == 1 -# if OMP_40_ENABLED - && !master_th->th.th_teams_microtask /* not in teams construct */ -# endif /* OMP_40_ENABLED */ - ) { - master_th->th.th_ident = loc; - // only one notification scheme (either "submit" or "forking/joined", not both) - if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 ) - __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, - 0, loc, master_th->th.th_team_nproc, 1 ); - else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && - ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) - __kmp_itt_region_joined( gtid ); - } // active_level == 1 -#endif /* USE_ITT_BUILD */ - -#if OMP_40_ENABLED - if ( master_th->th.th_teams_microtask && - !exit_teams && - team->t.t_pkfn != (microtask_t)__kmp_teams_master && - team->t.t_level == master_th->th.th_teams_level + 1 ) { - // AC: We need to leave the team structure intact at the end - // of parallel inside the teams construct, so that at the next - // parallel same (hot) team works, only adjust nesting levels - - /* Decrement our nested depth level */ - team->t.t_level --; - team->t.t_active_level --; - KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); - - /* Restore number of threads in the team if needed */ - if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) { - int old_num = master_th->th.th_team_nproc; - int new_num = master_th->th.th_teams_size.nth; - kmp_info_t **other_threads = team->t.t_threads; - team->t.t_nproc = new_num; - for ( i = 0; i < old_num; ++i ) { - other_threads[i]->th.th_team_nproc = new_num; - } - // Adjust states of non-used threads of the team - for ( i = old_num; i < new_num; ++i ) { - // Re-initialize thread's barrier data. 
- int b; - kmp_balign_t * balign = other_threads[i]->th.th_bar; - for ( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; -#endif - } - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Synchronize thread's task state - other_threads[i]->th.th_task_state = master_th->th.th_task_state; - } - } - } - -#if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); - } -#endif - - return; - } -#endif /* OMP_40_ENABLED */ - - /* do cleanup and restore the parent team */ - master_th->th.th_info .ds.ds_tid = team->t.t_master_tid; - master_th->th.th_local.this_construct = team->t.t_master_this_cons; - - master_th->th.th_dispatch = - & parent_team->t.t_dispatch[ team->t.t_master_tid ]; - - /* jc: The following lock has instructions with REL and ACQ semantics, - separating the parallel user code called in this parallel region - from the serial user code called after this function returns. - */ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - -#if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level ) -#endif /* OMP_40_ENABLED */ - { - /* Decrement our nested depth level */ - KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); - } - KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 ); - - KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", - 0, master_th, team ) ); - __kmp_pop_current_task_from_thread( master_th ); - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - // - // Restore master thread's partition. 
- // - master_th->th.th_first_place = team->t.t_first_place; - master_th->th.th_last_place = team->t.t_last_place; -#endif /* OMP_40_ENABLED */ - - updateHWFPControl (team); - - if ( root->r.r_active != master_active ) - root->r.r_active = master_active; - - __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads - - /* this race was fun to find. make sure the following is in the critical - * region otherwise assertions may fail occasionally since the old team - * may be reallocated and the hierarchy appears inconsistent. it is - * actually safe to run and won't cause any bugs, but will cause those - * assertion failures. it's only one deref&assign so might as well put this - * in the critical region */ - master_th->th.th_team = parent_team; - master_th->th.th_team_nproc = parent_team->t.t_nproc; - master_th->th.th_team_master = parent_team->t.t_threads[0]; - master_th->th.th_team_serialized = parent_team->t.t_serialized; - - /* restore serialized team, if need be */ - if( parent_team->t.t_serialized && - parent_team != master_th->th.th_serial_team && - parent_team != root->r.r_root_team ) { - __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) ); - master_th->th.th_serial_team = parent_team; - } - - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack - KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); - // Remember master's state if we re-use this nested hot team - master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; - --master_th->th.th_task_state_top; // pop - // Now restore state at this level - master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; - } - // Copy the task team from the parent team to the master thread - master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state]; - 
KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", - __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) ); - } - - // TODO: GEH - cannot do this assertion because root thread not set up as executing - // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); - master_th->th.th_current_task->td_flags.executing = 1; - - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - -#if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); - } -#endif - - KMP_MB(); - KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid )); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* Check whether we should push an internal control record onto the - serial team stack. If so, do it. */ -void -__kmp_save_internal_controls ( kmp_info_t * thread ) -{ - - if ( thread->th.th_team != thread->th.th_serial_team ) { - return; - } - if (thread->th.th_team->t.t_serialized > 1) { - int push = 0; - - if (thread->th.th_team->t.t_control_stack_top == NULL) { - push = 1; - } else { - if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level != - thread->th.th_team->t.t_serialized ) { - push = 1; - } - } - if (push) { /* push a record on the serial team's stack */ - kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t)); - - copy_icvs( control, & thread->th.th_current_task->td_icvs ); - - control->serial_nesting_level = thread->th.th_team->t.t_serialized; - - control->next = thread->th.th_team->t.t_control_stack_top; - thread->th.th_team->t.t_control_stack_top = control; - } - } -} - -/* Changes set_nproc */ -void -__kmp_set_num_threads( int new_nth, int gtid ) -{ - kmp_info_t *thread; - kmp_root_t *root; - - KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth )); - 
KMP_DEBUG_ASSERT( __kmp_init_serial ); - - if (new_nth < 1) - new_nth = 1; - else if (new_nth > __kmp_max_nth) - new_nth = __kmp_max_nth; - - KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); - thread = __kmp_threads[gtid]; - - __kmp_save_internal_controls( thread ); - - set__nproc( thread, new_nth ); - - // - // If this omp_set_num_threads() call will cause the hot team size to be - // reduced (in the absence of a num_threads clause), then reduce it now, - // rather than waiting for the next parallel region. - // - root = thread->th.th_root; - if ( __kmp_init_parallel && ( ! root->r.r_active ) - && ( root->r.r_hot_team->t.t_nproc > new_nth ) -#if KMP_NESTED_HOT_TEAMS - && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode -#endif - ) { - kmp_team_t *hot_team = root->r.r_hot_team; - int f; - - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - - - // Release the extra threads we don't need any more. - for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) { - KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); - if ( __kmp_tasking_mode != tskm_immediate_exec) { - // When decreasing team size, threads no longer in the team should unref task team. - hot_team->t.t_threads[f]->th.th_task_team = NULL; - } - __kmp_free_thread( hot_team->t.t_threads[f] ); - hot_team->t.t_threads[f] = NULL; - } - hot_team->t.t_nproc = new_nth; -#if KMP_NESTED_HOT_TEAMS - if( thread->th.th_hot_teams ) { - KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team ); - thread->th.th_hot_teams[0].hot_team_nth = new_nth; - } -#endif - - - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - - // - // Update the t_nproc field in the threads that are still active. 
- // - for( f=0 ; f < new_nth; f++ ) { - KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); - hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; - } - // Special flag in case omp_set_num_threads() call - hot_team->t.t_size_changed = -1; - } -} - -/* Changes max_active_levels */ -void -__kmp_set_max_active_levels( int gtid, int max_active_levels ) -{ - kmp_info_t *thread; - - KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - // validate max_active_levels - if( max_active_levels < 0 ) { - KMP_WARNING( ActiveLevelsNegative, max_active_levels ); - // We ignore this call if the user has specified a negative value. - // The current setting won't be changed. The last valid setting will be used. - // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var). - KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); - return; - } - if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) { - // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ] - // We allow a zero value. (implementation defined behavior) - } else { - KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT ); - max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; - // Current upper limit is MAX_INT. (implementation defined behavior) - // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior) - // Actually, the flow should never get here until we use MAX_INT limit. 
- } - KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); - - thread = __kmp_threads[ gtid ]; - - __kmp_save_internal_controls( thread ); - - set__max_active_levels( thread, max_active_levels ); - -} - -/* Gets max_active_levels */ -int -__kmp_get_max_active_levels( int gtid ) -{ - kmp_info_t *thread; - - KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) ); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - thread = __kmp_threads[ gtid ]; - KMP_DEBUG_ASSERT( thread->th.th_current_task ); - KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n", - gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) ); - return thread->th.th_current_task->td_icvs.max_active_levels; -} - -/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ -void -__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk ) -{ - kmp_info_t *thread; -// kmp_team_t *team; - - KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - // Check if the kind parameter is valid, correct if needed. - // Valid parameters should fit in one of two intervals - standard or extended: - // , , , , , - // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 - if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper || - ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) ) - { - // TODO: Hint needs attention in case we change the default schedule. 
- __kmp_msg( - kmp_ms_warning, - KMP_MSG( ScheduleKindOutOfRange, kind ), - KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ), - __kmp_msg_null - ); - kind = kmp_sched_default; - chunk = 0; // ignore chunk value in case of bad kind - } - - thread = __kmp_threads[ gtid ]; - - __kmp_save_internal_controls( thread ); - - if ( kind < kmp_sched_upper_std ) { - if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) { - // differ static chunked vs. unchunked: - // chunk should be invalid to indicate unchunked schedule (which is the default) - thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; - } else { - thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ]; - } - } else { - // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; - thread->th.th_current_task->td_icvs.sched.r_sched_type = - __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; - } - if ( kind == kmp_sched_auto ) { - // ignore parameter chunk for schedule auto - thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; - } else { - thread->th.th_current_task->td_icvs.sched.chunk = chunk; - } -} - -/* Gets def_sched_var ICV values */ -void -__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk ) -{ - kmp_info_t *thread; - enum sched_type th_type; - - KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - thread = __kmp_threads[ gtid ]; - - //th_type = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].r_sched_type; - th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; - - switch ( th_type ) { - case kmp_sch_static: - case kmp_sch_static_greedy: - case kmp_sch_static_balanced: - *kind = kmp_sched_static; - *chunk = 0; // chunk was not set, try to show this fact via zero value - return; - case kmp_sch_static_chunked: - *kind = kmp_sched_static; - break; - case 
kmp_sch_dynamic_chunked: - *kind = kmp_sched_dynamic; - break; - case kmp_sch_guided_chunked: - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - *kind = kmp_sched_guided; - break; - case kmp_sch_auto: - *kind = kmp_sched_auto; - break; - case kmp_sch_trapezoidal: - *kind = kmp_sched_trapezoidal; - break; -/* - case kmp_sch_static_steal: - *kind = kmp_sched_static_steal; - break; -*/ - default: - KMP_FATAL( UnknownSchedulingType, th_type ); - } - - //*chunk = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].chunk; - *chunk = thread->th.th_current_task->td_icvs.sched.chunk; -} - -int -__kmp_get_ancestor_thread_num( int gtid, int level ) { - - int ii, dd; - kmp_team_t *team; - kmp_info_t *thr; - - KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - // validate level - if( level == 0 ) return 0; - if( level < 0 ) return -1; - thr = __kmp_threads[ gtid ]; - team = thr->th.th_team; - ii = team->t.t_level; - if( level > ii ) return -1; - -#if OMP_40_ENABLED - if( thr->th.th_teams_microtask ) { - // AC: we are in teams region where multiple nested teams have same level - int tlevel = thr->th.th_teams_level; // the level of the teams construct - if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) - KMP_DEBUG_ASSERT( ii >= tlevel ); - // AC: As we need to pass by the teams league, we need to artificially increase ii - if ( ii == tlevel ) { - ii += 2; // three teams have same level - } else { - ii ++; // two teams have same level - } - } - } -#endif - - if( ii == level ) return __kmp_tid_from_gtid( gtid ); - - dd = team->t.t_serialized; - level++; - while( ii > level ) - { - for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( ( team->t.t_serialized ) && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - dd = team->t.t_serialized; - 
ii--; - } - } - - return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid ); -} - -int -__kmp_get_team_size( int gtid, int level ) { - - int ii, dd; - kmp_team_t *team; - kmp_info_t *thr; - - KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - // validate level - if( level == 0 ) return 1; - if( level < 0 ) return -1; - thr = __kmp_threads[ gtid ]; - team = thr->th.th_team; - ii = team->t.t_level; - if( level > ii ) return -1; - -#if OMP_40_ENABLED - if( thr->th.th_teams_microtask ) { - // AC: we are in teams region where multiple nested teams have same level - int tlevel = thr->th.th_teams_level; // the level of the teams construct - if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) - KMP_DEBUG_ASSERT( ii >= tlevel ); - // AC: As we need to pass by the teams league, we need to artificially increase ii - if ( ii == tlevel ) { - ii += 2; // three teams have same level - } else { - ii ++; // two teams have same level - } - } - } -#endif - - while( ii > level ) - { - for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( team->t.t_serialized && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - ii--; - } - } - - return team->t.t_nproc; -} - -kmp_r_sched_t -__kmp_get_schedule_global() { -// This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided) -// may be changed by kmp_set_defaults independently. So one can get the updated schedule here. 
- - kmp_r_sched_t r_sched; - - // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided - // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times, - // and thus have different run-time schedules in different roots (even in OMP 2.5) - if ( __kmp_sched == kmp_sch_static ) { - r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy) - } else if ( __kmp_sched == kmp_sch_guided_chunked ) { - r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical) - } else { - r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other - } - - if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set) - r_sched.chunk = KMP_DEFAULT_CHUNK; - } else { - r_sched.chunk = __kmp_chunk; - } - - return r_sched; -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - -/* - * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) - * at least argc number of *t_argv entries for the requested team. - */ -static void -__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ) -{ - - KMP_DEBUG_ASSERT( team ); - if( !realloc || argc > team->t.t_max_argc ) { - - KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n", - team->t.t_id, argc, ( realloc ) ? 
team->t.t_max_argc : 0 )); - /* if previously allocated heap space for args, free them */ - if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] ) - __kmp_free( (void *) team->t.t_argv ); - - if ( argc <= KMP_INLINE_ARGV_ENTRIES ) { - /* use unused space in the cache line for arguments */ - team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; - KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n", - team->t.t_id, team->t.t_max_argc )); - team->t.t_argv = &team->t.t_inline_argv[0]; - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0], - &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], - (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), - "team_%d.t_inline_argv", - team->t.t_id ); - } - } else { - /* allocate space for arguments in the heap */ - team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ? - KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc; - KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n", - team->t.t_id, team->t.t_max_argc )); - team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc ); - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc], - sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", - team->t.t_id ); - } - } - } -} - -static void -__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) -{ - int i; - int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2; -#if KMP_USE_POOLED_ALLOC - // AC: TODO: fix bug here: size of t_disp_buffer should not be multiplied by max_nth! 
- char *ptr = __kmp_allocate(max_nth * - ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buf - + sizeof(kmp_disp_t) + sizeof(int)*6 - //+ sizeof(int) - + sizeof(kmp_r_sched_t) - + sizeof(kmp_taskdata_t) ) ); - - team->t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth; - team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr; - ptr += sizeof(dispatch_shared_info_t) * num_disp_buff; - team->t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth; - team->t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth; - team->t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth; - team->t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth; - team->t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth; - team->t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth; - team->t.t_set_bt_set = (int*) ptr; - ptr += sizeof(int) * max_nth; - //team->t.t_set_max_active_levels = (int*) ptr; ptr += sizeof(int) * max_nth; - team->t.t_set_sched = (kmp_r_sched_t*) ptr; - ptr += sizeof(kmp_r_sched_t) * max_nth; - team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr; - ptr += sizeof(kmp_taskdata_t) * max_nth; -#else - - team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth ); - team->t.t_disp_buffer = (dispatch_shared_info_t*) - __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff ); - team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth ); - //team->t.t_set_max_active_levels = (int*) __kmp_allocate( sizeof(int) * max_nth ); - //team->t.t_set_sched = (kmp_r_sched_t*) __kmp_allocate( sizeof(kmp_r_sched_t) * max_nth ); - team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth ); -#endif - team->t.t_max_nproc = max_nth; - - /* setup dispatch buffers */ - for(i = 0 ; i < num_disp_buff; ++i) - team->t.t_disp_buffer[i].buffer_index = i; -} - -static void -__kmp_free_team_arrays(kmp_team_t *team) { - /* Note: 
this does not free the threads in t_threads (__kmp_free_threads) */ - int i; - for ( i = 0; i < team->t.t_max_nproc; ++ i ) { - if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) { - __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer ); - team->t.t_dispatch[ i ].th_disp_buffer = NULL; - }; // if - }; // for - __kmp_free(team->t.t_threads); - #if !KMP_USE_POOLED_ALLOC - __kmp_free(team->t.t_disp_buffer); - __kmp_free(team->t.t_dispatch); - //__kmp_free(team->t.t_set_max_active_levels); - //__kmp_free(team->t.t_set_sched); - __kmp_free(team->t.t_implicit_task_taskdata); - #endif - team->t.t_threads = NULL; - team->t.t_disp_buffer = NULL; - team->t.t_dispatch = NULL; - //team->t.t_set_sched = 0; - //team->t.t_set_max_active_levels = 0; - team->t.t_implicit_task_taskdata = 0; -} - -static void -__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { - kmp_info_t **oldThreads = team->t.t_threads; - - #if !KMP_USE_POOLED_ALLOC - __kmp_free(team->t.t_disp_buffer); - __kmp_free(team->t.t_dispatch); - //__kmp_free(team->t.t_set_max_active_levels); - //__kmp_free(team->t.t_set_sched); - __kmp_free(team->t.t_implicit_task_taskdata); - #endif - __kmp_allocate_team_arrays(team, max_nth); - - KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*)); - - __kmp_free(oldThreads); -} - -static kmp_internal_control_t -__kmp_get_global_icvs( void ) { - - kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals - -#if OMP_40_ENABLED - KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 ); -#endif /* OMP_40_ENABLED */ - - kmp_internal_control_t g_icvs = { - 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field - (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread) - (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread) - (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for 
whether blocktime is explicitly set - __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime - __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals - __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread) - // (use a max ub on value if __kmp_parallel_initialize not called yet) - __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels - r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], -#endif /* OMP_40_ENABLED */ - NULL //struct kmp_internal_control *next; - }; - - return g_icvs; -} - -static kmp_internal_control_t -__kmp_get_x_global_icvs( const kmp_team_t *team ) { - - kmp_internal_control_t gx_icvs; - gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls - copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs ); - gx_icvs.next = NULL; - - return gx_icvs; -} - -static void -__kmp_initialize_root( kmp_root_t *root ) -{ - int f; - kmp_team_t *root_team; - kmp_team_t *hot_team; - int hot_team_max_nth; - kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals - kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); - KMP_DEBUG_ASSERT( root ); - KMP_ASSERT( ! 
root->r.r_begin ); - - /* setup the root state structure */ - __kmp_init_lock( &root->r.r_begin_lock ); - root->r.r_begin = FALSE; - root->r.r_active = FALSE; - root->r.r_in_parallel = 0; - root->r.r_blocktime = __kmp_dflt_blocktime; - root->r.r_nested = __kmp_dflt_nested; - - /* setup the root team for this task */ - /* allocate the root team structure */ - KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) ); - - root_team = - __kmp_allocate_team( - root, - 1, // new_nproc - 1, // max_nproc -#if OMPT_SUPPORT - 0, // root parallel id -#endif -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], -#endif - &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // master thread is unknown - ); -#if USE_DEBUGGER - // Non-NULL value should be assigned to make the debugger display the root team. - TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 )); -#endif - - KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) ); - - root->r.r_root_team = root_team; - root_team->t.t_control_stack_top = NULL; - - /* initialize root team */ - root_team->t.t_threads[0] = NULL; - root_team->t.t_nproc = 1; - root_team->t.t_serialized = 1; - // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; - root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; - root_team->t.t_sched.chunk = r_sched.chunk; - KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", - root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); - - /* setup the hot team for this task */ - /* allocate the hot team structure */ - KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) ); - - hot_team = - __kmp_allocate_team( - root, - 1, // new_nproc - __kmp_dflt_team_nth_ub * 2, // max_nproc -#if OMPT_SUPPORT - 0, // root parallel id -#endif -#if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], -#endif - &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // master thread is unknown - ); - KF_TRACE( 10, ( 
"__kmp_initialize_root: after hot_team = %p\n", hot_team ) ); - - root->r.r_hot_team = hot_team; - root_team->t.t_control_stack_top = NULL; - - /* first-time initialization */ - hot_team->t.t_parent = root_team; - - /* initialize hot team */ - hot_team_max_nth = hot_team->t.t_max_nproc; - for ( f = 0; f < hot_team_max_nth; ++ f ) { - hot_team->t.t_threads[ f ] = NULL; - }; // for - hot_team->t.t_nproc = 1; - // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; - hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; - hot_team->t.t_sched.chunk = r_sched.chunk; - hot_team->t.t_size_changed = 0; -} - -#ifdef KMP_DEBUG - - -typedef struct kmp_team_list_item { - kmp_team_p const * entry; - struct kmp_team_list_item * next; -} kmp_team_list_item_t; -typedef kmp_team_list_item_t * kmp_team_list_t; - - -static void -__kmp_print_structure_team_accum( // Add team to list of teams. - kmp_team_list_t list, // List of teams. - kmp_team_p const * team // Team to add. -) { - - // List must terminate with item where both entry and next are NULL. - // Team is added to the list only once. - // List is sorted in ascending order by team id. - // Team id is *not* a key. - - kmp_team_list_t l; - - KMP_DEBUG_ASSERT( list != NULL ); - if ( team == NULL ) { - return; - }; // if - - __kmp_print_structure_team_accum( list, team->t.t_parent ); - __kmp_print_structure_team_accum( list, team->t.t_next_pool ); - - // Search list for the team. - l = list; - while ( l->next != NULL && l->entry != team ) { - l = l->next; - }; // while - if ( l->next != NULL ) { - return; // Team has been added before, exit. - }; // if - - // Team is not found. Search list again for insertion point. - l = list; - while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) { - l = l->next; - }; // while - - // Insert team. 
- { - kmp_team_list_item_t * item = - (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); - * item = * l; - l->entry = team; - l->next = item; - } - -} - -static void -__kmp_print_structure_team( - char const * title, - kmp_team_p const * team - -) { - __kmp_printf( "%s", title ); - if ( team != NULL ) { - __kmp_printf( "%2x %p\n", team->t.t_id, team ); - } else { - __kmp_printf( " - (nil)\n" ); - }; // if -} - -static void -__kmp_print_structure_thread( - char const * title, - kmp_info_p const * thread - -) { - __kmp_printf( "%s", title ); - if ( thread != NULL ) { - __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread ); - } else { - __kmp_printf( " - (nil)\n" ); - }; // if -} - -void -__kmp_print_structure( - void -) { - - kmp_team_list_t list; - - // Initialize list of teams. - list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); - list->entry = NULL; - list->next = NULL; - - __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" ); - { - int gtid; - for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { - __kmp_printf( "%2d", gtid ); - if ( __kmp_threads != NULL ) { - __kmp_printf( " %p", __kmp_threads[ gtid ] ); - }; // if - if ( __kmp_root != NULL ) { - __kmp_printf( " %p", __kmp_root[ gtid ] ); - }; // if - __kmp_printf( "\n" ); - }; // for gtid - } - - // Print out __kmp_threads array. 
- __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" ); - if ( __kmp_threads != NULL ) { - int gtid; - for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { - kmp_info_t const * thread = __kmp_threads[ gtid ]; - if ( thread != NULL ) { - __kmp_printf( "GTID %2d %p:\n", gtid, thread ); - __kmp_printf( " Our Root: %p\n", thread->th.th_root ); - __kmp_print_structure_team( " Our Team: ", thread->th.th_team ); - __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team ); - __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc ); - __kmp_print_structure_thread( " Master: ", thread->th.th_team_master ); - __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized ); - __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc ); -#if OMP_40_ENABLED - __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind ); -#endif - __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool ); - __kmp_printf( "\n" ); - __kmp_print_structure_team_accum( list, thread->th.th_team ); - __kmp_print_structure_team_accum( list, thread->th.th_serial_team ); - }; // if - }; // for gtid - } else { - __kmp_printf( "Threads array is not allocated.\n" ); - }; // if - - // Print out __kmp_root array. 
- __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" ); - if ( __kmp_root != NULL ) { - int gtid; - for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { - kmp_root_t const * root = __kmp_root[ gtid ]; - if ( root != NULL ) { - __kmp_printf( "GTID %2d %p:\n", gtid, root ); - __kmp_print_structure_team( " Root Team: ", root->r.r_root_team ); - __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team ); - __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread ); - __kmp_printf( " Active?: %2d\n", root->r.r_active ); - __kmp_printf( " Nested?: %2d\n", root->r.r_nested ); - __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel ); - __kmp_printf( "\n" ); - __kmp_print_structure_team_accum( list, root->r.r_root_team ); - __kmp_print_structure_team_accum( list, root->r.r_hot_team ); - }; // if - }; // for gtid - } else { - __kmp_printf( "Ubers array is not allocated.\n" ); - }; // if - - __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" ); - while ( list->next != NULL ) { - kmp_team_p const * team = list->entry; - int i; - __kmp_printf( "Team %2x %p:\n", team->t.t_id, team ); - __kmp_print_structure_team( " Parent Team: ", team->t.t_parent ); - __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid ); - __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc ); - __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized ); - __kmp_printf( " Number threads: %2d\n", team->t.t_nproc ); - for ( i = 0; i < team->t.t_nproc; ++ i ) { - __kmp_printf( " Thread %2d: ", i ); - __kmp_print_structure_thread( "", team->t.t_threads[ i ] ); - }; // for i - __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool ); - __kmp_printf( "\n" ); - list = list->next; - }; // while - - // Print out __kmp_thread_pool and __kmp_team_pool. 
- __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" ); - __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool ); - __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool ); - __kmp_printf( "\n" ); - - // Free team list. - while ( list != NULL ) { - kmp_team_list_item_t * item = list; - list = list->next; - KMP_INTERNAL_FREE( item ); - }; // while - -} - -#endif - - -//--------------------------------------------------------------------------- -// Stuff for per-thread fast random number generator -// Table of primes - -static const unsigned __kmp_primes[] = { - 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, - 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b, - 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, - 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, - 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801, - 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, - 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, - 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b, - 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, - 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, - 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7, - 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, - 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, - 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b, - 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, - 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f -}; - -//--------------------------------------------------------------------------- -// __kmp_get_random: Get a random number using a linear congruential method. 
- -unsigned short -__kmp_get_random( kmp_info_t * thread ) -{ - unsigned x = thread->th.th_x; - unsigned short r = x>>16; - - thread->th.th_x = x*thread->th.th_a+1; - - KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", - thread->th.th_info.ds.ds_tid, r) ); - - return r; -} -//-------------------------------------------------------- -// __kmp_init_random: Initialize a random number generator - -void -__kmp_init_random( kmp_info_t * thread ) -{ - unsigned seed = thread->th.th_info.ds.ds_tid; - - thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))]; - thread->th.th_x = (seed+1)*thread->th.th_a+1; - KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) ); -} - - -#if KMP_OS_WINDOWS -/* reclaim array entries for root threads that are already dead, returns number reclaimed */ -static int -__kmp_reclaim_dead_roots(void) { - int i, r = 0; - - for(i = 0; i < __kmp_threads_capacity; ++i) { - if( KMP_UBER_GTID( i ) && - !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && - !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state - r += __kmp_unregister_root_other_thread(i); - } - } - return r; -} -#endif - -/* - This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of - free entries generated. - - For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are - already dead. - - On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate - update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to - __kmp_tp_capacity, if threadprivate cache array has been created. - Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. 
- - After any dead root reclamation, if the clipping value allows array expansion to result in the generation - of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows - array expansion to result in the generation of a total of nNeed free slots, the function does that expansion. - Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero, - a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create - as many free slots as possible up to nWish. - - If any argument is negative, the behavior is undefined. -*/ -static int -__kmp_expand_threads(int nWish, int nNeed) { - int added = 0; - int old_tp_cached; - int __kmp_actual_max_nth; - - if(nNeed > nWish) /* normalize the arguments */ - nWish = nNeed; -#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB -/* only for Windows static library */ - /* reclaim array entries for root threads that are already dead */ - added = __kmp_reclaim_dead_roots(); - - if(nNeed) { - nNeed -= added; - if(nNeed < 0) - nNeed = 0; - } - if(nWish) { - nWish -= added; - if(nWish < 0) - nWish = 0; - } -#endif - if(nWish <= 0) - return added; - - while(1) { - int nTarget; - int minimumRequiredCapacity; - int newCapacity; - kmp_info_t **newThreads; - kmp_root_t **newRoot; - - // - // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. - // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth - // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may - // become > __kmp_max_nth in one of two ways: - // - // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] - // may not be resused by another thread, so we may need to increase - // __kmp_threads_capacity to __kmp_max_threads + 1. - // - // 2) New foreign root(s) are encountered. We always register new - // foreign roots. 
This may cause a smaller # of threads to be - // allocated at subsequent parallel regions, but the worker threads - // hang around (and eventually go to sleep) and need slots in the - // __kmp_threads[] array. - // - // Anyway, that is the reason for moving the check to see if - // __kmp_max_threads was exceeded into __kmp_reseerve_threads() - // instead of having it performed here. -BB - // - old_tp_cached = __kmp_tp_cached; - __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth; - KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity); - - /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */ - nTarget = nWish; - if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { - /* can't fulfil nWish, so try nNeed */ - if(nNeed) { - nTarget = nNeed; - if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { - /* possible expansion too small -- give up */ - break; - } - } else { - /* best-effort */ - nTarget = __kmp_actual_max_nth - __kmp_threads_capacity; - if(!nTarget) { - /* can expand at all -- give up */ - break; - } - } - } - minimumRequiredCapacity = __kmp_threads_capacity + nTarget; - - newCapacity = __kmp_threads_capacity; - do{ - newCapacity = - newCapacity <= (__kmp_actual_max_nth >> 1) ? 
- (newCapacity << 1) : - __kmp_actual_max_nth; - } while(newCapacity < minimumRequiredCapacity); - newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE); - newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity ); - KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*)); - KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*)); - memset(newThreads + __kmp_threads_capacity, 0, - (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*)); - memset(newRoot + __kmp_threads_capacity, 0, - (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*)); - - if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { - /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache - while we were allocating the expanded array, and our new capacity is larger than the threadprivate - cache capacity, so we should deallocate the expanded arrays and try again. This is the first check - of a double-check pair. - */ - __kmp_free(newThreads); - continue; /* start over and try again */ - } - __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); - if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { - /* Same check as above, but this time with the lock so we can be sure if we can succeed. */ - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - __kmp_free(newThreads); - continue; /* start over and try again */ - } else { - /* success */ - // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated. 
- // - *(kmp_info_t**volatile*)&__kmp_threads = newThreads; - *(kmp_root_t**volatile*)&__kmp_root = newRoot; - added += newCapacity - __kmp_threads_capacity; - *(volatile int*)&__kmp_threads_capacity = newCapacity; - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - break; /* succeeded, so we can exit the loop */ - } - } - return added; -} - -/* register the current thread as a root thread and obtain our gtid */ -/* we must have the __kmp_initz_lock held at this point */ -/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */ -int -__kmp_register_root( int initial_thread ) -{ - kmp_info_t *root_thread; - kmp_root_t *root; - int gtid; - int capacity; - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - KA_TRACE( 20, ("__kmp_register_root: entered\n")); - KMP_MB(); - - - /* - 2007-03-02: - - If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one, - "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may - return false (that means there is at least one empty slot in __kmp_threads array), but it - is possible the only free slot is #0, which is reserved for initial thread and so cannot be - used for this one. Following code workarounds this bug. - - However, right solution seems to be not reserving slot #0 for initial thread because: - (1) there is no magic in slot #0, - (2) we cannot detect initial thread reliably (the first thread which does serial - initialization may be not a real initial thread). - */ - capacity = __kmp_threads_capacity; - if ( ! 
initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) { - -- capacity; - }; // if - - /* see if there are too many threads */ - if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) { - if ( __kmp_tp_cached ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantRegisterNewThread ), - KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), - KMP_HNT( PossibleSystemLimitOnThreads ), - __kmp_msg_null - ); - } - else { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantRegisterNewThread ), - KMP_HNT( SystemLimitOnThreads ), - __kmp_msg_null - ); - } - }; // if - - /* find an available thread slot */ - /* Don't reassign the zero slot since we need that to only be used by initial - thread */ - for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ ) - ; - KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid )); - KMP_ASSERT( gtid < __kmp_threads_capacity ); - - /* update global accounting */ - __kmp_all_nth ++; - TCW_4(__kmp_nth, __kmp_nth + 1); - - // - // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) - // for low numbers of procs, and method #2 (keyed API call) for higher - // numbers of procs. - // - if ( __kmp_adjust_gtid_mode ) { - if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { - if ( TCR_4(__kmp_gtid_mode) != 2) { - TCW_4(__kmp_gtid_mode, 2); - } - } - else { - if (TCR_4(__kmp_gtid_mode) != 1 ) { - TCW_4(__kmp_gtid_mode, 1); - } - } - } - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime to zero if necessary */ - /* Middle initialization might not have occurred yet */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* setup this new hierarchy */ - if( ! ( root = __kmp_root[gtid] )) { - root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) ); - KMP_DEBUG_ASSERT( ! 
root->r.r_root_team ); - } - - __kmp_initialize_root( root ); - - /* setup new root thread structure */ - if( root->r.r_uber_thread ) { - root_thread = root->r.r_uber_thread; - } else { - root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); - if ( __kmp_storage_map ) { - __kmp_print_thread_storage_map( root_thread, gtid ); - } - root_thread->th.th_info .ds.ds_gtid = gtid; - root_thread->th.th_root = root; - if( __kmp_env_consistency_check ) { - root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid ); - } - #if USE_FAST_MEMORY - __kmp_initialize_fast_memory( root_thread ); - #endif /* USE_FAST_MEMORY */ - - #if KMP_USE_BGET - KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL ); - __kmp_initialize_bget( root_thread ); - #endif - __kmp_init_random( root_thread ); // Initialize random number generator - } - - /* setup the serial team held in reserve by the root thread */ - if( ! root_thread->th.th_serial_team ) { - kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); - KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) ); - - root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1, -#if OMPT_SUPPORT - 0, // root parallel id -#endif -#if OMP_40_ENABLED - proc_bind_default, -#endif - &r_icvs, - 0 USE_NESTED_HOT_ARG(NULL) ); - } - KMP_ASSERT( root_thread->th.th_serial_team ); - KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n", - root_thread->th.th_serial_team ) ); - - /* drop root_thread into place */ - TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); - - root->r.r_root_team->t.t_threads[0] = root_thread; - root->r.r_hot_team ->t.t_threads[0] = root_thread; - root_thread->th.th_serial_team->t.t_threads[0] = root_thread; - root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). 
- root->r.r_uber_thread = root_thread; - - /* initialize the thread, get it ready to go */ - __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid ); - - /* prepare the master thread for get_gtid() */ - __kmp_gtid_set_specific( gtid ); - - __kmp_itt_thread_name( gtid ); - - #ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; - #endif - __kmp_create_worker( gtid, root_thread, __kmp_stksize ); - KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid ); - TCW_4(__kmp_init_gtid, TRUE); - - KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n", - gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ), - root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, - KMP_INIT_BARRIER_STATE ) ); - { // Initialize barrier data. - int b; - for ( b = 0; b < bs_last_barrier; ++ b ) { - root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE; -#if USE_DEBUGGER - root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0; -#endif - }; // for - } - KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE ); - -#if KMP_AFFINITY_SUPPORTED - if ( TCR_4(__kmp_init_middle) ) { - __kmp_affinity_set_init_mask( gtid, TRUE ); - } -#endif /* KMP_AFFINITY_SUPPORTED */ - - __kmp_root_counter ++; - - KMP_MB(); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - - return gtid; -} - -#if KMP_NESTED_HOT_TEAMS -static int -__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level ) -{ - int i, n, nth; - kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; - if( !hot_teams || !hot_teams[level].hot_team ) { - return 0; - } - KMP_DEBUG_ASSERT( level < max_level ); - kmp_team_t *team = hot_teams[level].hot_team; - nth = hot_teams[level].hot_team_nth; - n = nth - 1; // master is not freed - if( level < max_level - 1 ) { - for( i = 0; i < nth; ++i ) { - kmp_info_t *th = team->t.t_threads[i]; - n += __kmp_free_hot_teams( root, th, level + 1, max_level ); - if( i > 0 && th->th.th_hot_teams ) { 
- __kmp_free( th->th.th_hot_teams ); - th->th.th_hot_teams = NULL; - } - } - } - __kmp_free_team( root, team, NULL ); - return n; -} -#endif - -/* Resets a root thread and clear its root and hot teams. - Returns the number of __kmp_threads entries directly and indirectly freed. -*/ -static int -__kmp_reset_root(int gtid, kmp_root_t *root) -{ - kmp_team_t * root_team = root->r.r_root_team; - kmp_team_t * hot_team = root->r.r_hot_team; - int n = hot_team->t.t_nproc; - int i; - - KMP_DEBUG_ASSERT( ! root->r.r_active ); - - root->r.r_root_team = NULL; - root->r.r_hot_team = NULL; - // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call - // to __kmp_free_team(). - __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) ); -#if KMP_NESTED_HOT_TEAMS - if( __kmp_hot_teams_max_level > 1 ) { // need to free nested hot teams and their threads if any - for( i = 0; i < hot_team->t.t_nproc; ++i ) { - kmp_info_t *th = hot_team->t.t_threads[i]; - n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level ); - if( th->th.th_hot_teams ) { - __kmp_free( th->th.th_hot_teams ); - th->th.th_hot_teams = NULL; - } - } - } -#endif - __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) ); - - // - // Before we can reap the thread, we need to make certain that all - // other threads in the teams that had this root as ancestor have stopped trying to steal tasks. 
- // - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - __kmp_wait_to_unref_task_teams(); - } - - #if KMP_OS_WINDOWS - /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ - KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n", - (LPVOID)&(root->r.r_uber_thread->th), - root->r.r_uber_thread->th.th_info.ds.ds_thread ) ); - __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread ); - #endif /* KMP_OS_WINDOWS */ - -#if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - int gtid = __kmp_get_gtid(); - __ompt_thread_end(ompt_thread_initial, gtid); - } -#endif - - TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. - __kmp_reap_thread( root->r.r_uber_thread, 1 ); - - // We canot put root thread to __kmp_thread_pool, so we have to reap it istead of freeing. - root->r.r_uber_thread = NULL; - /* mark root as no longer in use */ - root->r.r_begin = FALSE; - - return n; -} - -void -__kmp_unregister_root_current_thread( int gtid ) -{ - KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid )); - /* this lock should be ok, since unregister_root_current_thread is never called during - * and abort, only during a normal close. 
furthermore, if you have the - * forkjoin lock, you should never try to get the initz lock */ - - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid )); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - return; - } - kmp_root_t *root = __kmp_root[gtid]; - - KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); - KMP_ASSERT( KMP_UBER_GTID( gtid )); - KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); - KMP_ASSERT( root->r.r_active == FALSE ); - - - KMP_MB(); - -#if OMP_41_ENABLED - kmp_info_t * thread = __kmp_threads[gtid]; - kmp_team_t * team = thread->th.th_team; - kmp_task_team_t * task_team = thread->th.th_task_team; - - // we need to wait for the proxy tasks before finishing the thread - if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) { -#if OMPT_SUPPORT - // the runtime is shutting down so we won't report any events - thread->th.ompt_thread_info.state = ompt_state_undefined; -#endif - __kmp_task_team_wait(thread, team, NULL ); - } -#endif - - __kmp_reset_root(gtid, root); - - /* free up this thread slot */ - __kmp_gtid_set_specific( KMP_GTID_DNE ); -#ifdef KMP_TDATA_GTID - __kmp_gtid = KMP_GTID_DNE; -#endif - - KMP_MB(); - KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid )); - - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); -} - -#if KMP_OS_WINDOWS -/* __kmp_forkjoin_lock must be already held - Unregisters a root thread that is not the current thread. Returns the number of - __kmp_threads entries freed as a result. 
- */ -static int -__kmp_unregister_root_other_thread( int gtid ) -{ - kmp_root_t *root = __kmp_root[gtid]; - int r; - - KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid )); - KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); - KMP_ASSERT( KMP_UBER_GTID( gtid )); - KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); - KMP_ASSERT( root->r.r_active == FALSE ); - - r = __kmp_reset_root(gtid, root); - KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid )); - return r; -} -#endif - -#if KMP_DEBUG -void __kmp_task_info() { - - kmp_int32 gtid = __kmp_entry_gtid(); - kmp_int32 tid = __kmp_tid_from_gtid( gtid ); - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - kmp_team_t *steam = this_thr->th.th_serial_team; - kmp_team_t *team = this_thr->th.th_team; - - __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n", - gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent ); -} -#endif // KMP_DEBUG - -/* TODO optimize with one big memclr, take out what isn't needed, - * split responsibility to workers as much as possible, and delay - * initialization of features as much as possible */ -static void -__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid ) -{ - /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker - * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ - kmp_info_t *master = team->t.t_threads[0]; - KMP_DEBUG_ASSERT( this_thr != NULL ); - KMP_DEBUG_ASSERT( this_thr->th.th_serial_team ); - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - KMP_DEBUG_ASSERT( master ); - KMP_DEBUG_ASSERT( master->th.th_root ); - - KMP_MB(); - - TCW_SYNC_PTR(this_thr->th.th_team, team); - - this_thr->th.th_info.ds.ds_tid = tid; - this_thr->th.th_set_nproc = 0; -#if OMP_40_ENABLED - this_thr->th.th_set_proc_bind = 
proc_bind_default; -# if KMP_AFFINITY_SUPPORTED - this_thr->th.th_new_place = this_thr->th.th_current_place; -# endif -#endif - this_thr->th.th_root = master->th.th_root; - - /* setup the thread's cache of the team structure */ - this_thr->th.th_team_nproc = team->t.t_nproc; - this_thr->th.th_team_master = master; - this_thr->th.th_team_serialized = team->t.t_serialized; - TCW_PTR(this_thr->th.th_sleep_loc, NULL); - - KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata ); - - KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", - tid, gtid, this_thr, this_thr->th.th_current_task ) ); - - __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE ); - - KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", - tid, gtid, this_thr, this_thr->th.th_current_task ) ); - // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()? - - /* TODO no worksharing in speculative threads */ - this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ]; - - this_thr->th.th_local.this_construct = 0; - -#ifdef BUILD_TV - this_thr->th.th_local.tv_data = 0; -#endif - - if ( ! this_thr->th.th_pri_common ) { - this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) ); - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( - gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, - sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid - ); - }; // if - this_thr->th.th_pri_head = NULL; - }; // if - - /* Initialize dynamic dispatch */ - { - volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; - /* - * Use team max_nproc since this will never change for the team. - */ - size_t disp_size = sizeof( dispatch_private_info_t ) * - ( team->t.t_max_nproc == 1 ? 
1 : KMP_MAX_DISP_BUF ); - KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) ); - KMP_ASSERT( dispatch ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] ); - - dispatch->th_disp_index = 0; - - if( ! dispatch->th_disp_buffer ) { - dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size ); - - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ], - &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ], - disp_size, "th_%d.th_dispatch.th_disp_buffer " - "(team_%d.t_dispatch[%d].th_disp_buffer)", - gtid, team->t.t_id, gtid ); - } - } else { - memset( & dispatch->th_disp_buffer[0], '\0', disp_size ); - } - - dispatch->th_dispatch_pr_current = 0; - dispatch->th_dispatch_sh_current = 0; - - dispatch->th_deo_fcn = 0; /* ORDERED */ - dispatch->th_dxo_fcn = 0; /* END ORDERED */ - } - - this_thr->th.th_next_pool = NULL; - - if (!this_thr->th.th_task_state_memo_stack) { - size_t i; - this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) ); - this_thr->th.th_task_state_top = 0; - this_thr->th.th_task_state_stack_sz = 4; - for (i=0; ith.th_task_state_stack_sz; ++i) // zero init the stack - this_thr->th.th_task_state_memo_stack[i] = 0; - } - - KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); - KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); - - KMP_MB(); -} - - -/* allocate a new thread for the requesting team. this is only called from within a - * forkjoin critical section. we will first try to get an available thread from the - * thread pool. if none is available, we will fork a new one assuming we are able - * to create a new one. this should be assured, as the caller should check on this - * first. 
- */ -kmp_info_t * -__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) -{ - kmp_team_t *serial_team; - kmp_info_t *new_thr; - int new_gtid; - - KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() )); - KMP_DEBUG_ASSERT( root && team ); -#if !KMP_NESTED_HOT_TEAMS - KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() )); -#endif - KMP_MB(); - - /* first, try to get one from the thread pool */ - if ( __kmp_thread_pool ) { - - new_thr = (kmp_info_t*)__kmp_thread_pool; - __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool; - if ( new_thr == __kmp_thread_pool_insert_pt ) { - __kmp_thread_pool_insert_pt = NULL; - } - TCW_4(new_thr->th.th_in_pool, FALSE); - // - // Don't touch th_active_in_pool or th_active. - // The worker thread adjusts those flags as it sleeps/awakens. - // - - __kmp_thread_pool_nth--; - - KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", - __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid )); - KMP_ASSERT( ! new_thr->th.th_team ); - KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity ); - KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 ); - - /* setup the thread structure */ - __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid ); - KMP_DEBUG_ASSERT( new_thr->th.th_serial_team ); - - TCW_4(__kmp_nth, __kmp_nth + 1); - - new_thr->th.th_task_state = 0; - new_thr->th.th_task_state_top = 0; - new_thr->th.th_task_state_stack_sz = 4; - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to zero if necessar y */ - /* Middle initialization might not have occurred yet */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - -#if KMP_DEBUG - // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG. 
- int b; - kmp_balign_t * balign = new_thr->th.th_bar; - for( b = 0; b < bs_last_barrier; ++ b ) - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#endif - - KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", - __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid )); - - KMP_MB(); - return new_thr; - } - - - /* no, well fork a new one */ - KMP_ASSERT( __kmp_nth == __kmp_all_nth ); - KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity ); - - // - // If this is the first worker thread the RTL is creating, then also - // launch the monitor thread. We try to do this as early as possible. - // - if ( ! TCR_4( __kmp_init_monitor ) ) { - __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); - if ( ! TCR_4( __kmp_init_monitor ) ) { - KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) ); - TCW_4( __kmp_init_monitor, 1 ); - __kmp_create_monitor( & __kmp_monitor ); - KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) ); - #if KMP_OS_WINDOWS - // AC: wait until monitor has started. This is a fix for CQ232808. - // The reason is that if the library is loaded/unloaded in a loop with small (parallel) - // work in between, then there is high probability that monitor thread started after - // the library shutdown. At shutdown it is too late to cope with the problem, because - // when the master is in DllMain (process detach) the monitor has no chances to start - // (it is blocked), and master has no means to inform the monitor that the library has gone, - // because all the memory which the monitor can access is going to be released/reset. - while ( TCR_4(__kmp_init_monitor) < 2 ) { - KMP_YIELD( TRUE ); - } - KF_TRACE( 10, ( "after monitor thread has started\n" ) ); - #endif - } - __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); - } - - KMP_MB(); - for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) { - KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity ); - } - - /* allocate space for it. 
*/ - new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); - - TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); - - if ( __kmp_storage_map ) { - __kmp_print_thread_storage_map( new_thr, new_gtid ); - } - - /* add the reserve serialized team, initialized from the team's master thread */ - { - kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team ); - KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) ); - - new_thr->th.th_serial_team = serial_team = - (kmp_team_t*) __kmp_allocate_team( root, 1, 1, -#if OMPT_SUPPORT - 0, // root parallel id -#endif -#if OMP_40_ENABLED - proc_bind_default, -#endif - &r_icvs, - 0 USE_NESTED_HOT_ARG(NULL) ); - } - KMP_ASSERT ( serial_team ); - serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). - serial_team->t.t_threads[0] = new_thr; - KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", - new_thr ) ); - - /* setup the thread structures */ - __kmp_initialize_info( new_thr, team, new_tid, new_gtid ); - - #if USE_FAST_MEMORY - __kmp_initialize_fast_memory( new_thr ); - #endif /* USE_FAST_MEMORY */ - - #if KMP_USE_BGET - KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL ); - __kmp_initialize_bget( new_thr ); - #endif - - __kmp_init_random( new_thr ); // Initialize random number generator - - /* Initialize these only once when thread is grabbed for a team allocation */ - KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n", - __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); - - int b; - kmp_balign_t * balign = new_thr->th.th_bar; - for(b=0; bth.th_spin_here = FALSE; - new_thr->th.th_next_waiting = 0; - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; - new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; - new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; - new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; -#endif - - 
TCW_4(new_thr->th.th_in_pool, FALSE); - new_thr->th.th_active_in_pool = FALSE; - TCW_4(new_thr->th.th_active, TRUE); - - /* adjust the global counters */ - __kmp_all_nth ++; - __kmp_nth ++; - - // - // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) - // for low numbers of procs, and method #2 (keyed API call) for higher - // numbers of procs. - // - if ( __kmp_adjust_gtid_mode ) { - if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { - if ( TCR_4(__kmp_gtid_mode) != 2) { - TCW_4(__kmp_gtid_mode, 2); - } - } - else { - if (TCR_4(__kmp_gtid_mode) != 1 ) { - TCW_4(__kmp_gtid_mode, 1); - } - } - } - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to zero if necessary */ - /* Middle initialization might not have occurred yet */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* actually fork it and create the new worker thread */ - KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr )); - __kmp_create_worker( new_gtid, new_thr, __kmp_stksize ); - KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr )); - - - KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid )); - KMP_MB(); - return new_thr; -} - -/* - * reinitialize team for reuse. - * - * The hot team code calls this case at every fork barrier, so EPCC barrier - * test are extremely sensitive to changes in it, esp. writes to the team - * struct, which cause a cache invalidation in all threads. - * - * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! - */ -static void -__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) { - KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n", - team->t.t_threads[0], team ) ); - KMP_DEBUG_ASSERT( team && new_icvs); - KMP_DEBUG_ASSERT( ( ! 
TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); - team->t.t_ident = loc; - - team->t.t_id = KMP_GEN_TEAM_ID(); - - // Copy ICVs to the master thread's implicit taskdata - __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); - copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); - - KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n", - team->t.t_threads[0], team ) ); -} - - -/* initialize the team data structure - * this assumes the t_threads and t_max_nproc are already set - * also, we don't touch the arguments */ -static void -__kmp_initialize_team( - kmp_team_t * team, - int new_nproc, - kmp_internal_control_t * new_icvs, - ident_t * loc -) { - KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) ); - - /* verify */ - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - KMP_MB(); - - team->t.t_master_tid = 0; /* not needed */ - /* team->t.t_master_bar; not needed */ - team->t.t_serialized = new_nproc > 1 ? 
0 : 1; - team->t.t_nproc = new_nproc; - - /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ - team->t.t_next_pool = NULL; - /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */ - - TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ - team->t.t_invoke = NULL; /* not needed */ - - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - team->t.t_sched = new_icvs->sched; - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - team->t.t_fp_control_saved = FALSE; /* not needed */ - team->t.t_x87_fpu_control_word = 0; /* not needed */ - team->t.t_mxcsr = 0; /* not needed */ -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - team->t.t_construct = 0; - __kmp_init_lock( & team->t.t_single_lock ); - - team->t.t_ordered .dt.t_value = 0; - team->t.t_master_active = FALSE; - - memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t )); - -#ifdef KMP_DEBUG - team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ -#endif - team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ - - team->t.t_control_stack_top = NULL; - - __kmp_reinitialize_team( team, new_icvs, loc ); - - KMP_MB(); - KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) ); -} - -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED -/* Sets full mask for thread and returns old mask, no changes to structures. */ -static void -__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask ) -{ - if ( KMP_AFFINITY_CAPABLE() ) { - int status; - if ( old_mask != NULL ) { - status = __kmp_get_system_affinity( old_mask, TRUE ); - int error = errno; - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( ChangeThreadAffMaskError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - } - __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE ); - } -} -#endif - -#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - -// -// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 
-// It calculats the worker + master thread's partition based upon the parent -// thread's partition, and binds each worker to a thread in their partition. -// The master thread's partition should already include its current binding. -// -static void -__kmp_partition_places( kmp_team_t *team ) -{ - // - // Copy the master thread's place partion to the team struct - // - kmp_info_t *master_th = team->t.t_threads[0]; - KMP_DEBUG_ASSERT( master_th != NULL ); - kmp_proc_bind_t proc_bind = team->t.t_proc_bind; - int first_place = master_th->th.th_first_place; - int last_place = master_th->th.th_last_place; - int masters_place = master_th->th.th_current_place; - team->t.t_first_place = first_place; - team->t.t_last_place = last_place; - - KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n", - proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id, - masters_place, first_place, last_place ) ); - - switch ( proc_bind ) { - - case proc_bind_default: - // - // serial teams might have the proc_bind policy set to - // proc_bind_default. It doesn't matter, as we don't - // rebind the master thread for any proc_bind policy. 
- // - KMP_DEBUG_ASSERT( team->t.t_nproc == 1 ); - break; - - case proc_bind_master: - { - int f; - int n_th = team->t.t_nproc; - for ( f = 1; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = masters_place; - - KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, masters_place, first_place, last_place ) ); - } - } - break; - - case proc_bind_close: - { - int f; - int n_th = team->t.t_nproc; - int n_places; - if ( first_place <= last_place ) { - n_places = last_place - first_place + 1; - } - else { - n_places = __kmp_affinity_num_masks - first_place + last_place + 1; - } - if ( n_th <= n_places ) { - int place = masters_place; - for ( f = 1; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = place; - - KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, place, first_place, last_place ) ); - } - } - else { - int S, rem, gap, s_count; - S = n_th / n_places; - s_count = 0; - rem = n_th - ( S * n_places ); - gap = rem > 0 ? 
n_places/rem : n_places; - int place = masters_place; - int gap_ct = gap; - for ( f = 0; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = place; - s_count++; - - if ( (s_count == S) && rem && (gap_ct == gap) ) { - // do nothing, add an extra thread to place on next iteration - } - else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { - // we added an extra thread to this place; move to next place - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - s_count = 0; - gap_ct = 1; - rem--; - } - else if (s_count == S) { // place full; don't add extra - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - gap_ct++; - s_count = 0; - } - - KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, th->th.th_new_place, first_place, - last_place ) ); - } - KMP_DEBUG_ASSERT( place == masters_place ); - } - } - break; - - case proc_bind_spread: - { - int f; - int n_th = team->t.t_nproc; - int n_places; - if ( first_place <= last_place ) { - n_places = last_place - first_place + 1; - } - else { - n_places = __kmp_affinity_num_masks - first_place + last_place + 1; - } - if ( n_th <= n_places ) { - int place = masters_place; - int S = n_places/n_th; - int s_count, rem, gap, gap_ct; - rem = n_places - n_th*S; - gap = rem ? 
n_th/rem : 1; - gap_ct = gap; - for ( f = 0; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - th->th.th_first_place = place; - th->th.th_new_place = place; - s_count = 1; - while (s_count < S) { - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - s_count++; - } - if (rem && (gap_ct == gap)) { - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - rem--; - gap_ct = 0; - } - th->th.th_last_place = place; - gap_ct++; - - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - - KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, th->th.th_new_place, - th->th.th_first_place, th->th.th_last_place ) ); - } - KMP_DEBUG_ASSERT( place == masters_place ); - } - else { - int S, rem, gap, s_count; - S = n_th / n_places; - s_count = 0; - rem = n_th - ( S * n_places ); - gap = rem > 0 ? 
n_places/rem : n_places; - int place = masters_place; - int gap_ct = gap; - for ( f = 0; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - th->th.th_first_place = place; - th->th.th_last_place = place; - th->th.th_new_place = place; - s_count++; - - if ( (s_count == S) && rem && (gap_ct == gap) ) { - // do nothing, add an extra thread to place on next iteration - } - else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { - // we added an extra thread to this place; move on to next place - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - s_count = 0; - gap_ct = 1; - rem--; - } - else if (s_count == S) { // place is full; don't add extra thread - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - gap_ct++; - s_count = 0; - } - - KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, th->th.th_new_place, - th->th.th_first_place, th->th.th_last_place) ); - } - KMP_DEBUG_ASSERT( place == masters_place ); - } - } - break; - - default: - break; - } - - KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) ); -} - -#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */ - -/* allocate a new team data structure to use. take one off of the free pool if available */ -kmp_team_t * -__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, -#endif -#if OMP_40_ENABLED - kmp_proc_bind_t new_proc_bind, -#endif - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *master) ) -{ - KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team); - int f; - kmp_team_t *team; - int use_hot_team = ! 
root->r.r_active; - int level = 0; - - KA_TRACE( 20, ("__kmp_allocate_team: called\n")); - KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 ); - KMP_DEBUG_ASSERT( max_nproc >= new_nproc ); - KMP_MB(); - -#if KMP_NESTED_HOT_TEAMS - kmp_hot_team_ptr_t *hot_teams; - if( master ) { - team = master->th.th_team; - level = team->t.t_active_level; - if( master->th.th_teams_microtask ) { // in teams construct? - if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1 - team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams - master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams - ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise - } - } - hot_teams = master->th.th_hot_teams; - if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team ) - { // hot team has already been allocated for given level - use_hot_team = 1; - } else { - use_hot_team = 0; - } - } -#endif - // Optimization to use a "hot" team - if( use_hot_team && new_nproc > 1 ) { - KMP_DEBUG_ASSERT( new_nproc == max_nproc ); -#if KMP_NESTED_HOT_TEAMS - team = hot_teams[level].hot_team; -#else - team = root->r.r_hot_team; -#endif -#if KMP_DEBUG - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n", - team->t.t_task_team[0], team->t.t_task_team[1] )); - } -#endif - - // Has the number of threads changed? - /* Let's assume the most common case is that the number of threads is unchanged, and - put that case first. 
*/ - if (team->t.t_nproc == new_nproc) { // Check changes in number of threads - KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" )); - // This case can mean that omp_set_num_threads() was called and the hot team size - // was already reduced, so we check the special flag - if ( team->t.t_size_changed == -1 ) { - team->t.t_size_changed = 1; - } else { - team->t.t_size_changed = 0; - } - - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - team->t.t_sched = new_icvs->sched; - - __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); - - KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team ) ); - __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); - -#if OMP_40_ENABLED -# if KMP_AFFINITY_SUPPORTED - if ( ( team->t.t_size_changed == 0 ) - && ( team->t.t_proc_bind == new_proc_bind ) ) { - KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n", - team->t.t_id, new_proc_bind, team->t.t_first_place, - team->t.t_last_place ) ); - } - else { - team->t.t_proc_bind = new_proc_bind; - __kmp_partition_places( team ); - } -# else - if ( team->t.t_proc_bind != new_proc_bind ) { - team->t.t_proc_bind = new_proc_bind; - } -# endif /* KMP_AFFINITY_SUPPORTED */ -#endif /* OMP_40_ENABLED */ - } - else if( team->t.t_nproc > new_nproc ) { - KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc )); - - team->t.t_size_changed = 1; -#if KMP_NESTED_HOT_TEAMS - if( __kmp_hot_teams_mode == 0 ) { - // AC: saved number of threads should correspond to team's value in this mode, - // can be bigger in mode 1, when hot team has some threads in reserve - KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); - hot_teams[level].hot_team_nth = new_nproc; -#endif // KMP_NESTED_HOT_TEAMS - /* release the extra threads we don't need any more */ - for( f = new_nproc ; f < team->t.t_nproc ; 
f++ ) { - KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); - if ( __kmp_tasking_mode != tskm_immediate_exec) { - // When decreasing team size, threads no longer in the team should unref task team. - team->t.t_threads[f]->th.th_task_team = NULL; - } - __kmp_free_thread( team->t.t_threads[ f ] ); - team->t.t_threads[ f ] = NULL; - } -#if KMP_NESTED_HOT_TEAMS - } // (__kmp_hot_teams_mode == 0) -#endif // KMP_NESTED_HOT_TEAMS - team->t.t_nproc = new_nproc; - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - team->t.t_sched = new_icvs->sched; - __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); - - /* update the remaining threads */ - for(f = 0; f < new_nproc; ++f) { - team->t.t_threads[f]->th.th_team_nproc = new_nproc; - } - // restore the current task state of the master thread: should be the implicit task - KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team ) ); - - __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); - -#ifdef KMP_DEBUG - for ( f = 0; f < team->t.t_nproc; f++ ) { - KMP_DEBUG_ASSERT( team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); - } -#endif - -#if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; -# if KMP_AFFINITY_SUPPORTED - __kmp_partition_places( team ); -# endif -#endif - } - else { // team->t.t_nproc < new_nproc -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - kmp_affin_mask_t *old_mask; - if ( KMP_AFFINITY_CAPABLE() ) { - KMP_CPU_ALLOC(old_mask); - } -#endif - - KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc )); - - team->t.t_size_changed = 1; - - -#if KMP_NESTED_HOT_TEAMS - int avail_threads = hot_teams[level].hot_team_nth; - if( new_nproc < avail_threads ) - avail_threads = new_nproc; - kmp_info_t **other_threads = team->t.t_threads; - for ( f = team->t.t_nproc; f < avail_threads; ++f ) { - // Adjust barrier data of reserved threads (if any) of the 
team - // Other data will be set in __kmp_initialize_info() below. - int b; - kmp_balign_t * balign = other_threads[f]->th.th_bar; - for ( b = 0; b < bs_last_barrier; ++ b ) { - balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; -#endif - } - } - if( hot_teams[level].hot_team_nth >= new_nproc ) { - // we have all needed threads in reserve, no need to allocate any - // this only possible in mode 1, cannot have reserved threads in mode 0 - KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); - team->t.t_nproc = new_nproc; // just get reserved threads involved - } else { - // we may have some threads in reserve, but not enough - team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any - hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size -#endif // KMP_NESTED_HOT_TEAMS - if(team->t.t_max_nproc < new_nproc) { - /* reallocate larger arrays */ - __kmp_reallocate_team_arrays(team, new_nproc); - __kmp_reinitialize_team( team, new_icvs, NULL ); - } - -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - /* Temporarily set full mask for master thread before - creation of workers. The reason is that workers inherit - the affinity from master, so if a lot of workers are - created on the single core quickly, they don't get - a chance to set their own affinity for a long time. 
- */ - __kmp_set_thread_affinity_mask_full_tmp( old_mask ); -#endif - - /* allocate new threads for the hot team */ - for( f = team->t.t_nproc ; f < new_nproc ; f++ ) { - kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f ); - KMP_DEBUG_ASSERT( new_worker ); - team->t.t_threads[ f ] = new_worker; - - KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n", - team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f, - team->t.t_bar[bs_forkjoin_barrier].b_arrived, - team->t.t_bar[bs_plain_barrier].b_arrived ) ); - - { // Initialize barrier data for new threads. - int b; - kmp_balign_t * balign = new_worker->th.th_bar; - for( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; -#endif - } - } - } - -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - if ( KMP_AFFINITY_CAPABLE() ) { - /* Restore initial master thread's affinity mask */ - __kmp_set_system_affinity( old_mask, TRUE ); - KMP_CPU_FREE(old_mask); - } -#endif -#if KMP_NESTED_HOT_TEAMS - } // end of check of t_nproc vs. new_nproc vs. hot_team_nth -#endif // KMP_NESTED_HOT_TEAMS - /* make sure everyone is syncronized */ - int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below - __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident ); - - /* reinitialize the threads */ - KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); - for (f=0; f < team->t.t_nproc; ++f) - __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); - if (level) { // set th_task_state for new threads in nested hot team - // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the - // th_task_state for the new threads. 
th_task_state for master thread will not be accurate until - // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value. - for (f=old_nproc; f < team->t.t_nproc; ++f) - team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level]; - } - else { // set th_task_state for new threads in non-nested hot team - int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state - for (f=old_nproc; f < team->t.t_nproc; ++f) - team->t.t_threads[f]->th.th_task_state = old_state; - } - -#ifdef KMP_DEBUG - for ( f = 0; f < team->t.t_nproc; ++ f ) { - KMP_DEBUG_ASSERT( team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); - } -#endif - -#if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; -# if KMP_AFFINITY_SUPPORTED - __kmp_partition_places( team ); -# endif -#endif - } // Check changes in number of threads - -#if OMP_40_ENABLED - kmp_info_t *master = team->t.t_threads[0]; - if( master->th.th_teams_microtask ) { - for( f = 1; f < new_nproc; ++f ) { - // propagate teams construct specific info to workers - kmp_info_t *thr = team->t.t_threads[f]; - thr->th.th_teams_microtask = master->th.th_teams_microtask; - thr->th.th_teams_level = master->th.th_teams_level; - thr->th.th_teams_size = master->th.th_teams_size; - } - } -#endif /* OMP_40_ENABLED */ -#if KMP_NESTED_HOT_TEAMS - if( level ) { - // Sync barrier state for nested hot teams, not needed for outermost hot team. 
- for( f = 1; f < new_nproc; ++f ) { - kmp_info_t *thr = team->t.t_threads[f]; - int b; - kmp_balign_t * balign = thr->th.th_bar; - for( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); -#if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; -#endif - } - } - } -#endif // KMP_NESTED_HOT_TEAMS - - /* reallocate space for arguments if necessary */ - __kmp_alloc_argv_entries( argc, team, TRUE ); - team->t.t_argc = argc; - // - // The hot team re-uses the previous task team, - // if untouched during the previous release->gather phase. - // - - KF_TRACE( 10, ( " hot_team = %p\n", team ) ); - -#if KMP_DEBUG - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n", - team->t.t_task_team[0], team->t.t_task_team[1] )); - } -#endif - -#if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); -#endif - - KMP_MB(); - - return team; - } - - /* next, let's try to take one from the team pool */ - KMP_MB(); - for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; ) - { - /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */ - if ( team->t.t_max_nproc >= max_nproc ) { - /* take this team from the team pool */ - __kmp_team_pool = team->t.t_next_pool; - - /* setup the team for fresh use */ - __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); - - KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", - &team->t.t_task_team[0], &team->t.t_task_team[1]) ); - team->t.t_task_team[0] = NULL; - team->t.t_task_team[1] = NULL; - - /* reallocate space for arguments if necessary */ - __kmp_alloc_argv_entries( argc, team, TRUE ); - team->t.t_argc = argc; - - KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", - 
team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); - { // Initialize barrier data. - int b; - for ( b = 0; b < bs_last_barrier; ++ b) { - team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; -#if USE_DEBUGGER - team->t.t_bar[ b ].b_master_arrived = 0; - team->t.t_bar[ b ].b_team_arrived = 0; -#endif - } - } - -#if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; -#endif - - KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id )); - -#if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); -#endif - - KMP_MB(); - - return team; - } - - /* reap team if it is too small, then loop back and check the next one */ - /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */ - /* TODO: Use technique to find the right size hot-team, don't reap them */ - team = __kmp_reap_team( team ); - __kmp_team_pool = team; - } - - /* nothing available in the pool, no matter, make a new team! */ - KMP_MB(); - team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) ); - - /* and set it up */ - team->t.t_max_nproc = max_nproc; - /* NOTE well, for some reason allocating one big buffer and dividing it - * up seems to really hurt performance a lot on the P4, so, let's not use - * this... 
*/ - __kmp_allocate_team_arrays( team, max_nproc ); - - KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) ); - __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); - - KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", - &team->t.t_task_team[0], &team->t.t_task_team[1] ) ); - team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate - team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate - - if ( __kmp_storage_map ) { - __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc ); - } - - /* allocate space for arguments */ - __kmp_alloc_argv_entries( argc, team, FALSE ); - team->t.t_argc = argc; - - KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", - team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); - { // Initialize barrier data. - int b; - for ( b = 0; b < bs_last_barrier; ++ b ) { - team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; -#if USE_DEBUGGER - team->t.t_bar[ b ].b_master_arrived = 0; - team->t.t_bar[ b ].b_team_arrived = 0; -#endif - } - } - -#if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; -#endif - -#if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); - team->t.ompt_serialized_team_info = NULL; -#endif - - KMP_MB(); - - KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id )); - - return team; -} - -/* TODO implement hot-teams at all levels */ -/* TODO implement lazy thread release on demand (disband request) */ - -/* free the team. return it to the team pool. 
release all the threads - * associated with it */ -void -__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) ) -{ - int f; - KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id )); - - /* verify state */ - KMP_DEBUG_ASSERT( root ); - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - - int use_hot_team = team == root->r.r_hot_team; -#if KMP_NESTED_HOT_TEAMS - int level; - kmp_hot_team_ptr_t *hot_teams; - if( master ) { - level = team->t.t_active_level - 1; - if( master->th.th_teams_microtask ) { // in teams construct? - if( master->th.th_teams_size.nteams > 1 ) { - ++level; // level was not increased in teams construct for team_of_masters - } - if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && - master->th.th_teams_level == team->t.t_level ) { - ++level; // level was not increased in teams construct for team_of_workers before the parallel - } // team->t.t_level will be increased inside parallel - } - hot_teams = master->th.th_hot_teams; - if( level < __kmp_hot_teams_max_level ) { - KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team ); - use_hot_team = 1; - } - } -#endif // KMP_NESTED_HOT_TEAMS - - /* team is done working */ - TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library. - team->t.t_copyin_counter = 0; // init counter for possible reuse - // Do not reset pointer to parent team to NULL for hot teams. - - /* if we are non-hot team, release our threads */ - if( ! 
use_hot_team ) { - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Delete task teams - int tt_idx; - for (tt_idx=0; tt_idx<2; ++tt_idx) { - kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; - if ( task_team != NULL ) { - for (f=0; ft.t_nproc; ++f) { // Have all threads unref task teams - team->t.t_threads[f]->th.th_task_team = NULL; - } - KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) ); -#if KMP_NESTED_HOT_TEAMS - __kmp_free_task_team( master, task_team ); -#endif - team->t.t_task_team[tt_idx] = NULL; - } - } - } - - // Reset pointer to parent team only for non-hot teams. - team->t.t_parent = NULL; - - - /* free the worker threads */ - for ( f = 1; f < team->t.t_nproc; ++ f ) { - KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); - __kmp_free_thread( team->t.t_threads[ f ] ); - team->t.t_threads[ f ] = NULL; - } - - - /* put the team back in the team pool */ - /* TODO limit size of team pool, call reap_team if pool too large */ - team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool; - __kmp_team_pool = (volatile kmp_team_t*) team; - } - - KMP_MB(); -} - - -/* reap the team. destroy it, reclaim all its resources and free its memory */ -kmp_team_t * -__kmp_reap_team( kmp_team_t *team ) -{ - kmp_team_t *next_pool = team->t.t_next_pool; - - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - KMP_DEBUG_ASSERT( team->t.t_argv ); - - /* TODO clean the threads that are a part of this? */ - - /* free stuff */ - - __kmp_free_team_arrays( team ); - if ( team->t.t_argv != &team->t.t_inline_argv[0] ) - __kmp_free( (void*) team->t.t_argv ); - __kmp_free( team ); - - KMP_MB(); - return next_pool; -} - -// -// Free the thread. Don't reap it, just place it on the pool of available -// threads. 
-// -// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid -// binding for the affinity mechanism to be useful. -// -// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. -// However, we want to avoid a potential performance problem by always -// scanning through the list to find the correct point at which to insert -// the thread (potential N**2 behavior). To do this we keep track of the -// last place a thread struct was inserted (__kmp_thread_pool_insert_pt). -// With single-level parallelism, threads will always be added to the tail -// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested -// parallelism, all bets are off and we may need to scan through the entire -// free list. -// -// This change also has a potentially large performance benefit, for some -// applications. Previously, as threads were freed from the hot team, they -// would be placed back on the free list in inverse order. If the hot team -// grew back to it's original size, then the freed thread would be placed -// back on the hot team in reverse order. This could cause bad cache -// locality problems on programs where the size of the hot team regularly -// grew and shrunk. -// -// Now, for single-level parallelism, the OMP tid is alway == gtid. -// -void -__kmp_free_thread( kmp_info_t *this_th ) -{ - int gtid; - kmp_info_t **scan; - - KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", - __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid )); - - KMP_DEBUG_ASSERT( this_th ); - - // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team). 
- int b; - kmp_balign_t *balign = this_th->th.th_bar; - for (b=0; bth.th_task_state = 0; - - - /* put thread back on the free pool */ - TCW_PTR(this_th->th.th_team, NULL); - TCW_PTR(this_th->th.th_root, NULL); - TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ - - // - // If the __kmp_thread_pool_insert_pt is already past the new insert - // point, then we need to re-scan the entire list. - // - gtid = this_th->th.th_info.ds.ds_gtid; - if ( __kmp_thread_pool_insert_pt != NULL ) { - KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL ); - if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) { - __kmp_thread_pool_insert_pt = NULL; - } - } - - // - // Scan down the list to find the place to insert the thread. - // scan is the address of a link in the list, possibly the address of - // __kmp_thread_pool itself. - // - // In the absence of nested parallism, the for loop will have 0 iterations. - // - if ( __kmp_thread_pool_insert_pt != NULL ) { - scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool ); - } - else { - scan = (kmp_info_t **)&__kmp_thread_pool; - } - for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid ); - scan = &( (*scan)->th.th_next_pool ) ); - - // - // Insert the new element on the list, and set __kmp_thread_pool_insert_pt - // to its address. 
- // - TCW_PTR(this_th->th.th_next_pool, *scan); - __kmp_thread_pool_insert_pt = *scan = this_th; - KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL ) - || ( this_th->th.th_info.ds.ds_gtid - < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) ); - TCW_4(this_th->th.th_in_pool, TRUE); - __kmp_thread_pool_nth++; - - TCW_4(__kmp_nth, __kmp_nth - 1); - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to user setting or default if necessary */ - /* Middle initialization might never have occurred */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); - if ( __kmp_nth <= __kmp_avail_proc ) { - __kmp_zero_bt = FALSE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - KMP_MB(); -} - - -/* ------------------------------------------------------------------------ */ - -void * -__kmp_launch_thread( kmp_info_t *this_thr ) -{ - int gtid = this_thr->th.th_info.ds.ds_gtid; -/* void *stack_data;*/ - kmp_team_t *(*volatile pteam); - - KMP_MB(); - KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) ); - - if( __kmp_env_consistency_check ) { - this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak? - } - -#if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - this_thr->th.ompt_thread_info.wait_id = 0; - this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - __ompt_thread_begin(ompt_thread_worker, gtid); - } - } -#endif - - /* This is the place where threads wait for work */ - while( ! 
TCR_4(__kmp_global.g.g_done) ) { - KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] ); - KMP_MB(); - - /* wait for work to do */ - KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid )); - -#if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_idle; - } -#endif - - /* No tid yet since not part of a team */ - __kmp_fork_barrier( gtid, KMP_GTID_DNE ); - -#if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - - pteam = (kmp_team_t *(*))(& this_thr->th.th_team); - - /* have we been allocated? */ - if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) { - /* we were just woken up, so run our new task */ - if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) { - int rc; - KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", - gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); - - updateHWFPControl (*pteam); - -#if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - // Initialize OMPT task id for implicit task. 
- int tid = __kmp_tid_from_gtid(gtid); - (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id = - __ompt_task_id_new(tid); - } -#endif - - KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); - { - KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke); - rc = (*pteam)->t.t_invoke( gtid ); - } - KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); - KMP_ASSERT( rc ); - -#if OMPT_SUPPORT - if (ompt_enabled) { - /* no frame set while outside task */ - int tid = __kmp_tid_from_gtid(gtid); - (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0; - - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - KMP_MB(); - KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", - gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); - } - /* join barrier after parallel region */ - __kmp_join_barrier( gtid ); - } - } - TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); - -#if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - __ompt_thread_end(ompt_thread_worker, gtid); - } -#endif - - this_thr->th.th_task_team = NULL; - /* run the destructors for the threadprivate data for this thread */ - __kmp_common_destroy_gtid( gtid ); - - KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) ); - KMP_MB(); - return this_thr; -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_internal_end_dest( void *specific_gtid ) -{ - #if KMP_COMPILER_ICC - #pragma warning( push ) - #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits - #endif - // Make sure no significant bits are lost - int gtid = (kmp_intptr_t)specific_gtid - 1; - #if KMP_COMPILER_ICC - #pragma warning( pop ) - #endif - - KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid)); - /* NOTE: the gtid is stored as gitd+1 in 
the thread-local-storage - * this is because 0 is reserved for the nothing-stored case */ - - /* josh: One reason for setting the gtid specific data even when it is being - destroyed by pthread is to allow gtid lookup through thread specific data - (__kmp_gtid_get_specific). Some of the code, especially stat code, - that gets executed in the call to __kmp_internal_end_thread, actually - gets the gtid through the thread specific data. Setting it here seems - rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread - to run smoothly. - todo: get rid of this after we remove the dependence on - __kmp_gtid_get_specific - */ - if(gtid >= 0 && KMP_UBER_GTID(gtid)) - __kmp_gtid_set_specific( gtid ); - #ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; - #endif - __kmp_internal_end_thread( gtid ); -} - -#if KMP_OS_UNIX && KMP_DYNAMIC_LIB - -// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases destructors work -// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker -// option in makefile.mk works fine. - -__attribute__(( destructor )) -void -__kmp_internal_end_dtor( void ) -{ - __kmp_internal_end_atexit(); -} - -void -__kmp_internal_end_fini( void ) -{ - __kmp_internal_end_atexit(); -} - -#endif - -/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */ -void -__kmp_internal_end_atexit( void ) -{ - KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) ); - /* [Windows] - josh: ideally, we want to completely shutdown the library in this atexit handler, but - stat code that depends on thread specific data for gtid fails because that data becomes - unavailable at some point during the shutdown, so we call __kmp_internal_end_thread - instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the - stat code and use __kmp_internal_end_library to cleanly shutdown the library. - -// TODO: Can some of this comment about GVS be removed? 
- I suspect that the offending stat code is executed when the calling thread tries to - clean up a dead root thread's data structures, resulting in GVS code trying to close - the GVS structures for that thread, but since the stat code uses - __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is - cleaning up itself instead of another thread, it gets confused. This happens because - allowing a thread to unregister and cleanup another thread is a recent modification for - addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a - thread may end up trying to unregister another thread only if thread death does not - trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread - specific data destructor function to detect thread death. For Windows dynamic, there - is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the - workaround is applicable only for Windows static stat library. - */ - __kmp_internal_end_library( -1 ); - #if KMP_OS_WINDOWS - __kmp_close_console(); - #endif -} - -static void -__kmp_reap_thread( - kmp_info_t * thread, - int is_root -) { - - // It is assumed __kmp_forkjoin_lock is acquired. - - int gtid; - - KMP_DEBUG_ASSERT( thread != NULL ); - - gtid = thread->th.th_info.ds.ds_gtid; - - if ( ! is_root ) { - - if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { - /* Assume the threads are at the fork barrier here */ - KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) ); - /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */ - kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread); - __kmp_release_64(&flag); - }; // if - - - // Terminate OS thread. - __kmp_reap_worker( thread ); - - // - // The thread was killed asynchronously. If it was actively - // spinning in the in the thread pool, decrement the global count. 
- // - // There is a small timing hole here - if the worker thread was - // just waking up after sleeping in the pool, had reset it's - // th_active_in_pool flag but not decremented the global counter - // __kmp_thread_pool_active_nth yet, then the global counter - // might not get updated. - // - // Currently, this can only happen as the library is unloaded, - // so there are no harmful side effects. - // - if ( thread->th.th_active_in_pool ) { - thread->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); - } - - // Decrement # of [worker] threads in the pool. - KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 ); - --__kmp_thread_pool_nth; - }; // if - - // Free the fast memory for tasking - #if USE_FAST_MEMORY - __kmp_free_fast_memory( thread ); - #endif /* USE_FAST_MEMORY */ - - __kmp_suspend_uninitialize_thread( thread ); - - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread ); - TCW_SYNC_PTR(__kmp_threads[gtid], NULL); - - -- __kmp_all_nth; - // __kmp_nth was decremented when thread is added to the pool. 
- -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to user setting or default if necessary */ - /* Middle initialization might never have occurred */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); - if ( __kmp_nth <= __kmp_avail_proc ) { - __kmp_zero_bt = FALSE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* free the memory being used */ - if( __kmp_env_consistency_check ) { - if ( thread->th.th_cons ) { - __kmp_free_cons_stack( thread->th.th_cons ); - thread->th.th_cons = NULL; - }; // if - } - - if ( thread->th.th_pri_common != NULL ) { - __kmp_free( thread->th.th_pri_common ); - thread->th.th_pri_common = NULL; - }; // if - - if (thread->th.th_task_state_memo_stack != NULL) { - __kmp_free(thread->th.th_task_state_memo_stack); - thread->th.th_task_state_memo_stack = NULL; - } - - #if KMP_USE_BGET - if ( thread->th.th_local.bget_data != NULL ) { - __kmp_finalize_bget( thread ); - }; // if - #endif - -#if KMP_AFFINITY_SUPPORTED - if ( thread->th.th_affin_mask != NULL ) { - KMP_CPU_FREE( thread->th.th_affin_mask ); - thread->th.th_affin_mask = NULL; - }; // if -#endif /* KMP_AFFINITY_SUPPORTED */ - - __kmp_reap_team( thread->th.th_serial_team ); - thread->th.th_serial_team = NULL; - __kmp_free( thread ); - - KMP_MB(); - -} // __kmp_reap_thread - -static void -__kmp_internal_end(void) -{ - int i; - - /* First, unregister the library */ - __kmp_unregister_library(); - - #if KMP_OS_WINDOWS - /* In Win static library, we can't tell when a root actually dies, so we - reclaim the data structures for any root threads that have died but not - unregistered themselves, in order to shut down cleanly. - In Win dynamic library we also can't tell when a thread dies. 
- */ - __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots - #endif - - for( i=0 ; i<__kmp_threads_capacity ; i++ ) - if( __kmp_root[i] ) - if( __kmp_root[i]->r.r_active ) - break; - KMP_MB(); /* Flush all pending memory write invalidates. */ - TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - - if ( i < __kmp_threads_capacity ) { - // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? - KMP_MB(); /* Flush all pending memory write invalidates. */ - - // - // Need to check that monitor was initialized before reaping it. - // If we are called form __kmp_atfork_child (which sets - // __kmp_init_parallel = 0), then __kmp_monitor will appear to - // contain valid data, but it is only valid in the parent process, - // not the child. - // - // New behavior (201008): instead of keying off of the flag - // __kmp_init_parallel, the monitor thread creation is keyed off - // of the new flag __kmp_init_monitor. - // - __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); - if ( TCR_4( __kmp_init_monitor ) ) { - __kmp_reap_monitor( & __kmp_monitor ); - TCW_4( __kmp_init_monitor, 0 ); - } - __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); - KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); - } else { - /* TODO move this to cleanup code */ - #ifdef KMP_DEBUG - /* make sure that everything has properly ended */ - for ( i = 0; i < __kmp_threads_capacity; i++ ) { - if( __kmp_root[i] ) { -// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here - KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active? - } - } - #endif - - KMP_MB(); - - // Reap the worker threads. - // This is valid for now, but be careful if threads are reaped sooner. - while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool. - // Get the next thread from the pool. - kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool; - __kmp_thread_pool = thread->th.th_next_pool; - // Reap it. 
- thread->th.th_next_pool = NULL; - thread->th.th_in_pool = FALSE; - __kmp_reap_thread( thread, 0 ); - }; // while - __kmp_thread_pool_insert_pt = NULL; - - // Reap teams. - while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool. - // Get the next team from the pool. - kmp_team_t * team = (kmp_team_t *) __kmp_team_pool; - __kmp_team_pool = team->t.t_next_pool; - // Reap it. - team->t.t_next_pool = NULL; - __kmp_reap_team( team ); - }; // while - - __kmp_reap_task_teams( ); - - for ( i = 0; i < __kmp_threads_capacity; ++ i ) { - // TBD: Add some checking... - // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); - } - - /* Make sure all threadprivate destructors get run by joining with all worker - threads before resetting this flag */ - TCW_SYNC_4(__kmp_init_common, FALSE); - - KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) ); - KMP_MB(); - - // - // See note above: One of the possible fixes for CQ138434 / CQ140126 - // - // FIXME: push both code fragments down and CSE them? - // push them into __kmp_cleanup() ? - // - __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); - if ( TCR_4( __kmp_init_monitor ) ) { - __kmp_reap_monitor( & __kmp_monitor ); - TCW_4( __kmp_init_monitor, 0 ); - } - __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); - KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); - - } /* else !__kmp_global.t_active */ - TCW_4(__kmp_init_gtid, FALSE); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - - __kmp_cleanup(); -#if OMPT_SUPPORT - ompt_fini(); -#endif -} - -void -__kmp_internal_end_library( int gtid_req ) -{ - /* if we have already cleaned up, don't try again, it wouldn't be pretty */ - /* this shouldn't be a race condition because __kmp_internal_end() is the - * only place to clear __kmp_serial_init */ - /* we'll check this later too, after we get the lock */ - // 2009-09-06: We do not set g_abort without setting g_done. 
This check looks redundaant, - // because the next check will work in any case. - if( __kmp_global.g.g_abort ) { - KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" )); - /* TODO abort? */ - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" )); - return; - } - - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* find out who we are and what we should do */ - { - int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); - KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req )); - if( gtid == KMP_GTID_SHUTDOWN ) { - KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" )); - return; - } else if( gtid == KMP_GTID_MONITOR ) { - KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" )); - return; - } else if( gtid == KMP_GTID_DNE ) { - KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" )); - /* we don't know who we are, but we may still shutdown the library */ - } else if( KMP_UBER_GTID( gtid )) { - /* unregister ourselves as an uber thread. gtid is no longer valid */ - if( __kmp_root[gtid]->r.r_active ) { - __kmp_global.g.g_abort = -1; - TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid )); - return; - } else { - KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid )); - __kmp_unregister_root_current_thread( gtid ); - } - } else { - /* worker threads may call this function through the atexit handler, if they call exit() */ - /* For now, skip the usual subsequent processing and just dump the debug buffer. 
- TODO: do a thorough shutdown instead - */ - #ifdef DUMP_DEBUG_ON_EXIT - if ( __kmp_debug_buf ) - __kmp_dump_debug_buffer( ); - #endif - return; - } - } - /* synchronize the termination process */ - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - /* have we already finished */ - if( __kmp_global.g.g_abort ) { - KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" )); - /* TODO abort? */ - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - - /* We need this lock to enforce mutex between this reading of - __kmp_threads_capacity and the writing by __kmp_register_root. - Alternatively, we can use a counter of roots that is - atomically updated by __kmp_get_global_thread_id_reg, - __kmp_do_serial_initialize and __kmp_internal_end_*. - */ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - - /* now we can safely conduct the actual termination */ - __kmp_internal_end(); - - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - - KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) ); - - #ifdef DUMP_DEBUG_ON_EXIT - if ( __kmp_debug_buf ) - __kmp_dump_debug_buffer(); - #endif - - #if KMP_OS_WINDOWS - __kmp_close_console(); - #endif - - __kmp_fini_allocator(); - -} // __kmp_internal_end_library - -void -__kmp_internal_end_thread( int gtid_req ) -{ - int i; - - /* if we have already cleaned up, don't try again, it wouldn't be pretty */ - /* this shouldn't be a race condition because __kmp_internal_end() is the - * only place to clear __kmp_serial_init */ - /* we'll check this later too, after we get the lock */ - // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant, - // because the next check will work in any case. 
- if( __kmp_global.g.g_abort ) { - KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" )); - /* TODO abort? */ - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" )); - return; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* find out who we are and what we should do */ - { - int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); - KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req )); - if( gtid == KMP_GTID_SHUTDOWN ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" )); - return; - } else if( gtid == KMP_GTID_MONITOR ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" )); - return; - } else if( gtid == KMP_GTID_DNE ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" )); - return; - /* we don't know who we are */ - } else if( KMP_UBER_GTID( gtid )) { - /* unregister ourselves as an uber thread. gtid is no longer valid */ - if( __kmp_root[gtid]->r.r_active ) { - __kmp_global.g.g_abort = -1; - TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid )); - return; - } else { - KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid )); - __kmp_unregister_root_current_thread( gtid ); - } - } else { - /* just a worker thread, let's leave */ - KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid )); - - if ( gtid >= 0 ) { - __kmp_threads[gtid]->th.th_task_team = NULL; - } - - KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid )); - return; - } - } - #if defined KMP_DYNAMIC_LIB - // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber thread, - // because we will better shutdown later in the library destructor. 
- // The reason of this change is performance problem when non-openmp thread - // in a loop forks and joins many openmp threads. We can save a lot of time - // keeping worker threads alive until the program shutdown. - // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and - // Windows(DPD200287443) that occurs when using critical sections from foreign threads. - KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) ); - return; - #endif - /* synchronize the termination process */ - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - /* have we already finished */ - if( __kmp_global.g.g_abort ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" )); - /* TODO abort? */ - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - - /* We need this lock to enforce mutex between this reading of - __kmp_threads_capacity and the writing by __kmp_register_root. - Alternatively, we can use a counter of roots that is - atomically updated by __kmp_get_global_thread_id_reg, - __kmp_do_serial_initialize and __kmp_internal_end_*. - */ - - /* should we finish the run-time? are all siblings done? 
*/ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - - for ( i = 0; i < __kmp_threads_capacity; ++ i ) { - if ( KMP_UBER_GTID( i ) ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i )); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - }; - } - - /* now we can safely conduct the actual termination */ - - __kmp_internal_end(); - - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - - KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) ); - - #ifdef DUMP_DEBUG_ON_EXIT - if ( __kmp_debug_buf ) - __kmp_dump_debug_buffer(); - #endif -} // __kmp_internal_end_thread - -// ------------------------------------------------------------------------------------------------- -// Library registration stuff. - -static long __kmp_registration_flag = 0; - // Random value used to indicate library initialization. -static char * __kmp_registration_str = NULL; - // Value to be saved in env var __KMP_REGISTERED_LIB_. - - -static inline -char * -__kmp_reg_status_name() { - /* - On RHEL 3u5 if linked statically, getpid() returns different values in each thread. - If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case), - the name of registered_lib_env env var can not be found, because the name will contain different pid. - */ - return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() ); -} // __kmp_reg_status_get - - -void -__kmp_register_library_startup( - void -) { - - char * name = __kmp_reg_status_name(); // Name of the environment variable. 
- int done = 0; - union { - double dtime; - long ltime; - } time; - #if KMP_OS_WINDOWS - __kmp_initialize_system_tick(); - #endif - __kmp_read_system_time( & time.dtime ); - __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL ); - __kmp_registration_str = - __kmp_str_format( - "%p-%lx-%s", - & __kmp_registration_flag, - __kmp_registration_flag, - KMP_LIBRARY_FILE - ); - - KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) ); - - while ( ! done ) { - - char * value = NULL; // Actual value of the environment variable. - - // Set environment variable, but do not overwrite if it is exist. - __kmp_env_set( name, __kmp_registration_str, 0 ); - // Check the variable is written. - value = __kmp_env_get( name ); - if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { - - done = 1; // Ok, environment variable set successfully, exit the loop. - - } else { - - // Oops. Write failed. Another copy of OpenMP RTL is in memory. - // Check whether it alive or dead. - int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. - char * tail = value; - char * flag_addr_str = NULL; - char * flag_val_str = NULL; - char const * file_name = NULL; - __kmp_str_split( tail, '-', & flag_addr_str, & tail ); - __kmp_str_split( tail, '-', & flag_val_str, & tail ); - file_name = tail; - if ( tail != NULL ) { - long * flag_addr = 0; - long flag_val = 0; - KMP_SSCANF( flag_addr_str, "%p", & flag_addr ); - KMP_SSCANF( flag_val_str, "%lx", & flag_val ); - if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) { - // First, check whether environment-encoded address is mapped into addr space. - // If so, dereference it to see if it still has the right value. - - if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) { - neighbor = 1; - } else { - // If not, then we know the other copy of the library is no longer running. 
- neighbor = 2; - }; // if - }; // if - }; // if - switch ( neighbor ) { - case 0 : // Cannot parse environment variable -- neighbor status unknown. - // Assume it is the incompatible format of future version of the library. - // Assume the other library is alive. - // WARN( ... ); // TODO: Issue a warning. - file_name = "unknown library"; - // Attention! Falling to the next case. That's intentional. - case 1 : { // Neighbor is alive. - // Check it is allowed. +/* + * kmp_runtime.c -- KPTS runtime support library + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_atomic.h" +#include "kmp_wrapper_getpid.h" +#include "kmp_environment.h" +#include "kmp_itt.h" +#include "kmp_str.h" +#include "kmp_settings.h" +#include "kmp_i18n.h" +#include "kmp_io.h" +#include "kmp_error.h" +#include "kmp_stats.h" +#include "kmp_wait_release.h" + +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif + +/* these are temporary issues to be dealt with */ +#define KMP_USE_PRCTL 0 +#define KMP_USE_POOLED_ALLOC 0 + +#if KMP_OS_WINDOWS +#include +#endif + +#if defined(KMP_GOMP_COMPAT) +char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes"; +#endif /* defined(KMP_GOMP_COMPAT) */ + +char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: " +#if OMP_40_ENABLED + "4.0 (201307)"; +#else + "3.1 (201107)"; +#endif + +#ifdef KMP_DEBUG +char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable"; +#endif /* KMP_DEBUG */ + + +#define KMP_MIN( x, y ) ( (x) < (y) ? 
(x) : (y) ) + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +kmp_info_t __kmp_monitor; + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* Forward declarations */ + +void __kmp_cleanup( void ); + +static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid ); +static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc ); +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED +static void __kmp_partition_places( kmp_team_t *team ); +#endif +static void __kmp_do_serial_initialize( void ); +void __kmp_fork_barrier( int gtid, int tid ); +void __kmp_join_barrier( int gtid ); +void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc ); + +#ifdef USE_LOAD_BALANCE +static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc ); +#endif + +static int __kmp_expand_threads(int nWish, int nNeed); +#if KMP_OS_WINDOWS +static int __kmp_unregister_root_other_thread( int gtid ); +#endif +static void __kmp_unregister_library( void ); // called by __kmp_internal_end() +static void __kmp_reap_thread( kmp_info_t * thread, int is_root ); +static kmp_info_t *__kmp_thread_pool_insert_pt = NULL; + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* Calculate the identifier of the current thread */ +/* fast (and somewhat portable) way to get unique */ +/* identifier of executing thread. 
*/ +/* returns KMP_GTID_DNE if we haven't been assigned a gtid */ + +int +__kmp_get_global_thread_id( ) +{ + int i; + kmp_info_t **other_threads; + size_t stack_data; + char *stack_addr; + size_t stack_size; + char *stack_base; + + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", + __kmp_nth, __kmp_all_nth )); + + /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a + parallel region, made it return KMP_GTID_DNE to force serial_initialize by + caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee + __kmp_init_gtid for this to work. */ + + if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE; + +#ifdef KMP_TDATA_GTID + if ( TCR_4(__kmp_gtid_mode) >= 3) { + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" )); + return __kmp_gtid; + } +#endif + if ( TCR_4(__kmp_gtid_mode) >= 2) { + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" )); + return __kmp_gtid_get_specific(); + } + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" )); + + stack_addr = (char*) & stack_data; + other_threads = __kmp_threads; + + /* + ATT: The code below is a source of potential bugs due to unsynchronized access to + __kmp_threads array. For example: + 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL. + 2. Current thread is suspended by OS. + 3. Another thread unregisters and finishes (debug versions of free() may fill memory + with something like 0xEF). + 4. Current thread is resumed. + 5. Current thread reads junk from *thr. + TODO: Fix it. 
+ --ln + */ + + for( i = 0 ; i < __kmp_threads_capacity ; i++ ) { + + kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); + if( !thr ) continue; + + stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); + stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); + + /* stack grows down -- search through all of the active threads */ + + if( stack_addr <= stack_base ) { + size_t stack_diff = stack_base - stack_addr; + + if( stack_diff <= stack_size ) { + /* The only way we can be closer than the allocated */ + /* stack size is if we are running on this thread. */ + KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i ); + return i; + } + } + } + + /* get specific to try and determine our gtid */ + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find " + "thread, using TLS\n" )); + i = __kmp_gtid_get_specific(); + + /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ + + /* if we havn't been assigned a gtid, then return code */ + if( i<0 ) return i; + + /* dynamically updated stack window for uber threads to avoid get_specific call */ + if( ! 
TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) { + KMP_FATAL( StackOverflow, i ); + } + + stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; + if( stack_addr > stack_base ) { + TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); + TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, + other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base); + } else { + TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr); + } + + /* Reprint stack bounds for ubermaster since they have been refined */ + if ( __kmp_storage_map ) { + char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; + char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; + __kmp_print_storage_map_gtid( i, stack_beg, stack_end, + other_threads[i]->th.th_info.ds.ds_stacksize, + "th_%d stack (refinement)", i ); + } + return i; +} + +int +__kmp_get_global_thread_id_reg( ) +{ + int gtid; + + if ( !__kmp_init_serial ) { + gtid = KMP_GTID_DNE; + } else +#ifdef KMP_TDATA_GTID + if ( TCR_4(__kmp_gtid_mode) >= 3 ) { + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" )); + gtid = __kmp_gtid; + } else +#endif + if ( TCR_4(__kmp_gtid_mode) >= 2 ) { + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" )); + gtid = __kmp_gtid_get_specific(); + } else { + KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" )); + gtid = __kmp_get_global_thread_id(); + } + + /* we must be a new uber master sibling thread */ + if( gtid == KMP_GTID_DNE ) { + KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. 
" + "Registering a new gtid.\n" )); + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + if( !__kmp_init_serial ) { + __kmp_do_serial_initialize(); + gtid = __kmp_gtid_get_specific(); + } else { + gtid = __kmp_register_root(FALSE); + } + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ + } + + KMP_DEBUG_ASSERT( gtid >=0 ); + + return gtid; +} + +/* caller must hold forkjoin_lock */ +void +__kmp_check_stack_overlap( kmp_info_t *th ) +{ + int f; + char *stack_beg = NULL; + char *stack_end = NULL; + int gtid; + + KA_TRACE(10,("__kmp_check_stack_overlap: called\n")); + if ( __kmp_storage_map ) { + stack_end = (char *) th->th.th_info.ds.ds_stackbase; + stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; + + gtid = __kmp_gtid_from_thread( th ); + + if (gtid == KMP_GTID_MONITOR) { + __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, + "th_%s stack (%s)", "mon", + ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" ); + } else { + __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, + "th_%d stack (%s)", gtid, + ( th->th.th_info.ds.ds_stackgrow ) ? 
"initial" : "actual" ); + } + } + + /* No point in checking ubermaster threads since they use refinement and cannot overlap */ + gtid = __kmp_gtid_from_thread( th ); + if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) + { + KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n")); + if ( stack_beg == NULL ) { + stack_end = (char *) th->th.th_info.ds.ds_stackbase; + stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; + } + + for( f=0 ; f < __kmp_threads_capacity ; f++ ) { + kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); + + if( f_th && f_th != th ) { + char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); + char *other_stack_beg = other_stack_end - + (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); + if((stack_beg > other_stack_beg && stack_beg < other_stack_end) || + (stack_end > other_stack_beg && stack_end < other_stack_end)) { + + /* Print the other stack values before the abort */ + if ( __kmp_storage_map ) + __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end, + (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), + "th_%d stack (overlapped)", + __kmp_gtid_from_thread( f_th ) ); + + __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null ); + } + } + } + } + KA_TRACE(10,("__kmp_check_stack_overlap: returning\n")); +} + + +/* ------------------------------------------------------------------------ */ + +/* ------------------------------------------------------------------------ */ + +void +__kmp_infinite_loop( void ) +{ + static int done = FALSE; + + while (! done) { + KMP_YIELD( 1 ); + } +} + +#define MAX_MESSAGE 512 + +void +__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) 
{ + char buffer[MAX_MESSAGE]; + va_list ap; + + va_start( ap, format); + KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format ); + __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_vprintf( kmp_err, buffer, ap ); +#if KMP_PRINT_DATA_PLACEMENT + int node; + if(gtid >= 0) { + if(p1 <= p2 && (char*)p2 - (char*)p1 == size) { + if( __kmp_storage_map_verbose ) { + node = __kmp_get_host_node(p1); + if(node < 0) /* doesn't work, so don't try this next time */ + __kmp_storage_map_verbose = FALSE; + else { + char *last; + int lastNode; + int localProc = __kmp_get_cpu_from_gtid(gtid); + + p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) ); + p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) ); + if(localProc >= 0) + __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1); + else + __kmp_printf_no_lock(" GTID %d\n", gtid); +# if KMP_USE_PRCTL +/* The more elaborate format is disabled for now because of the prctl hanging bug. */ + do { + last = p1; + lastNode = node; + /* This loop collates adjacent pages with the same host node. */ + do { + (char*)p1 += PAGE_SIZE; + } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); + __kmp_printf_no_lock(" %p-%p memNode %d\n", last, + (char*)p1 - 1, lastNode); + } while(p1 <= p2); +# else + __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, + (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1)); + if(p1 < p2) { + __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, + (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2)); + } +# endif + } + } + } else + __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) ); + } +#endif /* KMP_PRINT_DATA_PLACEMENT */ + __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); +} + +void +__kmp_warn( char const * format, ... 
) +{ + char buffer[MAX_MESSAGE]; + va_list ap; + + if ( __kmp_generate_warnings == kmp_warnings_off ) { + return; + } + + va_start( ap, format ); + + KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format ); + __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_vprintf( kmp_err, buffer, ap ); + __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + + va_end( ap ); +} + +void +__kmp_abort_process() +{ + + // Later threads may stall here, but that's ok because abort() will kill them. + __kmp_acquire_bootstrap_lock( & __kmp_exit_lock ); + + if ( __kmp_debug_buf ) { + __kmp_dump_debug_buffer(); + }; // if + + if ( KMP_OS_WINDOWS ) { + // Let other threads know of abnormal termination and prevent deadlock + // if abort happened during library initialization or shutdown + __kmp_global.g.g_abort = SIGABRT; + + /* + On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing. + Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior() + works well, but this function is not available in VS7 (this is not problem for DLL, but + it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does + not help, at least in some versions of MS C RTL. + + It seems following sequence is the only way to simulate abort() and avoid pop-up error + box. + */ + raise( SIGABRT ); + _exit( 3 ); // Just in case, if signal ignored, exit anyway. + } else { + abort(); + }; // if + + __kmp_infinite_loop(); + __kmp_release_bootstrap_lock( & __kmp_exit_lock ); + +} // __kmp_abort_process + +void +__kmp_abort_thread( void ) +{ + // TODO: Eliminate g_abort global variable and this function. + // In case of abort just call abort(), it will kill all the threads. + __kmp_infinite_loop(); +} // __kmp_abort_thread + +/* ------------------------------------------------------------------------ */ + +/* + * Print out the storage map for the major kmp_info_t thread data structures + * that are allocated together. 
+ */ + +static void +__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid ) +{ + __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid ); + + __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t), + "th_%d.th_info", gtid ); + + __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t), + "th_%d.th_local", gtid ); + + __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], + sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid ); + + __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier], + &thr->th.th_bar[bs_plain_barrier+1], + sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid); + + __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier], + &thr->th.th_bar[bs_forkjoin_barrier+1], + sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid); + + #if KMP_FAST_REDUCTION_BARRIER + __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier], + &thr->th.th_bar[bs_reduction_barrier+1], + sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid); + #endif // KMP_FAST_REDUCTION_BARRIER +} + +/* + * Print out the storage map for the major kmp_team_t team data structures + * that are allocated together. + */ + +static void +__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr ) +{ + int num_disp_buff = team->t.t_max_nproc > 1 ? 
KMP_MAX_DISP_BUF : 2; + __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d", + header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier], + sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id ); + + + __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1], + sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1], + sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id ); + + #if KMP_FAST_REDUCTION_BARRIER + __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1], + sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id ); + #endif // KMP_FAST_REDUCTION_BARRIER + + __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], + sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], + sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff], + sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer", + header, team_id ); + + /* + __kmp_print_storage_map_gtid( -1, &team->t.t_set_nproc[0], &team->t.t_set_nproc[num_thr], + sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_set_dynamic[0], &team->t.t_set_dynamic[num_thr], + sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_set_nested[0], &team->t.t_set_nested[num_thr], + sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id ); + + __kmp_print_storage_map_gtid( -1, 
&team->t.t_set_blocktime[0], &team->t.t_set_blocktime[num_thr], + sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_intervals[0], &team->t.t_set_bt_intervals[num_thr], + sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_set[0], &team->t.t_set_bt_set[num_thr], + sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id ); + + //__kmp_print_storage_map_gtid( -1, &team->t.t_set_max_active_levels[0], &team->t.t_set_max_active_levels[num_thr], + // sizeof(int) * num_thr, "%s_%d.t_set_max_active_levels", header, team_id ); + + __kmp_print_storage_map_gtid( -1, &team->t.t_set_sched[0], &team->t.t_set_sched[num_thr], + sizeof(kmp_r_sched_t) * num_thr, "%s_%d.t_set_sched", header, team_id ); +#if OMP_40_ENABLED + __kmp_print_storage_map_gtid( -1, &team->t.t_set_proc_bind[0], &team->t.t_set_proc_bind[num_thr], + sizeof(kmp_proc_bind_t) * num_thr, "%s_%d.t_set_proc_bind", header, team_id ); +#endif + */ + + __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data, + sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id ); +} + +static void __kmp_init_allocator() {} +static void __kmp_fini_allocator() {} + +/* ------------------------------------------------------------------------ */ + +#ifdef KMP_DYNAMIC_LIB +# if KMP_OS_WINDOWS + + +static void +__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) { + // TODO: Change to __kmp_break_bootstrap_lock(). + __kmp_init_bootstrap_lock( lck ); // make the lock released +} + +static void +__kmp_reset_locks_on_process_detach( int gtid_req ) { + int i; + int thread_count; + + // PROCESS_DETACH is expected to be called by a thread + // that executes ProcessExit() or FreeLibrary(). + // OS terminates other threads (except the one calling ProcessExit or FreeLibrary). + // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock. 
+ // However, in fact, some threads can be still alive here, although being about to be terminated. + // The threads in the array with ds_thread==0 are most suspicious. + // Actually, it can be not safe to access the __kmp_threads[]. + + // TODO: does it make sense to check __kmp_roots[] ? + + // Let's check that there are no other alive threads registered with the OMP lib. + while( 1 ) { + thread_count = 0; + for( i = 0; i < __kmp_threads_capacity; ++i ) { + if( !__kmp_threads ) continue; + kmp_info_t* th = __kmp_threads[ i ]; + if( th == NULL ) continue; + int gtid = th->th.th_info.ds.ds_gtid; + if( gtid == gtid_req ) continue; + if( gtid < 0 ) continue; + DWORD exit_val; + int alive = __kmp_is_thread_alive( th, &exit_val ); + if( alive ) { + ++thread_count; + } + } + if( thread_count == 0 ) break; // success + } + + // Assume that I'm alone. + + // Now it might be probably safe to check and reset locks. + // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. + __kmp_reset_lock( &__kmp_forkjoin_lock ); + #ifdef KMP_DEBUG + __kmp_reset_lock( &__kmp_stdio_lock ); + #endif // KMP_DEBUG + + +} + +BOOL WINAPI +DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) { + //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + + switch( fdwReason ) { + + case DLL_PROCESS_ATTACH: + KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" )); + + return TRUE; + + case DLL_PROCESS_DETACH: + KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n", + __kmp_gtid_get_specific() )); + + if( lpReserved != NULL ) + { + // lpReserved is used for telling the difference: + // lpReserved == NULL when FreeLibrary() was called, + // lpReserved != NULL when the process terminates. + // When FreeLibrary() is called, worker threads remain alive. + // So they will release the forkjoin lock by themselves. + // When the process terminates, worker threads disappear triggering + // the problem of unreleased forkjoin lock as described below. 
+ + // A worker thread can take the forkjoin lock + // in __kmp_suspend_template()->__kmp_rml_decrease_load_before_sleep(). + // The problem comes up if that worker thread becomes dead + // before it releases the forkjoin lock. + // The forkjoin lock remains taken, while the thread + // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below + // will try to take the forkjoin lock and will always fail, + // so that the application will never finish [normally]. + // This scenario is possible if __kmpc_end() has not been executed. + // It looks like it's not a corner case, but common cases: + // - the main function was compiled by an alternative compiler; + // - the main function was compiled by icl but without /Qopenmp (application with plugins); + // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP. + // - alive foreign thread prevented __kmpc_end from doing cleanup. + + // This is a hack to work around the problem. + // TODO: !!! to figure out something better. 
+ __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() ); + } + + __kmp_internal_end_library( __kmp_gtid_get_specific() ); + + return TRUE; + + case DLL_THREAD_ATTACH: + KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" )); + + /* if we wanted to register new siblings all the time here call + * __kmp_get_gtid(); */ + return TRUE; + + case DLL_THREAD_DETACH: + KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n", + __kmp_gtid_get_specific() )); + + __kmp_internal_end_thread( __kmp_gtid_get_specific() ); + return TRUE; + } + + return TRUE; +} + +# endif /* KMP_OS_WINDOWS */ +#endif /* KMP_DYNAMIC_LIB */ + + +/* ------------------------------------------------------------------------ */ + +/* Change the library type to "status" and return the old type */ +/* called from within initialization routines where __kmp_initz_lock is held */ +int +__kmp_change_library( int status ) +{ + int old_status; + + old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count) + + if (status) { + __kmp_yield_init |= 1; // throughput => turnaround (odd init count) + } + else { + __kmp_yield_init &= ~1; // turnaround => throughput (even init count) + } + + return old_status; // return previous setting of whether KMP_LIBRARY=throughput +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* __kmp_parallel_deo -- + * Wait until it's our turn. 
+ */ +void +__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + int gtid = *gtid_ref; +#ifdef BUILD_PARALLEL_ORDERED + kmp_team_t *team = __kmp_team_from_gtid( gtid ); +#endif /* BUILD_PARALLEL_ORDERED */ + + if( __kmp_env_consistency_check ) { + if( __kmp_threads[gtid]->th.th_root->r.r_active ) +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 ); +#else + __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL ); +#endif + } +#ifdef BUILD_PARALLEL_ORDERED + if( !team->t.t_serialized ) { + KMP_MB(); + KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL); + KMP_MB(); + } +#endif /* BUILD_PARALLEL_ORDERED */ +} + +/* __kmp_parallel_dxo -- + * Signal the next task: hand the ordered turn to thread (tid + 1) % t_nproc of this team. + */ + +void +__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + int gtid = *gtid_ref; +#ifdef BUILD_PARALLEL_ORDERED + int tid = __kmp_tid_from_gtid( gtid ); + kmp_team_t *team = __kmp_team_from_gtid( gtid ); +#endif /* BUILD_PARALLEL_ORDERED */ + + if( __kmp_env_consistency_check ) { + if( __kmp_threads[gtid]->th.th_root->r.r_active ) + __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref ); + } +#ifdef BUILD_PARALLEL_ORDERED + if ( ! team->t.t_serialized ) { + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* use the tid of the next thread in this team */ + /* TODO replace with general release procedure */ + team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc ); + +#if OMPT_SUPPORT && OMPT_BLAME + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { + /* accept blame for "ordered" waiting */ + kmp_info_t *this_thread = __kmp_threads[gtid]; + ompt_callbacks.ompt_callback(ompt_event_release_ordered)( + this_thread->th.ompt_thread_info.wait_id); + } +#endif + + KMP_MB(); /* Flush all pending memory write invalidates.
*/ + } +#endif /* BUILD_PARALLEL_ORDERED */ +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* The BARRIER for a SINGLE process section is always explicit */ + +int +__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ) +{ + int status; + kmp_info_t *th; + kmp_team_t *team; + + if( ! TCR_4(__kmp_init_parallel) ) + __kmp_parallel_initialize(); + + th = __kmp_threads[ gtid ]; + team = th->th.th_team; + status = 0; + + th->th.th_ident = id_ref; + + if ( team->t.t_serialized ) { + status = 1; + } else { + kmp_int32 old_this = th->th.th_local.this_construct; + + ++th->th.th_local.this_construct; + /* try to set team count to thread count--success means thread got the + single block + */ + /* TODO: Should this be acquire or release? 
*/ + status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this, + th->th.th_local.this_construct); +#if USE_ITT_BUILD + if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) && +#if OMP_40_ENABLED + th->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1 ) + { // Only report metadata by master of active team at level 1 + __kmp_itt_metadata_single( id_ref ); + } +#endif /* USE_ITT_BUILD */ + } + + if( __kmp_env_consistency_check ) { + if (status && push_ws) { + __kmp_push_workshare( gtid, ct_psingle, id_ref ); + } else { + __kmp_check_workshare( gtid, ct_psingle, id_ref ); + } + } +#if USE_ITT_BUILD + if ( status ) { + __kmp_itt_single_start( gtid ); + } +#endif /* USE_ITT_BUILD */ + return status; +} + +void +__kmp_exit_single( int gtid ) +{ +#if USE_ITT_BUILD + __kmp_itt_single_end( gtid ); +#endif /* USE_ITT_BUILD */ + if( __kmp_env_consistency_check ) + __kmp_pop_workshare( gtid, ct_psingle, NULL ); +} + + +/* + * determine if we can go parallel or must use a serialized parallel region and + * how many threads we can use + * set_nproc is the number of threads requested for the team + * returns 0 if we should serialize or only use one thread, + * otherwise the number of threads to use + * The forkjoin lock is held by the caller. + */ +static int +__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team, + int master_tid, int set_nthreads +#if OMP_40_ENABLED + , int enter_teams +#endif /* OMP_40_ENABLED */ +) +{ + int capacity; + int new_nthreads; + KMP_DEBUG_ASSERT( __kmp_init_serial ); + KMP_DEBUG_ASSERT( root && parent_team ); + + // + // If dyn-var is set, dynamically adjust the number of desired threads, + // according to the method specified by dynamic_mode. + // + new_nthreads = set_nthreads; + if ( ! 
get__dynamic_2( parent_team, master_tid ) ) { + ; + } +#ifdef USE_LOAD_BALANCE + else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { + new_nthreads = __kmp_load_balance_nproc( root, set_nthreads ); + if ( new_nthreads == 1 ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n", + master_tid )); + return 1; + } + if ( new_nthreads < set_nthreads ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n", + master_tid, new_nthreads )); + } + } +#endif /* USE_LOAD_BALANCE */ + else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { + new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1 + : root->r.r_hot_team->t.t_nproc); + if ( new_nthreads <= 1 ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n", + master_tid )); + return 1; + } + if ( new_nthreads < set_nthreads ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n", + master_tid, new_nthreads )); + } + else { + new_nthreads = set_nthreads; + } + } + else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { + if ( set_nthreads > 2 ) { + new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] ); + new_nthreads = ( new_nthreads % set_nthreads ) + 1; + if ( new_nthreads == 1 ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n", + master_tid )); + return 1; + } + if ( new_nthreads < set_nthreads ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n", + master_tid, new_nthreads )); + } + } + } + else { + KMP_ASSERT( 0 ); + } + + // + // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. + // + if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : + root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) { + int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 
1 : + root->r.r_hot_team->t.t_nproc ); + if ( tl_nthreads <= 0 ) { + tl_nthreads = 1; + } + + // + // If dyn-var is false, emit a 1-time warning. + // + if ( ! get__dynamic_2( parent_team, master_tid ) + && ( ! __kmp_reserve_warn ) ) { + __kmp_reserve_warn = 1; + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ), + KMP_HNT( Unset_ALL_THREADS ), + __kmp_msg_null + ); + } + if ( tl_nthreads == 1 ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n", + master_tid )); + return 1; + } + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n", + master_tid, tl_nthreads )); + new_nthreads = tl_nthreads; + } + + + // + // Check if the threads array is large enough, or needs expanding. + // + // See comment in __kmp_register_root() about the adjustment if + // __kmp_threads[0] == NULL. + // + capacity = __kmp_threads_capacity; + if ( TCR_PTR(__kmp_threads[0]) == NULL ) { + --capacity; + } + if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : + root->r.r_hot_team->t.t_nproc ) > capacity ) { + // + // Expand the threads array. + // + int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : + root->r.r_hot_team->t.t_nproc ) - capacity; + int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired); + if ( slotsAdded < slotsRequired ) { + // + // The threads array was not expanded enough. + // + new_nthreads -= ( slotsRequired - slotsAdded ); + KMP_ASSERT( new_nthreads >= 1 ); + + // + // If dyn-var is false, emit a 1-time warning. + // + if ( ! get__dynamic_2( parent_team, master_tid ) + && ( ! 
__kmp_reserve_warn ) ) { + __kmp_reserve_warn = 1; + if ( __kmp_tp_cached ) { + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), + KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), + KMP_HNT( PossibleSystemLimitOnThreads ), + __kmp_msg_null + ); + } + else { + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), + KMP_HNT( SystemLimitOnThreads ), + __kmp_msg_null + ); + } + } + } + } + + if ( new_nthreads == 1 ) { + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n", + __kmp_get_gtid(), set_nthreads ) ); + return 1; + } + + KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n", + __kmp_get_gtid(), new_nthreads, set_nthreads )); + return new_nthreads; +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* allocate threads from the thread pool and assign them to the new team */ +/* we are assured that there are enough threads available, because we + * checked on that earlier within critical section forkjoin */ + +static void +__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team, + kmp_info_t *master_th, int master_gtid ) +{ + int i; + int use_hot_team; + + KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) ); + KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() ); + KMP_MB(); + + /* first, let's setup the master thread */ + master_th->th.th_info.ds.ds_tid = 0; + master_th->th.th_team = team; + master_th->th.th_team_nproc = team->t.t_nproc; + master_th->th.th_team_master = master_th; + master_th->th.th_team_serialized = FALSE; + master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ]; + + /* make sure we are not the optimized hot team */ +#if KMP_NESTED_HOT_TEAMS + use_hot_team = 0; + kmp_hot_team_ptr_t *hot_teams = 
master_th->th.th_hot_teams; + if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0 + int level = team->t.t_active_level - 1; // index in array of hot teams + if( master_th->th.th_teams_microtask ) { // are we inside the teams? + if( master_th->th.th_teams_size.nteams > 1 ) { + ++level; // level was not increased in teams construct for team_of_masters + } + if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && + master_th->th.th_teams_level == team->t.t_level ) { + ++level; // level was not increased in teams construct for team_of_workers before the parallel + } // team->t.t_level will be increased inside parallel + } + if( level < __kmp_hot_teams_max_level ) { + if( hot_teams[level].hot_team ) { + // hot team has already been allocated for given level + KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); + use_hot_team = 1; // the team is ready to use + } else { + use_hot_team = 0; // AC: threads are not allocated yet + hot_teams[level].hot_team = team; // remember new hot team + hot_teams[level].hot_team_nth = team->t.t_nproc; + } + } else { + use_hot_team = 0; + } + } +#else + use_hot_team = team == root->r.r_hot_team; +#endif + if ( !use_hot_team ) { + + /* install the master thread */ + team->t.t_threads[ 0 ] = master_th; + __kmp_initialize_info( master_th, team, 0, master_gtid ); + + /* now, install the worker threads */ + for ( i=1 ; i < team->t.t_nproc ; i++ ) { + + /* fork or reallocate a new thread and install it in team */ + kmp_info_t *thr = __kmp_allocate_thread( root, team, i ); + team->t.t_threads[ i ] = thr; + KMP_DEBUG_ASSERT( thr ); + KMP_DEBUG_ASSERT( thr->th.th_team == team ); + /* align team and thread arrived states */ + KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n", + __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0, + __kmp_gtid_from_tid( i, team ), team->t.t_id, i, + team->t.t_bar[ bs_forkjoin_barrier ].b_arrived, + team->t.t_bar[ bs_plain_barrier 
].b_arrived ) ); +#if OMP_40_ENABLED + thr->th.th_teams_microtask = master_th->th.th_teams_microtask; + thr->th.th_teams_level = master_th->th.th_teams_level; + thr->th.th_teams_size = master_th->th.th_teams_size; +#endif + { // Initialize threads' barrier data. + int b; + kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar; + for ( b = 0; b < bs_last_barrier; ++ b ) { + balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); +#if USE_DEBUGGER + balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; +#endif + }; // for b + } + } + +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + __kmp_partition_places( team ); +#endif + + } + + KMP_MB(); +} + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +// +// Propagate any changes to the floating point control registers out to the team +// We try to avoid unnecessary writes to the relevant cache line in the team structure, +// so we don't make changes unless they are needed. +// +inline static void +propagateFPControl(kmp_team_t * team) +{ + if ( __kmp_inherit_fp_control ) { + kmp_int16 x87_fpu_control_word; + kmp_uint32 mxcsr; + + // Get master values of FPU control flags (both X87 and vector) + __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); + __kmp_store_mxcsr( &mxcsr ); + mxcsr &= KMP_X86_MXCSR_MASK; + + // There is no point looking at t_fp_control_saved here. + // If it is TRUE, we still have to update the values if they are different from those we now have. + // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure + // that the values in the team are the same as those we have. + // So, this code achieves what we need whether or not t_fp_control_saved is true. + // By checking whether the value needs updating we avoid unnecessary writes that would put the + // cache-line into a written state, causing all threads in the team to have to read it again. 
+ if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) { + team->t.t_x87_fpu_control_word = x87_fpu_control_word; + } + if ( team->t.t_mxcsr != mxcsr ) { + team->t.t_mxcsr = mxcsr; + } + // Although we don't use this value, other code in the runtime wants to know whether it should restore them. + // So we must ensure it is correct. + if (!team->t.t_fp_control_saved) { + team->t.t_fp_control_saved = TRUE; + } + } + else { + // Similarly here. Don't write to this cache-line in the team structure unless we have to. + if (team->t.t_fp_control_saved) + team->t.t_fp_control_saved = FALSE; + } +} + +// Do the opposite, setting the hardware registers to the updated values from the team. +inline static void +updateHWFPControl(kmp_team_t * team) +{ + if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) { + // + // Only reset the fp control regs if they differ from the values saved in the team, + // i.e. if they have been changed in the parallel region that we are exiting. + // + kmp_int16 x87_fpu_control_word; + kmp_uint32 mxcsr; + __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); + __kmp_store_mxcsr( &mxcsr ); + mxcsr &= KMP_X86_MXCSR_MASK; + + if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) { + __kmp_clear_x87_fpu_status_word(); + __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word ); + } + + if ( team->t.t_mxcsr != mxcsr ) { + __kmp_load_mxcsr( &team->t.t_mxcsr ); + } + } +} +#else +# define propagateFPControl(x) ((void)0) +# define updateHWFPControl(x) ((void)0) +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +static void +__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration + +/* + * Run a parallel region that has been serialized, so runs only in a team of the single master thread.
+ */ +void +__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) +{ + kmp_info_t *this_thr; + kmp_team_t *serial_team; + + KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) ); + + /* Skip all this code for autopar serialized loops since it results in + unacceptable overhead */ + if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) ) + return; + + if( ! TCR_4( __kmp_init_parallel ) ) + __kmp_parallel_initialize(); + + this_thr = __kmp_threads[ global_tid ]; + serial_team = this_thr->th.th_serial_team; + + /* utilize the serialized team held by this thread */ + KMP_DEBUG_ASSERT( serial_team ); + KMP_MB(); + + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); + KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL ); + KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n", + global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) ); + this_thr->th.th_task_team = NULL; + } + +#if OMP_40_ENABLED + kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; + if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { + proc_bind = proc_bind_false; + } + else if ( proc_bind == proc_bind_default ) { + // + // No proc_bind clause was specified, so use the current value + // of proc-bind-var for this parallel region. 
+ // + proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; + } + // + // Reset for next parallel region + // + this_thr->th.th_set_proc_bind = proc_bind_default; +#endif /* OMP_40_ENABLED */ + + if( this_thr->th.th_team != serial_team ) { + // Nested level will be an index in the nested nthreads array + int level = this_thr->th.th_team->t.t_level; + + if( serial_team->t.t_serialized ) { + /* this serial team was already used + * TODO increase performance by making this locks more specific */ + kmp_team_t *new_team; + + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + +#if OMPT_SUPPORT + ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); +#endif + + new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, +#if OMPT_SUPPORT + ompt_parallel_id, +#endif +#if OMP_40_ENABLED + proc_bind, +#endif + & this_thr->th.th_current_task->td_icvs, + 0 USE_NESTED_HOT_ARG(NULL) ); + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + KMP_ASSERT( new_team ); + + /* setup new serialized team and install it */ + new_team->t.t_threads[0] = this_thr; + new_team->t.t_parent = this_thr->th.th_team; + serial_team = new_team; + this_thr->th.th_serial_team = serial_team; + + KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", + global_tid, serial_team ) ); + + + /* TODO the above breaks the requirement that if we run out of + * resources, then we can still guarantee that serialized teams + * are ok, since we may need to allocate a new one */ + } else { + KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", + global_tid, serial_team ) ); + } + + /* we have to initialize this serial team */ + KMP_DEBUG_ASSERT( serial_team->t.t_threads ); + KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); + KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team ); + serial_team->t.t_ident = loc; + serial_team->t.t_serialized = 1; + serial_team->t.t_nproc = 1; + serial_team->t.t_parent = 
this_thr->th.th_team; + serial_team->t.t_sched = this_thr->th.th_team->t.t_sched; + this_thr->th.th_team = serial_team; + serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; + + KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n", + global_tid, this_thr->th.th_current_task ) ); + KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 ); + this_thr->th.th_current_task->td_flags.executing = 0; + + __kmp_push_current_task_to_thread( this_thr, serial_team, 0 ); + + /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for + each serialized task represented by team->t.t_serialized? */ + copy_icvs( + & this_thr->th.th_current_task->td_icvs, + & this_thr->th.th_current_task->td_parent->td_icvs ); + + // Thread value exists in the nested nthreads array for the next nested level + if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { + this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; + } + +#if OMP_40_ENABLED + if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) { + this_thr->th.th_current_task->td_icvs.proc_bind + = __kmp_nested_proc_bind.bind_types[ level + 1 ]; + } +#endif /* OMP_40_ENABLED */ + +#if USE_DEBUGGER + serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
+#endif + this_thr->th.th_info.ds.ds_tid = 0; + + /* set thread cache values */ + this_thr->th.th_team_nproc = 1; + this_thr->th.th_team_master = this_thr; + this_thr->th.th_team_serialized = 1; + + serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; + serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; + + propagateFPControl (serial_team); + + /* check if we need to allocate dispatch buffers stack */ + KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); + if ( !serial_team->t.t_dispatch->th_disp_buffer ) { + serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *) + __kmp_allocate( sizeof( dispatch_private_info_t ) ); + } + this_thr->th.th_dispatch = serial_team->t.t_dispatch; + +#if OMPT_SUPPORT + ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); + __ompt_team_assign_id(serial_team, ompt_parallel_id); +#endif + + KMP_MB(); + + } else { + /* this serialized team is already being used, + * that's fine, just add another nested level */ + KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team ); + KMP_DEBUG_ASSERT( serial_team->t.t_threads ); + KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); + ++ serial_team->t.t_serialized; + this_thr->th.th_team_serialized = serial_team->t.t_serialized; + + // Nested level will be an index in the nested nthreads array + int level = this_thr->th.th_team->t.t_level; + // Thread value exists in the nested nthreads array for the next nested level + if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { + this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; + } + serial_team->t.t_level++; + KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n", + global_tid, serial_team, serial_team->t.t_level ) ); + + /* allocate/push dispatch buffers stack */ + KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); + { + dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *) + 
__kmp_allocate( sizeof( dispatch_private_info_t ) ); + disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; + serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; + } + this_thr->th.th_dispatch = serial_team->t.t_dispatch; + + KMP_MB(); + } + + if ( __kmp_env_consistency_check ) + __kmp_push_parallel( global_tid, NULL ); + +#if USE_ITT_BUILD + // Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment + if ( serial_team->t.t_level == 1 +#if OMP_40_ENABLED + && this_thr->th.th_teams_microtask == NULL +#endif + ) { +#if USE_ITT_NOTIFY + // Save the start of the "parallel" region for VTune. This is the frame begin at the same time. + if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) && + ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) + { + serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp(); + } else // only one notification scheme (either "submit" or "forking/joined", not both) +#endif + if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && + __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) + { + this_thr->th.th_ident = loc; + // 0 - no barriers; 1 - serialized parallel + __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 ); + } + } +#endif /* USE_ITT_BUILD */ +} + +/* most of the work for a fork */ +/* return true if we really went parallel, false if serialized */ +int +__kmp_fork_call( + ident_t * loc, + int gtid, + enum fork_context_e call_context, // Intel, GNU, ... 
+ kmp_int32 argc, +#if OMPT_SUPPORT + void *unwrapped_task, +#endif + microtask_t microtask, + launch_t invoker, +/* TODO: revert workaround for Intel(R) 64 tracker #96 */ +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX + va_list * ap +#else + va_list ap +#endif + ) +{ + void **argv; + int i; + int master_tid; + int master_this_cons; + kmp_team_t *team; + kmp_team_t *parent_team; + kmp_info_t *master_th; + kmp_root_t *root; + int nthreads; + int master_active; + int master_set_numthreads; + int level; +#if OMP_40_ENABLED + int active_level; + int teams_level; +#endif +#if KMP_NESTED_HOT_TEAMS + kmp_hot_team_ptr_t **p_hot_teams; +#endif + { // KMP_TIME_BLOCK + KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call); + KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); + + KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid )); + if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) { + /* Some systems prefer the stack for the root thread(s) to start with */ + /* some gap from the parent stack to prevent false sharing. */ + void *dummy = KMP_ALLOCA(__kmp_stkpadding); + /* These 2 lines below are so this does not get optimized out */ + if ( __kmp_stkpadding > KMP_MAX_STKPADDING ) + __kmp_stkpadding += (short)((kmp_int64)dummy); + } + + /* initialize if needed */ + KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown + if( ! 
TCR_4(__kmp_init_parallel) ) + __kmp_parallel_initialize(); + + /* setup current data */ + master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown + parent_team = master_th->th.th_team; + master_tid = master_th->th.th_info.ds.ds_tid; + master_this_cons = master_th->th.th_local.this_construct; + root = master_th->th.th_root; + master_active = root->r.r_active; + master_set_numthreads = master_th->th.th_set_nproc; + +#if OMPT_SUPPORT + ompt_parallel_id_t ompt_parallel_id; + ompt_task_id_t ompt_task_id; + ompt_frame_t *ompt_frame; + ompt_task_id_t my_task_id; + ompt_parallel_id_t my_parallel_id; + + if (ompt_enabled) { + ompt_parallel_id = __ompt_parallel_id_new(gtid); + ompt_task_id = __ompt_get_task_id_internal(0); + ompt_frame = __ompt_get_task_frame_internal(0); + } +#endif + + // Nested level will be an index in the nested nthreads array + level = parent_team->t.t_level; +#if OMP_40_ENABLED + active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed + teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams +#endif +#if KMP_NESTED_HOT_TEAMS + p_hot_teams = &master_th->th.th_hot_teams; + if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) { + *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate( + sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); + (*p_hot_teams)[0].hot_team = root->r.r_hot_team; + (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0) + } +#endif + +#if OMPT_SUPPORT + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { + int team_size = master_set_numthreads; + + ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( + ompt_task_id, ompt_frame, ompt_parallel_id, + team_size, unwrapped_task, OMPT_INVOKER(call_context)); + } +#endif + + master_th->th.th_ident = loc; + +#if OMP_40_ENABLED + if ( master_th->th.th_teams_microtask && + ap && microtask 
!= (microtask_t)__kmp_teams_master && level == teams_level ) { + // AC: This is start of parallel that is nested inside teams construct. + // The team is actual (hot), all workers are ready at the fork barrier. + // No lock needed to initialize the team a bit, then free workers. + parent_team->t.t_ident = loc; + parent_team->t.t_argc = argc; + argv = (void**)parent_team->t.t_argv; + for( i=argc-1; i >= 0; --i ) +/* TODO: revert workaround for Intel(R) 64 tracker #96 */ +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX + *argv++ = va_arg( *ap, void * ); +#else + *argv++ = va_arg( ap, void * ); +#endif + /* Increment our nested depth levels, but not increase the serialization */ + if ( parent_team == master_th->th.th_serial_team ) { + // AC: we are in serialized parallel + __kmpc_serialized_parallel(loc, gtid); + KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 ); + parent_team->t.t_serialized--; // AC: need this in order enquiry functions + // work correctly, will restore at join time + +#if OMPT_SUPPORT + void *dummy; + void **exit_runtime_p; + + ompt_lw_taskteam_t lw_taskteam; + + if (ompt_enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + unwrapped_task, ompt_parallel_id); + lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); + + __ompt_lw_taskteam_link(&lw_taskteam, master_th); + +#if OMPT_TRACE + /* OMPT implicit task begin */ + my_task_id = lw_taskteam.ompt_task_info.task_id; + my_parallel_id = parent_team->t.ompt_team_info.parallel_id; + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + my_parallel_id, my_task_id); + } +#endif + + /* OMPT state */ + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + } else { + exit_runtime_p = &dummy; + } +#endif + + { + KMP_TIME_BLOCK(OMP_work); + __kmp_invoke_microtask( microtask, gtid, 0, argc, 
parent_team->t.t_argv +#if OMPT_SUPPORT + , exit_runtime_p +#endif + ); + } + +#if OMPT_SUPPORT + if (ompt_enabled) { +#if OMPT_TRACE + lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0; + + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + ompt_parallel_id, ompt_task_id); + } + + __ompt_lw_taskteam_unlink(master_th); + // reset clear the task id only after unlinking the task + lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; +#endif + + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + ompt_parallel_id, ompt_task_id, + OMPT_INVOKER(call_context)); + } + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif + return TRUE; + } + + parent_team->t.t_pkfn = microtask; +#if OMPT_SUPPORT + parent_team->t.ompt_team_info.microtask = unwrapped_task; +#endif + parent_team->t.t_invoke = invoker; + KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); + parent_team->t.t_active_level ++; + parent_team->t.t_level ++; + + /* Change number of threads in the team if requested */ + if ( master_set_numthreads ) { // The parallel has num_threads clause + if ( master_set_numthreads < master_th->th.th_teams_size.nth ) { + // AC: only can reduce the number of threads dynamically, cannot increase + kmp_info_t **other_threads = parent_team->t.t_threads; + parent_team->t.t_nproc = master_set_numthreads; + for ( i = 0; i < master_set_numthreads; ++i ) { + other_threads[i]->th.th_team_nproc = master_set_numthreads; + } + // Keep extra threads hot in the team for possible next parallels + } + master_th->th.th_set_nproc = 0; + } + +#if USE_DEBUGGER + if ( __kmp_debugging ) { // Let debugger override number of threads. + int nth = __kmp_omp_num_threads( loc ); + if ( nth > 0 ) { // 0 means debugger does not want to change number of threads. 
+ master_set_numthreads = nth; + }; // if + }; // if +#endif + + KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); + __kmp_internal_fork( loc, gtid, parent_team ); + KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); + + /* Invoke microtask for MASTER thread */ + KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", + gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); + + { + KMP_TIME_BLOCK(OMP_work); + if (! parent_team->t.t_invoke( gtid )) { + KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); + } + } + KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", + gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); + + return TRUE; + } // Parallel closely nested in teams construct +#endif /* OMP_40_ENABLED */ + +#if KMP_DEBUG + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); + } +#endif + + if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) { + nthreads = 1; + } else { +#if OMP_40_ENABLED + int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level)); +#endif + nthreads = master_set_numthreads ? + master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task + + // Check if we need to take forkjoin lock? (no need for serialized parallel out of teams construct). + // This code moved here from __kmp_reserve_threads() to speedup nested serialized parallels. 
+ if (nthreads > 1) { + if ( ( !get__nested(master_th) && (root->r.r_in_parallel +#if OMP_40_ENABLED + && !enter_teams +#endif /* OMP_40_ENABLED */ + ) ) || ( __kmp_library == library_serial ) ) { + KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n", + gtid, nthreads )); + nthreads = 1; + } + } + if ( nthreads > 1 ) { + /* determine how many new threads we can use */ + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + + nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads +#if OMP_40_ENABLED +/* AC: If we execute teams from parallel region (on host), then teams should be created + but each can only have 1 thread if nesting is disabled. If teams called from serial region, + then teams and their threads should be created regardless of the nesting setting. */ + , enter_teams +#endif /* OMP_40_ENABLED */ + ); + if ( nthreads == 1 ) { + // Free lock for single thread execution here; + // for multi-thread execution it will be freed later + // after team of threads created and initialized + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + } + } + } + KMP_DEBUG_ASSERT( nthreads > 0 ); + + /* If we temporarily changed the set number of threads then restore it now */ + master_th->th.th_set_nproc = 0; + + /* create a serialized parallel region? */ + if ( nthreads == 1 ) { + /* josh todo: hypothetical question: what do we do for OS X*? */ +#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + void * args[ argc ]; +#else + void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) ); +#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */ + + KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid )); + + __kmpc_serialized_parallel(loc, gtid); + + if ( call_context == fork_context_intel ) { + /* TODO this sucks, use the compiler itself to pass args! 
:) */ + master_th->th.th_serial_team->t.t_ident = loc; +#if OMP_40_ENABLED + if ( !ap ) { + // revert change made in __kmpc_serialized_parallel() + master_th->th.th_serial_team->t.t_level--; + // Get args from parent team for teams construct + +#if OMPT_SUPPORT + void *dummy; + void **exit_runtime_p; + + ompt_lw_taskteam_t lw_taskteam; + + if (ompt_enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + unwrapped_task, ompt_parallel_id); + lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); + + __ompt_lw_taskteam_link(&lw_taskteam, master_th); + +#if OMPT_TRACE + my_task_id = lw_taskteam.ompt_task_info.task_id; + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + ompt_parallel_id, my_task_id); + } +#endif + + /* OMPT state */ + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + } else { + exit_runtime_p = &dummy; + } +#endif + + { + KMP_TIME_BLOCK(OMP_work); + __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv +#if OMPT_SUPPORT + , exit_runtime_p +#endif + ); + } + +#if OMPT_SUPPORT + if (ompt_enabled) { + lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0; + +#if OMPT_TRACE + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + ompt_parallel_id, ompt_task_id); + } +#endif + + __ompt_lw_taskteam_unlink(master_th); + // reset clear the task id only after unlinking the task + lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; + + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + ompt_parallel_id, ompt_task_id, + OMPT_INVOKER(call_context)); + } + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif + } else if ( microtask == (microtask_t)__kmp_teams_master ) { + 
KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team ); + team = master_th->th.th_team; + //team->t.t_pkfn = microtask; + team->t.t_invoke = invoker; + __kmp_alloc_argv_entries( argc, team, TRUE ); + team->t.t_argc = argc; + argv = (void**) team->t.t_argv; + if ( ap ) { + for( i=argc-1; i >= 0; --i ) +// TODO: revert workaround for Intel(R) 64 tracker #96 +# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX + *argv++ = va_arg( *ap, void * ); +# else + *argv++ = va_arg( ap, void * ); +# endif + } else { + for( i=0; i < argc; ++i ) + // Get args from parent team for teams construct + argv[i] = parent_team->t.t_argv[i]; + } + // AC: revert change made in __kmpc_serialized_parallel() + // because initial code in teams should have level=0 + team->t.t_level--; + // AC: call special invoker for outer "parallel" of the teams construct + { + KMP_TIME_BLOCK(OMP_work); + invoker(gtid); + } + } else { +#endif /* OMP_40_ENABLED */ + argv = args; + for( i=argc-1; i >= 0; --i ) +// TODO: revert workaround for Intel(R) 64 tracker #96 +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX + *argv++ = va_arg( *ap, void * ); +#else + *argv++ = va_arg( ap, void * ); +#endif + KMP_MB(); + +#if OMPT_SUPPORT + void *dummy; + void **exit_runtime_p; + + ompt_lw_taskteam_t lw_taskteam; + + if (ompt_enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + unwrapped_task, ompt_parallel_id); + lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); + + __ompt_lw_taskteam_link(&lw_taskteam, master_th); + +#if OMPT_TRACE + /* OMPT implicit task begin */ + my_task_id = lw_taskteam.ompt_task_info.task_id; + my_parallel_id = ompt_parallel_id; + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + my_parallel_id, my_task_id); + } +#endif + + /* OMPT state */ + 
master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + } else { + exit_runtime_p = &dummy; + } +#endif + + { + KMP_TIME_BLOCK(OMP_work); + __kmp_invoke_microtask( microtask, gtid, 0, argc, args +#if OMPT_SUPPORT + , exit_runtime_p +#endif + ); + } + +#if OMPT_SUPPORT + if (ompt_enabled) { +#if OMPT_TRACE + lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0; + + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + my_parallel_id, my_task_id); + } +#endif + + __ompt_lw_taskteam_unlink(master_th); + // reset clear the task id only after unlinking the task + lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; + + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + ompt_parallel_id, ompt_task_id, + OMPT_INVOKER(call_context)); + } + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif +#if OMP_40_ENABLED + } +#endif /* OMP_40_ENABLED */ + } + else if ( call_context == fork_context_gnu ) { +#if OMPT_SUPPORT + ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) + __kmp_allocate(sizeof(ompt_lw_taskteam_t)); + __ompt_lw_taskteam_init(lwt, master_th, gtid, + unwrapped_task, ompt_parallel_id); + + lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); + lwt->ompt_task_info.frame.exit_runtime_frame = 0; + __ompt_lw_taskteam_link(lwt, master_th); +#endif + + // we were called from GNU native code + KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); + return FALSE; + } + else { + KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" ); + } + + + KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); + KMP_MB(); + return FALSE; + } + + // GEH: only modify the executing flag in the case when not serialized + // serialized case is handled in kmpc_serialized_parallel + KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, 
curtask=%p, curtask_max_aclevel=%d\n", + parent_team->t.t_active_level, master_th, master_th->th.th_current_task, + master_th->th.th_current_task->td_icvs.max_active_levels ) ); + // TODO: GEH - cannot do this assertion because root thread not set up as executing + // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); + master_th->th.th_current_task->td_flags.executing = 0; + +#if OMP_40_ENABLED + if ( !master_th->th.th_teams_microtask || level > teams_level ) +#endif /* OMP_40_ENABLED */ + { + /* Increment our nested depth level */ + KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); + } + + // See if we need to make a copy of the ICVs. + int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; + if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) { + nthreads_icv = __kmp_nested_nth.nth[level+1]; + } + else { + nthreads_icv = 0; // don't update + } + +#if OMP_40_ENABLED + // Figure out the proc_bind_policy for the new team. + kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; + kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update + if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { + proc_bind = proc_bind_false; + } + else { + if (proc_bind == proc_bind_default) { + // No proc_bind clause specified; use current proc-bind-var for this parallel region + proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; + } + /* else: The proc_bind policy was specified explicitly on parallel clause. This + overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */ + // Figure the value of proc-bind-var for the child threads. 
+ if ((level+1 < __kmp_nested_proc_bind.used) + && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) { + proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1]; + } + } + + // Reset for next parallel region + master_th->th.th_set_proc_bind = proc_bind_default; +#endif /* OMP_40_ENABLED */ + + if ((nthreads_icv > 0) +#if OMP_40_ENABLED + || (proc_bind_icv != proc_bind_default) +#endif /* OMP_40_ENABLED */ + ) { + kmp_internal_control_t new_icvs; + copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); + new_icvs.next = NULL; + if (nthreads_icv > 0) { + new_icvs.nproc = nthreads_icv; + } + +#if OMP_40_ENABLED + if (proc_bind_icv != proc_bind_default) { + new_icvs.proc_bind = proc_bind_icv; + } +#endif /* OMP_40_ENABLED */ + + /* allocate a new parallel team */ + KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); + team = __kmp_allocate_team(root, nthreads, nthreads, +#if OMPT_SUPPORT + ompt_parallel_id, +#endif +#if OMP_40_ENABLED + proc_bind, +#endif + &new_icvs, argc USE_NESTED_HOT_ARG(master_th) ); + } else { + /* allocate a new parallel team */ + KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); + team = __kmp_allocate_team(root, nthreads, nthreads, +#if OMPT_SUPPORT + ompt_parallel_id, +#endif +#if OMP_40_ENABLED + proc_bind, +#endif + &master_th->th.th_current_task->td_icvs, argc + USE_NESTED_HOT_ARG(master_th) ); + } + KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) ); + + /* setup the new team */ + team->t.t_master_tid = master_tid; + team->t.t_master_this_cons = master_this_cons; + team->t.t_ident = loc; + team->t.t_parent = parent_team; + TCW_SYNC_PTR(team->t.t_pkfn, microtask); +#if OMPT_SUPPORT + TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task); +#endif + team->t.t_invoke = invoker; /* TODO move this to root, maybe */ + // TODO: parent_team->t.t_level == INT_MAX ??? 
+#if OMP_40_ENABLED + if ( !master_th->th.th_teams_microtask || level > teams_level ) { +#endif /* OMP_40_ENABLED */ + team->t.t_level = parent_team->t.t_level + 1; + team->t.t_active_level = parent_team->t.t_active_level + 1; +#if OMP_40_ENABLED + } else { + // AC: Do not increase parallel level at start of the teams construct + team->t.t_level = parent_team->t.t_level; + team->t.t_active_level = parent_team->t.t_active_level; + } +#endif /* OMP_40_ENABLED */ + team->t.t_sched = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule + +#if OMP_40_ENABLED + team->t.t_cancel_request = cancel_noreq; +#endif + + // Update the floating point rounding in the team if required. + propagateFPControl(team); + + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + // Set master's task team to team's task team. Unless this is hot team, it should be NULL. +#if 0 + // Patch out an assertion that trips while the runtime seems to operate correctly. + // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch. 
+ KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); +#endif + KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n", + __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, + parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) ); + if (level) { + // Take a memo of master's task_state + KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); + if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size + kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz; + kmp_uint8 *old_stack, *new_stack; + kmp_uint32 i; + new_stack = (kmp_uint8 *)__kmp_allocate(new_size); + for (i=0; ith.th_task_state_stack_sz; ++i) { + new_stack[i] = master_th->th.th_task_state_memo_stack[i]; + } + for (i=master_th->th.th_task_state_stack_sz; ith.th_task_state_memo_stack; + master_th->th.th_task_state_memo_stack = new_stack; + master_th->th.th_task_state_stack_sz = new_size; + __kmp_free(old_stack); + } + // Store master's task_state on stack + master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; + master_th->th.th_task_state_top++; +#if KMP_NESTED_HOT_TEAMS + if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team + master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; + } + else { +#endif + master_th->th.th_task_state = 0; +#if KMP_NESTED_HOT_TEAMS + } +#endif + } +#if !KMP_NESTED_HOT_TEAMS + KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); +#endif + } + + KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", + gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc )); + KMP_DEBUG_ASSERT( team != root->r.r_hot_team || + ( team->t.t_master_tid == 0 && + ( 
team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) )); + KMP_MB(); + + /* now, setup the arguments */ + argv = (void**)team->t.t_argv; +#if OMP_40_ENABLED + if ( ap ) { +#endif /* OMP_40_ENABLED */ + for ( i=argc-1; i >= 0; --i ) +// TODO: revert workaround for Intel(R) 64 tracker #96 +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX + *argv++ = va_arg( *ap, void * ); +#else + *argv++ = va_arg( ap, void * ); +#endif +#if OMP_40_ENABLED + } else { + for ( i=0; i < argc; ++i ) + // Get args from parent team for teams construct + argv[i] = team->t.t_parent->t.t_argv[i]; + } +#endif /* OMP_40_ENABLED */ + + /* now actually fork the threads */ + team->t.t_master_active = master_active; + if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong + root->r.r_active = TRUE; + + __kmp_fork_team_threads( root, team, master_th, gtid ); + __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc ); + +#if OMPT_SUPPORT + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; +#endif + + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + + +#if USE_ITT_BUILD + if ( team->t.t_active_level == 1 // only report frames at level 1 +# if OMP_40_ENABLED + && !master_th->th.th_teams_microtask // not in teams construct +# endif /* OMP_40_ENABLED */ + ) { +#if USE_ITT_NOTIFY + if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && + ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) + { + kmp_uint64 tmp_time = 0; + if ( __itt_get_timestamp_ptr ) + tmp_time = __itt_get_timestamp(); + // Internal fork - report frame begin + master_th->th.th_frame_time = tmp_time; + if ( __kmp_forkjoin_frames_mode == 3 ) + team->t.t_region_time = tmp_time; + } else // only one notification scheme (either "submit" or "forking/joined", not both) +#endif /* USE_ITT_NOTIFY */ + if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && + __kmp_forkjoin_frames && 
!__kmp_forkjoin_frames_mode ) + { // Mark start of "parallel" region for VTune. + __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); + } + } +#endif /* USE_ITT_BUILD */ + + /* now go on and do the work */ + KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team ); + KMP_MB(); + KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", + root, team, master_th, gtid)); + +#if USE_ITT_BUILD + if ( __itt_stack_caller_create_ptr ) { + team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier + } +#endif /* USE_ITT_BUILD */ + +#if OMP_40_ENABLED + if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute +#endif /* OMP_40_ENABLED */ + { + __kmp_internal_fork( loc, gtid, team ); + KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", + root, team, master_th, gtid)); + } + + if (call_context == fork_context_gnu) { + KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); + return TRUE; + } + + /* Invoke microtask for MASTER thread */ + KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", + gtid, team->t.t_id, team->t.t_pkfn ) ); + } // END of timer KMP_fork_call block + + { + KMP_TIME_BLOCK(OMP_work); + // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke); + if (! team->t.t_invoke( gtid )) { + KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); + } + } + KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", + gtid, team->t.t_id, team->t.t_pkfn ) ); + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); + +#if OMPT_SUPPORT + if (ompt_enabled) { + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif + + return TRUE; +} + +#if OMPT_SUPPORT +static inline void +__kmp_join_restore_state( + kmp_info_t *thread, + kmp_team_t *team) +{ + // restore state outside the region + thread->th.ompt_thread_info.state = ((team->t.t_serialized) ? + ompt_state_work_serial : ompt_state_work_parallel); +} + +static inline void +__kmp_join_ompt( + kmp_info_t *thread, + kmp_team_t *team, + ompt_parallel_id_t parallel_id, + fork_context_e fork_context) +{ + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); + } + + __kmp_join_restore_state(thread,team); +} +#endif + +void +__kmp_join_call(ident_t *loc, int gtid +#if OMPT_SUPPORT + , enum fork_context_e fork_context +#endif +#if OMP_40_ENABLED + , int exit_teams +#endif /* OMP_40_ENABLED */ +) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_join_call); + kmp_team_t *team; + kmp_team_t *parent_team; + kmp_info_t *master_th; + kmp_root_t *root; + int master_active; + int i; + + KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid )); + + /* setup current data */ + master_th = __kmp_threads[ gtid ]; + root = master_th->th.th_root; + team = master_th->th.th_team; + parent_team = team->t.t_parent; + + master_th->th.th_ident = loc; + +#if OMPT_SUPPORT + if (ompt_enabled) { + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif + +#if KMP_DEBUG + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n", + __kmp_gtid_from_thread( master_th ), team, + team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) ); + KMP_DEBUG_ASSERT( master_th->th.th_task_team == 
team->t.t_task_team[master_th->th.th_task_state] ); + } +#endif + + if( team->t.t_serialized ) { +#if OMP_40_ENABLED + if ( master_th->th.th_teams_microtask ) { + // We are in teams construct + int level = team->t.t_level; + int tlevel = master_th->th.th_teams_level; + if ( level == tlevel ) { + // AC: we haven't incremented it earlier at start of teams construct, + // so do it here - at the end of teams construct + team->t.t_level++; + } else if ( level == tlevel + 1 ) { + // AC: we are exiting parallel inside teams, need to increment serialization + // in order to restore it in the next call to __kmpc_end_serialized_parallel + team->t.t_serialized++; + } + } +#endif /* OMP_40_ENABLED */ + __kmpc_end_serialized_parallel( loc, gtid ); + +#if OMPT_SUPPORT + if (ompt_enabled) { + __kmp_join_restore_state(master_th, parent_team); + } +#endif + + return; + } + + master_active = team->t.t_master_active; + +#if OMP_40_ENABLED + if (!exit_teams) +#endif /* OMP_40_ENABLED */ + { + // AC: No barrier for internal teams at exit from teams construct. + // But there is barrier for external team (league). + __kmp_internal_join( loc, gtid, team ); + } +#if OMP_40_ENABLED + else { + master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel) + } +#endif /* OMP_40_ENABLED */ + + KMP_MB(); + +#if OMPT_SUPPORT + ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; +#endif + +#if USE_ITT_BUILD + if ( __itt_stack_caller_create_ptr ) { + __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier + } + + // Mark end of "parallel" region for VTune. 
+ if ( team->t.t_active_level == 1 +# if OMP_40_ENABLED + && !master_th->th.th_teams_microtask /* not in teams construct */ +# endif /* OMP_40_ENABLED */ + ) { + master_th->th.th_ident = loc; + // only one notification scheme (either "submit" or "forking/joined", not both) + if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 ) + __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, + 0, loc, master_th->th.th_team_nproc, 1 ); + else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && + ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) + __kmp_itt_region_joined( gtid ); + } // active_level == 1 +#endif /* USE_ITT_BUILD */ + +#if OMP_40_ENABLED + if ( master_th->th.th_teams_microtask && + !exit_teams && + team->t.t_pkfn != (microtask_t)__kmp_teams_master && + team->t.t_level == master_th->th.th_teams_level + 1 ) { + // AC: We need to leave the team structure intact at the end + // of parallel inside the teams construct, so that at the next + // parallel same (hot) team works, only adjust nesting levels + + /* Decrement our nested depth level */ + team->t.t_level --; + team->t.t_active_level --; + KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); + + /* Restore number of threads in the team if needed */ + if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) { + int old_num = master_th->th.th_team_nproc; + int new_num = master_th->th.th_teams_size.nth; + kmp_info_t **other_threads = team->t.t_threads; + team->t.t_nproc = new_num; + for ( i = 0; i < old_num; ++i ) { + other_threads[i]->th.th_team_nproc = new_num; + } + // Adjust states of non-used threads of the team + for ( i = old_num; i < new_num; ++i ) { + // Re-initialize thread's barrier data. 
+ int b; + kmp_balign_t * balign = other_threads[i]->th.th_bar; + for ( b = 0; b < bs_last_barrier; ++ b ) { + balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; + KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); +#if USE_DEBUGGER + balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; +#endif + } + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + // Synchronize thread's task state + other_threads[i]->th.th_task_state = master_th->th.th_task_state; + } + } + } + +#if OMPT_SUPPORT + if (ompt_enabled) { + __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + } +#endif + + return; + } +#endif /* OMP_40_ENABLED */ + + /* do cleanup and restore the parent team */ + master_th->th.th_info .ds.ds_tid = team->t.t_master_tid; + master_th->th.th_local.this_construct = team->t.t_master_this_cons; + + master_th->th.th_dispatch = + & parent_team->t.t_dispatch[ team->t.t_master_tid ]; + + /* jc: The following lock has instructions with REL and ACQ semantics, + separating the parallel user code called in this parallel region + from the serial user code called after this function returns. + */ + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + +#if OMP_40_ENABLED + if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level ) +#endif /* OMP_40_ENABLED */ + { + /* Decrement our nested depth level */ + KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); + } + KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 ); + + KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", + 0, master_th, team ) ); + __kmp_pop_current_task_from_thread( master_th ); + +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + // + // Restore master thread's partition. 
+ // + master_th->th.th_first_place = team->t.t_first_place; + master_th->th.th_last_place = team->t.t_last_place; +#endif /* OMP_40_ENABLED */ + + updateHWFPControl (team); + + if ( root->r.r_active != master_active ) + root->r.r_active = master_active; + + __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads + + /* this race was fun to find. make sure the following is in the critical + * region otherwise assertions may fail occasionally since the old team + * may be reallocated and the hierarchy appears inconsistent. it is + * actually safe to run and won't cause any bugs, but will cause those + * assertion failures. it's only one deref&assign so might as well put this + * in the critical region */ + master_th->th.th_team = parent_team; + master_th->th.th_team_nproc = parent_team->t.t_nproc; + master_th->th.th_team_master = parent_team->t.t_threads[0]; + master_th->th.th_team_serialized = parent_team->t.t_serialized; + + /* restore serialized team, if need be */ + if( parent_team->t.t_serialized && + parent_team != master_th->th.th_serial_team && + parent_team != root->r.r_root_team ) { + __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) ); + master_th->th.th_serial_team = parent_team; + } + + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack + KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); + // Remember master's state if we re-use this nested hot team + master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; + --master_th->th.th_task_state_top; // pop + // Now restore state at this level + master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; + } + // Copy the task team from the parent team to the master thread + master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state]; + 
KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", + __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) ); + } + + // TODO: GEH - cannot do this assertion because root thread not set up as executing + // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); + master_th->th.th_current_task->td_flags.executing = 1; + + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + +#if OMPT_SUPPORT + if (ompt_enabled) { + __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + } +#endif + + KMP_MB(); + KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid )); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* Check whether we should push an internal control record onto the + serial team stack. If so, do it. */ +void +__kmp_save_internal_controls ( kmp_info_t * thread ) +{ + + if ( thread->th.th_team != thread->th.th_serial_team ) { + return; + } + if (thread->th.th_team->t.t_serialized > 1) { + int push = 0; + + if (thread->th.th_team->t.t_control_stack_top == NULL) { + push = 1; + } else { + if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level != + thread->th.th_team->t.t_serialized ) { + push = 1; + } + } + if (push) { /* push a record on the serial team's stack */ + kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t)); + + copy_icvs( control, & thread->th.th_current_task->td_icvs ); + + control->serial_nesting_level = thread->th.th_team->t.t_serialized; + + control->next = thread->th.th_team->t.t_control_stack_top; + thread->th.th_team->t.t_control_stack_top = control; + } + } +} + +/* Changes set_nproc */ +void +__kmp_set_num_threads( int new_nth, int gtid ) +{ + kmp_info_t *thread; + kmp_root_t *root; + + KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth )); + 
KMP_DEBUG_ASSERT( __kmp_init_serial ); + + if (new_nth < 1) + new_nth = 1; + else if (new_nth > __kmp_max_nth) + new_nth = __kmp_max_nth; + + KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); + thread = __kmp_threads[gtid]; + + __kmp_save_internal_controls( thread ); + + set__nproc( thread, new_nth ); + + // + // If this omp_set_num_threads() call will cause the hot team size to be + // reduced (in the absence of a num_threads clause), then reduce it now, + // rather than waiting for the next parallel region. + // + root = thread->th.th_root; + if ( __kmp_init_parallel && ( ! root->r.r_active ) + && ( root->r.r_hot_team->t.t_nproc > new_nth ) +#if KMP_NESTED_HOT_TEAMS + && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode +#endif + ) { + kmp_team_t *hot_team = root->r.r_hot_team; + int f; + + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + + + // Release the extra threads we don't need any more. + for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) { + KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); + if ( __kmp_tasking_mode != tskm_immediate_exec) { + // When decreasing team size, threads no longer in the team should unref task team. + hot_team->t.t_threads[f]->th.th_task_team = NULL; + } + __kmp_free_thread( hot_team->t.t_threads[f] ); + hot_team->t.t_threads[f] = NULL; + } + hot_team->t.t_nproc = new_nth; +#if KMP_NESTED_HOT_TEAMS + if( thread->th.th_hot_teams ) { + KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team ); + thread->th.th_hot_teams[0].hot_team_nth = new_nth; + } +#endif + + + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + + // + // Update the t_nproc field in the threads that are still active. 
+ // + for( f=0 ; f < new_nth; f++ ) { + KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); + hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; + } + // Special flag in case omp_set_num_threads() call + hot_team->t.t_size_changed = -1; + } +} + +/* Changes max_active_levels */ +void +__kmp_set_max_active_levels( int gtid, int max_active_levels ) +{ + kmp_info_t *thread; + + KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + // validate max_active_levels + if( max_active_levels < 0 ) { + KMP_WARNING( ActiveLevelsNegative, max_active_levels ); + // We ignore this call if the user has specified a negative value. + // The current setting won't be changed. The last valid setting will be used. + // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var). + KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); + return; + } + if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) { + // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ] + // We allow a zero value. (implementation defined behavior) + } else { + KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT ); + max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + // Current upper limit is MAX_INT. (implementation defined behavior) + // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior) + // Actually, the flow should never get here until we use MAX_INT limit. 
+ } + KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); + + thread = __kmp_threads[ gtid ]; + + __kmp_save_internal_controls( thread ); + + set__max_active_levels( thread, max_active_levels ); + +} + +/* Gets max_active_levels */ +int +__kmp_get_max_active_levels( int gtid ) +{ + kmp_info_t *thread; + + KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) ); + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + thread = __kmp_threads[ gtid ]; + KMP_DEBUG_ASSERT( thread->th.th_current_task ); + KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n", + gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) ); + return thread->th.th_current_task->td_icvs.max_active_levels; +} + +/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ +void +__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk ) +{ + kmp_info_t *thread; +// kmp_team_t *team; + + KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk )); + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + // Check if the kind parameter is valid, correct if needed. + // Valid parameters should fit in one of two intervals - standard or extended: + // , , , , , + // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 + if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper || + ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) ) + { + // TODO: Hint needs attention in case we change the default schedule. 
+ __kmp_msg( + kmp_ms_warning, + KMP_MSG( ScheduleKindOutOfRange, kind ), + KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ), + __kmp_msg_null + ); + kind = kmp_sched_default; + chunk = 0; // ignore chunk value in case of bad kind + } + + thread = __kmp_threads[ gtid ]; + + __kmp_save_internal_controls( thread ); + + if ( kind < kmp_sched_upper_std ) { + if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) { + // differ static chunked vs. unchunked: + // chunk should be invalid to indicate unchunked schedule (which is the default) + thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; + } else { + thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ]; + } + } else { + // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; + thread->th.th_current_task->td_icvs.sched.r_sched_type = + __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; + } + if ( kind == kmp_sched_auto ) { + // ignore parameter chunk for schedule auto + thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; + } else { + thread->th.th_current_task->td_icvs.sched.chunk = chunk; + } +} + +/* Gets def_sched_var ICV values */ +void +__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk ) +{ + kmp_info_t *thread; + enum sched_type th_type; + + KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid )); + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + thread = __kmp_threads[ gtid ]; + + //th_type = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].r_sched_type; + th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; + + switch ( th_type ) { + case kmp_sch_static: + case kmp_sch_static_greedy: + case kmp_sch_static_balanced: + *kind = kmp_sched_static; + *chunk = 0; // chunk was not set, try to show this fact via zero value + return; + case kmp_sch_static_chunked: + *kind = kmp_sched_static; + break; + case 
kmp_sch_dynamic_chunked: + *kind = kmp_sched_dynamic; + break; + case kmp_sch_guided_chunked: + case kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + *kind = kmp_sched_guided; + break; + case kmp_sch_auto: + *kind = kmp_sched_auto; + break; + case kmp_sch_trapezoidal: + *kind = kmp_sched_trapezoidal; + break; +/* + case kmp_sch_static_steal: + *kind = kmp_sched_static_steal; + break; +*/ + default: + KMP_FATAL( UnknownSchedulingType, th_type ); + } + + //*chunk = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].chunk; + *chunk = thread->th.th_current_task->td_icvs.sched.chunk; +} + +int +__kmp_get_ancestor_thread_num( int gtid, int level ) { + + int ii, dd; + kmp_team_t *team; + kmp_info_t *thr; + + KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level )); + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + // validate level + if( level == 0 ) return 0; + if( level < 0 ) return -1; + thr = __kmp_threads[ gtid ]; + team = thr->th.th_team; + ii = team->t.t_level; + if( level > ii ) return -1; + +#if OMP_40_ENABLED + if( thr->th.th_teams_microtask ) { + // AC: we are in teams region where multiple nested teams have same level + int tlevel = thr->th.th_teams_level; // the level of the teams construct + if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) + KMP_DEBUG_ASSERT( ii >= tlevel ); + // AC: As we need to pass by the teams league, we need to artificially increase ii + if ( ii == tlevel ) { + ii += 2; // three teams have same level + } else { + ii ++; // two teams have same level + } + } + } +#endif + + if( ii == level ) return __kmp_tid_from_gtid( gtid ); + + dd = team->t.t_serialized; + level++; + while( ii > level ) + { + for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) + { + } + if( ( team->t.t_serialized ) && ( !dd ) ) { + team = team->t.t_parent; + continue; + } + if( ii > level ) { + team = team->t.t_parent; + dd = team->t.t_serialized; + 
ii--; + } + } + + return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid ); +} + +int +__kmp_get_team_size( int gtid, int level ) { + + int ii, dd; + kmp_team_t *team; + kmp_info_t *thr; + + KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level )); + KMP_DEBUG_ASSERT( __kmp_init_serial ); + + // validate level + if( level == 0 ) return 1; + if( level < 0 ) return -1; + thr = __kmp_threads[ gtid ]; + team = thr->th.th_team; + ii = team->t.t_level; + if( level > ii ) return -1; + +#if OMP_40_ENABLED + if( thr->th.th_teams_microtask ) { + // AC: we are in teams region where multiple nested teams have same level + int tlevel = thr->th.th_teams_level; // the level of the teams construct + if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) + KMP_DEBUG_ASSERT( ii >= tlevel ); + // AC: As we need to pass by the teams league, we need to artificially increase ii + if ( ii == tlevel ) { + ii += 2; // three teams have same level + } else { + ii ++; // two teams have same level + } + } + } +#endif + + while( ii > level ) + { + for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) + { + } + if( team->t.t_serialized && ( !dd ) ) { + team = team->t.t_parent; + continue; + } + if( ii > level ) { + team = team->t.t_parent; + ii--; + } + } + + return team->t.t_nproc; +} + +kmp_r_sched_t +__kmp_get_schedule_global() { +// This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided) +// may be changed by kmp_set_defaults independently. So one can get the updated schedule here. 
+ + kmp_r_sched_t r_sched; + + // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided + // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times, + // and thus have different run-time schedules in different roots (even in OMP 2.5) + if ( __kmp_sched == kmp_sch_static ) { + r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy) + } else if ( __kmp_sched == kmp_sch_guided_chunked ) { + r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical) + } else { + r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other + } + + if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set) + r_sched.chunk = KMP_DEFAULT_CHUNK; + } else { + r_sched.chunk = __kmp_chunk; + } + + return r_sched; +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + + +/* + * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) + * at least argc number of *t_argv entries for the requested team. + */ +static void +__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ) +{ + + KMP_DEBUG_ASSERT( team ); + if( !realloc || argc > team->t.t_max_argc ) { + + KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n", + team->t.t_id, argc, ( realloc ) ? 
team->t.t_max_argc : 0 )); + /* if previously allocated heap space for args, free them */ + if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] ) + __kmp_free( (void *) team->t.t_argv ); + + if ( argc <= KMP_INLINE_ARGV_ENTRIES ) { + /* use unused space in the cache line for arguments */ + team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; + KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n", + team->t.t_id, team->t.t_max_argc )); + team->t.t_argv = &team->t.t_inline_argv[0]; + if ( __kmp_storage_map ) { + __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0], + &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], + (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), + "team_%d.t_inline_argv", + team->t.t_id ); + } + } else { + /* allocate space for arguments in the heap */ + team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ? + KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc; + KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n", + team->t.t_id, team->t.t_max_argc )); + team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc ); + if ( __kmp_storage_map ) { + __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc], + sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", + team->t.t_id ); + } + } + } +} + +static void +__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) +{ + int i; + int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2; +#if KMP_USE_POOLED_ALLOC + // AC: TODO: fix bug here: size of t_disp_buffer should not be multiplied by max_nth! 
+ char *ptr = __kmp_allocate(max_nth * + ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buf + + sizeof(kmp_disp_t) + sizeof(int)*6 + //+ sizeof(int) + + sizeof(kmp_r_sched_t) + + sizeof(kmp_taskdata_t) ) ); + + team->t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth; + team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr; + ptr += sizeof(dispatch_shared_info_t) * num_disp_buff; + team->t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth; + team->t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth; + team->t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth; + team->t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth; + team->t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth; + team->t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth; + team->t.t_set_bt_set = (int*) ptr; + ptr += sizeof(int) * max_nth; + //team->t.t_set_max_active_levels = (int*) ptr; ptr += sizeof(int) * max_nth; + team->t.t_set_sched = (kmp_r_sched_t*) ptr; + ptr += sizeof(kmp_r_sched_t) * max_nth; + team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr; + ptr += sizeof(kmp_taskdata_t) * max_nth; +#else + + team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth ); + team->t.t_disp_buffer = (dispatch_shared_info_t*) + __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff ); + team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth ); + //team->t.t_set_max_active_levels = (int*) __kmp_allocate( sizeof(int) * max_nth ); + //team->t.t_set_sched = (kmp_r_sched_t*) __kmp_allocate( sizeof(kmp_r_sched_t) * max_nth ); + team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth ); +#endif + team->t.t_max_nproc = max_nth; + + /* setup dispatch buffers */ + for(i = 0 ; i < num_disp_buff; ++i) + team->t.t_disp_buffer[i].buffer_index = i; +} + +static void +__kmp_free_team_arrays(kmp_team_t *team) { + /* Note: 
this does not free the threads in t_threads (__kmp_free_threads) */ + int i; + for ( i = 0; i < team->t.t_max_nproc; ++ i ) { + if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) { + __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer ); + team->t.t_dispatch[ i ].th_disp_buffer = NULL; + }; // if + }; // for + __kmp_free(team->t.t_threads); + #if !KMP_USE_POOLED_ALLOC + __kmp_free(team->t.t_disp_buffer); + __kmp_free(team->t.t_dispatch); + //__kmp_free(team->t.t_set_max_active_levels); + //__kmp_free(team->t.t_set_sched); + __kmp_free(team->t.t_implicit_task_taskdata); + #endif + team->t.t_threads = NULL; + team->t.t_disp_buffer = NULL; + team->t.t_dispatch = NULL; + //team->t.t_set_sched = 0; + //team->t.t_set_max_active_levels = 0; + team->t.t_implicit_task_taskdata = 0; +} + +static void +__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { + kmp_info_t **oldThreads = team->t.t_threads; + + #if !KMP_USE_POOLED_ALLOC + __kmp_free(team->t.t_disp_buffer); + __kmp_free(team->t.t_dispatch); + //__kmp_free(team->t.t_set_max_active_levels); + //__kmp_free(team->t.t_set_sched); + __kmp_free(team->t.t_implicit_task_taskdata); + #endif + __kmp_allocate_team_arrays(team, max_nth); + + KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*)); + + __kmp_free(oldThreads); +} + +static kmp_internal_control_t +__kmp_get_global_icvs( void ) { + + kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals + +#if OMP_40_ENABLED + KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 ); +#endif /* OMP_40_ENABLED */ + + kmp_internal_control_t g_icvs = { + 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field + (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread) + (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread) + (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for 
whether blocktime is explicitly set + __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime + __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals + __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread) + // (use a max ub on value if __kmp_parallel_initialize not called yet) + __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels + r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0], +#endif /* OMP_40_ENABLED */ + NULL //struct kmp_internal_control *next; + }; + + return g_icvs; +} + +static kmp_internal_control_t +__kmp_get_x_global_icvs( const kmp_team_t *team ) { + + kmp_internal_control_t gx_icvs; + gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls + copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs ); + gx_icvs.next = NULL; + + return gx_icvs; +} + +static void +__kmp_initialize_root( kmp_root_t *root ) +{ + int f; + kmp_team_t *root_team; + kmp_team_t *hot_team; + int hot_team_max_nth; + kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals + kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); + KMP_DEBUG_ASSERT( root ); + KMP_ASSERT( ! 
root->r.r_begin ); + + /* setup the root state structure */ + __kmp_init_lock( &root->r.r_begin_lock ); + root->r.r_begin = FALSE; + root->r.r_active = FALSE; + root->r.r_in_parallel = 0; + root->r.r_blocktime = __kmp_dflt_blocktime; + root->r.r_nested = __kmp_dflt_nested; + + /* setup the root team for this task */ + /* allocate the root team structure */ + KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) ); + + root_team = + __kmp_allocate_team( + root, + 1, // new_nproc + 1, // max_nproc +#if OMPT_SUPPORT + 0, // root parallel id +#endif +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0], +#endif + &r_icvs, + 0 // argc + USE_NESTED_HOT_ARG(NULL) // master thread is unknown + ); +#if USE_DEBUGGER + // Non-NULL value should be assigned to make the debugger display the root team. + TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 )); +#endif + + KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) ); + + root->r.r_root_team = root_team; + root_team->t.t_control_stack_top = NULL; + + /* initialize root team */ + root_team->t.t_threads[0] = NULL; + root_team->t.t_nproc = 1; + root_team->t.t_serialized = 1; + // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; + root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; + root_team->t.t_sched.chunk = r_sched.chunk; + KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", + root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); + + /* setup the hot team for this task */ + /* allocate the hot team structure */ + KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) ); + + hot_team = + __kmp_allocate_team( + root, + 1, // new_nproc + __kmp_dflt_team_nth_ub * 2, // max_nproc +#if OMPT_SUPPORT + 0, // root parallel id +#endif +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0], +#endif + &r_icvs, + 0 // argc + USE_NESTED_HOT_ARG(NULL) // master thread is unknown + ); + KF_TRACE( 10, ( 
"__kmp_initialize_root: after hot_team = %p\n", hot_team ) ); + + root->r.r_hot_team = hot_team; + root_team->t.t_control_stack_top = NULL; + + /* first-time initialization */ + hot_team->t.t_parent = root_team; + + /* initialize hot team */ + hot_team_max_nth = hot_team->t.t_max_nproc; + for ( f = 0; f < hot_team_max_nth; ++ f ) { + hot_team->t.t_threads[ f ] = NULL; + }; // for + hot_team->t.t_nproc = 1; + // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; + hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; + hot_team->t.t_sched.chunk = r_sched.chunk; + hot_team->t.t_size_changed = 0; +} + +#ifdef KMP_DEBUG + + +typedef struct kmp_team_list_item { + kmp_team_p const * entry; + struct kmp_team_list_item * next; +} kmp_team_list_item_t; +typedef kmp_team_list_item_t * kmp_team_list_t; + + +static void +__kmp_print_structure_team_accum( // Add team to list of teams. + kmp_team_list_t list, // List of teams. + kmp_team_p const * team // Team to add. +) { + + // List must terminate with item where both entry and next are NULL. + // Team is added to the list only once. + // List is sorted in ascending order by team id. + // Team id is *not* a key. + + kmp_team_list_t l; + + KMP_DEBUG_ASSERT( list != NULL ); + if ( team == NULL ) { + return; + }; // if + + __kmp_print_structure_team_accum( list, team->t.t_parent ); + __kmp_print_structure_team_accum( list, team->t.t_next_pool ); + + // Search list for the team. + l = list; + while ( l->next != NULL && l->entry != team ) { + l = l->next; + }; // while + if ( l->next != NULL ) { + return; // Team has been added before, exit. + }; // if + + // Team is not found. Search list again for insertion point. + l = list; + while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) { + l = l->next; + }; // while + + // Insert team. 
+ { + kmp_team_list_item_t * item = + (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); + * item = * l; + l->entry = team; + l->next = item; + } + +} + +static void +__kmp_print_structure_team( + char const * title, + kmp_team_p const * team + +) { + __kmp_printf( "%s", title ); + if ( team != NULL ) { + __kmp_printf( "%2x %p\n", team->t.t_id, team ); + } else { + __kmp_printf( " - (nil)\n" ); + }; // if +} + +static void +__kmp_print_structure_thread( + char const * title, + kmp_info_p const * thread + +) { + __kmp_printf( "%s", title ); + if ( thread != NULL ) { + __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread ); + } else { + __kmp_printf( " - (nil)\n" ); + }; // if +} + +void +__kmp_print_structure( + void +) { + + kmp_team_list_t list; + + // Initialize list of teams. + list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); + list->entry = NULL; + list->next = NULL; + + __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" ); + { + int gtid; + for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { + __kmp_printf( "%2d", gtid ); + if ( __kmp_threads != NULL ) { + __kmp_printf( " %p", __kmp_threads[ gtid ] ); + }; // if + if ( __kmp_root != NULL ) { + __kmp_printf( " %p", __kmp_root[ gtid ] ); + }; // if + __kmp_printf( "\n" ); + }; // for gtid + } + + // Print out __kmp_threads array. 
+ __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" ); + if ( __kmp_threads != NULL ) { + int gtid; + for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { + kmp_info_t const * thread = __kmp_threads[ gtid ]; + if ( thread != NULL ) { + __kmp_printf( "GTID %2d %p:\n", gtid, thread ); + __kmp_printf( " Our Root: %p\n", thread->th.th_root ); + __kmp_print_structure_team( " Our Team: ", thread->th.th_team ); + __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team ); + __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc ); + __kmp_print_structure_thread( " Master: ", thread->th.th_team_master ); + __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized ); + __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc ); +#if OMP_40_ENABLED + __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind ); +#endif + __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool ); + __kmp_printf( "\n" ); + __kmp_print_structure_team_accum( list, thread->th.th_team ); + __kmp_print_structure_team_accum( list, thread->th.th_serial_team ); + }; // if + }; // for gtid + } else { + __kmp_printf( "Threads array is not allocated.\n" ); + }; // if + + // Print out __kmp_root array. 
+ __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" ); + if ( __kmp_root != NULL ) { + int gtid; + for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { + kmp_root_t const * root = __kmp_root[ gtid ]; + if ( root != NULL ) { + __kmp_printf( "GTID %2d %p:\n", gtid, root ); + __kmp_print_structure_team( " Root Team: ", root->r.r_root_team ); + __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team ); + __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread ); + __kmp_printf( " Active?: %2d\n", root->r.r_active ); + __kmp_printf( " Nested?: %2d\n", root->r.r_nested ); + __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel ); + __kmp_printf( "\n" ); + __kmp_print_structure_team_accum( list, root->r.r_root_team ); + __kmp_print_structure_team_accum( list, root->r.r_hot_team ); + }; // if + }; // for gtid + } else { + __kmp_printf( "Ubers array is not allocated.\n" ); + }; // if + + __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" ); + while ( list->next != NULL ) { + kmp_team_p const * team = list->entry; + int i; + __kmp_printf( "Team %2x %p:\n", team->t.t_id, team ); + __kmp_print_structure_team( " Parent Team: ", team->t.t_parent ); + __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid ); + __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc ); + __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized ); + __kmp_printf( " Number threads: %2d\n", team->t.t_nproc ); + for ( i = 0; i < team->t.t_nproc; ++ i ) { + __kmp_printf( " Thread %2d: ", i ); + __kmp_print_structure_thread( "", team->t.t_threads[ i ] ); + }; // for i + __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool ); + __kmp_printf( "\n" ); + list = list->next; + }; // while + + // Print out __kmp_thread_pool and __kmp_team_pool. 
+ __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" ); + __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool ); + __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool ); + __kmp_printf( "\n" ); + + // Free team list. + while ( list != NULL ) { + kmp_team_list_item_t * item = list; + list = list->next; + KMP_INTERNAL_FREE( item ); + }; // while + +} + +#endif + + +//--------------------------------------------------------------------------- +// Stuff for per-thread fast random number generator +// Table of primes + +static const unsigned __kmp_primes[] = { + 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, + 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b, + 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, + 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, + 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801, + 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, + 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, + 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b, + 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, + 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, + 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7, + 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, + 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, + 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b, + 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, + 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f +}; + +//--------------------------------------------------------------------------- +// __kmp_get_random: Get a random number using a linear congruential method. 
+ +unsigned short +__kmp_get_random( kmp_info_t * thread ) +{ + unsigned x = thread->th.th_x; + unsigned short r = x>>16; + + thread->th.th_x = x*thread->th.th_a+1; + + KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", + thread->th.th_info.ds.ds_tid, r) ); + + return r; +} +//-------------------------------------------------------- +// __kmp_init_random: Initialize a random number generator + +void +__kmp_init_random( kmp_info_t * thread ) +{ + unsigned seed = thread->th.th_info.ds.ds_tid; + + thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))]; + thread->th.th_x = (seed+1)*thread->th.th_a+1; + KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) ); +} + + +#if KMP_OS_WINDOWS +/* reclaim array entries for root threads that are already dead, returns number reclaimed */ +static int +__kmp_reclaim_dead_roots(void) { + int i, r = 0; + + for(i = 0; i < __kmp_threads_capacity; ++i) { + if( KMP_UBER_GTID( i ) && + !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && + !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state + r += __kmp_unregister_root_other_thread(i); + } + } + return r; +} +#endif + +/* + This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of + free entries generated. + + For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are + already dead. + + On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate + update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to + __kmp_tp_capacity, if threadprivate cache array has been created. + Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. 
+ + After any dead root reclamation, if the clipping value allows array expansion to result in the generation + of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows + array expansion to result in the generation of a total of nNeed free slots, the function does that expansion. + Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero, + a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create + as many free slots as possible up to nWish. + + If any argument is negative, the behavior is undefined. +*/ +static int +__kmp_expand_threads(int nWish, int nNeed) { + int added = 0; + int old_tp_cached; + int __kmp_actual_max_nth; + + if(nNeed > nWish) /* normalize the arguments */ + nWish = nNeed; +#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB +/* only for Windows static library */ + /* reclaim array entries for root threads that are already dead */ + added = __kmp_reclaim_dead_roots(); + + if(nNeed) { + nNeed -= added; + if(nNeed < 0) + nNeed = 0; + } + if(nWish) { + nWish -= added; + if(nWish < 0) + nWish = 0; + } +#endif + if(nWish <= 0) + return added; + + while(1) { + int nTarget; + int minimumRequiredCapacity; + int newCapacity; + kmp_info_t **newThreads; + kmp_root_t **newRoot; + + // + // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. + // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth + // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may + // become > __kmp_max_nth in one of two ways: + // + // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] + // may not be resused by another thread, so we may need to increase + // __kmp_threads_capacity to __kmp_max_threads + 1. + // + // 2) New foreign root(s) are encountered. We always register new + // foreign roots. 
This may cause a smaller # of threads to be + // allocated at subsequent parallel regions, but the worker threads + // hang around (and eventually go to sleep) and need slots in the + // __kmp_threads[] array. + // + // Anyway, that is the reason for moving the check to see if + // __kmp_max_threads was exceeded into __kmp_reseerve_threads() + // instead of having it performed here. -BB + // + old_tp_cached = __kmp_tp_cached; + __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth; + KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity); + + /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */ + nTarget = nWish; + if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { + /* can't fulfil nWish, so try nNeed */ + if(nNeed) { + nTarget = nNeed; + if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { + /* possible expansion too small -- give up */ + break; + } + } else { + /* best-effort */ + nTarget = __kmp_actual_max_nth - __kmp_threads_capacity; + if(!nTarget) { + /* can expand at all -- give up */ + break; + } + } + } + minimumRequiredCapacity = __kmp_threads_capacity + nTarget; + + newCapacity = __kmp_threads_capacity; + do{ + newCapacity = + newCapacity <= (__kmp_actual_max_nth >> 1) ? 
+ (newCapacity << 1) : + __kmp_actual_max_nth; + } while(newCapacity < minimumRequiredCapacity); + newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE); + newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity ); + KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*)); + KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*)); + memset(newThreads + __kmp_threads_capacity, 0, + (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*)); + memset(newRoot + __kmp_threads_capacity, 0, + (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*)); + + if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { + /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache + while we were allocating the expanded array, and our new capacity is larger than the threadprivate + cache capacity, so we should deallocate the expanded arrays and try again. This is the first check + of a double-check pair. + */ + __kmp_free(newThreads); + continue; /* start over and try again */ + } + __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); + if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { + /* Same check as above, but this time with the lock so we can be sure if we can succeed. */ + __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); + __kmp_free(newThreads); + continue; /* start over and try again */ + } else { + /* success */ + // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated. 
+ // + *(kmp_info_t**volatile*)&__kmp_threads = newThreads; + *(kmp_root_t**volatile*)&__kmp_root = newRoot; + added += newCapacity - __kmp_threads_capacity; + *(volatile int*)&__kmp_threads_capacity = newCapacity; + __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); + break; /* succeeded, so we can exit the loop */ + } + } + return added; +} + +/* register the current thread as a root thread and obtain our gtid */ +/* we must have the __kmp_initz_lock held at this point */ +/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */ +int +__kmp_register_root( int initial_thread ) +{ + kmp_info_t *root_thread; + kmp_root_t *root; + int gtid; + int capacity; + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + KA_TRACE( 20, ("__kmp_register_root: entered\n")); + KMP_MB(); + + + /* + 2007-03-02: + + If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one, + "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may + return false (that means there is at least one empty slot in __kmp_threads array), but it + is possible the only free slot is #0, which is reserved for initial thread and so cannot be + used for this one. Following code workarounds this bug. + + However, right solution seems to be not reserving slot #0 for initial thread because: + (1) there is no magic in slot #0, + (2) we cannot detect initial thread reliably (the first thread which does serial + initialization may be not a real initial thread). + */ + capacity = __kmp_threads_capacity; + if ( ! 
initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) { + -- capacity; + }; // if + + /* see if there are too many threads */ + if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) { + if ( __kmp_tp_cached ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantRegisterNewThread ), + KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), + KMP_HNT( PossibleSystemLimitOnThreads ), + __kmp_msg_null + ); + } + else { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantRegisterNewThread ), + KMP_HNT( SystemLimitOnThreads ), + __kmp_msg_null + ); + } + }; // if + + /* find an available thread slot */ + /* Don't reassign the zero slot since we need that to only be used by initial + thread */ + for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ ) + ; + KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid )); + KMP_ASSERT( gtid < __kmp_threads_capacity ); + + /* update global accounting */ + __kmp_all_nth ++; + TCW_4(__kmp_nth, __kmp_nth + 1); + + // + // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) + // for low numbers of procs, and method #2 (keyed API call) for higher + // numbers of procs. + // + if ( __kmp_adjust_gtid_mode ) { + if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { + if ( TCR_4(__kmp_gtid_mode) != 2) { + TCW_4(__kmp_gtid_mode, 2); + } + } + else { + if (TCR_4(__kmp_gtid_mode) != 1 ) { + TCW_4(__kmp_gtid_mode, 1); + } + } + } + +#ifdef KMP_ADJUST_BLOCKTIME + /* Adjust blocktime to zero if necessary */ + /* Middle initialization might not have occurred yet */ + if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { + if ( __kmp_nth > __kmp_avail_proc ) { + __kmp_zero_bt = TRUE; + } + } +#endif /* KMP_ADJUST_BLOCKTIME */ + + /* setup this new hierarchy */ + if( ! ( root = __kmp_root[gtid] )) { + root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) ); + KMP_DEBUG_ASSERT( ! 
root->r.r_root_team ); + } + + __kmp_initialize_root( root ); + + /* setup new root thread structure */ + if( root->r.r_uber_thread ) { + root_thread = root->r.r_uber_thread; + } else { + root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); + if ( __kmp_storage_map ) { + __kmp_print_thread_storage_map( root_thread, gtid ); + } + root_thread->th.th_info .ds.ds_gtid = gtid; + root_thread->th.th_root = root; + if( __kmp_env_consistency_check ) { + root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid ); + } + #if USE_FAST_MEMORY + __kmp_initialize_fast_memory( root_thread ); + #endif /* USE_FAST_MEMORY */ + + #if KMP_USE_BGET + KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL ); + __kmp_initialize_bget( root_thread ); + #endif + __kmp_init_random( root_thread ); // Initialize random number generator + } + + /* setup the serial team held in reserve by the root thread */ + if( ! root_thread->th.th_serial_team ) { + kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); + KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) ); + + root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1, +#if OMPT_SUPPORT + 0, // root parallel id +#endif +#if OMP_40_ENABLED + proc_bind_default, +#endif + &r_icvs, + 0 USE_NESTED_HOT_ARG(NULL) ); + } + KMP_ASSERT( root_thread->th.th_serial_team ); + KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n", + root_thread->th.th_serial_team ) ); + + /* drop root_thread into place */ + TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); + + root->r.r_root_team->t.t_threads[0] = root_thread; + root->r.r_hot_team ->t.t_threads[0] = root_thread; + root_thread->th.th_serial_team->t.t_threads[0] = root_thread; + root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). 
+ root->r.r_uber_thread = root_thread; + + /* initialize the thread, get it ready to go */ + __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid ); + + /* prepare the master thread for get_gtid() */ + __kmp_gtid_set_specific( gtid ); + + __kmp_itt_thread_name( gtid ); + + #ifdef KMP_TDATA_GTID + __kmp_gtid = gtid; + #endif + __kmp_create_worker( gtid, root_thread, __kmp_stksize ); + KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid ); + TCW_4(__kmp_init_gtid, TRUE); + + KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n", + gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ), + root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, + KMP_INIT_BARRIER_STATE ) ); + { // Initialize barrier data. + int b; + for ( b = 0; b < bs_last_barrier; ++ b ) { + root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE; +#if USE_DEBUGGER + root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0; +#endif + }; // for + } + KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE ); + +#if KMP_AFFINITY_SUPPORTED + if ( TCR_4(__kmp_init_middle) ) { + __kmp_affinity_set_init_mask( gtid, TRUE ); + } +#endif /* KMP_AFFINITY_SUPPORTED */ + + __kmp_root_counter ++; + + KMP_MB(); + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + + return gtid; +} + +#if KMP_NESTED_HOT_TEAMS +static int +__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level ) +{ + int i, n, nth; + kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; + if( !hot_teams || !hot_teams[level].hot_team ) { + return 0; + } + KMP_DEBUG_ASSERT( level < max_level ); + kmp_team_t *team = hot_teams[level].hot_team; + nth = hot_teams[level].hot_team_nth; + n = nth - 1; // master is not freed + if( level < max_level - 1 ) { + for( i = 0; i < nth; ++i ) { + kmp_info_t *th = team->t.t_threads[i]; + n += __kmp_free_hot_teams( root, th, level + 1, max_level ); + if( i > 0 && th->th.th_hot_teams ) { 
+ __kmp_free( th->th.th_hot_teams ); + th->th.th_hot_teams = NULL; + } + } + } + __kmp_free_team( root, team, NULL ); + return n; +} +#endif + +/* Resets a root thread and clear its root and hot teams. + Returns the number of __kmp_threads entries directly and indirectly freed. +*/ +static int +__kmp_reset_root(int gtid, kmp_root_t *root) +{ + kmp_team_t * root_team = root->r.r_root_team; + kmp_team_t * hot_team = root->r.r_hot_team; + int n = hot_team->t.t_nproc; + int i; + + KMP_DEBUG_ASSERT( ! root->r.r_active ); + + root->r.r_root_team = NULL; + root->r.r_hot_team = NULL; + // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call + // to __kmp_free_team(). + __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) ); +#if KMP_NESTED_HOT_TEAMS + if( __kmp_hot_teams_max_level > 1 ) { // need to free nested hot teams and their threads if any + for( i = 0; i < hot_team->t.t_nproc; ++i ) { + kmp_info_t *th = hot_team->t.t_threads[i]; + n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level ); + if( th->th.th_hot_teams ) { + __kmp_free( th->th.th_hot_teams ); + th->th.th_hot_teams = NULL; + } + } + } +#endif + __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) ); + + // + // Before we can reap the thread, we need to make certain that all + // other threads in the teams that had this root as ancestor have stopped trying to steal tasks. 
+ // + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + __kmp_wait_to_unref_task_teams(); + } + + #if KMP_OS_WINDOWS + /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ + KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n", + (LPVOID)&(root->r.r_uber_thread->th), + root->r.r_uber_thread->th.th_info.ds.ds_thread ) ); + __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread ); + #endif /* KMP_OS_WINDOWS */ + +#if OMPT_SUPPORT + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_thread_end)) { + int gtid = __kmp_get_gtid(); + __ompt_thread_end(ompt_thread_initial, gtid); + } +#endif + + TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. + __kmp_reap_thread( root->r.r_uber_thread, 1 ); + + // We canot put root thread to __kmp_thread_pool, so we have to reap it istead of freeing. + root->r.r_uber_thread = NULL; + /* mark root as no longer in use */ + root->r.r_begin = FALSE; + + return n; +} + +void +__kmp_unregister_root_current_thread( int gtid ) +{ + KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid )); + /* this lock should be ok, since unregister_root_current_thread is never called during + * and abort, only during a normal close. 
furthermore, if you have the + * forkjoin lock, you should never try to get the initz lock */ + + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { + KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid )); + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + return; + } + kmp_root_t *root = __kmp_root[gtid]; + + KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); + KMP_ASSERT( KMP_UBER_GTID( gtid )); + KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); + KMP_ASSERT( root->r.r_active == FALSE ); + + + KMP_MB(); + +#if OMP_41_ENABLED + kmp_info_t * thread = __kmp_threads[gtid]; + kmp_team_t * team = thread->th.th_team; + kmp_task_team_t * task_team = thread->th.th_task_team; + + // we need to wait for the proxy tasks before finishing the thread + if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) { +#if OMPT_SUPPORT + // the runtime is shutting down so we won't report any events + thread->th.ompt_thread_info.state = ompt_state_undefined; +#endif + __kmp_task_team_wait(thread, team, NULL ); + } +#endif + + __kmp_reset_root(gtid, root); + + /* free up this thread slot */ + __kmp_gtid_set_specific( KMP_GTID_DNE ); +#ifdef KMP_TDATA_GTID + __kmp_gtid = KMP_GTID_DNE; +#endif + + KMP_MB(); + KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid )); + + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); +} + +#if KMP_OS_WINDOWS +/* __kmp_forkjoin_lock must be already held + Unregisters a root thread that is not the current thread. Returns the number of + __kmp_threads entries freed as a result. 
+ */ +static int +__kmp_unregister_root_other_thread( int gtid ) +{ + kmp_root_t *root = __kmp_root[gtid]; + int r; + + KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid )); + KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); + KMP_ASSERT( KMP_UBER_GTID( gtid )); + KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); + KMP_ASSERT( root->r.r_active == FALSE ); + + r = __kmp_reset_root(gtid, root); + KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid )); + return r; +} +#endif + +#if KMP_DEBUG +void __kmp_task_info() { + + kmp_int32 gtid = __kmp_entry_gtid(); + kmp_int32 tid = __kmp_tid_from_gtid( gtid ); + kmp_info_t *this_thr = __kmp_threads[ gtid ]; + kmp_team_t *steam = this_thr->th.th_serial_team; + kmp_team_t *team = this_thr->th.th_team; + + __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n", + gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent ); +} +#endif // KMP_DEBUG + +/* TODO optimize with one big memclr, take out what isn't needed, + * split responsibility to workers as much as possible, and delay + * initialization of features as much as possible */ +static void +__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid ) +{ + /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker + * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ + kmp_info_t *master = team->t.t_threads[0]; + KMP_DEBUG_ASSERT( this_thr != NULL ); + KMP_DEBUG_ASSERT( this_thr->th.th_serial_team ); + KMP_DEBUG_ASSERT( team ); + KMP_DEBUG_ASSERT( team->t.t_threads ); + KMP_DEBUG_ASSERT( team->t.t_dispatch ); + KMP_DEBUG_ASSERT( master ); + KMP_DEBUG_ASSERT( master->th.th_root ); + + KMP_MB(); + + TCW_SYNC_PTR(this_thr->th.th_team, team); + + this_thr->th.th_info.ds.ds_tid = tid; + this_thr->th.th_set_nproc = 0; +#if OMP_40_ENABLED + this_thr->th.th_set_proc_bind = 
proc_bind_default; +# if KMP_AFFINITY_SUPPORTED + this_thr->th.th_new_place = this_thr->th.th_current_place; +# endif +#endif + this_thr->th.th_root = master->th.th_root; + + /* setup the thread's cache of the team structure */ + this_thr->th.th_team_nproc = team->t.t_nproc; + this_thr->th.th_team_master = master; + this_thr->th.th_team_serialized = team->t.t_serialized; + TCW_PTR(this_thr->th.th_sleep_loc, NULL); + + KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata ); + + KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", + tid, gtid, this_thr, this_thr->th.th_current_task ) ); + + __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE ); + + KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", + tid, gtid, this_thr, this_thr->th.th_current_task ) ); + // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()? + + /* TODO no worksharing in speculative threads */ + this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ]; + + this_thr->th.th_local.this_construct = 0; + +#ifdef BUILD_TV + this_thr->th.th_local.tv_data = 0; +#endif + + if ( ! this_thr->th.th_pri_common ) { + this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) ); + if ( __kmp_storage_map ) { + __kmp_print_storage_map_gtid( + gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, + sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid + ); + }; // if + this_thr->th.th_pri_head = NULL; + }; // if + + /* Initialize dynamic dispatch */ + { + volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; + /* + * Use team max_nproc since this will never change for the team. + */ + size_t disp_size = sizeof( dispatch_private_info_t ) * + ( team->t.t_max_nproc == 1 ? 
1 : KMP_MAX_DISP_BUF ); + KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) ); + KMP_ASSERT( dispatch ); + KMP_DEBUG_ASSERT( team->t.t_dispatch ); + KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] ); + + dispatch->th_disp_index = 0; + + if( ! dispatch->th_disp_buffer ) { + dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size ); + + if ( __kmp_storage_map ) { + __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ], + &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ], + disp_size, "th_%d.th_dispatch.th_disp_buffer " + "(team_%d.t_dispatch[%d].th_disp_buffer)", + gtid, team->t.t_id, gtid ); + } + } else { + memset( & dispatch->th_disp_buffer[0], '\0', disp_size ); + } + + dispatch->th_dispatch_pr_current = 0; + dispatch->th_dispatch_sh_current = 0; + + dispatch->th_deo_fcn = 0; /* ORDERED */ + dispatch->th_dxo_fcn = 0; /* END ORDERED */ + } + + this_thr->th.th_next_pool = NULL; + + if (!this_thr->th.th_task_state_memo_stack) { + size_t i; + this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) ); + this_thr->th.th_task_state_top = 0; + this_thr->th.th_task_state_stack_sz = 4; + for (i=0; ith.th_task_state_stack_sz; ++i) // zero init the stack + this_thr->th.th_task_state_memo_stack[i] = 0; + } + + KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); + KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); + + KMP_MB(); +} + + +/* allocate a new thread for the requesting team. this is only called from within a + * forkjoin critical section. we will first try to get an available thread from the + * thread pool. if none is available, we will fork a new one assuming we are able + * to create a new one. this should be assured, as the caller should check on this + * first. 
+ */ +kmp_info_t * +__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) +{ + kmp_team_t *serial_team; + kmp_info_t *new_thr; + int new_gtid; + + KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() )); + KMP_DEBUG_ASSERT( root && team ); +#if !KMP_NESTED_HOT_TEAMS + KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() )); +#endif + KMP_MB(); + + /* first, try to get one from the thread pool */ + if ( __kmp_thread_pool ) { + + new_thr = (kmp_info_t*)__kmp_thread_pool; + __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool; + if ( new_thr == __kmp_thread_pool_insert_pt ) { + __kmp_thread_pool_insert_pt = NULL; + } + TCW_4(new_thr->th.th_in_pool, FALSE); + // + // Don't touch th_active_in_pool or th_active. + // The worker thread adjusts those flags as it sleeps/awakens. + // + + __kmp_thread_pool_nth--; + + KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", + __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid )); + KMP_ASSERT( ! new_thr->th.th_team ); + KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity ); + KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 ); + + /* setup the thread structure */ + __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid ); + KMP_DEBUG_ASSERT( new_thr->th.th_serial_team ); + + TCW_4(__kmp_nth, __kmp_nth + 1); + + new_thr->th.th_task_state = 0; + new_thr->th.th_task_state_top = 0; + new_thr->th.th_task_state_stack_sz = 4; + +#ifdef KMP_ADJUST_BLOCKTIME + /* Adjust blocktime back to zero if necessar y */ + /* Middle initialization might not have occurred yet */ + if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { + if ( __kmp_nth > __kmp_avail_proc ) { + __kmp_zero_bt = TRUE; + } + } +#endif /* KMP_ADJUST_BLOCKTIME */ + +#if KMP_DEBUG + // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG. 
+ int b; + kmp_balign_t * balign = new_thr->th.th_bar; + for( b = 0; b < bs_last_barrier; ++ b ) + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); +#endif + + KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", + __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid )); + + KMP_MB(); + return new_thr; + } + + + /* no, well fork a new one */ + KMP_ASSERT( __kmp_nth == __kmp_all_nth ); + KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity ); + + // + // If this is the first worker thread the RTL is creating, then also + // launch the monitor thread. We try to do this as early as possible. + // + if ( ! TCR_4( __kmp_init_monitor ) ) { + __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); + if ( ! TCR_4( __kmp_init_monitor ) ) { + KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) ); + TCW_4( __kmp_init_monitor, 1 ); + __kmp_create_monitor( & __kmp_monitor ); + KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) ); + #if KMP_OS_WINDOWS + // AC: wait until monitor has started. This is a fix for CQ232808. + // The reason is that if the library is loaded/unloaded in a loop with small (parallel) + // work in between, then there is high probability that monitor thread started after + // the library shutdown. At shutdown it is too late to cope with the problem, because + // when the master is in DllMain (process detach) the monitor has no chances to start + // (it is blocked), and master has no means to inform the monitor that the library has gone, + // because all the memory which the monitor can access is going to be released/reset. + while ( TCR_4(__kmp_init_monitor) < 2 ) { + KMP_YIELD( TRUE ); + } + KF_TRACE( 10, ( "after monitor thread has started\n" ) ); + #endif + } + __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); + } + + KMP_MB(); + for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) { + KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity ); + } + + /* allocate space for it. 
*/ + new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); + + TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); + + if ( __kmp_storage_map ) { + __kmp_print_thread_storage_map( new_thr, new_gtid ); + } + + /* add the reserve serialized team, initialized from the team's master thread */ + { + kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team ); + KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) ); + + new_thr->th.th_serial_team = serial_team = + (kmp_team_t*) __kmp_allocate_team( root, 1, 1, +#if OMPT_SUPPORT + 0, // root parallel id +#endif +#if OMP_40_ENABLED + proc_bind_default, +#endif + &r_icvs, + 0 USE_NESTED_HOT_ARG(NULL) ); + } + KMP_ASSERT ( serial_team ); + serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). + serial_team->t.t_threads[0] = new_thr; + KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", + new_thr ) ); + + /* setup the thread structures */ + __kmp_initialize_info( new_thr, team, new_tid, new_gtid ); + + #if USE_FAST_MEMORY + __kmp_initialize_fast_memory( new_thr ); + #endif /* USE_FAST_MEMORY */ + + #if KMP_USE_BGET + KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL ); + __kmp_initialize_bget( new_thr ); + #endif + + __kmp_init_random( new_thr ); // Initialize random number generator + + /* Initialize these only once when thread is grabbed for a team allocation */ + KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n", + __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); + + int b; + kmp_balign_t * balign = new_thr->th.th_bar; + for(b=0; bth.th_spin_here = FALSE; + new_thr->th.th_next_waiting = 0; + +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; + new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; + new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; + new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; +#endif + + 
TCW_4(new_thr->th.th_in_pool, FALSE); + new_thr->th.th_active_in_pool = FALSE; + TCW_4(new_thr->th.th_active, TRUE); + + /* adjust the global counters */ + __kmp_all_nth ++; + __kmp_nth ++; + + // + // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) + // for low numbers of procs, and method #2 (keyed API call) for higher + // numbers of procs. + // + if ( __kmp_adjust_gtid_mode ) { + if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { + if ( TCR_4(__kmp_gtid_mode) != 2) { + TCW_4(__kmp_gtid_mode, 2); + } + } + else { + if (TCR_4(__kmp_gtid_mode) != 1 ) { + TCW_4(__kmp_gtid_mode, 1); + } + } + } + +#ifdef KMP_ADJUST_BLOCKTIME + /* Adjust blocktime back to zero if necessary */ + /* Middle initialization might not have occurred yet */ + if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { + if ( __kmp_nth > __kmp_avail_proc ) { + __kmp_zero_bt = TRUE; + } + } +#endif /* KMP_ADJUST_BLOCKTIME */ + + /* actually fork it and create the new worker thread */ + KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr )); + __kmp_create_worker( new_gtid, new_thr, __kmp_stksize ); + KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr )); + + + KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid )); + KMP_MB(); + return new_thr; +} + +/* + * reinitialize team for reuse. + * + * The hot team code calls this case at every fork barrier, so EPCC barrier + * test are extremely sensitive to changes in it, esp. writes to the team + * struct, which cause a cache invalidation in all threads. + * + * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! + */ +static void +__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) { + KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n", + team->t.t_threads[0], team ) ); + KMP_DEBUG_ASSERT( team && new_icvs); + KMP_DEBUG_ASSERT( ( ! 
TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); + team->t.t_ident = loc; + + team->t.t_id = KMP_GEN_TEAM_ID(); + + // Copy ICVs to the master thread's implicit taskdata + __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); + copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); + + KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n", + team->t.t_threads[0], team ) ); +} + + +/* initialize the team data structure + * this assumes the t_threads and t_max_nproc are already set + * also, we don't touch the arguments */ +static void +__kmp_initialize_team( + kmp_team_t * team, + int new_nproc, + kmp_internal_control_t * new_icvs, + ident_t * loc +) { + KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) ); + + /* verify */ + KMP_DEBUG_ASSERT( team ); + KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc ); + KMP_DEBUG_ASSERT( team->t.t_threads ); + KMP_MB(); + + team->t.t_master_tid = 0; /* not needed */ + /* team->t.t_master_bar; not needed */ + team->t.t_serialized = new_nproc > 1 ? 
0 : 1; + team->t.t_nproc = new_nproc; + + /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ + team->t.t_next_pool = NULL; + /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */ + + TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ + team->t.t_invoke = NULL; /* not needed */ + + // TODO???: team->t.t_max_active_levels = new_max_active_levels; + team->t.t_sched = new_icvs->sched; + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + team->t.t_fp_control_saved = FALSE; /* not needed */ + team->t.t_x87_fpu_control_word = 0; /* not needed */ + team->t.t_mxcsr = 0; /* not needed */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + team->t.t_construct = 0; + __kmp_init_lock( & team->t.t_single_lock ); + + team->t.t_ordered .dt.t_value = 0; + team->t.t_master_active = FALSE; + + memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t )); + +#ifdef KMP_DEBUG + team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ +#endif + team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ + + team->t.t_control_stack_top = NULL; + + __kmp_reinitialize_team( team, new_icvs, loc ); + + KMP_MB(); + KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) ); +} + +#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED +/* Sets full mask for thread and returns old mask, no changes to structures. */ +static void +__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask ) +{ + if ( KMP_AFFINITY_CAPABLE() ) { + int status; + if ( old_mask != NULL ) { + status = __kmp_get_system_affinity( old_mask, TRUE ); + int error = errno; + if ( status != 0 ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( ChangeThreadAffMaskError ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + } + __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE ); + } +} +#endif + +#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + +// +// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 
+// It calculats the worker + master thread's partition based upon the parent +// thread's partition, and binds each worker to a thread in their partition. +// The master thread's partition should already include its current binding. +// +static void +__kmp_partition_places( kmp_team_t *team ) +{ + // + // Copy the master thread's place partion to the team struct + // + kmp_info_t *master_th = team->t.t_threads[0]; + KMP_DEBUG_ASSERT( master_th != NULL ); + kmp_proc_bind_t proc_bind = team->t.t_proc_bind; + int first_place = master_th->th.th_first_place; + int last_place = master_th->th.th_last_place; + int masters_place = master_th->th.th_current_place; + team->t.t_first_place = first_place; + team->t.t_last_place = last_place; + + KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n", + proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id, + masters_place, first_place, last_place ) ); + + switch ( proc_bind ) { + + case proc_bind_default: + // + // serial teams might have the proc_bind policy set to + // proc_bind_default. It doesn't matter, as we don't + // rebind the master thread for any proc_bind policy. 
+ // + KMP_DEBUG_ASSERT( team->t.t_nproc == 1 ); + break; + + case proc_bind_master: + { + int f; + int n_th = team->t.t_nproc; + for ( f = 1; f < n_th; f++ ) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT( th != NULL ); + th->th.th_first_place = first_place; + th->th.th_last_place = last_place; + th->th.th_new_place = masters_place; + + KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n", + __kmp_gtid_from_thread( team->t.t_threads[f] ), + team->t.t_id, f, masters_place, first_place, last_place ) ); + } + } + break; + + case proc_bind_close: + { + int f; + int n_th = team->t.t_nproc; + int n_places; + if ( first_place <= last_place ) { + n_places = last_place - first_place + 1; + } + else { + n_places = __kmp_affinity_num_masks - first_place + last_place + 1; + } + if ( n_th <= n_places ) { + int place = masters_place; + for ( f = 1; f < n_th; f++ ) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT( th != NULL ); + + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + th->th.th_first_place = first_place; + th->th.th_last_place = last_place; + th->th.th_new_place = place; + + KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", + __kmp_gtid_from_thread( team->t.t_threads[f] ), + team->t.t_id, f, place, first_place, last_place ) ); + } + } + else { + int S, rem, gap, s_count; + S = n_th / n_places; + s_count = 0; + rem = n_th - ( S * n_places ); + gap = rem > 0 ? 
n_places/rem : n_places; + int place = masters_place; + int gap_ct = gap; + for ( f = 0; f < n_th; f++ ) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT( th != NULL ); + + th->th.th_first_place = first_place; + th->th.th_last_place = last_place; + th->th.th_new_place = place; + s_count++; + + if ( (s_count == S) && rem && (gap_ct == gap) ) { + // do nothing, add an extra thread to place on next iteration + } + else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { + // we added an extra thread to this place; move to next place + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + s_count = 0; + gap_ct = 1; + rem--; + } + else if (s_count == S) { // place full; don't add extra + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + gap_ct++; + s_count = 0; + } + + KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", + __kmp_gtid_from_thread( team->t.t_threads[f] ), + team->t.t_id, f, th->th.th_new_place, first_place, + last_place ) ); + } + KMP_DEBUG_ASSERT( place == masters_place ); + } + } + break; + + case proc_bind_spread: + { + int f; + int n_th = team->t.t_nproc; + int n_places; + if ( first_place <= last_place ) { + n_places = last_place - first_place + 1; + } + else { + n_places = __kmp_affinity_num_masks - first_place + last_place + 1; + } + if ( n_th <= n_places ) { + int place = masters_place; + int S = n_places/n_th; + int s_count, rem, gap, gap_ct; + rem = n_places - n_th*S; + gap = rem ? 
n_th/rem : 1; + gap_ct = gap; + for ( f = 0; f < n_th; f++ ) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT( th != NULL ); + + th->th.th_first_place = place; + th->th.th_new_place = place; + s_count = 1; + while (s_count < S) { + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + s_count++; + } + if (rem && (gap_ct == gap)) { + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + rem--; + gap_ct = 0; + } + th->th.th_last_place = place; + gap_ct++; + + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + + KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", + __kmp_gtid_from_thread( team->t.t_threads[f] ), + team->t.t_id, f, th->th.th_new_place, + th->th.th_first_place, th->th.th_last_place ) ); + } + KMP_DEBUG_ASSERT( place == masters_place ); + } + else { + int S, rem, gap, s_count; + S = n_th / n_places; + s_count = 0; + rem = n_th - ( S * n_places ); + gap = rem > 0 ? 
n_places/rem : n_places; + int place = masters_place; + int gap_ct = gap; + for ( f = 0; f < n_th; f++ ) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT( th != NULL ); + + th->th.th_first_place = place; + th->th.th_last_place = place; + th->th.th_new_place = place; + s_count++; + + if ( (s_count == S) && rem && (gap_ct == gap) ) { + // do nothing, add an extra thread to place on next iteration + } + else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { + // we added an extra thread to this place; move on to next place + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + s_count = 0; + gap_ct = 1; + rem--; + } + else if (s_count == S) { // place is full; don't add extra thread + if ( place == last_place ) { + place = first_place; + } + else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { + place = 0; + } + else { + place++; + } + gap_ct++; + s_count = 0; + } + + KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", + __kmp_gtid_from_thread( team->t.t_threads[f] ), + team->t.t_id, f, th->th.th_new_place, + th->th.th_first_place, th->th.th_last_place) ); + } + KMP_DEBUG_ASSERT( place == masters_place ); + } + } + break; + + default: + break; + } + + KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) ); +} + +#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */ + +/* allocate a new team data structure to use. take one off of the free pool if available */ +kmp_team_t * +__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, +#if OMPT_SUPPORT + ompt_parallel_id_t ompt_parallel_id, +#endif +#if OMP_40_ENABLED + kmp_proc_bind_t new_proc_bind, +#endif + kmp_internal_control_t *new_icvs, + int argc USE_NESTED_HOT_ARG(kmp_info_t *master) ) +{ + KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team); + int f; + kmp_team_t *team; + int use_hot_team = ! 
root->r.r_active; + int level = 0; + + KA_TRACE( 20, ("__kmp_allocate_team: called\n")); + KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 ); + KMP_DEBUG_ASSERT( max_nproc >= new_nproc ); + KMP_MB(); + +#if KMP_NESTED_HOT_TEAMS + kmp_hot_team_ptr_t *hot_teams; + if( master ) { + team = master->th.th_team; + level = team->t.t_active_level; + if( master->th.th_teams_microtask ) { // in teams construct? + if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1 + team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams + master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams + ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise + } + } + hot_teams = master->th.th_hot_teams; + if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team ) + { // hot team has already been allocated for given level + use_hot_team = 1; + } else { + use_hot_team = 0; + } + } +#endif + // Optimization to use a "hot" team + if( use_hot_team && new_nproc > 1 ) { + KMP_DEBUG_ASSERT( new_nproc == max_nproc ); +#if KMP_NESTED_HOT_TEAMS + team = hot_teams[level].hot_team; +#else + team = root->r.r_hot_team; +#endif +#if KMP_DEBUG + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n", + team->t.t_task_team[0], team->t.t_task_team[1] )); + } +#endif + + // Has the number of threads changed? + /* Let's assume the most common case is that the number of threads is unchanged, and + put that case first. 
*/ + if (team->t.t_nproc == new_nproc) { // Check changes in number of threads + KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" )); + // This case can mean that omp_set_num_threads() was called and the hot team size + // was already reduced, so we check the special flag + if ( team->t.t_size_changed == -1 ) { + team->t.t_size_changed = 1; + } else { + team->t.t_size_changed = 0; + } + + // TODO???: team->t.t_max_active_levels = new_max_active_levels; + team->t.t_sched = new_icvs->sched; + + __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); + + KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", + 0, team->t.t_threads[0], team ) ); + __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); + +#if OMP_40_ENABLED +# if KMP_AFFINITY_SUPPORTED + if ( ( team->t.t_size_changed == 0 ) + && ( team->t.t_proc_bind == new_proc_bind ) ) { + KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n", + team->t.t_id, new_proc_bind, team->t.t_first_place, + team->t.t_last_place ) ); + } + else { + team->t.t_proc_bind = new_proc_bind; + __kmp_partition_places( team ); + } +# else + if ( team->t.t_proc_bind != new_proc_bind ) { + team->t.t_proc_bind = new_proc_bind; + } +# endif /* KMP_AFFINITY_SUPPORTED */ +#endif /* OMP_40_ENABLED */ + } + else if( team->t.t_nproc > new_nproc ) { + KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc )); + + team->t.t_size_changed = 1; +#if KMP_NESTED_HOT_TEAMS + if( __kmp_hot_teams_mode == 0 ) { + // AC: saved number of threads should correspond to team's value in this mode, + // can be bigger in mode 1, when hot team has some threads in reserve + KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); + hot_teams[level].hot_team_nth = new_nproc; +#endif // KMP_NESTED_HOT_TEAMS + /* release the extra threads we don't need any more */ + for( f = new_nproc ; f < team->t.t_nproc ; 
f++ ) { + KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); + if ( __kmp_tasking_mode != tskm_immediate_exec) { + // When decreasing team size, threads no longer in the team should unref task team. + team->t.t_threads[f]->th.th_task_team = NULL; + } + __kmp_free_thread( team->t.t_threads[ f ] ); + team->t.t_threads[ f ] = NULL; + } +#if KMP_NESTED_HOT_TEAMS + } // (__kmp_hot_teams_mode == 0) +#endif // KMP_NESTED_HOT_TEAMS + team->t.t_nproc = new_nproc; + // TODO???: team->t.t_max_active_levels = new_max_active_levels; + team->t.t_sched = new_icvs->sched; + __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); + + /* update the remaining threads */ + for(f = 0; f < new_nproc; ++f) { + team->t.t_threads[f]->th.th_team_nproc = new_nproc; + } + // restore the current task state of the master thread: should be the implicit task + KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", + 0, team->t.t_threads[0], team ) ); + + __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); + +#ifdef KMP_DEBUG + for ( f = 0; f < team->t.t_nproc; f++ ) { + KMP_DEBUG_ASSERT( team->t.t_threads[f] && + team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); + } +#endif + +#if OMP_40_ENABLED + team->t.t_proc_bind = new_proc_bind; +# if KMP_AFFINITY_SUPPORTED + __kmp_partition_places( team ); +# endif +#endif + } + else { // team->t.t_nproc < new_nproc +#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED + kmp_affin_mask_t *old_mask; + if ( KMP_AFFINITY_CAPABLE() ) { + KMP_CPU_ALLOC(old_mask); + } +#endif + + KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc )); + + team->t.t_size_changed = 1; + + +#if KMP_NESTED_HOT_TEAMS + int avail_threads = hot_teams[level].hot_team_nth; + if( new_nproc < avail_threads ) + avail_threads = new_nproc; + kmp_info_t **other_threads = team->t.t_threads; + for ( f = team->t.t_nproc; f < avail_threads; ++f ) { + // Adjust barrier data of reserved threads (if any) of the 
team + // Other data will be set in __kmp_initialize_info() below. + int b; + kmp_balign_t * balign = other_threads[f]->th.th_bar; + for ( b = 0; b < bs_last_barrier; ++ b ) { + balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); +#if USE_DEBUGGER + balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; +#endif + } + } + if( hot_teams[level].hot_team_nth >= new_nproc ) { + // we have all needed threads in reserve, no need to allocate any + // this only possible in mode 1, cannot have reserved threads in mode 0 + KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); + team->t.t_nproc = new_nproc; // just get reserved threads involved + } else { + // we may have some threads in reserve, but not enough + team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any + hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size +#endif // KMP_NESTED_HOT_TEAMS + if(team->t.t_max_nproc < new_nproc) { + /* reallocate larger arrays */ + __kmp_reallocate_team_arrays(team, new_nproc); + __kmp_reinitialize_team( team, new_icvs, NULL ); + } + +#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED + /* Temporarily set full mask for master thread before + creation of workers. The reason is that workers inherit + the affinity from master, so if a lot of workers are + created on the single core quickly, they don't get + a chance to set their own affinity for a long time. 
+ */ + __kmp_set_thread_affinity_mask_full_tmp( old_mask ); +#endif + + /* allocate new threads for the hot team */ + for( f = team->t.t_nproc ; f < new_nproc ; f++ ) { + kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f ); + KMP_DEBUG_ASSERT( new_worker ); + team->t.t_threads[ f ] = new_worker; + + KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n", + team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f, + team->t.t_bar[bs_forkjoin_barrier].b_arrived, + team->t.t_bar[bs_plain_barrier].b_arrived ) ); + + { // Initialize barrier data for new threads. + int b; + kmp_balign_t * balign = new_worker->th.th_bar; + for( b = 0; b < bs_last_barrier; ++ b ) { + balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); +#if USE_DEBUGGER + balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; +#endif + } + } + } + +#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED + if ( KMP_AFFINITY_CAPABLE() ) { + /* Restore initial master thread's affinity mask */ + __kmp_set_system_affinity( old_mask, TRUE ); + KMP_CPU_FREE(old_mask); + } +#endif +#if KMP_NESTED_HOT_TEAMS + } // end of check of t_nproc vs. new_nproc vs. hot_team_nth +#endif // KMP_NESTED_HOT_TEAMS + /* make sure everyone is syncronized */ + int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below + __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident ); + + /* reinitialize the threads */ + KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); + for (f=0; f < team->t.t_nproc; ++f) + __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); + if (level) { // set th_task_state for new threads in nested hot team + // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the + // th_task_state for the new threads. 
th_task_state for master thread will not be accurate until + // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value. + for (f=old_nproc; f < team->t.t_nproc; ++f) + team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level]; + } + else { // set th_task_state for new threads in non-nested hot team + int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state + for (f=old_nproc; f < team->t.t_nproc; ++f) + team->t.t_threads[f]->th.th_task_state = old_state; + } + +#ifdef KMP_DEBUG + for ( f = 0; f < team->t.t_nproc; ++ f ) { + KMP_DEBUG_ASSERT( team->t.t_threads[f] && + team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); + } +#endif + +#if OMP_40_ENABLED + team->t.t_proc_bind = new_proc_bind; +# if KMP_AFFINITY_SUPPORTED + __kmp_partition_places( team ); +# endif +#endif + } // Check changes in number of threads + +#if OMP_40_ENABLED + kmp_info_t *master = team->t.t_threads[0]; + if( master->th.th_teams_microtask ) { + for( f = 1; f < new_nproc; ++f ) { + // propagate teams construct specific info to workers + kmp_info_t *thr = team->t.t_threads[f]; + thr->th.th_teams_microtask = master->th.th_teams_microtask; + thr->th.th_teams_level = master->th.th_teams_level; + thr->th.th_teams_size = master->th.th_teams_size; + } + } +#endif /* OMP_40_ENABLED */ +#if KMP_NESTED_HOT_TEAMS + if( level ) { + // Sync barrier state for nested hot teams, not needed for outermost hot team. 
+ for( f = 1; f < new_nproc; ++f ) { + kmp_info_t *thr = team->t.t_threads[f]; + int b; + kmp_balign_t * balign = thr->th.th_bar; + for( b = 0; b < bs_last_barrier; ++ b ) { + balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); +#if USE_DEBUGGER + balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; +#endif + } + } + } +#endif // KMP_NESTED_HOT_TEAMS + + /* reallocate space for arguments if necessary */ + __kmp_alloc_argv_entries( argc, team, TRUE ); + team->t.t_argc = argc; + // + // The hot team re-uses the previous task team, + // if untouched during the previous release->gather phase. + // + + KF_TRACE( 10, ( " hot_team = %p\n", team ) ); + +#if KMP_DEBUG + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n", + team->t.t_task_team[0], team->t.t_task_team[1] )); + } +#endif + +#if OMPT_SUPPORT + __ompt_team_assign_id(team, ompt_parallel_id); +#endif + + KMP_MB(); + + return team; + } + + /* next, let's try to take one from the team pool */ + KMP_MB(); + for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; ) + { + /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */ + if ( team->t.t_max_nproc >= max_nproc ) { + /* take this team from the team pool */ + __kmp_team_pool = team->t.t_next_pool; + + /* setup the team for fresh use */ + __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); + + KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", + &team->t.t_task_team[0], &team->t.t_task_team[1]) ); + team->t.t_task_team[0] = NULL; + team->t.t_task_team[1] = NULL; + + /* reallocate space for arguments if necessary */ + __kmp_alloc_argv_entries( argc, team, TRUE ); + team->t.t_argc = argc; + + KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", + 
team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); + { // Initialize barrier data. + int b; + for ( b = 0; b < bs_last_barrier; ++ b) { + team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; +#if USE_DEBUGGER + team->t.t_bar[ b ].b_master_arrived = 0; + team->t.t_bar[ b ].b_team_arrived = 0; +#endif + } + } + +#if OMP_40_ENABLED + team->t.t_proc_bind = new_proc_bind; +#endif + + KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id )); + +#if OMPT_SUPPORT + __ompt_team_assign_id(team, ompt_parallel_id); +#endif + + KMP_MB(); + + return team; + } + + /* reap team if it is too small, then loop back and check the next one */ + /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */ + /* TODO: Use technique to find the right size hot-team, don't reap them */ + team = __kmp_reap_team( team ); + __kmp_team_pool = team; + } + + /* nothing available in the pool, no matter, make a new team! */ + KMP_MB(); + team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) ); + + /* and set it up */ + team->t.t_max_nproc = max_nproc; + /* NOTE well, for some reason allocating one big buffer and dividing it + * up seems to really hurt performance a lot on the P4, so, let's not use + * this... 
*/ + __kmp_allocate_team_arrays( team, max_nproc ); + + KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) ); + __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); + + KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", + &team->t.t_task_team[0], &team->t.t_task_team[1] ) ); + team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate + team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate + + if ( __kmp_storage_map ) { + __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc ); + } + + /* allocate space for arguments */ + __kmp_alloc_argv_entries( argc, team, FALSE ); + team->t.t_argc = argc; + + KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", + team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); + { // Initialize barrier data. + int b; + for ( b = 0; b < bs_last_barrier; ++ b ) { + team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; +#if USE_DEBUGGER + team->t.t_bar[ b ].b_master_arrived = 0; + team->t.t_bar[ b ].b_team_arrived = 0; +#endif + } + } + +#if OMP_40_ENABLED + team->t.t_proc_bind = new_proc_bind; +#endif + +#if OMPT_SUPPORT + __ompt_team_assign_id(team, ompt_parallel_id); + team->t.ompt_serialized_team_info = NULL; +#endif + + KMP_MB(); + + KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id )); + + return team; +} + +/* TODO implement hot-teams at all levels */ +/* TODO implement lazy thread release on demand (disband request) */ + +/* free the team. return it to the team pool. 
release all the threads + * associated with it */ +void +__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) ) +{ + int f; + KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id )); + + /* verify state */ + KMP_DEBUG_ASSERT( root ); + KMP_DEBUG_ASSERT( team ); + KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc ); + KMP_DEBUG_ASSERT( team->t.t_threads ); + + int use_hot_team = team == root->r.r_hot_team; +#if KMP_NESTED_HOT_TEAMS + int level; + kmp_hot_team_ptr_t *hot_teams; + if( master ) { + level = team->t.t_active_level - 1; + if( master->th.th_teams_microtask ) { // in teams construct? + if( master->th.th_teams_size.nteams > 1 ) { + ++level; // level was not increased in teams construct for team_of_masters + } + if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && + master->th.th_teams_level == team->t.t_level ) { + ++level; // level was not increased in teams construct for team_of_workers before the parallel + } // team->t.t_level will be increased inside parallel + } + hot_teams = master->th.th_hot_teams; + if( level < __kmp_hot_teams_max_level ) { + KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team ); + use_hot_team = 1; + } + } +#endif // KMP_NESTED_HOT_TEAMS + + /* team is done working */ + TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library. + team->t.t_copyin_counter = 0; // init counter for possible reuse + // Do not reset pointer to parent team to NULL for hot teams. + + /* if we are non-hot team, release our threads */ + if( ! 
use_hot_team ) { + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + // Delete task teams + int tt_idx; + for (tt_idx=0; tt_idx<2; ++tt_idx) { + kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; + if ( task_team != NULL ) { + for (f=0; f < team->t.t_nproc; ++f) { // Have all threads unref task teams + team->t.t_threads[f]->th.th_task_team = NULL; + } + KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) ); +#if KMP_NESTED_HOT_TEAMS + __kmp_free_task_team( master, task_team ); +#endif + team->t.t_task_team[tt_idx] = NULL; + } + } + } + + // Reset pointer to parent team only for non-hot teams. + team->t.t_parent = NULL; + + + /* free the worker threads */ + for ( f = 1; f < team->t.t_nproc; ++ f ) { + KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); + __kmp_free_thread( team->t.t_threads[ f ] ); + team->t.t_threads[ f ] = NULL; + } + + + /* put the team back in the team pool */ + /* TODO limit size of team pool, call reap_team if pool too large */ + team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool; + __kmp_team_pool = (volatile kmp_team_t*) team; + } + + KMP_MB(); +} + + +/* reap the team. destroy it, reclaim all its resources and free its memory */ +kmp_team_t * +__kmp_reap_team( kmp_team_t *team ) +{ + kmp_team_t *next_pool = team->t.t_next_pool; + + KMP_DEBUG_ASSERT( team ); + KMP_DEBUG_ASSERT( team->t.t_dispatch ); + KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); + KMP_DEBUG_ASSERT( team->t.t_threads ); + KMP_DEBUG_ASSERT( team->t.t_argv ); + + /* TODO clean the threads that are a part of this? */ + + /* free stuff */ + + __kmp_free_team_arrays( team ); + if ( team->t.t_argv != &team->t.t_inline_argv[0] ) + __kmp_free( (void*) team->t.t_argv ); + __kmp_free( team ); + + KMP_MB(); + return next_pool; +} + +// +// Free the thread. Don't reap it, just place it on the pool of available +// threads. 
+// +// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid +// binding for the affinity mechanism to be useful. +// +// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. +// However, we want to avoid a potential performance problem by always +// scanning through the list to find the correct point at which to insert +// the thread (potential N**2 behavior). To do this we keep track of the +// last place a thread struct was inserted (__kmp_thread_pool_insert_pt). +// With single-level parallelism, threads will always be added to the tail +// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested +// parallelism, all bets are off and we may need to scan through the entire +// free list. +// +// This change also has a potentially large performance benefit, for some +// applications. Previously, as threads were freed from the hot team, they +// would be placed back on the free list in inverse order. If the hot team +// grew back to its original size, then the freed thread would be placed +// back on the hot team in reverse order. This could cause bad cache +// locality problems on programs where the size of the hot team regularly +// grew and shrunk. +// +// Now, for single-level parallelism, the OMP tid is always == gtid. +// +void +__kmp_free_thread( kmp_info_t *this_th ) +{ + int gtid; + kmp_info_t **scan; + + KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", + __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid )); + + KMP_DEBUG_ASSERT( this_th ); + + // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team). 
+ int b; + kmp_balign_t *balign = this_th->th.th_bar; + for (b=0; b < bs_last_barrier; ++b) { + if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) + balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; + balign[b].bb.team = NULL; + } + this_th->th.th_task_state = 0; + + + /* put thread back on the free pool */ + TCW_PTR(this_th->th.th_team, NULL); + TCW_PTR(this_th->th.th_root, NULL); + TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ + + // + // If the __kmp_thread_pool_insert_pt is already past the new insert + // point, then we need to re-scan the entire list. + // + gtid = this_th->th.th_info.ds.ds_gtid; + if ( __kmp_thread_pool_insert_pt != NULL ) { + KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL ); + if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) { + __kmp_thread_pool_insert_pt = NULL; + } + } + + // + // Scan down the list to find the place to insert the thread. + // scan is the address of a link in the list, possibly the address of + // __kmp_thread_pool itself. + // + // In the absence of nested parallelism, the for loop will have 0 iterations. + // + if ( __kmp_thread_pool_insert_pt != NULL ) { + scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool ); + } + else { + scan = (kmp_info_t **)&__kmp_thread_pool; + } + for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid ); + scan = &( (*scan)->th.th_next_pool ) ); + + // + // Insert the new element on the list, and set __kmp_thread_pool_insert_pt + // to its address. 
+ // + TCW_PTR(this_th->th.th_next_pool, *scan); + __kmp_thread_pool_insert_pt = *scan = this_th; + KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL ) + || ( this_th->th.th_info.ds.ds_gtid + < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) ); + TCW_4(this_th->th.th_in_pool, TRUE); + __kmp_thread_pool_nth++; + + TCW_4(__kmp_nth, __kmp_nth - 1); + +#ifdef KMP_ADJUST_BLOCKTIME + /* Adjust blocktime back to user setting or default if necessary */ + /* Middle initialization might never have occurred */ + if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { + KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); + if ( __kmp_nth <= __kmp_avail_proc ) { + __kmp_zero_bt = FALSE; + } + } +#endif /* KMP_ADJUST_BLOCKTIME */ + + KMP_MB(); +} + + +/* ------------------------------------------------------------------------ */ + +void * +__kmp_launch_thread( kmp_info_t *this_thr ) +{ + int gtid = this_thr->th.th_info.ds.ds_gtid; +/* void *stack_data;*/ + kmp_team_t *(*volatile pteam); + + KMP_MB(); + KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) ); + + if( __kmp_env_consistency_check ) { + this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak? + } + +#if OMPT_SUPPORT + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + this_thr->th.ompt_thread_info.wait_id = 0; + this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); + if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { + __ompt_thread_begin(ompt_thread_worker, gtid); + } + } +#endif + + /* This is the place where threads wait for work */ + while( ! 
TCR_4(__kmp_global.g.g_done) ) { + KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] ); + KMP_MB(); + + /* wait for work to do */ + KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid )); + +#if OMPT_SUPPORT + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_idle; + } +#endif + + /* No tid yet since not part of a team */ + __kmp_fork_barrier( gtid, KMP_GTID_DNE ); + +#if OMPT_SUPPORT + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif + + pteam = (kmp_team_t *(*))(& this_thr->th.th_team); + + /* have we been allocated? */ + if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) { + /* we were just woken up, so run our new task */ + if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) { + int rc; + KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", + gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); + + updateHWFPControl (*pteam); + +#if OMPT_SUPPORT + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + // Initialize OMPT task id for implicit task. 
+ int tid = __kmp_tid_from_gtid(gtid); + (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id = + __ompt_task_id_new(tid); + } +#endif + + KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); + { + KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke); + rc = (*pteam)->t.t_invoke( gtid ); + } + KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); + KMP_ASSERT( rc ); + +#if OMPT_SUPPORT + if (ompt_enabled) { + /* no frame set while outside task */ + int tid = __kmp_tid_from_gtid(gtid); + (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0; + + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif + KMP_MB(); + KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", + gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); + } + /* join barrier after parallel region */ + __kmp_join_barrier( gtid ); + } + } + TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); + +#if OMPT_SUPPORT + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_thread_end)) { + __ompt_thread_end(ompt_thread_worker, gtid); + } +#endif + + this_thr->th.th_task_team = NULL; + /* run the destructors for the threadprivate data for this thread */ + __kmp_common_destroy_gtid( gtid ); + + KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) ); + KMP_MB(); + return this_thr; +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_internal_end_dest( void *specific_gtid ) +{ + #if KMP_COMPILER_ICC + #pragma warning( push ) + #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits + #endif + // Make sure no significant bits are lost + int gtid = (kmp_intptr_t)specific_gtid - 1; + #if KMP_COMPILER_ICC + #pragma warning( pop ) + #endif + + KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid)); + /* NOTE: the gtid is stored as gitd+1 in 
the thread-local-storage + * this is because 0 is reserved for the nothing-stored case */ + + /* josh: One reason for setting the gtid specific data even when it is being + destroyed by pthread is to allow gtid lookup through thread specific data + (__kmp_gtid_get_specific). Some of the code, especially stat code, + that gets executed in the call to __kmp_internal_end_thread, actually + gets the gtid through the thread specific data. Setting it here seems + rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread + to run smoothly. + todo: get rid of this after we remove the dependence on + __kmp_gtid_get_specific + */ + if(gtid >= 0 && KMP_UBER_GTID(gtid)) + __kmp_gtid_set_specific( gtid ); + #ifdef KMP_TDATA_GTID + __kmp_gtid = gtid; + #endif + __kmp_internal_end_thread( gtid ); +} + +#if KMP_OS_UNIX && KMP_DYNAMIC_LIB + +// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases destructors work +// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker +// option in makefile.mk works fine. + +__attribute__(( destructor )) +void +__kmp_internal_end_dtor( void ) +{ + __kmp_internal_end_atexit(); +} + +void +__kmp_internal_end_fini( void ) +{ + __kmp_internal_end_atexit(); +} + +#endif + +/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */ +void +__kmp_internal_end_atexit( void ) +{ + KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) ); + /* [Windows] + josh: ideally, we want to completely shutdown the library in this atexit handler, but + stat code that depends on thread specific data for gtid fails because that data becomes + unavailable at some point during the shutdown, so we call __kmp_internal_end_thread + instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the + stat code and use __kmp_internal_end_library to cleanly shutdown the library. + +// TODO: Can some of this comment about GVS be removed? 
+ I suspect that the offending stat code is executed when the calling thread tries to + clean up a dead root thread's data structures, resulting in GVS code trying to close + the GVS structures for that thread, but since the stat code uses + __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is + cleaning up itself instead of another thread, it gets confused. This happens because + allowing a thread to unregister and cleanup another thread is a recent modification for + addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a + thread may end up trying to unregister another thread only if thread death does not + trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread + specific data destructor function to detect thread death. For Windows dynamic, there + is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the + workaround is applicable only for Windows static stat library. + */ + __kmp_internal_end_library( -1 ); + #if KMP_OS_WINDOWS + __kmp_close_console(); + #endif +} + +static void +__kmp_reap_thread( + kmp_info_t * thread, + int is_root +) { + + // It is assumed __kmp_forkjoin_lock is acquired. + + int gtid; + + KMP_DEBUG_ASSERT( thread != NULL ); + + gtid = thread->th.th_info.ds.ds_gtid; + + if ( ! is_root ) { + + if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { + /* Assume the threads are at the fork barrier here */ + KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) ); + /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */ + kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread); + __kmp_release_64(&flag); + }; // if + + + // Terminate OS thread. + __kmp_reap_worker( thread ); + + // + // The thread was killed asynchronously. If it was actively + // spinning in the in the thread pool, decrement the global count. 
+ // + // There is a small timing hole here - if the worker thread was + // just waking up after sleeping in the pool, had reset it's + // th_active_in_pool flag but not decremented the global counter + // __kmp_thread_pool_active_nth yet, then the global counter + // might not get updated. + // + // Currently, this can only happen as the library is unloaded, + // so there are no harmful side effects. + // + if ( thread->th.th_active_in_pool ) { + thread->th.th_active_in_pool = FALSE; + KMP_TEST_THEN_DEC32( + (kmp_int32 *) &__kmp_thread_pool_active_nth ); + KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); + } + + // Decrement # of [worker] threads in the pool. + KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 ); + --__kmp_thread_pool_nth; + }; // if + + // Free the fast memory for tasking + #if USE_FAST_MEMORY + __kmp_free_fast_memory( thread ); + #endif /* USE_FAST_MEMORY */ + + __kmp_suspend_uninitialize_thread( thread ); + + KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread ); + TCW_SYNC_PTR(__kmp_threads[gtid], NULL); + + -- __kmp_all_nth; + // __kmp_nth was decremented when thread is added to the pool. 
+ +#ifdef KMP_ADJUST_BLOCKTIME + /* Adjust blocktime back to user setting or default if necessary */ + /* Middle initialization might never have occurred */ + if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { + KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); + if ( __kmp_nth <= __kmp_avail_proc ) { + __kmp_zero_bt = FALSE; + } + } +#endif /* KMP_ADJUST_BLOCKTIME */ + + /* free the memory being used */ + if( __kmp_env_consistency_check ) { + if ( thread->th.th_cons ) { + __kmp_free_cons_stack( thread->th.th_cons ); + thread->th.th_cons = NULL; + }; // if + } + + if ( thread->th.th_pri_common != NULL ) { + __kmp_free( thread->th.th_pri_common ); + thread->th.th_pri_common = NULL; + }; // if + + if (thread->th.th_task_state_memo_stack != NULL) { + __kmp_free(thread->th.th_task_state_memo_stack); + thread->th.th_task_state_memo_stack = NULL; + } + + #if KMP_USE_BGET + if ( thread->th.th_local.bget_data != NULL ) { + __kmp_finalize_bget( thread ); + }; // if + #endif + +#if KMP_AFFINITY_SUPPORTED + if ( thread->th.th_affin_mask != NULL ) { + KMP_CPU_FREE( thread->th.th_affin_mask ); + thread->th.th_affin_mask = NULL; + }; // if +#endif /* KMP_AFFINITY_SUPPORTED */ + + __kmp_reap_team( thread->th.th_serial_team ); + thread->th.th_serial_team = NULL; + __kmp_free( thread ); + + KMP_MB(); + +} // __kmp_reap_thread + +static void +__kmp_internal_end(void) +{ + int i; + + /* First, unregister the library */ + __kmp_unregister_library(); + + #if KMP_OS_WINDOWS + /* In Win static library, we can't tell when a root actually dies, so we + reclaim the data structures for any root threads that have died but not + unregistered themselves, in order to shut down cleanly. + In Win dynamic library we also can't tell when a thread dies. 
+ */ + __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots + #endif + + for( i=0 ; i<__kmp_threads_capacity ; i++ ) + if( __kmp_root[i] ) + if( __kmp_root[i]->r.r_active ) + break; + KMP_MB(); /* Flush all pending memory write invalidates. */ + TCW_SYNC_4(__kmp_global.g.g_done, TRUE); + + if ( i < __kmp_threads_capacity ) { + // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? + KMP_MB(); /* Flush all pending memory write invalidates. */ + + // + // Need to check that monitor was initialized before reaping it. + // If we are called form __kmp_atfork_child (which sets + // __kmp_init_parallel = 0), then __kmp_monitor will appear to + // contain valid data, but it is only valid in the parent process, + // not the child. + // + // New behavior (201008): instead of keying off of the flag + // __kmp_init_parallel, the monitor thread creation is keyed off + // of the new flag __kmp_init_monitor. + // + __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); + if ( TCR_4( __kmp_init_monitor ) ) { + __kmp_reap_monitor( & __kmp_monitor ); + TCW_4( __kmp_init_monitor, 0 ); + } + __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); + KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); + } else { + /* TODO move this to cleanup code */ + #ifdef KMP_DEBUG + /* make sure that everything has properly ended */ + for ( i = 0; i < __kmp_threads_capacity; i++ ) { + if( __kmp_root[i] ) { +// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here + KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active? + } + } + #endif + + KMP_MB(); + + // Reap the worker threads. + // This is valid for now, but be careful if threads are reaped sooner. + while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool. + // Get the next thread from the pool. + kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool; + __kmp_thread_pool = thread->th.th_next_pool; + // Reap it. 
+ thread->th.th_next_pool = NULL; + thread->th.th_in_pool = FALSE; + __kmp_reap_thread( thread, 0 ); + }; // while + __kmp_thread_pool_insert_pt = NULL; + + // Reap teams. + while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool. + // Get the next team from the pool. + kmp_team_t * team = (kmp_team_t *) __kmp_team_pool; + __kmp_team_pool = team->t.t_next_pool; + // Reap it. + team->t.t_next_pool = NULL; + __kmp_reap_team( team ); + }; // while + + __kmp_reap_task_teams( ); + + for ( i = 0; i < __kmp_threads_capacity; ++ i ) { + // TBD: Add some checking... + // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); + } + + /* Make sure all threadprivate destructors get run by joining with all worker + threads before resetting this flag */ + TCW_SYNC_4(__kmp_init_common, FALSE); + + KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) ); + KMP_MB(); + + // + // See note above: One of the possible fixes for CQ138434 / CQ140126 + // + // FIXME: push both code fragments down and CSE them? + // push them into __kmp_cleanup() ? + // + __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); + if ( TCR_4( __kmp_init_monitor ) ) { + __kmp_reap_monitor( & __kmp_monitor ); + TCW_4( __kmp_init_monitor, 0 ); + } + __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); + KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); + + } /* else !__kmp_global.t_active */ + TCW_4(__kmp_init_gtid, FALSE); + KMP_MB(); /* Flush all pending memory write invalidates. */ + + + __kmp_cleanup(); +#if OMPT_SUPPORT + ompt_fini(); +#endif +} + +void +__kmp_internal_end_library( int gtid_req ) +{ + /* if we have already cleaned up, don't try again, it wouldn't be pretty */ + /* this shouldn't be a race condition because __kmp_internal_end() is the + * only place to clear __kmp_serial_init */ + /* we'll check this later too, after we get the lock */ + // 2009-09-06: We do not set g_abort without setting g_done. 
This check looks redundaant, + // because the next check will work in any case. + if( __kmp_global.g.g_abort ) { + KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" )); + /* TODO abort? */ + return; + } + if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { + KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" )); + return; + } + + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* find out who we are and what we should do */ + { + int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); + KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req )); + if( gtid == KMP_GTID_SHUTDOWN ) { + KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" )); + return; + } else if( gtid == KMP_GTID_MONITOR ) { + KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" )); + return; + } else if( gtid == KMP_GTID_DNE ) { + KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" )); + /* we don't know who we are, but we may still shutdown the library */ + } else if( KMP_UBER_GTID( gtid )) { + /* unregister ourselves as an uber thread. gtid is no longer valid */ + if( __kmp_root[gtid]->r.r_active ) { + __kmp_global.g.g_abort = -1; + TCW_SYNC_4(__kmp_global.g.g_done, TRUE); + KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid )); + return; + } else { + KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid )); + __kmp_unregister_root_current_thread( gtid ); + } + } else { + /* worker threads may call this function through the atexit handler, if they call exit() */ + /* For now, skip the usual subsequent processing and just dump the debug buffer. 
+ TODO: do a thorough shutdown instead + */ + #ifdef DUMP_DEBUG_ON_EXIT + if ( __kmp_debug_buf ) + __kmp_dump_debug_buffer( ); + #endif + return; + } + } + /* synchronize the termination process */ + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + + /* have we already finished */ + if( __kmp_global.g.g_abort ) { + KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" )); + /* TODO abort? */ + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + return; + } + if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + return; + } + + /* We need this lock to enforce mutex between this reading of + __kmp_threads_capacity and the writing by __kmp_register_root. + Alternatively, we can use a counter of roots that is + atomically updated by __kmp_get_global_thread_id_reg, + __kmp_do_serial_initialize and __kmp_internal_end_*. + */ + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + + /* now we can safely conduct the actual termination */ + __kmp_internal_end(); + + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + + KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) ); + + #ifdef DUMP_DEBUG_ON_EXIT + if ( __kmp_debug_buf ) + __kmp_dump_debug_buffer(); + #endif + + #if KMP_OS_WINDOWS + __kmp_close_console(); + #endif + + __kmp_fini_allocator(); + +} // __kmp_internal_end_library + +void +__kmp_internal_end_thread( int gtid_req ) +{ + int i; + + /* if we have already cleaned up, don't try again, it wouldn't be pretty */ + /* this shouldn't be a race condition because __kmp_internal_end() is the + * only place to clear __kmp_serial_init */ + /* we'll check this later too, after we get the lock */ + // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant, + // because the next check will work in any case. 
+ if( __kmp_global.g.g_abort ) { + KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" )); + /* TODO abort? */ + return; + } + if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { + KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" )); + return; + } + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* find out who we are and what we should do */ + { + int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); + KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req )); + if( gtid == KMP_GTID_SHUTDOWN ) { + KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" )); + return; + } else if( gtid == KMP_GTID_MONITOR ) { + KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" )); + return; + } else if( gtid == KMP_GTID_DNE ) { + KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" )); + return; + /* we don't know who we are */ + } else if( KMP_UBER_GTID( gtid )) { + /* unregister ourselves as an uber thread. gtid is no longer valid */ + if( __kmp_root[gtid]->r.r_active ) { + __kmp_global.g.g_abort = -1; + TCW_SYNC_4(__kmp_global.g.g_done, TRUE); + KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid )); + return; + } else { + KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid )); + __kmp_unregister_root_current_thread( gtid ); + } + } else { + /* just a worker thread, let's leave */ + KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid )); + + if ( gtid >= 0 ) { + __kmp_threads[gtid]->th.th_task_team = NULL; + } + + KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid )); + return; + } + } + #if defined KMP_DYNAMIC_LIB + // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber thread, + // because we will better shutdown later in the library destructor. 
+ // The reason of this change is performance problem when non-openmp thread + // in a loop forks and joins many openmp threads. We can save a lot of time + // keeping worker threads alive until the program shutdown. + // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and + // Windows(DPD200287443) that occurs when using critical sections from foreign threads. + KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) ); + return; + #endif + /* synchronize the termination process */ + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + + /* have we already finished */ + if( __kmp_global.g.g_abort ) { + KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" )); + /* TODO abort? */ + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + return; + } + if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + return; + } + + /* We need this lock to enforce mutex between this reading of + __kmp_threads_capacity and the writing by __kmp_register_root. + Alternatively, we can use a counter of roots that is + atomically updated by __kmp_get_global_thread_id_reg, + __kmp_do_serial_initialize and __kmp_internal_end_*. + */ + + /* should we finish the run-time? are all siblings done? 
*/ + __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + + for ( i = 0; i < __kmp_threads_capacity; ++ i ) { + if ( KMP_UBER_GTID( i ) ) { + KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i )); + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + return; + }; + } + + /* now we can safely conduct the actual termination */ + + __kmp_internal_end(); + + __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + + KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) ); + + #ifdef DUMP_DEBUG_ON_EXIT + if ( __kmp_debug_buf ) + __kmp_dump_debug_buffer(); + #endif +} // __kmp_internal_end_thread + +// ------------------------------------------------------------------------------------------------- +// Library registration stuff. + +static long __kmp_registration_flag = 0; + // Random value used to indicate library initialization. +static char * __kmp_registration_str = NULL; + // Value to be saved in env var __KMP_REGISTERED_LIB_. + + +static inline +char * +__kmp_reg_status_name() { + /* + On RHEL 3u5 if linked statically, getpid() returns different values in each thread. + If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case), + the name of registered_lib_env env var can not be found, because the name will contain different pid. + */ + return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() ); +} // __kmp_reg_status_get + + +void +__kmp_register_library_startup( + void +) { + + char * name = __kmp_reg_status_name(); // Name of the environment variable. 
+ int done = 0; + union { + double dtime; + long ltime; + } time; + #if KMP_OS_WINDOWS + __kmp_initialize_system_tick(); + #endif + __kmp_read_system_time( & time.dtime ); + __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL ); + __kmp_registration_str = + __kmp_str_format( + "%p-%lx-%s", + & __kmp_registration_flag, + __kmp_registration_flag, + KMP_LIBRARY_FILE + ); + + KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) ); + + while ( ! done ) { + + char * value = NULL; // Actual value of the environment variable. + + // Set environment variable, but do not overwrite if it is exist. + __kmp_env_set( name, __kmp_registration_str, 0 ); + // Check the variable is written. + value = __kmp_env_get( name ); + if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { + + done = 1; // Ok, environment variable set successfully, exit the loop. + + } else { + + // Oops. Write failed. Another copy of OpenMP RTL is in memory. + // Check whether it alive or dead. + int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. + char * tail = value; + char * flag_addr_str = NULL; + char * flag_val_str = NULL; + char const * file_name = NULL; + __kmp_str_split( tail, '-', & flag_addr_str, & tail ); + __kmp_str_split( tail, '-', & flag_val_str, & tail ); + file_name = tail; + if ( tail != NULL ) { + long * flag_addr = 0; + long flag_val = 0; + KMP_SSCANF( flag_addr_str, "%p", & flag_addr ); + KMP_SSCANF( flag_val_str, "%lx", & flag_val ); + if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) { + // First, check whether environment-encoded address is mapped into addr space. + // If so, dereference it to see if it still has the right value. + + if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) { + neighbor = 1; + } else { + // If not, then we know the other copy of the library is no longer running. 
+ neighbor = 2; + }; // if + }; // if + }; // if + switch ( neighbor ) { + case 0 : // Cannot parse environment variable -- neighbor status unknown. + // Assume it is the incompatible format of future version of the library. + // Assume the other library is alive. + // WARN( ... ); // TODO: Issue a warning. + file_name = "unknown library"; + // Attention! Falling to the next case. That's intentional. + case 1 : { // Neighbor is alive. + // Check it is allowed. char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" ); - if ( ! __kmp_str_match_true( duplicate_ok ) ) { - // That's not allowed. Issue fatal error. - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ), - KMP_HNT( DuplicateLibrary ), - __kmp_msg_null - ); - }; // if - KMP_INTERNAL_FREE( duplicate_ok ); - __kmp_duplicate_library_ok = 1; - done = 1; // Exit the loop. - } break; - case 2 : { // Neighbor is dead. - // Clear the variable and try to register library again. - __kmp_env_unset( name ); - } break; - default : { - KMP_DEBUG_ASSERT( 0 ); - } break; - }; // switch - - }; // if - KMP_INTERNAL_FREE( (void *) value ); - - }; // while - KMP_INTERNAL_FREE( (void *) name ); - -} // func __kmp_register_library_startup - - -void -__kmp_unregister_library( void ) { - - char * name = __kmp_reg_status_name(); - char * value = __kmp_env_get( name ); - - KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 ); - KMP_DEBUG_ASSERT( __kmp_registration_str != NULL ); - if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { - // Ok, this is our variable. Delete it. - __kmp_env_unset( name ); - }; // if - - KMP_INTERNAL_FREE( __kmp_registration_str ); - KMP_INTERNAL_FREE( value ); - KMP_INTERNAL_FREE( name ); - - __kmp_registration_flag = 0; - __kmp_registration_str = NULL; - -} // __kmp_unregister_library - - -// End of Library registration stuff. 
-// ------------------------------------------------------------------------------------------------- - -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - -static void __kmp_check_mic_type() -{ - kmp_cpuid_t cpuid_state = {0}; - kmp_cpuid_t * cs_p = &cpuid_state; - __kmp_x86_cpuid(1, 0, cs_p); - // We don't support mic1 at the moment - if( (cs_p->eax & 0xff0) == 0xB10 ) { - __kmp_mic_type = mic2; - } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) { - __kmp_mic_type = mic3; - } else { - __kmp_mic_type = non_mic; - } -} - -#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */ - -static void -__kmp_do_serial_initialize( void ) -{ - int i, gtid; - int size; - - KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) ); - - KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 ); - KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 ); - KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 ); - KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 ); - KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) ); - -#if OMPT_SUPPORT - ompt_pre_init(); -#endif - - __kmp_validate_locks(); - - /* Initialize internal memory allocator */ - __kmp_init_allocator(); - - /* Register the library startup via an environment variable - and check to see whether another copy of the library is already - registered. 
*/ - - __kmp_register_library_startup( ); - - /* TODO reinitialization of library */ - if( TCR_4(__kmp_global.g.g_done) ) { - KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) ); - } - - __kmp_global.g.g_abort = 0; - TCW_SYNC_4(__kmp_global.g.g_done, FALSE); - - /* initialize the locks */ -#if KMP_USE_ADAPTIVE_LOCKS -#if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_init_speculative_stats(); -#endif -#endif -#if KMP_STATS_ENABLED - __kmp_init_tas_lock( & __kmp_stats_lock ); -#endif - __kmp_init_lock( & __kmp_global_lock ); - __kmp_init_queuing_lock( & __kmp_dispatch_lock ); - __kmp_init_lock( & __kmp_debug_lock ); - __kmp_init_atomic_lock( & __kmp_atomic_lock ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_1i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_2i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_4i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_4r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_8i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_8r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_8c ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_10r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_16r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_16c ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_20c ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_32c ); - __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock ); - __kmp_init_bootstrap_lock( & __kmp_exit_lock ); - __kmp_init_bootstrap_lock( & __kmp_monitor_lock ); - __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock ); - - /* conduct initialization and initial setup of configuration */ - - __kmp_runtime_initialize(); - -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - __kmp_check_mic_type(); -#endif - - // Some global variable initialization moved here from kmp_env_initialize() -#ifdef KMP_DEBUG - kmp_diag = 0; -#endif - __kmp_abort_delay = 0; - - // From __kmp_init_dflt_team_nth() - /* assume the entire machine will be used */ - __kmp_dflt_team_nth_ub = __kmp_xproc; - if( 
__kmp_dflt_team_nth_ub < KMP_MIN_NTH ) { - __kmp_dflt_team_nth_ub = KMP_MIN_NTH; - } - if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) { - __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; - } - __kmp_max_nth = __kmp_sys_max_nth; - - // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part - __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; - __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); - __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); - // From "KMP_LIBRARY" part of __kmp_env_initialize() - __kmp_library = library_throughput; - // From KMP_SCHEDULE initialization - __kmp_static = kmp_sch_static_balanced; - // AC: do not use analytical here, because it is non-monotonous - //__kmp_guided = kmp_sch_guided_iterative_chunked; - //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeate assignment - // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method - // control parts - #if KMP_FAST_REDUCTION_BARRIER - #define kmp_reduction_barrier_gather_bb ((int)1) - #define kmp_reduction_barrier_release_bb ((int)1) - #define kmp_reduction_barrier_gather_pat bp_hyper_bar - #define kmp_reduction_barrier_release_pat bp_hyper_bar - #endif // KMP_FAST_REDUCTION_BARRIER - for ( i=bs_plain_barrier; i 0 ); - if ( __kmp_avail_proc == 0 ) { - __kmp_avail_proc = __kmp_xproc; - } - - // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now - j = 0; - while ( ( j < __kmp_nested_nth.used ) && ! 
__kmp_nested_nth.nth[ j ] ) { - __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc; - j++; - } - - if ( __kmp_dflt_team_nth == 0 ) { -#ifdef KMP_DFLT_NTH_CORES - // - // Default #threads = #cores - // - __kmp_dflt_team_nth = __kmp_ncores; - KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n", - __kmp_dflt_team_nth ) ); -#else - // - // Default #threads = #available OS procs - // - __kmp_dflt_team_nth = __kmp_avail_proc; - KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n", - __kmp_dflt_team_nth ) ); -#endif /* KMP_DFLT_NTH_CORES */ - } - - if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) { - __kmp_dflt_team_nth = KMP_MIN_NTH; - } - if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) { - __kmp_dflt_team_nth = __kmp_sys_max_nth; - } - - // - // There's no harm in continuing if the following check fails, - // but it indicates an error in the previous logic. - // - KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub ); - - if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) { - // - // Run through the __kmp_threads array and set the num threads icv - // for each root thread that is currently registered with the RTL - // (which has not already explicitly set its nthreads-var with a - // call to omp_set_num_threads()). 
- // - for ( i = 0; i < __kmp_threads_capacity; i++ ) { - kmp_info_t *thread = __kmp_threads[ i ]; - if ( thread == NULL ) continue; - if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue; - - set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth ); - } - } - KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", - __kmp_dflt_team_nth) ); - -#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime to zero if necessary */ - /* now that __kmp_avail_proc is set */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } - } -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* we have finished middle initialization */ - TCW_SYNC_4(__kmp_init_middle, TRUE); - - KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) ); -} - -void -__kmp_middle_initialize( void ) -{ - if ( __kmp_init_middle ) { - return; - } - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - if ( __kmp_init_middle ) { - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - __kmp_do_middle_initialize(); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); -} - -void -__kmp_parallel_initialize( void ) -{ - int gtid = __kmp_entry_gtid(); // this might be a new root - - /* synchronize parallel initialization (for sibling) */ - if( TCR_4(__kmp_init_parallel) ) return; - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; } - - /* TODO reinitialization after we have already shut down */ - if( TCR_4(__kmp_global.g.g_done) ) { - KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) ); - __kmp_infinite_loop(); - } - - /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize - would cause a deadlock. So we call __kmp_do_serial_initialize directly. 
- */ - if( !__kmp_init_middle ) { - __kmp_do_middle_initialize(); - } - - /* begin initialization */ - KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) ); - KMP_ASSERT( KMP_UBER_GTID( gtid ) ); - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // - // Save the FP control regs. - // Worker threads will set theirs to these values at thread startup. - // - __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); - __kmp_store_mxcsr( &__kmp_init_mxcsr ); - __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#if KMP_OS_UNIX -# if KMP_HANDLE_SIGNALS - /* must be after __kmp_serial_initialize */ - __kmp_install_signals( TRUE ); -# endif -#endif - - __kmp_suspend_initialize(); - -# if defined(USE_LOAD_BALANCE) - if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { - __kmp_global.g.g_dynamic_mode = dynamic_load_balance; - } -#else - if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - } -#endif - - if ( __kmp_version ) { - __kmp_print_version_2(); - } - - /* we have finished parallel initialization */ - TCW_SYNC_4(__kmp_init_parallel, TRUE); - - KMP_MB(); - KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) ); - - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); -} - - -/* ------------------------------------------------------------------------ */ - -void -__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, - kmp_team_t *team ) -{ - kmp_disp_t *dispatch; - - KMP_MB(); - - /* none of the threads have encountered any constructs, yet. 
*/ - this_thr->th.th_local.this_construct = 0; -#if KMP_CACHE_MANAGE - KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived ); -#endif /* KMP_CACHE_MANAGE */ - dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); - KMP_DEBUG_ASSERT( dispatch ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] ); - - dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ - - if( __kmp_env_consistency_check ) - __kmp_push_parallel( gtid, team->t.t_ident ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} - -void -__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, - kmp_team_t *team ) -{ - if( __kmp_env_consistency_check ) - __kmp_pop_parallel( gtid, team->t.t_ident ); -} - -int -__kmp_invoke_task_func( int gtid ) -{ - int rc; - int tid = __kmp_tid_from_gtid( gtid ); - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - kmp_team_t *team = this_thr->th.th_team; - - __kmp_run_before_invoked_task( gtid, tid, this_thr, team ); -#if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code - } -#endif /* USE_ITT_BUILD */ -#if INCLUDE_SSC_MARKS - SSC_MARK_INVOKING(); -#endif - -#if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; - - if (ompt_enabled) { - exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]. 
- ompt_task_info.frame.exit_runtime_frame); - } else { - exit_runtime_p = &dummy; - } - -#if OMPT_TRACE - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); - } -#endif -#endif - - { - KMP_TIME_BLOCK(OMP_work); - rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn), - gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv -#if OMPT_SUPPORT - , exit_runtime_p -#endif - ); - } - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, my_task_id); - } - // the implicit task is not dead yet, so we can't clear its task id here - team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0; - } -#endif - -#if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code - } -#endif /* USE_ITT_BUILD */ - __kmp_run_after_invoked_task( gtid, tid, this_thr, team ); - - return rc; -} - -#if OMP_40_ENABLED -void -__kmp_teams_master( int gtid ) -{ - // This routine is called by all master threads in teams construct - kmp_info_t *thr = __kmp_threads[ gtid ]; - kmp_team_t *team = thr->th.th_team; - ident_t *loc = team->t.t_ident; - thr->th.th_set_nproc = thr->th.th_teams_size.nth; - KMP_DEBUG_ASSERT( thr->th.th_teams_microtask ); - KMP_DEBUG_ASSERT( thr->th.th_set_nproc ); - KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", - gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) ); - // Launch league of teams now, but not let workers execute - // (they hang on fork barrier until next parallel) -#if 
INCLUDE_SSC_MARKS - SSC_MARK_FORKING(); -#endif - __kmp_fork_call( loc, gtid, fork_context_intel, - team->t.t_argc, -#if OMPT_SUPPORT - (void *)thr->th.th_teams_microtask, // "unwrapped" task -#endif - (microtask_t)thr->th.th_teams_microtask, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_task_func, - NULL ); -#if INCLUDE_SSC_MARKS - SSC_MARK_JOINING(); -#endif - - // AC: last parameter "1" eliminates join barrier which won't work because - // worker threads are in a fork barrier waiting for more parallel regions - __kmp_join_call( loc, gtid -#if OMPT_SUPPORT - , fork_context_intel -#endif - , 1 ); -} - -int -__kmp_invoke_teams_master( int gtid ) -{ - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - kmp_team_t *team = this_thr->th.th_team; - #if KMP_DEBUG - if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized ) - KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master ); - #endif - __kmp_run_before_invoked_task( gtid, 0, this_thr, team ); - __kmp_teams_master( gtid ); - __kmp_run_after_invoked_task( gtid, 0, this_thr, team ); - return 1; -} -#endif /* OMP_40_ENABLED */ - -/* this sets the requested number of threads for the next parallel region - * encountered by this team */ -/* since this should be enclosed in the forkjoin critical section it - * should avoid race conditions with assymmetrical nested parallelism */ - -void -__kmp_push_num_threads( ident_t *id, int gtid, int num_threads ) -{ - kmp_info_t *thr = __kmp_threads[gtid]; - - if( num_threads > 0 ) - thr->th.th_set_nproc = num_threads; -} - -#if OMP_40_ENABLED - -/* this sets the requested number of teams for the teams region and/or - * the number of threads for the next parallel region encountered */ -void -__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads ) -{ - kmp_info_t *thr = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(num_teams >= 0); - KMP_DEBUG_ASSERT(num_threads >= 0); - - if( num_teams == 0 ) - num_teams = 1; // default 
number of teams is 1. - if( num_teams > __kmp_max_nth ) { // if too many teams requested? - if ( !__kmp_reserve_warn ) { - __kmp_reserve_warn = 1; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ), - KMP_HNT( Unset_ALL_THREADS ), - __kmp_msg_null - ); - } - num_teams = __kmp_max_nth; - } - // Set number of teams (number of threads in the outer "parallel" of the teams) - thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; - - // Remember the number of threads for inner parallel regions - if( num_threads == 0 ) { - if( !TCR_4(__kmp_init_middle) ) - __kmp_middle_initialize(); // get __kmp_avail_proc calculated - num_threads = __kmp_avail_proc / num_teams; - if( num_teams * num_threads > __kmp_max_nth ) { - // adjust num_threads w/o warning as it is not user setting - num_threads = __kmp_max_nth / num_teams; - } - } else { - if( num_teams * num_threads > __kmp_max_nth ) { - int new_threads = __kmp_max_nth / num_teams; - if ( !__kmp_reserve_warn ) { // user asked for too many threads - __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, num_threads, new_threads ), - KMP_HNT( Unset_ALL_THREADS ), - __kmp_msg_null - ); - } - num_threads = new_threads; - } - } - thr->th.th_teams_size.nth = num_threads; -} - - -// -// Set the proc_bind var to use in the following parallel region. -// -void -__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind ) -{ - kmp_info_t *thr = __kmp_threads[gtid]; - thr->th.th_set_proc_bind = proc_bind; -} - -#endif /* OMP_40_ENABLED */ - -/* Launch the worker threads into the microtask. 
*/ - -void -__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ) -{ - kmp_info_t *this_thr = __kmp_threads[gtid]; - -#ifdef KMP_DEBUG - int f; -#endif /* KMP_DEBUG */ - - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); - KMP_ASSERT( KMP_MASTER_GTID(gtid) ); - KMP_MB(); /* Flush all pending memory write invalidates. */ - - team->t.t_construct = 0; /* no single directives seen yet */ - team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */ - - /* Reset the identifiers on the dispatch buffer */ - KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); - if ( team->t.t_max_nproc > 1 ) { - int i; - for (i = 0; i < KMP_MAX_DISP_BUF; ++i) - team->t.t_disp_buffer[ i ].buffer_index = i; - } else { - team->t.t_disp_buffer[ 0 ].buffer_index = 0; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_ASSERT( this_thr->th.th_team == team ); - -#ifdef KMP_DEBUG - for( f=0 ; ft.t_nproc ; f++ ) { - KMP_DEBUG_ASSERT( team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); - } -#endif /* KMP_DEBUG */ - - /* release the worker threads so they may begin working */ - __kmp_fork_barrier( gtid, 0 ); -} - - -void -__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ) -{ - kmp_info_t *this_thr = __kmp_threads[gtid]; - - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); - KMP_ASSERT( KMP_MASTER_GTID(gtid) ); - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - /* Join barrier after fork */ - -#ifdef KMP_DEBUG - if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) { - __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]); - __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n", - gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc); - __kmp_print_structure(); - } - KMP_DEBUG_ASSERT( __kmp_threads[gtid] && - __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc ); -#endif /* KMP_DEBUG */ - - __kmp_join_barrier( gtid ); /* wait for everyone */ - - KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_ASSERT( this_thr->th.th_team == team ); -} - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#ifdef USE_LOAD_BALANCE - -// -// Return the worker threads actively spinning in the hot team, if we -// are at the outermost level of parallelism. Otherwise, return 0. -// -static int -__kmp_active_hot_team_nproc( kmp_root_t *root ) -{ - int i; - int retval; - kmp_team_t *hot_team; - - if ( root->r.r_active ) { - return 0; - } - hot_team = root->r.r_hot_team; - if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { - return hot_team->t.t_nproc - 1; // Don't count master thread - } - - // - // Skip the master thread - it is accounted for elsewhere. - // - retval = 0; - for ( i = 1; i < hot_team->t.t_nproc; i++ ) { - if ( hot_team->t.t_threads[i]->th.th_active ) { - retval++; - } - } - return retval; -} - -// -// Perform an automatic adjustment to the number of -// threads used by the next parallel region. 
-// -static int -__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc ) -{ - int retval; - int pool_active; - int hot_team_active; - int team_curr_active; - int system_active; - - KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", - root, set_nproc ) ); - KMP_DEBUG_ASSERT( root ); - KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE ); - KMP_DEBUG_ASSERT( set_nproc > 1 ); - - if ( set_nproc == 1) { - KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) ); - return 1; - } - - // - // Threads that are active in the thread pool, active in the hot team - // for this particular root (if we are at the outer par level), and - // the currently executing thread (to become the master) are available - // to add to the new team, but are currently contributing to the system - // load, and must be accounted for. - // - pool_active = TCR_4(__kmp_thread_pool_active_nth); - hot_team_active = __kmp_active_hot_team_nproc( root ); - team_curr_active = pool_active + hot_team_active + 1; - - // - // Check the system load. - // - system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active ); - KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n", - system_active, pool_active, hot_team_active ) ); - - if ( system_active < 0 ) { - // - // There was an error reading the necessary info from /proc, - // so use the thread limit algorithm instead. Once we set - // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, - // we shouldn't wind up getting back here. - // - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" ); - - // - // Make this call behave like the thread limit algorithm. - // - retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 
1 - : root->r.r_hot_team->t.t_nproc); - if ( retval > set_nproc ) { - retval = set_nproc; - } - if ( retval < KMP_MIN_NTH ) { - retval = KMP_MIN_NTH; - } - - KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) ); - return retval; - } - - // - // There is a slight delay in the load balance algorithm in detecting - // new running procs. The real system load at this instant should be - // at least as large as the #active omp thread that are available to - // add to the team. - // - if ( system_active < team_curr_active ) { - system_active = team_curr_active; - } - retval = __kmp_avail_proc - system_active + team_curr_active; - if ( retval > set_nproc ) { - retval = set_nproc; - } - if ( retval < KMP_MIN_NTH ) { - retval = KMP_MIN_NTH; - } - - KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) ); - return retval; -} // __kmp_load_balance_nproc() - -#endif /* USE_LOAD_BALANCE */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* NOTE: this is called with the __kmp_init_lock held */ -void -__kmp_cleanup( void ) -{ - int f; - - KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) ); - - if (TCR_4(__kmp_init_parallel)) { -#if KMP_HANDLE_SIGNALS - __kmp_remove_signals(); -#endif - TCW_4(__kmp_init_parallel, FALSE); - } - - if (TCR_4(__kmp_init_middle)) { -#if KMP_AFFINITY_SUPPORTED - __kmp_affinity_uninitialize(); -#endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_cleanup_hierarchy(); - TCW_4(__kmp_init_middle, FALSE); - } - - KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) ); - - if (__kmp_init_serial) { - __kmp_runtime_destroy(); - __kmp_init_serial = FALSE; - } - - for ( f = 0; f < __kmp_threads_capacity; f++ ) { - if ( __kmp_root[ f ] != NULL ) { - __kmp_free( __kmp_root[ f ] ); - __kmp_root[ f ] = NULL; - } - } - __kmp_free( __kmp_threads ); - // __kmp_threads and __kmp_root were allocated at once, as single 
block, so there is no need in - // freeing __kmp_root. - __kmp_threads = NULL; - __kmp_root = NULL; - __kmp_threads_capacity = 0; - -#if KMP_USE_DYNAMIC_LOCK - __kmp_cleanup_indirect_user_locks(); -#else - __kmp_cleanup_user_locks(); -#endif - - #if KMP_AFFINITY_SUPPORTED - KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file ); - __kmp_cpuinfo_file = NULL; - #endif /* KMP_AFFINITY_SUPPORTED */ - - #if KMP_USE_ADAPTIVE_LOCKS - #if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_print_speculative_stats(); - #endif - #endif - KMP_INTERNAL_FREE( __kmp_nested_nth.nth ); - __kmp_nested_nth.nth = NULL; - __kmp_nested_nth.size = 0; - __kmp_nested_nth.used = 0; - - __kmp_i18n_catclose(); - -#if KMP_STATS_ENABLED - __kmp_accumulate_stats_at_exit(); - __kmp_stats_list.deallocate(); -#endif - - KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -int -__kmp_ignore_mppbeg( void ) -{ - char *env; - - if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) { - if (__kmp_str_match_false( env )) - return FALSE; - } - // By default __kmpc_begin() is no-op. - return TRUE; -} - -int -__kmp_ignore_mppend( void ) -{ - char *env; - - if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) { - if (__kmp_str_match_false( env )) - return FALSE; - } - // By default __kmpc_end() is no-op. 
- return TRUE; -} - -void -__kmp_internal_begin( void ) -{ - int gtid; - kmp_root_t *root; - - /* this is a very important step as it will register new sibling threads - * and assign these new uber threads a new gtid */ - gtid = __kmp_entry_gtid(); - root = __kmp_threads[ gtid ]->th.th_root; - KMP_ASSERT( KMP_UBER_GTID( gtid )); - - if( root->r.r_begin ) return; - __kmp_acquire_lock( &root->r.r_begin_lock, gtid ); - if( root->r.r_begin ) { - __kmp_release_lock( & root->r.r_begin_lock, gtid ); - return; - } - - root->r.r_begin = TRUE; - - __kmp_release_lock( & root->r.r_begin_lock, gtid ); -} - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_user_set_library (enum library_type arg) -{ - int gtid; - kmp_root_t *root; - kmp_info_t *thread; - - /* first, make sure we are initialized so we can get our gtid */ - - gtid = __kmp_entry_gtid(); - thread = __kmp_threads[ gtid ]; - - root = thread->th.th_root; - - KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial )); - if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */ - KMP_WARNING( SetLibraryIncorrectCall ); - return; - } - - switch ( arg ) { - case library_serial : - thread->th.th_set_nproc = 0; - set__nproc( thread, 1 ); - break; - case library_turnaround : - thread->th.th_set_nproc = 0; - set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); - break; - case library_throughput : - thread->th.th_set_nproc = 0; - set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); - break; - default: - KMP_FATAL( UnknownLibraryType, arg ); - } - - __kmp_aux_set_library ( arg ); -} - -void -__kmp_aux_set_stacksize( size_t arg ) -{ - if (! 
__kmp_init_serial) - __kmp_serial_initialize(); - -#if KMP_OS_DARWIN - if (arg & (0x1000 - 1)) { - arg &= ~(0x1000 - 1); - if(arg + 0x1000) /* check for overflow if we round up */ - arg += 0x1000; - } -#endif - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - /* only change the default stacksize before the first parallel region */ - if (! TCR_4(__kmp_init_parallel)) { - size_t value = arg; /* argument is in bytes */ - - if (value < __kmp_sys_min_stksize ) - value = __kmp_sys_min_stksize ; - else if (value > KMP_MAX_STKSIZE) - value = KMP_MAX_STKSIZE; - - __kmp_stksize = value; - - __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ - } - - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); -} - -/* set the behaviour of the runtime library */ -/* TODO this can cause some odd behaviour with sibling parallelism... */ -void -__kmp_aux_set_library (enum library_type arg) -{ - __kmp_library = arg; - - switch ( __kmp_library ) { - case library_serial : - { - KMP_INFORM( LibraryIsSerial ); - (void) __kmp_change_library( TRUE ); - } - break; - case library_turnaround : - (void) __kmp_change_library( TRUE ); - break; - case library_throughput : - (void) __kmp_change_library( FALSE ); - break; - default: - KMP_FATAL( UnknownLibraryType, arg ); - } -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid) -{ - int blocktime = arg; /* argument is in milliseconds */ - int bt_intervals; - int bt_set; - - __kmp_save_internal_controls( thread ); - - /* Normalize and set blocktime for the teams */ - if (blocktime < KMP_MIN_BLOCKTIME) - blocktime = KMP_MIN_BLOCKTIME; - else if (blocktime > KMP_MAX_BLOCKTIME) - blocktime = KMP_MAX_BLOCKTIME; - - set__blocktime_team( thread->th.th_team, tid, blocktime ); - set__blocktime_team( thread->th.th_serial_team, 0, blocktime ); - - /* Calculate and set 
blocktime intervals for the teams */ - bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); - - set__bt_intervals_team( thread->th.th_team, tid, bt_intervals ); - set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals ); - - /* Set whether blocktime has been set to "TRUE" */ - bt_set = TRUE; - - set__bt_set_team( thread->th.th_team, tid, bt_set ); - set__bt_set_team( thread->th.th_serial_team, 0, bt_set ); - KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n", - __kmp_gtid_from_tid(tid, thread->th.th_team), - thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) ); -} - -void -__kmp_aux_set_defaults( - char const * str, - int len -) { - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - __kmp_env_initialize( str ); - - if (__kmp_settings -#if OMP_40_ENABLED - || __kmp_display_env || __kmp_display_env_verbose -#endif // OMP_40_ENABLED - ) { - __kmp_env_print(); - } -} // __kmp_aux_set_defaults - -/* ------------------------------------------------------------------------ */ - -/* - * internal fast reduction routines - */ - -PACKED_REDUCTION_METHOD_T -__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ) -{ - - // Default reduction method: critical construct ( lck != NULL, like in current PAROPT ) - // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL - // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL - // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT. 
- - PACKED_REDUCTION_METHOD_T retval; - - int team_size; - - KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 ) - KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 ) - - #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) ) - #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) ) - - retval = critical_reduce_block; - - team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic deference ) is slower - - if( team_size == 1 ) { - - retval = empty_reduce_block; - - } else { - - int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; - int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - - #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 - - #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN - - int teamsize_cutoff = 4; - -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - teamsize_cutoff = 8; - } -#endif - if( tree_available ) { - if( team_size <= teamsize_cutoff ) { - if ( atomic_available ) { - retval = atomic_reduce_block; - } - } else { - retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; - } - } else if ( atomic_available ) { - retval = atomic_reduce_block; - } - #else - #error "Unknown or unsupported OS" - #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN - - #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH - - #if KMP_OS_LINUX || KMP_OS_WINDOWS - - // basic tuning - - if( atomic_available ) { - if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ??? 
- retval = atomic_reduce_block; - } - } // otherwise: use critical section - - #elif KMP_OS_DARWIN - - if( atomic_available && ( num_vars <= 3 ) ) { - retval = atomic_reduce_block; - } else if( tree_available ) { - if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) { - retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; - } - } // otherwise: use critical section - - #else - #error "Unknown or unsupported OS" - #endif - - #else - #error "Unknown or unsupported architecture" - #endif - - } - - // KMP_FORCE_REDUCTION - - // If the team is serialized (team_size == 1), ignore the forced reduction - // method and stay with the unsynchronized method (empty_reduce_block) - if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) { - - PACKED_REDUCTION_METHOD_T forced_retval; - - int atomic_available, tree_available; - - switch( ( forced_retval = __kmp_force_reduction_method ) ) - { - case critical_reduce_block: - KMP_ASSERT( lck ); // lck should be != 0 - break; - - case atomic_reduce_block: - atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; - KMP_ASSERT( atomic_available ); // atomic_available should be != 0 - break; - - case tree_reduce_block: - tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - KMP_ASSERT( tree_available ); // tree_available should be != 0 - #if KMP_FAST_REDUCTION_BARRIER - forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; - #endif - break; - - default: - KMP_ASSERT( 0 ); // "unsupported method specified" - } - - retval = forced_retval; - } - - KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) ); - - #undef FAST_REDUCTION_TREE_METHOD_GENERATED - #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED - - return ( retval ); -} - -// this function is for testing set/get/determine reduce method -kmp_int32 -__kmp_get_reduce_method( void ) { - return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 ); -} - -/* 
------------------------------------------------------------------------ */ + if ( ! __kmp_str_match_true( duplicate_ok ) ) { + // That's not allowed. Issue fatal error. + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ), + KMP_HNT( DuplicateLibrary ), + __kmp_msg_null + ); + }; // if + KMP_INTERNAL_FREE( duplicate_ok ); + __kmp_duplicate_library_ok = 1; + done = 1; // Exit the loop. + } break; + case 2 : { // Neighbor is dead. + // Clear the variable and try to register library again. + __kmp_env_unset( name ); + } break; + default : { + KMP_DEBUG_ASSERT( 0 ); + } break; + }; // switch + + }; // if + KMP_INTERNAL_FREE( (void *) value ); + + }; // while + KMP_INTERNAL_FREE( (void *) name ); + +} // func __kmp_register_library_startup + + +void +__kmp_unregister_library( void ) { + + char * name = __kmp_reg_status_name(); + char * value = __kmp_env_get( name ); + + KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 ); + KMP_DEBUG_ASSERT( __kmp_registration_str != NULL ); + if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { + // Ok, this is our variable. Delete it. + __kmp_env_unset( name ); + }; // if + + KMP_INTERNAL_FREE( __kmp_registration_str ); + KMP_INTERNAL_FREE( value ); + KMP_INTERNAL_FREE( name ); + + __kmp_registration_flag = 0; + __kmp_registration_str = NULL; + +} // __kmp_unregister_library + + +// End of Library registration stuff. 
+// ------------------------------------------------------------------------------------------------- + +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + +static void __kmp_check_mic_type() +{ + kmp_cpuid_t cpuid_state = {0}; + kmp_cpuid_t * cs_p = &cpuid_state; + __kmp_x86_cpuid(1, 0, cs_p); + // We don't support mic1 at the moment + if( (cs_p->eax & 0xff0) == 0xB10 ) { + __kmp_mic_type = mic2; + } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) { + __kmp_mic_type = mic3; + } else { + __kmp_mic_type = non_mic; + } +} + +#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */ + +static void +__kmp_do_serial_initialize( void ) +{ + int i, gtid; + int size; + + KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) ); + + KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 ); + KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 ); + KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 ); + KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 ); + KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) ); + +#if OMPT_SUPPORT + ompt_pre_init(); +#endif + + __kmp_validate_locks(); + + /* Initialize internal memory allocator */ + __kmp_init_allocator(); + + /* Register the library startup via an environment variable + and check to see whether another copy of the library is already + registered. 
*/ + + __kmp_register_library_startup( ); + + /* TODO reinitialization of library */ + if( TCR_4(__kmp_global.g.g_done) ) { + KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) ); + } + + __kmp_global.g.g_abort = 0; + TCW_SYNC_4(__kmp_global.g.g_done, FALSE); + + /* initialize the locks */ +#if KMP_USE_ADAPTIVE_LOCKS +#if KMP_DEBUG_ADAPTIVE_LOCKS + __kmp_init_speculative_stats(); +#endif +#endif +#if KMP_STATS_ENABLED + __kmp_init_tas_lock( & __kmp_stats_lock ); +#endif + __kmp_init_lock( & __kmp_global_lock ); + __kmp_init_queuing_lock( & __kmp_dispatch_lock ); + __kmp_init_lock( & __kmp_debug_lock ); + __kmp_init_atomic_lock( & __kmp_atomic_lock ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_1i ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_2i ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_4i ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_4r ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_8i ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_8r ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_8c ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_10r ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_16r ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_16c ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_20c ); + __kmp_init_atomic_lock( & __kmp_atomic_lock_32c ); + __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock ); + __kmp_init_bootstrap_lock( & __kmp_exit_lock ); + __kmp_init_bootstrap_lock( & __kmp_monitor_lock ); + __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock ); + + /* conduct initialization and initial setup of configuration */ + + __kmp_runtime_initialize(); + +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + __kmp_check_mic_type(); +#endif + + // Some global variable initialization moved here from kmp_env_initialize() +#ifdef KMP_DEBUG + kmp_diag = 0; +#endif + __kmp_abort_delay = 0; + + // From __kmp_init_dflt_team_nth() + /* assume the entire machine will be used */ + __kmp_dflt_team_nth_ub = __kmp_xproc; + if( 
__kmp_dflt_team_nth_ub < KMP_MIN_NTH ) { + __kmp_dflt_team_nth_ub = KMP_MIN_NTH; + } + if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) { + __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; + } + __kmp_max_nth = __kmp_sys_max_nth; + + // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part + __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; + __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); + __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); + // From "KMP_LIBRARY" part of __kmp_env_initialize() + __kmp_library = library_throughput; + // From KMP_SCHEDULE initialization + __kmp_static = kmp_sch_static_balanced; + // AC: do not use analytical here, because it is non-monotonous + //__kmp_guided = kmp_sch_guided_iterative_chunked; + //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeate assignment + // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method + // control parts + #if KMP_FAST_REDUCTION_BARRIER + #define kmp_reduction_barrier_gather_bb ((int)1) + #define kmp_reduction_barrier_release_bb ((int)1) + #define kmp_reduction_barrier_gather_pat bp_hyper_bar + #define kmp_reduction_barrier_release_pat bp_hyper_bar + #endif // KMP_FAST_REDUCTION_BARRIER + for ( i=bs_plain_barrier; i 0 ); + if ( __kmp_avail_proc == 0 ) { + __kmp_avail_proc = __kmp_xproc; + } + + // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now + j = 0; + while ( ( j < __kmp_nested_nth.used ) && ! 
__kmp_nested_nth.nth[ j ] ) { + __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc; + j++; + } + + if ( __kmp_dflt_team_nth == 0 ) { +#ifdef KMP_DFLT_NTH_CORES + // + // Default #threads = #cores + // + __kmp_dflt_team_nth = __kmp_ncores; + KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n", + __kmp_dflt_team_nth ) ); +#else + // + // Default #threads = #available OS procs + // + __kmp_dflt_team_nth = __kmp_avail_proc; + KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n", + __kmp_dflt_team_nth ) ); +#endif /* KMP_DFLT_NTH_CORES */ + } + + if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) { + __kmp_dflt_team_nth = KMP_MIN_NTH; + } + if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) { + __kmp_dflt_team_nth = __kmp_sys_max_nth; + } + + // + // There's no harm in continuing if the following check fails, + // but it indicates an error in the previous logic. + // + KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub ); + + if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) { + // + // Run through the __kmp_threads array and set the num threads icv + // for each root thread that is currently registered with the RTL + // (which has not already explicitly set its nthreads-var with a + // call to omp_set_num_threads()). 
+ // + for ( i = 0; i < __kmp_threads_capacity; i++ ) { + kmp_info_t *thread = __kmp_threads[ i ]; + if ( thread == NULL ) continue; + if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue; + + set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth ); + } + } + KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", + __kmp_dflt_team_nth) ); + +#ifdef KMP_ADJUST_BLOCKTIME + /* Adjust blocktime to zero if necessary */ + /* now that __kmp_avail_proc is set */ + if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { + KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); + if ( __kmp_nth > __kmp_avail_proc ) { + __kmp_zero_bt = TRUE; + } + } +#endif /* KMP_ADJUST_BLOCKTIME */ + + /* we have finished middle initialization */ + TCW_SYNC_4(__kmp_init_middle, TRUE); + + KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) ); +} + +void +__kmp_middle_initialize( void ) +{ + if ( __kmp_init_middle ) { + return; + } + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + if ( __kmp_init_middle ) { + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + return; + } + __kmp_do_middle_initialize(); + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); +} + +void +__kmp_parallel_initialize( void ) +{ + int gtid = __kmp_entry_gtid(); // this might be a new root + + /* synchronize parallel initialization (for sibling) */ + if( TCR_4(__kmp_init_parallel) ) return; + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; } + + /* TODO reinitialization after we have already shut down */ + if( TCR_4(__kmp_global.g.g_done) ) { + KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) ); + __kmp_infinite_loop(); + } + + /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize + would cause a deadlock. So we call __kmp_do_serial_initialize directly. 
+ */ + if( !__kmp_init_middle ) { + __kmp_do_middle_initialize(); + } + + /* begin initialization */ + KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) ); + KMP_ASSERT( KMP_UBER_GTID( gtid ) ); + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + // + // Save the FP control regs. + // Worker threads will set theirs to these values at thread startup. + // + __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); + __kmp_store_mxcsr( &__kmp_init_mxcsr ); + __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +#if KMP_OS_UNIX +# if KMP_HANDLE_SIGNALS + /* must be after __kmp_serial_initialize */ + __kmp_install_signals( TRUE ); +# endif +#endif + + __kmp_suspend_initialize(); + +# if defined(USE_LOAD_BALANCE) + if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { + __kmp_global.g.g_dynamic_mode = dynamic_load_balance; + } +#else + if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { + __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; + } +#endif + + if ( __kmp_version ) { + __kmp_print_version_2(); + } + + /* we have finished parallel initialization */ + TCW_SYNC_4(__kmp_init_parallel, TRUE); + + KMP_MB(); + KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) ); + + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); +} + + +/* ------------------------------------------------------------------------ */ + +void +__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, + kmp_team_t *team ) +{ + kmp_disp_t *dispatch; + + KMP_MB(); + + /* none of the threads have encountered any constructs, yet. 
*/ + this_thr->th.th_local.this_construct = 0; +#if KMP_CACHE_MANAGE + KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived ); +#endif /* KMP_CACHE_MANAGE */ + dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); + KMP_DEBUG_ASSERT( dispatch ); + KMP_DEBUG_ASSERT( team->t.t_dispatch ); + //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] ); + + dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ + + if( __kmp_env_consistency_check ) + __kmp_push_parallel( gtid, team->t.t_ident ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ +} + +void +__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, + kmp_team_t *team ) +{ + if( __kmp_env_consistency_check ) + __kmp_pop_parallel( gtid, team->t.t_ident ); +} + +int +__kmp_invoke_task_func( int gtid ) +{ + int rc; + int tid = __kmp_tid_from_gtid( gtid ); + kmp_info_t *this_thr = __kmp_threads[ gtid ]; + kmp_team_t *team = this_thr->th.th_team; + + __kmp_run_before_invoked_task( gtid, tid, this_thr, team ); +#if USE_ITT_BUILD + if ( __itt_stack_caller_create_ptr ) { + __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code + } +#endif /* USE_ITT_BUILD */ +#if INCLUDE_SSC_MARKS + SSC_MARK_INVOKING(); +#endif + +#if OMPT_SUPPORT + void *dummy; + void **exit_runtime_p; + ompt_task_id_t my_task_id; + ompt_parallel_id_t my_parallel_id; + + if (ompt_enabled) { + exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]. 
+ ompt_task_info.frame.exit_runtime_frame); + } else { + exit_runtime_p = &dummy; + } + +#if OMPT_TRACE + my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; + my_parallel_id = team->t.ompt_team_info.parallel_id; + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + my_parallel_id, my_task_id); + } +#endif +#endif + + { + KMP_TIME_BLOCK(OMP_work); + rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn), + gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv +#if OMPT_SUPPORT + , exit_runtime_p +#endif + ); + } + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled) { + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + my_parallel_id, my_task_id); + } + // the implicit task is not dead yet, so we can't clear its task id here + team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0; + } +#endif + +#if USE_ITT_BUILD + if ( __itt_stack_caller_create_ptr ) { + __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code + } +#endif /* USE_ITT_BUILD */ + __kmp_run_after_invoked_task( gtid, tid, this_thr, team ); + + return rc; +} + +#if OMP_40_ENABLED +void +__kmp_teams_master( int gtid ) +{ + // This routine is called by all master threads in teams construct + kmp_info_t *thr = __kmp_threads[ gtid ]; + kmp_team_t *team = thr->th.th_team; + ident_t *loc = team->t.t_ident; + thr->th.th_set_nproc = thr->th.th_teams_size.nth; + KMP_DEBUG_ASSERT( thr->th.th_teams_microtask ); + KMP_DEBUG_ASSERT( thr->th.th_set_nproc ); + KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", + gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) ); + // Launch league of teams now, but not let workers execute + // (they hang on fork barrier until next parallel) +#if 
INCLUDE_SSC_MARKS + SSC_MARK_FORKING(); +#endif + __kmp_fork_call( loc, gtid, fork_context_intel, + team->t.t_argc, +#if OMPT_SUPPORT + (void *)thr->th.th_teams_microtask, // "unwrapped" task +#endif + (microtask_t)thr->th.th_teams_microtask, // "wrapped" task + VOLATILE_CAST(launch_t) __kmp_invoke_task_func, + NULL ); +#if INCLUDE_SSC_MARKS + SSC_MARK_JOINING(); +#endif + + // AC: last parameter "1" eliminates join barrier which won't work because + // worker threads are in a fork barrier waiting for more parallel regions + __kmp_join_call( loc, gtid +#if OMPT_SUPPORT + , fork_context_intel +#endif + , 1 ); +} + +int +__kmp_invoke_teams_master( int gtid ) +{ + kmp_info_t *this_thr = __kmp_threads[ gtid ]; + kmp_team_t *team = this_thr->th.th_team; + #if KMP_DEBUG + if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized ) + KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master ); + #endif + __kmp_run_before_invoked_task( gtid, 0, this_thr, team ); + __kmp_teams_master( gtid ); + __kmp_run_after_invoked_task( gtid, 0, this_thr, team ); + return 1; +} +#endif /* OMP_40_ENABLED */ + +/* this sets the requested number of threads for the next parallel region + * encountered by this team */ +/* since this should be enclosed in the forkjoin critical section it + * should avoid race conditions with assymmetrical nested parallelism */ + +void +__kmp_push_num_threads( ident_t *id, int gtid, int num_threads ) +{ + kmp_info_t *thr = __kmp_threads[gtid]; + + if( num_threads > 0 ) + thr->th.th_set_nproc = num_threads; +} + +#if OMP_40_ENABLED + +/* this sets the requested number of teams for the teams region and/or + * the number of threads for the next parallel region encountered */ +void +__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads ) +{ + kmp_info_t *thr = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(num_teams >= 0); + KMP_DEBUG_ASSERT(num_threads >= 0); + + if( num_teams == 0 ) + num_teams = 1; // default 
number of teams is 1. + if( num_teams > __kmp_max_nth ) { // if too many teams requested? + if ( !__kmp_reserve_warn ) { + __kmp_reserve_warn = 1; + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ), + KMP_HNT( Unset_ALL_THREADS ), + __kmp_msg_null + ); + } + num_teams = __kmp_max_nth; + } + // Set number of teams (number of threads in the outer "parallel" of the teams) + thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; + + // Remember the number of threads for inner parallel regions + if( num_threads == 0 ) { + if( !TCR_4(__kmp_init_middle) ) + __kmp_middle_initialize(); // get __kmp_avail_proc calculated + num_threads = __kmp_avail_proc / num_teams; + if( num_teams * num_threads > __kmp_max_nth ) { + // adjust num_threads w/o warning as it is not user setting + num_threads = __kmp_max_nth / num_teams; + } + } else { + if( num_teams * num_threads > __kmp_max_nth ) { + int new_threads = __kmp_max_nth / num_teams; + if ( !__kmp_reserve_warn ) { // user asked for too many threads + __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantFormThrTeam, num_threads, new_threads ), + KMP_HNT( Unset_ALL_THREADS ), + __kmp_msg_null + ); + } + num_threads = new_threads; + } + } + thr->th.th_teams_size.nth = num_threads; +} + + +// +// Set the proc_bind var to use in the following parallel region. +// +void +__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind ) +{ + kmp_info_t *thr = __kmp_threads[gtid]; + thr->th.th_set_proc_bind = proc_bind; +} + +#endif /* OMP_40_ENABLED */ + +/* Launch the worker threads into the microtask. 
*/ + +void +__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ) +{ + kmp_info_t *this_thr = __kmp_threads[gtid]; + +#ifdef KMP_DEBUG + int f; +#endif /* KMP_DEBUG */ + + KMP_DEBUG_ASSERT( team ); + KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); + KMP_ASSERT( KMP_MASTER_GTID(gtid) ); + KMP_MB(); /* Flush all pending memory write invalidates. */ + + team->t.t_construct = 0; /* no single directives seen yet */ + team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */ + + /* Reset the identifiers on the dispatch buffer */ + KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); + if ( team->t.t_max_nproc > 1 ) { + int i; + for (i = 0; i < KMP_MAX_DISP_BUF; ++i) + team->t.t_disp_buffer[ i ].buffer_index = i; + } else { + team->t.t_disp_buffer[ 0 ].buffer_index = 0; + } + + KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_ASSERT( this_thr->th.th_team == team ); + +#ifdef KMP_DEBUG + for( f=0 ; ft.t_nproc ; f++ ) { + KMP_DEBUG_ASSERT( team->t.t_threads[f] && + team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); + } +#endif /* KMP_DEBUG */ + + /* release the worker threads so they may begin working */ + __kmp_fork_barrier( gtid, 0 ); +} + + +void +__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ) +{ + kmp_info_t *this_thr = __kmp_threads[gtid]; + + KMP_DEBUG_ASSERT( team ); + KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); + KMP_ASSERT( KMP_MASTER_GTID(gtid) ); + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + /* Join barrier after fork */ + +#ifdef KMP_DEBUG + if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) { + __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]); + __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n", + gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc); + __kmp_print_structure(); + } + KMP_DEBUG_ASSERT( __kmp_threads[gtid] && + __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc ); +#endif /* KMP_DEBUG */ + + __kmp_join_barrier( gtid ); /* wait for everyone */ + + KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_ASSERT( this_thr->th.th_team == team ); +} + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#ifdef USE_LOAD_BALANCE + +// +// Return the worker threads actively spinning in the hot team, if we +// are at the outermost level of parallelism. Otherwise, return 0. +// +static int +__kmp_active_hot_team_nproc( kmp_root_t *root ) +{ + int i; + int retval; + kmp_team_t *hot_team; + + if ( root->r.r_active ) { + return 0; + } + hot_team = root->r.r_hot_team; + if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { + return hot_team->t.t_nproc - 1; // Don't count master thread + } + + // + // Skip the master thread - it is accounted for elsewhere. + // + retval = 0; + for ( i = 1; i < hot_team->t.t_nproc; i++ ) { + if ( hot_team->t.t_threads[i]->th.th_active ) { + retval++; + } + } + return retval; +} + +// +// Perform an automatic adjustment to the number of +// threads used by the next parallel region. 
+// +static int +__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc ) +{ + int retval; + int pool_active; + int hot_team_active; + int team_curr_active; + int system_active; + + KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", + root, set_nproc ) ); + KMP_DEBUG_ASSERT( root ); + KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE ); + KMP_DEBUG_ASSERT( set_nproc > 1 ); + + if ( set_nproc == 1) { + KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) ); + return 1; + } + + // + // Threads that are active in the thread pool, active in the hot team + // for this particular root (if we are at the outer par level), and + // the currently executing thread (to become the master) are available + // to add to the new team, but are currently contributing to the system + // load, and must be accounted for. + // + pool_active = TCR_4(__kmp_thread_pool_active_nth); + hot_team_active = __kmp_active_hot_team_nproc( root ); + team_curr_active = pool_active + hot_team_active + 1; + + // + // Check the system load. + // + system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active ); + KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n", + system_active, pool_active, hot_team_active ) ); + + if ( system_active < 0 ) { + // + // There was an error reading the necessary info from /proc, + // so use the thread limit algorithm instead. Once we set + // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, + // we shouldn't wind up getting back here. + // + __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; + KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" ); + + // + // Make this call behave like the thread limit algorithm. + // + retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 
1 + : root->r.r_hot_team->t.t_nproc); + if ( retval > set_nproc ) { + retval = set_nproc; + } + if ( retval < KMP_MIN_NTH ) { + retval = KMP_MIN_NTH; + } + + KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) ); + return retval; + } + + // + // There is a slight delay in the load balance algorithm in detecting + // new running procs. The real system load at this instant should be + // at least as large as the #active omp thread that are available to + // add to the team. + // + if ( system_active < team_curr_active ) { + system_active = team_curr_active; + } + retval = __kmp_avail_proc - system_active + team_curr_active; + if ( retval > set_nproc ) { + retval = set_nproc; + } + if ( retval < KMP_MIN_NTH ) { + retval = KMP_MIN_NTH; + } + + KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) ); + return retval; +} // __kmp_load_balance_nproc() + +#endif /* USE_LOAD_BALANCE */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* NOTE: this is called with the __kmp_init_lock held */ +void +__kmp_cleanup( void ) +{ + int f; + + KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) ); + + if (TCR_4(__kmp_init_parallel)) { +#if KMP_HANDLE_SIGNALS + __kmp_remove_signals(); +#endif + TCW_4(__kmp_init_parallel, FALSE); + } + + if (TCR_4(__kmp_init_middle)) { +#if KMP_AFFINITY_SUPPORTED + __kmp_affinity_uninitialize(); +#endif /* KMP_AFFINITY_SUPPORTED */ + __kmp_cleanup_hierarchy(); + TCW_4(__kmp_init_middle, FALSE); + } + + KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) ); + + if (__kmp_init_serial) { + __kmp_runtime_destroy(); + __kmp_init_serial = FALSE; + } + + for ( f = 0; f < __kmp_threads_capacity; f++ ) { + if ( __kmp_root[ f ] != NULL ) { + __kmp_free( __kmp_root[ f ] ); + __kmp_root[ f ] = NULL; + } + } + __kmp_free( __kmp_threads ); + // __kmp_threads and __kmp_root were allocated at once, as single 
block, so there is no need in + // freeing __kmp_root. + __kmp_threads = NULL; + __kmp_root = NULL; + __kmp_threads_capacity = 0; + +#if KMP_USE_DYNAMIC_LOCK + __kmp_cleanup_indirect_user_locks(); +#else + __kmp_cleanup_user_locks(); +#endif + + #if KMP_AFFINITY_SUPPORTED + KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file ); + __kmp_cpuinfo_file = NULL; + #endif /* KMP_AFFINITY_SUPPORTED */ + + #if KMP_USE_ADAPTIVE_LOCKS + #if KMP_DEBUG_ADAPTIVE_LOCKS + __kmp_print_speculative_stats(); + #endif + #endif + KMP_INTERNAL_FREE( __kmp_nested_nth.nth ); + __kmp_nested_nth.nth = NULL; + __kmp_nested_nth.size = 0; + __kmp_nested_nth.used = 0; + + __kmp_i18n_catclose(); + +#if KMP_STATS_ENABLED + __kmp_accumulate_stats_at_exit(); + __kmp_stats_list.deallocate(); +#endif + + KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +int +__kmp_ignore_mppbeg( void ) +{ + char *env; + + if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) { + if (__kmp_str_match_false( env )) + return FALSE; + } + // By default __kmpc_begin() is no-op. + return TRUE; +} + +int +__kmp_ignore_mppend( void ) +{ + char *env; + + if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) { + if (__kmp_str_match_false( env )) + return FALSE; + } + // By default __kmpc_end() is no-op. 
+ return TRUE; +} + +void +__kmp_internal_begin( void ) +{ + int gtid; + kmp_root_t *root; + + /* this is a very important step as it will register new sibling threads + * and assign these new uber threads a new gtid */ + gtid = __kmp_entry_gtid(); + root = __kmp_threads[ gtid ]->th.th_root; + KMP_ASSERT( KMP_UBER_GTID( gtid )); + + if( root->r.r_begin ) return; + __kmp_acquire_lock( &root->r.r_begin_lock, gtid ); + if( root->r.r_begin ) { + __kmp_release_lock( & root->r.r_begin_lock, gtid ); + return; + } + + root->r.r_begin = TRUE; + + __kmp_release_lock( & root->r.r_begin_lock, gtid ); +} + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_user_set_library (enum library_type arg) +{ + int gtid; + kmp_root_t *root; + kmp_info_t *thread; + + /* first, make sure we are initialized so we can get our gtid */ + + gtid = __kmp_entry_gtid(); + thread = __kmp_threads[ gtid ]; + + root = thread->th.th_root; + + KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial )); + if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */ + KMP_WARNING( SetLibraryIncorrectCall ); + return; + } + + switch ( arg ) { + case library_serial : + thread->th.th_set_nproc = 0; + set__nproc( thread, 1 ); + break; + case library_turnaround : + thread->th.th_set_nproc = 0; + set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); + break; + case library_throughput : + thread->th.th_set_nproc = 0; + set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); + break; + default: + KMP_FATAL( UnknownLibraryType, arg ); + } + + __kmp_aux_set_library ( arg ); +} + +void +__kmp_aux_set_stacksize( size_t arg ) +{ + if (! 
__kmp_init_serial) + __kmp_serial_initialize(); + +#if KMP_OS_DARWIN + if (arg & (0x1000 - 1)) { + arg &= ~(0x1000 - 1); + if(arg + 0x1000) /* check for overflow if we round up */ + arg += 0x1000; + } +#endif + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + + /* only change the default stacksize before the first parallel region */ + if (! TCR_4(__kmp_init_parallel)) { + size_t value = arg; /* argument is in bytes */ + + if (value < __kmp_sys_min_stksize ) + value = __kmp_sys_min_stksize ; + else if (value > KMP_MAX_STKSIZE) + value = KMP_MAX_STKSIZE; + + __kmp_stksize = value; + + __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ + } + + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); +} + +/* set the behaviour of the runtime library */ +/* TODO this can cause some odd behaviour with sibling parallelism... */ +void +__kmp_aux_set_library (enum library_type arg) +{ + __kmp_library = arg; + + switch ( __kmp_library ) { + case library_serial : + { + KMP_INFORM( LibraryIsSerial ); + (void) __kmp_change_library( TRUE ); + } + break; + case library_turnaround : + (void) __kmp_change_library( TRUE ); + break; + case library_throughput : + (void) __kmp_change_library( FALSE ); + break; + default: + KMP_FATAL( UnknownLibraryType, arg ); + } +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid) +{ + int blocktime = arg; /* argument is in milliseconds */ + int bt_intervals; + int bt_set; + + __kmp_save_internal_controls( thread ); + + /* Normalize and set blocktime for the teams */ + if (blocktime < KMP_MIN_BLOCKTIME) + blocktime = KMP_MIN_BLOCKTIME; + else if (blocktime > KMP_MAX_BLOCKTIME) + blocktime = KMP_MAX_BLOCKTIME; + + set__blocktime_team( thread->th.th_team, tid, blocktime ); + set__blocktime_team( thread->th.th_serial_team, 0, blocktime ); + + /* Calculate and set 
blocktime intervals for the teams */ + bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); + + set__bt_intervals_team( thread->th.th_team, tid, bt_intervals ); + set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals ); + + /* Set whether blocktime has been set to "TRUE" */ + bt_set = TRUE; + + set__bt_set_team( thread->th.th_team, tid, bt_set ); + set__bt_set_team( thread->th.th_serial_team, 0, bt_set ); + KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n", + __kmp_gtid_from_tid(tid, thread->th.th_team), + thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) ); +} + +void +__kmp_aux_set_defaults( + char const * str, + int len +) { + if ( ! __kmp_init_serial ) { + __kmp_serial_initialize(); + }; + __kmp_env_initialize( str ); + + if (__kmp_settings +#if OMP_40_ENABLED + || __kmp_display_env || __kmp_display_env_verbose +#endif // OMP_40_ENABLED + ) { + __kmp_env_print(); + } +} // __kmp_aux_set_defaults + +/* ------------------------------------------------------------------------ */ + +/* + * internal fast reduction routines + */ + +PACKED_REDUCTION_METHOD_T +__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck ) +{ + + // Default reduction method: critical construct ( lck != NULL, like in current PAROPT ) + // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL + // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL + // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT. 
+ + PACKED_REDUCTION_METHOD_T retval; + + int team_size; + + KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 ) + KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 ) + + #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) ) + #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) ) + + retval = critical_reduce_block; + + team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic deference ) is slower + + if( team_size == 1 ) { + + retval = empty_reduce_block; + + } else { + + int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; + int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; + + #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 + + #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN + + int teamsize_cutoff = 4; + +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + if( __kmp_mic_type != non_mic ) { + teamsize_cutoff = 8; + } +#endif + if( tree_available ) { + if( team_size <= teamsize_cutoff ) { + if ( atomic_available ) { + retval = atomic_reduce_block; + } + } else { + retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; + } + } else if ( atomic_available ) { + retval = atomic_reduce_block; + } + #else + #error "Unknown or unsupported OS" + #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN + + #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH + + #if KMP_OS_LINUX || KMP_OS_WINDOWS + + // basic tuning + + if( atomic_available ) { + if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ??? 
+ retval = atomic_reduce_block; + } + } // otherwise: use critical section + + #elif KMP_OS_DARWIN + + if( atomic_available && ( num_vars <= 3 ) ) { + retval = atomic_reduce_block; + } else if( tree_available ) { + if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) { + retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; + } + } // otherwise: use critical section + + #else + #error "Unknown or unsupported OS" + #endif + + #else + #error "Unknown or unsupported architecture" + #endif + + } + + // KMP_FORCE_REDUCTION + + // If the team is serialized (team_size == 1), ignore the forced reduction + // method and stay with the unsynchronized method (empty_reduce_block) + if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) { + + PACKED_REDUCTION_METHOD_T forced_retval; + + int atomic_available, tree_available; + + switch( ( forced_retval = __kmp_force_reduction_method ) ) + { + case critical_reduce_block: + KMP_ASSERT( lck ); // lck should be != 0 + break; + + case atomic_reduce_block: + atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; + KMP_ASSERT( atomic_available ); // atomic_available should be != 0 + break; + + case tree_reduce_block: + tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; + KMP_ASSERT( tree_available ); // tree_available should be != 0 + #if KMP_FAST_REDUCTION_BARRIER + forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; + #endif + break; + + default: + KMP_ASSERT( 0 ); // "unsupported method specified" + } + + retval = forced_retval; + } + + KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) ); + + #undef FAST_REDUCTION_TREE_METHOD_GENERATED + #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED + + return ( retval ); +} + +// this function is for testing set/get/determine reduce method +kmp_int32 +__kmp_get_reduce_method( void ) { + return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 ); +} + +/* 
------------------------------------------------------------------------ */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h b/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h index 6ba814703b1..c1df64c6085 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h +++ b/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h @@ -1,62 +1,62 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_SAFE_C_API_H -#define KMP_SAFE_C_API_H - -// -// Replacement for banned C API -// - -// Not every unsafe call listed here is handled now, but keeping everything -// in one place should be handy for future maintenance. -#if KMP_OS_WINDOWS - -# define RSIZE_MAX_STR ( 4UL << 10 ) // 4KB - -// _malloca was suggested, but it is not a drop-in replacement for _alloca -# define KMP_ALLOCA _alloca - -# define KMP_MEMCPY_S memcpy_s -# define KMP_SNPRINTF sprintf_s -# define KMP_SSCANF sscanf_s -# define KMP_STRCPY_S strcpy_s -# define KMP_STRNCPY_S strncpy_s - -// Use this only when buffer size is unknown -# define KMP_MEMCPY(dst, src, cnt) memcpy_s(dst, cnt, src, cnt) - -# define KMP_STRLEN(str) strnlen_s(str, RSIZE_MAX_STR) - -// Use this only when buffer size is unknown -# define KMP_STRNCPY(dst, src, cnt) strncpy_s(dst, cnt, src, cnt) - -// _TRUNCATE insures buffer size > max string to print. -# define KMP_VSNPRINTF(dst, cnt, fmt, arg) vsnprintf_s(dst, cnt, _TRUNCATE, fmt, arg) - -#else // KMP_OS_WINDOWS - -// For now, these macros use the existing API. 
- -# define KMP_ALLOCA alloca -# define KMP_MEMCPY_S(dst, bsz, src, cnt) memcpy(dst, src, cnt) -# define KMP_SNPRINTF snprintf -# define KMP_SSCANF sscanf -# define KMP_STRCPY_S(dst, bsz, src) strcpy(dst, src) -# define KMP_STRNCPY_S(dst, bsz, src, cnt) strncpy(dst, src, cnt) -# define KMP_VSNPRINTF vsnprintf -# define KMP_STRNCPY strncpy -# define KMP_STRLEN strlen -# define KMP_MEMCPY memcpy - -#endif // KMP_OS_WINDOWS - -#endif // KMP_SAFE_C_API_H + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_SAFE_C_API_H +#define KMP_SAFE_C_API_H + +// +// Replacement for banned C API +// + +// Not every unsafe call listed here is handled now, but keeping everything +// in one place should be handy for future maintenance. +#if KMP_OS_WINDOWS + +# define RSIZE_MAX_STR ( 4UL << 10 ) // 4KB + +// _malloca was suggested, but it is not a drop-in replacement for _alloca +# define KMP_ALLOCA _alloca + +# define KMP_MEMCPY_S memcpy_s +# define KMP_SNPRINTF sprintf_s +# define KMP_SSCANF sscanf_s +# define KMP_STRCPY_S strcpy_s +# define KMP_STRNCPY_S strncpy_s + +// Use this only when buffer size is unknown +# define KMP_MEMCPY(dst, src, cnt) memcpy_s(dst, cnt, src, cnt) + +# define KMP_STRLEN(str) strnlen_s(str, RSIZE_MAX_STR) + +// Use this only when buffer size is unknown +# define KMP_STRNCPY(dst, src, cnt) strncpy_s(dst, cnt, src, cnt) + +// _TRUNCATE insures buffer size > max string to print. +# define KMP_VSNPRINTF(dst, cnt, fmt, arg) vsnprintf_s(dst, cnt, _TRUNCATE, fmt, arg) + +#else // KMP_OS_WINDOWS + +// For now, these macros use the existing API. 
+ +# define KMP_ALLOCA alloca +# define KMP_MEMCPY_S(dst, bsz, src, cnt) memcpy(dst, src, cnt) +# define KMP_SNPRINTF snprintf +# define KMP_SSCANF sscanf +# define KMP_STRCPY_S(dst, bsz, src) strcpy(dst, src) +# define KMP_STRNCPY_S(dst, bsz, src, cnt) strncpy(dst, src, cnt) +# define KMP_VSNPRINTF vsnprintf +# define KMP_STRNCPY strncpy +# define KMP_STRLEN strlen +# define KMP_MEMCPY memcpy + +#endif // KMP_OS_WINDOWS + +#endif // KMP_SAFE_C_API_H diff --git a/contrib/libs/cxxsupp/openmp/kmp_sched.cpp b/contrib/libs/cxxsupp/openmp/kmp_sched.cpp index 798ed0e2063..80ad960a8d3 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_sched.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_sched.cpp @@ -1,940 +1,940 @@ -/* - * kmp_sched.c -- static scheduling -- iteration initialization - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -/* - * Static scheduling initialization. - * - * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however - * it may change values between parallel regions. __kmp_max_nth - * is the largest value __kmp_nth may take, 1 is the smallest. 
- * - */ - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_str.h" -#include "kmp_error.h" -#include "kmp_stats.h" -#include "kmp_itt.h" - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - -// template for type limits -template< typename T > -struct i_maxmin { - static const T mx; - static const T mn; -}; -template<> -struct i_maxmin< int > { - static const int mx = 0x7fffffff; - static const int mn = 0x80000000; -}; -template<> -struct i_maxmin< unsigned int > { - static const unsigned int mx = 0xffffffff; - static const unsigned int mn = 0x00000000; -}; -template<> -struct i_maxmin< long long > { - static const long long mx = 0x7fffffffffffffffLL; - static const long long mn = 0x8000000000000000LL; -}; -template<> -struct i_maxmin< unsigned long long > { - static const unsigned long long mx = 0xffffffffffffffffLL; - static const unsigned long long mn = 0x0000000000000000LL; -}; -//------------------------------------------------------------------------- -#ifdef KMP_DEBUG -//------------------------------------------------------------------------- -// template for debug prints specification ( d, u, lld, llu ) - char const * traits_t< int >::spec = "d"; - char const * traits_t< unsigned int >::spec = "u"; - char const * traits_t< long long >::spec = "lld"; - char const * traits_t< unsigned long long >::spec = "llu"; -//------------------------------------------------------------------------- -#endif - -template< typename T > -static void -__kmp_for_static_init( - ident_t *loc, - kmp_int32 global_tid, - kmp_int32 schedtype, - kmp_int32 *plastiter, - T *plower, - T *pupper, - typename traits_t< T >::signed_t *pstride, - typename traits_t< T >::signed_t incr, - typename traits_t< T >::signed_t chunk -) { - KMP_COUNT_BLOCK(OMP_FOR_static); - KMP_TIME_BLOCK (FOR_static_scheduling); - - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - /* this all has to be changed back to TID and such.. 
*/ +/* + * kmp_sched.c -- static scheduling -- iteration initialization + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +/* + * Static scheduling initialization. + * + * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however + * it may change values between parallel regions. __kmp_max_nth + * is the largest value __kmp_nth may take, 1 is the smallest. + * + */ + +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_str.h" +#include "kmp_error.h" +#include "kmp_stats.h" +#include "kmp_itt.h" + +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif + +// template for type limits +template< typename T > +struct i_maxmin { + static const T mx; + static const T mn; +}; +template<> +struct i_maxmin< int > { + static const int mx = 0x7fffffff; + static const int mn = 0x80000000; +}; +template<> +struct i_maxmin< unsigned int > { + static const unsigned int mx = 0xffffffff; + static const unsigned int mn = 0x00000000; +}; +template<> +struct i_maxmin< long long > { + static const long long mx = 0x7fffffffffffffffLL; + static const long long mn = 0x8000000000000000LL; +}; +template<> +struct i_maxmin< unsigned long long > { + static const unsigned long long mx = 0xffffffffffffffffLL; + static const unsigned long long mn = 0x0000000000000000LL; +}; +//------------------------------------------------------------------------- +#ifdef KMP_DEBUG +//------------------------------------------------------------------------- +// template for debug prints specification ( d, u, lld, llu ) + char const * traits_t< int >::spec = "d"; + char const * traits_t< unsigned int >::spec = "u"; + char const * traits_t< long long >::spec = "lld"; + char const * traits_t< 
unsigned long long >::spec = "llu"; +//------------------------------------------------------------------------- +#endif + +template< typename T > +static void +__kmp_for_static_init( + ident_t *loc, + kmp_int32 global_tid, + kmp_int32 schedtype, + kmp_int32 *plastiter, + T *plower, + T *pupper, + typename traits_t< T >::signed_t *pstride, + typename traits_t< T >::signed_t incr, + typename traits_t< T >::signed_t chunk +) { + KMP_COUNT_BLOCK(OMP_FOR_static); + KMP_TIME_BLOCK (FOR_static_scheduling); + + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; + /* this all has to be changed back to TID and such.. */ kmp_int32 gtid = global_tid; kmp_uint32 tid; kmp_uint32 nth; UT trip_count; kmp_team_t *team; kmp_info_t *th = __kmp_threads[ gtid ]; - -#if OMPT_SUPPORT && OMPT_TRACE - ompt_team_info_t *team_info = NULL; - ompt_task_info_t *task_info = NULL; - - if (ompt_enabled) { - // Only fully initialize variables needed by OMPT if OMPT is enabled. 
- team_info = __ompt_get_teaminfo(0, NULL); - task_info = __ompt_get_taskinfo(0); - } -#endif - - KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride ); - KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \ - " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter, - *plower, *pupper, *pstride, incr, chunk ) ); - __kmp_str_free( &buff ); - } - #endif - - if ( __kmp_env_consistency_check ) { - __kmp_push_workshare( global_tid, ct_pdo, loc ); - if ( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); - } - } - /* special handling for zero-trip loops */ - if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { - if( plastiter != NULL ) - *plastiter = FALSE; - /* leave pupper and plower set to entire iteration space */ - *pstride = incr; /* value should never be used */ - // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\ - upper=0,stride=1) - JPH June 23, 2009. 
- #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, - team_info->microtask); - } -#endif - KMP_COUNT_VALUE (FOR_static_iterations, 0); - return; - } - - #if OMP_40_ENABLED - if ( schedtype > kmp_ord_upper ) { - // we are in DISTRIBUTE construct - schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type - tid = th->th.th_team->t.t_master_tid; - team = th->th.th_team->t.t_parent; - } else - #endif - { - tid = __kmp_tid_from_gtid( global_tid ); - team = th->th.th_team; - } - - /* determine if "for" loop is an active worksharing construct */ - if ( team -> t.t_serialized ) { - /* serialized parallel, each thread executes whole iteration space */ - if( plastiter != NULL ) - *plastiter = TRUE; - /* leave pupper and plower set to entire iteration space */ - *pstride = (incr > 0) ? 
(*pupper - *plower + 1) : (-(*plower - *pupper + 1)); - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, - team_info->microtask); - } -#endif - return; - } - nth = team->t.t_nproc; - if ( nth == 1 ) { - if( plastiter != NULL ) - *plastiter = TRUE; - *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, - team_info->microtask); - } -#endif - return; - } - - /* compute trip count */ - if ( incr == 1 ) { - trip_count = *pupper - *plower + 1; - } else if (incr == -1) { - trip_count = *plower - *pupper + 1; - } else { - if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0 - trip_count = (*pupper - *plower) / incr + 1; - } else { - trip_count = 
(*plower - *pupper) / ( -incr ) + 1; - } - } - - if ( __kmp_env_consistency_check ) { - /* tripcount overflow? */ - if ( trip_count == 0 && *pupper != *plower ) { - __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc ); - } - } - KMP_COUNT_VALUE (FOR_static_iterations, trip_count); - - /* compute remaining parameters */ - switch ( schedtype ) { - case kmp_sch_static: - { - if ( trip_count < nth ) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); // Unknown static scheduling type. - if ( tid < trip_count ) { - *pupper = *plower = *plower + tid * incr; - } else { - *plower = *pupper + incr; - } - if( plastiter != NULL ) - *plastiter = ( tid == trip_count - 1 ); - } else { - if ( __kmp_static == kmp_sch_static_balanced ) { + +#if OMPT_SUPPORT && OMPT_TRACE + ompt_team_info_t *team_info = NULL; + ompt_task_info_t *task_info = NULL; + + if (ompt_enabled) { + // Only fully initialize variables needed by OMPT if OMPT is enabled. 
+ team_info = __ompt_get_teaminfo(0, NULL); + task_info = __ompt_get_taskinfo(0); + } +#endif + + KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride ); + KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid)); + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \ + " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, + traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec ); + KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter, + *plower, *pupper, *pstride, incr, chunk ) ); + __kmp_str_free( &buff ); + } + #endif + + if ( __kmp_env_consistency_check ) { + __kmp_push_workshare( global_tid, ct_pdo, loc ); + if ( incr == 0 ) { + __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); + } + } + /* special handling for zero-trip loops */ + if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { + if( plastiter != NULL ) + *plastiter = FALSE; + /* leave pupper and plower set to entire iteration space */ + *pstride = incr; /* value should never be used */ + // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\ + upper=0,stride=1) - JPH June 23, 2009. 
+ #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); + KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) ); + __kmp_str_free( &buff ); + } + #endif + KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, + team_info->microtask); + } +#endif + KMP_COUNT_VALUE (FOR_static_iterations, 0); + return; + } + + #if OMP_40_ENABLED + if ( schedtype > kmp_ord_upper ) { + // we are in DISTRIBUTE construct + schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type + tid = th->th.th_team->t.t_master_tid; + team = th->th.th_team->t.t_parent; + } else + #endif + { + tid = __kmp_tid_from_gtid( global_tid ); + team = th->th.th_team; + } + + /* determine if "for" loop is an active worksharing construct */ + if ( team -> t.t_serialized ) { + /* serialized parallel, each thread executes whole iteration space */ + if( plastiter != NULL ) + *plastiter = TRUE; + /* leave pupper and plower set to entire iteration space */ + *pstride = (incr > 0) ? 
(*pupper - *plower + 1) : (-(*plower - *pupper + 1)); + + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); + KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); + __kmp_str_free( &buff ); + } + #endif + KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, + team_info->microtask); + } +#endif + return; + } + nth = team->t.t_nproc; + if ( nth == 1 ) { + if( plastiter != NULL ) + *plastiter = TRUE; + *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); + KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); + __kmp_str_free( &buff ); + } + #endif + KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, + team_info->microtask); + } +#endif + return; + } + + /* compute trip count */ + if ( incr == 1 ) { + trip_count = *pupper - *plower + 1; + } else if (incr == -1) { + trip_count = *plower - *pupper + 1; + } else { + if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0 + trip_count = (*pupper - *plower) / incr + 1; + } else { + trip_count = 
(*plower - *pupper) / ( -incr ) + 1; + } + } + + if ( __kmp_env_consistency_check ) { + /* tripcount overflow? */ + if ( trip_count == 0 && *pupper != *plower ) { + __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc ); + } + } + KMP_COUNT_VALUE (FOR_static_iterations, trip_count); + + /* compute remaining parameters */ + switch ( schedtype ) { + case kmp_sch_static: + { + if ( trip_count < nth ) { + KMP_DEBUG_ASSERT( + __kmp_static == kmp_sch_static_greedy || \ + __kmp_static == kmp_sch_static_balanced + ); // Unknown static scheduling type. + if ( tid < trip_count ) { + *pupper = *plower = *plower + tid * incr; + } else { + *plower = *pupper + incr; + } + if( plastiter != NULL ) + *plastiter = ( tid == trip_count - 1 ); + } else { + if ( __kmp_static == kmp_sch_static_balanced ) { UT small_chunk = trip_count / nth; UT extras = trip_count % nth; - *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) ); - *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr ); - if( plastiter != NULL ) - *plastiter = ( tid == nth - 1 ); - } else { + *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) ); + *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr ); + if( plastiter != NULL ) + *plastiter = ( tid == nth - 1 ); + } else { T big_chunk_inc_count = ( trip_count/nth + - ( ( trip_count % nth ) ? 1 : 0) ) * incr; + ( ( trip_count % nth ) ? 1 : 0) ) * incr; T old_upper = *pupper; - - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. 
- - *plower += tid * big_chunk_inc_count; - *pupper = *plower + big_chunk_inc_count - incr; - if ( incr > 0 ) { - if( *pupper < *plower ) - *pupper = i_maxmin< T >::mx; - if( plastiter != NULL ) - *plastiter = *plower <= old_upper && *pupper > old_upper - incr; - if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258 - } else { - if( *pupper > *plower ) - *pupper = i_maxmin< T >::mn; - if( plastiter != NULL ) - *plastiter = *plower >= old_upper && *pupper < old_upper - incr; - if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258 - } - } - } - break; - } - case kmp_sch_static_chunked: - { + + KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); + // Unknown static scheduling type. + + *plower += tid * big_chunk_inc_count; + *pupper = *plower + big_chunk_inc_count - incr; + if ( incr > 0 ) { + if( *pupper < *plower ) + *pupper = i_maxmin< T >::mx; + if( plastiter != NULL ) + *plastiter = *plower <= old_upper && *pupper > old_upper - incr; + if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258 + } else { + if( *pupper > *plower ) + *pupper = i_maxmin< T >::mn; + if( plastiter != NULL ) + *plastiter = *plower >= old_upper && *pupper < old_upper - incr; + if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258 + } + } + } + break; + } + case kmp_sch_static_chunked: + { ST span; - if ( chunk < 1 ) { - chunk = 1; - } - span = chunk * incr; - *pstride = span * nth; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if( plastiter != NULL ) - *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth); - break; - } - default: - KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" ); - break; - } - -#if USE_ITT_BUILD - // Report loop metadata - if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && -#if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && -#endif - team->t.t_active_level == 1 ) - { - kmp_uint64 cur_chunk = chunk; - // Calculate chunk in 
case it was not specified; it is specified for kmp_sch_static_chunked - if ( schedtype == kmp_sch_static ) { - cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0); - } - // 0 - "static" schedule - __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); - } -#endif - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); - } -#endif - - return; -} - -template< typename T > -static void -__kmp_dist_for_static_init( - ident_t *loc, - kmp_int32 gtid, - kmp_int32 schedule, - kmp_int32 *plastiter, - T *plower, - T *pupper, - T *pupperDist, - typename traits_t< T >::signed_t *pstride, - typename traits_t< T >::signed_t incr, - typename traits_t< T >::signed_t chunk -) { - KMP_COUNT_BLOCK(OMP_DISTRIBUTE); - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; + if ( chunk < 1 ) { + chunk = 1; + } + span = chunk * incr; + *pstride = span * nth; + *plower = *plower + (span * tid); + *pupper = *plower + span - incr; + if( plastiter != NULL ) + *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth); + break; + } + default: + KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" ); + break; + } + +#if USE_ITT_BUILD + // Report loop metadata + if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && +#if OMP_40_ENABLED + 
th->th.th_teams_microtask == NULL && +#endif + team->t.t_active_level == 1 ) + { + kmp_uint64 cur_chunk = chunk; + // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked + if ( schedtype == kmp_sch_static ) { + cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0); + } + // 0 - "static" schedule + __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); + } +#endif + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); + KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); + __kmp_str_free( &buff ); + } + #endif + KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, team_info->microtask); + } +#endif + + return; +} + +template< typename T > +static void +__kmp_dist_for_static_init( + ident_t *loc, + kmp_int32 gtid, + kmp_int32 schedule, + kmp_int32 *plastiter, + T *plower, + T *pupper, + T *pupperDist, + typename traits_t< T >::signed_t *pstride, + typename traits_t< T >::signed_t incr, + typename traits_t< T >::signed_t chunk +) { + KMP_COUNT_BLOCK(OMP_DISTRIBUTE); + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; kmp_uint32 tid; kmp_uint32 nth; kmp_uint32 team_id; kmp_uint32 nteams; UT trip_count; kmp_team_t *team; - kmp_info_t * th; - - KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride ); - KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = 
__kmp_str_format( - "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, gtid, schedule, *plastiter, - *plower, *pupper, incr, chunk ) ); - __kmp_str_free( &buff ); - } - #endif - - if( __kmp_env_consistency_check ) { - __kmp_push_workshare( gtid, ct_pdo, loc ); - if( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); - } - if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { - // The loop is illegal. - // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); - } - } - tid = __kmp_tid_from_gtid( gtid ); - th = __kmp_threads[gtid]; - nth = th->th.th_team_nproc; - team = th->th.th_team; - #if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; - #endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); - - // compute global trip count - if( incr == 1 ) { - trip_count = *pupper - *plower + 1; - } else if(incr == -1) { - trip_count = *plower - *pupper + 1; - } else { - trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case - } - - *pstride = *pupper - *plower; // just in case (can be unused) - if( trip_count <= nteams ) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); 
// Unknown static scheduling type. - // only masters of some teams get single iteration, other threads get nothing - if( team_id < trip_count && tid == 0 ) { - *pupper = *pupperDist = *plower = *plower + team_id * incr; - } else { - *pupperDist = *pupper; - *plower = *pupper + incr; // compiler should skip loop body - } - if( plastiter != NULL ) - *plastiter = ( tid == 0 && team_id == trip_count - 1 ); - } else { - // Get the team's chunk first (each team gets at most one chunk) - if( __kmp_static == kmp_sch_static_balanced ) { + kmp_info_t * th; + + KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride ); + KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\ + "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, + traits_t< ST >::spec, traits_t< T >::spec ); + KD_TRACE(100, ( buff, gtid, schedule, *plastiter, + *plower, *pupper, incr, chunk ) ); + __kmp_str_free( &buff ); + } + #endif + + if( __kmp_env_consistency_check ) { + __kmp_push_workshare( gtid, ct_pdo, loc ); + if( incr == 0 ) { + __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); + } + if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { + // The loop is illegal. 
+ // Some zero-trip loops maintained by compiler, e.g.: + // for(i=10;i<0;++i) // lower >= upper - run-time check + // for(i=0;i>10;--i) // lower <= upper - run-time check + // for(i=0;i>10;++i) // incr > 0 - compile-time check + // for(i=10;i<0;--i) // incr < 0 - compile-time check + // Compiler does not check the following illegal loops: + // for(i=0;i<10;i+=incr) // where incr<0 + // for(i=10;i>0;i-=incr) // where incr<0 + __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); + } + } + tid = __kmp_tid_from_gtid( gtid ); + th = __kmp_threads[gtid]; + nth = th->th.th_team_nproc; + team = th->th.th_team; + #if OMP_40_ENABLED + KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct + nteams = th->th.th_teams_size.nteams; + #endif + team_id = team->t.t_master_tid; + KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); + + // compute global trip count + if( incr == 1 ) { + trip_count = *pupper - *plower + 1; + } else if(incr == -1) { + trip_count = *plower - *pupper + 1; + } else { + trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case + } + + *pstride = *pupper - *plower; // just in case (can be unused) + if( trip_count <= nteams ) { + KMP_DEBUG_ASSERT( + __kmp_static == kmp_sch_static_greedy || \ + __kmp_static == kmp_sch_static_balanced + ); // Unknown static scheduling type. 
+ // only masters of some teams get single iteration, other threads get nothing + if( team_id < trip_count && tid == 0 ) { + *pupper = *pupperDist = *plower = *plower + team_id * incr; + } else { + *pupperDist = *pupper; + *plower = *pupper + incr; // compiler should skip loop body + } + if( plastiter != NULL ) + *plastiter = ( tid == 0 && team_id == trip_count - 1 ); + } else { + // Get the team's chunk first (each team gets at most one chunk) + if( __kmp_static == kmp_sch_static_balanced ) { UT chunkD = trip_count / nteams; UT extras = trip_count % nteams; - *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) ); - *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr ); - if( plastiter != NULL ) - *plastiter = ( team_id == nteams - 1 ); - } else { + *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) ); + *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr ); + if( plastiter != NULL ) + *plastiter = ( team_id == nteams - 1 ); + } else { T chunk_inc_count = - ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr; + ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr; T upper = *pupper; - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. 
- *plower += team_id * chunk_inc_count; - *pupperDist = *plower + chunk_inc_count - incr; - // Check/correct bounds if needed - if( incr > 0 ) { - if( *pupperDist < *plower ) - *pupperDist = i_maxmin< T >::mx; - if( plastiter != NULL ) - *plastiter = *plower <= upper && *pupperDist > upper - incr; - if( *pupperDist > upper ) - *pupperDist = upper; // tracker C73258 - if( *plower > *pupperDist ) { - *pupper = *pupperDist; // no iterations available for the team - goto end; - } - } else { - if( *pupperDist > *plower ) - *pupperDist = i_maxmin< T >::mn; - if( plastiter != NULL ) - *plastiter = *plower >= upper && *pupperDist < upper - incr; - if( *pupperDist < upper ) - *pupperDist = upper; // tracker C73258 - if( *plower < *pupperDist ) { - *pupper = *pupperDist; // no iterations available for the team - goto end; - } - } - } - // Get the parallel loop chunk now (for thread) - // compute trip count for team's chunk - if( incr == 1 ) { - trip_count = *pupperDist - *plower + 1; - } else if(incr == -1) { - trip_count = *plower - *pupperDist + 1; - } else { - trip_count = (ST)(*pupperDist - *plower) / incr + 1; - } - KMP_DEBUG_ASSERT( trip_count ); - switch( schedule ) { - case kmp_sch_static: - { - if( trip_count <= nth ) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); // Unknown static scheduling type. - if( tid < trip_count ) - *pupper = *plower = *plower + tid * incr; - else - *plower = *pupper + incr; // no iterations available - if( plastiter != NULL ) - if( *plastiter != 0 && !( tid == trip_count - 1 ) ) - *plastiter = 0; - } else { - if( __kmp_static == kmp_sch_static_balanced ) { + KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); + // Unknown static scheduling type. 
+ *plower += team_id * chunk_inc_count; + *pupperDist = *plower + chunk_inc_count - incr; + // Check/correct bounds if needed + if( incr > 0 ) { + if( *pupperDist < *plower ) + *pupperDist = i_maxmin< T >::mx; + if( plastiter != NULL ) + *plastiter = *plower <= upper && *pupperDist > upper - incr; + if( *pupperDist > upper ) + *pupperDist = upper; // tracker C73258 + if( *plower > *pupperDist ) { + *pupper = *pupperDist; // no iterations available for the team + goto end; + } + } else { + if( *pupperDist > *plower ) + *pupperDist = i_maxmin< T >::mn; + if( plastiter != NULL ) + *plastiter = *plower >= upper && *pupperDist < upper - incr; + if( *pupperDist < upper ) + *pupperDist = upper; // tracker C73258 + if( *plower < *pupperDist ) { + *pupper = *pupperDist; // no iterations available for the team + goto end; + } + } + } + // Get the parallel loop chunk now (for thread) + // compute trip count for team's chunk + if( incr == 1 ) { + trip_count = *pupperDist - *plower + 1; + } else if(incr == -1) { + trip_count = *plower - *pupperDist + 1; + } else { + trip_count = (ST)(*pupperDist - *plower) / incr + 1; + } + KMP_DEBUG_ASSERT( trip_count ); + switch( schedule ) { + case kmp_sch_static: + { + if( trip_count <= nth ) { + KMP_DEBUG_ASSERT( + __kmp_static == kmp_sch_static_greedy || \ + __kmp_static == kmp_sch_static_balanced + ); // Unknown static scheduling type. + if( tid < trip_count ) + *pupper = *plower = *plower + tid * incr; + else + *plower = *pupper + incr; // no iterations available + if( plastiter != NULL ) + if( *plastiter != 0 && !( tid == trip_count - 1 ) ) + *plastiter = 0; + } else { + if( __kmp_static == kmp_sch_static_balanced ) { UT chunkL = trip_count / nth; UT extras = trip_count % nth; - *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); - *pupper = *plower + chunkL * incr - (tid < extras ? 
0 : incr); - if( plastiter != NULL ) - if( *plastiter != 0 && !( tid == nth - 1 ) ) - *plastiter = 0; - } else { + *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); + *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); + if( plastiter != NULL ) + if( *plastiter != 0 && !( tid == nth - 1 ) ) + *plastiter = 0; + } else { T chunk_inc_count = - ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr; + ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr; T upper = *pupperDist; - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. - *plower += tid * chunk_inc_count; - *pupper = *plower + chunk_inc_count - incr; - if( incr > 0 ) { - if( *pupper < *plower ) - *pupper = i_maxmin< T >::mx; - if( plastiter != NULL ) - if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) ) - *plastiter = 0; - if( *pupper > upper ) - *pupper = upper;//tracker C73258 - } else { - if( *pupper > *plower ) - *pupper = i_maxmin< T >::mn; - if( plastiter != NULL ) - if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) ) - *plastiter = 0; - if( *pupper < upper ) - *pupper = upper;//tracker C73258 - } - } - } - break; - } - case kmp_sch_static_chunked: - { + KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); + // Unknown static scheduling type. 
+ *plower += tid * chunk_inc_count; + *pupper = *plower + chunk_inc_count - incr; + if( incr > 0 ) { + if( *pupper < *plower ) + *pupper = i_maxmin< T >::mx; + if( plastiter != NULL ) + if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) ) + *plastiter = 0; + if( *pupper > upper ) + *pupper = upper;//tracker C73258 + } else { + if( *pupper > *plower ) + *pupper = i_maxmin< T >::mn; + if( plastiter != NULL ) + if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) ) + *plastiter = 0; + if( *pupper < upper ) + *pupper = upper;//tracker C73258 + } + } + } + break; + } + case kmp_sch_static_chunked: + { ST span; - if( chunk < 1 ) - chunk = 1; - span = chunk * incr; - *pstride = span * nth; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if( plastiter != NULL ) - if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) ) - *plastiter = 0; - break; - } - default: - KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" ); - break; - } - } - end:; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\ - "stride=%%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec, - traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) ); - return; -} - -template< typename T > -static void -__kmp_team_static_init( - ident_t *loc, - kmp_int32 gtid, - kmp_int32 *p_last, - T *p_lb, - T *p_ub, - typename traits_t< T >::signed_t *p_st, - typename traits_t< T >::signed_t incr, - typename traits_t< T >::signed_t chunk -) { - // The routine returns the first chunk distributed to the team and - // stride for next chunks calculation. 
- // Last iteration flag set for the team that will execute - // the last iteration of the loop. - // The routine is called for dist_schedue(static,chunk) only. - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - kmp_uint32 team_id; - kmp_uint32 nteams; - UT trip_count; - T lower; - T upper; - ST span; - kmp_team_t *team; - kmp_info_t *th; - - KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st ); - KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); - __kmp_str_free( &buff ); - } - #endif - - lower = *p_lb; - upper = *p_ub; - if( __kmp_env_consistency_check ) { - if( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); - } - if( incr > 0 ? (upper < lower) : (lower < upper) ) { - // The loop is illegal. 
- // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); - } - } - th = __kmp_threads[gtid]; - team = th->th.th_team; - #if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; - #endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); - - // compute trip count - if( incr == 1 ) { - trip_count = upper - lower + 1; - } else if(incr == -1) { - trip_count = lower - upper + 1; - } else { - trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case - } - if( chunk < 1 ) - chunk = 1; - span = chunk * incr; - *p_st = span * nteams; - *p_lb = lower + (span * team_id); - *p_ub = *p_lb + span - incr; - if ( p_last != NULL ) - *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams); - // Correct upper bound if needed - if( incr > 0 ) { - if( *p_ub < *p_lb ) // overflow? 
- *p_ub = i_maxmin< T >::mx; - if( *p_ub > upper ) - *p_ub = upper; // tracker C73258 - } else { // incr < 0 - if( *p_ub > *p_lb ) - *p_ub = i_maxmin< T >::mn; - if( *p_ub < upper ) - *p_ub = upper; // tracker C73258 - } - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec ); - KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); - __kmp_str_free( &buff ); - } - #endif -} - -//-------------------------------------------------------------------------------------- -extern "C" { - -/*! -@ingroup WORK_SHARING -@param loc Source code location -@param gtid Global thread id of this thread -@param schedtype Scheduling type -@param plastiter Pointer to the "last iteration" flag -@param plower Pointer to the lower bound -@param pupper Pointer to the upper bound -@param pstride Pointer to the stride -@param incr Loop increment -@param chunk The chunk size - -Each of the four functions here are identical apart from the argument types. - -The functions compute the upper and lower bounds and stride to be used for the set of iterations -to be executed by the current thread from the statically scheduled loop that is described by the -initial values of the bounds, stride, increment and chunk size. - -@{ -*/ -void -__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_int32 *plower, kmp_int32 *pupper, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_for_static_init< kmp_int32 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); -} - -/*! 
- See @ref __kmpc_for_static_init_4 - */ -void -__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_uint32 *plower, kmp_uint32 *pupper, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_for_static_init< kmp_uint32 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); -} - -/*! - See @ref __kmpc_for_static_init_4 - */ -void -__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_int64 *plower, kmp_int64 *pupper, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_for_static_init< kmp_int64 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); -} - -/*! - See @ref __kmpc_for_static_init_4 - */ -void -__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_uint64 *plower, kmp_uint64 *pupper, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_for_static_init< kmp_uint64 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); -} -/*! -@} -*/ - -/*! -@ingroup WORK_SHARING -@param loc Source code location -@param gtid Global thread id of this thread -@param schedule Scheduling type for the parallel loop -@param plastiter Pointer to the "last iteration" flag -@param plower Pointer to the lower bound -@param pupper Pointer to the upper bound of loop chunk -@param pupperD Pointer to the upper bound of dist_chunk -@param pstride Pointer to the stride for parallel loop -@param incr Loop increment -@param chunk The chunk size for the parallel loop - -Each of the four functions here are identical apart from the argument types. 
- -The functions compute the upper and lower bounds and strides to be used for the set of iterations -to be executed by the current thread from the statically scheduled loop that is described by the -initial values of the bounds, strides, increment and chunks for parallel loop and distribute -constructs. - -@{ -*/ -void -__kmpc_dist_for_static_init_4( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_dist_for_static_init< kmp_int32 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); -} - -/*! - See @ref __kmpc_dist_for_static_init_4 - */ -void -__kmpc_dist_for_static_init_4u( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_dist_for_static_init< kmp_uint32 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); -} - -/*! - See @ref __kmpc_dist_for_static_init_4 - */ -void -__kmpc_dist_for_static_init_8( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_dist_for_static_init< kmp_int64 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); -} - -/*! - See @ref __kmpc_dist_for_static_init_4 - */ -void -__kmpc_dist_for_static_init_8u( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_dist_for_static_init< kmp_uint64 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); -} -/*! 
-@} -*/ - -//----------------------------------------------------------------------------------------- -// Auxiliary routines for Distribute Parallel Loop construct implementation -// Transfer call to template< type T > -// __kmp_team_static_init( ident_t *loc, int gtid, -// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) - -/*! -@ingroup WORK_SHARING -@{ -@param loc Source location -@param gtid Global thread id -@param p_last pointer to last iteration flag -@param p_lb pointer to Lower bound -@param p_ub pointer to Upper bound -@param p_st Step (or increment if you prefer) -@param incr Loop increment -@param chunk The chunk size to block with - -The functions compute the upper and lower bounds and stride to be used for the set of iterations -to be executed by the current team from the statically scheduled loop that is described by the -initial values of the bounds, stride, increment and chunk for the distribute construct as part of -composite distribute parallel loop construct. -These functions are all identical apart from the types of the arguments. -*/ - -void -__kmpc_team_static_init_4( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); -} - -/*! - See @ref __kmpc_team_static_init_4 - */ -void -__kmpc_team_static_init_4u( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); -} - -/*! 
- See @ref __kmpc_team_static_init_4 - */ -void -__kmpc_team_static_init_8( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); -} - -/*! - See @ref __kmpc_team_static_init_4 - */ -void -__kmpc_team_static_init_8u( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); -} -/*! -@} -*/ - -} // extern "C" - + if( chunk < 1 ) + chunk = 1; + span = chunk * incr; + *pstride = span * nth; + *plower = *plower + (span * tid); + *pupper = *plower + span - incr; + if( plastiter != NULL ) + if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) ) + *plastiter = 0; + break; + } + default: + KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" ); + break; + } + } + end:; + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\ + "stride=%%%s signed?<%s>\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec, + traits_t< ST >::spec, traits_t< T >::spec ); + KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) ); + __kmp_str_free( &buff ); + } + #endif + KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) ); + return; +} + +template< typename T > +static void +__kmp_team_static_init( + ident_t *loc, + kmp_int32 gtid, + kmp_int32 *p_last, + T *p_lb, + T *p_ub, + typename traits_t< T >::signed_t *p_st, + typename traits_t< T >::signed_t incr, + typename traits_t< T >::signed_t chunk +) { + // The routine returns the 
first chunk distributed to the team and + // stride for next chunks calculation. + // Last iteration flag set for the team that will execute + // the last iteration of the loop. + // The routine is called for dist_schedue(static,chunk) only. + typedef typename traits_t< T >::unsigned_t UT; + typedef typename traits_t< T >::signed_t ST; + kmp_uint32 team_id; + kmp_uint32 nteams; + UT trip_count; + T lower; + T upper; + ST span; + kmp_team_t *team; + kmp_info_t *th; + + KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st ); + KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid)); + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\ + "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, + traits_t< ST >::spec, traits_t< T >::spec ); + KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); + __kmp_str_free( &buff ); + } + #endif + + lower = *p_lb; + upper = *p_ub; + if( __kmp_env_consistency_check ) { + if( incr == 0 ) { + __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); + } + if( incr > 0 ? (upper < lower) : (lower < upper) ) { + // The loop is illegal. 
+ // Some zero-trip loops maintained by compiler, e.g.: + // for(i=10;i<0;++i) // lower >= upper - run-time check + // for(i=0;i>10;--i) // lower <= upper - run-time check + // for(i=0;i>10;++i) // incr > 0 - compile-time check + // for(i=10;i<0;--i) // incr < 0 - compile-time check + // Compiler does not check the following illegal loops: + // for(i=0;i<10;i+=incr) // where incr<0 + // for(i=10;i>0;i-=incr) // where incr<0 + __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); + } + } + th = __kmp_threads[gtid]; + team = th->th.th_team; + #if OMP_40_ENABLED + KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct + nteams = th->th.th_teams_size.nteams; + #endif + team_id = team->t.t_master_tid; + KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); + + // compute trip count + if( incr == 1 ) { + trip_count = upper - lower + 1; + } else if(incr == -1) { + trip_count = lower - upper + 1; + } else { + trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case + } + if( chunk < 1 ) + chunk = 1; + span = chunk * incr; + *p_st = span * nteams; + *p_lb = lower + (span * team_id); + *p_ub = *p_lb + span - incr; + if ( p_last != NULL ) + *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams); + // Correct upper bound if needed + if( incr > 0 ) { + if( *p_ub < *p_lb ) // overflow? 
+ *p_ub = i_maxmin< T >::mx; + if( *p_ub > upper ) + *p_ub = upper; // tracker C73258 + } else { // incr < 0 + if( *p_ub > *p_lb ) + *p_ub = i_maxmin< T >::mn; + if( *p_ub < upper ) + *p_ub = upper; // tracker C73258 + } + #ifdef KMP_DEBUG + { + const char * buff; + // create format specifiers before the debug output + buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\ + "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", + traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, + traits_t< ST >::spec ); + KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); + __kmp_str_free( &buff ); + } + #endif +} + +//-------------------------------------------------------------------------------------- +extern "C" { + +/*! +@ingroup WORK_SHARING +@param loc Source code location +@param gtid Global thread id of this thread +@param schedtype Scheduling type +@param plastiter Pointer to the "last iteration" flag +@param plower Pointer to the lower bound +@param pupper Pointer to the upper bound +@param pstride Pointer to the stride +@param incr Loop increment +@param chunk The chunk size + +Each of the four functions here are identical apart from the argument types. + +The functions compute the upper and lower bounds and stride to be used for the set of iterations +to be executed by the current thread from the statically scheduled loop that is described by the +initial values of the bounds, stride, increment and chunk size. + +@{ +*/ +void +__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_int32 *plower, kmp_int32 *pupper, + kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) +{ + __kmp_for_static_init< kmp_int32 >( + loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +} + +/*! 
+ See @ref __kmpc_for_static_init_4 + */ +void +__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_uint32 *plower, kmp_uint32 *pupper, + kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) +{ + __kmp_for_static_init< kmp_uint32 >( + loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +} + +/*! + See @ref __kmpc_for_static_init_4 + */ +void +__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_int64 *plower, kmp_int64 *pupper, + kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) +{ + __kmp_for_static_init< kmp_int64 >( + loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +} + +/*! + See @ref __kmpc_for_static_init_4 + */ +void +__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_uint64 *plower, kmp_uint64 *pupper, + kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) +{ + __kmp_for_static_init< kmp_uint64 >( + loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +} +/*! +@} +*/ + +/*! +@ingroup WORK_SHARING +@param loc Source code location +@param gtid Global thread id of this thread +@param schedule Scheduling type for the parallel loop +@param plastiter Pointer to the "last iteration" flag +@param plower Pointer to the lower bound +@param pupper Pointer to the upper bound of loop chunk +@param pupperD Pointer to the upper bound of dist_chunk +@param pstride Pointer to the stride for parallel loop +@param incr Loop increment +@param chunk The chunk size for the parallel loop + +Each of the four functions here are identical apart from the argument types. 
+ +The functions compute the upper and lower bounds and strides to be used for the set of iterations +to be executed by the current thread from the statically scheduled loop that is described by the +initial values of the bounds, strides, increment and chunks for parallel loop and distribute +constructs. + +@{ +*/ +void +__kmpc_dist_for_static_init_4( + ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, + kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, + kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) +{ + __kmp_dist_for_static_init< kmp_int32 >( + loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +} + +/*! + See @ref __kmpc_dist_for_static_init_4 + */ +void +__kmpc_dist_for_static_init_4u( + ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, + kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, + kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) +{ + __kmp_dist_for_static_init< kmp_uint32 >( + loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +} + +/*! + See @ref __kmpc_dist_for_static_init_4 + */ +void +__kmpc_dist_for_static_init_8( + ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, + kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, + kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) +{ + __kmp_dist_for_static_init< kmp_int64 >( + loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +} + +/*! + See @ref __kmpc_dist_for_static_init_4 + */ +void +__kmpc_dist_for_static_init_8u( + ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, + kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, + kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) +{ + __kmp_dist_for_static_init< kmp_uint64 >( + loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +} +/*! 
+@} +*/ + +//----------------------------------------------------------------------------------------- +// Auxiliary routines for Distribute Parallel Loop construct implementation +// Transfer call to template< type T > +// __kmp_team_static_init( ident_t *loc, int gtid, +// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) + +/*! +@ingroup WORK_SHARING +@{ +@param loc Source location +@param gtid Global thread id +@param p_last pointer to last iteration flag +@param p_lb pointer to Lower bound +@param p_ub pointer to Upper bound +@param p_st Step (or increment if you prefer) +@param incr Loop increment +@param chunk The chunk size to block with + +The functions compute the upper and lower bounds and stride to be used for the set of iterations +to be executed by the current team from the statically scheduled loop that is described by the +initial values of the bounds, stride, increment and chunk for the distribute construct as part of +composite distribute parallel loop construct. +These functions are all identical apart from the types of the arguments. +*/ + +void +__kmpc_team_static_init_4( + ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +} + +/*! + See @ref __kmpc_team_static_init_4 + */ +void +__kmpc_team_static_init_4u( + ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +} + +/*! 
+ See @ref __kmpc_team_static_init_4 + */ +void +__kmpc_team_static_init_8( + ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +} + +/*! + See @ref __kmpc_team_static_init_4 + */ +void +__kmpc_team_static_init_8u( + ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) +{ + KMP_DEBUG_ASSERT( __kmp_init_serial ); + __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +} +/*! +@} +*/ + +} // extern "C" + diff --git a/contrib/libs/cxxsupp/openmp/kmp_settings.c b/contrib/libs/cxxsupp/openmp/kmp_settings.c index 2bc312f6f19..067574f242c 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_settings.c +++ b/contrib/libs/cxxsupp/openmp/kmp_settings.c @@ -1,5469 +1,5469 @@ -/* - * kmp_settings.c -- Initialize environment variables - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_environment.h" -#include "kmp_atomic.h" -#include "kmp_itt.h" -#include "kmp_str.h" -#include "kmp_settings.h" -#include "kmp_i18n.h" -#include "kmp_io.h" - -static int __kmp_env_toPrint( char const * name, int flag ); - -bool __kmp_env_format = 0; // 0 - old format; 1 - new format -// ------------------------------------------------------------------------------------------------- -// Helper string functions. Subject to move to kmp_str. 
-// ------------------------------------------------------------------------------------------------- - -static double -__kmp_convert_to_double( char const * s ) -{ - double result; - - if ( KMP_SSCANF( s, "%lf", &result ) < 1 ) { - result = 0.0; - } - - return result; -} - -#ifdef KMP_DEBUG -static unsigned int -__kmp_readstr_with_sentinel(char *dest, char const * src, size_t len, char sentinel) { - unsigned int i; - for (i = 0; i < len; i++) { - if ((*src == '\0') || (*src == sentinel)) { - break; - } - *(dest++) = *(src++); - } - *dest = '\0'; - return i; -} -#endif - -static int -__kmp_match_with_sentinel( char const * a, char const * b, size_t len, char sentinel ) { - size_t l = 0; - - if(a == NULL) - a = ""; - if(b == NULL) - b = ""; - while(*a && *b && *b != sentinel) { - char ca = *a, cb = *b; - - if(ca >= 'a' && ca <= 'z') - ca -= 'a' - 'A'; - if(cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if(ca != cb) - return FALSE; - ++l; - ++a; - ++b; - } - return l >= len; -} - -// -// Expected usage: -// token is the token to check for. -// buf is the string being parsed. -// *end returns the char after the end of the token. -// it is not modified unless a match occurs. 
-// -// -// Example 1: -// -// if (__kmp_match_str("token", buf, *end) { -// -// buf = end; -// } -// -// Example 2: -// -// if (__kmp_match_str("token", buf, *end) { -// char *save = **end; -// **end = sentinel; -// -// **end = save; -// buf = end; -// } -// - -static int -__kmp_match_str( char const *token, char const *buf, const char **end) { - - KMP_ASSERT(token != NULL); - KMP_ASSERT(buf != NULL); - KMP_ASSERT(end != NULL); - - while (*token && *buf) { - char ct = *token, cb = *buf; - - if(ct >= 'a' && ct <= 'z') - ct -= 'a' - 'A'; - if(cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if (ct != cb) - return FALSE; - ++token; - ++buf; - } - if (*token) { - return FALSE; - } - *end = buf; - return TRUE; -} - - -static size_t -__kmp_round4k( size_t size ) { - size_t _4k = 4 * 1024; - if ( size & ( _4k - 1 ) ) { - size &= ~ ( _4k - 1 ); - if ( size <= KMP_SIZE_T_MAX - _4k ) { - size += _4k; // Round up if there is no overflow. - }; // if - }; // if - return size; -} // __kmp_round4k - - -/* - Here, multipliers are like __kmp_convert_to_seconds, but floating-point - values are allowed, and the return value is in milliseconds. The default - multiplier is milliseconds. Returns INT_MAX only if the value specified - matches "infinit*". Returns -1 if specified string is invalid. 
-*/ -int -__kmp_convert_to_milliseconds( char const * data ) -{ - int ret, nvalues, factor; - char mult, extra; - double value; - - if (data == NULL) return (-1); - if ( __kmp_str_match( "infinit", -1, data)) return (INT_MAX); - value = (double) 0.0; - mult = '\0'; - nvalues = KMP_SSCANF (data, "%lf%c%c", &value, &mult, &extra); - if (nvalues < 1) return (-1); - if (nvalues == 1) mult = '\0'; - if (nvalues == 3) return (-1); - - if (value < 0) return (-1); - - switch (mult) { - case '\0': - /* default is milliseconds */ - factor = 1; - break; - case 's': case 'S': - factor = 1000; - break; - case 'm': case 'M': - factor = 1000 * 60; - break; - case 'h': case 'H': - factor = 1000 * 60 * 60; - break; - case 'd': case 'D': - factor = 1000 * 24 * 60 * 60; - break; - default: - return (-1); - } - - if ( value >= ( (INT_MAX-1) / factor) ) - ret = INT_MAX-1; /* Don't allow infinite value here */ - else - ret = (int) (value * (double) factor); /* truncate to int */ - - return ret; -} - - -static int -__kmp_strcasecmp_with_sentinel( char const * a, char const * b, char sentinel ) { - if(a == NULL) - a = ""; - if(b == NULL) - b = ""; - while(*a && *b && *b != sentinel) { - char ca = *a, cb = *b; - - if(ca >= 'a' && ca <= 'z') - ca -= 'a' - 'A'; - if(cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if(ca != cb) - return (int)(unsigned char)*a - (int)(unsigned char)*b; - ++a; - ++b; - } - return *a ? - (*b && *b != sentinel) ? (int)(unsigned char)*a - (int)(unsigned char)*b : 1 : - (*b && *b != sentinel) ? -1 : 0; -} - - -// ================================================================================================= -// Table structures and helper functions. 
-// ================================================================================================= - -typedef struct __kmp_setting kmp_setting_t; -typedef struct __kmp_stg_ss_data kmp_stg_ss_data_t; -typedef struct __kmp_stg_wp_data kmp_stg_wp_data_t; -typedef struct __kmp_stg_fr_data kmp_stg_fr_data_t; - -typedef void ( * kmp_stg_parse_func_t )( char const * name, char const * value, void * data ); -typedef void ( * kmp_stg_print_func_t )( kmp_str_buf_t * buffer, char const * name, void * data ); - -struct __kmp_setting { - char const * name; // Name of setting (environment variable). - kmp_stg_parse_func_t parse; // Parser function. - kmp_stg_print_func_t print; // Print function. - void * data; // Data passed to parser and printer. - int set; // Variable set during this "session" - // (__kmp_env_initialize() or kmp_set_defaults() call). - int defined; // Variable set in any "session". -}; // struct __kmp_setting - -struct __kmp_stg_ss_data { - size_t factor; // Default factor: 1 for KMP_STACKSIZE, 1024 for others. - kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). -}; // struct __kmp_stg_ss_data - -struct __kmp_stg_wp_data { - int omp; // 0 -- KMP_LIBRARY, 1 -- OMP_WAIT_POLICY. - kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). -}; // struct __kmp_stg_wp_data - -struct __kmp_stg_fr_data { - int force; // 0 -- KMP_DETERMINISTIC_REDUCTION, 1 -- KMP_FORCE_REDUCTION. - kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). -}; // struct __kmp_stg_fr_data - -static int -__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. - char const * name, // Name of variable. - char const * value, // Value of the variable. - kmp_setting_t * * rivals // List of rival settings (the list must include current one). -); - - -// ------------------------------------------------------------------------------------------------- -// Helper parse functions. 
-// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_bool( - char const * name, - char const * value, - int * out -) { - if ( __kmp_str_match_true( value ) ) { - * out = TRUE; - } else if (__kmp_str_match_false( value ) ) { - * out = FALSE; - } else { - __kmp_msg( - kmp_ms_warning, - KMP_MSG( BadBoolValue, name, value ), - KMP_HNT( ValidBoolValues ), - __kmp_msg_null - ); - }; // if -} // __kmp_stg_parse_bool - -static void -__kmp_stg_parse_size( - char const * name, - char const * value, - size_t size_min, - size_t size_max, - int * is_specified, - size_t * out, - size_t factor -) { - char const * msg = NULL; - #if KMP_OS_DARWIN - size_min = __kmp_round4k( size_min ); - size_max = __kmp_round4k( size_max ); - #endif // KMP_OS_DARWIN - if ( value ) { - if ( is_specified != NULL ) { - * is_specified = 1; - }; // if - __kmp_str_to_size( value, out, factor, & msg ); - if ( msg == NULL ) { - if ( * out > size_max ) { - * out = size_max; - msg = KMP_I18N_STR( ValueTooLarge ); - } else if ( * out < size_min ) { - * out = size_min; - msg = KMP_I18N_STR( ValueTooSmall ); - } else { - #if KMP_OS_DARWIN - size_t round4k = __kmp_round4k( * out ); - if ( * out != round4k ) { - * out = round4k; - msg = KMP_I18N_STR( NotMultiple4K ); - }; // if - #endif - }; // if - } else { - // If integer overflow occurred, * out == KMP_SIZE_T_MAX. Cut it to size_max silently. - if ( * out < size_min ) { - * out = size_max; - } - else if ( * out > size_max ) { - * out = size_max; - }; // if - }; // if - if ( msg != NULL ) { - // Message is not empty. Print warning. 
- kmp_str_buf_t buf; - __kmp_str_buf_init( & buf ); - __kmp_str_buf_print_size( & buf, * out ); - KMP_WARNING( ParseSizeIntWarn, name, value, msg ); - KMP_INFORM( Using_str_Value, name, buf.str ); - __kmp_str_buf_free( & buf ); - }; // if - }; // if -} // __kmp_stg_parse_size - -#if KMP_AFFINITY_SUPPORTED -static void -__kmp_stg_parse_str( - char const * name, - char const * value, - char const * * out -) { - KMP_INTERNAL_FREE( (void *) * out ); - * out = __kmp_str_format( "%s", value ); -} // __kmp_stg_parse_str -#endif - -static void -__kmp_stg_parse_int( - char const * name, // I: Name of environment variable (used in warning messages). - char const * value, // I: Value of environment variable to parse. - int min, // I: Miminal allowed value. - int max, // I: Maximum allowed value. - int * out // O: Output (parsed) value. -) { - char const * msg = NULL; - kmp_uint64 uint = * out; - __kmp_str_to_uint( value, & uint, & msg ); - if ( msg == NULL ) { - if ( uint < (unsigned int)min ) { - msg = KMP_I18N_STR( ValueTooSmall ); - uint = min; - } else if ( uint > (unsigned int)max ) { - msg = KMP_I18N_STR( ValueTooLarge ); - uint = max; - }; // if - } else { - // If overflow occurred msg contains error message and uint is very big. Cut tmp it - // to INT_MAX. - if ( uint < (unsigned int)min ) { - uint = min; - } - else if ( uint > (unsigned int)max ) { - uint = max; - }; // if - }; // if - if ( msg != NULL ) { - // Message is not empty. Print warning. 
- kmp_str_buf_t buf; - KMP_WARNING( ParseSizeIntWarn, name, value, msg ); - __kmp_str_buf_init( & buf ); - __kmp_str_buf_print( &buf, "%" KMP_UINT64_SPEC "", uint ); - KMP_INFORM( Using_uint64_Value, name, buf.str ); - __kmp_str_buf_free( &buf ); - }; // if - * out = uint; -} // __kmp_stg_parse_int - - -#if KMP_DEBUG_ADAPTIVE_LOCKS -static void -__kmp_stg_parse_file( - char const * name, - char const * value, - char * suffix, - char * * out -) { - char buffer[256]; - char *t; - int hasSuffix; - KMP_INTERNAL_FREE( (void *) * out ); - t = (char *) strrchr(value, '.'); - hasSuffix = t && __kmp_str_eqf( t, suffix ); - t = __kmp_str_format( "%s%s", value, hasSuffix ? "" : suffix ); - __kmp_expand_file_name( buffer, sizeof(buffer), t); - KMP_INTERNAL_FREE(t); - * out = __kmp_str_format( "%s", buffer ); -} // __kmp_stg_parse_file -#endif - -#ifdef KMP_DEBUG -static char * par_range_to_print = NULL; - -static void -__kmp_stg_parse_par_range( - char const * name, - char const * value, - int * out_range, - char * out_routine, - char * out_file, - int * out_lb, - int * out_ub -) { - size_t len = KMP_STRLEN( value + 1 ); - par_range_to_print = (char *) KMP_INTERNAL_MALLOC( len +1 ); - KMP_STRNCPY_S( par_range_to_print, len + 1, value, len + 1); - __kmp_par_range = +1; - __kmp_par_range_lb = 0; - __kmp_par_range_ub = INT_MAX; - for (;;) { - unsigned int len; - if (( value == NULL ) || ( *value == '\0' )) { - break; - } - if ( ! __kmp_strcasecmp_with_sentinel( "routine", value, '=' )) { - value = strchr( value, '=' ) + 1; - len = __kmp_readstr_with_sentinel( out_routine, - value, KMP_PAR_RANGE_ROUTINE_LEN - 1, ',' ); - if ( len == 0 ) { - goto par_range_error; - } - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - if ( ! 
__kmp_strcasecmp_with_sentinel( "filename", value, '=' )) { - value = strchr( value, '=' ) + 1; - len = __kmp_readstr_with_sentinel( out_file, - value, KMP_PAR_RANGE_FILENAME_LEN - 1, ',' ); - if ( len == 0) { - goto par_range_error; - } - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - if (( ! __kmp_strcasecmp_with_sentinel( "range", value, '=' )) - || ( ! __kmp_strcasecmp_with_sentinel( "incl_range", value, '=' ))) { - value = strchr( value, '=' ) + 1; - if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub ) != 2 ) { - goto par_range_error; - } - *out_range = +1; - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - if ( ! __kmp_strcasecmp_with_sentinel( "excl_range", value, '=' )) { - value = strchr( value, '=' ) + 1; - if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub) != 2 ) { - goto par_range_error; - } - *out_range = -1; - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - par_range_error: - KMP_WARNING( ParRangeSyntax, name ); - __kmp_par_range = 0; - break; - } -} // __kmp_stg_parse_par_range -#endif - -int -__kmp_initial_threads_capacity( int req_nproc ) -{ - int nth = 32; - - /* MIN( MAX( 32, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth) */ - if (nth < (4 * req_nproc)) - nth = (4 * req_nproc); - if (nth < (4 * __kmp_xproc)) - nth = (4 * __kmp_xproc); - - if (nth > __kmp_max_nth) - nth = __kmp_max_nth; - - return nth; -} - - -int -__kmp_default_tp_capacity( int req_nproc, int max_nth, int all_threads_specified) { - int nth = 128; - - if(all_threads_specified) - return max_nth; - /* MIN( MAX (128, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth ) */ - if (nth < (4 * req_nproc)) - nth = (4 * req_nproc); - if (nth < (4 * __kmp_xproc)) - nth = (4 * __kmp_xproc); - - if (nth > __kmp_max_nth) - nth = __kmp_max_nth; - - return nth; -} - - -// 
------------------------------------------------------------------------------------------------- -// Helper print functions. -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_print_bool( kmp_str_buf_t * buffer, char const * name, int value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_BOOL; - } else { - __kmp_str_buf_print( buffer, " %s=%s\n", name, value ? "true" : "false" ); - } -} // __kmp_stg_print_bool - -static void -__kmp_stg_print_int( kmp_str_buf_t * buffer, char const * name, int value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_INT; - } else { - __kmp_str_buf_print( buffer, " %s=%d\n", name, value ); - } -} // __kmp_stg_print_int - -static void -__kmp_stg_print_uint64( kmp_str_buf_t * buffer, char const * name, kmp_uint64 value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_UINT64; - } else { - __kmp_str_buf_print( buffer, " %s=%" KMP_UINT64_SPEC "\n", name, value ); - } -} // __kmp_stg_print_uint64 - -static void -__kmp_stg_print_str( kmp_str_buf_t * buffer, char const * name, char const * value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_STR; - } else { - __kmp_str_buf_print( buffer, " %s=%s\n", name, value ); - } -} // __kmp_stg_print_str - -static void -__kmp_stg_print_size( kmp_str_buf_t * buffer, char const * name, size_t value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - __kmp_str_buf_print_size( buffer, value ); - __kmp_str_buf_print( buffer, "'\n" ); - } else { - __kmp_str_buf_print( buffer, " %s=", name ); - __kmp_str_buf_print_size( buffer, value ); - __kmp_str_buf_print( buffer, "\n" ); - return; - } -} // __kmp_stg_print_size - - -// ================================================================================================= -// Parse and print functions. 
-// ================================================================================================= - -// ------------------------------------------------------------------------------------------------- -// KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_all_threads( char const * name, char const * value, void * data ) { - - kmp_setting_t * * rivals = (kmp_setting_t * *) data; - int rc; - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - }; // if - if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { - __kmp_max_nth = __kmp_xproc; - __kmp_allThreadsSpecified = 1; - } else { - __kmp_stg_parse_int( name, value, 1, __kmp_sys_max_nth, & __kmp_max_nth ); - __kmp_allThreadsSpecified = 0; - } - K_DIAG( 1, ( "__kmp_max_nth == %d\n", __kmp_max_nth ) ); - -} // __kmp_stg_parse_all_threads - -static void -__kmp_stg_print_all_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_max_nth ); -} // __kmp_stg_print_all_threads - -// ------------------------------------------------------------------------------------------------- -// KMP_BLOCKTIME -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_blocktime( char const * name, char const * value, void * data ) { - __kmp_dflt_blocktime = __kmp_convert_to_milliseconds( value ); - if ( __kmp_dflt_blocktime < 0 ) { - __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; - __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidValue, name, value ), __kmp_msg_null ); - KMP_INFORM( Using_int_Value, name, __kmp_dflt_blocktime ); - __kmp_env_blocktime = FALSE; // Revert to default as if var not set. 
- } else { - if ( __kmp_dflt_blocktime < KMP_MIN_BLOCKTIME ) { - __kmp_dflt_blocktime = KMP_MIN_BLOCKTIME; - __kmp_msg( kmp_ms_warning, KMP_MSG( SmallValue, name, value ), __kmp_msg_null ); - KMP_INFORM( MinValueUsing, name, __kmp_dflt_blocktime ); - } else if ( __kmp_dflt_blocktime > KMP_MAX_BLOCKTIME ) { - __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; - __kmp_msg( kmp_ms_warning, KMP_MSG( LargeValue, name, value ), __kmp_msg_null ); - KMP_INFORM( MaxValueUsing, name, __kmp_dflt_blocktime ); - }; // if - __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified. - }; // if - // calculate number of monitor thread wakeup intervals corresonding to blocktime. - __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); - __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); - K_DIAG( 1, ( "__kmp_env_blocktime == %d\n", __kmp_env_blocktime ) ); - if ( __kmp_env_blocktime ) { - K_DIAG( 1, ( "__kmp_dflt_blocktime == %d\n", __kmp_dflt_blocktime ) ); - } -} // __kmp_stg_parse_blocktime - -static void -__kmp_stg_print_blocktime( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_dflt_blocktime ); -} // __kmp_stg_print_blocktime - -// ------------------------------------------------------------------------------------------------- -// KMP_DUPLICATE_LIB_OK -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_duplicate_lib_ok( char const * name, char const * value, void * data ) { - /* actually this variable is not supported, - put here for compatibility with earlier builds and for static/dynamic combination */ - __kmp_stg_parse_bool( name, value, & __kmp_duplicate_library_ok ); -} // __kmp_stg_parse_duplicate_lib_ok - -static void -__kmp_stg_print_duplicate_lib_ok( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, 
__kmp_duplicate_library_ok ); -} // __kmp_stg_print_duplicate_lib_ok - -// ------------------------------------------------------------------------------------------------- -// KMP_INHERIT_FP_CONTROL -// ------------------------------------------------------------------------------------------------- - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -static void -__kmp_stg_parse_inherit_fp_control( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_inherit_fp_control ); -} // __kmp_stg_parse_inherit_fp_control - -static void -__kmp_stg_print_inherit_fp_control( kmp_str_buf_t * buffer, char const * name, void * data ) { -#if KMP_DEBUG - __kmp_stg_print_bool( buffer, name, __kmp_inherit_fp_control ); -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_inherit_fp_control - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -// ------------------------------------------------------------------------------------------------- -// KMP_LIBRARY, OMP_WAIT_POLICY -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_wait_policy( char const * name, char const * value, void * data ) { - - kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, wait->rivals ); - if ( rc ) { - return; - }; // if - - if ( wait->omp ) { - if ( __kmp_str_match( "ACTIVE", 1, value ) ) { - __kmp_library = library_turnaround; - } else if ( __kmp_str_match( "PASSIVE", 1, value ) ) { - __kmp_library = library_throughput; - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if - } else { - if ( __kmp_str_match( "serial", 1, value ) ) { /* S */ - __kmp_library = library_serial; - } else if ( __kmp_str_match( "throughput", 2, value ) ) { /* TH */ - __kmp_library = library_throughput; - } else if ( __kmp_str_match( "turnaround", 2, value ) ) { /* TU */ - __kmp_library = library_turnaround; - } else if ( __kmp_str_match( "dedicated", 
1, value ) ) { /* D */ - __kmp_library = library_turnaround; - } else if ( __kmp_str_match( "multiuser", 1, value ) ) { /* M */ - __kmp_library = library_throughput; - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if - }; // if - __kmp_aux_set_library( __kmp_library ); - -} // __kmp_stg_parse_wait_policy - -static void -__kmp_stg_print_wait_policy( kmp_str_buf_t * buffer, char const * name, void * data ) { - - kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data; - char const * value = NULL; - - if ( wait->omp ) { - switch ( __kmp_library ) { - case library_turnaround : { - value = "ACTIVE"; - } break; - case library_throughput : { - value = "PASSIVE"; - } break; - }; // switch - } else { - switch ( __kmp_library ) { - case library_serial : { - value = "serial"; - } break; - case library_turnaround : { - value = "turnaround"; - } break; - case library_throughput : { - value = "throughput"; - } break; - }; // switch - }; // if - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - }; // if - -} // __kmp_stg_print_wait_policy - -// ------------------------------------------------------------------------------------------------- -// KMP_MONITOR_STACKSIZE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_monitor_stacksize( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, - value, - __kmp_sys_min_stksize, - KMP_MAX_STKSIZE, - NULL, - & __kmp_monitor_stksize, - 1 - ); -} // __kmp_stg_parse_monitor_stacksize - -static void -__kmp_stg_print_monitor_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - if ( __kmp_monitor_stksize > 0 ) - KMP_STR_BUF_PRINT_NAME_EX(name); - else - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - if ( __kmp_monitor_stksize > 0 ) { - __kmp_str_buf_print_size( buffer, __kmp_monitor_stksize ); - } else { - 
__kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - if( __kmp_env_format && __kmp_monitor_stksize ) { - __kmp_str_buf_print( buffer, "'\n"); - } - -} // __kmp_stg_print_monitor_stacksize - -// ------------------------------------------------------------------------------------------------- -// KMP_SETTINGS -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_settings( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_settings ); -} // __kmp_stg_parse_settings - -static void -__kmp_stg_print_settings( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_settings ); -} // __kmp_stg_print_settings - -// ------------------------------------------------------------------------------------------------- -// KMP_STACKPAD -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_stackpad( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( - name, // Env var name - value, // Env var value - KMP_MIN_STKPADDING, // Min value - KMP_MAX_STKPADDING, // Max value - & __kmp_stkpadding // Var to initialize - ); -} // __kmp_stg_parse_stackpad - -static void -__kmp_stg_print_stackpad( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_stkpadding ); -} // __kmp_stg_print_stackpad - -// ------------------------------------------------------------------------------------------------- -// KMP_STACKOFFSET -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_stackoffset( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, // Env var name - value, // Env var value - KMP_MIN_STKOFFSET, // Min value - KMP_MAX_STKOFFSET, // Max 
value - NULL, // - & __kmp_stkoffset, // Var to initialize - 1 - ); -} // __kmp_stg_parse_stackoffset - -static void -__kmp_stg_print_stackoffset( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_size( buffer, name, __kmp_stkoffset ); -} // __kmp_stg_print_stackoffset - -// ------------------------------------------------------------------------------------------------- -// KMP_STACKSIZE, OMP_STACKSIZE, GOMP_STACKSIZE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_stacksize( char const * name, char const * value, void * data ) { - - kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, stacksize->rivals ); - if ( rc ) { - return; - }; // if - __kmp_stg_parse_size( - name, // Env var name - value, // Env var value - __kmp_sys_min_stksize, // Min value - KMP_MAX_STKSIZE, // Max value - & __kmp_env_stksize, // - & __kmp_stksize, // Var to initialize - stacksize->factor - ); - -} // __kmp_stg_parse_stacksize - -// This function is called for printing both KMP_STACKSIZE (factor is 1) and OMP_STACKSIZE (factor is 1024). -// Currently it is not possible to print OMP_STACKSIZE value in bytes. We can consider adding this -// possibility by a customer request in future. -static void -__kmp_stg_print_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) { - kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data; - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? __kmp_stksize / stacksize->factor : __kmp_stksize ); - __kmp_str_buf_print( buffer, "'\n" ); - } else { - __kmp_str_buf_print( buffer, " %s=", name ); - __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? 
__kmp_stksize / stacksize->factor : __kmp_stksize ); - __kmp_str_buf_print( buffer, "\n" ); - } -} // __kmp_stg_print_stacksize - -// ------------------------------------------------------------------------------------------------- -// KMP_VERSION -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_version( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_version ); -} // __kmp_stg_parse_version - -static void -__kmp_stg_print_version( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_version ); -} // __kmp_stg_print_version - -// ------------------------------------------------------------------------------------------------- -// KMP_WARNINGS -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_warnings( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_generate_warnings ); - if (__kmp_generate_warnings != kmp_warnings_off) { // AC: we have only 0/1 values documented, - __kmp_generate_warnings = kmp_warnings_explicit; // so reset it to explicit in order to - } // distinguish from default setting -} // __kmp_env_parse_warnings - -static void -__kmp_stg_print_warnings( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_generate_warnings ); // AC: TODO: change to print_int? -} // __kmp_env_print_warnings // (needs documentation change)... 
- -// ------------------------------------------------------------------------------------------------- -// OMP_NESTED, OMP_NUM_THREADS -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_nested( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_dflt_nested ); -} // __kmp_stg_parse_nested - -static void -__kmp_stg_print_nested( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_dflt_nested ); -} // __kmp_stg_print_nested - -static void -__kmp_parse_nested_num_threads( const char *var, const char *env, kmp_nested_nthreads_t *nth_array ) -{ - const char *next = env; - const char *scan = next; - - int total = 0; // Count elements that were set. It'll be used as an array size - int prev_comma = FALSE; // For correct processing sequential commas - - // Count the number of values in the env. var string - for ( ; ; ) { - SKIP_WS( next ); - - if ( *next == '\0' ) { - break; - } - // Next character is not an integer or not a comma => end of list - if ( ( ( *next < '0' ) || ( *next > '9' ) ) && ( *next !=',') ) { - KMP_WARNING( NthSyntaxError, var, env ); - return; - } - // The next character is ',' - if ( *next == ',' ) { - // ',' is the fisrt character - if ( total == 0 || prev_comma ) { - total++; - } - prev_comma = TRUE; - next++; //skip ',' - SKIP_WS( next ); - } - // Next character is a digit - if ( *next >= '0' && *next <= '9' ) { - prev_comma = FALSE; - SKIP_DIGITS( next ); - total++; - const char *tmp = next; - SKIP_WS( tmp ); - if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) { - KMP_WARNING( NthSpacesNotAllowed, var, env ); - return; - } - } - } - KMP_DEBUG_ASSERT( total > 0 ); - if( total <= 0 ) { - KMP_WARNING( NthSyntaxError, var, env ); - return; - } - - // Check if the nested nthreads array exists - if ( ! 
nth_array->nth ) { - // Allocate an array of double size - nth_array->nth = ( int * )KMP_INTERNAL_MALLOC( sizeof( int ) * total * 2 ); - if ( nth_array->nth == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - nth_array->size = total * 2; - } else { - if ( nth_array->size < total ) { - // Increase the array size - do { - nth_array->size *= 2; - } while ( nth_array->size < total ); - - nth_array->nth = (int *) KMP_INTERNAL_REALLOC( - nth_array->nth, sizeof( int ) * nth_array->size ); - if ( nth_array->nth == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - } - } - nth_array->used = total; - int i = 0; - - prev_comma = FALSE; - total = 0; - // Save values in the array - for ( ; ; ) { - SKIP_WS( scan ); - if ( *scan == '\0' ) { - break; - } - // The next character is ',' - if ( *scan == ',' ) { - // ',' in the beginning of the list - if ( total == 0 ) { - // The value is supposed to be equal to __kmp_avail_proc but it is unknown at the moment. - // So let's put a placeholder (#threads = 0) to correct it later. - nth_array->nth[i++] = 0; - total++; - }else if ( prev_comma ) { - // Num threads is inherited from the previous level - nth_array->nth[i] = nth_array->nth[i - 1]; - i++; - total++; - } - prev_comma = TRUE; - scan++; //skip ',' - SKIP_WS( scan ); - } - // Next character is a digit - if ( *scan >= '0' && *scan <= '9' ) { - int num; - const char *buf = scan; - char const * msg = NULL; - prev_comma = FALSE; - SKIP_DIGITS( scan ); - total++; - - num = __kmp_str_to_int( buf, *scan ); - if ( num < KMP_MIN_NTH ) { - msg = KMP_I18N_STR( ValueTooSmall ); - num = KMP_MIN_NTH; - } else if ( num > __kmp_sys_max_nth ) { - msg = KMP_I18N_STR( ValueTooLarge ); - num = __kmp_sys_max_nth; - } - if ( msg != NULL ) { - // Message is not empty. Print warning. 
- KMP_WARNING( ParseSizeIntWarn, var, env, msg ); - KMP_INFORM( Using_int_Value, var, num ); - } - nth_array->nth[i++] = num; - } - } -} - -static void -__kmp_stg_parse_num_threads( char const * name, char const * value, void * data ) { - // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers! - if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { - // The array of 1 element - __kmp_nested_nth.nth = ( int* )KMP_INTERNAL_MALLOC( sizeof( int ) ); - __kmp_nested_nth.size = __kmp_nested_nth.used = 1; - __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_xproc; - } else { - __kmp_parse_nested_num_threads( name, value, & __kmp_nested_nth ); - if ( __kmp_nested_nth.nth ) { - __kmp_dflt_team_nth = __kmp_nested_nth.nth[0]; - if ( __kmp_dflt_team_nth_ub < __kmp_dflt_team_nth ) { - __kmp_dflt_team_nth_ub = __kmp_dflt_team_nth; - } - } - }; // if - K_DIAG( 1, ( "__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth ) ); -} // __kmp_stg_parse_num_threads - -static void -__kmp_stg_print_num_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - if ( __kmp_nested_nth.used ) { - kmp_str_buf_t buf; - __kmp_str_buf_init( &buf ); - for ( int i = 0; i < __kmp_nested_nth.used; i++) { - __kmp_str_buf_print( &buf, "%d", __kmp_nested_nth.nth[i] ); - if ( i < __kmp_nested_nth.used - 1 ) { - __kmp_str_buf_print( &buf, "," ); - } - } - __kmp_str_buf_print( buffer, "='%s'\n", buf.str ); - __kmp_str_buf_free(&buf); - } else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } -} // __kmp_stg_print_num_threads - -// ------------------------------------------------------------------------------------------------- -// OpenMP 3.0: KMP_TASKING, OMP_MAX_ACTIVE_LEVELS, -// ------------------------------------------------------------------------------------------------- - -static void 
-__kmp_stg_parse_tasking( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, (int)tskm_max, (int *)&__kmp_tasking_mode ); -} // __kmp_stg_parse_tasking - -static void -__kmp_stg_print_tasking( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_tasking_mode ); -} // __kmp_stg_print_tasking - -static void -__kmp_stg_parse_task_stealing( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, 1, (int *)&__kmp_task_stealing_constraint ); -} // __kmp_stg_parse_task_stealing - -static void -__kmp_stg_print_task_stealing( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_task_stealing_constraint ); -} // __kmp_stg_print_task_stealing - -static void -__kmp_stg_parse_max_active_levels( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_dflt_max_active_levels ); -} // __kmp_stg_parse_max_active_levels - -static void -__kmp_stg_print_max_active_levels( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_dflt_max_active_levels ); -} // __kmp_stg_print_max_active_levels - -#if KMP_NESTED_HOT_TEAMS -// ------------------------------------------------------------------------------------------------- -// KMP_HOT_TEAMS_MAX_LEVEL, KMP_HOT_TEAMS_MODE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_hot_teams_level( char const * name, char const * value, void * data ) { - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - return; - } // read value before first parallel only - __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_max_level ); -} // __kmp_stg_parse_hot_teams_level - -static void -__kmp_stg_print_hot_teams_level( 
kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_hot_teams_max_level ); -} // __kmp_stg_print_hot_teams_level - -static void -__kmp_stg_parse_hot_teams_mode( char const * name, char const * value, void * data ) { - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - return; - } // read value before first parallel only - __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_mode ); -} // __kmp_stg_parse_hot_teams_mode - -static void -__kmp_stg_print_hot_teams_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_hot_teams_mode ); -} // __kmp_stg_print_hot_teams_mode - -#endif // KMP_NESTED_HOT_TEAMS - -// ------------------------------------------------------------------------------------------------- -// KMP_HANDLE_SIGNALS -// ------------------------------------------------------------------------------------------------- - -#if KMP_HANDLE_SIGNALS - -static void -__kmp_stg_parse_handle_signals( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_handle_signals ); -} // __kmp_stg_parse_handle_signals - -static void -__kmp_stg_print_handle_signals( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_handle_signals ); -} // __kmp_stg_print_handle_signals - -#endif // KMP_HANDLE_SIGNALS - -// ------------------------------------------------------------------------------------------------- -// KMP_X_DEBUG, KMP_DEBUG, KMP_DEBUG_BUF_*, KMP_DIAG -// ------------------------------------------------------------------------------------------------- - -#ifdef KMP_DEBUG - -#define KMP_STG_X_DEBUG( x ) \ - static void __kmp_stg_parse_##x##_debug( char const * name, char const * value, void * data ) { \ - __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_##x##_debug ); \ - } /* __kmp_stg_parse_x_debug */ \ - static void 
__kmp_stg_print_##x##_debug( kmp_str_buf_t * buffer, char const * name, void * data ) { \ - __kmp_stg_print_int( buffer, name, kmp_##x##_debug ); \ - } /* __kmp_stg_print_x_debug */ - -KMP_STG_X_DEBUG( a ) -KMP_STG_X_DEBUG( b ) -KMP_STG_X_DEBUG( c ) -KMP_STG_X_DEBUG( d ) -KMP_STG_X_DEBUG( e ) -KMP_STG_X_DEBUG( f ) - -#undef KMP_STG_X_DEBUG - -static void -__kmp_stg_parse_debug( char const * name, char const * value, void * data ) { - int debug = 0; - __kmp_stg_parse_int( name, value, 0, INT_MAX, & debug ); - if ( kmp_a_debug < debug ) { - kmp_a_debug = debug; - }; // if - if ( kmp_b_debug < debug ) { - kmp_b_debug = debug; - }; // if - if ( kmp_c_debug < debug ) { - kmp_c_debug = debug; - }; // if - if ( kmp_d_debug < debug ) { - kmp_d_debug = debug; - }; // if - if ( kmp_e_debug < debug ) { - kmp_e_debug = debug; - }; // if - if ( kmp_f_debug < debug ) { - kmp_f_debug = debug; - }; // if -} // __kmp_stg_parse_debug - -static void -__kmp_stg_parse_debug_buf( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_debug_buf ); - // !!! TODO: Move buffer initialization of of this file! It may works incorrectly if - // KMP_DEBUG_BUF is parsed before KMP_DEBUG_BUF_LINES or KMP_DEBUG_BUF_CHARS. 
- if ( __kmp_debug_buf ) { - int i; - int elements = __kmp_debug_buf_lines * __kmp_debug_buf_chars; - - /* allocate and initialize all entries in debug buffer to empty */ - __kmp_debug_buffer = (char *) __kmp_page_allocate( elements * sizeof( char ) ); - for ( i = 0; i < elements; i += __kmp_debug_buf_chars ) - __kmp_debug_buffer[i] = '\0'; - - __kmp_debug_count = 0; - } - K_DIAG( 1, ( "__kmp_debug_buf = %d\n", __kmp_debug_buf ) ); -} // __kmp_stg_parse_debug_buf - -static void -__kmp_stg_print_debug_buf( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_debug_buf ); -} // __kmp_stg_print_debug_buf - -static void -__kmp_stg_parse_debug_buf_atomic( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_debug_buf_atomic ); -} // __kmp_stg_parse_debug_buf_atomic - -static void -__kmp_stg_print_debug_buf_atomic( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_debug_buf_atomic ); -} // __kmp_stg_print_debug_buf_atomic - -static void -__kmp_stg_parse_debug_buf_chars( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( - name, - value, - KMP_DEBUG_BUF_CHARS_MIN, - INT_MAX, - & __kmp_debug_buf_chars - ); -} // __kmp_stg_debug_parse_buf_chars - -static void -__kmp_stg_print_debug_buf_chars( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_debug_buf_chars ); -} // __kmp_stg_print_debug_buf_chars - -static void -__kmp_stg_parse_debug_buf_lines( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( - name, - value, - KMP_DEBUG_BUF_LINES_MIN, - INT_MAX, - & __kmp_debug_buf_lines - ); -} // __kmp_stg_parse_debug_buf_lines - -static void -__kmp_stg_print_debug_buf_lines( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_debug_buf_lines ); -} // __kmp_stg_print_debug_buf_lines - 
-static void -__kmp_stg_parse_diag( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_diag ); -} // __kmp_stg_parse_diag - -static void -__kmp_stg_print_diag( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, kmp_diag ); -} // __kmp_stg_print_diag - -#endif // KMP_DEBUG - -// ------------------------------------------------------------------------------------------------- -// KMP_ALIGN_ALLOC -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_align_alloc( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, - value, - CACHE_LINE, - INT_MAX, - NULL, - & __kmp_align_alloc, - 1 - ); -} // __kmp_stg_parse_align_alloc - -static void -__kmp_stg_print_align_alloc( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_size( buffer, name, __kmp_align_alloc ); -} // __kmp_stg_print_align_alloc - -// ------------------------------------------------------------------------------------------------- -// KMP_PLAIN_BARRIER, KMP_FORKJOIN_BARRIER, KMP_REDUCTION_BARRIER -// ------------------------------------------------------------------------------------------------- - -// TODO: Remove __kmp_barrier_branch_bit_env_name varibale, remove loops from parse and print -// functions, pass required info through data argument. 
- -static void -__kmp_stg_parse_barrier_branch_bit( char const * name, char const * value, void * data ) { - const char *var; - - /* ---------- Barrier branch bit control ------------ */ - for ( int i=bs_plain_barrier; i KMP_MAX_BRANCH_BITS ) { - __kmp_msg( kmp_ms_warning, KMP_MSG( BarrReleaseValueInvalid, name, comma + 1 ), __kmp_msg_null ); - __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt; - } - } - if ( __kmp_barrier_gather_branch_bits[ i ] > KMP_MAX_BRANCH_BITS ) { - KMP_WARNING( BarrGatherValueInvalid, name, value ); - KMP_INFORM( Using_uint_Value, name, __kmp_barrier_gather_bb_dflt ); - __kmp_barrier_gather_branch_bits[ i ] = __kmp_barrier_gather_bb_dflt; - } - } - K_DIAG(1, ("%s == %d,%d\n", __kmp_barrier_branch_bit_env_name[ i ], \ - __kmp_barrier_gather_branch_bits [ i ], \ - __kmp_barrier_release_branch_bits [ i ])) - } -} // __kmp_stg_parse_barrier_branch_bit - -static void -__kmp_stg_print_barrier_branch_bit( kmp_str_buf_t * buffer, char const * name, void * data ) { - const char *var; - for ( int i=bs_plain_barrier; irivals ); - if ( rc ) { - return; - }; // if - if ( reduction->force ) { - if( value != 0 ) { - if( __kmp_str_match( "critical", 0, value ) ) - __kmp_force_reduction_method = critical_reduce_block; - else if( __kmp_str_match( "atomic", 0, value ) ) - __kmp_force_reduction_method = atomic_reduce_block; - else if( __kmp_str_match( "tree", 0, value ) ) - __kmp_force_reduction_method = tree_reduce_block; - else { - KMP_FATAL( UnknownForceReduction, name, value ); - } - } - } else { - __kmp_stg_parse_bool( name, value, & __kmp_determ_red ); - if( __kmp_determ_red ) { - __kmp_force_reduction_method = tree_reduce_block; - } else { - __kmp_force_reduction_method = reduction_method_not_defined; - } - } - K_DIAG( 1, ( "__kmp_force_reduction_method == %d\n", __kmp_force_reduction_method ) ); -} // __kmp_stg_parse_force_reduction - -static void -__kmp_stg_print_force_reduction( kmp_str_buf_t * buffer, char const * name, void * 
data ) { - - kmp_stg_fr_data_t * reduction = (kmp_stg_fr_data_t *) data; - if ( reduction->force ) { - if( __kmp_force_reduction_method == critical_reduce_block) { - __kmp_stg_print_str( buffer, name, "critical"); - } else if ( __kmp_force_reduction_method == atomic_reduce_block ) { - __kmp_stg_print_str( buffer, name, "atomic"); - } else if ( __kmp_force_reduction_method == tree_reduce_block ) { - __kmp_stg_print_str( buffer, name, "tree"); - } else { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - } else { - __kmp_stg_print_bool( buffer, name, __kmp_determ_red ); - } - - -} // __kmp_stg_print_force_reduction - -// ------------------------------------------------------------------------------------------------- -// KMP_STORAGE_MAP -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_storage_map( char const * name, char const * value, void * data ) { - if ( __kmp_str_match( "verbose", 1, value ) ) { - __kmp_storage_map = TRUE; - __kmp_storage_map_verbose = TRUE; - __kmp_storage_map_verbose_specified = TRUE; - - } else { - __kmp_storage_map_verbose = FALSE; - __kmp_stg_parse_bool( name, value, & __kmp_storage_map ); // !!! 
- }; // if -} // __kmp_stg_parse_storage_map - -static void -__kmp_stg_print_storage_map( kmp_str_buf_t * buffer, char const * name, void * data ) { - if ( __kmp_storage_map_verbose || __kmp_storage_map_verbose_specified ) { - __kmp_stg_print_str( buffer, name, "verbose" ); - } else { - __kmp_stg_print_bool( buffer, name, __kmp_storage_map ); - } -} // __kmp_stg_print_storage_map - -// ------------------------------------------------------------------------------------------------- -// KMP_ALL_THREADPRIVATE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_all_threadprivate( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, __kmp_allThreadsSpecified ? __kmp_max_nth : 1, __kmp_max_nth, - & __kmp_tp_capacity ); -} // __kmp_stg_parse_all_threadprivate - -static void -__kmp_stg_print_all_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_tp_capacity ); - -} - -// ------------------------------------------------------------------------------------------------- -// KMP_FOREIGN_THREADS_THREADPRIVATE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_foreign_threads_threadprivate( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_foreign_tp ); -} // __kmp_stg_parse_foreign_threads_threadprivate - -static void -__kmp_stg_print_foreign_threads_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_foreign_tp ); -} // __kmp_stg_print_foreign_threads_threadprivate - - -// ------------------------------------------------------------------------------------------------- -// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD -// 
------------------------------------------------------------------------------------------------- - -#if KMP_AFFINITY_SUPPORTED -// -// Parse the proc id list. Return TRUE if successful, FALSE otherwise. -// -static int -__kmp_parse_affinity_proc_id_list( const char *var, const char *env, - const char **nextEnv, char **proclist ) -{ - const char *scan = env; - const char *next = scan; - int empty = TRUE; - - *proclist = NULL; - - for (;;) { - int start, end, stride; - - SKIP_WS(scan); - next = scan; - if (*next == '\0') { - break; - } - - if (*next == '{') { - int num; - next++; // skip '{' - SKIP_WS(next); - scan = next; - - // - // Read the first integer in the set. - // - if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT(num >= 0); - - for (;;) { - // - // Check for end of set. - // - SKIP_WS(next); - if (*next == '}') { - next++; // skip '}' - break; - } - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - SKIP_WS(next); - - // - // Read the next integer in the set. - // - scan = next; - if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT(num >= 0); - } - empty = FALSE; - - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // - // Next character is not an integer => end of list - // - if ((*next < '0') || (*next > '9')) { - if (empty) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - break; - } - - // - // Read the first integer. - // - SKIP_DIGITS(next); - start = __kmp_str_to_int(scan, *next); - KMP_ASSERT(start >= 0); - SKIP_WS(next); - - // - // If this isn't a range, then go on. - // - if (*next != '-') { - empty = FALSE; - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // - // This is a range. 
Skip over the '-' and read in the 2nd int. - // - next++; // skip '-' - SKIP_WS(next); - scan = next; - if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - SKIP_DIGITS(next); - end = __kmp_str_to_int(scan, *next); - KMP_ASSERT(end >= 0); - - // - // Check for a stride parameter - // - stride = 1; - SKIP_WS(next); - if (*next == ':') { - // - // A stride is specified. Skip over the ':" and read the 3rd int. - // - int sign = +1; - next++; // skip ':' - SKIP_WS(next); - scan = next; - if (*next == '-') { - sign = -1; - next++; - SKIP_WS(next); - scan = next; - } - if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT(stride >= 0); - stride *= sign; - } - - // - // Do some range checks. - // - if (stride == 0) { - KMP_WARNING( AffZeroStride, var ); - return FALSE; - } - if (stride > 0) { - if (start > end) { - KMP_WARNING( AffStartGreaterEnd, var, start, end ); - return FALSE; - } - } - else { - if (start < end) { - KMP_WARNING( AffStrideLessZero, var, start, end ); - return FALSE; - } - } - if ((end - start) / stride > 65536 ) { - KMP_WARNING( AffRangeTooBig, var, end, start, stride ); - return FALSE; - } - - empty = FALSE; - - // - // Skip optional comma. - // - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - } - - *nextEnv = next; - - { - int len = next - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); - KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char)); - retlist[len] = '\0'; - *proclist = retlist; - } - return TRUE; -} - - -// -// If KMP_AFFINITY is specified without a type, then -// __kmp_affinity_notype should point to its setting. 
-// -static kmp_setting_t *__kmp_affinity_notype = NULL; - -static void -__kmp_parse_affinity_env( char const * name, char const * value, - enum affinity_type * out_type, - char ** out_proclist, - int * out_verbose, - int * out_warn, - int * out_respect, - enum affinity_gran * out_gran, - int * out_gran_levels, - int * out_dups, - int * out_compact, - int * out_offset -) -{ - char * buffer = NULL; // Copy of env var value. - char * buf = NULL; // Buffer for strtok_r() function. - char * next = NULL; // end of token / start of next. - const char * start; // start of current token (for err msgs) - int count = 0; // Counter of parsed integer numbers. - int number[ 2 ]; // Parsed numbers. - - // Guards. - int type = 0; - int proclist = 0; - int max_proclist = 0; - int verbose = 0; - int warnings = 0; - int respect = 0; - int gran = 0; - int dups = 0; - - KMP_ASSERT( value != NULL ); - - if ( TCR_4(__kmp_init_middle) ) { - KMP_WARNING( EnvMiddleWarn, name ); - __kmp_env_toPrint( name, 0 ); - return; - } - __kmp_env_toPrint( name, 1 ); - - buffer = __kmp_str_format( "%s", value ); // Copy env var to keep original intact. - buf = buffer; - SKIP_WS(buf); - - // Helper macros. - - // - // If we see a parse error, emit a warning and scan to the next ",". - // - // FIXME - there's got to be a better way to print an error - // message, hopefully without overwritting peices of buf. 
- // - #define EMIT_WARN(skip,errlist) \ - { \ - char ch; \ - if (skip) { \ - SKIP_TO(next, ','); \ - } \ - ch = *next; \ - *next = '\0'; \ - KMP_WARNING errlist; \ - *next = ch; \ - if (skip) { \ - if (ch == ',') next++; \ - } \ - buf = next; \ - } - - #define _set_param(_guard,_var,_val) \ - { \ - if ( _guard == 0 ) { \ - _var = _val; \ - } else { \ - EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \ - }; \ - ++ _guard; \ - } - - #define set_type(val) _set_param( type, *out_type, val ) - #define set_verbose(val) _set_param( verbose, *out_verbose, val ) - #define set_warnings(val) _set_param( warnings, *out_warn, val ) - #define set_respect(val) _set_param( respect, *out_respect, val ) - #define set_dups(val) _set_param( dups, *out_dups, val ) - #define set_proclist(val) _set_param( proclist, *out_proclist, val ) - - #define set_gran(val,levels) \ - { \ - if ( gran == 0 ) { \ - *out_gran = val; \ - *out_gran_levels = levels; \ - } else { \ - EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \ - }; \ - ++ gran; \ - } - -# if OMP_40_ENABLED - KMP_DEBUG_ASSERT( ( __kmp_nested_proc_bind.bind_types != NULL ) - && ( __kmp_nested_proc_bind.used > 0 ) ); -# endif - - while ( *buf != '\0' ) { - start = next = buf; - - if (__kmp_match_str("none", buf, (const char **)&next)) { - set_type( affinity_none ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif - buf = next; - } else if (__kmp_match_str("scatter", buf, (const char **)&next)) { - set_type( affinity_scatter ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("compact", buf, (const char **)&next)) { - set_type( affinity_compact ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("logical", buf, (const char **)&next)) { - set_type( affinity_logical ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] 
= proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("physical", buf, (const char **)&next)) { - set_type( affinity_physical ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("explicit", buf, (const char **)&next)) { - set_type( affinity_explicit ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("balanced", buf, (const char **)&next)) { - set_type( affinity_balanced ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("disabled", buf, (const char **)&next)) { - set_type( affinity_disabled ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif - buf = next; - } else if (__kmp_match_str("verbose", buf, (const char **)&next)) { - set_verbose( TRUE ); - buf = next; - } else if (__kmp_match_str("noverbose", buf, (const char **)&next)) { - set_verbose( FALSE ); - buf = next; - } else if (__kmp_match_str("warnings", buf, (const char **)&next)) { - set_warnings( TRUE ); - buf = next; - } else if (__kmp_match_str("nowarnings", buf, (const char **)&next)) { - set_warnings( FALSE ); - buf = next; - } else if (__kmp_match_str("respect", buf, (const char **)&next)) { - set_respect( TRUE ); - buf = next; - } else if (__kmp_match_str("norespect", buf, (const char **)&next)) { - set_respect( FALSE ); - buf = next; - } else if (__kmp_match_str("duplicates", buf, (const char **)&next) - || __kmp_match_str("dups", buf, (const char **)&next)) { - set_dups( TRUE ); - buf = next; - } else if (__kmp_match_str("noduplicates", buf, (const char **)&next) - || __kmp_match_str("nodups", buf, (const char **)&next)) { - set_dups( FALSE ); - buf = next; - } else if (__kmp_match_str("granularity", buf, (const char **)&next) - || __kmp_match_str("gran", buf, (const char **)&next)) { - SKIP_WS(next); 
- if (*next != '=') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip '=' - SKIP_WS(next); - - buf = next; - if (__kmp_match_str("fine", buf, (const char **)&next)) { - set_gran( affinity_gran_fine, -1 ); - buf = next; - } else if (__kmp_match_str("thread", buf, (const char **)&next)) { - set_gran( affinity_gran_thread, -1 ); - buf = next; - } else if (__kmp_match_str("core", buf, (const char **)&next)) { - set_gran( affinity_gran_core, -1 ); - buf = next; - } else if (__kmp_match_str("package", buf, (const char **)&next)) { - set_gran( affinity_gran_package, -1 ); - buf = next; - } else if (__kmp_match_str("node", buf, (const char **)&next)) { - set_gran( affinity_gran_node, -1 ); - buf = next; -# if KMP_GROUP_AFFINITY - } else if (__kmp_match_str("group", buf, (const char **)&next)) { - set_gran( affinity_gran_group, -1 ); - buf = next; -# endif /* KMP_GROUP AFFINITY */ - } else if ((*buf >= '0') && (*buf <= '9')) { - int n; - next = buf; - SKIP_DIGITS(next); - n = __kmp_str_to_int( buf, *next ); - KMP_ASSERT(n >= 0); - buf = next; - set_gran( affinity_gran_default, n ); - } else { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - } else if (__kmp_match_str("proclist", buf, (const char **)&next)) { - char *temp_proclist; - - SKIP_WS(next); - if (*next != '=') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip '=' - SKIP_WS(next); - if (*next != '[') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip '[' - buf = next; - if (! __kmp_parse_affinity_proc_id_list(name, buf, - (const char **)&next, &temp_proclist)) { - // - // warning already emitted. 
- // - SKIP_TO(next, ']'); - if (*next == ']') next++; - SKIP_TO(next, ','); - if (*next == ',') next++; - buf = next; - continue; - } - if (*next != ']') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip ']' - set_proclist( temp_proclist ); - } else if ((*buf >= '0') && (*buf <= '9')) { - // Parse integer numbers -- permute and offset. - int n; - next = buf; - SKIP_DIGITS(next); - n = __kmp_str_to_int( buf, *next ); - KMP_ASSERT(n >= 0); - buf = next; - if ( count < 2 ) { - number[ count ] = n; - } else { - KMP_WARNING( AffManyParams, name, start ); - }; // if - ++ count; - } else { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - - SKIP_WS(next); - if (*next == ',') { - next++; - SKIP_WS(next); - } - else if (*next != '\0') { - const char *temp = next; - EMIT_WARN( TRUE, ( ParseExtraCharsWarn, name, temp ) ); - continue; - } - buf = next; - } // while - - #undef EMIT_WARN - #undef _set_param - #undef set_type - #undef set_verbose - #undef set_warnings - #undef set_respect - #undef set_granularity - - KMP_INTERNAL_FREE( buffer ); - - if ( proclist ) { - if ( ! 
type ) { - KMP_WARNING( AffProcListNoType, name ); - __kmp_affinity_type = affinity_explicit; - } - else if ( __kmp_affinity_type != affinity_explicit ) { - KMP_WARNING( AffProcListNotExplicit, name ); - KMP_ASSERT( *out_proclist != NULL ); - KMP_INTERNAL_FREE( *out_proclist ); - *out_proclist = NULL; - } - } - switch ( *out_type ) { - case affinity_logical: - case affinity_physical: { - if ( count > 0 ) { - *out_offset = number[ 0 ]; - }; // if - if ( count > 1 ) { - KMP_WARNING( AffManyParamsForLogic, name, number[ 1 ] ); - }; // if - } break; - case affinity_balanced: { - if ( count > 0 ) { - *out_compact = number[ 0 ]; - }; // if - if ( count > 1 ) { - *out_offset = number[ 1 ]; - }; // if - - if ( __kmp_affinity_gran == affinity_gran_default ) { -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { - KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" ); - } - __kmp_affinity_gran = affinity_gran_fine; - } else -#endif - { - if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { - KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" ); - } - __kmp_affinity_gran = affinity_gran_core; - } - } - } break; - case affinity_scatter: - case affinity_compact: { - if ( count > 0 ) { - *out_compact = number[ 0 ]; - }; // if - if ( count > 1 ) { - *out_offset = number[ 1 ]; - }; // if - } break; - case affinity_explicit: { - if ( *out_proclist == NULL ) { - KMP_WARNING( AffNoProcList, name ); - __kmp_affinity_type = affinity_none; - } - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "explicit" ); - } - } break; - case affinity_none: { - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "none" ); - }; // if - } break; - case affinity_disabled: { - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "disabled" ); - }; // if - } break; - case affinity_default: { - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "default" ); - }; // if - } break; - default: { - 
KMP_ASSERT( 0 ); - }; - }; // switch -} // __kmp_parse_affinity_env - -static void -__kmp_stg_parse_affinity( char const * name, char const * value, void * data ) -{ - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } - - __kmp_parse_affinity_env( name, value, & __kmp_affinity_type, - & __kmp_affinity_proclist, & __kmp_affinity_verbose, - & __kmp_affinity_warnings, & __kmp_affinity_respect_mask, - & __kmp_affinity_gran, & __kmp_affinity_gran_levels, - & __kmp_affinity_dups, & __kmp_affinity_compact, - & __kmp_affinity_offset ); - -} // __kmp_stg_parse_affinity - -static void -__kmp_stg_print_affinity( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - if ( __kmp_affinity_verbose ) { - __kmp_str_buf_print( buffer, "%s,", "verbose"); - } else { - __kmp_str_buf_print( buffer, "%s,", "noverbose"); - } - if ( __kmp_affinity_warnings ) { - __kmp_str_buf_print( buffer, "%s,", "warnings"); - } else { - __kmp_str_buf_print( buffer, "%s,", "nowarnings"); - } - if ( KMP_AFFINITY_CAPABLE() ) { - if ( __kmp_affinity_respect_mask ) { - __kmp_str_buf_print( buffer, "%s,", "respect"); - } else { - __kmp_str_buf_print( buffer, "%s,", "norespect"); - } - switch ( __kmp_affinity_gran ) { - case affinity_gran_default: - __kmp_str_buf_print( buffer, "%s", "granularity=default,"); - break; - case affinity_gran_fine: - __kmp_str_buf_print( buffer, "%s", "granularity=fine,"); - break; - case affinity_gran_thread: - __kmp_str_buf_print( buffer, "%s", "granularity=thread,"); - break; - case affinity_gran_core: - __kmp_str_buf_print( buffer, "%s", "granularity=core,"); - break; - case affinity_gran_package: - __kmp_str_buf_print( buffer, "%s", "granularity=package,"); - break; - case affinity_gran_node: - __kmp_str_buf_print( buffer, "%s", "granularity=node,"); - 
break; -# if KMP_GROUP_AFFINITY - case affinity_gran_group: - __kmp_str_buf_print( buffer, "%s", "granularity=group,"); - break; -# endif /* KMP_GROUP_AFFINITY */ - } - if ( __kmp_affinity_dups ) { - __kmp_str_buf_print( buffer, "%s,", "duplicates"); - } else { - __kmp_str_buf_print( buffer, "%s,", "noduplicates"); - } - } - if ( ! KMP_AFFINITY_CAPABLE() ) { - __kmp_str_buf_print( buffer, "%s", "disabled" ); - } - else switch ( __kmp_affinity_type ){ - case affinity_none: - __kmp_str_buf_print( buffer, "%s", "none"); - break; - case affinity_physical: - __kmp_str_buf_print( buffer, "%s,%d", "physical", - __kmp_affinity_offset ); - break; - case affinity_logical: - __kmp_str_buf_print( buffer, "%s,%d", "logical", - __kmp_affinity_offset ); - break; - case affinity_compact: - __kmp_str_buf_print( buffer, "%s,%d,%d", "compact", - __kmp_affinity_compact, __kmp_affinity_offset ); - break; - case affinity_scatter: - __kmp_str_buf_print( buffer, "%s,%d,%d", "scatter", - __kmp_affinity_compact, __kmp_affinity_offset ); - break; - case affinity_explicit: - __kmp_str_buf_print( buffer, "%s=[%s],%s", "proclist", - __kmp_affinity_proclist, "explicit" ); - break; - case affinity_balanced: - __kmp_str_buf_print( buffer, "%s,%d,%d", "balanced", - __kmp_affinity_compact, __kmp_affinity_offset ); - break; - case affinity_disabled: - __kmp_str_buf_print( buffer, "%s", "disabled"); - break; - case affinity_default: - __kmp_str_buf_print( buffer, "%s", "default"); - break; - default: - __kmp_str_buf_print( buffer, "%s", ""); - break; - } - __kmp_str_buf_print( buffer, "'\n" ); -} //__kmp_stg_print_affinity - -# ifdef KMP_GOMP_COMPAT - -static void -__kmp_stg_parse_gomp_cpu_affinity( char const * name, char const * value, void * data ) -{ - const char * next = NULL; - char * temp_proclist; - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } - - if ( TCR_4(__kmp_init_middle) ) { - KMP_WARNING( 
EnvMiddleWarn, name ); - __kmp_env_toPrint( name, 0 ); - return; - } - - __kmp_env_toPrint( name, 1 ); - - if ( __kmp_parse_affinity_proc_id_list( name, value, &next, - &temp_proclist )) { - SKIP_WS(next); - if (*next == '\0') { - // - // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=... - // - __kmp_affinity_proclist = temp_proclist; - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = affinity_gran_fine; -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - } - else { - KMP_WARNING( AffSyntaxError, name ); - if (temp_proclist != NULL) { - KMP_INTERNAL_FREE((void *)temp_proclist); - } - } - } - else { - // - // Warning already emitted - // - __kmp_affinity_type = affinity_none; -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif - } -} // __kmp_stg_parse_gomp_cpu_affinity - -# endif /* KMP_GOMP_COMPAT */ - - -# if OMP_40_ENABLED - -/*----------------------------------------------------------------------------- - -The OMP_PLACES proc id list parser. Here is the grammar: - -place_list := place -place_list := place , place_list -place := num -place := place : num -place := place : num : signed -place := { subplacelist } -place := ! 
place // (lowest priority) -subplace_list := subplace -subplace_list := subplace , subplace_list -subplace := num -subplace := num : num -subplace := num : num : signed -signed := num -signed := + signed -signed := - signed - ------------------------------------------------------------------------------*/ - -static int -__kmp_parse_subplace_list( const char *var, const char **scan ) -{ - const char *next; - - for (;;) { - int start, count, stride; - - // - // Read in the starting proc id - // - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - start = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(start >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - if (**scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - (*scan)++; // skip ':' - - // - // Read count parameter - // - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(count >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - if (**scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - (*scan)++; // skip ':' - - // - // Read stride parameter - // - int sign = +1; - for (;;) { - SKIP_WS(*scan); - if (**scan == '+') { - (*scan)++; // skip '+' - continue; - } - if (**scan == '-') { - sign *= -1; - (*scan)++; // skip '-' - continue; - } - break; - } - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - 
return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(stride >= 0); - *scan = next; - stride *= sign; - - // - // valid follow sets are ',' and '}' - // - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - return TRUE; -} - -static int -__kmp_parse_place( const char *var, const char ** scan ) -{ - const char *next; - - // - // valid follow sets are '{' '!' and num - // - SKIP_WS(*scan); - if (**scan == '{') { - (*scan)++; // skip '{' - if (! __kmp_parse_subplace_list(var, scan)) { - return FALSE; - } - if (**scan != '}') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - (*scan)++; // skip '}' - } - else if (**scan == '!') { - (*scan)++; // skip '!' - return __kmp_parse_place(var, scan); //'!' has lower precedence than ':' - } - else if ((**scan >= '0') && (**scan <= '9')) { - next = *scan; - SKIP_DIGITS(next); - int proc = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(proc >= 0); - *scan = next; - } - else { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - return TRUE; -} - -static int -__kmp_parse_place_list( const char *var, const char *env, char **place_list ) -{ - const char *scan = env; - const char *next = scan; - - for (;;) { - int start, count, stride; - - if (! 
__kmp_parse_place(var, &scan)) { - return FALSE; - } - - // - // valid follow sets are ',' ':' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - if (*scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - scan++; // skip ':' - - // - // Read count parameter - // - SKIP_WS(scan); - if ((*scan < '0') || (*scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; - - // - // valid follow sets are ',' ':' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - if (*scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - scan++; // skip ':' - - // - // Read stride parameter - // - int sign = +1; - for (;;) { - SKIP_WS(scan); - if (*scan == '+') { - scan++; // skip '+' - continue; - } - if (*scan == '-') { - sign *= -1; - scan++; // skip '-' - continue; - } - break; - } - SKIP_WS(scan); - if ((*scan < '0') || (*scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT(stride >= 0); - scan = next; - stride *= sign; - - // - // valid follow sets are ',' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - - { - int len = scan - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); - KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char)); - retlist[len] = '\0'; - *place_list = retlist; - } - return TRUE; -} - -static void -__kmp_stg_parse_places( char const * name, char const * value, void * data ) -{ - 
int count; - const char *scan = value; - const char *next = scan; - const char *kind = "\"threads\""; - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } - - // - // If OMP_PROC_BIND is not specified but OMP_PLACES is, - // then let OMP_PROC_BIND default to true. - // - if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - - //__kmp_affinity_num_places = 0; - - if ( __kmp_match_str( "threads", scan, &next ) ) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_thread; - __kmp_affinity_dups = FALSE; - kind = "\"threads\""; - } - else if ( __kmp_match_str( "cores", scan, &next ) ) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_core; - __kmp_affinity_dups = FALSE; - kind = "\"cores\""; - } - else if ( __kmp_match_str( "sockets", scan, &next ) ) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_package; - __kmp_affinity_dups = FALSE; - kind = "\"sockets\""; - } - else { - if ( __kmp_affinity_proclist != NULL ) { - KMP_INTERNAL_FREE( (void *)__kmp_affinity_proclist ); - __kmp_affinity_proclist = NULL; - } - if ( __kmp_parse_place_list( name, value, &__kmp_affinity_proclist ) ) { - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = affinity_gran_fine; - __kmp_affinity_dups = FALSE; - if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - } - return; - } - - if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - - SKIP_WS(scan); - if ( *scan == '\0' ) { - return; - } - - // - // Parse option count parameter in parentheses - // - if ( *scan != '(' ) { - KMP_WARNING( SyntaxErrorUsing, name, kind ); - 
return; - } - scan++; // skip '(' - - SKIP_WS(scan); - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; - - SKIP_WS(scan); - if ( *scan != ')' ) { - KMP_WARNING( SyntaxErrorUsing, name, kind ); - return; - } - scan++; // skip ')' - - SKIP_WS(scan); - if ( *scan != '\0' ) { - KMP_WARNING( ParseExtraCharsWarn, name, scan ); - } - __kmp_affinity_num_places = count; -} - -static void -__kmp_stg_print_places( kmp_str_buf_t * buffer, char const * name, - void * data ) -{ - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - if ( ( __kmp_nested_proc_bind.used == 0 ) - || ( __kmp_nested_proc_bind.bind_types == NULL ) - || ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_false ) ) { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - else if ( __kmp_affinity_type == affinity_explicit ) { - if ( __kmp_affinity_proclist != NULL ) { - __kmp_str_buf_print( buffer, "='%s'\n", __kmp_affinity_proclist ); - } - else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - } - else if ( __kmp_affinity_type == affinity_compact ) { - int num; - if ( __kmp_affinity_num_masks > 0 ) { - num = __kmp_affinity_num_masks; - } - else if ( __kmp_affinity_num_places > 0 ) { - num = __kmp_affinity_num_places; - } - else { - num = 0; - } - if ( __kmp_affinity_gran == affinity_gran_thread ) { - if ( num > 0 ) { - __kmp_str_buf_print( buffer, "='threads(%d)'\n", num ); - } - else { - __kmp_str_buf_print( buffer, "='threads'\n" ); - } - } - else if ( __kmp_affinity_gran == affinity_gran_core ) { - if ( num > 0 ) { - __kmp_str_buf_print( buffer, "='cores(%d)' \n", num ); - } - else { - __kmp_str_buf_print( buffer, "='cores'\n" ); - } - } - else if ( __kmp_affinity_gran == affinity_gran_package ) { - if ( num > 0 ) { - __kmp_str_buf_print( buffer, "='sockets(%d)'\n", num ); - } - else { - __kmp_str_buf_print( buffer, 
"='sockets'\n" ); - } - } - else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - } - else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } -} - -# endif /* OMP_40_ENABLED */ - -# if (! OMP_40_ENABLED) - -static void -__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data ) -{ - int enabled; - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } - - // - // in OMP 3.1, OMP_PROC_BIND is strictly a boolean - // - __kmp_stg_parse_bool( name, value, & enabled ); - if ( enabled ) { - // - // OMP_PROC_BIND => granularity=fine,scatter on MIC - // OMP_PROC_BIND => granularity=core,scatter elsewhere - // - __kmp_affinity_type = affinity_scatter; -# if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) - __kmp_affinity_gran = affinity_gran_fine; - else -# endif - __kmp_affinity_gran = affinity_gran_core; - } - else { - __kmp_affinity_type = affinity_none; - } -} // __kmp_parse_proc_bind - -# endif /* if (! 
OMP_40_ENABLED) */ - - -static void -__kmp_stg_parse_topology_method( char const * name, char const * value, - void * data ) { - if ( __kmp_str_match( "all", 1, value ) ) { - __kmp_affinity_top_method = affinity_top_method_all; - } -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - else if ( __kmp_str_match( "x2apic id", 9, value ) - || __kmp_str_match( "x2apic_id", 9, value ) - || __kmp_str_match( "x2apic-id", 9, value ) - || __kmp_str_match( "x2apicid", 8, value ) - || __kmp_str_match( "cpuid leaf 11", 13, value ) - || __kmp_str_match( "cpuid_leaf_11", 13, value ) - || __kmp_str_match( "cpuid-leaf-11", 13, value ) - || __kmp_str_match( "cpuid leaf11", 12, value ) - || __kmp_str_match( "cpuid_leaf11", 12, value ) - || __kmp_str_match( "cpuid-leaf11", 12, value ) - || __kmp_str_match( "cpuidleaf 11", 12, value ) - || __kmp_str_match( "cpuidleaf_11", 12, value ) - || __kmp_str_match( "cpuidleaf-11", 12, value ) - || __kmp_str_match( "cpuidleaf11", 11, value ) - || __kmp_str_match( "cpuid 11", 8, value ) - || __kmp_str_match( "cpuid_11", 8, value ) - || __kmp_str_match( "cpuid-11", 8, value ) - || __kmp_str_match( "cpuid11", 7, value ) - || __kmp_str_match( "leaf 11", 7, value ) - || __kmp_str_match( "leaf_11", 7, value ) - || __kmp_str_match( "leaf-11", 7, value ) - || __kmp_str_match( "leaf11", 6, value ) ) { - __kmp_affinity_top_method = affinity_top_method_x2apicid; - } - else if ( __kmp_str_match( "apic id", 7, value ) - || __kmp_str_match( "apic_id", 7, value ) - || __kmp_str_match( "apic-id", 7, value ) - || __kmp_str_match( "apicid", 6, value ) - || __kmp_str_match( "cpuid leaf 4", 12, value ) - || __kmp_str_match( "cpuid_leaf_4", 12, value ) - || __kmp_str_match( "cpuid-leaf-4", 12, value ) - || __kmp_str_match( "cpuid leaf4", 11, value ) - || __kmp_str_match( "cpuid_leaf4", 11, value ) - || __kmp_str_match( "cpuid-leaf4", 11, value ) - || __kmp_str_match( "cpuidleaf 4", 11, value ) - || __kmp_str_match( "cpuidleaf_4", 11, value ) - || __kmp_str_match( "cpuidleaf-4", 
11, value ) - || __kmp_str_match( "cpuidleaf4", 10, value ) - || __kmp_str_match( "cpuid 4", 7, value ) - || __kmp_str_match( "cpuid_4", 7, value ) - || __kmp_str_match( "cpuid-4", 7, value ) - || __kmp_str_match( "cpuid4", 6, value ) - || __kmp_str_match( "leaf 4", 6, value ) - || __kmp_str_match( "leaf_4", 6, value ) - || __kmp_str_match( "leaf-4", 6, value ) - || __kmp_str_match( "leaf4", 5, value ) ) { - __kmp_affinity_top_method = affinity_top_method_apicid; - } -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - else if ( __kmp_str_match( "/proc/cpuinfo", 2, value ) - || __kmp_str_match( "cpuinfo", 5, value )) { - __kmp_affinity_top_method = affinity_top_method_cpuinfo; - } -# if KMP_GROUP_AFFINITY - else if ( __kmp_str_match( "group", 1, value ) ) { - __kmp_affinity_top_method = affinity_top_method_group; - } -# endif /* KMP_GROUP_AFFINITY */ - else if ( __kmp_str_match( "flat", 1, value ) ) { - __kmp_affinity_top_method = affinity_top_method_flat; - } -# if KMP_USE_HWLOC - else if ( __kmp_str_match( "hwloc", 1, value) ) { - __kmp_affinity_top_method = affinity_top_method_hwloc; - } -# endif - else { - KMP_WARNING( StgInvalidValue, name, value ); - } -} // __kmp_stg_parse_topology_method - -static void -__kmp_stg_print_topology_method( kmp_str_buf_t * buffer, char const * name, - void * data ) { -# if KMP_DEBUG - char const * value = NULL; - - switch ( __kmp_affinity_top_method ) { - case affinity_top_method_default: - value = "default"; - break; - - case affinity_top_method_all: - value = "all"; - break; - -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - case affinity_top_method_x2apicid: - value = "x2APIC id"; - break; - - case affinity_top_method_apicid: - value = "APIC id"; - break; -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - case affinity_top_method_cpuinfo: - value = "cpuinfo"; - break; - -# if KMP_GROUP_AFFINITY - case affinity_top_method_group: - value = "group"; - break; -# endif /* KMP_GROUP_AFFINITY */ - - case affinity_top_method_flat: - value = 
"flat"; - break; - } - - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - } -# endif /* KMP_DEBUG */ -} // __kmp_stg_print_topology_method - -#endif /* KMP_AFFINITY_SUPPORTED */ - - -#if OMP_40_ENABLED - -// -// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X* -// OMP_PLACES / place-partition-var is not. -// -static void -__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data ) -{ - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } - - // - // in OMP 4.0 OMP_PROC_BIND is a vector of proc_bind types. - // - KMP_DEBUG_ASSERT( (__kmp_nested_proc_bind.bind_types != NULL) - && ( __kmp_nested_proc_bind.used > 0 ) ); - - const char *buf = value; - const char *next; - int num; - SKIP_WS( buf ); - if ( (*buf >= '0') && (*buf <= '9') ) { - next = buf; - SKIP_DIGITS( next ); - num = __kmp_str_to_int( buf, *next ); - KMP_ASSERT( num >= 0 ); - buf = next; - SKIP_WS( buf ); - } - else { - num = -1; - } - - next = buf; - if ( __kmp_match_str( "disabled", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); -# if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_disabled; -# endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } - else if ( ( num == (int)proc_bind_false ) - || __kmp_match_str( "false", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); -# if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_none; -# endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } - else if ( ( num == (int)proc_bind_true ) - || __kmp_match_str( "true", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - else { - // - // Count the number of values in the env var string - // - const 
char *scan; - int nelem = 1; - for ( scan = buf; *scan != '\0'; scan++ ) { - if ( *scan == ',' ) { - nelem++; - } - } - - // - // Create / expand the nested proc_bind array as needed - // - if ( __kmp_nested_proc_bind.size < nelem ) { - __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *) - KMP_INTERNAL_REALLOC( __kmp_nested_proc_bind.bind_types, - sizeof(kmp_proc_bind_t) * nelem ); - if ( __kmp_nested_proc_bind.bind_types == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - __kmp_nested_proc_bind.size = nelem; - } - __kmp_nested_proc_bind.used = nelem; - - // - // Save values in the nested proc_bind array - // - int i = 0; - for (;;) { - enum kmp_proc_bind_t bind; - - if ( ( num == (int)proc_bind_master ) - || __kmp_match_str( "master", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); - bind = proc_bind_master; - } - else if ( ( num == (int)proc_bind_close ) - || __kmp_match_str( "close", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); - bind = proc_bind_close; - } - else if ( ( num == (int)proc_bind_spread ) - || __kmp_match_str( "spread", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); - bind = proc_bind_spread; - } - else { - KMP_WARNING( StgInvalidValue, name, value ); - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - __kmp_nested_proc_bind.used = 1; - return; - } - - __kmp_nested_proc_bind.bind_types[i++] = bind; - if ( i >= nelem ) { - break; - } - KMP_DEBUG_ASSERT( *buf == ',' ); - buf++; - SKIP_WS( buf ); - - // - // Read next value if it was specified as an integer - // - if ( (*buf >= '0') && (*buf <= '9') ) { - next = buf; - SKIP_DIGITS( next ); - num = __kmp_str_to_int( buf, *next ); - KMP_ASSERT( num >= 0 ); - buf = next; - SKIP_WS( buf ); - } - else { - num = -1; - } - } - SKIP_WS( buf ); - } - if ( *buf != '\0' ) { - KMP_WARNING( ParseExtraCharsWarn, name, buf ); - } -} - - -static void -__kmp_stg_print_proc_bind( kmp_str_buf_t * buffer, char const * name, - void * data ) -{ - int nelem = __kmp_nested_proc_bind.used; - if( 
__kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - if ( nelem == 0 ) { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - else { - int i; - __kmp_str_buf_print( buffer, "='", name ); - for ( i = 0; i < nelem; i++ ) { - switch ( __kmp_nested_proc_bind.bind_types[i] ) { - case proc_bind_false: - __kmp_str_buf_print( buffer, "false" ); - break; - - case proc_bind_true: - __kmp_str_buf_print( buffer, "true" ); - break; - - case proc_bind_master: - __kmp_str_buf_print( buffer, "master" ); - break; - - case proc_bind_close: - __kmp_str_buf_print( buffer, "close" ); - break; - - case proc_bind_spread: - __kmp_str_buf_print( buffer, "spread" ); - break; - - case proc_bind_intel: - __kmp_str_buf_print( buffer, "intel" ); - break; - - case proc_bind_default: - __kmp_str_buf_print( buffer, "default" ); - break; - } - if ( i < nelem - 1 ) { - __kmp_str_buf_print( buffer, "," ); - } - } - __kmp_str_buf_print( buffer, "'\n" ); - } -} - -#endif /* OMP_40_ENABLED */ - - -// ------------------------------------------------------------------------------------------------- -// OMP_DYNAMIC -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_omp_dynamic( char const * name, char const * value, void * data ) -{ - __kmp_stg_parse_bool( name, value, & (__kmp_global.g.g_dynamic) ); -} // __kmp_stg_parse_omp_dynamic - -static void -__kmp_stg_print_omp_dynamic( kmp_str_buf_t * buffer, char const * name, void * data ) -{ - __kmp_stg_print_bool( buffer, name, __kmp_global.g.g_dynamic ); -} // __kmp_stg_print_omp_dynamic - -static void -__kmp_stg_parse_kmp_dynamic_mode( char const * name, char const * value, void * data ) -{ - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - __kmp_env_toPrint( name, 0 ); - return; - } -#ifdef USE_LOAD_BALANCE - else if ( __kmp_str_match( "load balance", 2, value ) - || 
__kmp_str_match( "load_balance", 2, value ) - || __kmp_str_match( "load-balance", 2, value ) - || __kmp_str_match( "loadbalance", 2, value ) - || __kmp_str_match( "balance", 1, value ) ) { - __kmp_global.g.g_dynamic_mode = dynamic_load_balance; - } -#endif /* USE_LOAD_BALANCE */ - else if ( __kmp_str_match( "thread limit", 1, value ) - || __kmp_str_match( "thread_limit", 1, value ) - || __kmp_str_match( "thread-limit", 1, value ) - || __kmp_str_match( "threadlimit", 1, value ) - || __kmp_str_match( "limit", 2, value ) ) { - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - } - else if ( __kmp_str_match( "random", 1, value ) ) { - __kmp_global.g.g_dynamic_mode = dynamic_random; - } - else { - KMP_WARNING( StgInvalidValue, name, value ); - } -} //__kmp_stg_parse_kmp_dynamic_mode - -static void -__kmp_stg_print_kmp_dynamic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) -{ -#if KMP_DEBUG - if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { - __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) ); - } -# ifdef USE_LOAD_BALANCE - else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { - __kmp_stg_print_str( buffer, name, "load balance" ); - } -# endif /* USE_LOAD_BALANCE */ - else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { - __kmp_stg_print_str( buffer, name, "thread limit" ); - } - else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { - __kmp_stg_print_str( buffer, name, "random" ); - } - else { - KMP_ASSERT(0); - } -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_kmp_dynamic_mode - - -#ifdef USE_LOAD_BALANCE - -// ------------------------------------------------------------------------------------------------- -// KMP_LOAD_BALANCE_INTERVAL -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_ld_balance_interval( char const * name, char const * value, void * data ) -{ - double interval = 
__kmp_convert_to_double( value ); - if ( interval >= 0 ) { - __kmp_load_balance_interval = interval; - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if -} // __kmp_stg_parse_load_balance_interval - -static void -__kmp_stg_print_ld_balance_interval( kmp_str_buf_t * buffer, char const * name, void * data ) { -#if KMP_DEBUG - __kmp_str_buf_print( buffer, " %s=%8.6f\n", name, __kmp_load_balance_interval ); -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_load_balance_interval - -#endif /* USE_LOAD_BALANCE */ - -// ------------------------------------------------------------------------------------------------- -// KMP_INIT_AT_FORK -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_init_at_fork( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_need_register_atfork ); - if ( __kmp_need_register_atfork ) { - __kmp_need_register_atfork_specified = TRUE; - }; -} // __kmp_stg_parse_init_at_fork - -static void -__kmp_stg_print_init_at_fork( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_need_register_atfork_specified ); -} // __kmp_stg_print_init_at_fork - -// ------------------------------------------------------------------------------------------------- -// KMP_SCHEDULE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_schedule( char const * name, char const * value, void * data ) { - - if ( value != NULL ) { - size_t length = KMP_STRLEN( value ); - if ( length > INT_MAX ) { - KMP_WARNING( LongValue, name ); - } else { - char *semicolon; - if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'' ) - KMP_WARNING( UnbalancedQuotes, name ); - do { - char sentinel; - - semicolon = (char *) strchr( value, ';' ); - if( *value && semicolon != value ) { - char *comma = (char *) strchr( value, 
',' ); - - if ( comma ) { - ++comma; - sentinel = ','; - } else - sentinel = ';'; - if ( !__kmp_strcasecmp_with_sentinel( "static", value, sentinel ) ) { - if( !__kmp_strcasecmp_with_sentinel( "greedy", comma, ';' ) ) { - __kmp_static = kmp_sch_static_greedy; - continue; - } else if( !__kmp_strcasecmp_with_sentinel( "balanced", comma, ';' ) ) { - __kmp_static = kmp_sch_static_balanced; - continue; - } - } else if ( !__kmp_strcasecmp_with_sentinel( "guided", value, sentinel ) ) { - if ( !__kmp_strcasecmp_with_sentinel( "iterative", comma, ';' ) ) { - __kmp_guided = kmp_sch_guided_iterative_chunked; - continue; - } else if ( !__kmp_strcasecmp_with_sentinel( "analytical", comma, ';' ) ) { - /* analytical not allowed for too many threads */ - __kmp_guided = kmp_sch_guided_analytical_chunked; - continue; - } - } - KMP_WARNING( InvalidClause, name, value ); - } else - KMP_WARNING( EmptyClause, name ); - } while ( (value = semicolon ? semicolon + 1 : NULL) ); - } - }; // if - -} // __kmp_stg_parse__schedule - -static void -__kmp_stg_print_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - if ( __kmp_static == kmp_sch_static_greedy ) { - __kmp_str_buf_print( buffer, "%s", "static,greedy"); - } else if ( __kmp_static == kmp_sch_static_balanced ) { - __kmp_str_buf_print ( buffer, "%s", "static,balanced"); - } - if ( __kmp_guided == kmp_sch_guided_iterative_chunked ) { - __kmp_str_buf_print( buffer, ";%s'\n", "guided,iterative"); - } else if ( __kmp_guided == kmp_sch_guided_analytical_chunked ) { - __kmp_str_buf_print( buffer, ";%s'\n", "guided,analytical"); - } -} // __kmp_stg_print_schedule - -// ------------------------------------------------------------------------------------------------- -// OMP_SCHEDULE -// ------------------------------------------------------------------------------------------------- - -static void 
-__kmp_stg_parse_omp_schedule( char const * name, char const * value, void * data ) -{ - size_t length; - if( value ) { - length = KMP_STRLEN( value ); - if( length ) { - char *comma = (char *) strchr( value, ',' ); - if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'') - KMP_WARNING( UnbalancedQuotes, name ); - /* get the specified scheduling style */ - if (!__kmp_strcasecmp_with_sentinel("dynamic", value, ',')) /* DYNAMIC */ - __kmp_sched = kmp_sch_dynamic_chunked; - else if (!__kmp_strcasecmp_with_sentinel("guided", value, ',')) /* GUIDED */ - __kmp_sched = kmp_sch_guided_chunked; -// AC: TODO: add AUTO schedule, and pprobably remove TRAPEZOIDAL (OMP 3.0 does not allow it) - else if (!__kmp_strcasecmp_with_sentinel("auto", value, ',')) { /* AUTO */ - __kmp_sched = kmp_sch_auto; - if( comma ) { - __kmp_msg( kmp_ms_warning, KMP_MSG( IgnoreChunk, name, comma ), __kmp_msg_null ); - comma = NULL; - } - } - else if (!__kmp_strcasecmp_with_sentinel("trapezoidal", value, ',')) /* TRAPEZOIDAL */ - __kmp_sched = kmp_sch_trapezoidal; - else if (!__kmp_strcasecmp_with_sentinel("static", value, ',')) /* STATIC */ - __kmp_sched = kmp_sch_static; -#ifdef KMP_STATIC_STEAL_ENABLED - else if (KMP_ARCH_X86_64 && - !__kmp_strcasecmp_with_sentinel("static_steal", value, ',')) - __kmp_sched = kmp_sch_static_steal; -#endif - else { - KMP_WARNING( StgInvalidValue, name, value ); - value = NULL; /* skip processing of comma */ - } - if( value && comma ) { - __kmp_env_chunk = TRUE; - - if(__kmp_sched == kmp_sch_static) - __kmp_sched = kmp_sch_static_chunked; - ++comma; - __kmp_chunk = __kmp_str_to_int( comma, 0 ); - if ( __kmp_chunk < 1 ) { - __kmp_chunk = KMP_DEFAULT_CHUNK; - __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidChunk, name, comma ), __kmp_msg_null ); - KMP_INFORM( Using_int_Value, name, __kmp_chunk ); -// AC: next block commented out until KMP_DEFAULT_CHUNK != KMP_MIN_CHUNK (to improve code coverage :) -// The default chunk size is 1 according to standard, thus making 
KMP_MIN_CHUNK not 1 we would introduce mess: -// wrong chunk becomes 1, but it will be impossible to explicitely set 1, because it becomes KMP_MIN_CHUNK... -// } else if ( __kmp_chunk < KMP_MIN_CHUNK ) { -// __kmp_chunk = KMP_MIN_CHUNK; - } else if ( __kmp_chunk > KMP_MAX_CHUNK ) { - __kmp_chunk = KMP_MAX_CHUNK; - __kmp_msg( kmp_ms_warning, KMP_MSG( LargeChunk, name, comma ), __kmp_msg_null ); - KMP_INFORM( Using_int_Value, name, __kmp_chunk ); - } - } else - __kmp_env_chunk = FALSE; - } else - KMP_WARNING( EmptyString, name ); - } - K_DIAG(1, ("__kmp_static == %d\n", __kmp_static)) - K_DIAG(1, ("__kmp_guided == %d\n", __kmp_guided)) - K_DIAG(1, ("__kmp_sched == %d\n", __kmp_sched)) - K_DIAG(1, ("__kmp_chunk == %d\n", __kmp_chunk)) -} // __kmp_stg_parse_omp_schedule - -static void -__kmp_stg_print_omp_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - if ( __kmp_chunk ) { - switch ( __kmp_sched ) { - case kmp_sch_dynamic_chunked: - __kmp_str_buf_print( buffer, "%s,%d'\n", "dynamic", __kmp_chunk); - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - __kmp_str_buf_print( buffer, "%s,%d'\n", "guided", __kmp_chunk); - break; - case kmp_sch_trapezoidal: - __kmp_str_buf_print( buffer, "%s,%d'\n", "trapezoidal", __kmp_chunk); - break; - case kmp_sch_static: - case kmp_sch_static_chunked: - case kmp_sch_static_balanced: - case kmp_sch_static_greedy: - __kmp_str_buf_print( buffer, "%s,%d'\n", "static", __kmp_chunk); - break; - case kmp_sch_static_steal: - __kmp_str_buf_print( buffer, "%s,%d'\n", "static_steal", __kmp_chunk); - break; - case kmp_sch_auto: - __kmp_str_buf_print( buffer, "%s,%d'\n", "auto", __kmp_chunk); - break; - } - } else { - switch ( __kmp_sched ) { - case kmp_sch_dynamic_chunked: - __kmp_str_buf_print( buffer, "%s'\n", "dynamic"); - break; - case 
kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - __kmp_str_buf_print( buffer, "%s'\n", "guided"); - break; - case kmp_sch_trapezoidal: - __kmp_str_buf_print( buffer, "%s'\n", "trapezoidal"); - break; - case kmp_sch_static: - case kmp_sch_static_chunked: - case kmp_sch_static_balanced: - case kmp_sch_static_greedy: - __kmp_str_buf_print( buffer, "%s'\n", "static"); - break; - case kmp_sch_static_steal: - __kmp_str_buf_print( buffer, "%s'\n", "static_steal"); - break; - case kmp_sch_auto: - __kmp_str_buf_print( buffer, "%s'\n", "auto"); - break; - } - } -} // __kmp_stg_print_omp_schedule - -// ------------------------------------------------------------------------------------------------- -// KMP_ATOMIC_MODE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_atomic_mode( char const * name, char const * value, void * data ) { - // Modes: 0 -- do not change default; 1 -- Intel perf mode, 2 -- GOMP compatibility mode. - int mode = 0; - int max = 1; - #ifdef KMP_GOMP_COMPAT - max = 2; - #endif /* KMP_GOMP_COMPAT */ - __kmp_stg_parse_int( name, value, 0, max, & mode ); - // TODO; parse_int is not very suitable for this case. In case of overflow it is better to use - // 0 rather that max value. - if ( mode > 0 ) { - __kmp_atomic_mode = mode; - }; // if -} // __kmp_stg_parse_atomic_mode - -static void -__kmp_stg_print_atomic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_atomic_mode ); -} // __kmp_stg_print_atomic_mode - - -// ------------------------------------------------------------------------------------------------- -// KMP_CONSISTENCY_CHECK -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_consistency_check( char const * name, char const * value, void * data ) { - if ( ! 
__kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { - // Note, this will not work from kmp_set_defaults because th_cons stack was not allocated - // for existed thread(s) thus the first __kmp_push_ will break with assertion. - // TODO: allocate th_cons if called from kmp_set_defaults. - __kmp_env_consistency_check = TRUE; - } else if ( ! __kmp_strcasecmp_with_sentinel( "none", value, 0 ) ) { - __kmp_env_consistency_check = FALSE; - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if -} // __kmp_stg_parse_consistency_check - -static void -__kmp_stg_print_consistency_check( kmp_str_buf_t * buffer, char const * name, void * data ) { -#if KMP_DEBUG - const char *value = NULL; - - if ( __kmp_env_consistency_check ) { - value = "all"; - } else { - value = "none"; - } - - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - } -#endif /* KMP_DEBUG */ -} // __kmp_stg_print_consistency_check - - -#if USE_ITT_BUILD -// ------------------------------------------------------------------------------------------------- -// KMP_ITT_PREPARE_DELAY -// ------------------------------------------------------------------------------------------------- - -#if USE_ITT_NOTIFY - -static void -__kmp_stg_parse_itt_prepare_delay( char const * name, char const * value, void * data ) -{ - // Experimental code: KMP_ITT_PREPARE_DELAY specifies numbert of loop iterations. 
- int delay = 0; - __kmp_stg_parse_int( name, value, 0, INT_MAX, & delay ); - __kmp_itt_prepare_delay = delay; -} // __kmp_str_parse_itt_prepare_delay - -static void -__kmp_stg_print_itt_prepare_delay( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_uint64( buffer, name, __kmp_itt_prepare_delay ); - -} // __kmp_str_print_itt_prepare_delay - -#endif // USE_ITT_NOTIFY -#endif /* USE_ITT_BUILD */ - -// ------------------------------------------------------------------------------------------------- -// KMP_MALLOC_POOL_INCR -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_malloc_pool_incr( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, - value, - KMP_MIN_MALLOC_POOL_INCR, - KMP_MAX_MALLOC_POOL_INCR, - NULL, - & __kmp_malloc_pool_incr, - 1 - ); -} // __kmp_stg_parse_malloc_pool_incr - -static void -__kmp_stg_print_malloc_pool_incr( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_size( buffer, name, __kmp_malloc_pool_incr ); - -} // _kmp_stg_print_malloc_pool_incr - - -#ifdef KMP_DEBUG - -// ------------------------------------------------------------------------------------------------- -// KMP_PAR_RANGE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_par_range_env( char const * name, char const * value, void * data ) { - __kmp_stg_parse_par_range( - name, - value, - & __kmp_par_range, - __kmp_par_range_routine, - __kmp_par_range_filename, - & __kmp_par_range_lb, - & __kmp_par_range_ub - ); -} // __kmp_stg_parse_par_range_env - -static void -__kmp_stg_print_par_range_env( kmp_str_buf_t * buffer, char const * name, void * data ) { - if (__kmp_par_range != 0) { - __kmp_stg_print_str( buffer, name, par_range_to_print ); - } -} // __kmp_stg_print_par_range_env - -// 
------------------------------------------------------------------------------------------------- -// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_yield_cycle( char const * name, char const * value, void * data ) { - int flag = __kmp_yield_cycle; - __kmp_stg_parse_bool( name, value, & flag ); - __kmp_yield_cycle = flag; -} // __kmp_stg_parse_yield_cycle - -static void -__kmp_stg_print_yield_cycle( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_yield_cycle ); -} // __kmp_stg_print_yield_cycle - -static void -__kmp_stg_parse_yield_on( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_on_count ); -} // __kmp_stg_parse_yield_on - -static void -__kmp_stg_print_yield_on( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_yield_on_count ); -} // __kmp_stg_print_yield_on - -static void -__kmp_stg_parse_yield_off( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_off_count ); -} // __kmp_stg_parse_yield_off - -static void -__kmp_stg_print_yield_off( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_yield_off_count ); -} // __kmp_stg_print_yield_off - -#endif - -// ------------------------------------------------------------------------------------------------- -// KMP_INIT_WAIT, KMP_NEXT_WAIT -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_init_wait( char const * name, char const * value, void * data ) { - int wait; - KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 ); - wait = __kmp_init_wait / 2; - __kmp_stg_parse_int( name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, & wait 
); - __kmp_init_wait = wait * 2; - KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 ); - __kmp_yield_init = __kmp_init_wait; -} // __kmp_stg_parse_init_wait - -static void -__kmp_stg_print_init_wait( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_init_wait ); -} // __kmp_stg_print_init_wait - -static void -__kmp_stg_parse_next_wait( char const * name, char const * value, void * data ) { - int wait; - KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 ); - wait = __kmp_next_wait / 2; - __kmp_stg_parse_int( name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, & wait ); - __kmp_next_wait = wait * 2; - KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 ); - __kmp_yield_next = __kmp_next_wait; -} // __kmp_stg_parse_next_wait - -static void -__kmp_stg_print_next_wait( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_next_wait ); -} //__kmp_stg_print_next_wait - - -// ------------------------------------------------------------------------------------------------- -// KMP_GTID_MODE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_gtid_mode( char const * name, char const * value, void * data ) { - // - // Modes: - // 0 -- do not change default - // 1 -- sp search - // 2 -- use "keyed" TLS var, i.e. - // pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS) - // 3 -- __declspec(thread) TLS var in tdata section - // - int mode = 0; - int max = 2; - #ifdef KMP_TDATA_GTID - max = 3; - #endif /* KMP_TDATA_GTID */ - __kmp_stg_parse_int( name, value, 0, max, & mode ); - // TODO; parse_int is not very suitable for this case. In case of overflow it is better to use - // 0 rather that max value. 
- if ( mode == 0 ) { - __kmp_adjust_gtid_mode = TRUE; - } - else { - __kmp_gtid_mode = mode; - __kmp_adjust_gtid_mode = FALSE; - }; // if -} // __kmp_str_parse_gtid_mode - -static void -__kmp_stg_print_gtid_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - if ( __kmp_adjust_gtid_mode ) { - __kmp_stg_print_int( buffer, name, 0 ); - } - else { - __kmp_stg_print_int( buffer, name, __kmp_gtid_mode ); - } -} // __kmp_stg_print_gtid_mode - - -// ------------------------------------------------------------------------------------------------- -// KMP_NUM_LOCKS_IN_BLOCK -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_lock_block( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, KMP_INT_MAX, & __kmp_num_locks_in_block ); -} // __kmp_str_parse_lock_block - -static void -__kmp_stg_print_lock_block( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_num_locks_in_block ); -} // __kmp_stg_print_lock_block - -// ------------------------------------------------------------------------------------------------- -// KMP_LOCK_KIND -// ------------------------------------------------------------------------------------------------- - -#if KMP_USE_DYNAMIC_LOCK -# define KMP_STORE_LOCK_SEQ(a) (__kmp_user_lock_seq = lockseq_##a) -#else -# define KMP_STORE_LOCK_SEQ(a) -#endif - -static void -__kmp_stg_parse_lock_kind( char const * name, char const * value, void * data ) { - if ( __kmp_init_user_locks ) { - KMP_WARNING( EnvLockWarn, name ); - return; - } - - if ( __kmp_str_match( "tas", 2, value ) - || __kmp_str_match( "test and set", 2, value ) - || __kmp_str_match( "test_and_set", 2, value ) - || __kmp_str_match( "test-and-set", 2, value ) - || __kmp_str_match( "test andset", 2, value ) - || __kmp_str_match( "test_andset", 2, value ) - || __kmp_str_match( "test-andset", 2, value ) - || 
__kmp_str_match( "testand set", 2, value ) - || __kmp_str_match( "testand_set", 2, value ) - || __kmp_str_match( "testand-set", 2, value ) - || __kmp_str_match( "testandset", 2, value ) ) { - __kmp_user_lock_kind = lk_tas; - KMP_STORE_LOCK_SEQ(tas); - } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) - else if ( __kmp_str_match( "futex", 1, value ) ) { - if ( __kmp_futex_determine_capable() ) { - __kmp_user_lock_kind = lk_futex; - KMP_STORE_LOCK_SEQ(futex); - } - else { - KMP_WARNING( FutexNotSupported, name, value ); - } - } -#endif - else if ( __kmp_str_match( "ticket", 2, value ) ) { - __kmp_user_lock_kind = lk_ticket; - KMP_STORE_LOCK_SEQ(ticket); - } - else if ( __kmp_str_match( "queuing", 1, value ) - || __kmp_str_match( "queue", 1, value ) ) { - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } - else if ( __kmp_str_match( "drdpa ticket", 1, value ) - || __kmp_str_match( "drdpa_ticket", 1, value ) - || __kmp_str_match( "drdpa-ticket", 1, value ) - || __kmp_str_match( "drdpaticket", 1, value ) - || __kmp_str_match( "drdpa", 1, value ) ) { - __kmp_user_lock_kind = lk_drdpa; - KMP_STORE_LOCK_SEQ(drdpa); - } -#if KMP_USE_ADAPTIVE_LOCKS - else if ( __kmp_str_match( "adaptive", 1, value ) ) { - if( __kmp_cpuinfo.rtm ) { // ??? Is cpuinfo available here? 
- __kmp_user_lock_kind = lk_adaptive; - KMP_STORE_LOCK_SEQ(adaptive); - } else { - KMP_WARNING( AdaptiveNotSupported, name, value ); - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } - } -#endif // KMP_USE_ADAPTIVE_LOCKS -#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - else if ( __kmp_str_match("rtm", 1, value) ) { - if ( __kmp_cpuinfo.rtm ) { - __kmp_user_lock_kind = lk_rtm; - KMP_STORE_LOCK_SEQ(rtm); - } else { - KMP_WARNING( AdaptiveNotSupported, name, value ); - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } - } - else if ( __kmp_str_match("hle", 1, value) ) { - __kmp_user_lock_kind = lk_hle; - KMP_STORE_LOCK_SEQ(hle); - } -#endif - else { - KMP_WARNING( StgInvalidValue, name, value ); - } -} - -static void -__kmp_stg_print_lock_kind( kmp_str_buf_t * buffer, char const * name, void * data ) { - const char *value = NULL; - - switch ( __kmp_user_lock_kind ) { - case lk_default: - value = "default"; - break; - - case lk_tas: - value = "tas"; - break; - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) - case lk_futex: - value = "futex"; - break; -#endif - -#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - case lk_rtm: - value = "rtm"; - break; - - case lk_hle: - value = "hle"; - break; -#endif - - case lk_ticket: - value = "ticket"; - break; - - case lk_queuing: - value = "queuing"; - break; - - case lk_drdpa: - value = "drdpa"; - break; -#if KMP_USE_ADAPTIVE_LOCKS - case lk_adaptive: - value = "adaptive"; - break; -#endif - } - - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - } -} - -#if KMP_USE_ADAPTIVE_LOCKS - -// ------------------------------------------------------------------------------------------------- -// KMP_ADAPTIVE_LOCK_PROPS, KMP_SPECULATIVE_STATSFILE -// ------------------------------------------------------------------------------------------------- - -// Parse out values for the tunable parameters from a string of the form -// KMP_ADAPTIVE_LOCK_PROPS=max_soft_retries[,max_badness] 
-static void -__kmp_stg_parse_adaptive_lock_props( const char *name, const char *value, void *data ) -{ - int max_retries = 0; - int max_badness = 0; - - const char *next = value; - - int total = 0; // Count elements that were set. It'll be used as an array size - int prev_comma = FALSE; // For correct processing sequential commas - int i; - - // Save values in the structure __kmp_speculative_backoff_params - // Run only 3 iterations because it is enough to read two values or find a syntax error - for ( i = 0; i < 3 ; i++) { - SKIP_WS( next ); - - if ( *next == '\0' ) { - break; - } - // Next character is not an integer or not a comma OR number of values > 2 => end of list - if ( ( ( *next < '0' || *next > '9' ) && *next !=',' ) || total > 2 ) { - KMP_WARNING( EnvSyntaxError, name, value ); - return; - } - // The next character is ',' - if ( *next == ',' ) { - // ',' is the fisrt character - if ( total == 0 || prev_comma ) { - total++; - } - prev_comma = TRUE; - next++; //skip ',' - SKIP_WS( next ); - } - // Next character is a digit - if ( *next >= '0' && *next <= '9' ) { - int num; - const char *buf = next; - char const * msg = NULL; - prev_comma = FALSE; - SKIP_DIGITS( next ); - total++; - - const char *tmp = next; - SKIP_WS( tmp ); - if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) { - KMP_WARNING( EnvSpacesNotAllowed, name, value ); - return; - } - - num = __kmp_str_to_int( buf, *next ); - if ( num < 0 ) { // The number of retries should be >= 0 - msg = KMP_I18N_STR( ValueTooSmall ); - num = 1; - } else if ( num > KMP_INT_MAX ) { - msg = KMP_I18N_STR( ValueTooLarge ); - num = KMP_INT_MAX; - } - if ( msg != NULL ) { - // Message is not empty. Print warning. 
- KMP_WARNING( ParseSizeIntWarn, name, value, msg ); - KMP_INFORM( Using_int_Value, name, num ); - } - if( total == 1 ) { - max_retries = num; - } else if( total == 2 ) { - max_badness = num; - } - } - } - KMP_DEBUG_ASSERT( total > 0 ); - if( total <= 0 ) { - KMP_WARNING( EnvSyntaxError, name, value ); - return; - } - __kmp_adaptive_backoff_params.max_soft_retries = max_retries; - __kmp_adaptive_backoff_params.max_badness = max_badness; -} - - -static void -__kmp_stg_print_adaptive_lock_props(kmp_str_buf_t * buffer, char const * name, void * data ) -{ - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_adaptive_backoff_params.max_soft_retries, - __kmp_adaptive_backoff_params.max_badness ); -} // __kmp_stg_print_adaptive_lock_props - -#if KMP_DEBUG_ADAPTIVE_LOCKS - -static void -__kmp_stg_parse_speculative_statsfile( char const * name, char const * value, void * data ) { - __kmp_stg_parse_file( name, value, "", & __kmp_speculative_statsfile ); -} // __kmp_stg_parse_speculative_statsfile - -static void -__kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name, void * data ) { - if ( __kmp_str_match( "-", 0, __kmp_speculative_statsfile ) ) { - __kmp_stg_print_str( buffer, name, "stdout" ); - } else { - __kmp_stg_print_str( buffer, name, __kmp_speculative_statsfile ); - } - -} // __kmp_stg_print_speculative_statsfile - -#endif // KMP_DEBUG_ADAPTIVE_LOCKS - -#endif // KMP_USE_ADAPTIVE_LOCKS - -// ------------------------------------------------------------------------------------------------- -// KMP_PLACE_THREADS -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_place_threads( char const * name, char const * value, void * data ) { - // Value example: 5Cx2Tx15O - // Which means "use 5 cores with offset 15, 2 threads per core" - // AC: extended 
to sockets level, examples of - // "use 2 sockets with offset 6, 2 cores with offset 2 per socket, 2 threads per core": - // 2s,6o,2c,2o,2t; 2s,6o,2c,2t,2o; 2s@6,2c@2,2t - // To not break legacy code core-offset can be last; - // postfix "o" or prefix @ can be offset designator. - // Note: not all syntax errors are analyzed, some may be skipped. -#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == 'x') - int num; - int single_warning = 0; - int flagS = 0, flagC = 0, flagT = 0, flagSO = 0, flagCO = 0; - const char *next = value; - const char *prev; - - SKIP_WS(next); // skip white spaces - if (*next == '\0') - return; // no data provided, retain default values - // Get num_sockets first (or whatever specified) - if (*next >= '0' && *next <= '9') { - prev = next; - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - SKIP_WS(next); - if (*next == 's' || *next == 'S') { // e.g. "2s" - __kmp_place_num_sockets = num; - flagS = 1; // got num sockets - next++; - if (*next == '@') { // socket offset, e.g. "2s@4" - flagSO = 1; - prev = ++next; // don't allow spaces for simplicity - if (!(*next >= '0' && *next <= '9')) { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - __kmp_place_socket_offset = num; - } - } else if (*next == 'c' || *next == 'C') { - __kmp_place_num_cores = num; - flagS = flagC = 1; // sockets were not specified - use default - next++; - if (*next == '@') { // core offset, e.g. 
"2c@6" - flagCO = 1; - prev = ++next; // don't allow spaces for simplicity - if (!(*next >= '0' && *next <= '9')) { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - __kmp_place_core_offset = num; - } - } else if (CHECK_DELIM(next)) { - __kmp_place_num_cores = num; // no letter-designator - num cores - flagS = flagC = 1; // sockets were not specified - use default - next++; - } else if (*next == 't' || *next == 'T') { - __kmp_place_num_threads_per_core = num; - // sockets, cores were not specified - use default - return; // we ignore offset value in case all cores are used - } else if (*next == '\0') { - __kmp_place_num_cores = num; - return; // the only value provided - set num cores - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here - SKIP_WS(next); - if (*next == '\0') - return; // " n " - something like this - if (CHECK_DELIM(next)) { - next++; // skip delimiter - SKIP_WS(next); - } - - // Get second value (could be offset, num_cores, num_threads) - if (*next >= '0' && *next <= '9') { - prev = next; - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - SKIP_WS(next); - if (*next == 'c' || *next == 'C') { - KMP_DEBUG_ASSERT(flagC == 0); - __kmp_place_num_cores = num; - flagC = 1; - next++; - if (*next == '@') { // core offset, e.g. 
"2c@6" - flagCO = 1; - prev = ++next; // don't allow spaces for simplicity - if (!(*next >= '0' && *next <= '9')) { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - __kmp_place_core_offset = num; - } - } else if (*next == 'o' || *next == 'O') { // offset specified - KMP_WARNING(AffThrPlaceDeprecated); - single_warning = 1; - if (flagC) { // whether num_cores already specified (sockets skipped) - KMP_DEBUG_ASSERT(!flagCO); // either "o" or @, not both - __kmp_place_core_offset = num; - } else { - KMP_DEBUG_ASSERT(!flagSO); // either "o" or @, not both - __kmp_place_socket_offset = num; - } - next++; - } else if (*next == 't' || *next == 'T') { - KMP_DEBUG_ASSERT(flagT == 0); - __kmp_place_num_threads_per_core = num; - flagC = 1; // num_cores could be skipped ? - flagT = 1; - next++; // can have core-offset specified after num threads - } else if (*next == '\0') { - KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core - __kmp_place_num_threads_per_core = num; - return; // two values provided without letter-designator - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - SKIP_WS(next); - if (*next == '\0') - return; // " Ns,Nc " - something like this - if (CHECK_DELIM(next)) { - next++; // skip delimiter - SKIP_WS(next); - } - - // Get third value (could be core-offset, num_cores, num_threads) - if (*next >= '0' && *next <= '9') { - prev = next; - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - SKIP_WS(next); - if (*next == 't' || *next == 'T') { - KMP_DEBUG_ASSERT(flagT == 0); - __kmp_place_num_threads_per_core = num; - if (flagC == 0) - return; // num_cores could be skipped (e.g. 2s,4o,2t) - flagT = 1; - next++; // can have core-offset specified later (e.g. 
2s,1c,2t,3o) - } else if (*next == 'c' || *next == 'C') { - KMP_DEBUG_ASSERT(flagC == 0); - __kmp_place_num_cores = num; - flagC = 1; - next++; - //KMP_DEBUG_ASSERT(*next != '@'); // socket offset used "o" designator - } else if (*next == 'o' || *next == 'O') { - KMP_WARNING(AffThrPlaceDeprecated); - single_warning = 1; - KMP_DEBUG_ASSERT(flagC); - //KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator - __kmp_place_core_offset = num; - next++; - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - KMP_DEBUG_ASSERT(flagC); - SKIP_WS(next); - if ( *next == '\0' ) - return; - if (CHECK_DELIM(next)) { - next++; // skip delimiter - SKIP_WS(next); - } - - // Get 4-th value (could be core-offset, num_threads) - if (*next >= '0' && *next <= '9') { - prev = next; - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - SKIP_WS(next); - if (*next == 'o' || *next == 'O') { - if (!single_warning) { // warn once - KMP_WARNING(AffThrPlaceDeprecated); - } - KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator - __kmp_place_core_offset = num; - next++; - } else if (*next == 't' || *next == 'T') { - KMP_DEBUG_ASSERT(flagT == 0); - __kmp_place_num_threads_per_core = num; - flagT = 1; - next++; // can have core-offset specified after num threads - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - return; - } - SKIP_WS(next); - if ( *next == '\0' ) - return; - if (CHECK_DELIM(next)) { - next++; // skip delimiter - SKIP_WS(next); - } - - // Get 5-th value (could be core-offset, num_threads) - if (*next >= '0' && *next <= '9') { - prev = next; - SKIP_DIGITS(next); - num = __kmp_str_to_int(prev, *next); - SKIP_WS(next); - if (*next == 'o' || *next == 'O') { - if (!single_warning) { // warn once - KMP_WARNING(AffThrPlaceDeprecated); - } - KMP_DEBUG_ASSERT(flagT); - 
KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator - __kmp_place_core_offset = num; - } else if (*next == 't' || *next == 'T') { - KMP_DEBUG_ASSERT(flagT == 0); - __kmp_place_num_threads_per_core = num; - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - } - } else { - KMP_WARNING(AffThrPlaceInvalid, name, value); - } - return; -#undef CHECK_DELIM -} - -static void -__kmp_stg_print_place_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { - if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) { - int comma = 0; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - if(__kmp_env_format) - KMP_STR_BUF_PRINT_NAME_EX(name); - else - __kmp_str_buf_print(buffer, " %s='", name); - if (__kmp_place_num_sockets) { - __kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets); - if (__kmp_place_socket_offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_place_socket_offset); - comma = 1; - } - if (__kmp_place_num_cores) { - __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores); - if (__kmp_place_core_offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_place_core_offset); - comma = 1; - } - if (__kmp_place_num_threads_per_core) - __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core); - __kmp_str_buf_print(buffer, "%s'\n", buf.str ); - __kmp_str_buf_free(&buf); -/* - } else { - __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) ); -*/ - } -} - -#if USE_ITT_BUILD -// ------------------------------------------------------------------------------------------------- -// KMP_FORKJOIN_FRAMES -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_forkjoin_frames( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_forkjoin_frames ); -} // __kmp_stg_parse_forkjoin_frames - -static void -__kmp_stg_print_forkjoin_frames( 
kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_forkjoin_frames ); -} // __kmp_stg_print_forkjoin_frames - -// ------------------------------------------------------------------------------------------------- -// KMP_FORKJOIN_FRAMES_MODE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_forkjoin_frames_mode( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, 3, & __kmp_forkjoin_frames_mode ); -} // __kmp_stg_parse_forkjoin_frames - -static void -__kmp_stg_print_forkjoin_frames_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_forkjoin_frames_mode ); -} // __kmp_stg_print_forkjoin_frames -#endif /* USE_ITT_BUILD */ - -// ------------------------------------------------------------------------------------------------- -// OMP_DISPLAY_ENV -// ------------------------------------------------------------------------------------------------- - -#if OMP_40_ENABLED - -static void -__kmp_stg_parse_omp_display_env( char const * name, char const * value, void * data ) -{ - if ( __kmp_str_match( "VERBOSE", 1, value ) ) - { - __kmp_display_env_verbose = TRUE; - } else { - __kmp_stg_parse_bool( name, value, & __kmp_display_env ); - } - -} // __kmp_stg_parse_omp_display_env - -static void -__kmp_stg_print_omp_display_env( kmp_str_buf_t * buffer, char const * name, void * data ) -{ - if ( __kmp_display_env_verbose ) - { - __kmp_stg_print_str( buffer, name, "VERBOSE" ); - } else { - __kmp_stg_print_bool( buffer, name, __kmp_display_env ); - } -} // __kmp_stg_print_omp_display_env - -static void -__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) { - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - return; - } // read value before first parallel only - __kmp_stg_parse_bool( name, 
value, & __kmp_omp_cancellation ); -} // __kmp_stg_parse_omp_cancellation - -static void -__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation ); -} // __kmp_stg_print_omp_cancellation - -#endif - -// ------------------------------------------------------------------------------------------------- -// Table. -// ------------------------------------------------------------------------------------------------- - - -static kmp_setting_t __kmp_stg_table[] = { - - { "KMP_ALL_THREADS", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 }, - { "KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime, NULL, 0, 0 }, - { "KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok, __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0 }, - { "KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 }, - { "KMP_MAX_THREADS", __kmp_stg_parse_all_threads, NULL, NULL, 0, 0 }, // For backward compatibility - { "KMP_MONITOR_STACKSIZE", __kmp_stg_parse_monitor_stacksize, __kmp_stg_print_monitor_stacksize, NULL, 0, 0 }, - { "KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings, NULL, 0, 0 }, - { "KMP_STACKOFFSET", __kmp_stg_parse_stackoffset, __kmp_stg_print_stackoffset, NULL, 0, 0 }, - { "KMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 }, - { "KMP_STACKPAD", __kmp_stg_parse_stackpad, __kmp_stg_print_stackpad, NULL, 0, 0 }, - { "KMP_VERSION", __kmp_stg_parse_version, __kmp_stg_print_version, NULL, 0, 0 }, - { "KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL, 0, 0 }, - - { "OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0 }, - { "OMP_NUM_THREADS", __kmp_stg_parse_num_threads, __kmp_stg_print_num_threads, NULL, 0, 0 }, - { "OMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 }, - - { "KMP_TASKING", __kmp_stg_parse_tasking, 
__kmp_stg_print_tasking, NULL, 0, 0 }, - { "KMP_TASK_STEALING_CONSTRAINT", __kmp_stg_parse_task_stealing, __kmp_stg_print_task_stealing, NULL, 0, 0 }, - { "OMP_MAX_ACTIVE_LEVELS", __kmp_stg_parse_max_active_levels, __kmp_stg_print_max_active_levels, NULL, 0, 0 }, - { "OMP_THREAD_LIMIT", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 }, - { "OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 }, -#if KMP_NESTED_HOT_TEAMS - { "KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, __kmp_stg_print_hot_teams_level, NULL, 0, 0 }, - { "KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, __kmp_stg_print_hot_teams_mode, NULL, 0, 0 }, -#endif // KMP_NESTED_HOT_TEAMS - -#if KMP_HANDLE_SIGNALS - { "KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, __kmp_stg_print_handle_signals, NULL, 0, 0 }, -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - { "KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, __kmp_stg_print_inherit_fp_control, NULL, 0, 0 }, -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#ifdef KMP_GOMP_COMPAT - { "GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0 }, -#endif - -#ifdef KMP_DEBUG - { "KMP_A_DEBUG", __kmp_stg_parse_a_debug, __kmp_stg_print_a_debug, NULL, 0, 0 }, - { "KMP_B_DEBUG", __kmp_stg_parse_b_debug, __kmp_stg_print_b_debug, NULL, 0, 0 }, - { "KMP_C_DEBUG", __kmp_stg_parse_c_debug, __kmp_stg_print_c_debug, NULL, 0, 0 }, - { "KMP_D_DEBUG", __kmp_stg_parse_d_debug, __kmp_stg_print_d_debug, NULL, 0, 0 }, - { "KMP_E_DEBUG", __kmp_stg_parse_e_debug, __kmp_stg_print_e_debug, NULL, 0, 0 }, - { "KMP_F_DEBUG", __kmp_stg_parse_f_debug, __kmp_stg_print_f_debug, NULL, 0, 0 }, - { "KMP_DEBUG", __kmp_stg_parse_debug, NULL, /* no print */ NULL, 0, 0 }, - { "KMP_DEBUG_BUF", __kmp_stg_parse_debug_buf, __kmp_stg_print_debug_buf, NULL, 0, 0 }, - { "KMP_DEBUG_BUF_ATOMIC", __kmp_stg_parse_debug_buf_atomic, __kmp_stg_print_debug_buf_atomic, NULL, 0, 0 }, - { "KMP_DEBUG_BUF_CHARS", 
__kmp_stg_parse_debug_buf_chars, __kmp_stg_print_debug_buf_chars, NULL, 0, 0 }, - { "KMP_DEBUG_BUF_LINES", __kmp_stg_parse_debug_buf_lines, __kmp_stg_print_debug_buf_lines, NULL, 0, 0 }, - { "KMP_DIAG", __kmp_stg_parse_diag, __kmp_stg_print_diag, NULL, 0, 0 }, - - { "KMP_PAR_RANGE", __kmp_stg_parse_par_range_env, __kmp_stg_print_par_range_env, NULL, 0, 0 }, - { "KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle, __kmp_stg_print_yield_cycle, NULL, 0, 0 }, - { "KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL, 0, 0 }, - { "KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off, NULL, 0, 0 }, -#endif // KMP_DEBUG - - { "KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc, __kmp_stg_print_align_alloc, NULL, 0, 0 }, - - { "KMP_PLAIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 }, - { "KMP_PLAIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 }, - { "KMP_FORKJOIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 }, - { "KMP_FORKJOIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 }, -#if KMP_FAST_REDUCTION_BARRIER - { "KMP_REDUCTION_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 }, - { "KMP_REDUCTION_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 }, -#endif - - { "KMP_ABORT_DELAY", __kmp_stg_parse_abort_delay, __kmp_stg_print_abort_delay, NULL, 0, 0 }, - { "KMP_CPUINFO_FILE", __kmp_stg_parse_cpuinfo_file, __kmp_stg_print_cpuinfo_file, NULL, 0, 0 }, - { "KMP_FORCE_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 }, - { "KMP_DETERMINISTIC_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 }, - { "KMP_STORAGE_MAP", __kmp_stg_parse_storage_map, __kmp_stg_print_storage_map, NULL, 0, 0 }, - { 
"KMP_ALL_THREADPRIVATE", __kmp_stg_parse_all_threadprivate, __kmp_stg_print_all_threadprivate, NULL, 0, 0 }, - { "KMP_FOREIGN_THREADS_THREADPRIVATE", __kmp_stg_parse_foreign_threads_threadprivate, __kmp_stg_print_foreign_threads_threadprivate, NULL, 0, 0 }, - -#if KMP_AFFINITY_SUPPORTED - { "KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL, 0, 0 }, -# ifdef KMP_GOMP_COMPAT - { "GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL, /* no print */ NULL, 0, 0 }, -# endif /* KMP_GOMP_COMPAT */ -# if OMP_40_ENABLED - { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 }, - { "OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0 }, -# else - { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, 0 }, -# endif /* OMP_40_ENABLED */ - - { "KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, __kmp_stg_print_topology_method, NULL, 0, 0 }, - -#else - - // - // KMP_AFFINITY is not supported on OS X*, nor is OMP_PLACES. - // OMP_PROC_BIND and proc-bind-var are supported, however. 
- // -# if OMP_40_ENABLED - { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 }, -# endif - -#endif // KMP_AFFINITY_SUPPORTED - - { "KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, __kmp_stg_print_init_at_fork, NULL, 0, 0 }, - { "KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, 0, 0 }, - { "OMP_SCHEDULE", __kmp_stg_parse_omp_schedule, __kmp_stg_print_omp_schedule, NULL, 0, 0 }, - { "KMP_ATOMIC_MODE", __kmp_stg_parse_atomic_mode, __kmp_stg_print_atomic_mode, NULL, 0, 0 }, - { "KMP_CONSISTENCY_CHECK", __kmp_stg_parse_consistency_check, __kmp_stg_print_consistency_check, NULL, 0, 0 }, - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - { "KMP_ITT_PREPARE_DELAY", __kmp_stg_parse_itt_prepare_delay, __kmp_stg_print_itt_prepare_delay, NULL, 0, 0 }, -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - { "KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr, __kmp_stg_print_malloc_pool_incr, NULL, 0, 0 }, - { "KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait, NULL, 0, 0 }, - { "KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait, NULL, 0, 0 }, - { "KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode, NULL, 0, 0 }, - { "OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic, NULL, 0, 0 }, - { "KMP_DYNAMIC_MODE", __kmp_stg_parse_kmp_dynamic_mode, __kmp_stg_print_kmp_dynamic_mode, NULL, 0, 0 }, - -#ifdef USE_LOAD_BALANCE - { "KMP_LOAD_BALANCE_INTERVAL", __kmp_stg_parse_ld_balance_interval,__kmp_stg_print_ld_balance_interval,NULL, 0, 0 }, -#endif - - { "KMP_NUM_LOCKS_IN_BLOCK", __kmp_stg_parse_lock_block, __kmp_stg_print_lock_block, NULL, 0, 0 }, - { "KMP_LOCK_KIND", __kmp_stg_parse_lock_kind, __kmp_stg_print_lock_kind, NULL, 0, 0 }, -#if KMP_USE_ADAPTIVE_LOCKS - { "KMP_ADAPTIVE_LOCK_PROPS", __kmp_stg_parse_adaptive_lock_props,__kmp_stg_print_adaptive_lock_props, NULL, 0, 0 }, -#if KMP_DEBUG_ADAPTIVE_LOCKS - { "KMP_SPECULATIVE_STATSFILE", 
__kmp_stg_parse_speculative_statsfile,__kmp_stg_print_speculative_statsfile, NULL, 0, 0 }, -#endif -#endif // KMP_USE_ADAPTIVE_LOCKS - { "KMP_PLACE_THREADS", __kmp_stg_parse_place_threads, __kmp_stg_print_place_threads, NULL, 0, 0 }, -#if USE_ITT_BUILD - { "KMP_FORKJOIN_FRAMES", __kmp_stg_parse_forkjoin_frames, __kmp_stg_print_forkjoin_frames, NULL, 0, 0 }, - { "KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,__kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0 }, -#endif - -# if OMP_40_ENABLED - { "OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, __kmp_stg_print_omp_display_env, NULL, 0, 0 }, - { "OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0 }, -#endif - { "", NULL, NULL, NULL, 0, 0 } -}; // settings - -static int const __kmp_stg_count = sizeof( __kmp_stg_table ) / sizeof( kmp_setting_t ); - -static inline -kmp_setting_t * -__kmp_stg_find( char const * name ) { - - int i; - if ( name != NULL ) { - for ( i = 0; i < __kmp_stg_count; ++ i ) { - if ( strcmp( __kmp_stg_table[ i ].name, name ) == 0 ) { - return & __kmp_stg_table[ i ]; - }; // if - }; // for - }; // if - return NULL; - -} // __kmp_stg_find - - -static int -__kmp_stg_cmp( void const * _a, void const * _b ) { - kmp_setting_t * a = (kmp_setting_t *) _a; - kmp_setting_t * b = (kmp_setting_t *) _b; - - // - // Process KMP_AFFINITY last. - // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY. - // - if ( strcmp( a->name, "KMP_AFFINITY" ) == 0 ) { - if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) { - return 0; - } - return 1; - } - else if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) { - return -1; - } - return strcmp( a->name, b->name ); -} // __kmp_stg_cmp - - -static void -__kmp_stg_init( void -) { - - static int initialized = 0; - - if ( ! initialized ) { - - // Sort table. - qsort( __kmp_stg_table, __kmp_stg_count - 1, sizeof( kmp_setting_t ), __kmp_stg_cmp ); - - { // Initialize *_STACKSIZE data. 
- - kmp_setting_t * kmp_stacksize = __kmp_stg_find( "KMP_STACKSIZE" ); // 1st priority. -#ifdef KMP_GOMP_COMPAT - kmp_setting_t * gomp_stacksize = __kmp_stg_find( "GOMP_STACKSIZE" ); // 2nd priority. -#endif - kmp_setting_t * omp_stacksize = __kmp_stg_find( "OMP_STACKSIZE" ); // 3rd priority. - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. - // !!! Compiler does not understand rivals is used and optimizes out assignments - // !!! rivals[ i ++ ] = ...; - static kmp_setting_t * volatile rivals[ 4 ]; - static kmp_stg_ss_data_t kmp_data = { 1, (kmp_setting_t **)rivals }; -#ifdef KMP_GOMP_COMPAT - static kmp_stg_ss_data_t gomp_data = { 1024, (kmp_setting_t **)rivals }; -#endif - static kmp_stg_ss_data_t omp_data = { 1024, (kmp_setting_t **)rivals }; - int i = 0; - - rivals[ i ++ ] = kmp_stacksize; -#ifdef KMP_GOMP_COMPAT - if ( gomp_stacksize != NULL ) { - rivals[ i ++ ] = gomp_stacksize; - }; // if -#endif - rivals[ i ++ ] = omp_stacksize; - rivals[ i ++ ] = NULL; - - kmp_stacksize->data = & kmp_data; -#ifdef KMP_GOMP_COMPAT - if ( gomp_stacksize != NULL ) { - gomp_stacksize->data = & gomp_data; - }; // if -#endif - omp_stacksize->data = & omp_data; - - } - - { // Initialize KMP_LIBRARY and OMP_WAIT_POLICY data. - - kmp_setting_t * kmp_library = __kmp_stg_find( "KMP_LIBRARY" ); // 1st priority. - kmp_setting_t * omp_wait_policy = __kmp_stg_find( "OMP_WAIT_POLICY" ); // 2nd priority. - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. 
- static kmp_setting_t * volatile rivals[ 3 ]; - static kmp_stg_wp_data_t kmp_data = { 0, (kmp_setting_t **)rivals }; - static kmp_stg_wp_data_t omp_data = { 1, (kmp_setting_t **)rivals }; - int i = 0; - - rivals[ i ++ ] = kmp_library; - if ( omp_wait_policy != NULL ) { - rivals[ i ++ ] = omp_wait_policy; - }; // if - rivals[ i ++ ] = NULL; - - kmp_library->data = & kmp_data; - if ( omp_wait_policy != NULL ) { - omp_wait_policy->data = & omp_data; - }; // if - - } - - { // Initialize KMP_ALL_THREADS, KMP_MAX_THREADS, and OMP_THREAD_LIMIT data. - - kmp_setting_t * kmp_all_threads = __kmp_stg_find( "KMP_ALL_THREADS" ); // 1st priority. - kmp_setting_t * kmp_max_threads = __kmp_stg_find( "KMP_MAX_THREADS" ); // 2nd priority. - kmp_setting_t * omp_thread_limit = __kmp_stg_find( "OMP_THREAD_LIMIT" ); // 3rd priority. - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. - static kmp_setting_t * volatile rivals[ 4 ]; - int i = 0; - - rivals[ i ++ ] = kmp_all_threads; - rivals[ i ++ ] = kmp_max_threads; - if ( omp_thread_limit != NULL ) { - rivals[ i ++ ] = omp_thread_limit; - }; // if - rivals[ i ++ ] = NULL; - - kmp_all_threads->data = (void*)& rivals; - kmp_max_threads->data = (void*)& rivals; - if ( omp_thread_limit != NULL ) { - omp_thread_limit->data = (void*)& rivals; - }; // if - - } - -#if KMP_AFFINITY_SUPPORTED - { // Initialize KMP_AFFINITY, GOMP_CPU_AFFINITY, and OMP_PROC_BIND data. - - kmp_setting_t * kmp_affinity = __kmp_stg_find( "KMP_AFFINITY" ); // 1st priority. - KMP_DEBUG_ASSERT( kmp_affinity != NULL ); - -# ifdef KMP_GOMP_COMPAT - kmp_setting_t * gomp_cpu_affinity = __kmp_stg_find( "GOMP_CPU_AFFINITY" ); // 2nd priority. - KMP_DEBUG_ASSERT( gomp_cpu_affinity != NULL ); -# endif - - kmp_setting_t * omp_proc_bind = __kmp_stg_find( "OMP_PROC_BIND" ); // 3rd priority. - KMP_DEBUG_ASSERT( omp_proc_bind != NULL ); - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. 
- static kmp_setting_t * volatile rivals[ 4 ]; - int i = 0; - - rivals[ i ++ ] = kmp_affinity; - -# ifdef KMP_GOMP_COMPAT - rivals[ i ++ ] = gomp_cpu_affinity; - gomp_cpu_affinity->data = (void*)& rivals; -# endif - - rivals[ i ++ ] = omp_proc_bind; - omp_proc_bind->data = (void*)& rivals; - rivals[ i ++ ] = NULL; - -# if OMP_40_ENABLED - static kmp_setting_t * volatile places_rivals[ 4 ]; - i = 0; - - kmp_setting_t * omp_places = __kmp_stg_find( "OMP_PLACES" ); // 3rd priority. - KMP_DEBUG_ASSERT( omp_places != NULL ); - - places_rivals[ i ++ ] = kmp_affinity; -# ifdef KMP_GOMP_COMPAT - places_rivals[ i ++ ] = gomp_cpu_affinity; -# endif - places_rivals[ i ++ ] = omp_places; - omp_places->data = (void*)& places_rivals; - places_rivals[ i ++ ] = NULL; -# endif - } -#else - // KMP_AFFINITY not supported, so OMP_PROC_BIND has no rivals. - // OMP_PLACES not supported yet. -#endif // KMP_AFFINITY_SUPPORTED - - { // Initialize KMP_DETERMINISTIC_REDUCTION and KMP_FORCE_REDUCTION data. - - kmp_setting_t * kmp_force_red = __kmp_stg_find( "KMP_FORCE_REDUCTION" ); // 1st priority. - kmp_setting_t * kmp_determ_red = __kmp_stg_find( "KMP_DETERMINISTIC_REDUCTION" ); // 2nd priority. - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. - static kmp_setting_t * volatile rivals[ 3 ]; - static kmp_stg_fr_data_t force_data = { 1, (kmp_setting_t **)rivals }; - static kmp_stg_fr_data_t determ_data = { 0, (kmp_setting_t **)rivals }; - int i = 0; - - rivals[ i ++ ] = kmp_force_red; - if ( kmp_determ_red != NULL ) { - rivals[ i ++ ] = kmp_determ_red; - }; // if - rivals[ i ++ ] = NULL; - - kmp_force_red->data = & force_data; - if ( kmp_determ_red != NULL ) { - kmp_determ_red->data = & determ_data; - }; // if - } - - initialized = 1; - - }; // if - - // Reset flags. 
- int i; - for ( i = 0; i < __kmp_stg_count; ++ i ) { - __kmp_stg_table[ i ].set = 0; - }; // for - -} // __kmp_stg_init - - -static void -__kmp_stg_parse( - char const * name, - char const * value -) { - - // On Windows* OS there are some nameless variables like "C:=C:\" (yeah, really nameless, they are - // presented in environment block as "=C:=C\\\x00=D:=D:\\\x00...", so let us skip them. - if ( name[ 0 ] == 0 ) { - return; - }; // if - - if ( value != NULL ) { - kmp_setting_t * setting = __kmp_stg_find( name ); - if ( setting != NULL ) { - setting->parse( name, value, setting->data ); - setting->defined = 1; - }; // if - }; // if - -} // __kmp_stg_parse - - -static int -__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. - char const * name, // Name of variable. - char const * value, // Value of the variable. - kmp_setting_t * * rivals // List of rival settings (the list must include current one). -) { - - if ( rivals == NULL ) { - return 0; - } - - // Loop thru higher priority settings (listed before current). - int i = 0; - for ( ; strcmp( rivals[ i ]->name, name ) != 0; i++ ) { - KMP_DEBUG_ASSERT( rivals[ i ] != NULL ); - -#if KMP_AFFINITY_SUPPORTED - if ( rivals[ i ] == __kmp_affinity_notype ) { - // - // If KMP_AFFINITY is specified without a type name, - // it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY. - // - continue; - } -#endif - - if ( rivals[ i ]->set ) { - KMP_WARNING( StgIgnored, name, rivals[ i ]->name ); - return 1; - }; // if - }; // while - - ++ i; // Skip current setting. 
- return 0; - -}; // __kmp_stg_check_rivals - - -static int -__kmp_env_toPrint( char const * name, int flag ) { - int rc = 0; - kmp_setting_t * setting = __kmp_stg_find( name ); - if ( setting != NULL ) { - rc = setting->defined; - if ( flag >= 0 ) { - setting->defined = flag; - }; // if - }; // if - return rc; -} - - -static void -__kmp_aux_env_initialize( kmp_env_blk_t* block ) { - - char const * value; - - /* OMP_NUM_THREADS */ - value = __kmp_env_blk_var( block, "OMP_NUM_THREADS" ); - if ( value ) { - ompc_set_num_threads( __kmp_dflt_team_nth ); - } - - /* KMP_BLOCKTIME */ - value = __kmp_env_blk_var( block, "KMP_BLOCKTIME" ); - if ( value ) { - kmpc_set_blocktime( __kmp_dflt_blocktime ); - } - - /* OMP_NESTED */ - value = __kmp_env_blk_var( block, "OMP_NESTED" ); - if ( value ) { - ompc_set_nested( __kmp_dflt_nested ); - } - - /* OMP_DYNAMIC */ - value = __kmp_env_blk_var( block, "OMP_DYNAMIC" ); - if ( value ) { - ompc_set_dynamic( __kmp_global.g.g_dynamic ); - } - -} - -void -__kmp_env_initialize( char const * string ) { - - kmp_env_blk_t block; - int i; - - __kmp_stg_init(); - - // Hack!!! - if ( string == NULL ) { - // __kmp_max_nth = __kmp_sys_max_nth; - __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub ); - }; // if - __kmp_env_blk_init( & block, string ); - - // - // update the set flag on all entries that have an env var - // - for ( i = 0; i < block.count; ++ i ) { - if (( block.vars[ i ].name == NULL ) - || ( *block.vars[ i ].name == '\0')) { - continue; - } - if ( block.vars[ i ].value == NULL ) { - continue; - } - kmp_setting_t * setting = __kmp_stg_find( block.vars[ i ].name ); - if ( setting != NULL ) { - setting->set = 1; - } - }; // for i - - // Special case. If we parse environment, not a string, process KMP_WARNINGS first. 
- if ( string == NULL ) { - char const * name = "KMP_WARNINGS"; - char const * value = __kmp_env_blk_var( & block, name ); - __kmp_stg_parse( name, value ); - }; // if - -#if KMP_AFFINITY_SUPPORTED - // - // Special case. KMP_AFFINITY is not a rival to other affinity env vars - // if no affinity type is specified. We want to allow - // KMP_AFFINITY=[no],verbose/[no]warnings/etc. to be enabled when - // specifying the affinity type via GOMP_CPU_AFFINITY or the OMP 4.0 - // affinity mechanism. - // - __kmp_affinity_notype = NULL; - char const *aff_str = __kmp_env_blk_var( & block, "KMP_AFFINITY" ); - if ( aff_str != NULL ) { - // - // Check if the KMP_AFFINITY type is specified in the string. - // We just search the string for "compact", "scatter", etc. - // without really parsing the string. The syntax of the - // KMP_AFFINITY env var is such that none of the affinity - // type names can appear anywhere other that the type - // specifier, even as substrings. - // - // I can't find a case-insensitive version of strstr on Windows* OS. - // Use the case-sensitive version for now. - // - -# if KMP_OS_WINDOWS -# define FIND strstr -# else -# define FIND strcasestr -# endif - - if ( ( FIND( aff_str, "none" ) == NULL ) - && ( FIND( aff_str, "physical" ) == NULL ) - && ( FIND( aff_str, "logical" ) == NULL ) - && ( FIND( aff_str, "compact" ) == NULL ) - && ( FIND( aff_str, "scatter" ) == NULL ) - && ( FIND( aff_str, "explicit" ) == NULL ) - && ( FIND( aff_str, "balanced" ) == NULL ) - && ( FIND( aff_str, "disabled" ) == NULL ) ) { - __kmp_affinity_notype = __kmp_stg_find( "KMP_AFFINITY" ); - } - else { - // - // A new affinity type is specified. - // Reset the affinity flags to their default values, - // in case this is called from kmp_set_defaults(). 
- // - __kmp_affinity_type = affinity_default; - __kmp_affinity_gran = affinity_gran_default; - __kmp_affinity_top_method = affinity_top_method_default; - __kmp_affinity_respect_mask = affinity_respect_mask_default; - } -# undef FIND - -#if OMP_40_ENABLED - // - // Also reset the affinity flags if OMP_PROC_BIND is specified. - // - aff_str = __kmp_env_blk_var( & block, "OMP_PROC_BIND" ); - if ( aff_str != NULL ) { - __kmp_affinity_type = affinity_default; - __kmp_affinity_gran = affinity_gran_default; - __kmp_affinity_top_method = affinity_top_method_default; - __kmp_affinity_respect_mask = affinity_respect_mask_default; - } -#endif /* OMP_40_ENABLED */ - } - -#endif /* KMP_AFFINITY_SUPPORTED */ - -#if OMP_40_ENABLED - // - // Set up the nested proc bind type vector. - // - if ( __kmp_nested_proc_bind.bind_types == NULL ) { - __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *) - KMP_INTERNAL_MALLOC( sizeof(kmp_proc_bind_t) ); - if ( __kmp_nested_proc_bind.bind_types == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - __kmp_nested_proc_bind.size = 1; - __kmp_nested_proc_bind.used = 1; -# if KMP_AFFINITY_SUPPORTED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_default; -# else - // default proc bind is false if affinity not supported - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif - - } -#endif /* OMP_40_ENABLED */ - - // - // Now process all of the settings. - // - for ( i = 0; i < block.count; ++ i ) { - __kmp_stg_parse( block.vars[ i ].name, block.vars[ i ].value ); - }; // for i - - // - // If user locks have been allocated yet, don't reset the lock vptr table. - // - if ( ! 
__kmp_init_user_locks ) { - if ( __kmp_user_lock_kind == lk_default ) { - __kmp_user_lock_kind = lk_queuing; - } -#if KMP_USE_DYNAMIC_LOCK - __kmp_init_dynamic_user_locks(); -#else - __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); -#endif - } - else { - KMP_DEBUG_ASSERT( string != NULL); // kmp_set_defaults() was called - KMP_DEBUG_ASSERT( __kmp_user_lock_kind != lk_default ); - // Binds lock functions again to follow the transition between different - // KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long - // as we do not allow lock kind changes after making a call to any - // user lock functions (true). -#if KMP_USE_DYNAMIC_LOCK - __kmp_init_dynamic_user_locks(); -#else - __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); -#endif - } - -#if KMP_AFFINITY_SUPPORTED - - if ( ! TCR_4(__kmp_init_middle) ) { - // - // Determine if the machine/OS is actually capable of supporting - // affinity. - // - const char *var = "KMP_AFFINITY"; -# if KMP_USE_HWLOC - if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); - } - hwloc_topology_ignore_type(__kmp_hwloc_topology, HWLOC_OBJ_CACHE); -# endif - if ( __kmp_affinity_type == affinity_disabled ) { - KMP_AFFINITY_DISABLE(); - } - else if ( ! KMP_AFFINITY_CAPABLE() ) { -# if KMP_USE_HWLOC - const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); - if(hwloc_topology_load(__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); - } - // Is the system capable of setting/getting this thread's affinity? - // also, is topology discovery possible? (pu indicates ability to discover processing units) - // and finally, were there no errors when calling any hwloc_* API functions? 
- if(topology_support->cpubind->set_thisthread_cpubind && - topology_support->cpubind->get_thisthread_cpubind && - topology_support->discovery->pu && - !__kmp_hwloc_error) - { - // enables affinity according to KMP_AFFINITY_CAPABLE() macro - KMP_AFFINITY_ENABLE(TRUE); - } else { - // indicate that hwloc didn't work and disable affinity - __kmp_hwloc_error = TRUE; - KMP_AFFINITY_DISABLE(); - } -# else - __kmp_affinity_determine_capable( var ); -# endif // KMP_USE_HWLOC - if ( ! KMP_AFFINITY_CAPABLE() ) { - if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings - && ( __kmp_affinity_type != affinity_default ) - && ( __kmp_affinity_type != affinity_none ) - && ( __kmp_affinity_type != affinity_disabled ) ) ) { - KMP_WARNING( AffNotSupported, var ); - } - __kmp_affinity_type = affinity_disabled; - __kmp_affinity_respect_mask = 0; - __kmp_affinity_gran = affinity_gran_fine; - } - } - -# if OMP_40_ENABLED - if ( __kmp_affinity_type == affinity_disabled ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } - else if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_true ) { - // - // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread. - // - __kmp_nested_proc_bind.bind_types[0] = proc_bind_spread; - } -# endif /* OMP_40_ENABLED */ - - if ( KMP_AFFINITY_CAPABLE() ) { - -# if KMP_GROUP_AFFINITY - - // - // Handle the Win 64 group affinity stuff if there are multiple - // processor groups, or if the user requested it, and OMP 4.0 - // affinity is not in effect. 
- // - if ( ( ( __kmp_num_proc_groups > 1 ) - && ( __kmp_affinity_type == affinity_default ) -# if OMP_40_ENABLED - && ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) ) -# endif - || ( __kmp_affinity_top_method == affinity_top_method_group ) ) { - if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) { - __kmp_affinity_respect_mask = FALSE; - } - if ( __kmp_affinity_type == affinity_default ) { - __kmp_affinity_type = affinity_compact; -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - } - if ( __kmp_affinity_top_method == affinity_top_method_default ) { - if ( __kmp_affinity_gran == affinity_gran_default ) { - __kmp_affinity_top_method = affinity_top_method_group; - __kmp_affinity_gran = affinity_gran_group; - } - else if ( __kmp_affinity_gran == affinity_gran_group ) { - __kmp_affinity_top_method = affinity_top_method_group; - } - else { - __kmp_affinity_top_method = affinity_top_method_all; - } - } - else if ( __kmp_affinity_top_method == affinity_top_method_group ) { - if ( __kmp_affinity_gran == affinity_gran_default ) { - __kmp_affinity_gran = affinity_gran_group; - } - else if ( ( __kmp_affinity_gran != affinity_gran_group ) - && ( __kmp_affinity_gran != affinity_gran_fine ) - && ( __kmp_affinity_gran != affinity_gran_thread ) ) { - char *str = NULL; - switch ( __kmp_affinity_gran ) { - case affinity_gran_core: str = "core"; break; - case affinity_gran_package: str = "package"; break; - case affinity_gran_node: str = "node"; break; - default: KMP_DEBUG_ASSERT( 0 ); - } - KMP_WARNING( AffGranTopGroup, var, str ); - __kmp_affinity_gran = affinity_gran_fine; - } - } - else { - if ( __kmp_affinity_gran == affinity_gran_default ) { - __kmp_affinity_gran = affinity_gran_core; - } - else if ( __kmp_affinity_gran == affinity_gran_group ) { - char *str = NULL; - switch ( __kmp_affinity_type ) { - case affinity_physical: str = "physical"; break; - case affinity_logical: str = "logical"; break; - case 
affinity_compact: str = "compact"; break; - case affinity_scatter: str = "scatter"; break; - case affinity_explicit: str = "explicit"; break; - // No MIC on windows, so no affinity_balanced case - default: KMP_DEBUG_ASSERT( 0 ); - } - KMP_WARNING( AffGranGroupType, var, str ); - __kmp_affinity_gran = affinity_gran_core; - } - } - } - else - -# endif /* KMP_GROUP_AFFINITY */ - - { - if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) { -# if KMP_GROUP_AFFINITY - if ( __kmp_num_proc_groups > 1 ) { - __kmp_affinity_respect_mask = FALSE; - } - else -# endif /* KMP_GROUP_AFFINITY */ - { - __kmp_affinity_respect_mask = TRUE; - } - } -# if OMP_40_ENABLED - if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel ) - && ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ) ) { - if ( __kmp_affinity_type == affinity_default ) { - __kmp_affinity_type = affinity_compact; - __kmp_affinity_dups = FALSE; - } - } - else -# endif /* OMP_40_ENABLED */ - if ( __kmp_affinity_type == affinity_default ) { -#if OMP_40_ENABLED -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; - } else -#endif - { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } -#endif /* OMP_40_ENABLED */ -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - __kmp_affinity_type = affinity_scatter; - } else -#endif - { - __kmp_affinity_type = affinity_none; - } - - } - if ( ( __kmp_affinity_gran == affinity_gran_default ) - && ( __kmp_affinity_gran_levels < 0 ) ) { -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - __kmp_affinity_gran = affinity_gran_fine; - } else -#endif - { - __kmp_affinity_gran = affinity_gran_core; - } - } - if ( __kmp_affinity_top_method == affinity_top_method_default ) { - __kmp_affinity_top_method = affinity_top_method_all; - } - } - } - - K_DIAG( 1, ( 
"__kmp_affinity_type == %d\n", __kmp_affinity_type ) ); - K_DIAG( 1, ( "__kmp_affinity_compact == %d\n", __kmp_affinity_compact ) ); - K_DIAG( 1, ( "__kmp_affinity_offset == %d\n", __kmp_affinity_offset ) ); - K_DIAG( 1, ( "__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose ) ); - K_DIAG( 1, ( "__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings ) ); - K_DIAG( 1, ( "__kmp_affinity_respect_mask == %d\n", __kmp_affinity_respect_mask ) ); - K_DIAG( 1, ( "__kmp_affinity_gran == %d\n", __kmp_affinity_gran ) ); - - KMP_DEBUG_ASSERT( __kmp_affinity_type != affinity_default); -# if OMP_40_ENABLED - KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ); -# endif - } - -#endif /* KMP_AFFINITY_SUPPORTED */ - - if ( __kmp_version ) { - __kmp_print_version_1(); - }; // if - - // Post-initialization step: some env. vars need their value's further processing - if ( string != NULL) { // kmp_set_defaults() was called - __kmp_aux_env_initialize( &block ); - } - - __kmp_env_blk_free( & block ); - - KMP_MB(); - -} // __kmp_env_initialize - - -void -__kmp_env_print() { - - kmp_env_blk_t block; - int i; - kmp_str_buf_t buffer; - - __kmp_stg_init(); - __kmp_str_buf_init( & buffer ); - - __kmp_env_blk_init( & block, NULL ); - __kmp_env_blk_sort( & block ); - - // Print real environment values. - __kmp_str_buf_print( & buffer, "\n%s\n\n", KMP_I18N_STR( UserSettings ) ); - for ( i = 0; i < block.count; ++ i ) { - char const * name = block.vars[ i ].name; - char const * value = block.vars[ i ].value; - if ( - ( KMP_STRLEN( name ) > 4 && strncmp( name, "KMP_", 4 ) == 0 ) - || strncmp( name, "OMP_", 4 ) == 0 - #ifdef KMP_GOMP_COMPAT - || strncmp( name, "GOMP_", 5 ) == 0 - #endif // KMP_GOMP_COMPAT - ) { - __kmp_str_buf_print( & buffer, " %s=%s\n", name, value ); - }; // if - }; // for - __kmp_str_buf_print( & buffer, "\n" ); - - // Print internal (effective) settings. 
- __kmp_str_buf_print( & buffer, "%s\n\n", KMP_I18N_STR( EffectiveSettings ) ); - for ( int i = 0; i < __kmp_stg_count; ++ i ) { - if ( __kmp_stg_table[ i ].print != NULL ) { - __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data ); - }; // if - }; // for - - __kmp_printf( "%s", buffer.str ); - - __kmp_env_blk_free( & block ); - __kmp_str_buf_free( & buffer ); - - __kmp_printf("\n"); - -} // __kmp_env_print - - -#if OMP_40_ENABLED -void -__kmp_env_print_2() { - - kmp_env_blk_t block; - kmp_str_buf_t buffer; - - __kmp_env_format = 1; - - __kmp_stg_init(); - __kmp_str_buf_init( & buffer ); - - __kmp_env_blk_init( & block, NULL ); - __kmp_env_blk_sort( & block ); - - __kmp_str_buf_print( & buffer, "\n%s\n", KMP_I18N_STR( DisplayEnvBegin ) ); - __kmp_str_buf_print( & buffer, " _OPENMP='%d'\n", __kmp_openmp_version ); - - for ( int i = 0; i < __kmp_stg_count; ++ i ) { - if ( __kmp_stg_table[ i ].print != NULL && - ( ( __kmp_display_env && strncmp( __kmp_stg_table[ i ].name, "OMP_", 4 ) == 0 ) || __kmp_display_env_verbose ) ) { - __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data ); - }; // if - }; // for - - __kmp_str_buf_print( & buffer, "%s\n", KMP_I18N_STR( DisplayEnvEnd ) ); - __kmp_str_buf_print( & buffer, "\n" ); - - __kmp_printf( "%s", buffer.str ); - - __kmp_env_blk_free( & block ); - __kmp_str_buf_free( & buffer ); - - __kmp_printf("\n"); - -} // __kmp_env_print_2 -#endif // OMP_40_ENABLED - -// end of file - +/* + * kmp_settings.c -- Initialize environment variables + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_wrapper_getpid.h" +#include "kmp_environment.h" +#include "kmp_atomic.h" +#include "kmp_itt.h" +#include "kmp_str.h" +#include "kmp_settings.h" +#include "kmp_i18n.h" +#include "kmp_io.h" + +static int __kmp_env_toPrint( char const * name, int flag ); + +bool __kmp_env_format = 0; // 0 - old format; 1 - new format +// ------------------------------------------------------------------------------------------------- +// Helper string functions. Subject to move to kmp_str. +// ------------------------------------------------------------------------------------------------- + +static double +__kmp_convert_to_double( char const * s ) +{ + double result; + + if ( KMP_SSCANF( s, "%lf", &result ) < 1 ) { + result = 0.0; + } + + return result; +} + +#ifdef KMP_DEBUG +static unsigned int +__kmp_readstr_with_sentinel(char *dest, char const * src, size_t len, char sentinel) { + unsigned int i; + for (i = 0; i < len; i++) { + if ((*src == '\0') || (*src == sentinel)) { + break; + } + *(dest++) = *(src++); + } + *dest = '\0'; + return i; +} +#endif + +static int +__kmp_match_with_sentinel( char const * a, char const * b, size_t len, char sentinel ) { + size_t l = 0; + + if(a == NULL) + a = ""; + if(b == NULL) + b = ""; + while(*a && *b && *b != sentinel) { + char ca = *a, cb = *b; + + if(ca >= 'a' && ca <= 'z') + ca -= 'a' - 'A'; + if(cb >= 'a' && cb <= 'z') + cb -= 'a' - 'A'; + if(ca != cb) + return FALSE; + ++l; + ++a; + ++b; + } + return l >= len; +} + +// +// Expected usage: +// token is the token to check for. +// buf is the string being parsed. +// *end returns the char after the end of the token. +// it is not modified unless a match occurs. 
+// +// +// Example 1: +// +// if (__kmp_match_str("token", buf, *end) { +// +// buf = end; +// } +// +// Example 2: +// +// if (__kmp_match_str("token", buf, *end) { +// char *save = **end; +// **end = sentinel; +// +// **end = save; +// buf = end; +// } +// + +static int +__kmp_match_str( char const *token, char const *buf, const char **end) { + + KMP_ASSERT(token != NULL); + KMP_ASSERT(buf != NULL); + KMP_ASSERT(end != NULL); + + while (*token && *buf) { + char ct = *token, cb = *buf; + + if(ct >= 'a' && ct <= 'z') + ct -= 'a' - 'A'; + if(cb >= 'a' && cb <= 'z') + cb -= 'a' - 'A'; + if (ct != cb) + return FALSE; + ++token; + ++buf; + } + if (*token) { + return FALSE; + } + *end = buf; + return TRUE; +} + + +static size_t +__kmp_round4k( size_t size ) { + size_t _4k = 4 * 1024; + if ( size & ( _4k - 1 ) ) { + size &= ~ ( _4k - 1 ); + if ( size <= KMP_SIZE_T_MAX - _4k ) { + size += _4k; // Round up if there is no overflow. + }; // if + }; // if + return size; +} // __kmp_round4k + + +/* + Here, multipliers are like __kmp_convert_to_seconds, but floating-point + values are allowed, and the return value is in milliseconds. The default + multiplier is milliseconds. Returns INT_MAX only if the value specified + matches "infinit*". Returns -1 if specified string is invalid. 
+*/ +int +__kmp_convert_to_milliseconds( char const * data ) +{ + int ret, nvalues, factor; + char mult, extra; + double value; + + if (data == NULL) return (-1); + if ( __kmp_str_match( "infinit", -1, data)) return (INT_MAX); + value = (double) 0.0; + mult = '\0'; + nvalues = KMP_SSCANF (data, "%lf%c%c", &value, &mult, &extra); + if (nvalues < 1) return (-1); + if (nvalues == 1) mult = '\0'; + if (nvalues == 3) return (-1); + + if (value < 0) return (-1); + + switch (mult) { + case '\0': + /* default is milliseconds */ + factor = 1; + break; + case 's': case 'S': + factor = 1000; + break; + case 'm': case 'M': + factor = 1000 * 60; + break; + case 'h': case 'H': + factor = 1000 * 60 * 60; + break; + case 'd': case 'D': + factor = 1000 * 24 * 60 * 60; + break; + default: + return (-1); + } + + if ( value >= ( (INT_MAX-1) / factor) ) + ret = INT_MAX-1; /* Don't allow infinite value here */ + else + ret = (int) (value * (double) factor); /* truncate to int */ + + return ret; +} + + +static int +__kmp_strcasecmp_with_sentinel( char const * a, char const * b, char sentinel ) { + if(a == NULL) + a = ""; + if(b == NULL) + b = ""; + while(*a && *b && *b != sentinel) { + char ca = *a, cb = *b; + + if(ca >= 'a' && ca <= 'z') + ca -= 'a' - 'A'; + if(cb >= 'a' && cb <= 'z') + cb -= 'a' - 'A'; + if(ca != cb) + return (int)(unsigned char)*a - (int)(unsigned char)*b; + ++a; + ++b; + } + return *a ? + (*b && *b != sentinel) ? (int)(unsigned char)*a - (int)(unsigned char)*b : 1 : + (*b && *b != sentinel) ? -1 : 0; +} + + +// ================================================================================================= +// Table structures and helper functions. 
+// ================================================================================================= + +typedef struct __kmp_setting kmp_setting_t; +typedef struct __kmp_stg_ss_data kmp_stg_ss_data_t; +typedef struct __kmp_stg_wp_data kmp_stg_wp_data_t; +typedef struct __kmp_stg_fr_data kmp_stg_fr_data_t; + +typedef void ( * kmp_stg_parse_func_t )( char const * name, char const * value, void * data ); +typedef void ( * kmp_stg_print_func_t )( kmp_str_buf_t * buffer, char const * name, void * data ); + +struct __kmp_setting { + char const * name; // Name of setting (environment variable). + kmp_stg_parse_func_t parse; // Parser function. + kmp_stg_print_func_t print; // Print function. + void * data; // Data passed to parser and printer. + int set; // Variable set during this "session" + // (__kmp_env_initialize() or kmp_set_defaults() call). + int defined; // Variable set in any "session". +}; // struct __kmp_setting + +struct __kmp_stg_ss_data { + size_t factor; // Default factor: 1 for KMP_STACKSIZE, 1024 for others. + kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). +}; // struct __kmp_stg_ss_data + +struct __kmp_stg_wp_data { + int omp; // 0 -- KMP_LIBRARY, 1 -- OMP_WAIT_POLICY. + kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). +}; // struct __kmp_stg_wp_data + +struct __kmp_stg_fr_data { + int force; // 0 -- KMP_DETERMINISTIC_REDUCTION, 1 -- KMP_FORCE_REDUCTION. + kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). +}; // struct __kmp_stg_fr_data + +static int +__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. + char const * name, // Name of variable. + char const * value, // Value of the variable. + kmp_setting_t * * rivals // List of rival settings (the list must include current one). +); + + +// ------------------------------------------------------------------------------------------------- +// Helper parse functions. 
+// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_bool( + char const * name, + char const * value, + int * out +) { + if ( __kmp_str_match_true( value ) ) { + * out = TRUE; + } else if (__kmp_str_match_false( value ) ) { + * out = FALSE; + } else { + __kmp_msg( + kmp_ms_warning, + KMP_MSG( BadBoolValue, name, value ), + KMP_HNT( ValidBoolValues ), + __kmp_msg_null + ); + }; // if +} // __kmp_stg_parse_bool + +static void +__kmp_stg_parse_size( + char const * name, + char const * value, + size_t size_min, + size_t size_max, + int * is_specified, + size_t * out, + size_t factor +) { + char const * msg = NULL; + #if KMP_OS_DARWIN + size_min = __kmp_round4k( size_min ); + size_max = __kmp_round4k( size_max ); + #endif // KMP_OS_DARWIN + if ( value ) { + if ( is_specified != NULL ) { + * is_specified = 1; + }; // if + __kmp_str_to_size( value, out, factor, & msg ); + if ( msg == NULL ) { + if ( * out > size_max ) { + * out = size_max; + msg = KMP_I18N_STR( ValueTooLarge ); + } else if ( * out < size_min ) { + * out = size_min; + msg = KMP_I18N_STR( ValueTooSmall ); + } else { + #if KMP_OS_DARWIN + size_t round4k = __kmp_round4k( * out ); + if ( * out != round4k ) { + * out = round4k; + msg = KMP_I18N_STR( NotMultiple4K ); + }; // if + #endif + }; // if + } else { + // If integer overflow occurred, * out == KMP_SIZE_T_MAX. Cut it to size_max silently. + if ( * out < size_min ) { + * out = size_max; + } + else if ( * out > size_max ) { + * out = size_max; + }; // if + }; // if + if ( msg != NULL ) { + // Message is not empty. Print warning. 
+ kmp_str_buf_t buf; + __kmp_str_buf_init( & buf ); + __kmp_str_buf_print_size( & buf, * out ); + KMP_WARNING( ParseSizeIntWarn, name, value, msg ); + KMP_INFORM( Using_str_Value, name, buf.str ); + __kmp_str_buf_free( & buf ); + }; // if + }; // if +} // __kmp_stg_parse_size + +#if KMP_AFFINITY_SUPPORTED +static void +__kmp_stg_parse_str( + char const * name, + char const * value, + char const * * out +) { + KMP_INTERNAL_FREE( (void *) * out ); + * out = __kmp_str_format( "%s", value ); +} // __kmp_stg_parse_str +#endif + +static void +__kmp_stg_parse_int( + char const * name, // I: Name of environment variable (used in warning messages). + char const * value, // I: Value of environment variable to parse. + int min, // I: Miminal allowed value. + int max, // I: Maximum allowed value. + int * out // O: Output (parsed) value. +) { + char const * msg = NULL; + kmp_uint64 uint = * out; + __kmp_str_to_uint( value, & uint, & msg ); + if ( msg == NULL ) { + if ( uint < (unsigned int)min ) { + msg = KMP_I18N_STR( ValueTooSmall ); + uint = min; + } else if ( uint > (unsigned int)max ) { + msg = KMP_I18N_STR( ValueTooLarge ); + uint = max; + }; // if + } else { + // If overflow occurred msg contains error message and uint is very big. Cut tmp it + // to INT_MAX. + if ( uint < (unsigned int)min ) { + uint = min; + } + else if ( uint > (unsigned int)max ) { + uint = max; + }; // if + }; // if + if ( msg != NULL ) { + // Message is not empty. Print warning. 
+ kmp_str_buf_t buf; + KMP_WARNING( ParseSizeIntWarn, name, value, msg ); + __kmp_str_buf_init( & buf ); + __kmp_str_buf_print( &buf, "%" KMP_UINT64_SPEC "", uint ); + KMP_INFORM( Using_uint64_Value, name, buf.str ); + __kmp_str_buf_free( &buf ); + }; // if + * out = uint; +} // __kmp_stg_parse_int + + +#if KMP_DEBUG_ADAPTIVE_LOCKS +static void +__kmp_stg_parse_file( + char const * name, + char const * value, + char * suffix, + char * * out +) { + char buffer[256]; + char *t; + int hasSuffix; + KMP_INTERNAL_FREE( (void *) * out ); + t = (char *) strrchr(value, '.'); + hasSuffix = t && __kmp_str_eqf( t, suffix ); + t = __kmp_str_format( "%s%s", value, hasSuffix ? "" : suffix ); + __kmp_expand_file_name( buffer, sizeof(buffer), t); + KMP_INTERNAL_FREE(t); + * out = __kmp_str_format( "%s", buffer ); +} // __kmp_stg_parse_file +#endif + +#ifdef KMP_DEBUG +static char * par_range_to_print = NULL; + +static void +__kmp_stg_parse_par_range( + char const * name, + char const * value, + int * out_range, + char * out_routine, + char * out_file, + int * out_lb, + int * out_ub +) { + size_t len = KMP_STRLEN( value + 1 ); + par_range_to_print = (char *) KMP_INTERNAL_MALLOC( len +1 ); + KMP_STRNCPY_S( par_range_to_print, len + 1, value, len + 1); + __kmp_par_range = +1; + __kmp_par_range_lb = 0; + __kmp_par_range_ub = INT_MAX; + for (;;) { + unsigned int len; + if (( value == NULL ) || ( *value == '\0' )) { + break; + } + if ( ! __kmp_strcasecmp_with_sentinel( "routine", value, '=' )) { + value = strchr( value, '=' ) + 1; + len = __kmp_readstr_with_sentinel( out_routine, + value, KMP_PAR_RANGE_ROUTINE_LEN - 1, ',' ); + if ( len == 0 ) { + goto par_range_error; + } + value = strchr( value, ',' ); + if ( value != NULL ) { + value++; + } + continue; + } + if ( ! 
__kmp_strcasecmp_with_sentinel( "filename", value, '=' )) { + value = strchr( value, '=' ) + 1; + len = __kmp_readstr_with_sentinel( out_file, + value, KMP_PAR_RANGE_FILENAME_LEN - 1, ',' ); + if ( len == 0) { + goto par_range_error; + } + value = strchr( value, ',' ); + if ( value != NULL ) { + value++; + } + continue; + } + if (( ! __kmp_strcasecmp_with_sentinel( "range", value, '=' )) + || ( ! __kmp_strcasecmp_with_sentinel( "incl_range", value, '=' ))) { + value = strchr( value, '=' ) + 1; + if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub ) != 2 ) { + goto par_range_error; + } + *out_range = +1; + value = strchr( value, ',' ); + if ( value != NULL ) { + value++; + } + continue; + } + if ( ! __kmp_strcasecmp_with_sentinel( "excl_range", value, '=' )) { + value = strchr( value, '=' ) + 1; + if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub) != 2 ) { + goto par_range_error; + } + *out_range = -1; + value = strchr( value, ',' ); + if ( value != NULL ) { + value++; + } + continue; + } + par_range_error: + KMP_WARNING( ParRangeSyntax, name ); + __kmp_par_range = 0; + break; + } +} // __kmp_stg_parse_par_range +#endif + +int +__kmp_initial_threads_capacity( int req_nproc ) +{ + int nth = 32; + + /* MIN( MAX( 32, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth) */ + if (nth < (4 * req_nproc)) + nth = (4 * req_nproc); + if (nth < (4 * __kmp_xproc)) + nth = (4 * __kmp_xproc); + + if (nth > __kmp_max_nth) + nth = __kmp_max_nth; + + return nth; +} + + +int +__kmp_default_tp_capacity( int req_nproc, int max_nth, int all_threads_specified) { + int nth = 128; + + if(all_threads_specified) + return max_nth; + /* MIN( MAX (128, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth ) */ + if (nth < (4 * req_nproc)) + nth = (4 * req_nproc); + if (nth < (4 * __kmp_xproc)) + nth = (4 * __kmp_xproc); + + if (nth > __kmp_max_nth) + nth = __kmp_max_nth; + + return nth; +} + + +// 
------------------------------------------------------------------------------------------------- +// Helper print functions. +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_print_bool( kmp_str_buf_t * buffer, char const * name, int value ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_BOOL; + } else { + __kmp_str_buf_print( buffer, " %s=%s\n", name, value ? "true" : "false" ); + } +} // __kmp_stg_print_bool + +static void +__kmp_stg_print_int( kmp_str_buf_t * buffer, char const * name, int value ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_INT; + } else { + __kmp_str_buf_print( buffer, " %s=%d\n", name, value ); + } +} // __kmp_stg_print_int + +static void +__kmp_stg_print_uint64( kmp_str_buf_t * buffer, char const * name, kmp_uint64 value ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_UINT64; + } else { + __kmp_str_buf_print( buffer, " %s=%" KMP_UINT64_SPEC "\n", name, value ); + } +} // __kmp_stg_print_uint64 + +static void +__kmp_stg_print_str( kmp_str_buf_t * buffer, char const * name, char const * value ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_STR; + } else { + __kmp_str_buf_print( buffer, " %s=%s\n", name, value ); + } +} // __kmp_stg_print_str + +static void +__kmp_stg_print_size( kmp_str_buf_t * buffer, char const * name, size_t value ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME_EX(name); + __kmp_str_buf_print_size( buffer, value ); + __kmp_str_buf_print( buffer, "'\n" ); + } else { + __kmp_str_buf_print( buffer, " %s=", name ); + __kmp_str_buf_print_size( buffer, value ); + __kmp_str_buf_print( buffer, "\n" ); + return; + } +} // __kmp_stg_print_size + + +// ================================================================================================= +// Parse and print functions. 
+// ================================================================================================= + +// ------------------------------------------------------------------------------------------------- +// KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_all_threads( char const * name, char const * value, void * data ) { + + kmp_setting_t * * rivals = (kmp_setting_t * *) data; + int rc; + rc = __kmp_stg_check_rivals( name, value, rivals ); + if ( rc ) { + return; + }; // if + if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { + __kmp_max_nth = __kmp_xproc; + __kmp_allThreadsSpecified = 1; + } else { + __kmp_stg_parse_int( name, value, 1, __kmp_sys_max_nth, & __kmp_max_nth ); + __kmp_allThreadsSpecified = 0; + } + K_DIAG( 1, ( "__kmp_max_nth == %d\n", __kmp_max_nth ) ); + +} // __kmp_stg_parse_all_threads + +static void +__kmp_stg_print_all_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_max_nth ); +} // __kmp_stg_print_all_threads + +// ------------------------------------------------------------------------------------------------- +// KMP_BLOCKTIME +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_blocktime( char const * name, char const * value, void * data ) { + __kmp_dflt_blocktime = __kmp_convert_to_milliseconds( value ); + if ( __kmp_dflt_blocktime < 0 ) { + __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; + __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidValue, name, value ), __kmp_msg_null ); + KMP_INFORM( Using_int_Value, name, __kmp_dflt_blocktime ); + __kmp_env_blocktime = FALSE; // Revert to default as if var not set. 
+ } else { + if ( __kmp_dflt_blocktime < KMP_MIN_BLOCKTIME ) { + __kmp_dflt_blocktime = KMP_MIN_BLOCKTIME; + __kmp_msg( kmp_ms_warning, KMP_MSG( SmallValue, name, value ), __kmp_msg_null ); + KMP_INFORM( MinValueUsing, name, __kmp_dflt_blocktime ); + } else if ( __kmp_dflt_blocktime > KMP_MAX_BLOCKTIME ) { + __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; + __kmp_msg( kmp_ms_warning, KMP_MSG( LargeValue, name, value ), __kmp_msg_null ); + KMP_INFORM( MaxValueUsing, name, __kmp_dflt_blocktime ); + }; // if + __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified. + }; // if + // calculate number of monitor thread wakeup intervals corresonding to blocktime. + __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); + __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); + K_DIAG( 1, ( "__kmp_env_blocktime == %d\n", __kmp_env_blocktime ) ); + if ( __kmp_env_blocktime ) { + K_DIAG( 1, ( "__kmp_dflt_blocktime == %d\n", __kmp_dflt_blocktime ) ); + } +} // __kmp_stg_parse_blocktime + +static void +__kmp_stg_print_blocktime( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_dflt_blocktime ); +} // __kmp_stg_print_blocktime + +// ------------------------------------------------------------------------------------------------- +// KMP_DUPLICATE_LIB_OK +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_duplicate_lib_ok( char const * name, char const * value, void * data ) { + /* actually this variable is not supported, + put here for compatibility with earlier builds and for static/dynamic combination */ + __kmp_stg_parse_bool( name, value, & __kmp_duplicate_library_ok ); +} // __kmp_stg_parse_duplicate_lib_ok + +static void +__kmp_stg_print_duplicate_lib_ok( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, 
__kmp_duplicate_library_ok ); +} // __kmp_stg_print_duplicate_lib_ok + +// ------------------------------------------------------------------------------------------------- +// KMP_INHERIT_FP_CONTROL +// ------------------------------------------------------------------------------------------------- + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +static void +__kmp_stg_parse_inherit_fp_control( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_inherit_fp_control ); +} // __kmp_stg_parse_inherit_fp_control + +static void +__kmp_stg_print_inherit_fp_control( kmp_str_buf_t * buffer, char const * name, void * data ) { +#if KMP_DEBUG + __kmp_stg_print_bool( buffer, name, __kmp_inherit_fp_control ); +#endif /* KMP_DEBUG */ +} // __kmp_stg_print_inherit_fp_control + +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +// ------------------------------------------------------------------------------------------------- +// KMP_LIBRARY, OMP_WAIT_POLICY +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_wait_policy( char const * name, char const * value, void * data ) { + + kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data; + int rc; + + rc = __kmp_stg_check_rivals( name, value, wait->rivals ); + if ( rc ) { + return; + }; // if + + if ( wait->omp ) { + if ( __kmp_str_match( "ACTIVE", 1, value ) ) { + __kmp_library = library_turnaround; + } else if ( __kmp_str_match( "PASSIVE", 1, value ) ) { + __kmp_library = library_throughput; + } else { + KMP_WARNING( StgInvalidValue, name, value ); + }; // if + } else { + if ( __kmp_str_match( "serial", 1, value ) ) { /* S */ + __kmp_library = library_serial; + } else if ( __kmp_str_match( "throughput", 2, value ) ) { /* TH */ + __kmp_library = library_throughput; + } else if ( __kmp_str_match( "turnaround", 2, value ) ) { /* TU */ + __kmp_library = library_turnaround; + } else if ( __kmp_str_match( "dedicated", 
1, value ) ) { /* D */ + __kmp_library = library_turnaround; + } else if ( __kmp_str_match( "multiuser", 1, value ) ) { /* M */ + __kmp_library = library_throughput; + } else { + KMP_WARNING( StgInvalidValue, name, value ); + }; // if + }; // if + __kmp_aux_set_library( __kmp_library ); + +} // __kmp_stg_parse_wait_policy + +static void +__kmp_stg_print_wait_policy( kmp_str_buf_t * buffer, char const * name, void * data ) { + + kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data; + char const * value = NULL; + + if ( wait->omp ) { + switch ( __kmp_library ) { + case library_turnaround : { + value = "ACTIVE"; + } break; + case library_throughput : { + value = "PASSIVE"; + } break; + }; // switch + } else { + switch ( __kmp_library ) { + case library_serial : { + value = "serial"; + } break; + case library_turnaround : { + value = "turnaround"; + } break; + case library_throughput : { + value = "throughput"; + } break; + }; // switch + }; // if + if ( value != NULL ) { + __kmp_stg_print_str( buffer, name, value ); + }; // if + +} // __kmp_stg_print_wait_policy + +// ------------------------------------------------------------------------------------------------- +// KMP_MONITOR_STACKSIZE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_monitor_stacksize( char const * name, char const * value, void * data ) { + __kmp_stg_parse_size( + name, + value, + __kmp_sys_min_stksize, + KMP_MAX_STKSIZE, + NULL, + & __kmp_monitor_stksize, + 1 + ); +} // __kmp_stg_parse_monitor_stacksize + +static void +__kmp_stg_print_monitor_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) { + if( __kmp_env_format ) { + if ( __kmp_monitor_stksize > 0 ) + KMP_STR_BUF_PRINT_NAME_EX(name); + else + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print( buffer, " %s", name ); + } + if ( __kmp_monitor_stksize > 0 ) { + __kmp_str_buf_print_size( buffer, __kmp_monitor_stksize ); + } else { + 
__kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } + if( __kmp_env_format && __kmp_monitor_stksize ) { + __kmp_str_buf_print( buffer, "'\n"); + } + +} // __kmp_stg_print_monitor_stacksize + +// ------------------------------------------------------------------------------------------------- +// KMP_SETTINGS +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_settings( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_settings ); +} // __kmp_stg_parse_settings + +static void +__kmp_stg_print_settings( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_settings ); +} // __kmp_stg_print_settings + +// ------------------------------------------------------------------------------------------------- +// KMP_STACKPAD +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_stackpad( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( + name, // Env var name + value, // Env var value + KMP_MIN_STKPADDING, // Min value + KMP_MAX_STKPADDING, // Max value + & __kmp_stkpadding // Var to initialize + ); +} // __kmp_stg_parse_stackpad + +static void +__kmp_stg_print_stackpad( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_stkpadding ); +} // __kmp_stg_print_stackpad + +// ------------------------------------------------------------------------------------------------- +// KMP_STACKOFFSET +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_stackoffset( char const * name, char const * value, void * data ) { + __kmp_stg_parse_size( + name, // Env var name + value, // Env var value + KMP_MIN_STKOFFSET, // Min value + KMP_MAX_STKOFFSET, // Max 
value + NULL, // + & __kmp_stkoffset, // Var to initialize + 1 + ); +} // __kmp_stg_parse_stackoffset + +static void +__kmp_stg_print_stackoffset( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_size( buffer, name, __kmp_stkoffset ); +} // __kmp_stg_print_stackoffset + +// ------------------------------------------------------------------------------------------------- +// KMP_STACKSIZE, OMP_STACKSIZE, GOMP_STACKSIZE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_stacksize( char const * name, char const * value, void * data ) { + + kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data; + int rc; + + rc = __kmp_stg_check_rivals( name, value, stacksize->rivals ); + if ( rc ) { + return; + }; // if + __kmp_stg_parse_size( + name, // Env var name + value, // Env var value + __kmp_sys_min_stksize, // Min value + KMP_MAX_STKSIZE, // Max value + & __kmp_env_stksize, // + & __kmp_stksize, // Var to initialize + stacksize->factor + ); + +} // __kmp_stg_parse_stacksize + +// This function is called for printing both KMP_STACKSIZE (factor is 1) and OMP_STACKSIZE (factor is 1024). +// Currently it is not possible to print OMP_STACKSIZE value in bytes. We can consider adding this +// possibility by a customer request in future. +static void +__kmp_stg_print_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) { + kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data; + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME_EX(name); + __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? __kmp_stksize / stacksize->factor : __kmp_stksize ); + __kmp_str_buf_print( buffer, "'\n" ); + } else { + __kmp_str_buf_print( buffer, " %s=", name ); + __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? 
__kmp_stksize / stacksize->factor : __kmp_stksize ); + __kmp_str_buf_print( buffer, "\n" ); + } +} // __kmp_stg_print_stacksize + +// ------------------------------------------------------------------------------------------------- +// KMP_VERSION +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_version( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_version ); +} // __kmp_stg_parse_version + +static void +__kmp_stg_print_version( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_version ); +} // __kmp_stg_print_version + +// ------------------------------------------------------------------------------------------------- +// KMP_WARNINGS +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_warnings( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_generate_warnings ); + if (__kmp_generate_warnings != kmp_warnings_off) { // AC: we have only 0/1 values documented, + __kmp_generate_warnings = kmp_warnings_explicit; // so reset it to explicit in order to + } // distinguish from default setting +} // __kmp_env_parse_warnings + +static void +__kmp_stg_print_warnings( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_generate_warnings ); // AC: TODO: change to print_int? +} // __kmp_env_print_warnings // (needs documentation change)... 
+ +// ------------------------------------------------------------------------------------------------- +// OMP_NESTED, OMP_NUM_THREADS +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_nested( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_dflt_nested ); +} // __kmp_stg_parse_nested + +static void +__kmp_stg_print_nested( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_dflt_nested ); +} // __kmp_stg_print_nested + +static void +__kmp_parse_nested_num_threads( const char *var, const char *env, kmp_nested_nthreads_t *nth_array ) +{ + const char *next = env; + const char *scan = next; + + int total = 0; // Count elements that were set. It'll be used as an array size + int prev_comma = FALSE; // For correct processing sequential commas + + // Count the number of values in the env. var string + for ( ; ; ) { + SKIP_WS( next ); + + if ( *next == '\0' ) { + break; + } + // Next character is not an integer or not a comma => end of list + if ( ( ( *next < '0' ) || ( *next > '9' ) ) && ( *next !=',') ) { + KMP_WARNING( NthSyntaxError, var, env ); + return; + } + // The next character is ',' + if ( *next == ',' ) { + // ',' is the fisrt character + if ( total == 0 || prev_comma ) { + total++; + } + prev_comma = TRUE; + next++; //skip ',' + SKIP_WS( next ); + } + // Next character is a digit + if ( *next >= '0' && *next <= '9' ) { + prev_comma = FALSE; + SKIP_DIGITS( next ); + total++; + const char *tmp = next; + SKIP_WS( tmp ); + if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) { + KMP_WARNING( NthSpacesNotAllowed, var, env ); + return; + } + } + } + KMP_DEBUG_ASSERT( total > 0 ); + if( total <= 0 ) { + KMP_WARNING( NthSyntaxError, var, env ); + return; + } + + // Check if the nested nthreads array exists + if ( ! 
nth_array->nth ) { + // Allocate an array of double size + nth_array->nth = ( int * )KMP_INTERNAL_MALLOC( sizeof( int ) * total * 2 ); + if ( nth_array->nth == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + } + nth_array->size = total * 2; + } else { + if ( nth_array->size < total ) { + // Increase the array size + do { + nth_array->size *= 2; + } while ( nth_array->size < total ); + + nth_array->nth = (int *) KMP_INTERNAL_REALLOC( + nth_array->nth, sizeof( int ) * nth_array->size ); + if ( nth_array->nth == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + } + } + } + nth_array->used = total; + int i = 0; + + prev_comma = FALSE; + total = 0; + // Save values in the array + for ( ; ; ) { + SKIP_WS( scan ); + if ( *scan == '\0' ) { + break; + } + // The next character is ',' + if ( *scan == ',' ) { + // ',' in the beginning of the list + if ( total == 0 ) { + // The value is supposed to be equal to __kmp_avail_proc but it is unknown at the moment. + // So let's put a placeholder (#threads = 0) to correct it later. + nth_array->nth[i++] = 0; + total++; + }else if ( prev_comma ) { + // Num threads is inherited from the previous level + nth_array->nth[i] = nth_array->nth[i - 1]; + i++; + total++; + } + prev_comma = TRUE; + scan++; //skip ',' + SKIP_WS( scan ); + } + // Next character is a digit + if ( *scan >= '0' && *scan <= '9' ) { + int num; + const char *buf = scan; + char const * msg = NULL; + prev_comma = FALSE; + SKIP_DIGITS( scan ); + total++; + + num = __kmp_str_to_int( buf, *scan ); + if ( num < KMP_MIN_NTH ) { + msg = KMP_I18N_STR( ValueTooSmall ); + num = KMP_MIN_NTH; + } else if ( num > __kmp_sys_max_nth ) { + msg = KMP_I18N_STR( ValueTooLarge ); + num = __kmp_sys_max_nth; + } + if ( msg != NULL ) { + // Message is not empty. Print warning. 
+ KMP_WARNING( ParseSizeIntWarn, var, env, msg ); + KMP_INFORM( Using_int_Value, var, num ); + } + nth_array->nth[i++] = num; + } + } +} + +static void +__kmp_stg_parse_num_threads( char const * name, char const * value, void * data ) { + // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers! + if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { + // The array of 1 element + __kmp_nested_nth.nth = ( int* )KMP_INTERNAL_MALLOC( sizeof( int ) ); + __kmp_nested_nth.size = __kmp_nested_nth.used = 1; + __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_xproc; + } else { + __kmp_parse_nested_num_threads( name, value, & __kmp_nested_nth ); + if ( __kmp_nested_nth.nth ) { + __kmp_dflt_team_nth = __kmp_nested_nth.nth[0]; + if ( __kmp_dflt_team_nth_ub < __kmp_dflt_team_nth ) { + __kmp_dflt_team_nth_ub = __kmp_dflt_team_nth; + } + } + }; // if + K_DIAG( 1, ( "__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth ) ); +} // __kmp_stg_parse_num_threads + +static void +__kmp_stg_print_num_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print( buffer, " %s", name ); + } + if ( __kmp_nested_nth.used ) { + kmp_str_buf_t buf; + __kmp_str_buf_init( &buf ); + for ( int i = 0; i < __kmp_nested_nth.used; i++) { + __kmp_str_buf_print( &buf, "%d", __kmp_nested_nth.nth[i] ); + if ( i < __kmp_nested_nth.used - 1 ) { + __kmp_str_buf_print( &buf, "," ); + } + } + __kmp_str_buf_print( buffer, "='%s'\n", buf.str ); + __kmp_str_buf_free(&buf); + } else { + __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } +} // __kmp_stg_print_num_threads + +// ------------------------------------------------------------------------------------------------- +// OpenMP 3.0: KMP_TASKING, OMP_MAX_ACTIVE_LEVELS, +// ------------------------------------------------------------------------------------------------- + +static void 
+__kmp_stg_parse_tasking( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, 0, (int)tskm_max, (int *)&__kmp_tasking_mode ); +} // __kmp_stg_parse_tasking + +static void +__kmp_stg_print_tasking( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_tasking_mode ); +} // __kmp_stg_print_tasking + +static void +__kmp_stg_parse_task_stealing( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, 0, 1, (int *)&__kmp_task_stealing_constraint ); +} // __kmp_stg_parse_task_stealing + +static void +__kmp_stg_print_task_stealing( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_task_stealing_constraint ); +} // __kmp_stg_print_task_stealing + +static void +__kmp_stg_parse_max_active_levels( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_dflt_max_active_levels ); +} // __kmp_stg_parse_max_active_levels + +static void +__kmp_stg_print_max_active_levels( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_dflt_max_active_levels ); +} // __kmp_stg_print_max_active_levels + +#if KMP_NESTED_HOT_TEAMS +// ------------------------------------------------------------------------------------------------- +// KMP_HOT_TEAMS_MAX_LEVEL, KMP_HOT_TEAMS_MODE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_hot_teams_level( char const * name, char const * value, void * data ) { + if ( TCR_4(__kmp_init_parallel) ) { + KMP_WARNING( EnvParallelWarn, name ); + return; + } // read value before first parallel only + __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_max_level ); +} // __kmp_stg_parse_hot_teams_level + +static void +__kmp_stg_print_hot_teams_level( 
kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_hot_teams_max_level ); +} // __kmp_stg_print_hot_teams_level + +static void +__kmp_stg_parse_hot_teams_mode( char const * name, char const * value, void * data ) { + if ( TCR_4(__kmp_init_parallel) ) { + KMP_WARNING( EnvParallelWarn, name ); + return; + } // read value before first parallel only + __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_mode ); +} // __kmp_stg_parse_hot_teams_mode + +static void +__kmp_stg_print_hot_teams_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_hot_teams_mode ); +} // __kmp_stg_print_hot_teams_mode + +#endif // KMP_NESTED_HOT_TEAMS + +// ------------------------------------------------------------------------------------------------- +// KMP_HANDLE_SIGNALS +// ------------------------------------------------------------------------------------------------- + +#if KMP_HANDLE_SIGNALS + +static void +__kmp_stg_parse_handle_signals( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_handle_signals ); +} // __kmp_stg_parse_handle_signals + +static void +__kmp_stg_print_handle_signals( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_handle_signals ); +} // __kmp_stg_print_handle_signals + +#endif // KMP_HANDLE_SIGNALS + +// ------------------------------------------------------------------------------------------------- +// KMP_X_DEBUG, KMP_DEBUG, KMP_DEBUG_BUF_*, KMP_DIAG +// ------------------------------------------------------------------------------------------------- + +#ifdef KMP_DEBUG + +#define KMP_STG_X_DEBUG( x ) \ + static void __kmp_stg_parse_##x##_debug( char const * name, char const * value, void * data ) { \ + __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_##x##_debug ); \ + } /* __kmp_stg_parse_x_debug */ \ + static void 
__kmp_stg_print_##x##_debug( kmp_str_buf_t * buffer, char const * name, void * data ) { \ + __kmp_stg_print_int( buffer, name, kmp_##x##_debug ); \ + } /* __kmp_stg_print_x_debug */ + +KMP_STG_X_DEBUG( a ) +KMP_STG_X_DEBUG( b ) +KMP_STG_X_DEBUG( c ) +KMP_STG_X_DEBUG( d ) +KMP_STG_X_DEBUG( e ) +KMP_STG_X_DEBUG( f ) + +#undef KMP_STG_X_DEBUG + +static void +__kmp_stg_parse_debug( char const * name, char const * value, void * data ) { + int debug = 0; + __kmp_stg_parse_int( name, value, 0, INT_MAX, & debug ); + if ( kmp_a_debug < debug ) { + kmp_a_debug = debug; + }; // if + if ( kmp_b_debug < debug ) { + kmp_b_debug = debug; + }; // if + if ( kmp_c_debug < debug ) { + kmp_c_debug = debug; + }; // if + if ( kmp_d_debug < debug ) { + kmp_d_debug = debug; + }; // if + if ( kmp_e_debug < debug ) { + kmp_e_debug = debug; + }; // if + if ( kmp_f_debug < debug ) { + kmp_f_debug = debug; + }; // if +} // __kmp_stg_parse_debug + +static void +__kmp_stg_parse_debug_buf( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_debug_buf ); + // !!! TODO: Move buffer initialization of of this file! It may works incorrectly if + // KMP_DEBUG_BUF is parsed before KMP_DEBUG_BUF_LINES or KMP_DEBUG_BUF_CHARS. 
+ if ( __kmp_debug_buf ) { + int i; + int elements = __kmp_debug_buf_lines * __kmp_debug_buf_chars; + + /* allocate and initialize all entries in debug buffer to empty */ + __kmp_debug_buffer = (char *) __kmp_page_allocate( elements * sizeof( char ) ); + for ( i = 0; i < elements; i += __kmp_debug_buf_chars ) + __kmp_debug_buffer[i] = '\0'; + + __kmp_debug_count = 0; + } + K_DIAG( 1, ( "__kmp_debug_buf = %d\n", __kmp_debug_buf ) ); +} // __kmp_stg_parse_debug_buf + +static void +__kmp_stg_print_debug_buf( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_debug_buf ); +} // __kmp_stg_print_debug_buf + +static void +__kmp_stg_parse_debug_buf_atomic( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_debug_buf_atomic ); +} // __kmp_stg_parse_debug_buf_atomic + +static void +__kmp_stg_print_debug_buf_atomic( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_debug_buf_atomic ); +} // __kmp_stg_print_debug_buf_atomic + +static void +__kmp_stg_parse_debug_buf_chars( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( + name, + value, + KMP_DEBUG_BUF_CHARS_MIN, + INT_MAX, + & __kmp_debug_buf_chars + ); +} // __kmp_stg_debug_parse_buf_chars + +static void +__kmp_stg_print_debug_buf_chars( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_debug_buf_chars ); +} // __kmp_stg_print_debug_buf_chars + +static void +__kmp_stg_parse_debug_buf_lines( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( + name, + value, + KMP_DEBUG_BUF_LINES_MIN, + INT_MAX, + & __kmp_debug_buf_lines + ); +} // __kmp_stg_parse_debug_buf_lines + +static void +__kmp_stg_print_debug_buf_lines( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_debug_buf_lines ); +} // __kmp_stg_print_debug_buf_lines + 
+static void +__kmp_stg_parse_diag( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_diag ); +} // __kmp_stg_parse_diag + +static void +__kmp_stg_print_diag( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, kmp_diag ); +} // __kmp_stg_print_diag + +#endif // KMP_DEBUG + +// ------------------------------------------------------------------------------------------------- +// KMP_ALIGN_ALLOC +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_align_alloc( char const * name, char const * value, void * data ) { + __kmp_stg_parse_size( + name, + value, + CACHE_LINE, + INT_MAX, + NULL, + & __kmp_align_alloc, + 1 + ); +} // __kmp_stg_parse_align_alloc + +static void +__kmp_stg_print_align_alloc( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_size( buffer, name, __kmp_align_alloc ); +} // __kmp_stg_print_align_alloc + +// ------------------------------------------------------------------------------------------------- +// KMP_PLAIN_BARRIER, KMP_FORKJOIN_BARRIER, KMP_REDUCTION_BARRIER +// ------------------------------------------------------------------------------------------------- + +// TODO: Remove __kmp_barrier_branch_bit_env_name varibale, remove loops from parse and print +// functions, pass required info through data argument. 
+ +static void +__kmp_stg_parse_barrier_branch_bit( char const * name, char const * value, void * data ) { + const char *var; + + /* ---------- Barrier branch bit control ------------ */ + for ( int i=bs_plain_barrier; i KMP_MAX_BRANCH_BITS ) { + __kmp_msg( kmp_ms_warning, KMP_MSG( BarrReleaseValueInvalid, name, comma + 1 ), __kmp_msg_null ); + __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt; + } + } + if ( __kmp_barrier_gather_branch_bits[ i ] > KMP_MAX_BRANCH_BITS ) { + KMP_WARNING( BarrGatherValueInvalid, name, value ); + KMP_INFORM( Using_uint_Value, name, __kmp_barrier_gather_bb_dflt ); + __kmp_barrier_gather_branch_bits[ i ] = __kmp_barrier_gather_bb_dflt; + } + } + K_DIAG(1, ("%s == %d,%d\n", __kmp_barrier_branch_bit_env_name[ i ], \ + __kmp_barrier_gather_branch_bits [ i ], \ + __kmp_barrier_release_branch_bits [ i ])) + } +} // __kmp_stg_parse_barrier_branch_bit + +static void +__kmp_stg_print_barrier_branch_bit( kmp_str_buf_t * buffer, char const * name, void * data ) { + const char *var; + for ( int i=bs_plain_barrier; irivals ); + if ( rc ) { + return; + }; // if + if ( reduction->force ) { + if( value != 0 ) { + if( __kmp_str_match( "critical", 0, value ) ) + __kmp_force_reduction_method = critical_reduce_block; + else if( __kmp_str_match( "atomic", 0, value ) ) + __kmp_force_reduction_method = atomic_reduce_block; + else if( __kmp_str_match( "tree", 0, value ) ) + __kmp_force_reduction_method = tree_reduce_block; + else { + KMP_FATAL( UnknownForceReduction, name, value ); + } + } + } else { + __kmp_stg_parse_bool( name, value, & __kmp_determ_red ); + if( __kmp_determ_red ) { + __kmp_force_reduction_method = tree_reduce_block; + } else { + __kmp_force_reduction_method = reduction_method_not_defined; + } + } + K_DIAG( 1, ( "__kmp_force_reduction_method == %d\n", __kmp_force_reduction_method ) ); +} // __kmp_stg_parse_force_reduction + +static void +__kmp_stg_print_force_reduction( kmp_str_buf_t * buffer, char const * name, void * 
data ) { + + kmp_stg_fr_data_t * reduction = (kmp_stg_fr_data_t *) data; + if ( reduction->force ) { + if( __kmp_force_reduction_method == critical_reduce_block) { + __kmp_stg_print_str( buffer, name, "critical"); + } else if ( __kmp_force_reduction_method == atomic_reduce_block ) { + __kmp_stg_print_str( buffer, name, "atomic"); + } else if ( __kmp_force_reduction_method == tree_reduce_block ) { + __kmp_stg_print_str( buffer, name, "tree"); + } else { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print( buffer, " %s", name ); + } + __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } + } else { + __kmp_stg_print_bool( buffer, name, __kmp_determ_red ); + } + + +} // __kmp_stg_print_force_reduction + +// ------------------------------------------------------------------------------------------------- +// KMP_STORAGE_MAP +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_storage_map( char const * name, char const * value, void * data ) { + if ( __kmp_str_match( "verbose", 1, value ) ) { + __kmp_storage_map = TRUE; + __kmp_storage_map_verbose = TRUE; + __kmp_storage_map_verbose_specified = TRUE; + + } else { + __kmp_storage_map_verbose = FALSE; + __kmp_stg_parse_bool( name, value, & __kmp_storage_map ); // !!! 
+ }; // if +} // __kmp_stg_parse_storage_map + +static void +__kmp_stg_print_storage_map( kmp_str_buf_t * buffer, char const * name, void * data ) { + if ( __kmp_storage_map_verbose || __kmp_storage_map_verbose_specified ) { + __kmp_stg_print_str( buffer, name, "verbose" ); + } else { + __kmp_stg_print_bool( buffer, name, __kmp_storage_map ); + } +} // __kmp_stg_print_storage_map + +// ------------------------------------------------------------------------------------------------- +// KMP_ALL_THREADPRIVATE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_all_threadprivate( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, __kmp_allThreadsSpecified ? __kmp_max_nth : 1, __kmp_max_nth, + & __kmp_tp_capacity ); +} // __kmp_stg_parse_all_threadprivate + +static void +__kmp_stg_print_all_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_tp_capacity ); + +} + +// ------------------------------------------------------------------------------------------------- +// KMP_FOREIGN_THREADS_THREADPRIVATE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_foreign_threads_threadprivate( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_foreign_tp ); +} // __kmp_stg_parse_foreign_threads_threadprivate + +static void +__kmp_stg_print_foreign_threads_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_foreign_tp ); +} // __kmp_stg_print_foreign_threads_threadprivate + + +// ------------------------------------------------------------------------------------------------- +// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD +// 
------------------------------------------------------------------------------------------------- + +#if KMP_AFFINITY_SUPPORTED +// +// Parse the proc id list. Return TRUE if successful, FALSE otherwise. +// +static int +__kmp_parse_affinity_proc_id_list( const char *var, const char *env, + const char **nextEnv, char **proclist ) +{ + const char *scan = env; + const char *next = scan; + int empty = TRUE; + + *proclist = NULL; + + for (;;) { + int start, end, stride; + + SKIP_WS(scan); + next = scan; + if (*next == '\0') { + break; + } + + if (*next == '{') { + int num; + next++; // skip '{' + SKIP_WS(next); + scan = next; + + // + // Read the first integer in the set. + // + if ((*next < '0') || (*next > '9')) { + KMP_WARNING( AffSyntaxError, var ); + return FALSE; + } + SKIP_DIGITS(next); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT(num >= 0); + + for (;;) { + // + // Check for end of set. + // + SKIP_WS(next); + if (*next == '}') { + next++; // skip '}' + break; + } + + // + // Skip optional comma. + // + if (*next == ',') { + next++; + } + SKIP_WS(next); + + // + // Read the next integer in the set. + // + scan = next; + if ((*next < '0') || (*next > '9')) { + KMP_WARNING( AffSyntaxError, var ); + return FALSE; + } + + SKIP_DIGITS(next); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT(num >= 0); + } + empty = FALSE; + + SKIP_WS(next); + if (*next == ',') { + next++; + } + scan = next; + continue; + } + + // + // Next character is not an integer => end of list + // + if ((*next < '0') || (*next > '9')) { + if (empty) { + KMP_WARNING( AffSyntaxError, var ); + return FALSE; + } + break; + } + + // + // Read the first integer. + // + SKIP_DIGITS(next); + start = __kmp_str_to_int(scan, *next); + KMP_ASSERT(start >= 0); + SKIP_WS(next); + + // + // If this isn't a range, then go on. + // + if (*next != '-') { + empty = FALSE; + + // + // Skip optional comma. + // + if (*next == ',') { + next++; + } + scan = next; + continue; + } + + // + // This is a range. 
Skip over the '-' and read in the 2nd int. + // + next++; // skip '-' + SKIP_WS(next); + scan = next; + if ((*next < '0') || (*next > '9')) { + KMP_WARNING( AffSyntaxError, var ); + return FALSE; + } + SKIP_DIGITS(next); + end = __kmp_str_to_int(scan, *next); + KMP_ASSERT(end >= 0); + + // + // Check for a stride parameter + // + stride = 1; + SKIP_WS(next); + if (*next == ':') { + // + // A stride is specified. Skip over the ':" and read the 3rd int. + // + int sign = +1; + next++; // skip ':' + SKIP_WS(next); + scan = next; + if (*next == '-') { + sign = -1; + next++; + SKIP_WS(next); + scan = next; + } + if ((*next < '0') || (*next > '9')) { + KMP_WARNING( AffSyntaxError, var ); + return FALSE; + } + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_ASSERT(stride >= 0); + stride *= sign; + } + + // + // Do some range checks. + // + if (stride == 0) { + KMP_WARNING( AffZeroStride, var ); + return FALSE; + } + if (stride > 0) { + if (start > end) { + KMP_WARNING( AffStartGreaterEnd, var, start, end ); + return FALSE; + } + } + else { + if (start < end) { + KMP_WARNING( AffStrideLessZero, var, start, end ); + return FALSE; + } + } + if ((end - start) / stride > 65536 ) { + KMP_WARNING( AffRangeTooBig, var, end, start, stride ); + return FALSE; + } + + empty = FALSE; + + // + // Skip optional comma. + // + SKIP_WS(next); + if (*next == ',') { + next++; + } + scan = next; + } + + *nextEnv = next; + + { + int len = next - env; + char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); + KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char)); + retlist[len] = '\0'; + *proclist = retlist; + } + return TRUE; +} + + +// +// If KMP_AFFINITY is specified without a type, then +// __kmp_affinity_notype should point to its setting. 
+// +static kmp_setting_t *__kmp_affinity_notype = NULL; + +static void +__kmp_parse_affinity_env( char const * name, char const * value, + enum affinity_type * out_type, + char ** out_proclist, + int * out_verbose, + int * out_warn, + int * out_respect, + enum affinity_gran * out_gran, + int * out_gran_levels, + int * out_dups, + int * out_compact, + int * out_offset +) +{ + char * buffer = NULL; // Copy of env var value. + char * buf = NULL; // Buffer for strtok_r() function. + char * next = NULL; // end of token / start of next. + const char * start; // start of current token (for err msgs) + int count = 0; // Counter of parsed integer numbers. + int number[ 2 ]; // Parsed numbers. + + // Guards. + int type = 0; + int proclist = 0; + int max_proclist = 0; + int verbose = 0; + int warnings = 0; + int respect = 0; + int gran = 0; + int dups = 0; + + KMP_ASSERT( value != NULL ); + + if ( TCR_4(__kmp_init_middle) ) { + KMP_WARNING( EnvMiddleWarn, name ); + __kmp_env_toPrint( name, 0 ); + return; + } + __kmp_env_toPrint( name, 1 ); + + buffer = __kmp_str_format( "%s", value ); // Copy env var to keep original intact. + buf = buffer; + SKIP_WS(buf); + + // Helper macros. + + // + // If we see a parse error, emit a warning and scan to the next ",". + // + // FIXME - there's got to be a better way to print an error + // message, hopefully without overwritting peices of buf. 
+ // + #define EMIT_WARN(skip,errlist) \ + { \ + char ch; \ + if (skip) { \ + SKIP_TO(next, ','); \ + } \ + ch = *next; \ + *next = '\0'; \ + KMP_WARNING errlist; \ + *next = ch; \ + if (skip) { \ + if (ch == ',') next++; \ + } \ + buf = next; \ + } + + #define _set_param(_guard,_var,_val) \ + { \ + if ( _guard == 0 ) { \ + _var = _val; \ + } else { \ + EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \ + }; \ + ++ _guard; \ + } + + #define set_type(val) _set_param( type, *out_type, val ) + #define set_verbose(val) _set_param( verbose, *out_verbose, val ) + #define set_warnings(val) _set_param( warnings, *out_warn, val ) + #define set_respect(val) _set_param( respect, *out_respect, val ) + #define set_dups(val) _set_param( dups, *out_dups, val ) + #define set_proclist(val) _set_param( proclist, *out_proclist, val ) + + #define set_gran(val,levels) \ + { \ + if ( gran == 0 ) { \ + *out_gran = val; \ + *out_gran_levels = levels; \ + } else { \ + EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \ + }; \ + ++ gran; \ + } + +# if OMP_40_ENABLED + KMP_DEBUG_ASSERT( ( __kmp_nested_proc_bind.bind_types != NULL ) + && ( __kmp_nested_proc_bind.used > 0 ) ); +# endif + + while ( *buf != '\0' ) { + start = next = buf; + + if (__kmp_match_str("none", buf, (const char **)&next)) { + set_type( affinity_none ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +# endif + buf = next; + } else if (__kmp_match_str("scatter", buf, (const char **)&next)) { + set_type( affinity_scatter ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +# endif + buf = next; + } else if (__kmp_match_str("compact", buf, (const char **)&next)) { + set_type( affinity_compact ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +# endif + buf = next; + } else if (__kmp_match_str("logical", buf, (const char **)&next)) { + set_type( affinity_logical ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] 
= proc_bind_intel; +# endif + buf = next; + } else if (__kmp_match_str("physical", buf, (const char **)&next)) { + set_type( affinity_physical ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +# endif + buf = next; + } else if (__kmp_match_str("explicit", buf, (const char **)&next)) { + set_type( affinity_explicit ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +# endif + buf = next; + } else if (__kmp_match_str("balanced", buf, (const char **)&next)) { + set_type( affinity_balanced ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +# endif + buf = next; + } else if (__kmp_match_str("disabled", buf, (const char **)&next)) { + set_type( affinity_disabled ); +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +# endif + buf = next; + } else if (__kmp_match_str("verbose", buf, (const char **)&next)) { + set_verbose( TRUE ); + buf = next; + } else if (__kmp_match_str("noverbose", buf, (const char **)&next)) { + set_verbose( FALSE ); + buf = next; + } else if (__kmp_match_str("warnings", buf, (const char **)&next)) { + set_warnings( TRUE ); + buf = next; + } else if (__kmp_match_str("nowarnings", buf, (const char **)&next)) { + set_warnings( FALSE ); + buf = next; + } else if (__kmp_match_str("respect", buf, (const char **)&next)) { + set_respect( TRUE ); + buf = next; + } else if (__kmp_match_str("norespect", buf, (const char **)&next)) { + set_respect( FALSE ); + buf = next; + } else if (__kmp_match_str("duplicates", buf, (const char **)&next) + || __kmp_match_str("dups", buf, (const char **)&next)) { + set_dups( TRUE ); + buf = next; + } else if (__kmp_match_str("noduplicates", buf, (const char **)&next) + || __kmp_match_str("nodups", buf, (const char **)&next)) { + set_dups( FALSE ); + buf = next; + } else if (__kmp_match_str("granularity", buf, (const char **)&next) + || __kmp_match_str("gran", buf, (const char **)&next)) { + SKIP_WS(next); 
+ if (*next != '=') { + EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); + continue; + } + next++; // skip '=' + SKIP_WS(next); + + buf = next; + if (__kmp_match_str("fine", buf, (const char **)&next)) { + set_gran( affinity_gran_fine, -1 ); + buf = next; + } else if (__kmp_match_str("thread", buf, (const char **)&next)) { + set_gran( affinity_gran_thread, -1 ); + buf = next; + } else if (__kmp_match_str("core", buf, (const char **)&next)) { + set_gran( affinity_gran_core, -1 ); + buf = next; + } else if (__kmp_match_str("package", buf, (const char **)&next)) { + set_gran( affinity_gran_package, -1 ); + buf = next; + } else if (__kmp_match_str("node", buf, (const char **)&next)) { + set_gran( affinity_gran_node, -1 ); + buf = next; +# if KMP_GROUP_AFFINITY + } else if (__kmp_match_str("group", buf, (const char **)&next)) { + set_gran( affinity_gran_group, -1 ); + buf = next; +# endif /* KMP_GROUP AFFINITY */ + } else if ((*buf >= '0') && (*buf <= '9')) { + int n; + next = buf; + SKIP_DIGITS(next); + n = __kmp_str_to_int( buf, *next ); + KMP_ASSERT(n >= 0); + buf = next; + set_gran( affinity_gran_default, n ); + } else { + EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); + continue; + } + } else if (__kmp_match_str("proclist", buf, (const char **)&next)) { + char *temp_proclist; + + SKIP_WS(next); + if (*next != '=') { + EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); + continue; + } + next++; // skip '=' + SKIP_WS(next); + if (*next != '[') { + EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); + continue; + } + next++; // skip '[' + buf = next; + if (! __kmp_parse_affinity_proc_id_list(name, buf, + (const char **)&next, &temp_proclist)) { + // + // warning already emitted. 
+ // + SKIP_TO(next, ']'); + if (*next == ']') next++; + SKIP_TO(next, ','); + if (*next == ',') next++; + buf = next; + continue; + } + if (*next != ']') { + EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); + continue; + } + next++; // skip ']' + set_proclist( temp_proclist ); + } else if ((*buf >= '0') && (*buf <= '9')) { + // Parse integer numbers -- permute and offset. + int n; + next = buf; + SKIP_DIGITS(next); + n = __kmp_str_to_int( buf, *next ); + KMP_ASSERT(n >= 0); + buf = next; + if ( count < 2 ) { + number[ count ] = n; + } else { + KMP_WARNING( AffManyParams, name, start ); + }; // if + ++ count; + } else { + EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); + continue; + } + + SKIP_WS(next); + if (*next == ',') { + next++; + SKIP_WS(next); + } + else if (*next != '\0') { + const char *temp = next; + EMIT_WARN( TRUE, ( ParseExtraCharsWarn, name, temp ) ); + continue; + } + buf = next; + } // while + + #undef EMIT_WARN + #undef _set_param + #undef set_type + #undef set_verbose + #undef set_warnings + #undef set_respect + #undef set_granularity + + KMP_INTERNAL_FREE( buffer ); + + if ( proclist ) { + if ( ! 
type ) { + KMP_WARNING( AffProcListNoType, name ); + __kmp_affinity_type = affinity_explicit; + } + else if ( __kmp_affinity_type != affinity_explicit ) { + KMP_WARNING( AffProcListNotExplicit, name ); + KMP_ASSERT( *out_proclist != NULL ); + KMP_INTERNAL_FREE( *out_proclist ); + *out_proclist = NULL; + } + } + switch ( *out_type ) { + case affinity_logical: + case affinity_physical: { + if ( count > 0 ) { + *out_offset = number[ 0 ]; + }; // if + if ( count > 1 ) { + KMP_WARNING( AffManyParamsForLogic, name, number[ 1 ] ); + }; // if + } break; + case affinity_balanced: { + if ( count > 0 ) { + *out_compact = number[ 0 ]; + }; // if + if ( count > 1 ) { + *out_offset = number[ 1 ]; + }; // if + + if ( __kmp_affinity_gran == affinity_gran_default ) { +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + if( __kmp_mic_type != non_mic ) { + if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { + KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" ); + } + __kmp_affinity_gran = affinity_gran_fine; + } else +#endif + { + if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { + KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" ); + } + __kmp_affinity_gran = affinity_gran_core; + } + } + } break; + case affinity_scatter: + case affinity_compact: { + if ( count > 0 ) { + *out_compact = number[ 0 ]; + }; // if + if ( count > 1 ) { + *out_offset = number[ 1 ]; + }; // if + } break; + case affinity_explicit: { + if ( *out_proclist == NULL ) { + KMP_WARNING( AffNoProcList, name ); + __kmp_affinity_type = affinity_none; + } + if ( count > 0 ) { + KMP_WARNING( AffNoParam, name, "explicit" ); + } + } break; + case affinity_none: { + if ( count > 0 ) { + KMP_WARNING( AffNoParam, name, "none" ); + }; // if + } break; + case affinity_disabled: { + if ( count > 0 ) { + KMP_WARNING( AffNoParam, name, "disabled" ); + }; // if + } break; + case affinity_default: { + if ( count > 0 ) { + KMP_WARNING( AffNoParam, name, "default" ); + }; // if + } break; + default: { + 
KMP_ASSERT( 0 ); + }; + }; // switch +} // __kmp_parse_affinity_env + +static void +__kmp_stg_parse_affinity( char const * name, char const * value, void * data ) +{ + kmp_setting_t **rivals = (kmp_setting_t **) data; + int rc; + + rc = __kmp_stg_check_rivals( name, value, rivals ); + if ( rc ) { + return; + } + + __kmp_parse_affinity_env( name, value, & __kmp_affinity_type, + & __kmp_affinity_proclist, & __kmp_affinity_verbose, + & __kmp_affinity_warnings, & __kmp_affinity_respect_mask, + & __kmp_affinity_gran, & __kmp_affinity_gran_levels, + & __kmp_affinity_dups, & __kmp_affinity_compact, + & __kmp_affinity_offset ); + +} // __kmp_stg_parse_affinity + +static void +__kmp_stg_print_affinity( kmp_str_buf_t * buffer, char const * name, void * data ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print( buffer, " %s='", name ); + } + if ( __kmp_affinity_verbose ) { + __kmp_str_buf_print( buffer, "%s,", "verbose"); + } else { + __kmp_str_buf_print( buffer, "%s,", "noverbose"); + } + if ( __kmp_affinity_warnings ) { + __kmp_str_buf_print( buffer, "%s,", "warnings"); + } else { + __kmp_str_buf_print( buffer, "%s,", "nowarnings"); + } + if ( KMP_AFFINITY_CAPABLE() ) { + if ( __kmp_affinity_respect_mask ) { + __kmp_str_buf_print( buffer, "%s,", "respect"); + } else { + __kmp_str_buf_print( buffer, "%s,", "norespect"); + } + switch ( __kmp_affinity_gran ) { + case affinity_gran_default: + __kmp_str_buf_print( buffer, "%s", "granularity=default,"); + break; + case affinity_gran_fine: + __kmp_str_buf_print( buffer, "%s", "granularity=fine,"); + break; + case affinity_gran_thread: + __kmp_str_buf_print( buffer, "%s", "granularity=thread,"); + break; + case affinity_gran_core: + __kmp_str_buf_print( buffer, "%s", "granularity=core,"); + break; + case affinity_gran_package: + __kmp_str_buf_print( buffer, "%s", "granularity=package,"); + break; + case affinity_gran_node: + __kmp_str_buf_print( buffer, "%s", "granularity=node,"); + 
break; +# if KMP_GROUP_AFFINITY + case affinity_gran_group: + __kmp_str_buf_print( buffer, "%s", "granularity=group,"); + break; +# endif /* KMP_GROUP_AFFINITY */ + } + if ( __kmp_affinity_dups ) { + __kmp_str_buf_print( buffer, "%s,", "duplicates"); + } else { + __kmp_str_buf_print( buffer, "%s,", "noduplicates"); + } + } + if ( ! KMP_AFFINITY_CAPABLE() ) { + __kmp_str_buf_print( buffer, "%s", "disabled" ); + } + else switch ( __kmp_affinity_type ){ + case affinity_none: + __kmp_str_buf_print( buffer, "%s", "none"); + break; + case affinity_physical: + __kmp_str_buf_print( buffer, "%s,%d", "physical", + __kmp_affinity_offset ); + break; + case affinity_logical: + __kmp_str_buf_print( buffer, "%s,%d", "logical", + __kmp_affinity_offset ); + break; + case affinity_compact: + __kmp_str_buf_print( buffer, "%s,%d,%d", "compact", + __kmp_affinity_compact, __kmp_affinity_offset ); + break; + case affinity_scatter: + __kmp_str_buf_print( buffer, "%s,%d,%d", "scatter", + __kmp_affinity_compact, __kmp_affinity_offset ); + break; + case affinity_explicit: + __kmp_str_buf_print( buffer, "%s=[%s],%s", "proclist", + __kmp_affinity_proclist, "explicit" ); + break; + case affinity_balanced: + __kmp_str_buf_print( buffer, "%s,%d,%d", "balanced", + __kmp_affinity_compact, __kmp_affinity_offset ); + break; + case affinity_disabled: + __kmp_str_buf_print( buffer, "%s", "disabled"); + break; + case affinity_default: + __kmp_str_buf_print( buffer, "%s", "default"); + break; + default: + __kmp_str_buf_print( buffer, "%s", ""); + break; + } + __kmp_str_buf_print( buffer, "'\n" ); +} //__kmp_stg_print_affinity + +# ifdef KMP_GOMP_COMPAT + +static void +__kmp_stg_parse_gomp_cpu_affinity( char const * name, char const * value, void * data ) +{ + const char * next = NULL; + char * temp_proclist; + kmp_setting_t **rivals = (kmp_setting_t **) data; + int rc; + + rc = __kmp_stg_check_rivals( name, value, rivals ); + if ( rc ) { + return; + } + + if ( TCR_4(__kmp_init_middle) ) { + KMP_WARNING( 
EnvMiddleWarn, name ); + __kmp_env_toPrint( name, 0 ); + return; + } + + __kmp_env_toPrint( name, 1 ); + + if ( __kmp_parse_affinity_proc_id_list( name, value, &next, + &temp_proclist )) { + SKIP_WS(next); + if (*next == '\0') { + // + // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=... + // + __kmp_affinity_proclist = temp_proclist; + __kmp_affinity_type = affinity_explicit; + __kmp_affinity_gran = affinity_gran_fine; +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +# endif + } + else { + KMP_WARNING( AffSyntaxError, name ); + if (temp_proclist != NULL) { + KMP_INTERNAL_FREE((void *)temp_proclist); + } + } + } + else { + // + // Warning already emitted + // + __kmp_affinity_type = affinity_none; +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +# endif + } +} // __kmp_stg_parse_gomp_cpu_affinity + +# endif /* KMP_GOMP_COMPAT */ + + +# if OMP_40_ENABLED + +/*----------------------------------------------------------------------------- + +The OMP_PLACES proc id list parser. Here is the grammar: + +place_list := place +place_list := place , place_list +place := num +place := place : num +place := place : num : signed +place := { subplacelist } +place := ! 
place // (lowest priority) +subplace_list := subplace +subplace_list := subplace , subplace_list +subplace := num +subplace := num : num +subplace := num : num : signed +signed := num +signed := + signed +signed := - signed + +-----------------------------------------------------------------------------*/ + +static int +__kmp_parse_subplace_list( const char *var, const char **scan ) +{ + const char *next; + + for (;;) { + int start, count, stride; + + // + // Read in the starting proc id + // + SKIP_WS(*scan); + if ((**scan < '0') || (**scan > '9')) { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + next = *scan; + SKIP_DIGITS(next); + start = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(start >= 0); + *scan = next; + + // + // valid follow sets are ',' ':' and '}' + // + SKIP_WS(*scan); + if (**scan == '}') { + break; + } + if (**scan == ',') { + (*scan)++; // skip ',' + continue; + } + if (**scan != ':') { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + (*scan)++; // skip ':' + + // + // Read count parameter + // + SKIP_WS(*scan); + if ((**scan < '0') || (**scan > '9')) { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + next = *scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(count >= 0); + *scan = next; + + // + // valid follow sets are ',' ':' and '}' + // + SKIP_WS(*scan); + if (**scan == '}') { + break; + } + if (**scan == ',') { + (*scan)++; // skip ',' + continue; + } + if (**scan != ':') { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + (*scan)++; // skip ':' + + // + // Read stride parameter + // + int sign = +1; + for (;;) { + SKIP_WS(*scan); + if (**scan == '+') { + (*scan)++; // skip '+' + continue; + } + if (**scan == '-') { + sign *= -1; + (*scan)++; // skip '-' + continue; + } + break; + } + SKIP_WS(*scan); + if ((**scan < '0') || (**scan > '9')) { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + 
return FALSE; + } + next = *scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(stride >= 0); + *scan = next; + stride *= sign; + + // + // valid follow sets are ',' and '}' + // + SKIP_WS(*scan); + if (**scan == '}') { + break; + } + if (**scan == ',') { + (*scan)++; // skip ',' + continue; + } + + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + return TRUE; +} + +static int +__kmp_parse_place( const char *var, const char ** scan ) +{ + const char *next; + + // + // valid follow sets are '{' '!' and num + // + SKIP_WS(*scan); + if (**scan == '{') { + (*scan)++; // skip '{' + if (! __kmp_parse_subplace_list(var, scan)) { + return FALSE; + } + if (**scan != '}') { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + (*scan)++; // skip '}' + } + else if (**scan == '!') { + (*scan)++; // skip '!' + return __kmp_parse_place(var, scan); //'!' has lower precedence than ':' + } + else if ((**scan >= '0') && (**scan <= '9')) { + next = *scan; + SKIP_DIGITS(next); + int proc = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(proc >= 0); + *scan = next; + } + else { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + return TRUE; +} + +static int +__kmp_parse_place_list( const char *var, const char *env, char **place_list ) +{ + const char *scan = env; + const char *next = scan; + + for (;;) { + int start, count, stride; + + if (! 
__kmp_parse_place(var, &scan)) { + return FALSE; + } + + // + // valid follow sets are ',' ':' and EOL + // + SKIP_WS(scan); + if (*scan == '\0') { + break; + } + if (*scan == ',') { + scan++; // skip ',' + continue; + } + if (*scan != ':') { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + scan++; // skip ':' + + // + // Read count parameter + // + SKIP_WS(scan); + if ((*scan < '0') || (*scan > '9')) { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + next = scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(scan, *next); + KMP_ASSERT(count >= 0); + scan = next; + + // + // valid follow sets are ',' ':' and EOL + // + SKIP_WS(scan); + if (*scan == '\0') { + break; + } + if (*scan == ',') { + scan++; // skip ',' + continue; + } + if (*scan != ':') { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + scan++; // skip ':' + + // + // Read stride parameter + // + int sign = +1; + for (;;) { + SKIP_WS(scan); + if (*scan == '+') { + scan++; // skip '+' + continue; + } + if (*scan == '-') { + sign *= -1; + scan++; // skip '-' + continue; + } + break; + } + SKIP_WS(scan); + if ((*scan < '0') || (*scan > '9')) { + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + next = scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_ASSERT(stride >= 0); + scan = next; + stride *= sign; + + // + // valid follow sets are ',' and EOL + // + SKIP_WS(scan); + if (*scan == '\0') { + break; + } + if (*scan == ',') { + scan++; // skip ',' + continue; + } + + KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); + return FALSE; + } + + { + int len = scan - env; + char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); + KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char)); + retlist[len] = '\0'; + *place_list = retlist; + } + return TRUE; +} + +static void +__kmp_stg_parse_places( char const * name, char const * value, void * data ) +{ + 
int count; + const char *scan = value; + const char *next = scan; + const char *kind = "\"threads\""; + kmp_setting_t **rivals = (kmp_setting_t **) data; + int rc; + + rc = __kmp_stg_check_rivals( name, value, rivals ); + if ( rc ) { + return; + } + + // + // If OMP_PROC_BIND is not specified but OMP_PLACES is, + // then let OMP_PROC_BIND default to true. + // + if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } + + //__kmp_affinity_num_places = 0; + + if ( __kmp_match_str( "threads", scan, &next ) ) { + scan = next; + __kmp_affinity_type = affinity_compact; + __kmp_affinity_gran = affinity_gran_thread; + __kmp_affinity_dups = FALSE; + kind = "\"threads\""; + } + else if ( __kmp_match_str( "cores", scan, &next ) ) { + scan = next; + __kmp_affinity_type = affinity_compact; + __kmp_affinity_gran = affinity_gran_core; + __kmp_affinity_dups = FALSE; + kind = "\"cores\""; + } + else if ( __kmp_match_str( "sockets", scan, &next ) ) { + scan = next; + __kmp_affinity_type = affinity_compact; + __kmp_affinity_gran = affinity_gran_package; + __kmp_affinity_dups = FALSE; + kind = "\"sockets\""; + } + else { + if ( __kmp_affinity_proclist != NULL ) { + KMP_INTERNAL_FREE( (void *)__kmp_affinity_proclist ); + __kmp_affinity_proclist = NULL; + } + if ( __kmp_parse_place_list( name, value, &__kmp_affinity_proclist ) ) { + __kmp_affinity_type = affinity_explicit; + __kmp_affinity_gran = affinity_gran_fine; + __kmp_affinity_dups = FALSE; + if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } + } + return; + } + + if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } + + SKIP_WS(scan); + if ( *scan == '\0' ) { + return; + } + + // + // Parse option count parameter in parentheses + // + if ( *scan != '(' ) { + KMP_WARNING( SyntaxErrorUsing, name, kind ); + 
return; + } + scan++; // skip '(' + + SKIP_WS(scan); + next = scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(scan, *next); + KMP_ASSERT(count >= 0); + scan = next; + + SKIP_WS(scan); + if ( *scan != ')' ) { + KMP_WARNING( SyntaxErrorUsing, name, kind ); + return; + } + scan++; // skip ')' + + SKIP_WS(scan); + if ( *scan != '\0' ) { + KMP_WARNING( ParseExtraCharsWarn, name, scan ); + } + __kmp_affinity_num_places = count; +} + +static void +__kmp_stg_print_places( kmp_str_buf_t * buffer, char const * name, + void * data ) +{ + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print( buffer, " %s", name ); + } + if ( ( __kmp_nested_proc_bind.used == 0 ) + || ( __kmp_nested_proc_bind.bind_types == NULL ) + || ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_false ) ) { + __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } + else if ( __kmp_affinity_type == affinity_explicit ) { + if ( __kmp_affinity_proclist != NULL ) { + __kmp_str_buf_print( buffer, "='%s'\n", __kmp_affinity_proclist ); + } + else { + __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } + } + else if ( __kmp_affinity_type == affinity_compact ) { + int num; + if ( __kmp_affinity_num_masks > 0 ) { + num = __kmp_affinity_num_masks; + } + else if ( __kmp_affinity_num_places > 0 ) { + num = __kmp_affinity_num_places; + } + else { + num = 0; + } + if ( __kmp_affinity_gran == affinity_gran_thread ) { + if ( num > 0 ) { + __kmp_str_buf_print( buffer, "='threads(%d)'\n", num ); + } + else { + __kmp_str_buf_print( buffer, "='threads'\n" ); + } + } + else if ( __kmp_affinity_gran == affinity_gran_core ) { + if ( num > 0 ) { + __kmp_str_buf_print( buffer, "='cores(%d)' \n", num ); + } + else { + __kmp_str_buf_print( buffer, "='cores'\n" ); + } + } + else if ( __kmp_affinity_gran == affinity_gran_package ) { + if ( num > 0 ) { + __kmp_str_buf_print( buffer, "='sockets(%d)'\n", num ); + } + else { + __kmp_str_buf_print( buffer, 
"='sockets'\n" ); + } + } + else { + __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } + } + else { + __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } +} + +# endif /* OMP_40_ENABLED */ + +# if (! OMP_40_ENABLED) + +static void +__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data ) +{ + int enabled; + kmp_setting_t **rivals = (kmp_setting_t **) data; + int rc; + + rc = __kmp_stg_check_rivals( name, value, rivals ); + if ( rc ) { + return; + } + + // + // in OMP 3.1, OMP_PROC_BIND is strictly a boolean + // + __kmp_stg_parse_bool( name, value, & enabled ); + if ( enabled ) { + // + // OMP_PROC_BIND => granularity=fine,scatter on MIC + // OMP_PROC_BIND => granularity=core,scatter elsewhere + // + __kmp_affinity_type = affinity_scatter; +# if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + if( __kmp_mic_type != non_mic ) + __kmp_affinity_gran = affinity_gran_fine; + else +# endif + __kmp_affinity_gran = affinity_gran_core; + } + else { + __kmp_affinity_type = affinity_none; + } +} // __kmp_parse_proc_bind + +# endif /* if (! 
OMP_40_ENABLED) */ + + +static void +__kmp_stg_parse_topology_method( char const * name, char const * value, + void * data ) { + if ( __kmp_str_match( "all", 1, value ) ) { + __kmp_affinity_top_method = affinity_top_method_all; + } +# if KMP_ARCH_X86 || KMP_ARCH_X86_64 + else if ( __kmp_str_match( "x2apic id", 9, value ) + || __kmp_str_match( "x2apic_id", 9, value ) + || __kmp_str_match( "x2apic-id", 9, value ) + || __kmp_str_match( "x2apicid", 8, value ) + || __kmp_str_match( "cpuid leaf 11", 13, value ) + || __kmp_str_match( "cpuid_leaf_11", 13, value ) + || __kmp_str_match( "cpuid-leaf-11", 13, value ) + || __kmp_str_match( "cpuid leaf11", 12, value ) + || __kmp_str_match( "cpuid_leaf11", 12, value ) + || __kmp_str_match( "cpuid-leaf11", 12, value ) + || __kmp_str_match( "cpuidleaf 11", 12, value ) + || __kmp_str_match( "cpuidleaf_11", 12, value ) + || __kmp_str_match( "cpuidleaf-11", 12, value ) + || __kmp_str_match( "cpuidleaf11", 11, value ) + || __kmp_str_match( "cpuid 11", 8, value ) + || __kmp_str_match( "cpuid_11", 8, value ) + || __kmp_str_match( "cpuid-11", 8, value ) + || __kmp_str_match( "cpuid11", 7, value ) + || __kmp_str_match( "leaf 11", 7, value ) + || __kmp_str_match( "leaf_11", 7, value ) + || __kmp_str_match( "leaf-11", 7, value ) + || __kmp_str_match( "leaf11", 6, value ) ) { + __kmp_affinity_top_method = affinity_top_method_x2apicid; + } + else if ( __kmp_str_match( "apic id", 7, value ) + || __kmp_str_match( "apic_id", 7, value ) + || __kmp_str_match( "apic-id", 7, value ) + || __kmp_str_match( "apicid", 6, value ) + || __kmp_str_match( "cpuid leaf 4", 12, value ) + || __kmp_str_match( "cpuid_leaf_4", 12, value ) + || __kmp_str_match( "cpuid-leaf-4", 12, value ) + || __kmp_str_match( "cpuid leaf4", 11, value ) + || __kmp_str_match( "cpuid_leaf4", 11, value ) + || __kmp_str_match( "cpuid-leaf4", 11, value ) + || __kmp_str_match( "cpuidleaf 4", 11, value ) + || __kmp_str_match( "cpuidleaf_4", 11, value ) + || __kmp_str_match( "cpuidleaf-4", 
11, value ) + || __kmp_str_match( "cpuidleaf4", 10, value ) + || __kmp_str_match( "cpuid 4", 7, value ) + || __kmp_str_match( "cpuid_4", 7, value ) + || __kmp_str_match( "cpuid-4", 7, value ) + || __kmp_str_match( "cpuid4", 6, value ) + || __kmp_str_match( "leaf 4", 6, value ) + || __kmp_str_match( "leaf_4", 6, value ) + || __kmp_str_match( "leaf-4", 6, value ) + || __kmp_str_match( "leaf4", 5, value ) ) { + __kmp_affinity_top_method = affinity_top_method_apicid; + } +# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + else if ( __kmp_str_match( "/proc/cpuinfo", 2, value ) + || __kmp_str_match( "cpuinfo", 5, value )) { + __kmp_affinity_top_method = affinity_top_method_cpuinfo; + } +# if KMP_GROUP_AFFINITY + else if ( __kmp_str_match( "group", 1, value ) ) { + __kmp_affinity_top_method = affinity_top_method_group; + } +# endif /* KMP_GROUP_AFFINITY */ + else if ( __kmp_str_match( "flat", 1, value ) ) { + __kmp_affinity_top_method = affinity_top_method_flat; + } +# if KMP_USE_HWLOC + else if ( __kmp_str_match( "hwloc", 1, value) ) { + __kmp_affinity_top_method = affinity_top_method_hwloc; + } +# endif + else { + KMP_WARNING( StgInvalidValue, name, value ); + } +} // __kmp_stg_parse_topology_method + +static void +__kmp_stg_print_topology_method( kmp_str_buf_t * buffer, char const * name, + void * data ) { +# if KMP_DEBUG + char const * value = NULL; + + switch ( __kmp_affinity_top_method ) { + case affinity_top_method_default: + value = "default"; + break; + + case affinity_top_method_all: + value = "all"; + break; + +# if KMP_ARCH_X86 || KMP_ARCH_X86_64 + case affinity_top_method_x2apicid: + value = "x2APIC id"; + break; + + case affinity_top_method_apicid: + value = "APIC id"; + break; +# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + case affinity_top_method_cpuinfo: + value = "cpuinfo"; + break; + +# if KMP_GROUP_AFFINITY + case affinity_top_method_group: + value = "group"; + break; +# endif /* KMP_GROUP_AFFINITY */ + + case affinity_top_method_flat: + value = 
"flat"; + break; + } + + if ( value != NULL ) { + __kmp_stg_print_str( buffer, name, value ); + } +# endif /* KMP_DEBUG */ +} // __kmp_stg_print_topology_method + +#endif /* KMP_AFFINITY_SUPPORTED */ + + +#if OMP_40_ENABLED + +// +// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X* +// OMP_PLACES / place-partition-var is not. +// +static void +__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data ) +{ + kmp_setting_t **rivals = (kmp_setting_t **) data; + int rc; + + rc = __kmp_stg_check_rivals( name, value, rivals ); + if ( rc ) { + return; + } + + // + // in OMP 4.0 OMP_PROC_BIND is a vector of proc_bind types. + // + KMP_DEBUG_ASSERT( (__kmp_nested_proc_bind.bind_types != NULL) + && ( __kmp_nested_proc_bind.used > 0 ) ); + + const char *buf = value; + const char *next; + int num; + SKIP_WS( buf ); + if ( (*buf >= '0') && (*buf <= '9') ) { + next = buf; + SKIP_DIGITS( next ); + num = __kmp_str_to_int( buf, *next ); + KMP_ASSERT( num >= 0 ); + buf = next; + SKIP_WS( buf ); + } + else { + num = -1; + } + + next = buf; + if ( __kmp_match_str( "disabled", buf, &next ) ) { + buf = next; + SKIP_WS( buf ); +# if KMP_AFFINITY_SUPPORTED + __kmp_affinity_type = affinity_disabled; +# endif /* KMP_AFFINITY_SUPPORTED */ + __kmp_nested_proc_bind.used = 1; + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } + else if ( ( num == (int)proc_bind_false ) + || __kmp_match_str( "false", buf, &next ) ) { + buf = next; + SKIP_WS( buf ); +# if KMP_AFFINITY_SUPPORTED + __kmp_affinity_type = affinity_none; +# endif /* KMP_AFFINITY_SUPPORTED */ + __kmp_nested_proc_bind.used = 1; + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } + else if ( ( num == (int)proc_bind_true ) + || __kmp_match_str( "true", buf, &next ) ) { + buf = next; + SKIP_WS( buf ); + __kmp_nested_proc_bind.used = 1; + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } + else { + // + // Count the number of values in the env var string + // + const 
char *scan; + int nelem = 1; + for ( scan = buf; *scan != '\0'; scan++ ) { + if ( *scan == ',' ) { + nelem++; + } + } + + // + // Create / expand the nested proc_bind array as needed + // + if ( __kmp_nested_proc_bind.size < nelem ) { + __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *) + KMP_INTERNAL_REALLOC( __kmp_nested_proc_bind.bind_types, + sizeof(kmp_proc_bind_t) * nelem ); + if ( __kmp_nested_proc_bind.bind_types == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + } + __kmp_nested_proc_bind.size = nelem; + } + __kmp_nested_proc_bind.used = nelem; + + // + // Save values in the nested proc_bind array + // + int i = 0; + for (;;) { + enum kmp_proc_bind_t bind; + + if ( ( num == (int)proc_bind_master ) + || __kmp_match_str( "master", buf, &next ) ) { + buf = next; + SKIP_WS( buf ); + bind = proc_bind_master; + } + else if ( ( num == (int)proc_bind_close ) + || __kmp_match_str( "close", buf, &next ) ) { + buf = next; + SKIP_WS( buf ); + bind = proc_bind_close; + } + else if ( ( num == (int)proc_bind_spread ) + || __kmp_match_str( "spread", buf, &next ) ) { + buf = next; + SKIP_WS( buf ); + bind = proc_bind_spread; + } + else { + KMP_WARNING( StgInvalidValue, name, value ); + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + __kmp_nested_proc_bind.used = 1; + return; + } + + __kmp_nested_proc_bind.bind_types[i++] = bind; + if ( i >= nelem ) { + break; + } + KMP_DEBUG_ASSERT( *buf == ',' ); + buf++; + SKIP_WS( buf ); + + // + // Read next value if it was specified as an integer + // + if ( (*buf >= '0') && (*buf <= '9') ) { + next = buf; + SKIP_DIGITS( next ); + num = __kmp_str_to_int( buf, *next ); + KMP_ASSERT( num >= 0 ); + buf = next; + SKIP_WS( buf ); + } + else { + num = -1; + } + } + SKIP_WS( buf ); + } + if ( *buf != '\0' ) { + KMP_WARNING( ParseExtraCharsWarn, name, buf ); + } +} + + +static void +__kmp_stg_print_proc_bind( kmp_str_buf_t * buffer, char const * name, + void * data ) +{ + int nelem = __kmp_nested_proc_bind.used; + if( 
__kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print( buffer, " %s", name ); + } + if ( nelem == 0 ) { + __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + } + else { + int i; + __kmp_str_buf_print( buffer, "='", name ); + for ( i = 0; i < nelem; i++ ) { + switch ( __kmp_nested_proc_bind.bind_types[i] ) { + case proc_bind_false: + __kmp_str_buf_print( buffer, "false" ); + break; + + case proc_bind_true: + __kmp_str_buf_print( buffer, "true" ); + break; + + case proc_bind_master: + __kmp_str_buf_print( buffer, "master" ); + break; + + case proc_bind_close: + __kmp_str_buf_print( buffer, "close" ); + break; + + case proc_bind_spread: + __kmp_str_buf_print( buffer, "spread" ); + break; + + case proc_bind_intel: + __kmp_str_buf_print( buffer, "intel" ); + break; + + case proc_bind_default: + __kmp_str_buf_print( buffer, "default" ); + break; + } + if ( i < nelem - 1 ) { + __kmp_str_buf_print( buffer, "," ); + } + } + __kmp_str_buf_print( buffer, "'\n" ); + } +} + +#endif /* OMP_40_ENABLED */ + + +// ------------------------------------------------------------------------------------------------- +// OMP_DYNAMIC +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_omp_dynamic( char const * name, char const * value, void * data ) +{ + __kmp_stg_parse_bool( name, value, & (__kmp_global.g.g_dynamic) ); +} // __kmp_stg_parse_omp_dynamic + +static void +__kmp_stg_print_omp_dynamic( kmp_str_buf_t * buffer, char const * name, void * data ) +{ + __kmp_stg_print_bool( buffer, name, __kmp_global.g.g_dynamic ); +} // __kmp_stg_print_omp_dynamic + +static void +__kmp_stg_parse_kmp_dynamic_mode( char const * name, char const * value, void * data ) +{ + if ( TCR_4(__kmp_init_parallel) ) { + KMP_WARNING( EnvParallelWarn, name ); + __kmp_env_toPrint( name, 0 ); + return; + } +#ifdef USE_LOAD_BALANCE + else if ( __kmp_str_match( "load balance", 2, value ) + || 
__kmp_str_match( "load_balance", 2, value ) + || __kmp_str_match( "load-balance", 2, value ) + || __kmp_str_match( "loadbalance", 2, value ) + || __kmp_str_match( "balance", 1, value ) ) { + __kmp_global.g.g_dynamic_mode = dynamic_load_balance; + } +#endif /* USE_LOAD_BALANCE */ + else if ( __kmp_str_match( "thread limit", 1, value ) + || __kmp_str_match( "thread_limit", 1, value ) + || __kmp_str_match( "thread-limit", 1, value ) + || __kmp_str_match( "threadlimit", 1, value ) + || __kmp_str_match( "limit", 2, value ) ) { + __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; + } + else if ( __kmp_str_match( "random", 1, value ) ) { + __kmp_global.g.g_dynamic_mode = dynamic_random; + } + else { + KMP_WARNING( StgInvalidValue, name, value ); + } +} //__kmp_stg_parse_kmp_dynamic_mode + +static void +__kmp_stg_print_kmp_dynamic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) +{ +#if KMP_DEBUG + if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { + __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) ); + } +# ifdef USE_LOAD_BALANCE + else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { + __kmp_stg_print_str( buffer, name, "load balance" ); + } +# endif /* USE_LOAD_BALANCE */ + else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { + __kmp_stg_print_str( buffer, name, "thread limit" ); + } + else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { + __kmp_stg_print_str( buffer, name, "random" ); + } + else { + KMP_ASSERT(0); + } +#endif /* KMP_DEBUG */ +} // __kmp_stg_print_kmp_dynamic_mode + + +#ifdef USE_LOAD_BALANCE + +// ------------------------------------------------------------------------------------------------- +// KMP_LOAD_BALANCE_INTERVAL +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_ld_balance_interval( char const * name, char const * value, void * data ) +{ + double interval = 
__kmp_convert_to_double( value ); + if ( interval >= 0 ) { + __kmp_load_balance_interval = interval; + } else { + KMP_WARNING( StgInvalidValue, name, value ); + }; // if +} // __kmp_stg_parse_load_balance_interval + +static void +__kmp_stg_print_ld_balance_interval( kmp_str_buf_t * buffer, char const * name, void * data ) { +#if KMP_DEBUG + __kmp_str_buf_print( buffer, " %s=%8.6f\n", name, __kmp_load_balance_interval ); +#endif /* KMP_DEBUG */ +} // __kmp_stg_print_load_balance_interval + +#endif /* USE_LOAD_BALANCE */ + +// ------------------------------------------------------------------------------------------------- +// KMP_INIT_AT_FORK +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_init_at_fork( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_need_register_atfork ); + if ( __kmp_need_register_atfork ) { + __kmp_need_register_atfork_specified = TRUE; + }; +} // __kmp_stg_parse_init_at_fork + +static void +__kmp_stg_print_init_at_fork( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_need_register_atfork_specified ); +} // __kmp_stg_print_init_at_fork + +// ------------------------------------------------------------------------------------------------- +// KMP_SCHEDULE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_schedule( char const * name, char const * value, void * data ) { + + if ( value != NULL ) { + size_t length = KMP_STRLEN( value ); + if ( length > INT_MAX ) { + KMP_WARNING( LongValue, name ); + } else { + char *semicolon; + if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'' ) + KMP_WARNING( UnbalancedQuotes, name ); + do { + char sentinel; + + semicolon = (char *) strchr( value, ';' ); + if( *value && semicolon != value ) { + char *comma = (char *) strchr( value, 
',' ); + + if ( comma ) { + ++comma; + sentinel = ','; + } else + sentinel = ';'; + if ( !__kmp_strcasecmp_with_sentinel( "static", value, sentinel ) ) { + if( !__kmp_strcasecmp_with_sentinel( "greedy", comma, ';' ) ) { + __kmp_static = kmp_sch_static_greedy; + continue; + } else if( !__kmp_strcasecmp_with_sentinel( "balanced", comma, ';' ) ) { + __kmp_static = kmp_sch_static_balanced; + continue; + } + } else if ( !__kmp_strcasecmp_with_sentinel( "guided", value, sentinel ) ) { + if ( !__kmp_strcasecmp_with_sentinel( "iterative", comma, ';' ) ) { + __kmp_guided = kmp_sch_guided_iterative_chunked; + continue; + } else if ( !__kmp_strcasecmp_with_sentinel( "analytical", comma, ';' ) ) { + /* analytical not allowed for too many threads */ + __kmp_guided = kmp_sch_guided_analytical_chunked; + continue; + } + } + KMP_WARNING( InvalidClause, name, value ); + } else + KMP_WARNING( EmptyClause, name ); + } while ( (value = semicolon ? semicolon + 1 : NULL) ); + } + }; // if + +} // __kmp_stg_parse__schedule + +static void +__kmp_stg_print_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print( buffer, " %s='", name ); + } + if ( __kmp_static == kmp_sch_static_greedy ) { + __kmp_str_buf_print( buffer, "%s", "static,greedy"); + } else if ( __kmp_static == kmp_sch_static_balanced ) { + __kmp_str_buf_print ( buffer, "%s", "static,balanced"); + } + if ( __kmp_guided == kmp_sch_guided_iterative_chunked ) { + __kmp_str_buf_print( buffer, ";%s'\n", "guided,iterative"); + } else if ( __kmp_guided == kmp_sch_guided_analytical_chunked ) { + __kmp_str_buf_print( buffer, ";%s'\n", "guided,analytical"); + } +} // __kmp_stg_print_schedule + +// ------------------------------------------------------------------------------------------------- +// OMP_SCHEDULE +// ------------------------------------------------------------------------------------------------- + +static void 
+__kmp_stg_parse_omp_schedule( char const * name, char const * value, void * data ) +{ + size_t length; + if( value ) { + length = KMP_STRLEN( value ); + if( length ) { + char *comma = (char *) strchr( value, ',' ); + if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'') + KMP_WARNING( UnbalancedQuotes, name ); + /* get the specified scheduling style */ + if (!__kmp_strcasecmp_with_sentinel("dynamic", value, ',')) /* DYNAMIC */ + __kmp_sched = kmp_sch_dynamic_chunked; + else if (!__kmp_strcasecmp_with_sentinel("guided", value, ',')) /* GUIDED */ + __kmp_sched = kmp_sch_guided_chunked; +// AC: TODO: add AUTO schedule, and pprobably remove TRAPEZOIDAL (OMP 3.0 does not allow it) + else if (!__kmp_strcasecmp_with_sentinel("auto", value, ',')) { /* AUTO */ + __kmp_sched = kmp_sch_auto; + if( comma ) { + __kmp_msg( kmp_ms_warning, KMP_MSG( IgnoreChunk, name, comma ), __kmp_msg_null ); + comma = NULL; + } + } + else if (!__kmp_strcasecmp_with_sentinel("trapezoidal", value, ',')) /* TRAPEZOIDAL */ + __kmp_sched = kmp_sch_trapezoidal; + else if (!__kmp_strcasecmp_with_sentinel("static", value, ',')) /* STATIC */ + __kmp_sched = kmp_sch_static; +#ifdef KMP_STATIC_STEAL_ENABLED + else if (KMP_ARCH_X86_64 && + !__kmp_strcasecmp_with_sentinel("static_steal", value, ',')) + __kmp_sched = kmp_sch_static_steal; +#endif + else { + KMP_WARNING( StgInvalidValue, name, value ); + value = NULL; /* skip processing of comma */ + } + if( value && comma ) { + __kmp_env_chunk = TRUE; + + if(__kmp_sched == kmp_sch_static) + __kmp_sched = kmp_sch_static_chunked; + ++comma; + __kmp_chunk = __kmp_str_to_int( comma, 0 ); + if ( __kmp_chunk < 1 ) { + __kmp_chunk = KMP_DEFAULT_CHUNK; + __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidChunk, name, comma ), __kmp_msg_null ); + KMP_INFORM( Using_int_Value, name, __kmp_chunk ); +// AC: next block commented out until KMP_DEFAULT_CHUNK != KMP_MIN_CHUNK (to improve code coverage :) +// The default chunk size is 1 according to standard, thus making 
KMP_MIN_CHUNK not 1 we would introduce mess: +// wrong chunk becomes 1, but it will be impossible to explicitely set 1, because it becomes KMP_MIN_CHUNK... +// } else if ( __kmp_chunk < KMP_MIN_CHUNK ) { +// __kmp_chunk = KMP_MIN_CHUNK; + } else if ( __kmp_chunk > KMP_MAX_CHUNK ) { + __kmp_chunk = KMP_MAX_CHUNK; + __kmp_msg( kmp_ms_warning, KMP_MSG( LargeChunk, name, comma ), __kmp_msg_null ); + KMP_INFORM( Using_int_Value, name, __kmp_chunk ); + } + } else + __kmp_env_chunk = FALSE; + } else + KMP_WARNING( EmptyString, name ); + } + K_DIAG(1, ("__kmp_static == %d\n", __kmp_static)) + K_DIAG(1, ("__kmp_guided == %d\n", __kmp_guided)) + K_DIAG(1, ("__kmp_sched == %d\n", __kmp_sched)) + K_DIAG(1, ("__kmp_chunk == %d\n", __kmp_chunk)) +} // __kmp_stg_parse_omp_schedule + +static void +__kmp_stg_print_omp_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) { + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print( buffer, " %s='", name ); + } + if ( __kmp_chunk ) { + switch ( __kmp_sched ) { + case kmp_sch_dynamic_chunked: + __kmp_str_buf_print( buffer, "%s,%d'\n", "dynamic", __kmp_chunk); + break; + case kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + __kmp_str_buf_print( buffer, "%s,%d'\n", "guided", __kmp_chunk); + break; + case kmp_sch_trapezoidal: + __kmp_str_buf_print( buffer, "%s,%d'\n", "trapezoidal", __kmp_chunk); + break; + case kmp_sch_static: + case kmp_sch_static_chunked: + case kmp_sch_static_balanced: + case kmp_sch_static_greedy: + __kmp_str_buf_print( buffer, "%s,%d'\n", "static", __kmp_chunk); + break; + case kmp_sch_static_steal: + __kmp_str_buf_print( buffer, "%s,%d'\n", "static_steal", __kmp_chunk); + break; + case kmp_sch_auto: + __kmp_str_buf_print( buffer, "%s,%d'\n", "auto", __kmp_chunk); + break; + } + } else { + switch ( __kmp_sched ) { + case kmp_sch_dynamic_chunked: + __kmp_str_buf_print( buffer, "%s'\n", "dynamic"); + break; + case 
kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + __kmp_str_buf_print( buffer, "%s'\n", "guided"); + break; + case kmp_sch_trapezoidal: + __kmp_str_buf_print( buffer, "%s'\n", "trapezoidal"); + break; + case kmp_sch_static: + case kmp_sch_static_chunked: + case kmp_sch_static_balanced: + case kmp_sch_static_greedy: + __kmp_str_buf_print( buffer, "%s'\n", "static"); + break; + case kmp_sch_static_steal: + __kmp_str_buf_print( buffer, "%s'\n", "static_steal"); + break; + case kmp_sch_auto: + __kmp_str_buf_print( buffer, "%s'\n", "auto"); + break; + } + } +} // __kmp_stg_print_omp_schedule + +// ------------------------------------------------------------------------------------------------- +// KMP_ATOMIC_MODE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_atomic_mode( char const * name, char const * value, void * data ) { + // Modes: 0 -- do not change default; 1 -- Intel perf mode, 2 -- GOMP compatibility mode. + int mode = 0; + int max = 1; + #ifdef KMP_GOMP_COMPAT + max = 2; + #endif /* KMP_GOMP_COMPAT */ + __kmp_stg_parse_int( name, value, 0, max, & mode ); + // TODO; parse_int is not very suitable for this case. In case of overflow it is better to use + // 0 rather that max value. + if ( mode > 0 ) { + __kmp_atomic_mode = mode; + }; // if +} // __kmp_stg_parse_atomic_mode + +static void +__kmp_stg_print_atomic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_atomic_mode ); +} // __kmp_stg_print_atomic_mode + + +// ------------------------------------------------------------------------------------------------- +// KMP_CONSISTENCY_CHECK +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_consistency_check( char const * name, char const * value, void * data ) { + if ( ! 
__kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { + // Note, this will not work from kmp_set_defaults because th_cons stack was not allocated + // for existed thread(s) thus the first __kmp_push_ will break with assertion. + // TODO: allocate th_cons if called from kmp_set_defaults. + __kmp_env_consistency_check = TRUE; + } else if ( ! __kmp_strcasecmp_with_sentinel( "none", value, 0 ) ) { + __kmp_env_consistency_check = FALSE; + } else { + KMP_WARNING( StgInvalidValue, name, value ); + }; // if +} // __kmp_stg_parse_consistency_check + +static void +__kmp_stg_print_consistency_check( kmp_str_buf_t * buffer, char const * name, void * data ) { +#if KMP_DEBUG + const char *value = NULL; + + if ( __kmp_env_consistency_check ) { + value = "all"; + } else { + value = "none"; + } + + if ( value != NULL ) { + __kmp_stg_print_str( buffer, name, value ); + } +#endif /* KMP_DEBUG */ +} // __kmp_stg_print_consistency_check + + +#if USE_ITT_BUILD +// ------------------------------------------------------------------------------------------------- +// KMP_ITT_PREPARE_DELAY +// ------------------------------------------------------------------------------------------------- + +#if USE_ITT_NOTIFY + +static void +__kmp_stg_parse_itt_prepare_delay( char const * name, char const * value, void * data ) +{ + // Experimental code: KMP_ITT_PREPARE_DELAY specifies numbert of loop iterations. 
+ int delay = 0; + __kmp_stg_parse_int( name, value, 0, INT_MAX, & delay ); + __kmp_itt_prepare_delay = delay; +} // __kmp_str_parse_itt_prepare_delay + +static void +__kmp_stg_print_itt_prepare_delay( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_uint64( buffer, name, __kmp_itt_prepare_delay ); + +} // __kmp_str_print_itt_prepare_delay + +#endif // USE_ITT_NOTIFY +#endif /* USE_ITT_BUILD */ + +// ------------------------------------------------------------------------------------------------- +// KMP_MALLOC_POOL_INCR +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_malloc_pool_incr( char const * name, char const * value, void * data ) { + __kmp_stg_parse_size( + name, + value, + KMP_MIN_MALLOC_POOL_INCR, + KMP_MAX_MALLOC_POOL_INCR, + NULL, + & __kmp_malloc_pool_incr, + 1 + ); +} // __kmp_stg_parse_malloc_pool_incr + +static void +__kmp_stg_print_malloc_pool_incr( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_size( buffer, name, __kmp_malloc_pool_incr ); + +} // _kmp_stg_print_malloc_pool_incr + + +#ifdef KMP_DEBUG + +// ------------------------------------------------------------------------------------------------- +// KMP_PAR_RANGE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_par_range_env( char const * name, char const * value, void * data ) { + __kmp_stg_parse_par_range( + name, + value, + & __kmp_par_range, + __kmp_par_range_routine, + __kmp_par_range_filename, + & __kmp_par_range_lb, + & __kmp_par_range_ub + ); +} // __kmp_stg_parse_par_range_env + +static void +__kmp_stg_print_par_range_env( kmp_str_buf_t * buffer, char const * name, void * data ) { + if (__kmp_par_range != 0) { + __kmp_stg_print_str( buffer, name, par_range_to_print ); + } +} // __kmp_stg_print_par_range_env + +// 
------------------------------------------------------------------------------------------------- +// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_yield_cycle( char const * name, char const * value, void * data ) { + int flag = __kmp_yield_cycle; + __kmp_stg_parse_bool( name, value, & flag ); + __kmp_yield_cycle = flag; +} // __kmp_stg_parse_yield_cycle + +static void +__kmp_stg_print_yield_cycle( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_yield_cycle ); +} // __kmp_stg_print_yield_cycle + +static void +__kmp_stg_parse_yield_on( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_on_count ); +} // __kmp_stg_parse_yield_on + +static void +__kmp_stg_print_yield_on( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_yield_on_count ); +} // __kmp_stg_print_yield_on + +static void +__kmp_stg_parse_yield_off( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_off_count ); +} // __kmp_stg_parse_yield_off + +static void +__kmp_stg_print_yield_off( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_yield_off_count ); +} // __kmp_stg_print_yield_off + +#endif + +// ------------------------------------------------------------------------------------------------- +// KMP_INIT_WAIT, KMP_NEXT_WAIT +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_init_wait( char const * name, char const * value, void * data ) { + int wait; + KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 ); + wait = __kmp_init_wait / 2; + __kmp_stg_parse_int( name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, & wait 
); + __kmp_init_wait = wait * 2; + KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 ); + __kmp_yield_init = __kmp_init_wait; +} // __kmp_stg_parse_init_wait + +static void +__kmp_stg_print_init_wait( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_init_wait ); +} // __kmp_stg_print_init_wait + +static void +__kmp_stg_parse_next_wait( char const * name, char const * value, void * data ) { + int wait; + KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 ); + wait = __kmp_next_wait / 2; + __kmp_stg_parse_int( name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, & wait ); + __kmp_next_wait = wait * 2; + KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 ); + __kmp_yield_next = __kmp_next_wait; +} // __kmp_stg_parse_next_wait + +static void +__kmp_stg_print_next_wait( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_next_wait ); +} //__kmp_stg_print_next_wait + + +// ------------------------------------------------------------------------------------------------- +// KMP_GTID_MODE +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_gtid_mode( char const * name, char const * value, void * data ) { + // + // Modes: + // 0 -- do not change default + // 1 -- sp search + // 2 -- use "keyed" TLS var, i.e. + // pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS) + // 3 -- __declspec(thread) TLS var in tdata section + // + int mode = 0; + int max = 2; + #ifdef KMP_TDATA_GTID + max = 3; + #endif /* KMP_TDATA_GTID */ + __kmp_stg_parse_int( name, value, 0, max, & mode ); + // TODO; parse_int is not very suitable for this case. In case of overflow it is better to use + // 0 rather that max value. 
+ if ( mode == 0 ) { + __kmp_adjust_gtid_mode = TRUE; + } + else { + __kmp_gtid_mode = mode; + __kmp_adjust_gtid_mode = FALSE; + }; // if +} // __kmp_str_parse_gtid_mode + +static void +__kmp_stg_print_gtid_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { + if ( __kmp_adjust_gtid_mode ) { + __kmp_stg_print_int( buffer, name, 0 ); + } + else { + __kmp_stg_print_int( buffer, name, __kmp_gtid_mode ); + } +} // __kmp_stg_print_gtid_mode + + +// ------------------------------------------------------------------------------------------------- +// KMP_NUM_LOCKS_IN_BLOCK +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_lock_block( char const * name, char const * value, void * data ) { + __kmp_stg_parse_int( name, value, 0, KMP_INT_MAX, & __kmp_num_locks_in_block ); +} // __kmp_str_parse_lock_block + +static void +__kmp_stg_print_lock_block( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_int( buffer, name, __kmp_num_locks_in_block ); +} // __kmp_stg_print_lock_block + +// ------------------------------------------------------------------------------------------------- +// KMP_LOCK_KIND +// ------------------------------------------------------------------------------------------------- + +#if KMP_USE_DYNAMIC_LOCK +# define KMP_STORE_LOCK_SEQ(a) (__kmp_user_lock_seq = lockseq_##a) +#else +# define KMP_STORE_LOCK_SEQ(a) +#endif + +static void +__kmp_stg_parse_lock_kind( char const * name, char const * value, void * data ) { + if ( __kmp_init_user_locks ) { + KMP_WARNING( EnvLockWarn, name ); + return; + } + + if ( __kmp_str_match( "tas", 2, value ) + || __kmp_str_match( "test and set", 2, value ) + || __kmp_str_match( "test_and_set", 2, value ) + || __kmp_str_match( "test-and-set", 2, value ) + || __kmp_str_match( "test andset", 2, value ) + || __kmp_str_match( "test_andset", 2, value ) + || __kmp_str_match( "test-andset", 2, value ) + || 
__kmp_str_match( "testand set", 2, value ) + || __kmp_str_match( "testand_set", 2, value ) + || __kmp_str_match( "testand-set", 2, value ) + || __kmp_str_match( "testandset", 2, value ) ) { + __kmp_user_lock_kind = lk_tas; + KMP_STORE_LOCK_SEQ(tas); + } +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) + else if ( __kmp_str_match( "futex", 1, value ) ) { + if ( __kmp_futex_determine_capable() ) { + __kmp_user_lock_kind = lk_futex; + KMP_STORE_LOCK_SEQ(futex); + } + else { + KMP_WARNING( FutexNotSupported, name, value ); + } + } +#endif + else if ( __kmp_str_match( "ticket", 2, value ) ) { + __kmp_user_lock_kind = lk_ticket; + KMP_STORE_LOCK_SEQ(ticket); + } + else if ( __kmp_str_match( "queuing", 1, value ) + || __kmp_str_match( "queue", 1, value ) ) { + __kmp_user_lock_kind = lk_queuing; + KMP_STORE_LOCK_SEQ(queuing); + } + else if ( __kmp_str_match( "drdpa ticket", 1, value ) + || __kmp_str_match( "drdpa_ticket", 1, value ) + || __kmp_str_match( "drdpa-ticket", 1, value ) + || __kmp_str_match( "drdpaticket", 1, value ) + || __kmp_str_match( "drdpa", 1, value ) ) { + __kmp_user_lock_kind = lk_drdpa; + KMP_STORE_LOCK_SEQ(drdpa); + } +#if KMP_USE_ADAPTIVE_LOCKS + else if ( __kmp_str_match( "adaptive", 1, value ) ) { + if( __kmp_cpuinfo.rtm ) { // ??? Is cpuinfo available here? 
+ __kmp_user_lock_kind = lk_adaptive; + KMP_STORE_LOCK_SEQ(adaptive); + } else { + KMP_WARNING( AdaptiveNotSupported, name, value ); + __kmp_user_lock_kind = lk_queuing; + KMP_STORE_LOCK_SEQ(queuing); + } + } +#endif // KMP_USE_ADAPTIVE_LOCKS +#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX + else if ( __kmp_str_match("rtm", 1, value) ) { + if ( __kmp_cpuinfo.rtm ) { + __kmp_user_lock_kind = lk_rtm; + KMP_STORE_LOCK_SEQ(rtm); + } else { + KMP_WARNING( AdaptiveNotSupported, name, value ); + __kmp_user_lock_kind = lk_queuing; + KMP_STORE_LOCK_SEQ(queuing); + } + } + else if ( __kmp_str_match("hle", 1, value) ) { + __kmp_user_lock_kind = lk_hle; + KMP_STORE_LOCK_SEQ(hle); + } +#endif + else { + KMP_WARNING( StgInvalidValue, name, value ); + } +} + +static void +__kmp_stg_print_lock_kind( kmp_str_buf_t * buffer, char const * name, void * data ) { + const char *value = NULL; + + switch ( __kmp_user_lock_kind ) { + case lk_default: + value = "default"; + break; + + case lk_tas: + value = "tas"; + break; + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) + case lk_futex: + value = "futex"; + break; +#endif + +#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX + case lk_rtm: + value = "rtm"; + break; + + case lk_hle: + value = "hle"; + break; +#endif + + case lk_ticket: + value = "ticket"; + break; + + case lk_queuing: + value = "queuing"; + break; + + case lk_drdpa: + value = "drdpa"; + break; +#if KMP_USE_ADAPTIVE_LOCKS + case lk_adaptive: + value = "adaptive"; + break; +#endif + } + + if ( value != NULL ) { + __kmp_stg_print_str( buffer, name, value ); + } +} + +#if KMP_USE_ADAPTIVE_LOCKS + +// ------------------------------------------------------------------------------------------------- +// KMP_ADAPTIVE_LOCK_PROPS, KMP_SPECULATIVE_STATSFILE +// ------------------------------------------------------------------------------------------------- + +// Parse out values for the tunable parameters from a string of the form +// KMP_ADAPTIVE_LOCK_PROPS=max_soft_retries[,max_badness] 
+static void +__kmp_stg_parse_adaptive_lock_props( const char *name, const char *value, void *data ) +{ + int max_retries = 0; + int max_badness = 0; + + const char *next = value; + + int total = 0; // Count elements that were set. It'll be used as an array size + int prev_comma = FALSE; // For correct processing sequential commas + int i; + + // Save values in the structure __kmp_speculative_backoff_params + // Run only 3 iterations because it is enough to read two values or find a syntax error + for ( i = 0; i < 3 ; i++) { + SKIP_WS( next ); + + if ( *next == '\0' ) { + break; + } + // Next character is not an integer or not a comma OR number of values > 2 => end of list + if ( ( ( *next < '0' || *next > '9' ) && *next !=',' ) || total > 2 ) { + KMP_WARNING( EnvSyntaxError, name, value ); + return; + } + // The next character is ',' + if ( *next == ',' ) { + // ',' is the fisrt character + if ( total == 0 || prev_comma ) { + total++; + } + prev_comma = TRUE; + next++; //skip ',' + SKIP_WS( next ); + } + // Next character is a digit + if ( *next >= '0' && *next <= '9' ) { + int num; + const char *buf = next; + char const * msg = NULL; + prev_comma = FALSE; + SKIP_DIGITS( next ); + total++; + + const char *tmp = next; + SKIP_WS( tmp ); + if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) { + KMP_WARNING( EnvSpacesNotAllowed, name, value ); + return; + } + + num = __kmp_str_to_int( buf, *next ); + if ( num < 0 ) { // The number of retries should be >= 0 + msg = KMP_I18N_STR( ValueTooSmall ); + num = 1; + } else if ( num > KMP_INT_MAX ) { + msg = KMP_I18N_STR( ValueTooLarge ); + num = KMP_INT_MAX; + } + if ( msg != NULL ) { + // Message is not empty. Print warning. 
+ KMP_WARNING( ParseSizeIntWarn, name, value, msg ); + KMP_INFORM( Using_int_Value, name, num ); + } + if( total == 1 ) { + max_retries = num; + } else if( total == 2 ) { + max_badness = num; + } + } + } + KMP_DEBUG_ASSERT( total > 0 ); + if( total <= 0 ) { + KMP_WARNING( EnvSyntaxError, name, value ); + return; + } + __kmp_adaptive_backoff_params.max_soft_retries = max_retries; + __kmp_adaptive_backoff_params.max_badness = max_badness; +} + + +static void +__kmp_stg_print_adaptive_lock_props(kmp_str_buf_t * buffer, char const * name, void * data ) +{ + if( __kmp_env_format ) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print( buffer, " %s='", name ); + } + __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_adaptive_backoff_params.max_soft_retries, + __kmp_adaptive_backoff_params.max_badness ); +} // __kmp_stg_print_adaptive_lock_props + +#if KMP_DEBUG_ADAPTIVE_LOCKS + +static void +__kmp_stg_parse_speculative_statsfile( char const * name, char const * value, void * data ) { + __kmp_stg_parse_file( name, value, "", & __kmp_speculative_statsfile ); +} // __kmp_stg_parse_speculative_statsfile + +static void +__kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name, void * data ) { + if ( __kmp_str_match( "-", 0, __kmp_speculative_statsfile ) ) { + __kmp_stg_print_str( buffer, name, "stdout" ); + } else { + __kmp_stg_print_str( buffer, name, __kmp_speculative_statsfile ); + } + +} // __kmp_stg_print_speculative_statsfile + +#endif // KMP_DEBUG_ADAPTIVE_LOCKS + +#endif // KMP_USE_ADAPTIVE_LOCKS + +// ------------------------------------------------------------------------------------------------- +// KMP_PLACE_THREADS +// ------------------------------------------------------------------------------------------------- + +static void +__kmp_stg_parse_place_threads( char const * name, char const * value, void * data ) { + // Value example: 5Cx2Tx15O + // Which means "use 5 cores with offset 15, 2 threads per core" + // AC: extended 
// to sockets level, examples of
    // "use 2 sockets with offset 6, 2 cores with offset 2 per socket, 2 threads per core":
    // 2s,6o,2c,2o,2t; 2s,6o,2c,2t,2o; 2s@6,2c@2,2t
    // To not break legacy code core-offset can be last;
    // postfix "o" or prefix @ can be offset designator.
    // Note: not all syntax errors are analyzed, some may be skipped.
    //
    // The parser reads up to five "<number><designator>" items in sequence. flagS/flagC/flagT
    // record that the socket/core/thread counts have been seen (or defaulted), flagSO/flagCO
    // record that an offset was given with '@'. Results are written straight into the
    // __kmp_place_* globals as each item is recognized, so values stored before a later
    // syntax error stay in effect.
#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == 'x')
    int num;
    int single_warning = 0; // set once the deprecated "o" designator has been reported
    int flagS = 0, flagC = 0, flagT = 0, flagSO = 0, flagCO = 0;
    const char *next = value;
    const char *prev;

    SKIP_WS(next); // skip white spaces
    if (*next == '\0')
        return; // no data provided, retain default values
    // Get num_sockets first (or whatever specified)
    if (*next >= '0' && *next <= '9') {
        prev = next;
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(prev, *next);
        SKIP_WS(next);
        if (*next == 's' || *next == 'S') { // e.g. "2s"
            __kmp_place_num_sockets = num;
            flagS = 1; // got num sockets
            next++;
            if (*next == '@') { // socket offset, e.g. "2s@4"
                flagSO = 1;
                prev = ++next; // don't allow spaces for simplicity
                if (!(*next >= '0' && *next <= '9')) {
                    KMP_WARNING(AffThrPlaceInvalid, name, value);
                    return;
                }
                SKIP_DIGITS(next);
                num = __kmp_str_to_int(prev, *next);
                __kmp_place_socket_offset = num;
            }
        } else if (*next == 'c' || *next == 'C') {
            __kmp_place_num_cores = num;
            flagS = flagC = 1; // sockets were not specified - use default
            next++;
            if (*next == '@') { // core offset, e.g. "2c@6"
                flagCO = 1;
                prev = ++next; // don't allow spaces for simplicity
                if (!(*next >= '0' && *next <= '9')) {
                    KMP_WARNING(AffThrPlaceInvalid, name, value);
                    return;
                }
                SKIP_DIGITS(next);
                num = __kmp_str_to_int(prev, *next);
                __kmp_place_core_offset = num;
            }
        } else if (CHECK_DELIM(next)) {
            __kmp_place_num_cores = num; // no letter-designator - num cores
            flagS = flagC = 1; // sockets were not specified - use default
            next++;
        } else if (*next == 't' || *next == 'T') {
            __kmp_place_num_threads_per_core = num;
            // sockets, cores were not specified - use default
            return; // we ignore offset value in case all cores are used
        } else if (*next == '\0') {
            __kmp_place_num_cores = num;
            return; // the only value provided - set num cores
        } else {
            KMP_WARNING(AffThrPlaceInvalid, name, value);
            return;
        }
    } else {
        KMP_WARNING(AffThrPlaceInvalid, name, value);
        return;
    }
    KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
    SKIP_WS(next);
    if (*next == '\0')
        return; // " n " - something like this
    if (CHECK_DELIM(next)) {
        next++; // skip delimiter
        SKIP_WS(next);
    }

    // Get second value (could be offset, num_cores, num_threads)
    if (*next >= '0' && *next <= '9') {
        prev = next;
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(prev, *next);
        SKIP_WS(next);
        if (*next == 'c' || *next == 'C') {
            KMP_DEBUG_ASSERT(flagC == 0);
            __kmp_place_num_cores = num;
            flagC = 1;
            next++;
            if (*next == '@') { // core offset, e.g. "2c@6"
                flagCO = 1;
                prev = ++next; // don't allow spaces for simplicity
                if (!(*next >= '0' && *next <= '9')) {
                    KMP_WARNING(AffThrPlaceInvalid, name, value);
                    return;
                }
                SKIP_DIGITS(next);
                num = __kmp_str_to_int(prev, *next);
                __kmp_place_core_offset = num;
            }
        } else if (*next == 'o' || *next == 'O') { // offset specified
            KMP_WARNING(AffThrPlaceDeprecated);
            single_warning = 1;
            if (flagC) { // whether num_cores already specified (sockets skipped)
                KMP_DEBUG_ASSERT(!flagCO); // either "o" or @, not both
                __kmp_place_core_offset = num;
            } else {
                KMP_DEBUG_ASSERT(!flagSO); // either "o" or @, not both
                __kmp_place_socket_offset = num;
            }
            next++;
        } else if (*next == 't' || *next == 'T') {
            KMP_DEBUG_ASSERT(flagT == 0);
            __kmp_place_num_threads_per_core = num;
            flagC = 1; // num_cores could be skipped ?
            flagT = 1;
            next++; // can have core-offset specified after num threads
        } else if (*next == '\0') {
            KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
            __kmp_place_num_threads_per_core = num;
            return; // two values provided without letter-designator
        } else {
            KMP_WARNING(AffThrPlaceInvalid, name, value);
            return;
        }
    } else {
        KMP_WARNING(AffThrPlaceInvalid, name, value);
        return;
    }
    SKIP_WS(next);
    if (*next == '\0')
        return; // " Ns,Nc " - something like this
    if (CHECK_DELIM(next)) {
        next++; // skip delimiter
        SKIP_WS(next);
    }

    // Get third value (could be core-offset, num_cores, num_threads)
    if (*next >= '0' && *next <= '9') {
        prev = next;
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(prev, *next);
        SKIP_WS(next);
        if (*next == 't' || *next == 'T') {
            KMP_DEBUG_ASSERT(flagT == 0);
            __kmp_place_num_threads_per_core = num;
            if (flagC == 0)
                return; // num_cores could be skipped (e.g. 2s,4o,2t)
            flagT = 1;
            next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
        } else if (*next == 'c' || *next == 'C') {
            KMP_DEBUG_ASSERT(flagC == 0);
            __kmp_place_num_cores = num;
            flagC = 1;
            next++;
            //KMP_DEBUG_ASSERT(*next != '@'); // socket offset used "o" designator
        } else if (*next == 'o' || *next == 'O') {
            KMP_WARNING(AffThrPlaceDeprecated);
            single_warning = 1;
            KMP_DEBUG_ASSERT(flagC);
            //KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
            __kmp_place_core_offset = num;
            next++;
        } else {
            KMP_WARNING(AffThrPlaceInvalid, name, value);
            return;
        }
    } else {
        KMP_WARNING(AffThrPlaceInvalid, name, value);
        return;
    }
    KMP_DEBUG_ASSERT(flagC);
    SKIP_WS(next);
    if ( *next == '\0' )
        return;
    if (CHECK_DELIM(next)) {
        next++; // skip delimiter
        SKIP_WS(next);
    }

    // Get 4-th value (could be core-offset, num_threads)
    if (*next >= '0' && *next <= '9') {
        prev = next;
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(prev, *next);
        SKIP_WS(next);
        if (*next == 'o' || *next == 'O') {
            if (!single_warning) { // warn once
                KMP_WARNING(AffThrPlaceDeprecated);
            }
            KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
            __kmp_place_core_offset = num;
            next++;
        } else if (*next == 't' || *next == 'T') {
            KMP_DEBUG_ASSERT(flagT == 0);
            __kmp_place_num_threads_per_core = num;
            flagT = 1;
            next++; // can have core-offset specified after num threads
        } else {
            KMP_WARNING(AffThrPlaceInvalid, name, value);
            return;
        }
    } else {
        KMP_WARNING(AffThrPlaceInvalid, name, value);
        return;
    }
    SKIP_WS(next);
    if ( *next == '\0' )
        return;
    if (CHECK_DELIM(next)) {
        next++; // skip delimiter
        SKIP_WS(next);
    }

    // Get 5-th value (could be core-offset, num_threads)
    // This is the last item read; anything following it is not examined.
    if (*next >= '0' && *next <= '9') {
        prev = next;
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(prev, *next);
        SKIP_WS(next);
        if (*next == 'o' || *next == 'O') {
            if (!single_warning) { // warn once
                KMP_WARNING(AffThrPlaceDeprecated);
            }
            KMP_DEBUG_ASSERT(flagT);
            KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
            __kmp_place_core_offset = num;
        } else if (*next == 't' || *next == 'T') {
            KMP_DEBUG_ASSERT(flagT == 0);
            __kmp_place_num_threads_per_core = num;
        } else {
            KMP_WARNING(AffThrPlaceInvalid, name, value);
        }
    } else {
        KMP_WARNING(AffThrPlaceInvalid, name, value);
    }
    return;
#undef CHECK_DELIM
}

// Print KMP_PLACE_THREADS in the "<N>s[@off],<N>c[@off],<N>t" form, emitting only the
// non-zero components. Nothing is printed when sockets, cores and threads-per-core are all 0.
static void
__kmp_stg_print_place_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
    if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
        int comma = 0; // becomes 1 once a component has been written, so later ones get a ','
        kmp_str_buf_t buf; // the value is assembled here, then appended to `buffer`
        __kmp_str_buf_init(&buf);
        if(__kmp_env_format)
            KMP_STR_BUF_PRINT_NAME_EX(name);
        else
            __kmp_str_buf_print(buffer, " %s='", name);
        if (__kmp_place_num_sockets) {
            __kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
            if (__kmp_place_socket_offset)
                __kmp_str_buf_print(&buf, "@%d", __kmp_place_socket_offset);
            comma = 1;
        }
        if (__kmp_place_num_cores) {
            __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
            if (__kmp_place_core_offset)
                __kmp_str_buf_print(&buf, "@%d", __kmp_place_core_offset);
            comma = 1;
        }
        if (__kmp_place_num_threads_per_core)
            __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
        __kmp_str_buf_print(buffer, "%s'\n", buf.str );
        __kmp_str_buf_free(&buf);
/*
    } else {
        __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
*/
    }
}

#if USE_ITT_BUILD
// -------------------------------------------------------------------------------------------------
// KMP_FORKJOIN_FRAMES
// -------------------------------------------------------------------------------------------------

// Parse KMP_FORKJOIN_FRAMES as a boolean.
static void
__kmp_stg_parse_forkjoin_frames( char const * name, char const * value, void * data ) {
    __kmp_stg_parse_bool( name, value, & __kmp_forkjoin_frames );
} // __kmp_stg_parse_forkjoin_frames

// Print KMP_FORKJOIN_FRAMES as a boolean.
static void
__kmp_stg_print_forkjoin_frames( kmp_str_buf_t * buffer, char const * name, void * data ) {
    __kmp_stg_print_bool( buffer, name, __kmp_forkjoin_frames );
} // __kmp_stg_print_forkjoin_frames

// -------------------------------------------------------------------------------------------------
// KMP_FORKJOIN_FRAMES_MODE
// -------------------------------------------------------------------------------------------------

// Parse KMP_FORKJOIN_FRAMES_MODE as an integer in [0, 3].
static void
__kmp_stg_parse_forkjoin_frames_mode( char const * name, char const * value, void * data ) {
    __kmp_stg_parse_int( name, value, 0, 3, & __kmp_forkjoin_frames_mode );
} // __kmp_stg_parse_forkjoin_frames_mode

// Print KMP_FORKJOIN_FRAMES_MODE.
static void
__kmp_stg_print_forkjoin_frames_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
    __kmp_stg_print_int( buffer, name, __kmp_forkjoin_frames_mode );
} // __kmp_stg_print_forkjoin_frames_mode
#endif /* USE_ITT_BUILD */

// -------------------------------------------------------------------------------------------------
// OMP_DISPLAY_ENV
// -------------------------------------------------------------------------------------------------

#if OMP_40_ENABLED

// Parse OMP_DISPLAY_ENV. A value matching "VERBOSE" sets only __kmp_display_env_verbose;
// any other value is parsed as a boolean into __kmp_display_env.
static void
__kmp_stg_parse_omp_display_env( char const * name, char const * value, void * data )
{
    if ( __kmp_str_match( "VERBOSE", 1, value ) )
    {
        __kmp_display_env_verbose = TRUE;
    } else {
        __kmp_stg_parse_bool( name, value, & __kmp_display_env );
    }

} // __kmp_stg_parse_omp_display_env

// Print OMP_DISPLAY_ENV: "VERBOSE" when the verbose flag is set, otherwise the boolean.
static void
__kmp_stg_print_omp_display_env( kmp_str_buf_t * buffer, char const * name, void * data )
{
    if ( __kmp_display_env_verbose )
    {
        __kmp_stg_print_str( buffer, name, "VERBOSE" );
    } else {
        __kmp_stg_print_bool( buffer, name, __kmp_display_env );
    }
} // __kmp_stg_print_omp_display_env

// Parse OMP_CANCELLATION as a boolean. Ignored with a warning once __kmp_init_parallel is set.
static void
__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) {
    if ( TCR_4(__kmp_init_parallel) ) {
        KMP_WARNING( EnvParallelWarn, name );
        return;
    } // read value before first parallel only
    __kmp_stg_parse_bool( name, value, & __kmp_omp_cancellation );
} // __kmp_stg_parse_omp_cancellation

// Print OMP_CANCELLATION as a boolean.
static void
__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) {
    __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation );
} // __kmp_stg_print_omp_cancellation

#endif

// -------------------------------------------------------------------------------------------------
// Table.
// -------------------------------------------------------------------------------------------------

// One entry per recognized environment variable: { name, parse routine, print routine, ... }.
// A NULL print routine marks a variable that is parsed but not printed. The fourth field is
// the per-setting `data` pointer that __kmp_stg_init() fills in for settings that need it.
// NOTE(review): the meaning of the two trailing zero fields is not visible in this part of
// the file -- confirm against the kmp_setting_t declaration.
// The entry with the empty name terminates the table and must stay last: __kmp_stg_init()
// sorts every entry except the final one.

static kmp_setting_t __kmp_stg_table[] = {

    { "KMP_ALL_THREADS", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 },
    { "KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime, NULL, 0, 0 },
    { "KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok, __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0 },
    { "KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 },
    { "KMP_MAX_THREADS", __kmp_stg_parse_all_threads, NULL, NULL, 0, 0 }, // For backward compatibility
    { "KMP_MONITOR_STACKSIZE", __kmp_stg_parse_monitor_stacksize, __kmp_stg_print_monitor_stacksize, NULL, 0, 0 },
    { "KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings, NULL, 0, 0 },
    { "KMP_STACKOFFSET", __kmp_stg_parse_stackoffset, __kmp_stg_print_stackoffset, NULL, 0, 0 },
    { "KMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 },
    { "KMP_STACKPAD", __kmp_stg_parse_stackpad, __kmp_stg_print_stackpad, NULL, 0, 0 },
    { "KMP_VERSION", __kmp_stg_parse_version, __kmp_stg_print_version, NULL, 0, 0 },
    { "KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL, 0, 0 },

    { "OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0 },
    { "OMP_NUM_THREADS", __kmp_stg_parse_num_threads, __kmp_stg_print_num_threads, NULL, 0, 0 },
    { "OMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 },

    { "KMP_TASKING", __kmp_stg_parse_tasking, __kmp_stg_print_tasking, NULL, 0, 0 },
    { "KMP_TASK_STEALING_CONSTRAINT", __kmp_stg_parse_task_stealing, __kmp_stg_print_task_stealing, NULL, 0, 0 },
    { "OMP_MAX_ACTIVE_LEVELS", __kmp_stg_parse_max_active_levels, __kmp_stg_print_max_active_levels, NULL, 0, 0 },
    { "OMP_THREAD_LIMIT", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 },
    { "OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 },
#if KMP_NESTED_HOT_TEAMS
    { "KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, __kmp_stg_print_hot_teams_level, NULL, 0, 0 },
    { "KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, __kmp_stg_print_hot_teams_mode, NULL, 0, 0 },
#endif // KMP_NESTED_HOT_TEAMS

#if KMP_HANDLE_SIGNALS
    { "KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, __kmp_stg_print_handle_signals, NULL, 0, 0 },
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    { "KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, __kmp_stg_print_inherit_fp_control, NULL, 0, 0 },
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#ifdef KMP_GOMP_COMPAT
    { "GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0 },
#endif

#ifdef KMP_DEBUG
    { "KMP_A_DEBUG", __kmp_stg_parse_a_debug, __kmp_stg_print_a_debug, NULL, 0, 0 },
    { "KMP_B_DEBUG", __kmp_stg_parse_b_debug, __kmp_stg_print_b_debug, NULL, 0, 0 },
    { "KMP_C_DEBUG", __kmp_stg_parse_c_debug, __kmp_stg_print_c_debug, NULL, 0, 0 },
    { "KMP_D_DEBUG", __kmp_stg_parse_d_debug, __kmp_stg_print_d_debug, NULL, 0, 0 },
    { "KMP_E_DEBUG", __kmp_stg_parse_e_debug, __kmp_stg_print_e_debug, NULL, 0, 0 },
    { "KMP_F_DEBUG", __kmp_stg_parse_f_debug, __kmp_stg_print_f_debug, NULL, 0, 0 },
    { "KMP_DEBUG", __kmp_stg_parse_debug, NULL, /* no print */ NULL, 0, 0 },
    { "KMP_DEBUG_BUF", __kmp_stg_parse_debug_buf, __kmp_stg_print_debug_buf, NULL, 0, 0 },
    { "KMP_DEBUG_BUF_ATOMIC", __kmp_stg_parse_debug_buf_atomic, __kmp_stg_print_debug_buf_atomic, NULL, 0, 0 },
    { "KMP_DEBUG_BUF_CHARS", __kmp_stg_parse_debug_buf_chars, __kmp_stg_print_debug_buf_chars, NULL, 0, 0 },
    { "KMP_DEBUG_BUF_LINES", __kmp_stg_parse_debug_buf_lines, __kmp_stg_print_debug_buf_lines, NULL, 0, 0 },
    { "KMP_DIAG", __kmp_stg_parse_diag, __kmp_stg_print_diag, NULL, 0, 0 },

    { "KMP_PAR_RANGE", __kmp_stg_parse_par_range_env, __kmp_stg_print_par_range_env, NULL, 0, 0 },
    { "KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle, __kmp_stg_print_yield_cycle, NULL, 0, 0 },
    { "KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL, 0, 0 },
    { "KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off, NULL, 0, 0 },
#endif // KMP_DEBUG

    { "KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc, __kmp_stg_print_align_alloc, NULL, 0, 0 },

    { "KMP_PLAIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
    { "KMP_PLAIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
    { "KMP_FORKJOIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
    { "KMP_FORKJOIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
#if KMP_FAST_REDUCTION_BARRIER
    { "KMP_REDUCTION_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
    { "KMP_REDUCTION_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
#endif

    { "KMP_ABORT_DELAY", __kmp_stg_parse_abort_delay, __kmp_stg_print_abort_delay, NULL, 0, 0 },
    { "KMP_CPUINFO_FILE", __kmp_stg_parse_cpuinfo_file, __kmp_stg_print_cpuinfo_file, NULL, 0, 0 },
    { "KMP_FORCE_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 },
    { "KMP_DETERMINISTIC_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 },
    { "KMP_STORAGE_MAP", __kmp_stg_parse_storage_map, __kmp_stg_print_storage_map, NULL, 0, 0 },
    { "KMP_ALL_THREADPRIVATE", __kmp_stg_parse_all_threadprivate, __kmp_stg_print_all_threadprivate, NULL, 0, 0 },
    { "KMP_FOREIGN_THREADS_THREADPRIVATE", __kmp_stg_parse_foreign_threads_threadprivate, __kmp_stg_print_foreign_threads_threadprivate, NULL, 0, 0 },

#if KMP_AFFINITY_SUPPORTED
    { "KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL, 0, 0 },
# ifdef KMP_GOMP_COMPAT
    { "GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL, /* no print */ NULL, 0, 0 },
# endif /* KMP_GOMP_COMPAT */
# if OMP_40_ENABLED
    { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 },
    { "OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0 },
# else
    { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, 0 },
# endif /* OMP_40_ENABLED */

    { "KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, __kmp_stg_print_topology_method, NULL, 0, 0 },

#else

    //
    // KMP_AFFINITY is not supported on OS X*, nor is OMP_PLACES.
    // OMP_PROC_BIND and proc-bind-var are supported, however.
    //
# if OMP_40_ENABLED
    { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 },
# endif

#endif // KMP_AFFINITY_SUPPORTED

    { "KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, __kmp_stg_print_init_at_fork, NULL, 0, 0 },
    { "KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, 0, 0 },
    { "OMP_SCHEDULE", __kmp_stg_parse_omp_schedule, __kmp_stg_print_omp_schedule, NULL, 0, 0 },
    { "KMP_ATOMIC_MODE", __kmp_stg_parse_atomic_mode, __kmp_stg_print_atomic_mode, NULL, 0, 0 },
    { "KMP_CONSISTENCY_CHECK", __kmp_stg_parse_consistency_check, __kmp_stg_print_consistency_check, NULL, 0, 0 },

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    { "KMP_ITT_PREPARE_DELAY", __kmp_stg_parse_itt_prepare_delay, __kmp_stg_print_itt_prepare_delay, NULL, 0, 0 },
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    { "KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr, __kmp_stg_print_malloc_pool_incr, NULL, 0, 0 },
    { "KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait, NULL, 0, 0 },
    { "KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait, NULL, 0, 0 },
    { "KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode, NULL, 0, 0 },
    { "OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic, NULL, 0, 0 },
    { "KMP_DYNAMIC_MODE", __kmp_stg_parse_kmp_dynamic_mode, __kmp_stg_print_kmp_dynamic_mode, NULL, 0, 0 },

#ifdef USE_LOAD_BALANCE
    { "KMP_LOAD_BALANCE_INTERVAL", __kmp_stg_parse_ld_balance_interval,__kmp_stg_print_ld_balance_interval,NULL, 0, 0 },
#endif

    { "KMP_NUM_LOCKS_IN_BLOCK", __kmp_stg_parse_lock_block, __kmp_stg_print_lock_block, NULL, 0, 0 },
    { "KMP_LOCK_KIND", __kmp_stg_parse_lock_kind, __kmp_stg_print_lock_kind, NULL, 0, 0 },
#if KMP_USE_ADAPTIVE_LOCKS
    { "KMP_ADAPTIVE_LOCK_PROPS", __kmp_stg_parse_adaptive_lock_props,__kmp_stg_print_adaptive_lock_props, NULL, 0, 0 },
#if KMP_DEBUG_ADAPTIVE_LOCKS
    { "KMP_SPECULATIVE_STATSFILE", __kmp_stg_parse_speculative_statsfile,__kmp_stg_print_speculative_statsfile, NULL, 0, 0 },
#endif
#endif // KMP_USE_ADAPTIVE_LOCKS
    { "KMP_PLACE_THREADS", __kmp_stg_parse_place_threads, __kmp_stg_print_place_threads, NULL, 0, 0 },
#if USE_ITT_BUILD
    { "KMP_FORKJOIN_FRAMES", __kmp_stg_parse_forkjoin_frames, __kmp_stg_print_forkjoin_frames, NULL, 0, 0 },
    { "KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,__kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0 },
#endif

# if OMP_40_ENABLED
    { "OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, __kmp_stg_print_omp_display_env, NULL, 0, 0 },
    { "OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0 },
#endif
    { "", NULL, NULL, NULL, 0, 0 }
}; // settings

// Number of table entries, including the terminating empty-name entry.
static int const __kmp_stg_count = sizeof( __kmp_stg_table ) / sizeof( kmp_setting_t );

// Look up a setting by exact name with a linear scan of the table.
// Returns a pointer to the table entry, or NULL when `name` is NULL or not found.
static inline
kmp_setting_t *
__kmp_stg_find( char const * name ) {

    int i;
    if ( name != NULL ) {
        for ( i = 0; i < __kmp_stg_count; ++ i ) {
            if ( strcmp( __kmp_stg_table[ i ].name, name ) == 0 ) {
                return & __kmp_stg_table[ i ];
            }; // if
        }; // for
    }; // if
    return NULL;

} // __kmp_stg_find


// qsort() comparator for table entries: orders by name with strcmp(), except that
// "KMP_AFFINITY" compares greater than every other name.
static int
__kmp_stg_cmp( void const * _a, void const * _b ) {
    kmp_setting_t * a = (kmp_setting_t *) _a;
    kmp_setting_t * b = (kmp_setting_t *) _b;

    //
    // Process KMP_AFFINITY last.
    // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY.
    //
    if ( strcmp( a->name, "KMP_AFFINITY" ) == 0 ) {
        if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) {
            return 0;
        }
        return 1;
    }
    else if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) {
        return -1;
    }
    return strcmp( a->name, b->name );
} // __kmp_stg_cmp


static void
__kmp_stg_init( void
) {

    static int initialized = 0;

    if ( ! initialized ) {

        // Sort table.
        // The count passed is __kmp_stg_count - 1, so the final (empty-name) entry stays in place.
        qsort( __kmp_stg_table, __kmp_stg_count - 1, sizeof( kmp_setting_t ), __kmp_stg_cmp );

        { // Initialize *_STACKSIZE data.
+ + kmp_setting_t * kmp_stacksize = __kmp_stg_find( "KMP_STACKSIZE" ); // 1st priority. +#ifdef KMP_GOMP_COMPAT + kmp_setting_t * gomp_stacksize = __kmp_stg_find( "GOMP_STACKSIZE" ); // 2nd priority. +#endif + kmp_setting_t * omp_stacksize = __kmp_stg_find( "OMP_STACKSIZE" ); // 3rd priority. + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. + // !!! Compiler does not understand rivals is used and optimizes out assignments + // !!! rivals[ i ++ ] = ...; + static kmp_setting_t * volatile rivals[ 4 ]; + static kmp_stg_ss_data_t kmp_data = { 1, (kmp_setting_t **)rivals }; +#ifdef KMP_GOMP_COMPAT + static kmp_stg_ss_data_t gomp_data = { 1024, (kmp_setting_t **)rivals }; +#endif + static kmp_stg_ss_data_t omp_data = { 1024, (kmp_setting_t **)rivals }; + int i = 0; + + rivals[ i ++ ] = kmp_stacksize; +#ifdef KMP_GOMP_COMPAT + if ( gomp_stacksize != NULL ) { + rivals[ i ++ ] = gomp_stacksize; + }; // if +#endif + rivals[ i ++ ] = omp_stacksize; + rivals[ i ++ ] = NULL; + + kmp_stacksize->data = & kmp_data; +#ifdef KMP_GOMP_COMPAT + if ( gomp_stacksize != NULL ) { + gomp_stacksize->data = & gomp_data; + }; // if +#endif + omp_stacksize->data = & omp_data; + + } + + { // Initialize KMP_LIBRARY and OMP_WAIT_POLICY data. + + kmp_setting_t * kmp_library = __kmp_stg_find( "KMP_LIBRARY" ); // 1st priority. + kmp_setting_t * omp_wait_policy = __kmp_stg_find( "OMP_WAIT_POLICY" ); // 2nd priority. + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. 
+ static kmp_setting_t * volatile rivals[ 3 ]; + static kmp_stg_wp_data_t kmp_data = { 0, (kmp_setting_t **)rivals }; + static kmp_stg_wp_data_t omp_data = { 1, (kmp_setting_t **)rivals }; + int i = 0; + + rivals[ i ++ ] = kmp_library; + if ( omp_wait_policy != NULL ) { + rivals[ i ++ ] = omp_wait_policy; + }; // if + rivals[ i ++ ] = NULL; + + kmp_library->data = & kmp_data; + if ( omp_wait_policy != NULL ) { + omp_wait_policy->data = & omp_data; + }; // if + + } + + { // Initialize KMP_ALL_THREADS, KMP_MAX_THREADS, and OMP_THREAD_LIMIT data. + + kmp_setting_t * kmp_all_threads = __kmp_stg_find( "KMP_ALL_THREADS" ); // 1st priority. + kmp_setting_t * kmp_max_threads = __kmp_stg_find( "KMP_MAX_THREADS" ); // 2nd priority. + kmp_setting_t * omp_thread_limit = __kmp_stg_find( "OMP_THREAD_LIMIT" ); // 3rd priority. + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. + static kmp_setting_t * volatile rivals[ 4 ]; + int i = 0; + + rivals[ i ++ ] = kmp_all_threads; + rivals[ i ++ ] = kmp_max_threads; + if ( omp_thread_limit != NULL ) { + rivals[ i ++ ] = omp_thread_limit; + }; // if + rivals[ i ++ ] = NULL; + + kmp_all_threads->data = (void*)& rivals; + kmp_max_threads->data = (void*)& rivals; + if ( omp_thread_limit != NULL ) { + omp_thread_limit->data = (void*)& rivals; + }; // if + + } + +#if KMP_AFFINITY_SUPPORTED + { // Initialize KMP_AFFINITY, GOMP_CPU_AFFINITY, and OMP_PROC_BIND data. + + kmp_setting_t * kmp_affinity = __kmp_stg_find( "KMP_AFFINITY" ); // 1st priority. + KMP_DEBUG_ASSERT( kmp_affinity != NULL ); + +# ifdef KMP_GOMP_COMPAT + kmp_setting_t * gomp_cpu_affinity = __kmp_stg_find( "GOMP_CPU_AFFINITY" ); // 2nd priority. + KMP_DEBUG_ASSERT( gomp_cpu_affinity != NULL ); +# endif + + kmp_setting_t * omp_proc_bind = __kmp_stg_find( "OMP_PROC_BIND" ); // 3rd priority. + KMP_DEBUG_ASSERT( omp_proc_bind != NULL ); + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. 
+ static kmp_setting_t * volatile rivals[ 4 ]; + int i = 0; + + rivals[ i ++ ] = kmp_affinity; + +# ifdef KMP_GOMP_COMPAT + rivals[ i ++ ] = gomp_cpu_affinity; + gomp_cpu_affinity->data = (void*)& rivals; +# endif + + rivals[ i ++ ] = omp_proc_bind; + omp_proc_bind->data = (void*)& rivals; + rivals[ i ++ ] = NULL; + +# if OMP_40_ENABLED + static kmp_setting_t * volatile places_rivals[ 4 ]; + i = 0; + + kmp_setting_t * omp_places = __kmp_stg_find( "OMP_PLACES" ); // 3rd priority. + KMP_DEBUG_ASSERT( omp_places != NULL ); + + places_rivals[ i ++ ] = kmp_affinity; +# ifdef KMP_GOMP_COMPAT + places_rivals[ i ++ ] = gomp_cpu_affinity; +# endif + places_rivals[ i ++ ] = omp_places; + omp_places->data = (void*)& places_rivals; + places_rivals[ i ++ ] = NULL; +# endif + } +#else + // KMP_AFFINITY not supported, so OMP_PROC_BIND has no rivals. + // OMP_PLACES not supported yet. +#endif // KMP_AFFINITY_SUPPORTED + + { // Initialize KMP_DETERMINISTIC_REDUCTION and KMP_FORCE_REDUCTION data. + + kmp_setting_t * kmp_force_red = __kmp_stg_find( "KMP_FORCE_REDUCTION" ); // 1st priority. + kmp_setting_t * kmp_determ_red = __kmp_stg_find( "KMP_DETERMINISTIC_REDUCTION" ); // 2nd priority. + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. + static kmp_setting_t * volatile rivals[ 3 ]; + static kmp_stg_fr_data_t force_data = { 1, (kmp_setting_t **)rivals }; + static kmp_stg_fr_data_t determ_data = { 0, (kmp_setting_t **)rivals }; + int i = 0; + + rivals[ i ++ ] = kmp_force_red; + if ( kmp_determ_red != NULL ) { + rivals[ i ++ ] = kmp_determ_red; + }; // if + rivals[ i ++ ] = NULL; + + kmp_force_red->data = & force_data; + if ( kmp_determ_red != NULL ) { + kmp_determ_red->data = & determ_data; + }; // if + } + + initialized = 1; + + }; // if + + // Reset flags. 
+ int i; + for ( i = 0; i < __kmp_stg_count; ++ i ) { + __kmp_stg_table[ i ].set = 0; + }; // for + +} // __kmp_stg_init + + +static void +__kmp_stg_parse( + char const * name, + char const * value +) { + + // On Windows* OS there are some nameless variables like "C:=C:\" (yeah, really nameless, they are + // presented in environment block as "=C:=C\\\x00=D:=D:\\\x00...", so let us skip them. + if ( name[ 0 ] == 0 ) { + return; + }; // if + + if ( value != NULL ) { + kmp_setting_t * setting = __kmp_stg_find( name ); + if ( setting != NULL ) { + setting->parse( name, value, setting->data ); + setting->defined = 1; + }; // if + }; // if + +} // __kmp_stg_parse + + +static int +__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. + char const * name, // Name of variable. + char const * value, // Value of the variable. + kmp_setting_t * * rivals // List of rival settings (the list must include current one). +) { + + if ( rivals == NULL ) { + return 0; + } + + // Loop thru higher priority settings (listed before current). + int i = 0; + for ( ; strcmp( rivals[ i ]->name, name ) != 0; i++ ) { + KMP_DEBUG_ASSERT( rivals[ i ] != NULL ); + +#if KMP_AFFINITY_SUPPORTED + if ( rivals[ i ] == __kmp_affinity_notype ) { + // + // If KMP_AFFINITY is specified without a type name, + // it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY. + // + continue; + } +#endif + + if ( rivals[ i ]->set ) { + KMP_WARNING( StgIgnored, name, rivals[ i ]->name ); + return 1; + }; // if + }; // while + + ++ i; // Skip current setting. 
+ return 0; + +}; // __kmp_stg_check_rivals + + +static int +__kmp_env_toPrint( char const * name, int flag ) { + int rc = 0; + kmp_setting_t * setting = __kmp_stg_find( name ); + if ( setting != NULL ) { + rc = setting->defined; + if ( flag >= 0 ) { + setting->defined = flag; + }; // if + }; // if + return rc; +} + + +static void +__kmp_aux_env_initialize( kmp_env_blk_t* block ) { + + char const * value; + + /* OMP_NUM_THREADS */ + value = __kmp_env_blk_var( block, "OMP_NUM_THREADS" ); + if ( value ) { + ompc_set_num_threads( __kmp_dflt_team_nth ); + } + + /* KMP_BLOCKTIME */ + value = __kmp_env_blk_var( block, "KMP_BLOCKTIME" ); + if ( value ) { + kmpc_set_blocktime( __kmp_dflt_blocktime ); + } + + /* OMP_NESTED */ + value = __kmp_env_blk_var( block, "OMP_NESTED" ); + if ( value ) { + ompc_set_nested( __kmp_dflt_nested ); + } + + /* OMP_DYNAMIC */ + value = __kmp_env_blk_var( block, "OMP_DYNAMIC" ); + if ( value ) { + ompc_set_dynamic( __kmp_global.g.g_dynamic ); + } + +} + +void +__kmp_env_initialize( char const * string ) { + + kmp_env_blk_t block; + int i; + + __kmp_stg_init(); + + // Hack!!! + if ( string == NULL ) { + // __kmp_max_nth = __kmp_sys_max_nth; + __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub ); + }; // if + __kmp_env_blk_init( & block, string ); + + // + // update the set flag on all entries that have an env var + // + for ( i = 0; i < block.count; ++ i ) { + if (( block.vars[ i ].name == NULL ) + || ( *block.vars[ i ].name == '\0')) { + continue; + } + if ( block.vars[ i ].value == NULL ) { + continue; + } + kmp_setting_t * setting = __kmp_stg_find( block.vars[ i ].name ); + if ( setting != NULL ) { + setting->set = 1; + } + }; // for i + + // Special case. If we parse environment, not a string, process KMP_WARNINGS first. 
+ if ( string == NULL ) { + char const * name = "KMP_WARNINGS"; + char const * value = __kmp_env_blk_var( & block, name ); + __kmp_stg_parse( name, value ); + }; // if + +#if KMP_AFFINITY_SUPPORTED + // + // Special case. KMP_AFFINITY is not a rival to other affinity env vars + // if no affinity type is specified. We want to allow + // KMP_AFFINITY=[no],verbose/[no]warnings/etc. to be enabled when + // specifying the affinity type via GOMP_CPU_AFFINITY or the OMP 4.0 + // affinity mechanism. + // + __kmp_affinity_notype = NULL; + char const *aff_str = __kmp_env_blk_var( & block, "KMP_AFFINITY" ); + if ( aff_str != NULL ) { + // + // Check if the KMP_AFFINITY type is specified in the string. + // We just search the string for "compact", "scatter", etc. + // without really parsing the string. The syntax of the + // KMP_AFFINITY env var is such that none of the affinity + // type names can appear anywhere other that the type + // specifier, even as substrings. + // + // I can't find a case-insensitive version of strstr on Windows* OS. + // Use the case-sensitive version for now. + // + +# if KMP_OS_WINDOWS +# define FIND strstr +# else +# define FIND strcasestr +# endif + + if ( ( FIND( aff_str, "none" ) == NULL ) + && ( FIND( aff_str, "physical" ) == NULL ) + && ( FIND( aff_str, "logical" ) == NULL ) + && ( FIND( aff_str, "compact" ) == NULL ) + && ( FIND( aff_str, "scatter" ) == NULL ) + && ( FIND( aff_str, "explicit" ) == NULL ) + && ( FIND( aff_str, "balanced" ) == NULL ) + && ( FIND( aff_str, "disabled" ) == NULL ) ) { + __kmp_affinity_notype = __kmp_stg_find( "KMP_AFFINITY" ); + } + else { + // + // A new affinity type is specified. + // Reset the affinity flags to their default values, + // in case this is called from kmp_set_defaults(). 
+ // + __kmp_affinity_type = affinity_default; + __kmp_affinity_gran = affinity_gran_default; + __kmp_affinity_top_method = affinity_top_method_default; + __kmp_affinity_respect_mask = affinity_respect_mask_default; + } +# undef FIND + +#if OMP_40_ENABLED + // + // Also reset the affinity flags if OMP_PROC_BIND is specified. + // + aff_str = __kmp_env_blk_var( & block, "OMP_PROC_BIND" ); + if ( aff_str != NULL ) { + __kmp_affinity_type = affinity_default; + __kmp_affinity_gran = affinity_gran_default; + __kmp_affinity_top_method = affinity_top_method_default; + __kmp_affinity_respect_mask = affinity_respect_mask_default; + } +#endif /* OMP_40_ENABLED */ + } + +#endif /* KMP_AFFINITY_SUPPORTED */ + +#if OMP_40_ENABLED + // + // Set up the nested proc bind type vector. + // + if ( __kmp_nested_proc_bind.bind_types == NULL ) { + __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *) + KMP_INTERNAL_MALLOC( sizeof(kmp_proc_bind_t) ); + if ( __kmp_nested_proc_bind.bind_types == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + } + __kmp_nested_proc_bind.size = 1; + __kmp_nested_proc_bind.used = 1; +# if KMP_AFFINITY_SUPPORTED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_default; +# else + // default proc bind is false if affinity not supported + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +# endif + + } +#endif /* OMP_40_ENABLED */ + + // + // Now process all of the settings. + // + for ( i = 0; i < block.count; ++ i ) { + __kmp_stg_parse( block.vars[ i ].name, block.vars[ i ].value ); + }; // for i + + // + // If user locks have been allocated yet, don't reset the lock vptr table. + // + if ( ! 
__kmp_init_user_locks ) { + if ( __kmp_user_lock_kind == lk_default ) { + __kmp_user_lock_kind = lk_queuing; + } +#if KMP_USE_DYNAMIC_LOCK + __kmp_init_dynamic_user_locks(); +#else + __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); +#endif + } + else { + KMP_DEBUG_ASSERT( string != NULL); // kmp_set_defaults() was called + KMP_DEBUG_ASSERT( __kmp_user_lock_kind != lk_default ); + // Binds lock functions again to follow the transition between different + // KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long + // as we do not allow lock kind changes after making a call to any + // user lock functions (true). +#if KMP_USE_DYNAMIC_LOCK + __kmp_init_dynamic_user_locks(); +#else + __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); +#endif + } + +#if KMP_AFFINITY_SUPPORTED + + if ( ! TCR_4(__kmp_init_middle) ) { + // + // Determine if the machine/OS is actually capable of supporting + // affinity. + // + const char *var = "KMP_AFFINITY"; +# if KMP_USE_HWLOC + if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if(__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); + } + hwloc_topology_ignore_type(__kmp_hwloc_topology, HWLOC_OBJ_CACHE); +# endif + if ( __kmp_affinity_type == affinity_disabled ) { + KMP_AFFINITY_DISABLE(); + } + else if ( ! KMP_AFFINITY_CAPABLE() ) { +# if KMP_USE_HWLOC + const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); + if(hwloc_topology_load(__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if(__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); + } + // Is the system capable of setting/getting this thread's affinity? + // also, is topology discovery possible? (pu indicates ability to discover processing units) + // and finally, were there no errors when calling any hwloc_* API functions? 
+ if(topology_support->cpubind->set_thisthread_cpubind && + topology_support->cpubind->get_thisthread_cpubind && + topology_support->discovery->pu && + !__kmp_hwloc_error) + { + // enables affinity according to KMP_AFFINITY_CAPABLE() macro + KMP_AFFINITY_ENABLE(TRUE); + } else { + // indicate that hwloc didn't work and disable affinity + __kmp_hwloc_error = TRUE; + KMP_AFFINITY_DISABLE(); + } +# else + __kmp_affinity_determine_capable( var ); +# endif // KMP_USE_HWLOC + if ( ! KMP_AFFINITY_CAPABLE() ) { + if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings + && ( __kmp_affinity_type != affinity_default ) + && ( __kmp_affinity_type != affinity_none ) + && ( __kmp_affinity_type != affinity_disabled ) ) ) { + KMP_WARNING( AffNotSupported, var ); + } + __kmp_affinity_type = affinity_disabled; + __kmp_affinity_respect_mask = 0; + __kmp_affinity_gran = affinity_gran_fine; + } + } + +# if OMP_40_ENABLED + if ( __kmp_affinity_type == affinity_disabled ) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } + else if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_true ) { + // + // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread. + // + __kmp_nested_proc_bind.bind_types[0] = proc_bind_spread; + } +# endif /* OMP_40_ENABLED */ + + if ( KMP_AFFINITY_CAPABLE() ) { + +# if KMP_GROUP_AFFINITY + + // + // Handle the Win 64 group affinity stuff if there are multiple + // processor groups, or if the user requested it, and OMP 4.0 + // affinity is not in effect. 
+ // + if ( ( ( __kmp_num_proc_groups > 1 ) + && ( __kmp_affinity_type == affinity_default ) +# if OMP_40_ENABLED + && ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) ) +# endif + || ( __kmp_affinity_top_method == affinity_top_method_group ) ) { + if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) { + __kmp_affinity_respect_mask = FALSE; + } + if ( __kmp_affinity_type == affinity_default ) { + __kmp_affinity_type = affinity_compact; +# if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +# endif + } + if ( __kmp_affinity_top_method == affinity_top_method_default ) { + if ( __kmp_affinity_gran == affinity_gran_default ) { + __kmp_affinity_top_method = affinity_top_method_group; + __kmp_affinity_gran = affinity_gran_group; + } + else if ( __kmp_affinity_gran == affinity_gran_group ) { + __kmp_affinity_top_method = affinity_top_method_group; + } + else { + __kmp_affinity_top_method = affinity_top_method_all; + } + } + else if ( __kmp_affinity_top_method == affinity_top_method_group ) { + if ( __kmp_affinity_gran == affinity_gran_default ) { + __kmp_affinity_gran = affinity_gran_group; + } + else if ( ( __kmp_affinity_gran != affinity_gran_group ) + && ( __kmp_affinity_gran != affinity_gran_fine ) + && ( __kmp_affinity_gran != affinity_gran_thread ) ) { + char *str = NULL; + switch ( __kmp_affinity_gran ) { + case affinity_gran_core: str = "core"; break; + case affinity_gran_package: str = "package"; break; + case affinity_gran_node: str = "node"; break; + default: KMP_DEBUG_ASSERT( 0 ); + } + KMP_WARNING( AffGranTopGroup, var, str ); + __kmp_affinity_gran = affinity_gran_fine; + } + } + else { + if ( __kmp_affinity_gran == affinity_gran_default ) { + __kmp_affinity_gran = affinity_gran_core; + } + else if ( __kmp_affinity_gran == affinity_gran_group ) { + char *str = NULL; + switch ( __kmp_affinity_type ) { + case affinity_physical: str = "physical"; break; + case affinity_logical: str = "logical"; break; + case 
affinity_compact: str = "compact"; break; + case affinity_scatter: str = "scatter"; break; + case affinity_explicit: str = "explicit"; break; + // No MIC on windows, so no affinity_balanced case + default: KMP_DEBUG_ASSERT( 0 ); + } + KMP_WARNING( AffGranGroupType, var, str ); + __kmp_affinity_gran = affinity_gran_core; + } + } + } + else + +# endif /* KMP_GROUP_AFFINITY */ + + { + if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) { +# if KMP_GROUP_AFFINITY + if ( __kmp_num_proc_groups > 1 ) { + __kmp_affinity_respect_mask = FALSE; + } + else +# endif /* KMP_GROUP_AFFINITY */ + { + __kmp_affinity_respect_mask = TRUE; + } + } +# if OMP_40_ENABLED + if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel ) + && ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ) ) { + if ( __kmp_affinity_type == affinity_default ) { + __kmp_affinity_type = affinity_compact; + __kmp_affinity_dups = FALSE; + } + } + else +# endif /* OMP_40_ENABLED */ + if ( __kmp_affinity_type == affinity_default ) { +#if OMP_40_ENABLED +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + if( __kmp_mic_type != non_mic ) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; + } else +#endif + { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } +#endif /* OMP_40_ENABLED */ +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + if( __kmp_mic_type != non_mic ) { + __kmp_affinity_type = affinity_scatter; + } else +#endif + { + __kmp_affinity_type = affinity_none; + } + + } + if ( ( __kmp_affinity_gran == affinity_gran_default ) + && ( __kmp_affinity_gran_levels < 0 ) ) { +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + if( __kmp_mic_type != non_mic ) { + __kmp_affinity_gran = affinity_gran_fine; + } else +#endif + { + __kmp_affinity_gran = affinity_gran_core; + } + } + if ( __kmp_affinity_top_method == affinity_top_method_default ) { + __kmp_affinity_top_method = affinity_top_method_all; + } + } + } + + K_DIAG( 1, ( 
"__kmp_affinity_type == %d\n", __kmp_affinity_type ) ); + K_DIAG( 1, ( "__kmp_affinity_compact == %d\n", __kmp_affinity_compact ) ); + K_DIAG( 1, ( "__kmp_affinity_offset == %d\n", __kmp_affinity_offset ) ); + K_DIAG( 1, ( "__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose ) ); + K_DIAG( 1, ( "__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings ) ); + K_DIAG( 1, ( "__kmp_affinity_respect_mask == %d\n", __kmp_affinity_respect_mask ) ); + K_DIAG( 1, ( "__kmp_affinity_gran == %d\n", __kmp_affinity_gran ) ); + + KMP_DEBUG_ASSERT( __kmp_affinity_type != affinity_default); +# if OMP_40_ENABLED + KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ); +# endif + } + +#endif /* KMP_AFFINITY_SUPPORTED */ + + if ( __kmp_version ) { + __kmp_print_version_1(); + }; // if + + // Post-initialization step: some env. vars need their value's further processing + if ( string != NULL) { // kmp_set_defaults() was called + __kmp_aux_env_initialize( &block ); + } + + __kmp_env_blk_free( & block ); + + KMP_MB(); + +} // __kmp_env_initialize + + +void +__kmp_env_print() { + + kmp_env_blk_t block; + int i; + kmp_str_buf_t buffer; + + __kmp_stg_init(); + __kmp_str_buf_init( & buffer ); + + __kmp_env_blk_init( & block, NULL ); + __kmp_env_blk_sort( & block ); + + // Print real environment values. + __kmp_str_buf_print( & buffer, "\n%s\n\n", KMP_I18N_STR( UserSettings ) ); + for ( i = 0; i < block.count; ++ i ) { + char const * name = block.vars[ i ].name; + char const * value = block.vars[ i ].value; + if ( + ( KMP_STRLEN( name ) > 4 && strncmp( name, "KMP_", 4 ) == 0 ) + || strncmp( name, "OMP_", 4 ) == 0 + #ifdef KMP_GOMP_COMPAT + || strncmp( name, "GOMP_", 5 ) == 0 + #endif // KMP_GOMP_COMPAT + ) { + __kmp_str_buf_print( & buffer, " %s=%s\n", name, value ); + }; // if + }; // for + __kmp_str_buf_print( & buffer, "\n" ); + + // Print internal (effective) settings. 
+ __kmp_str_buf_print( & buffer, "%s\n\n", KMP_I18N_STR( EffectiveSettings ) ); + for ( int i = 0; i < __kmp_stg_count; ++ i ) { + if ( __kmp_stg_table[ i ].print != NULL ) { + __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data ); + }; // if + }; // for + + __kmp_printf( "%s", buffer.str ); + + __kmp_env_blk_free( & block ); + __kmp_str_buf_free( & buffer ); + + __kmp_printf("\n"); + +} // __kmp_env_print + + +#if OMP_40_ENABLED +void +__kmp_env_print_2() { + + kmp_env_blk_t block; + kmp_str_buf_t buffer; + + __kmp_env_format = 1; + + __kmp_stg_init(); + __kmp_str_buf_init( & buffer ); + + __kmp_env_blk_init( & block, NULL ); + __kmp_env_blk_sort( & block ); + + __kmp_str_buf_print( & buffer, "\n%s\n", KMP_I18N_STR( DisplayEnvBegin ) ); + __kmp_str_buf_print( & buffer, " _OPENMP='%d'\n", __kmp_openmp_version ); + + for ( int i = 0; i < __kmp_stg_count; ++ i ) { + if ( __kmp_stg_table[ i ].print != NULL && + ( ( __kmp_display_env && strncmp( __kmp_stg_table[ i ].name, "OMP_", 4 ) == 0 ) || __kmp_display_env_verbose ) ) { + __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data ); + }; // if + }; // for + + __kmp_str_buf_print( & buffer, "%s\n", KMP_I18N_STR( DisplayEnvEnd ) ); + __kmp_str_buf_print( & buffer, "\n" ); + + __kmp_printf( "%s", buffer.str ); + + __kmp_env_blk_free( & block ); + __kmp_str_buf_free( & buffer ); + + __kmp_printf("\n"); + +} // __kmp_env_print_2 +#endif // OMP_40_ENABLED + +// end of file + diff --git a/contrib/libs/cxxsupp/openmp/kmp_settings.h b/contrib/libs/cxxsupp/openmp/kmp_settings.h index ff355d7c3e1..7232e619756 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_settings.h +++ b/contrib/libs/cxxsupp/openmp/kmp_settings.h @@ -1,50 +1,50 @@ -/* - * kmp_settings.h -- Initialize environment variables - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed 
under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_SETTINGS_H -#define KMP_SETTINGS_H - -void __kmp_reset_global_vars( void ); -void __kmp_env_initialize( char const * ); -void __kmp_env_print(); -#if OMP_40_ENABLED -void __kmp_env_print_2(); -#endif // OMP_40_ENABLED - -int __kmp_initial_threads_capacity( int req_nproc ); -void __kmp_init_dflt_team_nth(); -int __kmp_convert_to_milliseconds( char const * ); -int __kmp_default_tp_capacity( int, int, int); - -#if KMP_MIC -#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Device), name ) -#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Device), x ) -#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ? "TRUE" : "FALSE" ); -#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Device), name, value ) -#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Device), name, value ); -#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ) -#else -#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Host), name ) -#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Host), x ) -#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ? 
"TRUE" : "FALSE" ); -#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Host), name, value ) -#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Host), name, value ); -#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ) -#endif - -#endif // KMP_SETTINGS_H - -// end of file // - +/* + * kmp_settings.h -- Initialize environment variables + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_SETTINGS_H +#define KMP_SETTINGS_H + +void __kmp_reset_global_vars( void ); +void __kmp_env_initialize( char const * ); +void __kmp_env_print(); +#if OMP_40_ENABLED +void __kmp_env_print_2(); +#endif // OMP_40_ENABLED + +int __kmp_initial_threads_capacity( int req_nproc ); +void __kmp_init_dflt_team_nth(); +int __kmp_convert_to_milliseconds( char const * ); +int __kmp_default_tp_capacity( int, int, int); + +#if KMP_MIC +#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Device), name ) +#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Device), x ) +#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ? 
"TRUE" : "FALSE" ); +#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Device), name, value ) +#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Device), name, value ); +#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ) +#else +#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Host), name ) +#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Host), x ) +#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ? "TRUE" : "FALSE" ); +#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Host), name, value ) +#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Host), name, value ); +#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ) +#endif + +#endif // KMP_SETTINGS_H + +// end of file // + diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats.cpp b/contrib/libs/cxxsupp/openmp/kmp_stats.cpp index c2b8c8c4891..d1f43afe4ae 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_stats.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_stats.cpp @@ -1,609 +1,609 @@ -/** @file kmp_stats.cpp - * Statistics gathering and processing. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#include "kmp.h" -#include "kmp_str.h" -#include "kmp_lock.h" -#include "kmp_stats.h" - -#include -#include -#include -#include // for atexit - -#define STRINGIZE2(x) #x -#define STRINGIZE(x) STRINGIZE2(x) - -#define expandName(name,flags,ignore) {STRINGIZE(name),flags}, -statInfo timeStat::timerInfo[] = { - KMP_FOREACH_TIMER(expandName,0) - {0,0} -}; -const statInfo counter::counterInfo[] = { - KMP_FOREACH_COUNTER(expandName,0) - {0,0} -}; -#undef expandName - -#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0}, -kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = { - KMP_FOREACH_TIMER(expandName,0) - {0.0,0.0,0.0} -}; -#undef expandName - -const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = { - {1.0, 0.0, 0.0}, // red - {1.0, 0.6, 0.0}, // orange - {1.0, 1.0, 0.0}, // yellow - {0.0, 1.0, 0.0}, // green - {0.0, 0.0, 1.0}, // blue - {0.6, 0.2, 0.8}, // purple - {1.0, 0.0, 1.0}, // magenta - {0.0, 0.4, 0.2}, // dark green - {1.0, 1.0, 0.6}, // light yellow - {0.6, 0.4, 0.6}, // dirty purple - {0.0, 1.0, 1.0}, // cyan - {1.0, 0.4, 0.8}, // pink - {0.5, 0.5, 0.5}, // grey - {0.8, 0.7, 0.5}, // brown - {0.6, 0.6, 1.0}, // light blue - {1.0, 0.7, 0.5}, // peach - {0.8, 0.5, 1.0}, // lavender - {0.6, 0.0, 0.0}, // dark red - {0.7, 0.6, 0.0}, // gold - {0.0, 0.0, 0.0} // black -}; - -// Ensure that the atexit handler only runs once. 
-static uint32_t statsPrinted = 0; - -// output interface -static kmp_stats_output_module __kmp_stats_global_output; - -/* ****************************************************** */ -/* ************* statistic member functions ************* */ - -void statistic::addSample(double sample) -{ - double delta = sample - meanVal; - - sampleCount = sampleCount + 1; - meanVal = meanVal + delta/sampleCount; - m2 = m2 + delta*(sample - meanVal); - - minVal = std::min(minVal, sample); - maxVal = std::max(maxVal, sample); -} - -statistic & statistic::operator+= (const statistic & other) -{ - if (sampleCount == 0) - { - *this = other; - return *this; - } - - uint64_t newSampleCount = sampleCount + other.sampleCount; - double dnsc = double(newSampleCount); - double dsc = double(sampleCount); - double dscBydnsc = dsc/dnsc; - double dosc = double(other.sampleCount); - double delta = other.meanVal - meanVal; - - // Try to order these calculations to avoid overflows. - // If this were Fortran, then the compiler would not be able to re-order over brackets. - // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition - // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation - // really is associative (which floating addition isn't...)). 
- meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc); - m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta; - minVal = std::min (minVal, other.minVal); - maxVal = std::max (maxVal, other.maxVal); - sampleCount = newSampleCount; - - - return *this; -} - -void statistic::scale(double factor) -{ - minVal = minVal*factor; - maxVal = maxVal*factor; - meanVal= meanVal*factor; - m2 = m2*factor*factor; - return; -} - -std::string statistic::format(char unit, bool total) const -{ - std::string result = formatSI(sampleCount,9,' '); - - result = result + std::string(", ") + formatSI(minVal, 9, unit); - result = result + std::string(", ") + formatSI(meanVal, 9, unit); - result = result + std::string(", ") + formatSI(maxVal, 9, unit); - if (total) - result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit); - result = result + std::string(", ") + formatSI(getSD(), 9, unit); - - return result; -} - -/* ********************************************************** */ -/* ************* explicitTimer member functions ************* */ - -void explicitTimer::start(timer_e timerEnumValue) { - startTime = tsc_tick_count::now(); - if(timeStat::logEvent(timerEnumValue)) { - __kmp_stats_thread_ptr->incrementNestValue(); - } - return; -} - -void explicitTimer::stop(timer_e timerEnumValue) { - if (startTime.getValue() == 0) - return; - - tsc_tick_count finishTime = tsc_tick_count::now(); - - //stat->addSample ((tsc_tick_count::now() - startTime).ticks()); - stat->addSample ((finishTime - startTime).ticks()); - - if(timeStat::logEvent(timerEnumValue)) { - __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); - __kmp_stats_thread_ptr->decrementNestValue(); - } - - /* We accept the risk that we drop a sample because it really did start at t==0. 
*/ - startTime = 0; - return; -} - -/* ******************************************************************* */ -/* ************* kmp_stats_event_vector member functions ************* */ - -void kmp_stats_event_vector::deallocate() { - __kmp_free(events); - internal_size = 0; - allocated_size = 0; - events = NULL; -} - -// This function is for qsort() which requires the compare function to return -// either a negative number if event1 < event2, a positive number if event1 > event2 -// or zero if event1 == event2. -// This sorts by start time (lowest to highest). -int compare_two_events(const void* event1, const void* event2) { - kmp_stats_event* ev1 = (kmp_stats_event*)event1; - kmp_stats_event* ev2 = (kmp_stats_event*)event2; - - if(ev1->getStart() < ev2->getStart()) return -1; - else if(ev1->getStart() > ev2->getStart()) return 1; - else return 0; -} - -void kmp_stats_event_vector::sort() { - qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events); -} - -/* *********************************************************** */ -/* ************* kmp_stats_list member functions ************* */ - -// returns a pointer to newly created stats node -kmp_stats_list* kmp_stats_list::push_back(int gtid) { - kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list)); - // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used) - new (newnode) kmp_stats_list(); - newnode->setGtid(gtid); - newnode->prev = this->prev; - newnode->next = this; - newnode->prev->next = newnode; - newnode->next->prev = newnode; - return newnode; -} -void kmp_stats_list::deallocate() { - kmp_stats_list* ptr = this->next; - kmp_stats_list* delptr = this->next; - while(ptr != this) { - delptr = ptr; - ptr=ptr->next; - // placement new means we have to explicitly call destructor. 
- delptr->_event_vector.deallocate(); - delptr->~kmp_stats_list(); - __kmp_free(delptr); - } -} -kmp_stats_list::iterator kmp_stats_list::begin() { - kmp_stats_list::iterator it; - it.ptr = this->next; - return it; -} -kmp_stats_list::iterator kmp_stats_list::end() { - kmp_stats_list::iterator it; - it.ptr = this; - return it; -} -int kmp_stats_list::size() { - int retval; - kmp_stats_list::iterator it; - for(retval=0, it=begin(); it!=end(); it++, retval++) {} - return retval; -} - -/* ********************************************************************* */ -/* ************* kmp_stats_list::iterator member functions ************* */ - -kmp_stats_list::iterator::iterator() : ptr(NULL) {} -kmp_stats_list::iterator::~iterator() {} -kmp_stats_list::iterator kmp_stats_list::iterator::operator++() { - this->ptr = this->ptr->next; - return *this; -} -kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) { - this->ptr = this->ptr->next; - return *this; -} -kmp_stats_list::iterator kmp_stats_list::iterator::operator--() { - this->ptr = this->ptr->prev; - return *this; -} -kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) { - this->ptr = this->ptr->prev; - return *this; -} -bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) { - return this->ptr!=rhs.ptr; -} -bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) { - return this->ptr==rhs.ptr; -} -kmp_stats_list* kmp_stats_list::iterator::operator*() const { - return this->ptr; -} - -/* *************************************************************** */ -/* ************* kmp_stats_output_module functions ************** */ - -const char* kmp_stats_output_module::outputFileName = NULL; -const char* kmp_stats_output_module::eventsFileName = NULL; -const char* kmp_stats_output_module::plotFileName = NULL; -int kmp_stats_output_module::printPerThreadFlag = 0; -int kmp_stats_output_module::printPerThreadEventsFlag = 0; - -// init() is 
called very near the beginning of execution time in the constructor of __kmp_stats_global_output -void kmp_stats_output_module::init() -{ - char * statsFileName = getenv("KMP_STATS_FILE"); - eventsFileName = getenv("KMP_STATS_EVENTS_FILE"); - plotFileName = getenv("KMP_STATS_PLOT_FILE"); - char * threadStats = getenv("KMP_STATS_THREADS"); - char * threadEvents = getenv("KMP_STATS_EVENTS"); - - // set the stats output filenames based on environment variables and defaults - outputFileName = statsFileName; - eventsFileName = eventsFileName ? eventsFileName : "events.dat"; - plotFileName = plotFileName ? plotFileName : "events.plt"; - - // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes - printPerThreadFlag = __kmp_str_match_true(threadStats); - printPerThreadEventsFlag = __kmp_str_match_true(threadEvents); - - if(printPerThreadEventsFlag) { - // assigns a color to each timer for printing - setupEventColors(); - } else { - // will clear flag so that no event will be logged - timeStat::clearEventFlags(); - } - - return; -} - -void kmp_stats_output_module::setupEventColors() { - int i; - int globalColorIndex = 0; - int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color); - for(i=0;igetCount() != 0) { - char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T'; - fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str()); - } - } - } else { // Counters - for (int s = 0; sformat(' ', true).c_str()); - } - } -} - -void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters) -{ - // We print all the counters even if they are zero. - // That makes it easier to slice them into a spreadsheet if you need to. 
- fprintf (statsOut, "\nCounter, Count\n"); - for (int c = 0; cgetValue(), 9, ' ').c_str()); - } -} - -void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) { - // sort by start time before printing - theEvents->sort(); - for (int i = 0; i < theEvents->size(); i++) { - kmp_stats_event ev = theEvents->at(i); - rgb_color color = getEventColor(ev.getTimerName()); - fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", - gtid, - ev.getStart(), - ev.getStop(), - 1.2 - (ev.getNestLevel() * 0.2), - color.r, color.g, color.b, - timeStat::name(ev.getTimerName()) - ); - } - return; -} - -void kmp_stats_output_module::windupExplicitTimers() -{ - // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads - // and say "it's over". - // If the timer wasn't running, this won't record anything anyway. - kmp_stats_list::iterator it; - for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { - for (int timer=0; timergetExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer); - } - } -} - -void kmp_stats_output_module::printPloticusFile() { - int i; - int size = __kmp_stats_list.size(); - FILE* plotOut = fopen(plotFileName, "w+"); - - fprintf(plotOut, "#proc page\n" - " pagesize: 15 10\n" - " scale: 1.0\n\n"); - - fprintf(plotOut, "#proc getdata\n" - " file: %s\n\n", - eventsFileName); - - fprintf(plotOut, "#proc areadef\n" - " title: OpenMP Sampling Timeline\n" - " titledetails: align=center size=16\n" - " rectangle: 1 1 13 9\n" - " xautorange: datafield=2,3\n" - " yautorange: -1 %d\n\n", - size); - - fprintf(plotOut, "#proc xaxis\n" - " stubs: inc\n" - " stubdetails: size=12\n" - " label: Time (ticks)\n" - " labeldetails: size=14\n\n"); - - fprintf(plotOut, "#proc yaxis\n" - " stubs: inc 1\n" - " stubrange: 0 %d\n" - " stubdetails: size=12\n" - " label: Thread #\n" - " labeldetails: size=14\n\n", - size-1); - - fprintf(plotOut, "#proc 
bars\n" - " exactcolorfield: 5\n" - " axis: x\n" - " locfield: 1\n" - " segmentfields: 2 3\n" - " barwidthfield: 4\n\n"); - - // create legend entries corresponding to the timer color - for(i=0;igetGtid(); - // Output per thread stats if requested. - if (perThreadPrintingEnabled()) { - fprintf (statsOut, "Thread %d\n", t); - printStats(statsOut, (*it)->getTimers(), true); - printCounters(statsOut, (*it)->getCounters()); - fprintf(statsOut,"\n"); - } - // Output per thread events if requested. - if (eventPrintingEnabled()) { - kmp_stats_event_vector events = (*it)->getEventVector(); - printEvents(eventsOut, &events, t); - } - - for (int s = 0; sgetTimer(timer_e(s)); - allStats[s] += *threadStat; - } - - // Special handling for synthesized statistics. - // These just have to be coded specially here for now. - // At present we only have a few: - // The total parallel work done in each thread. - // The variance here makes it easy to see load imbalance over the whole program (though, of course, - // it's possible to have a code with awful load balance in every parallel region but perfect load - // balance oever the whole program.) - // The time spent in barriers in each thread. - allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal()); - - // Time in explicit barriers. 
- allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal()); - - for (int c = 0; cgetCounter(counter_e(c))->getValue()); - } - } - - if (eventPrintingEnabled()) { - printPloticusFile(); - fclose(eventsOut); - } - - fprintf (statsOut, "Aggregate for all threads\n"); - printStats (statsOut, &allStats[0], true); - fprintf (statsOut, "\n"); - printStats (statsOut, &allCounters[0], false); - - if (statsOut != stderr) - fclose(statsOut); - -} - -/* ************************************************** */ -/* ************* exported C functions ************** */ - -// no name mangling for these functions, we want the c files to be able to get at these functions -extern "C" { - -void __kmp_reset_stats() -{ - kmp_stats_list::iterator it; - for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { - timeStat * timers = (*it)->getTimers(); - counter * counters = (*it)->getCounters(); - explicitTimer * eTimers = (*it)->getExplicitTimers(); - - for (int t = 0; tresetEventVector(); - - // May need to restart the explicit timers in thread zero? - } - KMP_START_EXPLICIT_TIMER(OMP_serial); - KMP_START_EXPLICIT_TIMER(OMP_start_end); -} - -// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already. -void __kmp_output_stats(const char * heading) -{ - __kmp_stats_global_output.outputStats(heading); - __kmp_reset_stats(); -} - -void __kmp_accumulate_stats_at_exit(void) -{ - // Only do this once. - if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0) - return; - - __kmp_output_stats("Statistics on exit"); - return; -} - -void __kmp_stats_init(void) -{ - return; -} - -} // extern "C" - +/** @file kmp_stats.cpp + * Statistics gathering and processing. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. 
See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#include "kmp.h" +#include "kmp_str.h" +#include "kmp_lock.h" +#include "kmp_stats.h" + +#include +#include +#include +#include // for atexit + +#define STRINGIZE2(x) #x +#define STRINGIZE(x) STRINGIZE2(x) + +#define expandName(name,flags,ignore) {STRINGIZE(name),flags}, +statInfo timeStat::timerInfo[] = { + KMP_FOREACH_TIMER(expandName,0) + {0,0} +}; +const statInfo counter::counterInfo[] = { + KMP_FOREACH_COUNTER(expandName,0) + {0,0} +}; +#undef expandName + +#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0}, +kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = { + KMP_FOREACH_TIMER(expandName,0) + {0.0,0.0,0.0} +}; +#undef expandName + +const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = { + {1.0, 0.0, 0.0}, // red + {1.0, 0.6, 0.0}, // orange + {1.0, 1.0, 0.0}, // yellow + {0.0, 1.0, 0.0}, // green + {0.0, 0.0, 1.0}, // blue + {0.6, 0.2, 0.8}, // purple + {1.0, 0.0, 1.0}, // magenta + {0.0, 0.4, 0.2}, // dark green + {1.0, 1.0, 0.6}, // light yellow + {0.6, 0.4, 0.6}, // dirty purple + {0.0, 1.0, 1.0}, // cyan + {1.0, 0.4, 0.8}, // pink + {0.5, 0.5, 0.5}, // grey + {0.8, 0.7, 0.5}, // brown + {0.6, 0.6, 1.0}, // light blue + {1.0, 0.7, 0.5}, // peach + {0.8, 0.5, 1.0}, // lavender + {0.6, 0.0, 0.0}, // dark red + {0.7, 0.6, 0.0}, // gold + {0.0, 0.0, 0.0} // black +}; + +// Ensure that the atexit handler only runs once. 
+static uint32_t statsPrinted = 0; + +// output interface +static kmp_stats_output_module __kmp_stats_global_output; + +/* ****************************************************** */ +/* ************* statistic member functions ************* */ + +void statistic::addSample(double sample) +{ + double delta = sample - meanVal; + + sampleCount = sampleCount + 1; + meanVal = meanVal + delta/sampleCount; + m2 = m2 + delta*(sample - meanVal); + + minVal = std::min(minVal, sample); + maxVal = std::max(maxVal, sample); +} + +statistic & statistic::operator+= (const statistic & other) +{ + if (sampleCount == 0) + { + *this = other; + return *this; + } + + uint64_t newSampleCount = sampleCount + other.sampleCount; + double dnsc = double(newSampleCount); + double dsc = double(sampleCount); + double dscBydnsc = dsc/dnsc; + double dosc = double(other.sampleCount); + double delta = other.meanVal - meanVal; + + // Try to order these calculations to avoid overflows. + // If this were Fortran, then the compiler would not be able to re-order over brackets. + // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition + // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation + // really is associative (which floating addition isn't...)). 
+ meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc); + m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta; + minVal = std::min (minVal, other.minVal); + maxVal = std::max (maxVal, other.maxVal); + sampleCount = newSampleCount; + + + return *this; +} + +void statistic::scale(double factor) +{ + minVal = minVal*factor; + maxVal = maxVal*factor; + meanVal= meanVal*factor; + m2 = m2*factor*factor; + return; +} + +std::string statistic::format(char unit, bool total) const +{ + std::string result = formatSI(sampleCount,9,' '); + + result = result + std::string(", ") + formatSI(minVal, 9, unit); + result = result + std::string(", ") + formatSI(meanVal, 9, unit); + result = result + std::string(", ") + formatSI(maxVal, 9, unit); + if (total) + result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit); + result = result + std::string(", ") + formatSI(getSD(), 9, unit); + + return result; +} + +/* ********************************************************** */ +/* ************* explicitTimer member functions ************* */ + +void explicitTimer::start(timer_e timerEnumValue) { + startTime = tsc_tick_count::now(); + if(timeStat::logEvent(timerEnumValue)) { + __kmp_stats_thread_ptr->incrementNestValue(); + } + return; +} + +void explicitTimer::stop(timer_e timerEnumValue) { + if (startTime.getValue() == 0) + return; + + tsc_tick_count finishTime = tsc_tick_count::now(); + + //stat->addSample ((tsc_tick_count::now() - startTime).ticks()); + stat->addSample ((finishTime - startTime).ticks()); + + if(timeStat::logEvent(timerEnumValue)) { + __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); + __kmp_stats_thread_ptr->decrementNestValue(); + } + + /* We accept the risk that we drop a sample because it really did start at t==0. 
*/ + startTime = 0; + return; +} + +/* ******************************************************************* */ +/* ************* kmp_stats_event_vector member functions ************* */ + +void kmp_stats_event_vector::deallocate() { + __kmp_free(events); + internal_size = 0; + allocated_size = 0; + events = NULL; +} + +// This function is for qsort() which requires the compare function to return +// either a negative number if event1 < event2, a positive number if event1 > event2 +// or zero if event1 == event2. +// This sorts by start time (lowest to highest). +int compare_two_events(const void* event1, const void* event2) { + kmp_stats_event* ev1 = (kmp_stats_event*)event1; + kmp_stats_event* ev2 = (kmp_stats_event*)event2; + + if(ev1->getStart() < ev2->getStart()) return -1; + else if(ev1->getStart() > ev2->getStart()) return 1; + else return 0; +} + +void kmp_stats_event_vector::sort() { + qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events); +} + +/* *********************************************************** */ +/* ************* kmp_stats_list member functions ************* */ + +// returns a pointer to newly created stats node +kmp_stats_list* kmp_stats_list::push_back(int gtid) { + kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list)); + // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used) + new (newnode) kmp_stats_list(); + newnode->setGtid(gtid); + newnode->prev = this->prev; + newnode->next = this; + newnode->prev->next = newnode; + newnode->next->prev = newnode; + return newnode; +} +void kmp_stats_list::deallocate() { + kmp_stats_list* ptr = this->next; + kmp_stats_list* delptr = this->next; + while(ptr != this) { + delptr = ptr; + ptr=ptr->next; + // placement new means we have to explicitly call destructor. 
+ delptr->_event_vector.deallocate(); + delptr->~kmp_stats_list(); + __kmp_free(delptr); + } +} +kmp_stats_list::iterator kmp_stats_list::begin() { + kmp_stats_list::iterator it; + it.ptr = this->next; + return it; +} +kmp_stats_list::iterator kmp_stats_list::end() { + kmp_stats_list::iterator it; + it.ptr = this; + return it; +} +int kmp_stats_list::size() { + int retval; + kmp_stats_list::iterator it; + for(retval=0, it=begin(); it!=end(); it++, retval++) {} + return retval; +} + +/* ********************************************************************* */ +/* ************* kmp_stats_list::iterator member functions ************* */ + +kmp_stats_list::iterator::iterator() : ptr(NULL) {} +kmp_stats_list::iterator::~iterator() {} +kmp_stats_list::iterator kmp_stats_list::iterator::operator++() { + this->ptr = this->ptr->next; + return *this; +} +kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) { + this->ptr = this->ptr->next; + return *this; +} +kmp_stats_list::iterator kmp_stats_list::iterator::operator--() { + this->ptr = this->ptr->prev; + return *this; +} +kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) { + this->ptr = this->ptr->prev; + return *this; +} +bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) { + return this->ptr!=rhs.ptr; +} +bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) { + return this->ptr==rhs.ptr; +} +kmp_stats_list* kmp_stats_list::iterator::operator*() const { + return this->ptr; +} + +/* *************************************************************** */ +/* ************* kmp_stats_output_module functions ************** */ + +const char* kmp_stats_output_module::outputFileName = NULL; +const char* kmp_stats_output_module::eventsFileName = NULL; +const char* kmp_stats_output_module::plotFileName = NULL; +int kmp_stats_output_module::printPerThreadFlag = 0; +int kmp_stats_output_module::printPerThreadEventsFlag = 0; + +// init() is 
called very near the beginning of execution time in the constructor of __kmp_stats_global_output +void kmp_stats_output_module::init() +{ + char * statsFileName = getenv("KMP_STATS_FILE"); + eventsFileName = getenv("KMP_STATS_EVENTS_FILE"); + plotFileName = getenv("KMP_STATS_PLOT_FILE"); + char * threadStats = getenv("KMP_STATS_THREADS"); + char * threadEvents = getenv("KMP_STATS_EVENTS"); + + // set the stats output filenames based on environment variables and defaults + outputFileName = statsFileName; + eventsFileName = eventsFileName ? eventsFileName : "events.dat"; + plotFileName = plotFileName ? plotFileName : "events.plt"; + + // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes + printPerThreadFlag = __kmp_str_match_true(threadStats); + printPerThreadEventsFlag = __kmp_str_match_true(threadEvents); + + if(printPerThreadEventsFlag) { + // assigns a color to each timer for printing + setupEventColors(); + } else { + // will clear flag so that no event will be logged + timeStat::clearEventFlags(); + } + + return; +} + +void kmp_stats_output_module::setupEventColors() { + int i; + int globalColorIndex = 0; + int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color); + for(i=0;igetCount() != 0) { + char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T'; + fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str()); + } + } + } else { // Counters + for (int s = 0; sformat(' ', true).c_str()); + } + } +} + +void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters) +{ + // We print all the counters even if they are zero. + // That makes it easier to slice them into a spreadsheet if you need to. 
+ fprintf (statsOut, "\nCounter, Count\n"); + for (int c = 0; cgetValue(), 9, ' ').c_str()); + } +} + +void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) { + // sort by start time before printing + theEvents->sort(); + for (int i = 0; i < theEvents->size(); i++) { + kmp_stats_event ev = theEvents->at(i); + rgb_color color = getEventColor(ev.getTimerName()); + fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", + gtid, + ev.getStart(), + ev.getStop(), + 1.2 - (ev.getNestLevel() * 0.2), + color.r, color.g, color.b, + timeStat::name(ev.getTimerName()) + ); + } + return; +} + +void kmp_stats_output_module::windupExplicitTimers() +{ + // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads + // and say "it's over". + // If the timer wasn't running, this won't record anything anyway. + kmp_stats_list::iterator it; + for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + for (int timer=0; timergetExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer); + } + } +} + +void kmp_stats_output_module::printPloticusFile() { + int i; + int size = __kmp_stats_list.size(); + FILE* plotOut = fopen(plotFileName, "w+"); + + fprintf(plotOut, "#proc page\n" + " pagesize: 15 10\n" + " scale: 1.0\n\n"); + + fprintf(plotOut, "#proc getdata\n" + " file: %s\n\n", + eventsFileName); + + fprintf(plotOut, "#proc areadef\n" + " title: OpenMP Sampling Timeline\n" + " titledetails: align=center size=16\n" + " rectangle: 1 1 13 9\n" + " xautorange: datafield=2,3\n" + " yautorange: -1 %d\n\n", + size); + + fprintf(plotOut, "#proc xaxis\n" + " stubs: inc\n" + " stubdetails: size=12\n" + " label: Time (ticks)\n" + " labeldetails: size=14\n\n"); + + fprintf(plotOut, "#proc yaxis\n" + " stubs: inc 1\n" + " stubrange: 0 %d\n" + " stubdetails: size=12\n" + " label: Thread #\n" + " labeldetails: size=14\n\n", + size-1); + + fprintf(plotOut, "#proc 
bars\n" + " exactcolorfield: 5\n" + " axis: x\n" + " locfield: 1\n" + " segmentfields: 2 3\n" + " barwidthfield: 4\n\n"); + + // create legend entries corresponding to the timer color + for(i=0;igetGtid(); + // Output per thread stats if requested. + if (perThreadPrintingEnabled()) { + fprintf (statsOut, "Thread %d\n", t); + printStats(statsOut, (*it)->getTimers(), true); + printCounters(statsOut, (*it)->getCounters()); + fprintf(statsOut,"\n"); + } + // Output per thread events if requested. + if (eventPrintingEnabled()) { + kmp_stats_event_vector events = (*it)->getEventVector(); + printEvents(eventsOut, &events, t); + } + + for (int s = 0; sgetTimer(timer_e(s)); + allStats[s] += *threadStat; + } + + // Special handling for synthesized statistics. + // These just have to be coded specially here for now. + // At present we only have a few: + // The total parallel work done in each thread. + // The variance here makes it easy to see load imbalance over the whole program (though, of course, + // it's possible to have a code with awful load balance in every parallel region but perfect load + // balance oever the whole program.) + // The time spent in barriers in each thread. + allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal()); + + // Time in explicit barriers. 
+ allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal()); + + for (int c = 0; cgetCounter(counter_e(c))->getValue()); + } + } + + if (eventPrintingEnabled()) { + printPloticusFile(); + fclose(eventsOut); + } + + fprintf (statsOut, "Aggregate for all threads\n"); + printStats (statsOut, &allStats[0], true); + fprintf (statsOut, "\n"); + printStats (statsOut, &allCounters[0], false); + + if (statsOut != stderr) + fclose(statsOut); + +} + +/* ************************************************** */ +/* ************* exported C functions ************** */ + +// no name mangling for these functions, we want the c files to be able to get at these functions +extern "C" { + +void __kmp_reset_stats() +{ + kmp_stats_list::iterator it; + for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + timeStat * timers = (*it)->getTimers(); + counter * counters = (*it)->getCounters(); + explicitTimer * eTimers = (*it)->getExplicitTimers(); + + for (int t = 0; tresetEventVector(); + + // May need to restart the explicit timers in thread zero? + } + KMP_START_EXPLICIT_TIMER(OMP_serial); + KMP_START_EXPLICIT_TIMER(OMP_start_end); +} + +// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already. +void __kmp_output_stats(const char * heading) +{ + __kmp_stats_global_output.outputStats(heading); + __kmp_reset_stats(); +} + +void __kmp_accumulate_stats_at_exit(void) +{ + // Only do this once. 
+ if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0) + return; + + __kmp_output_stats("Statistics on exit"); + return; +} + +void __kmp_stats_init(void) +{ + return; +} + +} // extern "C" + diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats.h b/contrib/libs/cxxsupp/openmp/kmp_stats.h index c52c9644561..20cec3efccf 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_stats.h +++ b/contrib/libs/cxxsupp/openmp/kmp_stats.h @@ -1,748 +1,748 @@ -#ifndef KMP_STATS_H -#define KMP_STATS_H - -/** @file kmp_stats.h - * Functions for collecting statistics. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp_config.h" - -#if KMP_STATS_ENABLED -/* - * Statistics accumulator. - * Accumulates number of samples and computes min, max, mean, standard deviation on the fly. - * - * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm - */ - -#include -#include -#include -#include -#include // placement new -#include "kmp_stats_timing.h" - -/* - * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and - * are intended for the runtime library developer. - */ -// #define KMP_DEVELOPER_STATS 1 - -/*! 
- * @ingroup STATS_GATHERING - * \brief flags to describe the statistic ( timers or counter ) - * -*/ -class stats_flags_e { - public: - const static int onlyInMaster = 1<<0; //!< statistic is valid only for master - const static int noUnits = 1<<1; //!< statistic doesn't need units printed next to it in output - const static int synthesized = 1<<2; //!< statistic's value is created atexit time in the __kmp_output_stats function - const static int notInMaster = 1<<3; //!< statistic is valid for non-master threads - const static int logEvent = 1<<4; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers) -}; - -/*! - * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h - * - * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \details A counter counts the occurrence of some event. - * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread - * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement). - * The min,mean,max are therefore the values for the threads. - * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do. - * All of the tables and printing is generated from this macro. 
- * Format is "macro(name, flags, arg)" - * - * @ingroup STATS_GATHERING -*/ -#define KMP_FOREACH_COUNTER(macro, arg) \ - macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_NESTED_PARALLEL, 0, arg) \ - macro (OMP_FOR_static, 0, arg) \ - macro (OMP_FOR_dynamic, 0, arg) \ - macro (OMP_DISTRIBUTE, 0, arg) \ - macro (OMP_BARRIER, 0, arg) \ - macro (OMP_CRITICAL,0, arg) \ - macro (OMP_SINGLE, 0, arg) \ - macro (OMP_MASTER, 0, arg) \ - macro (OMP_TEAMS, 0, arg) \ - macro (OMP_set_lock, 0, arg) \ - macro (OMP_test_lock, 0, arg) \ - macro (REDUCE_wait, 0, arg) \ - macro (REDUCE_nowait, 0, arg) \ - macro (OMP_TASKYIELD, 0, arg) \ - macro (TASK_executed, 0, arg) \ - macro (TASK_cancelled, 0, arg) \ - macro (TASK_stolen, 0, arg) \ - macro (LAST,0,arg) - -// OMP_PARALLEL_args -- the number of arguments passed to a fork -// FOR_static_iterations -- Number of available parallel chunks of work in a static for -// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for -// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2. - -/*! - * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h - * - * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads. - * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork" - * as well). - * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level. 
- * Format is "macro(name, flags, arg)" - * - * @ingroup STATS_GATHERING2 - */ -#define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_work, 0, arg) \ - macro (Total_work, stats_flags_e::synthesized, arg) \ - macro (OMP_barrier, 0, arg) \ - macro (Total_barrier, stats_flags_e::synthesized, arg) \ - macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \ - macro (FOR_static_scheduling, 0, arg) \ - macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \ - macro (FOR_dynamic_scheduling, 0, arg) \ - macro (TASK_execution, 0, arg) \ - macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \ - macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \ - macro (OMP_single, 0, arg) \ - macro (OMP_master, 0, arg) \ - KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (LAST,0, arg) - - -// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit -// OMP_serial -- thread zero time executing serial code -// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread) -// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed. -// OMP_barrier -- time at "real" barriers -// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread -// FOR_static_scheduling -- time spent doing scheduling for a static "for" -// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for" - -#if (KMP_DEVELOPER_STATS) -// Timers which are of interest tio runtime library developers, not end users. -// THese have to be explicitly enabled in addition to the other stats. 
- -// KMP_fork_barrier -- time in __kmp_fork_barrier -// KMP_join_barrier -- time in __kmp_join_barrier -// KMP_barrier -- time in __kmp_barrier -// KMP_end_split_barrier -- time in __kmp_end_split_barrier -// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy -// KMP_icv_copy -- start/stop timer for any ICV copying -// KMP_linear_gather -- time in __kmp_linear_barrier_gather -// KMP_linear_release -- time in __kmp_linear_barrier_release -// KMP_tree_gather -- time in __kmp_tree_barrier_gather -// KMP_tree_release -- time in __kmp_tree_barrier_release -// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather -// KMP_hyper_release -- time in __kmp_hyper_barrier_release -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (KMP_fork_call, 0, arg) \ - macro (KMP_join_call, 0, arg) \ - macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_barrier, 0, arg) \ - macro (KMP_end_split_barrier, 0, arg) \ - macro (KMP_hier_gather, 0, arg) \ - macro (KMP_hier_release, 0, arg) \ - macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \ - macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \ - macro (KMP_linear_gather, 0, arg) \ - macro (KMP_linear_release, 0, arg) \ - macro (KMP_tree_gather, 0, arg) \ - macro (KMP_tree_release, 0, arg) \ - macro (USER_master_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_resume, stats_flags_e::logEvent, arg) \ - macro (USER_suspend, stats_flags_e::logEvent, arg) \ - macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \ - macro (KMP_allocate_team, 0, arg) \ - macro (KMP_setup_icv_copy, 0, arg) \ - macro (USER_icv_copy, 0, arg) -#else -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) -#endif - -/*! - * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro. 
- * - * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) - * @param arg a user defined argument to send to the user defined macro - * - * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN! - * - * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics). - * We allocate these on a per-thread basis, and explicitly start and stop them. - * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't - * need to be defined here. - * The name here should be the same as that of a timer above. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ - macro(OMP_serial, 0, arg) \ - macro(OMP_start_end, 0, arg) \ - macro(OMP_single, 0, arg) \ - macro(OMP_master, 0, arg) \ - KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ - macro(LAST, 0, arg) - -#if (KMP_DEVELOPER_STATS) -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \ - macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) -#else -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) -#endif - -#define ENUMERATE(name,ignore,prefix) prefix##name, -enum timer_e { - KMP_FOREACH_TIMER(ENUMERATE, TIMER_) -}; - -enum explicit_timer_e { - KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) -}; - -enum counter_e { - KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) -}; -#undef ENUMERATE - -class statistic -{ - double minVal; - double maxVal; - double meanVal; - double m2; - uint64_t sampleCount; - - public: - statistic() { reset(); } - statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {} - - double getMin() const { return minVal; } - double getMean() const { return meanVal; } - double getMax() const { return maxVal; } - uint64_t getCount() const { return sampleCount; } - double getSD() const { 
return sqrt(m2/sampleCount); } - double getTotal() const { return sampleCount*meanVal; } - - void reset() - { - minVal = std::numeric_limits::max(); - maxVal = -std::numeric_limits::max(); - meanVal= 0.0; - m2 = 0.0; - sampleCount = 0; - } - void addSample(double sample); - void scale (double factor); - void scaleDown(double f) { scale (1./f); } - statistic & operator+= (statistic const & other); - - std::string format(char unit, bool total=false) const; -}; - -struct statInfo -{ - const char * name; - uint32_t flags; -}; - -class timeStat : public statistic -{ - static statInfo timerInfo[]; - - public: - timeStat() : statistic() {} - static const char * name(timer_e e) { return timerInfo[e].name; } - static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; } - static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; } - static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; } - static bool synthesized(timer_e e) { return timerInfo[e].flags & stats_flags_e::synthesized; } - static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; } - static void clearEventFlags() { - int i; - for(i=0;i Time - | | | | | | - A B C C B A - start start start end end end - - Then A, B, C will have a nest level of 1, 2, 3 respectively. - These values are then used to calculate the barwidth so you can - see that inside A, B has occurred, and inside B, C has occurred. - Currently, this is shown with A's bar width being larger than B's - bar width, and B's bar width being larger than C's bar width. 
- -**************************************************************** */ -class kmp_stats_event { - uint64_t start; - uint64_t stop; - int nest_level; - timer_e timer_name; - public: - kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} - kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} - inline uint64_t getStart() const { return start; } - inline uint64_t getStop() const { return stop; } - inline int getNestLevel() const { return nest_level; } - inline timer_e getTimerName() const { return timer_name; } -}; - -/* **************************************************************** - Class to implement a dynamically expandable array of events - - --------------------------------------------------------- - | event 1 | event 2 | event 3 | event 4 | ... | event N | - --------------------------------------------------------- - - An event is pushed onto the back of this array at every - explicitTimer->stop() call. The event records the thread #, - start time, stop time, and nest level related to the bar width. - - The event vector starts at size INIT_SIZE and grows (doubles in size) - if needed. An implication of this behavior is that log(N) - reallocations are needed (where N is number of events). If you want - to avoid reallocations, then set INIT_SIZE to a large value. 
- - the interface to this class is through six operations: - 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory - 2) size() -- returns the number of valid elements in the vector - 3) push_back(start, stop, nest, timer_name) -- pushes an event onto - the back of the array - 4) deallocate() -- frees all memory associated with the vector - 5) sort() -- sorts the vector by start time - 6) operator[index] or at(index) -- returns event reference at that index - -**************************************************************** */ -class kmp_stats_event_vector { - kmp_stats_event* events; - int internal_size; - int allocated_size; - static const int INIT_SIZE = 1024; - public: - kmp_stats_event_vector() { - events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE); - internal_size = 0; - allocated_size = INIT_SIZE; - } - ~kmp_stats_event_vector() {} - inline void reset() { internal_size = 0; } - inline int size() const { return internal_size; } - void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { - int i; - if(internal_size == allocated_size) { - kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2); - for(i=0;i |---| ---> |---| ---> |---| ---> ... next - | | | | | | | | - |---| <--- |---| <--- |---| <--- |---| <--- ... prev - Sentinel first second third - Node node node node - - The Sentinel Node is the user handle on the list. - The first node corresponds to thread 0's statistics. - The second node corresponds to thread 1's statistics and so on... - - Each node has a _timers, _counters, and _explicitTimers array to - hold that thread's statistics. The _explicitTimers - point to the correct _timer and update its statistics at every stop() call. - The explicitTimers' pointers are set up in the constructor. - Each node also has an event vector to hold that thread's timing events. 
- The event vector expands as necessary and records the start-stop times - for each timer. - - The nestLevel variable is for plotting events and is related - to the bar width in the timeline graph. - - Every thread will have a __thread local pointer to its node in - the list. The sentinel node is used by the master thread to - store "dummy" statistics before __kmp_create_worker() is called. - -**************************************************************** */ -class kmp_stats_list { - int gtid; - timeStat _timers[TIMER_LAST+1]; - counter _counters[COUNTER_LAST+1]; - explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1]; - int _nestLevel; // one per thread - kmp_stats_event_vector _event_vector; - kmp_stats_list* next; - kmp_stats_list* prev; - public: - kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) { -#define doInit(name,ignore1,ignore2) \ - getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); - KMP_FOREACH_EXPLICIT_TIMER(doInit,0); -#undef doInit - } - ~kmp_stats_list() { } - inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; } - inline counter * getCounter(counter_e idx) { return &_counters[idx]; } - inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; } - inline timeStat * getTimers() { return _timers; } - inline counter * getCounters() { return _counters; } - inline explicitTimer * getExplicitTimers() { return _explicitTimers; } - inline kmp_stats_event_vector & getEventVector() { return _event_vector; } - inline void resetEventVector() { _event_vector.reset(); } - inline void incrementNestValue() { _nestLevel++; } - inline int getNestValue() { return _nestLevel; } - inline void decrementNestValue() { _nestLevel--; } - inline int getGtid() const { return gtid; } - inline void setGtid(int newgtid) { gtid = newgtid; } - kmp_stats_list* push_back(int gtid); // returns newly created list node - inline void push_event(uint64_t start_time, uint64_t stop_time, int 
nest_level, timer_e name) { - _event_vector.push_back(start_time, stop_time, nest_level, name); - } - void deallocate(); - class iterator; - kmp_stats_list::iterator begin(); - kmp_stats_list::iterator end(); - int size(); - class iterator { - kmp_stats_list* ptr; - friend kmp_stats_list::iterator kmp_stats_list::begin(); - friend kmp_stats_list::iterator kmp_stats_list::end(); - public: - iterator(); - ~iterator(); - iterator operator++(); - iterator operator++(int dummy); - iterator operator--(); - iterator operator--(int dummy); - bool operator!=(const iterator & rhs); - bool operator==(const iterator & rhs); - kmp_stats_list* operator*() const; // dereference operator - }; -}; - -/* **************************************************************** - Class to encapsulate all output functions and the environment variables - - This module holds filenames for various outputs (normal stats, events, plot file), - as well as coloring information for the plot file. - - The filenames and flags variables are read from environment variables. - These are read once by the constructor of the global variable __kmp_stats_output - which calls init(). - - During this init() call, event flags for the timeStat::timerInfo[] global array - are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). - - The only interface function that is public is outputStats(heading). 
This function - should print out everything it needs to, either to files or stderr, - depending on the environment variables described below - - ENVIRONMENT VARIABLES: - KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file, - otherwise, print to stderr - KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either - KMP_STATS_FILE or stderr - KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, - otherwise, the plot file is sent to "events.plt" - KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events - KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, - otherwise, output is sent to "events.dat" - -**************************************************************** */ -class kmp_stats_output_module { - - public: - struct rgb_color { - float r; - float g; - float b; - }; - - private: - static const char* outputFileName; - static const char* eventsFileName; - static const char* plotFileName; - static int printPerThreadFlag; - static int printPerThreadEventsFlag; - static const rgb_color globalColorArray[]; - static rgb_color timerColorInfo[]; - - void init(); - static void setupEventColors(); - static void printPloticusFile(); - static void printStats(FILE *statsOut, statistic const * theStats, bool areTimers); - static void printCounters(FILE * statsOut, counter const * theCounters); - static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid); - static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } - static void windupExplicitTimers(); - bool eventPrintingEnabled() { - if(printPerThreadEventsFlag) return true; - else return false; - } - bool perThreadPrintingEnabled() { - if(printPerThreadFlag) return true; - else return false; - } - - public: - kmp_stats_output_module() { init(); } - void outputStats(const char* heading); -}; - -#ifdef __cplusplus -extern "C" { -#endif -void __kmp_stats_init(); 
-void __kmp_reset_stats(); -void __kmp_output_stats(const char *); -void __kmp_accumulate_stats_at_exit(void); -// thread local pointer to stats node within list -extern __thread kmp_stats_list* __kmp_stats_thread_ptr; -// head to stats list. -extern kmp_stats_list __kmp_stats_list; -// lock for __kmp_stats_list -extern kmp_tas_lock_t __kmp_stats_lock; -// reference start time -extern tsc_tick_count __kmp_stats_start_time; -// interface to output -extern kmp_stats_output_module __kmp_stats_output; - -#ifdef __cplusplus -} -#endif - -// Simple, standard interfaces that drop out completely if stats aren't enabled - - -/*! - * \brief Uses specified timer (name) to time code block. - * - * @param name timer name as specified under the KMP_FOREACH_TIMER() macro - * - * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block - * and use the destructor to stop the timer. Convenient! - * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block. - * I don't think that's a problem. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_TIME_BLOCK(name) \ - blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name) - -/*! - * \brief Adds value to specified timer (name). - * - * @param name timer name as specified under the KMP_FOREACH_TIMER() macro - * @param value double precision sample value to add to statistics for the timer - * - * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer statistics. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_COUNT_VALUE(name, value) \ - __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) - -/*! - * \brief Increments specified counter (name). - * - * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro - * - * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics counter for the executing thread. 
- * - * @ingroup STATS_GATHERING -*/ -#define KMP_COUNT_BLOCK(name) \ - __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() - -/*! - * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro. - * - * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro - * - * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER() - * macro to stop the timer unlike the KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end - * of the code block. All explicit timers are stopped at library exit time before the final statistics are outputted. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_START_EXPLICIT_TIMER(name) \ - __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name) - -/*! - * \brief "Stops" an explicit timer. - * - * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro - * - * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name) - * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset. - * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_STOP_EXPLICIT_TIMER(name) \ - __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name) - -/*! - * \brief Outputs the current thread statistics and reset them. - * - * @param heading_string heading put above the final stats output - * - * \details Explicitly stops all timers and outputs all stats. 
- * Environment variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a filename instead of stderr - * Environment variable, `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific stats - * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out thread - * specific stats, or it can be undefined (not specified in the environment) and thread specific stats won't be printed - * It should be noted that all statistics are reset when this macro is called. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_OUTPUT_STATS(heading_string) \ - __kmp_output_stats(heading_string) - -/*! - * \brief resets all stats (counters to 0, timers to 0 elapsed ticks) - * - * \details Reset all stats for all threads. - * - * @ingroup STATS_GATHERING -*/ -#define KMP_RESET_STATS() __kmp_reset_stats() - -#if (KMP_DEVELOPER_STATS) -# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) -# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v) -# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) -# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) -# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) -#else -// Null definitions -# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) -# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#endif - -#else // KMP_STATS_ENABLED - -// Null definitions -#define KMP_TIME_BLOCK(n) ((void)0) -#define KMP_COUNT_VALUE(n,v) ((void)0) -#define KMP_COUNT_BLOCK(n) ((void)0) -#define KMP_START_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) - -#define KMP_OUTPUT_STATS(heading_string) ((void)0) -#define KMP_RESET_STATS() ((void)0) - -#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) -#define 
KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#endif // KMP_STATS_ENABLED - -#endif // KMP_STATS_H +#ifndef KMP_STATS_H +#define KMP_STATS_H + +/** @file kmp_stats.h + * Functions for collecting statistics. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#include "kmp_config.h" + +#if KMP_STATS_ENABLED +/* + * Statistics accumulator. + * Accumulates number of samples and computes min, max, mean, standard deviation on the fly. + * + * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm + */ + +#include +#include +#include +#include +#include // placement new +#include "kmp_stats_timing.h" + +/* + * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and + * are intended for the runtime library developer. + */ +// #define KMP_DEVELOPER_STATS 1 + +/*! + * @ingroup STATS_GATHERING + * \brief flags to describe the statistic ( timers or counter ) + * +*/ +class stats_flags_e { + public: + const static int onlyInMaster = 1<<0; //!< statistic is valid only for master + const static int noUnits = 1<<1; //!< statistic doesn't need units printed next to it in output + const static int synthesized = 1<<2; //!< statistic's value is created atexit time in the __kmp_output_stats function + const static int notInMaster = 1<<3; //!< statistic is valid for non-master threads + const static int logEvent = 1<<4; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers) +}; + +/*! 
+ * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h + * + * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg) + * @param arg a user defined argument to send to the user defined macro + * + * \details A counter counts the occurrence of some event. + * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread + * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement). + * The min,mean,max are therefore the values for the threads. + * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do. + * All of the tables and printing is generated from this macro. + * Format is "macro(name, flags, arg)" + * + * @ingroup STATS_GATHERING +*/ +#define KMP_FOREACH_COUNTER(macro, arg) \ + macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_NESTED_PARALLEL, 0, arg) \ + macro (OMP_FOR_static, 0, arg) \ + macro (OMP_FOR_dynamic, 0, arg) \ + macro (OMP_DISTRIBUTE, 0, arg) \ + macro (OMP_BARRIER, 0, arg) \ + macro (OMP_CRITICAL,0, arg) \ + macro (OMP_SINGLE, 0, arg) \ + macro (OMP_MASTER, 0, arg) \ + macro (OMP_TEAMS, 0, arg) \ + macro (OMP_set_lock, 0, arg) \ + macro (OMP_test_lock, 0, arg) \ + macro (REDUCE_wait, 0, arg) \ + macro (REDUCE_nowait, 0, arg) \ + macro (OMP_TASKYIELD, 0, arg) \ + macro (TASK_executed, 0, arg) \ + macro (TASK_cancelled, 0, arg) \ + macro (TASK_stolen, 0, arg) \ + macro (LAST,0,arg) + +// OMP_PARALLEL_args -- the number of arguments passed to a fork +// FOR_static_iterations -- Number of available parallel chunks of work in a static for +// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for +// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2. + +/*! 
+ * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h + * + * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) + * @param arg a user defined argument to send to the user defined macro + * + * \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads. + * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork" + * as well). + * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level. + * Format is "macro(name, flags, arg)" + * + * @ingroup STATS_GATHERING2 + */ +#define KMP_FOREACH_TIMER(macro, arg) \ + macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_work, 0, arg) \ + macro (Total_work, stats_flags_e::synthesized, arg) \ + macro (OMP_barrier, 0, arg) \ + macro (Total_barrier, stats_flags_e::synthesized, arg) \ + macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \ + macro (FOR_static_scheduling, 0, arg) \ + macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \ + macro (FOR_dynamic_scheduling, 0, arg) \ + macro (TASK_execution, 0, arg) \ + macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \ + macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \ + macro (OMP_single, 0, arg) \ + macro (OMP_master, 0, arg) \ + KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ + macro (LAST,0, arg) + + +// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit +// OMP_serial -- thread zero time executing serial code +// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread) +// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed. 
+// OMP_barrier -- time at "real" barriers +// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread +// FOR_static_scheduling -- time spent doing scheduling for a static "for" +// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for" + +#if (KMP_DEVELOPER_STATS) +// Timers which are of interest tio runtime library developers, not end users. +// THese have to be explicitly enabled in addition to the other stats. + +// KMP_fork_barrier -- time in __kmp_fork_barrier +// KMP_join_barrier -- time in __kmp_join_barrier +// KMP_barrier -- time in __kmp_barrier +// KMP_end_split_barrier -- time in __kmp_end_split_barrier +// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy +// KMP_icv_copy -- start/stop timer for any ICV copying +// KMP_linear_gather -- time in __kmp_linear_barrier_gather +// KMP_linear_release -- time in __kmp_linear_barrier_release +// KMP_tree_gather -- time in __kmp_tree_barrier_gather +// KMP_tree_release -- time in __kmp_tree_barrier_release +// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather +// KMP_hyper_release -- time in __kmp_hyper_barrier_release +# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ + macro (KMP_fork_call, 0, arg) \ + macro (KMP_join_call, 0, arg) \ + macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \ + macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \ + macro (KMP_barrier, 0, arg) \ + macro (KMP_end_split_barrier, 0, arg) \ + macro (KMP_hier_gather, 0, arg) \ + macro (KMP_hier_release, 0, arg) \ + macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \ + macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \ + macro (KMP_linear_gather, 0, arg) \ + macro (KMP_linear_release, 0, arg) \ + macro (KMP_tree_gather, 0, arg) \ + macro (KMP_tree_release, 0, arg) \ + macro (USER_master_invoke, stats_flags_e::logEvent, arg) \ + macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \ + macro (USER_resume, stats_flags_e::logEvent, arg) \ + macro 
(USER_suspend, stats_flags_e::logEvent, arg) \ + macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \ + macro (KMP_allocate_team, 0, arg) \ + macro (KMP_setup_icv_copy, 0, arg) \ + macro (USER_icv_copy, 0, arg) +#else +# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) +#endif + +/*! + * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro. + * + * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) + * @param arg a user defined argument to send to the user defined macro + * + * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN! + * + * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics). + * We allocate these on a per-thread basis, and explicitly start and stop them. + * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't + * need to be defined here. + * The name here should be the same as that of a timer above. 
+ * + * @ingroup STATS_GATHERING +*/ +#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ + macro(OMP_serial, 0, arg) \ + macro(OMP_start_end, 0, arg) \ + macro(OMP_single, 0, arg) \ + macro(OMP_master, 0, arg) \ + KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ + macro(LAST, 0, arg) + +#if (KMP_DEVELOPER_STATS) +# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \ + macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) +#else +# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) +#endif + +#define ENUMERATE(name,ignore,prefix) prefix##name, +enum timer_e { + KMP_FOREACH_TIMER(ENUMERATE, TIMER_) +}; + +enum explicit_timer_e { + KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) +}; + +enum counter_e { + KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) +}; +#undef ENUMERATE + +class statistic +{ + double minVal; + double maxVal; + double meanVal; + double m2; + uint64_t sampleCount; + + public: + statistic() { reset(); } + statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {} + + double getMin() const { return minVal; } + double getMean() const { return meanVal; } + double getMax() const { return maxVal; } + uint64_t getCount() const { return sampleCount; } + double getSD() const { return sqrt(m2/sampleCount); } + double getTotal() const { return sampleCount*meanVal; } + + void reset() + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + meanVal= 0.0; + m2 = 0.0; + sampleCount = 0; + } + void addSample(double sample); + void scale (double factor); + void scaleDown(double f) { scale (1./f); } + statistic & operator+= (statistic const & other); + + std::string format(char unit, bool total=false) const; +}; + +struct statInfo +{ + const char * name; + uint32_t flags; +}; + +class timeStat : public statistic +{ + static statInfo timerInfo[]; + + public: + timeStat() : statistic() {} + static const char * name(timer_e e) { return timerInfo[e].name; } + 
static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; } + static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; } + static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; } + static bool synthesized(timer_e e) { return timerInfo[e].flags & stats_flags_e::synthesized; } + static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; } + static void clearEventFlags() { + int i; + for(i=0;i Time + | | | | | | + A B C C B A + start start start end end end + + Then A, B, C will have a nest level of 1, 2, 3 respectively. + These values are then used to calculate the barwidth so you can + see that inside A, B has occurred, and inside B, C has occurred. + Currently, this is shown with A's bar width being larger than B's + bar width, and B's bar width being larger than C's bar width. + +**************************************************************** */ +class kmp_stats_event { + uint64_t start; + uint64_t stop; + int nest_level; + timer_e timer_name; + public: + kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} + kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} + inline uint64_t getStart() const { return start; } + inline uint64_t getStop() const { return stop; } + inline int getNestLevel() const { return nest_level; } + inline timer_e getTimerName() const { return timer_name; } +}; + +/* **************************************************************** + Class to implement a dynamically expandable array of events + + --------------------------------------------------------- + | event 1 | event 2 | event 3 | event 4 | ... | event N | + --------------------------------------------------------- + + An event is pushed onto the back of this array at every + explicitTimer->stop() call. 
The event records the thread #, + start time, stop time, and nest level related to the bar width. + + The event vector starts at size INIT_SIZE and grows (doubles in size) + if needed. An implication of this behavior is that log(N) + reallocations are needed (where N is number of events). If you want + to avoid reallocations, then set INIT_SIZE to a large value. + + the interface to this class is through six operations: + 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory + 2) size() -- returns the number of valid elements in the vector + 3) push_back(start, stop, nest, timer_name) -- pushes an event onto + the back of the array + 4) deallocate() -- frees all memory associated with the vector + 5) sort() -- sorts the vector by start time + 6) operator[index] or at(index) -- returns event reference at that index + +**************************************************************** */ +class kmp_stats_event_vector { + kmp_stats_event* events; + int internal_size; + int allocated_size; + static const int INIT_SIZE = 1024; + public: + kmp_stats_event_vector() { + events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE); + internal_size = 0; + allocated_size = INIT_SIZE; + } + ~kmp_stats_event_vector() {} + inline void reset() { internal_size = 0; } + inline int size() const { return internal_size; } + void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { + int i; + if(internal_size == allocated_size) { + kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2); + for(i=0;i |---| ---> |---| ---> |---| ---> ... next + | | | | | | | | + |---| <--- |---| <--- |---| <--- |---| <--- ... prev + Sentinel first second third + Node node node node + + The Sentinel Node is the user handle on the list. + The first node corresponds to thread 0's statistics. + The second node corresponds to thread 1's statistics and so on... 
+ + Each node has a _timers, _counters, and _explicitTimers array to + hold that thread's statistics. The _explicitTimers + point to the correct _timer and update its statistics at every stop() call. + The explicitTimers' pointers are set up in the constructor. + Each node also has an event vector to hold that thread's timing events. + The event vector expands as necessary and records the start-stop times + for each timer. + + The nestLevel variable is for plotting events and is related + to the bar width in the timeline graph. + + Every thread will have a __thread local pointer to its node in + the list. The sentinel node is used by the master thread to + store "dummy" statistics before __kmp_create_worker() is called. + +**************************************************************** */ +class kmp_stats_list { + int gtid; + timeStat _timers[TIMER_LAST+1]; + counter _counters[COUNTER_LAST+1]; + explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1]; + int _nestLevel; // one per thread + kmp_stats_event_vector _event_vector; + kmp_stats_list* next; + kmp_stats_list* prev; + public: + kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) { +#define doInit(name,ignore1,ignore2) \ + getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); + KMP_FOREACH_EXPLICIT_TIMER(doInit,0); +#undef doInit + } + ~kmp_stats_list() { } + inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; } + inline counter * getCounter(counter_e idx) { return &_counters[idx]; } + inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; } + inline timeStat * getTimers() { return _timers; } + inline counter * getCounters() { return _counters; } + inline explicitTimer * getExplicitTimers() { return _explicitTimers; } + inline kmp_stats_event_vector & getEventVector() { return _event_vector; } + inline void resetEventVector() { _event_vector.reset(); } + inline void incrementNestValue() { _nestLevel++; } + inline int 
getNestValue() { return _nestLevel; } + inline void decrementNestValue() { _nestLevel--; } + inline int getGtid() const { return gtid; } + inline void setGtid(int newgtid) { gtid = newgtid; } + kmp_stats_list* push_back(int gtid); // returns newly created list node + inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { + _event_vector.push_back(start_time, stop_time, nest_level, name); + } + void deallocate(); + class iterator; + kmp_stats_list::iterator begin(); + kmp_stats_list::iterator end(); + int size(); + class iterator { + kmp_stats_list* ptr; + friend kmp_stats_list::iterator kmp_stats_list::begin(); + friend kmp_stats_list::iterator kmp_stats_list::end(); + public: + iterator(); + ~iterator(); + iterator operator++(); + iterator operator++(int dummy); + iterator operator--(); + iterator operator--(int dummy); + bool operator!=(const iterator & rhs); + bool operator==(const iterator & rhs); + kmp_stats_list* operator*() const; // dereference operator + }; +}; + +/* **************************************************************** + Class to encapsulate all output functions and the environment variables + + This module holds filenames for various outputs (normal stats, events, plot file), + as well as coloring information for the plot file. + + The filenames and flags variables are read from environment variables. + These are read once by the constructor of the global variable __kmp_stats_output + which calls init(). + + During this init() call, event flags for the timeStat::timerInfo[] global array + are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). + + The only interface function that is public is outputStats(heading). 
This function + should print out everything it needs to, either to files or stderr, + depending on the environment variables described below + + ENVIRONMENT VARIABLES: + KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file, + otherwise, print to stderr + KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either + KMP_STATS_FILE or stderr + KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, + otherwise, the plot file is sent to "events.plt" + KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events + KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, + otherwise, output is sent to "events.dat" + +**************************************************************** */ +class kmp_stats_output_module { + + public: + struct rgb_color { + float r; + float g; + float b; + }; + + private: + static const char* outputFileName; + static const char* eventsFileName; + static const char* plotFileName; + static int printPerThreadFlag; + static int printPerThreadEventsFlag; + static const rgb_color globalColorArray[]; + static rgb_color timerColorInfo[]; + + void init(); + static void setupEventColors(); + static void printPloticusFile(); + static void printStats(FILE *statsOut, statistic const * theStats, bool areTimers); + static void printCounters(FILE * statsOut, counter const * theCounters); + static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid); + static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } + static void windupExplicitTimers(); + bool eventPrintingEnabled() { + if(printPerThreadEventsFlag) return true; + else return false; + } + bool perThreadPrintingEnabled() { + if(printPerThreadFlag) return true; + else return false; + } + + public: + kmp_stats_output_module() { init(); } + void outputStats(const char* heading); +}; + +#ifdef __cplusplus +extern "C" { +#endif +void __kmp_stats_init(); 
+void __kmp_reset_stats(); +void __kmp_output_stats(const char *); +void __kmp_accumulate_stats_at_exit(void); +// thread local pointer to stats node within list +extern __thread kmp_stats_list* __kmp_stats_thread_ptr; +// head to stats list. +extern kmp_stats_list __kmp_stats_list; +// lock for __kmp_stats_list +extern kmp_tas_lock_t __kmp_stats_lock; +// reference start time +extern tsc_tick_count __kmp_stats_start_time; +// interface to output +extern kmp_stats_output_module __kmp_stats_output; + +#ifdef __cplusplus +} +#endif + +// Simple, standard interfaces that drop out completely if stats aren't enabled + + +/*! + * \brief Uses specified timer (name) to time code block. + * + * @param name timer name as specified under the KMP_FOREACH_TIMER() macro + * + * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block + * and use the destructor to stop the timer. Convenient! + * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block. + * I don't think that's a problem. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_TIME_BLOCK(name) \ + blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name) + +/*! + * \brief Adds value to specified timer (name). + * + * @param name timer name as specified under the KMP_FOREACH_TIMER() macro + * @param value double precision sample value to add to statistics for the timer + * + * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer statistics. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_COUNT_VALUE(name, value) \ + __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) + +/*! + * \brief Increments specified counter (name). + * + * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro + * + * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics counter for the executing thread. 
+ * + * @ingroup STATS_GATHERING +*/ +#define KMP_COUNT_BLOCK(name) \ + __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() + +/*! + * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro. + * + * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro + * + * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER() + * macro to stop the timer unlike the KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end + * of the code block. All explicit timers are stopped at library exit time before the final statistics are outputted. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_START_EXPLICIT_TIMER(name) \ + __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name) + +/*! + * \brief "Stops" an explicit timer. + * + * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro + * + * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name) + * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset. + * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_STOP_EXPLICIT_TIMER(name) \ + __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name) + +/*! + * \brief Outputs the current thread statistics and reset them. + * + * @param heading_string heading put above the final stats output + * + * \details Explicitly stops all timers and outputs all stats. 
+ * Environment variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a filename instead of stderr + * Environment variable, `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific stats + * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out thread + * specific stats, or it can be undefined (not specified in the environment) and thread specific stats won't be printed + * It should be noted that all statistics are reset when this macro is called. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_OUTPUT_STATS(heading_string) \ + __kmp_output_stats(heading_string) + +/*! + * \brief resets all stats (counters to 0, timers to 0 elapsed ticks) + * + * \details Reset all stats for all threads. + * + * @ingroup STATS_GATHERING +*/ +#define KMP_RESET_STATS() __kmp_reset_stats() + +#if (KMP_DEVELOPER_STATS) +# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) +# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v) +# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) +# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) +# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) +#else +// Null definitions +# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) +# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) +# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) +# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#endif + +#else // KMP_STATS_ENABLED + +// Null definitions +#define KMP_TIME_BLOCK(n) ((void)0) +#define KMP_COUNT_VALUE(n,v) ((void)0) +#define KMP_COUNT_BLOCK(n) ((void)0) +#define KMP_START_EXPLICIT_TIMER(n) ((void)0) +#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) + +#define KMP_OUTPUT_STATS(heading_string) ((void)0) +#define KMP_RESET_STATS() ((void)0) + +#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) +#define 
KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#endif // KMP_STATS_ENABLED + +#endif // KMP_STATS_H diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp index 33e032fc76a..40e29eb0d8d 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp @@ -1,168 +1,168 @@ -/** @file kmp_stats_timing.cpp - * Timing functions - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include -#include - -#include -#include -#include - -#include "kmp.h" -#include "kmp_stats_timing.h" - -using namespace std; - -#if KMP_HAVE_TICK_TIME -# if KMP_MIC -double tsc_tick_count::tick_time() -{ - // pretty bad assumption of 1GHz clock for MIC - return 1/((double)1000*1.e6); -} -# elif KMP_ARCH_X86 || KMP_ARCH_X86_64 -# include -// Extract the value from the CPUID information -double tsc_tick_count::tick_time() -{ - static double result = 0.0; - - if (result == 0.0) - { - kmp_cpuid_t cpuinfo; - char brand[256]; - - __kmp_x86_cpuid(0x80000000, 0, &cpuinfo); - memset(brand, 0, sizeof(brand)); - int ids = cpuinfo.eax; - - for (unsigned int i=2; i<(ids^0x80000000)+2; i++) - __kmp_x86_cpuid(i | 0x80000000, 0, (kmp_cpuid_t*)(brand+(i-2)*sizeof(kmp_cpuid_t))); - - char * start = &brand[0]; - for (;*start == ' '; start++) - ; - - char * end = brand + KMP_STRLEN(brand) - 3; - uint64_t multiplier; - - if (*end == 'M') multiplier = 1000LL*1000LL; - else if (*end == 'G') multiplier = 1000LL*1000LL*1000LL; - else if (*end == 'T') multiplier = 1000LL*1000LL*1000LL*1000LL; - else - { - 
cout << "Error determining multiplier '" << *end << "'\n"; - exit (-1); - } - *end = 0; - while (*end != ' ') end--; - end++; - - double freq = strtod(end, &start); - if (freq == 0.0) - { - cout << "Error calculating frequency " << end << "\n"; - exit (-1); - } - - result = ((double)1.0)/(freq * multiplier); - } - return result; -} -# endif -#endif - -static bool useSI = true; - -// Return a formatted string after normalising the value into -// engineering style and using a suitable unit prefix (e.g. ms, us, ns). -std::string formatSI(double interval, int width, char unit) -{ - std::stringstream os; - - if (useSI) - { - // Preserve accuracy for small numbers, since we only multiply and the positive powers - // of ten are precisely representable. - static struct { double scale; char prefix; } ranges[] = { - {1.e12,'f'}, - {1.e9, 'p'}, - {1.e6, 'n'}, - {1.e3, 'u'}, - {1.0, 'm'}, - {1.e-3,' '}, - {1.e-6,'k'}, - {1.e-9,'M'}, - {1.e-12,'G'}, - {1.e-15,'T'}, - {1.e-18,'P'}, - {1.e-21,'E'}, - {1.e-24,'Z'}, - {1.e-27,'Y'} - }; - - if (interval == 0.0) - { - os << std::setw(width-3) << std::right << "0.00" << std::setw(3) << unit; - return os.str(); - } - - bool negative = false; - if (interval < 0.0) - { - negative = true; - interval = -interval; - } - - for (int i=0; i<(int)(sizeof(ranges)/sizeof(ranges[0])); i++) - { - if (interval*ranges[i].scale < 1.e0) - { - interval = interval * 1000.e0 * ranges[i].scale; - os << std::fixed << std::setprecision(2) << std::setw(width-3) << std::right << - (negative ? 
-interval : interval) << std::setw(2) << ranges[i].prefix << std::setw(1) << unit; - - return os.str(); - } - } - } - os << std::setprecision(2) << std::fixed << std::right << std::setw(width-3) << interval << std::setw(3) << unit; - - return os.str(); -} - -tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes) -{ - timePair lastTimes = times[0]; - tsc_tick_count * startp = lastTimes.get_startp(); - tsc_tick_count * endp = lastTimes.get_endp(); - - for (int i=1; ilater(times[i].get_start()); - (*endp) = endp->later (times[i].get_end()); - } - - return lastTimes.duration(); -} - -std::string timePair::format() const -{ - std::ostringstream oss; - - oss << start.getValue() << ":" << end.getValue() << " = " << (end-start).getValue(); - - return oss.str(); -} +/** @file kmp_stats_timing.cpp + * Timing functions + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include +#include + +#include +#include +#include + +#include "kmp.h" +#include "kmp_stats_timing.h" + +using namespace std; + +#if KMP_HAVE_TICK_TIME +# if KMP_MIC +double tsc_tick_count::tick_time() +{ + // pretty bad assumption of 1GHz clock for MIC + return 1/((double)1000*1.e6); +} +# elif KMP_ARCH_X86 || KMP_ARCH_X86_64 +# include +// Extract the value from the CPUID information +double tsc_tick_count::tick_time() +{ + static double result = 0.0; + + if (result == 0.0) + { + kmp_cpuid_t cpuinfo; + char brand[256]; + + __kmp_x86_cpuid(0x80000000, 0, &cpuinfo); + memset(brand, 0, sizeof(brand)); + int ids = cpuinfo.eax; + + for (unsigned int i=2; i<(ids^0x80000000)+2; i++) + __kmp_x86_cpuid(i | 0x80000000, 0, (kmp_cpuid_t*)(brand+(i-2)*sizeof(kmp_cpuid_t))); + + char * start = &brand[0]; + for (;*start == ' '; start++) + ; + + char * end = brand + KMP_STRLEN(brand) - 3; + uint64_t multiplier; + + if (*end == 'M') multiplier = 1000LL*1000LL; + else if (*end == 'G') multiplier = 1000LL*1000LL*1000LL; + else if (*end == 'T') multiplier = 1000LL*1000LL*1000LL*1000LL; + else + { + cout << "Error determining multiplier '" << *end << "'\n"; + exit (-1); + } + *end = 0; + while (*end != ' ') end--; + end++; + + double freq = strtod(end, &start); + if (freq == 0.0) + { + cout << "Error calculating frequency " << end << "\n"; + exit (-1); + } + + result = ((double)1.0)/(freq * multiplier); + } + return result; +} +# endif +#endif + +static bool useSI = true; + +// Return a formatted string after normalising the value into +// engineering style and using a suitable unit prefix (e.g. ms, us, ns). +std::string formatSI(double interval, int width, char unit) +{ + std::stringstream os; + + if (useSI) + { + // Preserve accuracy for small numbers, since we only multiply and the positive powers + // of ten are precisely representable. 
+ static struct { double scale; char prefix; } ranges[] = { + {1.e12,'f'}, + {1.e9, 'p'}, + {1.e6, 'n'}, + {1.e3, 'u'}, + {1.0, 'm'}, + {1.e-3,' '}, + {1.e-6,'k'}, + {1.e-9,'M'}, + {1.e-12,'G'}, + {1.e-15,'T'}, + {1.e-18,'P'}, + {1.e-21,'E'}, + {1.e-24,'Z'}, + {1.e-27,'Y'} + }; + + if (interval == 0.0) + { + os << std::setw(width-3) << std::right << "0.00" << std::setw(3) << unit; + return os.str(); + } + + bool negative = false; + if (interval < 0.0) + { + negative = true; + interval = -interval; + } + + for (int i=0; i<(int)(sizeof(ranges)/sizeof(ranges[0])); i++) + { + if (interval*ranges[i].scale < 1.e0) + { + interval = interval * 1000.e0 * ranges[i].scale; + os << std::fixed << std::setprecision(2) << std::setw(width-3) << std::right << + (negative ? -interval : interval) << std::setw(2) << ranges[i].prefix << std::setw(1) << unit; + + return os.str(); + } + } + } + os << std::setprecision(2) << std::fixed << std::right << std::setw(width-3) << interval << std::setw(3) << unit; + + return os.str(); +} + +tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes) +{ + timePair lastTimes = times[0]; + tsc_tick_count * startp = lastTimes.get_startp(); + tsc_tick_count * endp = lastTimes.get_endp(); + + for (int i=1; ilater(times[i].get_start()); + (*endp) = endp->later (times[i].get_end()); + } + + return lastTimes.duration(); +} + +std::string timePair::format() const +{ + std::ostringstream oss; + + oss << start.getValue() << ":" << end.getValue() << " = " << (end-start).getValue(); + + return oss.str(); +} diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h index 03b0c92b3bd..83fb85bea32 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h +++ b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h @@ -1,110 +1,110 @@ -#ifndef KMP_STATS_TIMING_H -#define KMP_STATS_TIMING_H - -/** @file kmp_stats_timing.h - * Access to real time clock and timers. 
- */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - - -#include -#include -#include -#include "kmp_os.h" - -class tsc_tick_count { - private: - int64_t my_count; - - public: - class tsc_interval_t { - int64_t value; - explicit tsc_interval_t(int64_t _value) : value(_value) {} - public: - tsc_interval_t() : value(0) {}; // Construct 0 time duration -#if KMP_HAVE_TICK_TIME - double seconds() const; // Return the length of a time interval in seconds -#endif - double ticks() const { return double(value); } - int64_t getValue() const { return value; } - - friend class tsc_tick_count; - - friend tsc_interval_t operator-( - const tsc_tick_count t1, const tsc_tick_count t0); - }; - - tsc_tick_count() : my_count(static_cast(__rdtsc())) {}; - tsc_tick_count(int64_t value) : my_count(value) {}; - int64_t getValue() const { return my_count; } - tsc_tick_count later (tsc_tick_count const other) const { - return my_count > other.my_count ? (*this) : other; - } - tsc_tick_count earlier(tsc_tick_count const other) const { - return my_count < other.my_count ? 
(*this) : other; - } -#if KMP_HAVE_TICK_TIME - static double tick_time(); // returns seconds per cycle (period) of clock -#endif - static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value - friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0); -}; - -inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0) -{ - return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count ); -} - -#if KMP_HAVE_TICK_TIME -inline double tsc_tick_count::tsc_interval_t::seconds() const -{ - return value*tick_time(); -} -#endif - -extern std::string formatSI(double interval, int width, char unit); - -inline std::string formatSeconds(double interval, int width) -{ - return formatSI(interval, width, 'S'); -} - -inline std::string formatTicks(double interval, int width) -{ - return formatSI(interval, width, 'T'); -} - -class timePair -{ - tsc_tick_count KMP_ALIGN_CACHE start; - tsc_tick_count end; - -public: - timePair() : start(-std::numeric_limits::max()), end(-std::numeric_limits::max()) {} - tsc_tick_count get_start() const { return start; } - tsc_tick_count get_end() const { return end; } - tsc_tick_count * get_startp() { return &start; } - tsc_tick_count * get_endp() { return &end; } - - void markStart() { start = tsc_tick_count::now(); } - void markEnd() { end = tsc_tick_count::now(); } - void set_start(tsc_tick_count s) { start = s; } - void set_end (tsc_tick_count e) { end = e; } - - tsc_tick_count::tsc_interval_t duration() const { return end-start; } - std::string format() const; - -}; - -extern tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes); -#endif // KMP_STATS_TIMING_H +#ifndef KMP_STATS_TIMING_H +#define KMP_STATS_TIMING_H + +/** @file kmp_stats_timing.h + * Access to real time clock and timers. 
+ */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + + +#include +#include +#include +#include "kmp_os.h" + +class tsc_tick_count { + private: + int64_t my_count; + + public: + class tsc_interval_t { + int64_t value; + explicit tsc_interval_t(int64_t _value) : value(_value) {} + public: + tsc_interval_t() : value(0) {}; // Construct 0 time duration +#if KMP_HAVE_TICK_TIME + double seconds() const; // Return the length of a time interval in seconds +#endif + double ticks() const { return double(value); } + int64_t getValue() const { return value; } + + friend class tsc_tick_count; + + friend tsc_interval_t operator-( + const tsc_tick_count t1, const tsc_tick_count t0); + }; + + tsc_tick_count() : my_count(static_cast(__rdtsc())) {}; + tsc_tick_count(int64_t value) : my_count(value) {}; + int64_t getValue() const { return my_count; } + tsc_tick_count later (tsc_tick_count const other) const { + return my_count > other.my_count ? (*this) : other; + } + tsc_tick_count earlier(tsc_tick_count const other) const { + return my_count < other.my_count ? 
(*this) : other; + } +#if KMP_HAVE_TICK_TIME + static double tick_time(); // returns seconds per cycle (period) of clock +#endif + static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value + friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0); +}; + +inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0) +{ + return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count ); +} + +#if KMP_HAVE_TICK_TIME +inline double tsc_tick_count::tsc_interval_t::seconds() const +{ + return value*tick_time(); +} +#endif + +extern std::string formatSI(double interval, int width, char unit); + +inline std::string formatSeconds(double interval, int width) +{ + return formatSI(interval, width, 'S'); +} + +inline std::string formatTicks(double interval, int width) +{ + return formatSI(interval, width, 'T'); +} + +class timePair +{ + tsc_tick_count KMP_ALIGN_CACHE start; + tsc_tick_count end; + +public: + timePair() : start(-std::numeric_limits::max()), end(-std::numeric_limits::max()) {} + tsc_tick_count get_start() const { return start; } + tsc_tick_count get_end() const { return end; } + tsc_tick_count * get_startp() { return &start; } + tsc_tick_count * get_endp() { return &end; } + + void markStart() { start = tsc_tick_count::now(); } + void markEnd() { end = tsc_tick_count::now(); } + void set_start(tsc_tick_count s) { start = s; } + void set_end (tsc_tick_count e) { end = e; } + + tsc_tick_count::tsc_interval_t duration() const { return end-start; } + std::string format() const; + +}; + +extern tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes); +#endif // KMP_STATS_TIMING_H diff --git a/contrib/libs/cxxsupp/openmp/kmp_str.c b/contrib/libs/cxxsupp/openmp/kmp_str.c index 8adf3e38769..b5f700551e4 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_str.c +++ b/contrib/libs/cxxsupp/openmp/kmp_str.c @@ -1,883 +1,883 @@ -/* - * 
kmp_str.c -- String manipulation routines. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp_str.h" - -#include // va_* -#include // vsnprintf() -#include // malloc(), realloc() - -#include "kmp.h" -#include "kmp_i18n.h" - -/* - ------------------------------------------------------------------------------------------------ - String buffer. - ------------------------------------------------------------------------------------------------ - - Usage: - - // Declare buffer and initialize it. - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - - // Print to buffer. - __kmp_str_buf_print( & buffer, "Error in file \"%s\" line %d\n", "foo.c", 12 ); - __kmp_str_buf_print( & buffer, " <%s>\n", line ); - - // Use buffer contents. buffer.str is a pointer to data, buffer.used is a number of printed - // characters (not including terminating zero). - write( fd, buffer.str, buffer.used ); - - // Free buffer. - __kmp_str_buf_free( & buffer ); - - // Alternatively, you can detach allocated memory from buffer: - __kmp_str_buf_detach( & buffer ); - return buffer.str; // That memory should be freed eventually. - - - Notes: - - * Buffer users may use buffer.str and buffer.used. Users should not change any fields of - buffer directly. - - * buffer.str is never NULL. If buffer is empty, buffer.str points to empty string (""). - - * For performance reasons, buffer uses stack memory (buffer.bulk) first. If stack memory is - exhausted, buffer allocates memory on heap by malloc(), and reallocates it by realloc() - as amount of used memory grows. - - * Buffer doubles amount of allocated memory each time it is exhausted. 
- - ------------------------------------------------------------------------------------------------ -*/ - -// TODO: __kmp_str_buf_print() can use thread local memory allocator. - -#define KMP_STR_BUF_INVARIANT( b ) \ - { \ - KMP_DEBUG_ASSERT( (b)->str != NULL ); \ - KMP_DEBUG_ASSERT( (b)->size >= sizeof( (b)->bulk ) ); \ - KMP_DEBUG_ASSERT( (b)->size % sizeof( (b)->bulk ) == 0 ); \ - KMP_DEBUG_ASSERT( (unsigned)(b)->used < (b)->size ); \ - KMP_DEBUG_ASSERT( (b)->size == sizeof( (b)->bulk ) ? (b)->str == & (b)->bulk[ 0 ] : 1 ); \ - KMP_DEBUG_ASSERT( (b)->size > sizeof( (b)->bulk ) ? (b)->str != & (b)->bulk[ 0 ] : 1 ); \ - } - -void - __kmp_str_buf_clear( - kmp_str_buf_t * buffer -) { - KMP_STR_BUF_INVARIANT( buffer ); - if ( buffer->used > 0 ) { - buffer->used = 0; - buffer->str[ 0 ] = 0; - }; // if - KMP_STR_BUF_INVARIANT( buffer ); -} // __kmp_str_buf_clear - - -void -__kmp_str_buf_reserve( - kmp_str_buf_t * buffer, - int size -) { - - KMP_STR_BUF_INVARIANT( buffer ); - KMP_DEBUG_ASSERT( size >= 0 ); - - if ( buffer->size < (unsigned int)size ) { - - // Calculate buffer size. - do { - buffer->size *= 2; - } while ( buffer->size < (unsigned int)size ); - - // Enlarge buffer. - if ( buffer->str == & buffer->bulk[ 0 ] ) { - buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size ); - if ( buffer->str == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 ); - } else { - buffer->str = (char *) KMP_INTERNAL_REALLOC( buffer->str, buffer->size ); - if ( buffer->str == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - }; // if - - }; // if - - KMP_DEBUG_ASSERT( buffer->size > 0 ); - KMP_DEBUG_ASSERT( buffer->size >= (unsigned)size ); - KMP_STR_BUF_INVARIANT( buffer ); - -} // __kmp_str_buf_reserve - - -void -__kmp_str_buf_detach( - kmp_str_buf_t * buffer -) { - - KMP_STR_BUF_INVARIANT( buffer ); - - // If internal bulk is used, allocate memory and copy it. 
- if ( buffer->size <= sizeof( buffer->bulk ) ) { - buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size ); - if ( buffer->str == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 ); - }; // if - -} // __kmp_str_buf_detach - - -void -__kmp_str_buf_free( - kmp_str_buf_t * buffer -) { - KMP_STR_BUF_INVARIANT( buffer ); - if ( buffer->size > sizeof( buffer->bulk ) ) { - KMP_INTERNAL_FREE( buffer->str ); - }; // if - buffer->str = buffer->bulk; - buffer->size = sizeof( buffer->bulk ); - buffer->used = 0; - KMP_STR_BUF_INVARIANT( buffer ); -} // __kmp_str_buf_free - - -void -__kmp_str_buf_cat( - kmp_str_buf_t * buffer, - char const * str, - int len -) { - KMP_STR_BUF_INVARIANT( buffer ); - KMP_DEBUG_ASSERT( str != NULL ); - KMP_DEBUG_ASSERT( len >= 0 ); - __kmp_str_buf_reserve( buffer, buffer->used + len + 1 ); - KMP_MEMCPY( buffer->str + buffer->used, str, len ); - buffer->str[ buffer->used + len ] = 0; - buffer->used += len; - KMP_STR_BUF_INVARIANT( buffer ); -} // __kmp_str_buf_cat - - -void -__kmp_str_buf_vprint( - kmp_str_buf_t * buffer, - char const * format, - va_list args -) { - - KMP_STR_BUF_INVARIANT( buffer ); - - for ( ; ; ) { - - int const free = buffer->size - buffer->used; - int rc; - int size; - - // Try to format string. - { - /* - On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it - is called for the second time with the same args. To prevent the crash, we have to - pass a fresh intact copy of args to vsnprintf() on each iteration. - - Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it - seems vsnprintf() does not modify args argument on Windows* OS. - */ - - #if ! KMP_OS_WINDOWS - va_list _args; - __va_copy( _args, args ); // Make copy of args. - #define args _args // Substitute args with its copy, _args. 
- #endif // KMP_OS_WINDOWS - rc = KMP_VSNPRINTF( buffer->str + buffer->used, free, format, args ); - #if ! KMP_OS_WINDOWS - #undef args // Remove substitution. - va_end( _args ); - #endif // KMP_OS_WINDOWS - } - - // No errors, string has been formatted. - if ( rc >= 0 && rc < free ) { - buffer->used += rc; - break; - }; // if - - // Error occurred, buffer is too small. - if ( rc >= 0 ) { - // C99-conforming implementation of vsnprintf returns required buffer size. - size = buffer->used + rc + 1; - } else { - // Older implementations just return -1. Double buffer size. - size = buffer->size * 2; - }; // if - - // Enlarge buffer. - __kmp_str_buf_reserve( buffer, size ); - - // And try again. - - }; // forever - - KMP_DEBUG_ASSERT( buffer->size > 0 ); - KMP_STR_BUF_INVARIANT( buffer ); - -} // __kmp_str_buf_vprint - - -void -__kmp_str_buf_print( - kmp_str_buf_t * buffer, - char const * format, - ... -) { - - va_list args; - va_start( args, format ); - __kmp_str_buf_vprint( buffer, format, args ); - va_end( args ); - -} // __kmp_str_buf_print - - -/* - The function prints specified size to buffer. Size is expressed using biggest possible unit, for - example 1024 is printed as "1k". -*/ - -void -__kmp_str_buf_print_size( - kmp_str_buf_t * buf, - size_t size -) { - - char const * names[] = { "", "k", "M", "G", "T", "P", "E", "Z", "Y" }; - int const units = sizeof( names ) / sizeof( char const * ); - int u = 0; - if ( size > 0 ) { - while ( ( size % 1024 == 0 ) && ( u + 1 < units ) ) { - size = size / 1024; - ++ u; - }; // while - }; // if - - __kmp_str_buf_print( buf, "%" KMP_SIZE_T_SPEC "%s", size, names[ u ] ); - -} // __kmp_str_buf_print_size - - -void -__kmp_str_fname_init( - kmp_str_fname_t * fname, - char const * path -) { - - fname->path = NULL; - fname->dir = NULL; - fname->base = NULL; - - if ( path != NULL ) { - char * slash = NULL; // Pointer to the last character of dir. - char * base = NULL; // Pointer to the beginning of basename. 
- fname->path = __kmp_str_format( "%s", path ); - // Original code used strdup() function to copy a string, but on Windows* OS Intel(R) 64 it - // causes assertioon id debug heap, so I had to replace strdup with __kmp_str_format(). - if ( KMP_OS_WINDOWS ) { - __kmp_str_replace( fname->path, '\\', '/' ); - }; // if - fname->dir = __kmp_str_format( "%s", fname->path ); - slash = strrchr( fname->dir, '/' ); - if ( KMP_OS_WINDOWS && slash == NULL ) { // On Windows* OS, if slash not found, - char first = TOLOWER( fname->dir[ 0 ] ); // look for drive. - if ( 'a' <= first && first <= 'z' && fname->dir[ 1 ] == ':' ) { - slash = & fname->dir[ 1 ]; - }; // if - }; // if - base = ( slash == NULL ? fname->dir : slash + 1 ); - fname->base = __kmp_str_format( "%s", base ); // Copy basename - * base = 0; // and truncate dir. - }; // if - -} // kmp_str_fname_init - - -void -__kmp_str_fname_free( - kmp_str_fname_t * fname -) { - __kmp_str_free( (char const **)( & fname->path ) ); - __kmp_str_free( (char const **)( & fname->dir ) ); - __kmp_str_free( (char const **)( & fname->base ) ); -} // kmp_str_fname_free - - -int -__kmp_str_fname_match( - kmp_str_fname_t const * fname, - char const * pattern -) { - - int dir_match = 1; - int base_match = 1; - - if ( pattern != NULL ) { - kmp_str_fname_t ptrn; - __kmp_str_fname_init( & ptrn, pattern ); - dir_match = - strcmp( ptrn.dir, "*/" ) == 0 - || - ( fname->dir != NULL && __kmp_str_eqf( fname->dir, ptrn.dir ) ); - base_match = - strcmp( ptrn.base, "*" ) == 0 - || - ( fname->base != NULL && __kmp_str_eqf( fname->base, ptrn.base ) ); - __kmp_str_fname_free( & ptrn ); - }; // if - - return dir_match && base_match; - -} // __kmp_str_fname_match - - -kmp_str_loc_t -__kmp_str_loc_init( - char const * psource, - int init_fname -) { - - kmp_str_loc_t loc; - - loc._bulk = NULL; - loc.file = NULL; - loc.func = NULL; - loc.line = 0; - loc.col = 0; - - if ( psource != NULL ) { - - char * str = NULL; - char * dummy = NULL; - char * line = NULL; - char 
* col = NULL; - - // Copy psource to keep it intact. - loc._bulk = __kmp_str_format( "%s", psource ); - - // Parse psource string: ";file;func;line;col;;" - str = loc._bulk; - __kmp_str_split( str, ';', & dummy, & str ); - __kmp_str_split( str, ';', & loc.file, & str ); - __kmp_str_split( str, ';', & loc.func, & str ); - __kmp_str_split( str, ';', & line, & str ); - __kmp_str_split( str, ';', & col, & str ); - - // Convert line and col into numberic values. - if ( line != NULL ) { - loc.line = atoi( line ); - if ( loc.line < 0 ) { - loc.line = 0; - }; // if - }; // if - if ( col != NULL ) { - loc.col = atoi( col ); - if ( loc.col < 0 ) { - loc.col = 0; - }; // if - }; // if - - }; // if - - __kmp_str_fname_init( & loc.fname, init_fname ? loc.file : NULL ); - - return loc; - -} // kmp_str_loc_init - - -void -__kmp_str_loc_free( - kmp_str_loc_t * loc -) { - __kmp_str_fname_free( & loc->fname ); - KMP_INTERNAL_FREE( loc->_bulk ); - loc->_bulk = NULL; - loc->file = NULL; - loc->func = NULL; -} // kmp_str_loc_free - - - -/* - This function is intended to compare file names. On Windows* OS file names are case-insensitive, - so functions performs case-insensitive comparison. On Linux* OS it performs case-sensitive - comparison. - Note: The function returns *true* if strings are *equal*. -*/ - -int -__kmp_str_eqf( // True, if strings are equal, false otherwise. - char const * lhs, // First string. - char const * rhs // Second string. -) { - int result; - #if KMP_OS_WINDOWS - result = ( _stricmp( lhs, rhs ) == 0 ); - #else - result = ( strcmp( lhs, rhs ) == 0 ); - #endif - return result; -} // __kmp_str_eqf - - -/* - This function is like sprintf, but it *allocates* new buffer, which must be freed eventually by - __kmp_str_free(). The function is very convenient for constructing strings, it successfully - replaces strdup(), strcat(), it frees programmer from buffer allocations and helps to avoid - buffer overflows. 
Examples: - - str = __kmp_str_format( "%s", orig ); // strdup(), do not care about buffer size. - __kmp_str_free( & str ); - str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), do not care about buffer size. - __kmp_str_free( & str ); - str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string. - __kmp_str_free( & str ); - - Performance note: - This function allocates memory with malloc() calls, so do not call it from - performance-critical code. In performance-critical code consider using kmp_str_buf_t - instead, since it uses stack-allocated buffer for short strings. - - Why does this function use malloc()? - 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There are no - reasons in using __kmp_allocate() for strings due to extra overhead while cache-aligned - memory is not necessary. - 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure. - We need to perform string operations during library startup (for example, in - __kmp_register_library_startup()) when no thread structures are allocated yet. - So standard malloc() is the only available option. -*/ - -// TODO: Find and replace all regular free() with __kmp_str_free(). - -char * -__kmp_str_format( // Allocated string. - char const * format, // Format string. - ... // Other parameters. -) { - - va_list args; - int size = 512; - char * buffer = NULL; - int rc; - - // Allocate buffer. - buffer = (char *) KMP_INTERNAL_MALLOC( size ); - if ( buffer == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - - for ( ; ; ) { - - // Try to format string. - va_start( args, format ); - rc = KMP_VSNPRINTF( buffer, size, format, args ); - va_end( args ); - - // No errors, string has been formatted. - if ( rc >= 0 && rc < size ) { - break; - }; // if - - // Error occurred, buffer is too small. - if ( rc >= 0 ) { - // C99-conforming implementation of vsnprintf returns required buffer size. 
- size = rc + 1; - } else { - // Older implementations just return -1. - size = size * 2; - }; // if - - // Enlarge buffer and try again. - buffer = (char *) KMP_INTERNAL_REALLOC( buffer, size ); - if ( buffer == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - - }; // forever - - return buffer; - -} // func __kmp_str_format - - -void -__kmp_str_free( - char const * * str -) { - KMP_DEBUG_ASSERT( str != NULL ); - KMP_INTERNAL_FREE( (void *) * str ); - * str = NULL; -} // func __kmp_str_free - - -/* If len is zero, returns true iff target and data have exact case-insensitive match. - If len is negative, returns true iff target is a case-insensitive substring of data. - If len is positive, returns true iff target is a case-insensitive substring of data or - vice versa, and neither is shorter than len. -*/ -int -__kmp_str_match( - char const * target, - int len, - char const * data -) { - int i; - if ( target == NULL || data == NULL ) { - return FALSE; - }; // if - for ( i = 0; target[i] && data[i]; ++ i ) { - if ( TOLOWER( target[i] ) != TOLOWER( data[i] ) ) { - return FALSE; - }; // if - }; // for i - return ( ( len > 0 ) ? i >= len : ( ! target[i] && ( len || ! 
data[i] ) ) ); -} // __kmp_str_match - - -int -__kmp_str_match_false( char const * data ) { - int result = - __kmp_str_match( "false", 1, data ) || - __kmp_str_match( "off", 2, data ) || - __kmp_str_match( "0", 1, data ) || - __kmp_str_match( ".false.", 2, data ) || - __kmp_str_match( ".f.", 2, data ) || - __kmp_str_match( "no", 1, data ); - return result; -} // __kmp_str_match_false - - -int -__kmp_str_match_true( char const * data ) { - int result = - __kmp_str_match( "true", 1, data ) || - __kmp_str_match( "on", 2, data ) || - __kmp_str_match( "1", 1, data ) || - __kmp_str_match( ".true.", 2, data ) || - __kmp_str_match( ".t.", 2, data ) || - __kmp_str_match( "yes", 1, data ); - return result; -} // __kmp_str_match_true - -void -__kmp_str_replace( - char * str, - char search_for, - char replace_with -) { - - char * found = NULL; - - found = strchr( str, search_for ); - while ( found ) { - * found = replace_with; - found = strchr( found + 1, search_for ); - }; // while - -} // __kmp_str_replace - - -void -__kmp_str_split( - char * str, // I: String to split. - char delim, // I: Character to split on. - char ** head, // O: Pointer to head (may be NULL). - char ** tail // O: Pointer to tail (may be NULL). -) { - char * h = str; - char * t = NULL; - if ( str != NULL ) { - char * ptr = strchr( str, delim ); - if ( ptr != NULL ) { - * ptr = 0; - t = ptr + 1; - }; // if - }; // if - if ( head != NULL ) { - * head = h; - }; // if - if ( tail != NULL ) { - * tail = t; - }; // if -} // __kmp_str_split - -/* - strtok_r() is not available on Windows* OS. This function reimplements strtok_r(). -*/ -char * -__kmp_str_token( - char * str, // String to split into tokens. Note: String *is* modified! - char const * delim, // Delimiters. - char ** buf // Internal buffer. -) { - char * token = NULL; - #if KMP_OS_WINDOWS - // On Windows* OS there is no strtok_r() function. Let us implement it. - if ( str != NULL ) { - * buf = str; // First call, initialize buf. 
- }; // if - * buf += strspn( * buf, delim ); // Skip leading delimiters. - if ( ** buf != 0 ) { // Rest of the string is not yet empty. - token = * buf; // Use it as result. - * buf += strcspn( * buf, delim ); // Skip non-delimiters. - if ( ** buf != 0 ) { // Rest of the string is not yet empty. - ** buf = 0; // Terminate token here. - * buf += 1; // Advance buf to start with the next token next time. - }; // if - }; // if - #else - // On Linux* OS and OS X*, strtok_r() is available. Let us use it. - token = strtok_r( str, delim, buf ); - #endif - return token; -}; // __kmp_str_token - - -int -__kmp_str_to_int( - char const * str, - char sentinel -) { - int result, factor; - char const * t; - - result = 0; - - for (t = str; *t != '\0'; ++t) { - if (*t < '0' || *t > '9') - break; - result = (result * 10) + (*t - '0'); - } - - switch (*t) { - case '\0': /* the current default for no suffix is bytes */ - factor = 1; - break; - case 'b': case 'B': /* bytes */ - ++t; - factor = 1; - break; - case 'k': case 'K': /* kilo-bytes */ - ++t; - factor = 1024; - break; - case 'm': case 'M': /* mega-bytes */ - ++t; - factor = (1024 * 1024); - break; - default: - if(*t != sentinel) - return (-1); - t = ""; - factor = 1; - } - - if (result > (INT_MAX / factor)) - result = INT_MAX; - else - result *= factor; - - return (*t != 0 ? 0 : result); - -} // __kmp_str_to_int - - -/* - The routine parses input string. It is expected it is a unsigned integer with optional unit. - Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ..., "yb" - or "y" for yottabytes. :-) Unit name is case-insensitive. The routine returns 0 if everything is - ok, or error code: -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed - value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown unit *size is set - to zero. -*/ -void -__kmp_str_to_size( // R: Error code. 
- char const * str, // I: String of characters, unsigned number and unit ("b", "kb", etc). - size_t * out, // O: Parsed number. - size_t dfactor, // I: The factor if none of the letters specified. - char const * * error // O: Null if everything is ok, error message otherwise. -) { - - size_t value = 0; - size_t factor = 0; - int overflow = 0; - int i = 0; - int digit; - - - KMP_DEBUG_ASSERT( str != NULL ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t') { - ++ i; - }; // while - - // Parse number. - if ( str[ i ] < '0' || str[ i ] > '9' ) { - * error = KMP_I18N_STR( NotANumber ); - return; - }; // if - do { - digit = str[ i ] - '0'; - overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 ); - value = ( value * 10 ) + digit; - ++ i; - } while ( str[ i ] >= '0' && str[ i ] <= '9' ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - // Parse unit. - #define _case( ch, exp ) \ - case ch : \ - case ch - ( 'a' - 'A' ) : { \ - size_t shift = (exp) * 10; \ - ++ i; \ - if ( shift < sizeof( size_t ) * 8 ) { \ - factor = (size_t)( 1 ) << shift; \ - } else { \ - overflow = 1; \ - }; \ - } break; - switch ( str[ i ] ) { - _case( 'k', 1 ); // Kilo - _case( 'm', 2 ); // Mega - _case( 'g', 3 ); // Giga - _case( 't', 4 ); // Tera - _case( 'p', 5 ); // Peta - _case( 'e', 6 ); // Exa - _case( 'z', 7 ); // Zetta - _case( 'y', 8 ); // Yotta - // Oops. No more units... - }; // switch - #undef _case - if ( str[ i ] == 'b' || str[ i ] == 'B' ) { // Skip optional "b". - if ( factor == 0 ) { - factor = 1; - } - ++ i; - }; // if - if ( ! ( str[ i ] == ' ' || str[ i ] == '\t' || str[ i ] == 0 ) ) { // Bad unit - * error = KMP_I18N_STR( BadUnit ); - return; - }; // if - - if ( factor == 0 ) { - factor = dfactor; - } - - // Apply factor. - overflow = overflow || ( value > ( KMP_SIZE_T_MAX / factor ) ); - value *= factor; - - // Skip spaces. 
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - if ( str[ i ] != 0 ) { - * error = KMP_I18N_STR( IllegalCharacters ); - return; - }; // if - - if ( overflow ) { - * error = KMP_I18N_STR( ValueTooLarge ); - * out = KMP_SIZE_T_MAX; - return; - }; // if - - * error = NULL; - * out = value; - -} // __kmp_str_to_size - - -void -__kmp_str_to_uint( // R: Error code. - char const * str, // I: String of characters, unsigned number. - kmp_uint64 * out, // O: Parsed number. - char const * * error // O: Null if everything is ok, error message otherwise. -) { - - size_t value = 0; - int overflow = 0; - int i = 0; - int digit; - - - KMP_DEBUG_ASSERT( str != NULL ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - // Parse number. - if ( str[ i ] < '0' || str[ i ] > '9' ) { - * error = KMP_I18N_STR( NotANumber ); - return; - }; // if - do { - digit = str[ i ] - '0'; - overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 ); - value = ( value * 10 ) + digit; - ++ i; - } while ( str[ i ] >= '0' && str[ i ] <= '9' ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - if ( str[ i ] != 0 ) { - * error = KMP_I18N_STR( IllegalCharacters ); - return; - }; // if - - if ( overflow ) { - * error = KMP_I18N_STR( ValueTooLarge ); - * out = (kmp_uint64) -1; - return; - }; // if - - * error = NULL; - * out = value; - -} // __kmp_str_to_unit - - - -// end of file // +/* + * kmp_str.c -- String manipulation routines. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp_str.h" + +#include // va_* +#include // vsnprintf() +#include // malloc(), realloc() + +#include "kmp.h" +#include "kmp_i18n.h" + +/* + ------------------------------------------------------------------------------------------------ + String buffer. + ------------------------------------------------------------------------------------------------ + + Usage: + + // Declare buffer and initialize it. + kmp_str_buf_t buffer; + __kmp_str_buf_init( & buffer ); + + // Print to buffer. + __kmp_str_buf_print( & buffer, "Error in file \"%s\" line %d\n", "foo.c", 12 ); + __kmp_str_buf_print( & buffer, " <%s>\n", line ); + + // Use buffer contents. buffer.str is a pointer to data, buffer.used is a number of printed + // characters (not including terminating zero). + write( fd, buffer.str, buffer.used ); + + // Free buffer. + __kmp_str_buf_free( & buffer ); + + // Alternatively, you can detach allocated memory from buffer: + __kmp_str_buf_detach( & buffer ); + return buffer.str; // That memory should be freed eventually. + + + Notes: + + * Buffer users may use buffer.str and buffer.used. Users should not change any fields of + buffer directly. + + * buffer.str is never NULL. If buffer is empty, buffer.str points to empty string (""). + + * For performance reasons, buffer uses stack memory (buffer.bulk) first. If stack memory is + exhausted, buffer allocates memory on heap by malloc(), and reallocates it by realloc() + as amount of used memory grows. + + * Buffer doubles amount of allocated memory each time it is exhausted. + + ------------------------------------------------------------------------------------------------ +*/ + +// TODO: __kmp_str_buf_print() can use thread local memory allocator. 
+ +#define KMP_STR_BUF_INVARIANT( b ) \ + { \ + KMP_DEBUG_ASSERT( (b)->str != NULL ); \ + KMP_DEBUG_ASSERT( (b)->size >= sizeof( (b)->bulk ) ); \ + KMP_DEBUG_ASSERT( (b)->size % sizeof( (b)->bulk ) == 0 ); \ + KMP_DEBUG_ASSERT( (unsigned)(b)->used < (b)->size ); \ + KMP_DEBUG_ASSERT( (b)->size == sizeof( (b)->bulk ) ? (b)->str == & (b)->bulk[ 0 ] : 1 ); \ + KMP_DEBUG_ASSERT( (b)->size > sizeof( (b)->bulk ) ? (b)->str != & (b)->bulk[ 0 ] : 1 ); \ + } + +void + __kmp_str_buf_clear( + kmp_str_buf_t * buffer +) { + KMP_STR_BUF_INVARIANT( buffer ); + if ( buffer->used > 0 ) { + buffer->used = 0; + buffer->str[ 0 ] = 0; + }; // if + KMP_STR_BUF_INVARIANT( buffer ); +} // __kmp_str_buf_clear + + +void +__kmp_str_buf_reserve( + kmp_str_buf_t * buffer, + int size +) { + + KMP_STR_BUF_INVARIANT( buffer ); + KMP_DEBUG_ASSERT( size >= 0 ); + + if ( buffer->size < (unsigned int)size ) { + + // Calculate buffer size. + do { + buffer->size *= 2; + } while ( buffer->size < (unsigned int)size ); + + // Enlarge buffer. + if ( buffer->str == & buffer->bulk[ 0 ] ) { + buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size ); + if ( buffer->str == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 ); + } else { + buffer->str = (char *) KMP_INTERNAL_REALLOC( buffer->str, buffer->size ); + if ( buffer->str == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + }; // if + + }; // if + + KMP_DEBUG_ASSERT( buffer->size > 0 ); + KMP_DEBUG_ASSERT( buffer->size >= (unsigned)size ); + KMP_STR_BUF_INVARIANT( buffer ); + +} // __kmp_str_buf_reserve + + +void +__kmp_str_buf_detach( + kmp_str_buf_t * buffer +) { + + KMP_STR_BUF_INVARIANT( buffer ); + + // If internal bulk is used, allocate memory and copy it. 
+ if ( buffer->size <= sizeof( buffer->bulk ) ) { + buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size ); + if ( buffer->str == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 ); + }; // if + +} // __kmp_str_buf_detach + + +void +__kmp_str_buf_free( + kmp_str_buf_t * buffer +) { + KMP_STR_BUF_INVARIANT( buffer ); + if ( buffer->size > sizeof( buffer->bulk ) ) { + KMP_INTERNAL_FREE( buffer->str ); + }; // if + buffer->str = buffer->bulk; + buffer->size = sizeof( buffer->bulk ); + buffer->used = 0; + KMP_STR_BUF_INVARIANT( buffer ); +} // __kmp_str_buf_free + + +void +__kmp_str_buf_cat( + kmp_str_buf_t * buffer, + char const * str, + int len +) { + KMP_STR_BUF_INVARIANT( buffer ); + KMP_DEBUG_ASSERT( str != NULL ); + KMP_DEBUG_ASSERT( len >= 0 ); + __kmp_str_buf_reserve( buffer, buffer->used + len + 1 ); + KMP_MEMCPY( buffer->str + buffer->used, str, len ); + buffer->str[ buffer->used + len ] = 0; + buffer->used += len; + KMP_STR_BUF_INVARIANT( buffer ); +} // __kmp_str_buf_cat + + +void +__kmp_str_buf_vprint( + kmp_str_buf_t * buffer, + char const * format, + va_list args +) { + + KMP_STR_BUF_INVARIANT( buffer ); + + for ( ; ; ) { + + int const free = buffer->size - buffer->used; + int rc; + int size; + + // Try to format string. + { + /* + On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it + is called for the second time with the same args. To prevent the crash, we have to + pass a fresh intact copy of args to vsnprintf() on each iteration. + + Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it + seems vsnprintf() does not modify args argument on Windows* OS. + */ + + #if ! KMP_OS_WINDOWS + va_list _args; + __va_copy( _args, args ); // Make copy of args. + #define args _args // Substitute args with its copy, _args. 
+ #endif // KMP_OS_WINDOWS + rc = KMP_VSNPRINTF( buffer->str + buffer->used, free, format, args ); + #if ! KMP_OS_WINDOWS + #undef args // Remove substitution. + va_end( _args ); + #endif // KMP_OS_WINDOWS + } + + // No errors, string has been formatted. + if ( rc >= 0 && rc < free ) { + buffer->used += rc; + break; + }; // if + + // Error occurred, buffer is too small. + if ( rc >= 0 ) { + // C99-conforming implementation of vsnprintf returns required buffer size. + size = buffer->used + rc + 1; + } else { + // Older implementations just return -1. Double buffer size. + size = buffer->size * 2; + }; // if + + // Enlarge buffer. + __kmp_str_buf_reserve( buffer, size ); + + // And try again. + + }; // forever + + KMP_DEBUG_ASSERT( buffer->size > 0 ); + KMP_STR_BUF_INVARIANT( buffer ); + +} // __kmp_str_buf_vprint + + +void +__kmp_str_buf_print( + kmp_str_buf_t * buffer, + char const * format, + ... +) { + + va_list args; + va_start( args, format ); + __kmp_str_buf_vprint( buffer, format, args ); + va_end( args ); + +} // __kmp_str_buf_print + + +/* + The function prints specified size to buffer. Size is expressed using biggest possible unit, for + example 1024 is printed as "1k". +*/ + +void +__kmp_str_buf_print_size( + kmp_str_buf_t * buf, + size_t size +) { + + char const * names[] = { "", "k", "M", "G", "T", "P", "E", "Z", "Y" }; + int const units = sizeof( names ) / sizeof( char const * ); + int u = 0; + if ( size > 0 ) { + while ( ( size % 1024 == 0 ) && ( u + 1 < units ) ) { + size = size / 1024; + ++ u; + }; // while + }; // if + + __kmp_str_buf_print( buf, "%" KMP_SIZE_T_SPEC "%s", size, names[ u ] ); + +} // __kmp_str_buf_print_size + + +void +__kmp_str_fname_init( + kmp_str_fname_t * fname, + char const * path +) { + + fname->path = NULL; + fname->dir = NULL; + fname->base = NULL; + + if ( path != NULL ) { + char * slash = NULL; // Pointer to the last character of dir. + char * base = NULL; // Pointer to the beginning of basename. 
+ fname->path = __kmp_str_format( "%s", path ); + // Original code used strdup() function to copy a string, but on Windows* OS Intel(R) 64 it + // causes assertioon id debug heap, so I had to replace strdup with __kmp_str_format(). + if ( KMP_OS_WINDOWS ) { + __kmp_str_replace( fname->path, '\\', '/' ); + }; // if + fname->dir = __kmp_str_format( "%s", fname->path ); + slash = strrchr( fname->dir, '/' ); + if ( KMP_OS_WINDOWS && slash == NULL ) { // On Windows* OS, if slash not found, + char first = TOLOWER( fname->dir[ 0 ] ); // look for drive. + if ( 'a' <= first && first <= 'z' && fname->dir[ 1 ] == ':' ) { + slash = & fname->dir[ 1 ]; + }; // if + }; // if + base = ( slash == NULL ? fname->dir : slash + 1 ); + fname->base = __kmp_str_format( "%s", base ); // Copy basename + * base = 0; // and truncate dir. + }; // if + +} // kmp_str_fname_init + + +void +__kmp_str_fname_free( + kmp_str_fname_t * fname +) { + __kmp_str_free( (char const **)( & fname->path ) ); + __kmp_str_free( (char const **)( & fname->dir ) ); + __kmp_str_free( (char const **)( & fname->base ) ); +} // kmp_str_fname_free + + +int +__kmp_str_fname_match( + kmp_str_fname_t const * fname, + char const * pattern +) { + + int dir_match = 1; + int base_match = 1; + + if ( pattern != NULL ) { + kmp_str_fname_t ptrn; + __kmp_str_fname_init( & ptrn, pattern ); + dir_match = + strcmp( ptrn.dir, "*/" ) == 0 + || + ( fname->dir != NULL && __kmp_str_eqf( fname->dir, ptrn.dir ) ); + base_match = + strcmp( ptrn.base, "*" ) == 0 + || + ( fname->base != NULL && __kmp_str_eqf( fname->base, ptrn.base ) ); + __kmp_str_fname_free( & ptrn ); + }; // if + + return dir_match && base_match; + +} // __kmp_str_fname_match + + +kmp_str_loc_t +__kmp_str_loc_init( + char const * psource, + int init_fname +) { + + kmp_str_loc_t loc; + + loc._bulk = NULL; + loc.file = NULL; + loc.func = NULL; + loc.line = 0; + loc.col = 0; + + if ( psource != NULL ) { + + char * str = NULL; + char * dummy = NULL; + char * line = NULL; + char 
* col = NULL; + + // Copy psource to keep it intact. + loc._bulk = __kmp_str_format( "%s", psource ); + + // Parse psource string: ";file;func;line;col;;" + str = loc._bulk; + __kmp_str_split( str, ';', & dummy, & str ); + __kmp_str_split( str, ';', & loc.file, & str ); + __kmp_str_split( str, ';', & loc.func, & str ); + __kmp_str_split( str, ';', & line, & str ); + __kmp_str_split( str, ';', & col, & str ); + + // Convert line and col into numberic values. + if ( line != NULL ) { + loc.line = atoi( line ); + if ( loc.line < 0 ) { + loc.line = 0; + }; // if + }; // if + if ( col != NULL ) { + loc.col = atoi( col ); + if ( loc.col < 0 ) { + loc.col = 0; + }; // if + }; // if + + }; // if + + __kmp_str_fname_init( & loc.fname, init_fname ? loc.file : NULL ); + + return loc; + +} // kmp_str_loc_init + + +void +__kmp_str_loc_free( + kmp_str_loc_t * loc +) { + __kmp_str_fname_free( & loc->fname ); + KMP_INTERNAL_FREE( loc->_bulk ); + loc->_bulk = NULL; + loc->file = NULL; + loc->func = NULL; +} // kmp_str_loc_free + + + +/* + This function is intended to compare file names. On Windows* OS file names are case-insensitive, + so functions performs case-insensitive comparison. On Linux* OS it performs case-sensitive + comparison. + Note: The function returns *true* if strings are *equal*. +*/ + +int +__kmp_str_eqf( // True, if strings are equal, false otherwise. + char const * lhs, // First string. + char const * rhs // Second string. +) { + int result; + #if KMP_OS_WINDOWS + result = ( _stricmp( lhs, rhs ) == 0 ); + #else + result = ( strcmp( lhs, rhs ) == 0 ); + #endif + return result; +} // __kmp_str_eqf + + +/* + This function is like sprintf, but it *allocates* new buffer, which must be freed eventually by + __kmp_str_free(). The function is very convenient for constructing strings, it successfully + replaces strdup(), strcat(), it frees programmer from buffer allocations and helps to avoid + buffer overflows. 
Examples: + + str = __kmp_str_format( "%s", orig ); // strdup(), do not care about buffer size. + __kmp_str_free( & str ); + str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), do not care about buffer size. + __kmp_str_free( & str ); + str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string. + __kmp_str_free( & str ); + + Performance note: + This function allocates memory with malloc() calls, so do not call it from + performance-critical code. In performance-critical code consider using kmp_str_buf_t + instead, since it uses stack-allocated buffer for short strings. + + Why does this function use malloc()? + 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There are no + reasons in using __kmp_allocate() for strings due to extra overhead while cache-aligned + memory is not necessary. + 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure. + We need to perform string operations during library startup (for example, in + __kmp_register_library_startup()) when no thread structures are allocated yet. + So standard malloc() is the only available option. +*/ + +// TODO: Find and replace all regular free() with __kmp_str_free(). + +char * +__kmp_str_format( // Allocated string. + char const * format, // Format string. + ... // Other parameters. +) { + + va_list args; + int size = 512; + char * buffer = NULL; + int rc; + + // Allocate buffer. + buffer = (char *) KMP_INTERNAL_MALLOC( size ); + if ( buffer == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + + for ( ; ; ) { + + // Try to format string. + va_start( args, format ); + rc = KMP_VSNPRINTF( buffer, size, format, args ); + va_end( args ); + + // No errors, string has been formatted. + if ( rc >= 0 && rc < size ) { + break; + }; // if + + // Error occurred, buffer is too small. + if ( rc >= 0 ) { + // C99-conforming implementation of vsnprintf returns required buffer size. 
+ size = rc + 1; + } else { + // Older implementations just return -1. + size = size * 2; + }; // if + + // Enlarge buffer and try again. + buffer = (char *) KMP_INTERNAL_REALLOC( buffer, size ); + if ( buffer == NULL ) { + KMP_FATAL( MemoryAllocFailed ); + }; // if + + }; // forever + + return buffer; + +} // func __kmp_str_format + + +void +__kmp_str_free( + char const * * str +) { + KMP_DEBUG_ASSERT( str != NULL ); + KMP_INTERNAL_FREE( (void *) * str ); + * str = NULL; +} // func __kmp_str_free + + +/* If len is zero, returns true iff target and data have exact case-insensitive match. + If len is negative, returns true iff target is a case-insensitive substring of data. + If len is positive, returns true iff target is a case-insensitive substring of data or + vice versa, and neither is shorter than len. +*/ +int +__kmp_str_match( + char const * target, + int len, + char const * data +) { + int i; + if ( target == NULL || data == NULL ) { + return FALSE; + }; // if + for ( i = 0; target[i] && data[i]; ++ i ) { + if ( TOLOWER( target[i] ) != TOLOWER( data[i] ) ) { + return FALSE; + }; // if + }; // for i + return ( ( len > 0 ) ? i >= len : ( ! target[i] && ( len || ! 
data[i] ) ) ); +} // __kmp_str_match + + +int +__kmp_str_match_false( char const * data ) { + int result = + __kmp_str_match( "false", 1, data ) || + __kmp_str_match( "off", 2, data ) || + __kmp_str_match( "0", 1, data ) || + __kmp_str_match( ".false.", 2, data ) || + __kmp_str_match( ".f.", 2, data ) || + __kmp_str_match( "no", 1, data ); + return result; +} // __kmp_str_match_false + + +int +__kmp_str_match_true( char const * data ) { + int result = + __kmp_str_match( "true", 1, data ) || + __kmp_str_match( "on", 2, data ) || + __kmp_str_match( "1", 1, data ) || + __kmp_str_match( ".true.", 2, data ) || + __kmp_str_match( ".t.", 2, data ) || + __kmp_str_match( "yes", 1, data ); + return result; +} // __kmp_str_match_true + +void +__kmp_str_replace( + char * str, + char search_for, + char replace_with +) { + + char * found = NULL; + + found = strchr( str, search_for ); + while ( found ) { + * found = replace_with; + found = strchr( found + 1, search_for ); + }; // while + +} // __kmp_str_replace + + +void +__kmp_str_split( + char * str, // I: String to split. + char delim, // I: Character to split on. + char ** head, // O: Pointer to head (may be NULL). + char ** tail // O: Pointer to tail (may be NULL). +) { + char * h = str; + char * t = NULL; + if ( str != NULL ) { + char * ptr = strchr( str, delim ); + if ( ptr != NULL ) { + * ptr = 0; + t = ptr + 1; + }; // if + }; // if + if ( head != NULL ) { + * head = h; + }; // if + if ( tail != NULL ) { + * tail = t; + }; // if +} // __kmp_str_split + +/* + strtok_r() is not available on Windows* OS. This function reimplements strtok_r(). +*/ +char * +__kmp_str_token( + char * str, // String to split into tokens. Note: String *is* modified! + char const * delim, // Delimiters. + char ** buf // Internal buffer. +) { + char * token = NULL; + #if KMP_OS_WINDOWS + // On Windows* OS there is no strtok_r() function. Let us implement it. + if ( str != NULL ) { + * buf = str; // First call, initialize buf. 
+ }; // if + * buf += strspn( * buf, delim ); // Skip leading delimiters. + if ( ** buf != 0 ) { // Rest of the string is not yet empty. + token = * buf; // Use it as result. + * buf += strcspn( * buf, delim ); // Skip non-delimiters. + if ( ** buf != 0 ) { // Rest of the string is not yet empty. + ** buf = 0; // Terminate token here. + * buf += 1; // Advance buf to start with the next token next time. + }; // if + }; // if + #else + // On Linux* OS and OS X*, strtok_r() is available. Let us use it. + token = strtok_r( str, delim, buf ); + #endif + return token; +}; // __kmp_str_token + + +int +__kmp_str_to_int( + char const * str, + char sentinel +) { + int result, factor; + char const * t; + + result = 0; + + for (t = str; *t != '\0'; ++t) { + if (*t < '0' || *t > '9') + break; + result = (result * 10) + (*t - '0'); + } + + switch (*t) { + case '\0': /* the current default for no suffix is bytes */ + factor = 1; + break; + case 'b': case 'B': /* bytes */ + ++t; + factor = 1; + break; + case 'k': case 'K': /* kilo-bytes */ + ++t; + factor = 1024; + break; + case 'm': case 'M': /* mega-bytes */ + ++t; + factor = (1024 * 1024); + break; + default: + if(*t != sentinel) + return (-1); + t = ""; + factor = 1; + } + + if (result > (INT_MAX / factor)) + result = INT_MAX; + else + result *= factor; + + return (*t != 0 ? 0 : result); + +} // __kmp_str_to_int + + +/* + The routine parses input string. It is expected it is a unsigned integer with optional unit. + Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ..., "yb" + or "y" for yottabytes. :-) Unit name is case-insensitive. The routine returns 0 if everything is + ok, or error code: -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed + value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown unit *size is set + to zero. +*/ +void +__kmp_str_to_size( // R: Error code. 
+ char const * str, // I: String of characters, unsigned number and unit ("b", "kb", etc). + size_t * out, // O: Parsed number. + size_t dfactor, // I: The factor if none of the letters specified. + char const * * error // O: Null if everything is ok, error message otherwise. +) { + + size_t value = 0; + size_t factor = 0; + int overflow = 0; + int i = 0; + int digit; + + + KMP_DEBUG_ASSERT( str != NULL ); + + // Skip spaces. + while ( str[ i ] == ' ' || str[ i ] == '\t') { + ++ i; + }; // while + + // Parse number. + if ( str[ i ] < '0' || str[ i ] > '9' ) { + * error = KMP_I18N_STR( NotANumber ); + return; + }; // if + do { + digit = str[ i ] - '0'; + overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 ); + value = ( value * 10 ) + digit; + ++ i; + } while ( str[ i ] >= '0' && str[ i ] <= '9' ); + + // Skip spaces. + while ( str[ i ] == ' ' || str[ i ] == '\t' ) { + ++ i; + }; // while + + // Parse unit. + #define _case( ch, exp ) \ + case ch : \ + case ch - ( 'a' - 'A' ) : { \ + size_t shift = (exp) * 10; \ + ++ i; \ + if ( shift < sizeof( size_t ) * 8 ) { \ + factor = (size_t)( 1 ) << shift; \ + } else { \ + overflow = 1; \ + }; \ + } break; + switch ( str[ i ] ) { + _case( 'k', 1 ); // Kilo + _case( 'm', 2 ); // Mega + _case( 'g', 3 ); // Giga + _case( 't', 4 ); // Tera + _case( 'p', 5 ); // Peta + _case( 'e', 6 ); // Exa + _case( 'z', 7 ); // Zetta + _case( 'y', 8 ); // Yotta + // Oops. No more units... + }; // switch + #undef _case + if ( str[ i ] == 'b' || str[ i ] == 'B' ) { // Skip optional "b". + if ( factor == 0 ) { + factor = 1; + } + ++ i; + }; // if + if ( ! ( str[ i ] == ' ' || str[ i ] == '\t' || str[ i ] == 0 ) ) { // Bad unit + * error = KMP_I18N_STR( BadUnit ); + return; + }; // if + + if ( factor == 0 ) { + factor = dfactor; + } + + // Apply factor. + overflow = overflow || ( value > ( KMP_SIZE_T_MAX / factor ) ); + value *= factor; + + // Skip spaces. 
+ while ( str[ i ] == ' ' || str[ i ] == '\t' ) { + ++ i; + }; // while + + if ( str[ i ] != 0 ) { + * error = KMP_I18N_STR( IllegalCharacters ); + return; + }; // if + + if ( overflow ) { + * error = KMP_I18N_STR( ValueTooLarge ); + * out = KMP_SIZE_T_MAX; + return; + }; // if + + * error = NULL; + * out = value; + +} // __kmp_str_to_size + + +void +__kmp_str_to_uint( // R: Error code. + char const * str, // I: String of characters, unsigned number. + kmp_uint64 * out, // O: Parsed number. + char const * * error // O: Null if everything is ok, error message otherwise. +) { + + size_t value = 0; + int overflow = 0; + int i = 0; + int digit; + + + KMP_DEBUG_ASSERT( str != NULL ); + + // Skip spaces. + while ( str[ i ] == ' ' || str[ i ] == '\t' ) { + ++ i; + }; // while + + // Parse number. + if ( str[ i ] < '0' || str[ i ] > '9' ) { + * error = KMP_I18N_STR( NotANumber ); + return; + }; // if + do { + digit = str[ i ] - '0'; + overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 ); + value = ( value * 10 ) + digit; + ++ i; + } while ( str[ i ] >= '0' && str[ i ] <= '9' ); + + // Skip spaces. + while ( str[ i ] == ' ' || str[ i ] == '\t' ) { + ++ i; + }; // while + + if ( str[ i ] != 0 ) { + * error = KMP_I18N_STR( IllegalCharacters ); + return; + }; // if + + if ( overflow ) { + * error = KMP_I18N_STR( ValueTooLarge ); + * out = (kmp_uint64) -1; + return; + }; // if + + * error = NULL; + * out = value; + +} // __kmp_str_to_unit + + + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_str.h b/contrib/libs/cxxsupp/openmp/kmp_str.h index 80de47694ba..ba71bbaa34e 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_str.h +++ b/contrib/libs/cxxsupp/openmp/kmp_str.h @@ -1,119 +1,119 @@ -/* - * kmp_str.h -- String manipulation routines. 
- */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_STR_H -#define KMP_STR_H - -#include -#include - -#include "kmp_os.h" - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -#if KMP_OS_WINDOWS -# define strdup _strdup -#endif - -/* some macros to replace ctype.h functions */ -#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c)) - -struct kmp_str_buf { - char * str; // Pointer to buffer content, read only. - unsigned int size; // Do not change this field! - int used; // Number of characters printed to buffer, read only. - char bulk[ 512 ]; // Do not use this field! -}; // struct kmp_str_buf -typedef struct kmp_str_buf kmp_str_buf_t; - -#define __kmp_str_buf_init( b ) { (b)->str = (b)->bulk; (b)->size = sizeof( (b)->bulk ); (b)->used = 0; (b)->bulk[ 0 ] = 0; } - -void __kmp_str_buf_clear( kmp_str_buf_t * buffer ); -void __kmp_str_buf_reserve( kmp_str_buf_t * buffer, int size ); -void __kmp_str_buf_detach( kmp_str_buf_t * buffer ); -void __kmp_str_buf_free( kmp_str_buf_t * buffer ); -void __kmp_str_buf_cat( kmp_str_buf_t * buffer, char const * str, int len ); -void __kmp_str_buf_vprint( kmp_str_buf_t * buffer, char const * format, va_list args ); -void __kmp_str_buf_print( kmp_str_buf_t * buffer, char const * format, ... ); -void __kmp_str_buf_print_size( kmp_str_buf_t * buffer, size_t size ); - -/* - File name parser. Usage: - - kmp_str_fname_t fname = __kmp_str_fname_init( path ); - // Use fname.path (copy of original path ), fname.dir, fname.base. - // Note fname.dir concatenated with fname.base gives exact copy of path. 
- __kmp_str_fname_free( & fname ); - -*/ -struct kmp_str_fname { - char * path; - char * dir; - char * base; -}; // struct kmp_str_fname -typedef struct kmp_str_fname kmp_str_fname_t; -void __kmp_str_fname_init( kmp_str_fname_t * fname, char const * path ); -void __kmp_str_fname_free( kmp_str_fname_t * fname ); -// Compares file name with specified patern. If pattern is NULL, any fname matched. -int __kmp_str_fname_match( kmp_str_fname_t const * fname, char const * pattern ); - -/* - The compiler provides source locations in string form ";file;func;line;col;;". It not not - convenient for manupulation. These structure keeps source location in more convenient form. - Usage: - - kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 ); - // use loc.file, loc.func, loc.line, loc.col. - // loc.fname is available if the second argument of __kmp_str_loc_init is true. - __kmp_str_loc_free( & loc ); - - If psource is NULL or does not follow format above, file and/or func may be NULL pointers. -*/ -struct kmp_str_loc { - char * _bulk; // Do not use thid field. - kmp_str_fname_t fname; // Will be initialized if init_fname is true. - char * file; - char * func; - int line; - int col; -}; // struct kmp_str_loc -typedef struct kmp_str_loc kmp_str_loc_t; -kmp_str_loc_t __kmp_str_loc_init( char const * psource, int init_fname ); -void __kmp_str_loc_free( kmp_str_loc_t * loc ); - -int __kmp_str_eqf( char const * lhs, char const * rhs ); -char * __kmp_str_format( char const * format, ... 
); -void __kmp_str_free( char const * * str ); -int __kmp_str_match( char const * target, int len, char const * data ); -int __kmp_str_match_false( char const * data ); -int __kmp_str_match_true( char const * data ); -void __kmp_str_replace( char * str, char search_for, char replace_with ); -void __kmp_str_split( char * str, char delim, char ** head, char ** tail ); -char * __kmp_str_token( char * str, char const * delim, char ** buf ); -int __kmp_str_to_int( char const * str, char sentinel ); - -void __kmp_str_to_size( char const * str, size_t * out, size_t dfactor, char const * * error ); -void __kmp_str_to_uint( char const * str, kmp_uint64 * out, char const * * error ); - -#ifdef __cplusplus - } // extern "C" -#endif // __cplusplus - -#endif // KMP_STR_H - -// end of file // - +/* + * kmp_str.h -- String manipulation routines. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_STR_H +#define KMP_STR_H + +#include +#include + +#include "kmp_os.h" + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +#if KMP_OS_WINDOWS +# define strdup _strdup +#endif + +/* some macros to replace ctype.h functions */ +#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c)) + +struct kmp_str_buf { + char * str; // Pointer to buffer content, read only. + unsigned int size; // Do not change this field! + int used; // Number of characters printed to buffer, read only. + char bulk[ 512 ]; // Do not use this field! 
+}; // struct kmp_str_buf +typedef struct kmp_str_buf kmp_str_buf_t; + +#define __kmp_str_buf_init( b ) { (b)->str = (b)->bulk; (b)->size = sizeof( (b)->bulk ); (b)->used = 0; (b)->bulk[ 0 ] = 0; } + +void __kmp_str_buf_clear( kmp_str_buf_t * buffer ); +void __kmp_str_buf_reserve( kmp_str_buf_t * buffer, int size ); +void __kmp_str_buf_detach( kmp_str_buf_t * buffer ); +void __kmp_str_buf_free( kmp_str_buf_t * buffer ); +void __kmp_str_buf_cat( kmp_str_buf_t * buffer, char const * str, int len ); +void __kmp_str_buf_vprint( kmp_str_buf_t * buffer, char const * format, va_list args ); +void __kmp_str_buf_print( kmp_str_buf_t * buffer, char const * format, ... ); +void __kmp_str_buf_print_size( kmp_str_buf_t * buffer, size_t size ); + +/* + File name parser. Usage: + + kmp_str_fname_t fname = __kmp_str_fname_init( path ); + // Use fname.path (copy of original path ), fname.dir, fname.base. + // Note fname.dir concatenated with fname.base gives exact copy of path. + __kmp_str_fname_free( & fname ); + +*/ +struct kmp_str_fname { + char * path; + char * dir; + char * base; +}; // struct kmp_str_fname +typedef struct kmp_str_fname kmp_str_fname_t; +void __kmp_str_fname_init( kmp_str_fname_t * fname, char const * path ); +void __kmp_str_fname_free( kmp_str_fname_t * fname ); +// Compares file name with specified patern. If pattern is NULL, any fname matched. +int __kmp_str_fname_match( kmp_str_fname_t const * fname, char const * pattern ); + +/* + The compiler provides source locations in string form ";file;func;line;col;;". It not not + convenient for manupulation. These structure keeps source location in more convenient form. + Usage: + + kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 ); + // use loc.file, loc.func, loc.line, loc.col. + // loc.fname is available if the second argument of __kmp_str_loc_init is true. + __kmp_str_loc_free( & loc ); + + If psource is NULL or does not follow format above, file and/or func may be NULL pointers. 
+*/ +struct kmp_str_loc { + char * _bulk; // Do not use thid field. + kmp_str_fname_t fname; // Will be initialized if init_fname is true. + char * file; + char * func; + int line; + int col; +}; // struct kmp_str_loc +typedef struct kmp_str_loc kmp_str_loc_t; +kmp_str_loc_t __kmp_str_loc_init( char const * psource, int init_fname ); +void __kmp_str_loc_free( kmp_str_loc_t * loc ); + +int __kmp_str_eqf( char const * lhs, char const * rhs ); +char * __kmp_str_format( char const * format, ... ); +void __kmp_str_free( char const * * str ); +int __kmp_str_match( char const * target, int len, char const * data ); +int __kmp_str_match_false( char const * data ); +int __kmp_str_match_true( char const * data ); +void __kmp_str_replace( char * str, char search_for, char replace_with ); +void __kmp_str_split( char * str, char delim, char ** head, char ** tail ); +char * __kmp_str_token( char * str, char const * delim, char ** buf ); +int __kmp_str_to_int( char const * str, char sentinel ); + +void __kmp_str_to_size( char const * str, size_t * out, size_t dfactor, char const * * error ); +void __kmp_str_to_uint( char const * str, kmp_uint64 * out, char const * * error ); + +#ifdef __cplusplus + } // extern "C" +#endif // __cplusplus + +#endif // KMP_STR_H + +// end of file // + diff --git a/contrib/libs/cxxsupp/openmp/kmp_stub.c b/contrib/libs/cxxsupp/openmp/kmp_stub.c index b1379a770b1..1e0953a0fcd 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_stub.c +++ b/contrib/libs/cxxsupp/openmp/kmp_stub.c @@ -1,252 +1,252 @@ -/* - * kmp_stub.c -- stub versions of user-callable OpenMP RT functions. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include -#include -#include - -#include "omp.h" // Function renamings. -#include "kmp.h" // KMP_DEFAULT_STKSIZE -#include "kmp_stub.h" - -#if KMP_OS_WINDOWS - #include -#else - #include -#endif - -// Moved from omp.h -#define omp_set_max_active_levels ompc_set_max_active_levels -#define omp_set_schedule ompc_set_schedule -#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num -#define omp_get_team_size ompc_get_team_size - -#define omp_set_num_threads ompc_set_num_threads -#define omp_set_dynamic ompc_set_dynamic -#define omp_set_nested ompc_set_nested -#define kmp_set_stacksize kmpc_set_stacksize -#define kmp_set_stacksize_s kmpc_set_stacksize_s -#define kmp_set_blocktime kmpc_set_blocktime -#define kmp_set_library kmpc_set_library -#define kmp_set_defaults kmpc_set_defaults -#define kmp_malloc kmpc_malloc -#define kmp_calloc kmpc_calloc -#define kmp_realloc kmpc_realloc -#define kmp_free kmpc_free - -static double frequency = 0.0; - -// Helper functions. -static size_t __kmps_init() { - static int initialized = 0; - static size_t dummy = 0; - if ( ! initialized ) { - - // TODO: Analyze KMP_VERSION environment variable, print __kmp_version_copyright and - // __kmp_version_build_time. - // WARNING: Do not use "fprintf( stderr, ... )" because it will cause unresolved "__iob" - // symbol (see C70080). We need to extract __kmp_printf() stuff from kmp_runtime.c and use - // it. - - // Trick with dummy variable forces linker to keep __kmp_version_copyright and - // __kmp_version_build_time strings in executable file (in case of static linkage). - // When KMP_VERSION analyze is implemented, dummy variable should be deleted, function - // should return void. 
- dummy = __kmp_version_copyright - __kmp_version_build_time; - - #if KMP_OS_WINDOWS - LARGE_INTEGER freq; - BOOL status = QueryPerformanceFrequency( & freq ); - if ( status ) { - frequency = double( freq.QuadPart ); - }; // if - #endif - - initialized = 1; - }; // if - return dummy; -}; // __kmps_init - -#define i __kmps_init(); - -/* set API functions */ -void omp_set_num_threads( omp_int_t num_threads ) { i; } -void omp_set_dynamic( omp_int_t dynamic ) { i; __kmps_set_dynamic( dynamic ); } -void omp_set_nested( omp_int_t nested ) { i; __kmps_set_nested( nested ); } -void omp_set_max_active_levels( omp_int_t max_active_levels ) { i; } -void omp_set_schedule( omp_sched_t kind, omp_int_t modifier ) { i; __kmps_set_schedule( (kmp_sched_t)kind, modifier ); } -int omp_get_ancestor_thread_num( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 0 ); } -int omp_get_team_size( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 1 ); } -int kmpc_set_affinity_mask_proc( int proc, void **mask ) { i; return -1; } -int kmpc_unset_affinity_mask_proc( int proc, void **mask ) { i; return -1; } -int kmpc_get_affinity_mask_proc( int proc, void **mask ) { i; return -1; } - -/* kmp API functions */ -void kmp_set_stacksize( omp_int_t arg ) { i; __kmps_set_stacksize( arg ); } -void kmp_set_stacksize_s( size_t arg ) { i; __kmps_set_stacksize( arg ); } -void kmp_set_blocktime( omp_int_t arg ) { i; __kmps_set_blocktime( arg ); } -void kmp_set_library( omp_int_t arg ) { i; __kmps_set_library( arg ); } -void kmp_set_defaults( char const * str ) { i; } - -/* KMP memory management functions. 
*/ -void * kmp_malloc( size_t size ) { i; return malloc( size ); } -void * kmp_calloc( size_t nelem, size_t elsize ) { i; return calloc( nelem, elsize ); } -void * kmp_realloc( void *ptr, size_t size ) { i; return realloc( ptr, size ); } -void kmp_free( void * ptr ) { i; free( ptr ); } - -static int __kmps_blocktime = INT_MAX; - -void __kmps_set_blocktime( int arg ) { - i; - __kmps_blocktime = arg; -} // __kmps_set_blocktime - -int __kmps_get_blocktime( void ) { - i; - return __kmps_blocktime; -} // __kmps_get_blocktime - -static int __kmps_dynamic = 0; - -void __kmps_set_dynamic( int arg ) { - i; - __kmps_dynamic = arg; -} // __kmps_set_dynamic - -int __kmps_get_dynamic( void ) { - i; - return __kmps_dynamic; -} // __kmps_get_dynamic - -static int __kmps_library = 1000; - -void __kmps_set_library( int arg ) { - i; - __kmps_library = arg; -} // __kmps_set_library - -int __kmps_get_library( void ) { - i; - return __kmps_library; -} // __kmps_get_library - -static int __kmps_nested = 0; - -void __kmps_set_nested( int arg ) { - i; - __kmps_nested = arg; -} // __kmps_set_nested - -int __kmps_get_nested( void ) { - i; - return __kmps_nested; -} // __kmps_get_nested - -static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE; - -void __kmps_set_stacksize( int arg ) { - i; - __kmps_stacksize = arg; -} // __kmps_set_stacksize - -int __kmps_get_stacksize( void ) { - i; - return __kmps_stacksize; -} // __kmps_get_stacksize - -static kmp_sched_t __kmps_sched_kind = kmp_sched_default; -static int __kmps_sched_modifier = 0; - - void __kmps_set_schedule( kmp_sched_t kind, int modifier ) { - i; - __kmps_sched_kind = kind; - __kmps_sched_modifier = modifier; - } // __kmps_set_schedule - - void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ) { - i; - *kind = __kmps_sched_kind; - *modifier = __kmps_sched_modifier; - } // __kmps_get_schedule - -#if OMP_40_ENABLED - -static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false; - -void __kmps_set_proc_bind( kmp_proc_bind_t arg ) { - 
i; - __kmps_proc_bind = arg; -} // __kmps_set_proc_bind - -kmp_proc_bind_t __kmps_get_proc_bind( void ) { - i; - return __kmps_proc_bind; -} // __kmps_get_proc_bind - -#endif /* OMP_40_ENABLED */ - -double __kmps_get_wtime( void ) { - // Elapsed wall clock time (in second) from "sometime in the past". - double wtime = 0.0; - i; - #if KMP_OS_WINDOWS - if ( frequency > 0.0 ) { - LARGE_INTEGER now; - BOOL status = QueryPerformanceCounter( & now ); - if ( status ) { - wtime = double( now.QuadPart ) / frequency; - }; // if - }; // if - #else - // gettimeofday() returns seconds and microseconds since the Epoch. - struct timeval tval; - int rc; - rc = gettimeofday( & tval, NULL ); - if ( rc == 0 ) { - wtime = (double)( tval.tv_sec ) + 1.0E-06 * (double)( tval.tv_usec ); - } else { - // TODO: Assert or abort here. - }; // if - #endif - return wtime; -}; // __kmps_get_wtime - -double __kmps_get_wtick( void ) { - // Number of seconds between successive clock ticks. - double wtick = 0.0; - i; - #if KMP_OS_WINDOWS - { - DWORD increment; - DWORD adjustment; - BOOL disabled; - BOOL rc; - rc = GetSystemTimeAdjustment( & adjustment, & increment, & disabled ); - if ( rc ) { - wtick = 1.0E-07 * (double)( disabled ? increment : adjustment ); - } else { - // TODO: Assert or abort here. - wtick = 1.0E-03; - }; // if - } - #else - // TODO: gettimeofday() returns in microseconds, but what the precision? - wtick = 1.0E-06; - #endif - return wtick; -}; // __kmps_get_wtick - -// end of file // - +/* + * kmp_stub.c -- stub versions of user-callable OpenMP RT functions. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include +#include +#include + +#include "omp.h" // Function renamings. 
+#include "kmp.h" // KMP_DEFAULT_STKSIZE +#include "kmp_stub.h" + +#if KMP_OS_WINDOWS + #include +#else + #include +#endif + +// Moved from omp.h +#define omp_set_max_active_levels ompc_set_max_active_levels +#define omp_set_schedule ompc_set_schedule +#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num +#define omp_get_team_size ompc_get_team_size + +#define omp_set_num_threads ompc_set_num_threads +#define omp_set_dynamic ompc_set_dynamic +#define omp_set_nested ompc_set_nested +#define kmp_set_stacksize kmpc_set_stacksize +#define kmp_set_stacksize_s kmpc_set_stacksize_s +#define kmp_set_blocktime kmpc_set_blocktime +#define kmp_set_library kmpc_set_library +#define kmp_set_defaults kmpc_set_defaults +#define kmp_malloc kmpc_malloc +#define kmp_calloc kmpc_calloc +#define kmp_realloc kmpc_realloc +#define kmp_free kmpc_free + +static double frequency = 0.0; + +// Helper functions. +static size_t __kmps_init() { + static int initialized = 0; + static size_t dummy = 0; + if ( ! initialized ) { + + // TODO: Analyze KMP_VERSION environment variable, print __kmp_version_copyright and + // __kmp_version_build_time. + // WARNING: Do not use "fprintf( stderr, ... )" because it will cause unresolved "__iob" + // symbol (see C70080). We need to extract __kmp_printf() stuff from kmp_runtime.c and use + // it. + + // Trick with dummy variable forces linker to keep __kmp_version_copyright and + // __kmp_version_build_time strings in executable file (in case of static linkage). + // When KMP_VERSION analyze is implemented, dummy variable should be deleted, function + // should return void. 
+ dummy = __kmp_version_copyright - __kmp_version_build_time; + + #if KMP_OS_WINDOWS + LARGE_INTEGER freq; + BOOL status = QueryPerformanceFrequency( & freq ); + if ( status ) { + frequency = double( freq.QuadPart ); + }; // if + #endif + + initialized = 1; + }; // if + return dummy; +}; // __kmps_init + +#define i __kmps_init(); + +/* set API functions */ +void omp_set_num_threads( omp_int_t num_threads ) { i; } +void omp_set_dynamic( omp_int_t dynamic ) { i; __kmps_set_dynamic( dynamic ); } +void omp_set_nested( omp_int_t nested ) { i; __kmps_set_nested( nested ); } +void omp_set_max_active_levels( omp_int_t max_active_levels ) { i; } +void omp_set_schedule( omp_sched_t kind, omp_int_t modifier ) { i; __kmps_set_schedule( (kmp_sched_t)kind, modifier ); } +int omp_get_ancestor_thread_num( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 0 ); } +int omp_get_team_size( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 1 ); } +int kmpc_set_affinity_mask_proc( int proc, void **mask ) { i; return -1; } +int kmpc_unset_affinity_mask_proc( int proc, void **mask ) { i; return -1; } +int kmpc_get_affinity_mask_proc( int proc, void **mask ) { i; return -1; } + +/* kmp API functions */ +void kmp_set_stacksize( omp_int_t arg ) { i; __kmps_set_stacksize( arg ); } +void kmp_set_stacksize_s( size_t arg ) { i; __kmps_set_stacksize( arg ); } +void kmp_set_blocktime( omp_int_t arg ) { i; __kmps_set_blocktime( arg ); } +void kmp_set_library( omp_int_t arg ) { i; __kmps_set_library( arg ); } +void kmp_set_defaults( char const * str ) { i; } + +/* KMP memory management functions. 
*/ +void * kmp_malloc( size_t size ) { i; return malloc( size ); } +void * kmp_calloc( size_t nelem, size_t elsize ) { i; return calloc( nelem, elsize ); } +void * kmp_realloc( void *ptr, size_t size ) { i; return realloc( ptr, size ); } +void kmp_free( void * ptr ) { i; free( ptr ); } + +static int __kmps_blocktime = INT_MAX; + +void __kmps_set_blocktime( int arg ) { + i; + __kmps_blocktime = arg; +} // __kmps_set_blocktime + +int __kmps_get_blocktime( void ) { + i; + return __kmps_blocktime; +} // __kmps_get_blocktime + +static int __kmps_dynamic = 0; + +void __kmps_set_dynamic( int arg ) { + i; + __kmps_dynamic = arg; +} // __kmps_set_dynamic + +int __kmps_get_dynamic( void ) { + i; + return __kmps_dynamic; +} // __kmps_get_dynamic + +static int __kmps_library = 1000; + +void __kmps_set_library( int arg ) { + i; + __kmps_library = arg; +} // __kmps_set_library + +int __kmps_get_library( void ) { + i; + return __kmps_library; +} // __kmps_get_library + +static int __kmps_nested = 0; + +void __kmps_set_nested( int arg ) { + i; + __kmps_nested = arg; +} // __kmps_set_nested + +int __kmps_get_nested( void ) { + i; + return __kmps_nested; +} // __kmps_get_nested + +static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE; + +void __kmps_set_stacksize( int arg ) { + i; + __kmps_stacksize = arg; +} // __kmps_set_stacksize + +int __kmps_get_stacksize( void ) { + i; + return __kmps_stacksize; +} // __kmps_get_stacksize + +static kmp_sched_t __kmps_sched_kind = kmp_sched_default; +static int __kmps_sched_modifier = 0; + + void __kmps_set_schedule( kmp_sched_t kind, int modifier ) { + i; + __kmps_sched_kind = kind; + __kmps_sched_modifier = modifier; + } // __kmps_set_schedule + + void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ) { + i; + *kind = __kmps_sched_kind; + *modifier = __kmps_sched_modifier; + } // __kmps_get_schedule + +#if OMP_40_ENABLED + +static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false; + +void __kmps_set_proc_bind( kmp_proc_bind_t arg ) { + 
i; + __kmps_proc_bind = arg; +} // __kmps_set_proc_bind + +kmp_proc_bind_t __kmps_get_proc_bind( void ) { + i; + return __kmps_proc_bind; +} // __kmps_get_proc_bind + +#endif /* OMP_40_ENABLED */ + +double __kmps_get_wtime( void ) { + // Elapsed wall clock time (in second) from "sometime in the past". + double wtime = 0.0; + i; + #if KMP_OS_WINDOWS + if ( frequency > 0.0 ) { + LARGE_INTEGER now; + BOOL status = QueryPerformanceCounter( & now ); + if ( status ) { + wtime = double( now.QuadPart ) / frequency; + }; // if + }; // if + #else + // gettimeofday() returns seconds and microseconds since the Epoch. + struct timeval tval; + int rc; + rc = gettimeofday( & tval, NULL ); + if ( rc == 0 ) { + wtime = (double)( tval.tv_sec ) + 1.0E-06 * (double)( tval.tv_usec ); + } else { + // TODO: Assert or abort here. + }; // if + #endif + return wtime; +}; // __kmps_get_wtime + +double __kmps_get_wtick( void ) { + // Number of seconds between successive clock ticks. + double wtick = 0.0; + i; + #if KMP_OS_WINDOWS + { + DWORD increment; + DWORD adjustment; + BOOL disabled; + BOOL rc; + rc = GetSystemTimeAdjustment( & adjustment, & increment, & disabled ); + if ( rc ) { + wtick = 1.0E-07 * (double)( disabled ? increment : adjustment ); + } else { + // TODO: Assert or abort here. + wtick = 1.0E-03; + }; // if + } + #else + // TODO: gettimeofday() returns in microseconds, but what the precision? + wtick = 1.0E-06; + #endif + return wtick; +}; // __kmps_get_wtick + +// end of file // + diff --git a/contrib/libs/cxxsupp/openmp/kmp_stub.h b/contrib/libs/cxxsupp/openmp/kmp_stub.h index 2d357b854a5..cdcffa3d8c3 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_stub.h +++ b/contrib/libs/cxxsupp/openmp/kmp_stub.h @@ -1,61 +1,61 @@ -/* - * kmp_stub.h - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. 
See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_STUB_H -#define KMP_STUB_H - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -void __kmps_set_blocktime( int arg ); -int __kmps_get_blocktime( void ); -void __kmps_set_dynamic( int arg ); -int __kmps_get_dynamic( void ); -void __kmps_set_library( int arg ); -int __kmps_get_library( void ); -void __kmps_set_nested( int arg ); -int __kmps_get_nested( void ); -void __kmps_set_stacksize( int arg ); -int __kmps_get_stacksize(); - -#ifndef KMP_SCHED_TYPE_DEFINED -#define KMP_SCHED_TYPE_DEFINED -typedef enum kmp_sched { - kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) - kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) - kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) - kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) - kmp_sched_default = kmp_sched_static // default scheduling -} kmp_sched_t; -#endif -void __kmps_set_schedule( kmp_sched_t kind, int modifier ); -void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ); - -#if OMP_40_ENABLED -void __kmps_set_proc_bind( kmp_proc_bind_t arg ); -kmp_proc_bind_t __kmps_get_proc_bind( void ); -#endif /* OMP_40_ENABLED */ - -double __kmps_get_wtime(); -double __kmps_get_wtick(); - -#ifdef __cplusplus - } // extern "C" -#endif // __cplusplus - -#endif // KMP_STUB_H - -// end of file // +/* + * kmp_stub.h + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_STUB_H +#define KMP_STUB_H + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +void __kmps_set_blocktime( int arg ); +int __kmps_get_blocktime( void ); +void __kmps_set_dynamic( int arg ); +int __kmps_get_dynamic( void ); +void __kmps_set_library( int arg ); +int __kmps_get_library( void ); +void __kmps_set_nested( int arg ); +int __kmps_get_nested( void ); +void __kmps_set_stacksize( int arg ); +int __kmps_get_stacksize(); + +#ifndef KMP_SCHED_TYPE_DEFINED +#define KMP_SCHED_TYPE_DEFINED +typedef enum kmp_sched { + kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) + kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) + kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) + kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) + kmp_sched_default = kmp_sched_static // default scheduling +} kmp_sched_t; +#endif +void __kmps_set_schedule( kmp_sched_t kind, int modifier ); +void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ); + +#if OMP_40_ENABLED +void __kmps_set_proc_bind( kmp_proc_bind_t arg ); +kmp_proc_bind_t __kmps_get_proc_bind( void ); +#endif /* OMP_40_ENABLED */ + +double __kmps_get_wtime(); +double __kmps_get_wtick(); + +#ifdef __cplusplus + } // extern "C" +#endif // __cplusplus + +#endif // KMP_STUB_H + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp index 7251f50b0ac..da085ce50cc 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp @@ -1,513 +1,513 @@ -/* - * kmp_taskdeps.cpp - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -//#define KMP_SUPPORT_GRAPH_OUTPUT 1 - -#include "kmp.h" -#include "kmp_io.h" -#include "kmp_wait_release.h" - -#if OMP_40_ENABLED - -//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use list finished list entries? -//TODO: don't use atomic ref counters for stack-allocated nodes. -//TODO: find an alternate to atomic refs for heap-allocated nodes? -//TODO: Finish graph output support -//TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other runtime locks -//TODO: Any ITT support needed? - -#ifdef KMP_SUPPORT_GRAPH_OUTPUT -static kmp_int32 kmp_node_id_seed = 0; -#endif - -static void -__kmp_init_node ( kmp_depnode_t *node ) -{ - node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed - node->dn.successors = NULL; - __kmp_init_lock(&node->dn.lock); - node->dn.nrefs = 1; // init creates the first reference to the node -#ifdef KMP_SUPPORT_GRAPH_OUTPUT - node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed); -#endif -} - -static inline kmp_depnode_t * -__kmp_node_ref ( kmp_depnode_t *node ) -{ - KMP_TEST_THEN_INC32(&node->dn.nrefs); - return node; -} - -static inline void -__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node ) -{ - if (!node) return; - - kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1; - if ( n == 0 ) { - KMP_ASSERT(node->dn.nrefs == 0); -#if USE_FAST_MEMORY - __kmp_fast_free(thread,node); -#else - __kmp_thread_free(thread,node); -#endif - } -} - -#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid)) -#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid)) - -static void -__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list ); - -static const kmp_int32 kmp_dephash_log2 = 6; -static const kmp_int32 kmp_dephash_size = (1 << kmp_dephash_log2); - -static inline 
kmp_int32 -__kmp_dephash_hash ( kmp_intptr_t addr ) -{ - //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets ); - return ((addr >> kmp_dephash_log2) ^ addr) % kmp_dephash_size; -} - -static kmp_dephash_t * -__kmp_dephash_create ( kmp_info_t *thread ) -{ - kmp_dephash_t *h; - - kmp_int32 size = kmp_dephash_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t); - -#if USE_FAST_MEMORY - h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size ); -#else - h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size ); -#endif - -#ifdef KMP_DEBUG - h->nelements = 0; -#endif - h->buckets = (kmp_dephash_entry **)(h+1); - - for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ ) - h->buckets[i] = 0; - - return h; -} - -static void -__kmp_dephash_free ( kmp_info_t *thread, kmp_dephash_t *h ) -{ - for ( kmp_int32 i=0; i < kmp_dephash_size; i++ ) { - if ( h->buckets[i] ) { - kmp_dephash_entry_t *next; - for ( kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next ) { - next = entry->next_in_bucket; - __kmp_depnode_list_free(thread,entry->last_ins); - __kmp_node_deref(thread,entry->last_out); -#if USE_FAST_MEMORY - __kmp_fast_free(thread,entry); -#else - __kmp_thread_free(thread,entry); -#endif - } - } - } -#if USE_FAST_MEMORY - __kmp_fast_free(thread,h); -#else - __kmp_thread_free(thread,h); -#endif -} - -static kmp_dephash_entry * -__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr ) -{ - kmp_int32 bucket = __kmp_dephash_hash(addr); - - kmp_dephash_entry_t *entry; - for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket ) - if ( entry->addr == addr ) break; - - if ( entry == NULL ) { - // create entry. 
This is only done by one thread so no locking required -#if USE_FAST_MEMORY - entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) ); -#else - entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) ); -#endif - entry->addr = addr; - entry->last_out = NULL; - entry->last_ins = NULL; - entry->next_in_bucket = h->buckets[bucket]; - h->buckets[bucket] = entry; -#ifdef KMP_DEBUG - h->nelements++; - if ( entry->next_in_bucket ) h->nconflicts++; -#endif - } - return entry; -} - -static kmp_depnode_list_t * -__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node ) -{ - kmp_depnode_list_t *new_head; - -#if USE_FAST_MEMORY - new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t)); -#else - new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t)); -#endif - - new_head->node = __kmp_node_ref(node); - new_head->next = list; - - return new_head; -} - -static void -__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list ) -{ - kmp_depnode_list *next; - - for ( ; list ; list = next ) { - next = list->next; - - __kmp_node_deref(thread,list->node); -#if USE_FAST_MEMORY - __kmp_fast_free(thread,list); -#else - __kmp_thread_free(thread,list); -#endif - } -} - -static inline void -__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink ) -{ -#ifdef KMP_SUPPORT_GRAPH_OUTPUT - kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); - kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink->dn.task); // this can be NULL when if(0) ... 
- - __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource); -#endif -} - -template< bool filter > -static inline kmp_int32 -__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, - bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list) -{ - KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d depencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) ); - - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_int32 npredecessors=0; - for ( kmp_int32 i = 0; i < ndeps ; i++ ) { - const kmp_depend_info_t * dep = &dep_list[i]; - - KMP_DEBUG_ASSERT(dep->flags.in); - - if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries - - kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr); - kmp_depnode_t *last_out = info->last_out; - - if ( dep->flags.out && info->last_ins ) { - for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) { - kmp_depnode_t * indep = p->node; - if ( indep->dn.task ) { - KMP_ACQUIRE_DEPNODE(gtid,indep); - if ( indep->dn.task ) { - __kmp_track_dependence(indep,node); - indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node); - KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", - filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); - npredecessors++; - } - KMP_RELEASE_DEPNODE(gtid,indep); - } - } - - __kmp_depnode_list_free(thread,info->last_ins); - info->last_ins = NULL; - - } else if ( last_out && last_out->dn.task ) { - KMP_ACQUIRE_DEPNODE(gtid,last_out); - if ( last_out->dn.task ) { - __kmp_track_dependence(last_out,node); - last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node); - KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", - filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); - - npredecessors++; - } - 
KMP_RELEASE_DEPNODE(gtid,last_out); - } - - if ( dep_barrier ) { - // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after - // the execution of this task so the previous output nodes can be cleared. - __kmp_node_deref(thread,last_out); - info->last_out = NULL; - } else { - if ( dep->flags.out ) { - __kmp_node_deref(thread,last_out); - info->last_out = __kmp_node_ref(node); - } else - info->last_ins = __kmp_add_node(thread, info->last_ins, node); - } - - } - - KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) ); - - return npredecessors; -} - -#define NO_DEP_BARRIER (false) -#define DEP_BARRIER (true) - -// returns true if the task has any outstanding dependence -static bool -__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier, - kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) -{ - int i; - -#if KMP_DEBUG - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); -#endif - KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased depedencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) ); - - // Filter deps in dep_list - // TODO: Different algorithm for large dep_list ( > 10 ? 
) - for ( i = 0; i < ndeps; i ++ ) { - if ( dep_list[i].base_addr != 0 ) - for ( int j = i+1; j < ndeps; j++ ) - if ( dep_list[i].base_addr == dep_list[j].base_addr ) { - dep_list[i].flags.in |= dep_list[j].flags.in; - dep_list[i].flags.out |= dep_list[j].flags.out; - dep_list[j].base_addr = 0; // Mark j element as void - } - } - - // doesn't need to be atomic as no other thread is going to be accessing this node just yet - // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies - node->dn.npredecessors = -1; - - // used to pack all npredecessors additions into a single atomic operation at the end - int npredecessors; - - npredecessors = __kmp_process_deps(gtid, node, hash, dep_barrier, ndeps, dep_list); - npredecessors += __kmp_process_deps(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list); - - node->dn.task = task; - KMP_MB(); - - // Account for our initial fake value - npredecessors++; - - // Update predecessors and obtain current value to check if there are still any outstandig dependences (some tasks may have finished while we processed the dependences) - npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors; - - KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) ); - - // beyond this point the task could be queued (and executed) by a releasing task... - return npredecessors > 0 ? 
true : false; -} - -void -__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task ) -{ - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_depnode_t *node = task->td_depnode; - - if ( task->td_dephash ) { - KA_TRACE(40, ("__kmp_realease_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) ); - __kmp_dephash_free(thread,task->td_dephash); - } - - if ( !node ) return; - - KA_TRACE(20, ("__kmp_realease_deps: T#%d notifying succesors of task %p.\n", gtid, task ) ); - - KMP_ACQUIRE_DEPNODE(gtid,node); - node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated - KMP_RELEASE_DEPNODE(gtid,node); - - kmp_depnode_list_t *next; - for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) { - kmp_depnode_t *successor = p->node; - kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1; - - // successor task can be NULL for wait_depends or because deps are still being processed - if ( npredecessors == 0 ) { - KMP_MB(); - if ( successor->dn.task ) { - KA_TRACE(20, ("__kmp_realease_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) ); - __kmp_omp_task(gtid,successor->dn.task,false); - } - } - - next = p->next; - __kmp_node_deref(thread,p->node); -#if USE_FAST_MEMORY - __kmp_fast_free(thread,p); -#else - __kmp_thread_free(thread,p); -#endif - } - - __kmp_node_deref(thread,node); - - KA_TRACE(20, ("__kmp_realease_deps: T#%d all successors of %p notified of completation\n", gtid, task ) ); -} - -/*! 
-@ingroup TASKING -@param loc_ref location of the original task directive -@param gtid Global Thread ID of encountering thread -@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' -@param ndeps Number of depend items with possible aliasing -@param dep_list List of depend items with possible aliasing -@param ndeps_noalias Number of depend items with no aliasing -@param noalias_dep_list List of depend items with no aliasing - -@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not suspendend and queued, or TASK_CURRENT_QUEUED if it was suspended and queued - -Schedule a non-thread-switchable task with dependences for execution -*/ -kmp_int32 -__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, - kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) -{ - - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); - - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * current_task = thread->th.th_current_task; - - bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; -#if OMP_41_ENABLED - serial = serial && !(new_taskdata->td_flags.proxy == TASK_PROXY); -#endif - - if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) { - /* if no dependencies have been tracked yet, create the dependence hash */ - if ( current_task->td_dephash == NULL ) - current_task->td_dephash = __kmp_dephash_create(thread); - -#if USE_FAST_MEMORY - kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t)); -#else - kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t)); -#endif - - __kmp_init_node(node); - new_taskdata->td_depnode = node; - - if ( __kmp_check_deps( gtid, node, new_task, 
current_task->td_dephash, NO_DEP_BARRIER, - ndeps, dep_list, ndeps_noalias,noalias_dep_list ) ) { - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: " - "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, - new_taskdata ) ); - return TASK_CURRENT_NOT_QUEUED; - } - } else { -#if OMP_41_ENABLED - kmp_task_team_t * task_team = thread->th.th_task_team; - if ( task_team && task_team->tt.tt_found_proxy_tasks ) - __kmpc_omp_wait_deps ( loc_ref, gtid, ndeps, dep_list, ndeps_noalias, noalias_dep_list ); - else -#endif - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies for task (serialized)" - "loc=%p task=%p\n", gtid, loc_ref, new_taskdata ) ); - } - - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : " - "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref, - new_taskdata ) ); - - return __kmpc_omp_task(loc_ref,gtid,new_task); -} - -/*! -@ingroup TASKING -@param loc_ref location of the original task directive -@param gtid Global Thread ID of encountering thread -@param ndeps Number of depend items with possible aliasing -@param dep_list List of depend items with possible aliasing -@param ndeps_noalias Number of depend items with no aliasing -@param noalias_dep_list List of depend items with no aliasing - -Blocks the current task until all specifies dependencies have been fulfilled. 
-*/ -void -__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) -{ - KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) ); - - if ( ndeps == 0 && ndeps_noalias == 0 ) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) ); - return; - } - - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * current_task = thread->th.th_current_task; - - // We can return immediately as: - // - dependences are not computed in serial teams (except if we have proxy tasks) - // - if the dephash is not yet created it means we have nothing to wait for - bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; -#if OMP_41_ENABLED - ignore = ignore && thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; -#endif - ignore = ignore || current_task->td_dephash == NULL; - - if ( ignore ) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) ); - return; - } - - kmp_depnode_t node; - __kmp_init_node(&node); - - if (!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER, - ndeps, dep_list, ndeps_noalias, noalias_dep_list )) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) ); - return; - } - - int thread_finished = FALSE; - kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U); - while ( node.dn.npredecessors > 0 ) { - flag.execute_tasks(thread, gtid, FALSE, &thread_finished, -#if USE_ITT_BUILD - NULL, -#endif - __kmp_task_stealing_constraint ); - } - - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) ); -} - -#endif /* OMP_40_ENABLED */ - +/* + * kmp_taskdeps.cpp + */ + + 
+//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +//#define KMP_SUPPORT_GRAPH_OUTPUT 1 + +#include "kmp.h" +#include "kmp_io.h" +#include "kmp_wait_release.h" + +#if OMP_40_ENABLED + +//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use list finished list entries? +//TODO: don't use atomic ref counters for stack-allocated nodes. +//TODO: find an alternate to atomic refs for heap-allocated nodes? +//TODO: Finish graph output support +//TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other runtime locks +//TODO: Any ITT support needed? + +#ifdef KMP_SUPPORT_GRAPH_OUTPUT +static kmp_int32 kmp_node_id_seed = 0; +#endif + +static void +__kmp_init_node ( kmp_depnode_t *node ) +{ + node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed + node->dn.successors = NULL; + __kmp_init_lock(&node->dn.lock); + node->dn.nrefs = 1; // init creates the first reference to the node +#ifdef KMP_SUPPORT_GRAPH_OUTPUT + node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed); +#endif +} + +static inline kmp_depnode_t * +__kmp_node_ref ( kmp_depnode_t *node ) +{ + KMP_TEST_THEN_INC32(&node->dn.nrefs); + return node; +} + +static inline void +__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node ) +{ + if (!node) return; + + kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1; + if ( n == 0 ) { + KMP_ASSERT(node->dn.nrefs == 0); +#if USE_FAST_MEMORY + __kmp_fast_free(thread,node); +#else + __kmp_thread_free(thread,node); +#endif + } +} + +#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid)) +#define KMP_RELEASE_DEPNODE(gtid,n) 
__kmp_release_lock(&(n)->dn.lock,(gtid)) + +static void +__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list ); + +static const kmp_int32 kmp_dephash_log2 = 6; +static const kmp_int32 kmp_dephash_size = (1 << kmp_dephash_log2); + +static inline kmp_int32 +__kmp_dephash_hash ( kmp_intptr_t addr ) +{ + //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets ); + return ((addr >> kmp_dephash_log2) ^ addr) % kmp_dephash_size; +} + +static kmp_dephash_t * +__kmp_dephash_create ( kmp_info_t *thread ) +{ + kmp_dephash_t *h; + + kmp_int32 size = kmp_dephash_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t); + +#if USE_FAST_MEMORY + h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size ); +#else + h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size ); +#endif + +#ifdef KMP_DEBUG + h->nelements = 0; +#endif + h->buckets = (kmp_dephash_entry **)(h+1); + + for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ ) + h->buckets[i] = 0; + + return h; +} + +static void +__kmp_dephash_free ( kmp_info_t *thread, kmp_dephash_t *h ) +{ + for ( kmp_int32 i=0; i < kmp_dephash_size; i++ ) { + if ( h->buckets[i] ) { + kmp_dephash_entry_t *next; + for ( kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next ) { + next = entry->next_in_bucket; + __kmp_depnode_list_free(thread,entry->last_ins); + __kmp_node_deref(thread,entry->last_out); +#if USE_FAST_MEMORY + __kmp_fast_free(thread,entry); +#else + __kmp_thread_free(thread,entry); +#endif + } + } + } +#if USE_FAST_MEMORY + __kmp_fast_free(thread,h); +#else + __kmp_thread_free(thread,h); +#endif +} + +static kmp_dephash_entry * +__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr ) +{ + kmp_int32 bucket = __kmp_dephash_hash(addr); + + kmp_dephash_entry_t *entry; + for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket ) + if ( entry->addr == addr ) break; + + if ( entry == NULL ) { + // create entry. 
This is only done by one thread so no locking required +#if USE_FAST_MEMORY + entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) ); +#else + entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) ); +#endif + entry->addr = addr; + entry->last_out = NULL; + entry->last_ins = NULL; + entry->next_in_bucket = h->buckets[bucket]; + h->buckets[bucket] = entry; +#ifdef KMP_DEBUG + h->nelements++; + if ( entry->next_in_bucket ) h->nconflicts++; +#endif + } + return entry; +} + +static kmp_depnode_list_t * +__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node ) +{ + kmp_depnode_list_t *new_head; + +#if USE_FAST_MEMORY + new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t)); +#else + new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t)); +#endif + + new_head->node = __kmp_node_ref(node); + new_head->next = list; + + return new_head; +} + +static void +__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list ) +{ + kmp_depnode_list *next; + + for ( ; list ; list = next ) { + next = list->next; + + __kmp_node_deref(thread,list->node); +#if USE_FAST_MEMORY + __kmp_fast_free(thread,list); +#else + __kmp_thread_free(thread,list); +#endif + } +} + +static inline void +__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink ) +{ +#ifdef KMP_SUPPORT_GRAPH_OUTPUT + kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); + kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink->dn.task); // this can be NULL when if(0) ... 
+ + __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource); +#endif +} + +template< bool filter > +static inline kmp_int32 +__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, + bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list) +{ + KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d depencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) ); + + kmp_info_t *thread = __kmp_threads[ gtid ]; + kmp_int32 npredecessors=0; + for ( kmp_int32 i = 0; i < ndeps ; i++ ) { + const kmp_depend_info_t * dep = &dep_list[i]; + + KMP_DEBUG_ASSERT(dep->flags.in); + + if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries + + kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr); + kmp_depnode_t *last_out = info->last_out; + + if ( dep->flags.out && info->last_ins ) { + for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) { + kmp_depnode_t * indep = p->node; + if ( indep->dn.task ) { + KMP_ACQUIRE_DEPNODE(gtid,indep); + if ( indep->dn.task ) { + __kmp_track_dependence(indep,node); + indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node); + KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", + filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); + npredecessors++; + } + KMP_RELEASE_DEPNODE(gtid,indep); + } + } + + __kmp_depnode_list_free(thread,info->last_ins); + info->last_ins = NULL; + + } else if ( last_out && last_out->dn.task ) { + KMP_ACQUIRE_DEPNODE(gtid,last_out); + if ( last_out->dn.task ) { + __kmp_track_dependence(last_out,node); + last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node); + KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", + filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); + + npredecessors++; + } + 
KMP_RELEASE_DEPNODE(gtid,last_out); + } + + if ( dep_barrier ) { + // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after + // the execution of this task so the previous output nodes can be cleared. + __kmp_node_deref(thread,last_out); + info->last_out = NULL; + } else { + if ( dep->flags.out ) { + __kmp_node_deref(thread,last_out); + info->last_out = __kmp_node_ref(node); + } else + info->last_ins = __kmp_add_node(thread, info->last_ins, node); + } + + } + + KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) ); + + return npredecessors; +} + +#define NO_DEP_BARRIER (false) +#define DEP_BARRIER (true) + +// returns true if the task has any outstanding dependence +static bool +__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) +{ + int i; + +#if KMP_DEBUG + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); +#endif + KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased depedencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) ); + + // Filter deps in dep_list + // TODO: Different algorithm for large dep_list ( > 10 ? 
) + for ( i = 0; i < ndeps; i ++ ) { + if ( dep_list[i].base_addr != 0 ) + for ( int j = i+1; j < ndeps; j++ ) + if ( dep_list[i].base_addr == dep_list[j].base_addr ) { + dep_list[i].flags.in |= dep_list[j].flags.in; + dep_list[i].flags.out |= dep_list[j].flags.out; + dep_list[j].base_addr = 0; // Mark j element as void + } + } + + // doesn't need to be atomic as no other thread is going to be accessing this node just yet + // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies + node->dn.npredecessors = -1; + + // used to pack all npredecessors additions into a single atomic operation at the end + int npredecessors; + + npredecessors = __kmp_process_deps(gtid, node, hash, dep_barrier, ndeps, dep_list); + npredecessors += __kmp_process_deps(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list); + + node->dn.task = task; + KMP_MB(); + + // Account for our initial fake value + npredecessors++; + + // Update predecessors and obtain current value to check if there are still any outstandig dependences (some tasks may have finished while we processed the dependences) + npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors; + + KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) ); + + // beyond this point the task could be queued (and executed) by a releasing task... + return npredecessors > 0 ? 
true : false; +} + +void +__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task ) +{ + kmp_info_t *thread = __kmp_threads[ gtid ]; + kmp_depnode_t *node = task->td_depnode; + + if ( task->td_dephash ) { + KA_TRACE(40, ("__kmp_realease_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) ); + __kmp_dephash_free(thread,task->td_dephash); + } + + if ( !node ) return; + + KA_TRACE(20, ("__kmp_realease_deps: T#%d notifying succesors of task %p.\n", gtid, task ) ); + + KMP_ACQUIRE_DEPNODE(gtid,node); + node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated + KMP_RELEASE_DEPNODE(gtid,node); + + kmp_depnode_list_t *next; + for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) { + kmp_depnode_t *successor = p->node; + kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1; + + // successor task can be NULL for wait_depends or because deps are still being processed + if ( npredecessors == 0 ) { + KMP_MB(); + if ( successor->dn.task ) { + KA_TRACE(20, ("__kmp_realease_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) ); + __kmp_omp_task(gtid,successor->dn.task,false); + } + } + + next = p->next; + __kmp_node_deref(thread,p->node); +#if USE_FAST_MEMORY + __kmp_fast_free(thread,p); +#else + __kmp_thread_free(thread,p); +#endif + } + + __kmp_node_deref(thread,node); + + KA_TRACE(20, ("__kmp_realease_deps: T#%d all successors of %p notified of completation\n", gtid, task ) ); +} + +/*! 
+@ingroup TASKING +@param loc_ref location of the original task directive +@param gtid Global Thread ID of encountering thread +@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' +@param ndeps Number of depend items with possible aliasing +@param dep_list List of depend items with possible aliasing +@param ndeps_noalias Number of depend items with no aliasing +@param noalias_dep_list List of depend items with no aliasing + +@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not suspendend and queued, or TASK_CURRENT_QUEUED if it was suspended and queued + +Schedule a non-thread-switchable task with dependences for execution +*/ +kmp_int32 +__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) +{ + + kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); + KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", + gtid, loc_ref, new_taskdata ) ); + + kmp_info_t *thread = __kmp_threads[ gtid ]; + kmp_taskdata_t * current_task = thread->th.th_current_task; + + bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; +#if OMP_41_ENABLED + serial = serial && !(new_taskdata->td_flags.proxy == TASK_PROXY); +#endif + + if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) { + /* if no dependencies have been tracked yet, create the dependence hash */ + if ( current_task->td_dephash == NULL ) + current_task->td_dephash = __kmp_dephash_create(thread); + +#if USE_FAST_MEMORY + kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t)); +#else + kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t)); +#endif + + __kmp_init_node(node); + new_taskdata->td_depnode = node; + + if ( __kmp_check_deps( gtid, node, new_task, 
current_task->td_dephash, NO_DEP_BARRIER, + ndeps, dep_list, ndeps_noalias,noalias_dep_list ) ) { + KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: " + "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, + new_taskdata ) ); + return TASK_CURRENT_NOT_QUEUED; + } + } else { +#if OMP_41_ENABLED + kmp_task_team_t * task_team = thread->th.th_task_team; + if ( task_team && task_team->tt.tt_found_proxy_tasks ) + __kmpc_omp_wait_deps ( loc_ref, gtid, ndeps, dep_list, ndeps_noalias, noalias_dep_list ); + else +#endif + KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies for task (serialized)" + "loc=%p task=%p\n", gtid, loc_ref, new_taskdata ) ); + } + + KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : " + "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref, + new_taskdata ) ); + + return __kmpc_omp_task(loc_ref,gtid,new_task); +} + +/*! +@ingroup TASKING +@param loc_ref location of the original task directive +@param gtid Global Thread ID of encountering thread +@param ndeps Number of depend items with possible aliasing +@param dep_list List of depend items with possible aliasing +@param ndeps_noalias Number of depend items with no aliasing +@param noalias_dep_list List of depend items with no aliasing + +Blocks the current task until all specifies dependencies have been fulfilled. 
+*/ +void +__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) +{ + KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) ); + + if ( ndeps == 0 && ndeps_noalias == 0 ) { + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) ); + return; + } + + kmp_info_t *thread = __kmp_threads[ gtid ]; + kmp_taskdata_t * current_task = thread->th.th_current_task; + + // We can return immediately as: + // - dependences are not computed in serial teams (except if we have proxy tasks) + // - if the dephash is not yet created it means we have nothing to wait for + bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; +#if OMP_41_ENABLED + ignore = ignore && thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; +#endif + ignore = ignore || current_task->td_dephash == NULL; + + if ( ignore ) { + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) ); + return; + } + + kmp_depnode_t node; + __kmp_init_node(&node); + + if (!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER, + ndeps, dep_list, ndeps_noalias, noalias_dep_list )) { + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) ); + return; + } + + int thread_finished = FALSE; + kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U); + while ( node.dn.npredecessors > 0 ) { + flag.execute_tasks(thread, gtid, FALSE, &thread_finished, +#if USE_ITT_BUILD + NULL, +#endif + __kmp_task_stealing_constraint ); + } + + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) ); +} + +#endif /* OMP_40_ENABLED */ + diff --git a/contrib/libs/cxxsupp/openmp/kmp_tasking.c 
b/contrib/libs/cxxsupp/openmp/kmp_tasking.c index ec588c33f63..432f9192319 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_tasking.c +++ b/contrib/libs/cxxsupp/openmp/kmp_tasking.c @@ -1,2860 +1,2860 @@ -/* - * kmp_tasking.c -- OpenMP 3.0 tasking support. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_itt.h" -#include "kmp_wait_release.h" -#include "kmp_stats.h" - -#if OMPT_SUPPORT -#include "ompt-specific.h" -#endif - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - -/* forward declaration */ -static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ); -static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data ); -static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team ); - -#ifdef OMP_41_ENABLED -static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask ); -#endif - -static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) { - if (!flag) return; - // Attempt to wake up a thread: examine its type and call appropriate template - switch (((kmp_flag_64 *)flag)->get_type()) { - case flag32: __kmp_resume_32(gtid, NULL); break; - case flag64: __kmp_resume_64(gtid, NULL); break; - case flag_oncore: __kmp_resume_oncore(gtid, NULL); break; - } -} - -#ifdef BUILD_TIED_TASK_STACK - -//--------------------------------------------------------------------------- -// __kmp_trace_task_stack: print the tied tasks from the task stack in order -// from top do bottom -// -// gtid: 
global thread identifier for thread containing stack -// thread_data: thread data for task team thread containing stack -// threshold: value above which the trace statement triggers -// location: string identifying call site of this function (for trace) - -static void -__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location ) -{ - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; - kmp_taskdata_t **stack_top = task_stack -> ts_top; - kmp_int32 entries = task_stack -> ts_entries; - kmp_taskdata_t *tied_task; - - KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, " - "first_block = %p, stack_top = %p \n", - location, gtid, entries, task_stack->ts_first_block, stack_top ) ); - - KMP_DEBUG_ASSERT( stack_top != NULL ); - KMP_DEBUG_ASSERT( entries > 0 ); - - while ( entries != 0 ) - { - KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] ); - // fix up ts_top if we need to pop from previous block - if ( entries & TASK_STACK_INDEX_MASK == 0 ) - { - kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ; - - stack_block = stack_block -> sb_prev; - stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE]; - } - - // finish bookkeeping - stack_top--; - entries--; - - tied_task = * stack_top; - - KMP_DEBUG_ASSERT( tied_task != NULL ); - KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); - - KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, " - "stack_top=%p, tied_task=%p\n", - location, gtid, entries, stack_top, tied_task ) ); - } - KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] ); - - KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n", - location, gtid ) ); -} - -//--------------------------------------------------------------------------- -// __kmp_init_task_stack: initialize the task stack for the first time -// after a thread_data structure is 
created. -// It should not be necessary to do this again (assuming the stack works). -// -// gtid: global thread identifier of calling thread -// thread_data: thread data for task team thread containing stack - -static void -__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data ) -{ - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; - kmp_stack_block_t *first_block; - - // set up the first block of the stack - first_block = & task_stack -> ts_first_block; - task_stack -> ts_top = (kmp_taskdata_t **) first_block; - memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *)); - - // initialize the stack to be empty - task_stack -> ts_entries = TASK_STACK_EMPTY; - first_block -> sb_next = NULL; - first_block -> sb_prev = NULL; -} - - -//--------------------------------------------------------------------------- -// __kmp_free_task_stack: free the task stack when thread_data is destroyed. -// -// gtid: global thread identifier for calling thread -// thread_data: thread info for thread containing stack - -static void -__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data ) -{ - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; - kmp_stack_block_t *stack_block = & task_stack -> ts_first_block; - - KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY ); - // free from the second block of the stack - while ( stack_block != NULL ) { - kmp_stack_block_t *next_block = (stack_block) ? 
stack_block -> sb_next : NULL; - - stack_block -> sb_next = NULL; - stack_block -> sb_prev = NULL; - if (stack_block != & task_stack -> ts_first_block) { - __kmp_thread_free( thread, stack_block ); // free the block, if not the first - } - stack_block = next_block; - } - // initialize the stack to be empty - task_stack -> ts_entries = 0; - task_stack -> ts_top = NULL; -} - - -//--------------------------------------------------------------------------- -// __kmp_push_task_stack: Push the tied task onto the task stack. -// Grow the stack if necessary by allocating another block. -// -// gtid: global thread identifier for calling thread -// thread: thread info for thread containing stack -// tied_task: the task to push on the stack - -static void -__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task ) -{ - // GEH - need to consider what to do if tt_threads_data not allocated yet - kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> - tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ; - - if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) { - return; // Don't push anything on stack if team or team tasks are serialized - } - - KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); - KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL ); - - KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n", - gtid, thread, tied_task ) ); - // Store entry - * (task_stack -> ts_top) = tied_task; - - // Do bookkeeping for next push - task_stack -> ts_top++; - task_stack -> ts_entries++; - - if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 ) - { - // Find beginning of this task block - kmp_stack_block_t *stack_block = - (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE); - - // Check if we already have a block - if ( stack_block -> sb_next != NULL ) - { // reset ts_top to beginning of next 
block - task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0]; - } - else - { // Alloc new block and link it up - kmp_stack_block_t *new_block = (kmp_stack_block_t *) - __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t)); - - task_stack -> ts_top = & new_block -> sb_block[0]; - stack_block -> sb_next = new_block; - new_block -> sb_prev = stack_block; - new_block -> sb_next = NULL; - - KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n", - gtid, tied_task, new_block ) ); - } - } - KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) ); -} - -//--------------------------------------------------------------------------- -// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return -// the task, just check to make sure it matches the ending task passed in. -// -// gtid: global thread identifier for the calling thread -// thread: thread info structure containing stack -// tied_task: the task popped off the stack -// ending_task: the task that is ending (should match popped task) - -static void -__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task ) -{ - // GEH - need to consider what to do if tt_threads_data not allocated yet - kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ; - kmp_taskdata_t *tied_task; - - if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) { - return; // Don't pop anything from stack if team or team tasks are serialized - } - - KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL ); - KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 ); - - KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) ); - - // fix up ts_top if we need to pop from previous block - if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 ) - { - kmp_stack_block_t 
*stack_block = - (kmp_stack_block_t *) (task_stack -> ts_top) ; - - stack_block = stack_block -> sb_prev; - task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE]; - } - - // finish bookkeeping - task_stack -> ts_top--; - task_stack -> ts_entries--; - - tied_task = * (task_stack -> ts_top ); - - KMP_DEBUG_ASSERT( tied_task != NULL ); - KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); - KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly - - KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) ); - return; -} -#endif /* BUILD_TIED_TASK_STACK */ - -//--------------------------------------------------- -// __kmp_push_task: Add a task to the thread's deque - -static kmp_int32 -__kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) -{ - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_task_team_t * task_team = thread->th.th_task_team; - kmp_int32 tid = __kmp_tid_from_gtid( gtid ); - kmp_thread_data_t * thread_data; - - KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) ); - - // The first check avoids building task_team thread data if serialized - if ( taskdata->td_flags.task_serial ) { - KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n", - gtid, taskdata ) ); - return TASK_NOT_PUSHED; - } - - // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - if ( ! 
KMP_TASKING_ENABLED(task_team) ) { - __kmp_enable_tasking( task_team, thread ); - } - KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE ); - KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL ); - - // Find tasking deque specific to encountering thread - thread_data = & task_team -> tt.tt_threads_data[ tid ]; - - // No lock needed since only owner can allocate - if (thread_data -> td.td_deque == NULL ) { - __kmp_alloc_task_deque( thread, thread_data ); - } - - // Check if deque is full - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) - { - KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n", - gtid, taskdata ) ); - return TASK_NOT_PUSHED; - } - - // Lock the deque for the task push operation - __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); - -#if OMP_41_ENABLED - // Need to recheck as we can get a proxy task from a thread outside of OpenMP - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) - { - __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n", - gtid, taskdata ) ); - return TASK_NOT_PUSHED; - } -#else - // Must have room since no thread can add tasks but calling thread - KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE ); -#endif - - thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata - // Wrap index. 
- thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK; - TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count - - __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - - KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: " - "task=%p ntasks=%d head=%u tail=%u\n", - gtid, taskdata, thread_data->td.td_deque_ntasks, - thread_data->td.td_deque_tail, thread_data->td.td_deque_head) ); - - return TASK_SUCCESSFULLY_PUSHED; -} - - -//----------------------------------------------------------------------------------------- -// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends -// this_thr: thread structure to set current_task in. - -void -__kmp_pop_current_task_from_thread( kmp_info_t *this_thr ) -{ - KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, " - "curtask_parent=%p\n", - 0, this_thr, this_thr -> th.th_current_task, - this_thr -> th.th_current_task -> td_parent ) ); - - this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent; - - KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, " - "curtask_parent=%p\n", - 0, this_thr, this_thr -> th.th_current_task, - this_thr -> th.th_current_task -> td_parent ) ); -} - - -//--------------------------------------------------------------------------------------- -// __kmp_push_current_task_to_thread: set up current task in called thread for a new team -// this_thr: thread structure to set up -// team: team for implicit task data -// tid: thread within team to set up - -void -__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid ) -{ - // current task of the thread is a parent of the new just created implicit tasks of new team - KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p " - 
"parent_task=%p\n", - tid, this_thr, this_thr->th.th_current_task, - team->t.t_implicit_task_taskdata[tid].td_parent ) ); - - KMP_DEBUG_ASSERT (this_thr != NULL); - - if( tid == 0 ) { - if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) { - team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task; - this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ]; - } - } else { - team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent; - this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ]; - } - - KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p " - "parent_task=%p\n", - tid, this_thr, this_thr->th.th_current_task, - team->t.t_implicit_task_taskdata[tid].td_parent ) ); -} - - -//---------------------------------------------------------------------- -// __kmp_task_start: bookkeeping for a task starting execution -// GTID: global thread id of calling thread -// task: task starting execution -// current_task: task suspending - -static void -__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_info_t * thread = __kmp_threads[ gtid ]; - - KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n", - gtid, taskdata, current_task) ); - - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); - - // mark currently executing task as suspended - // TODO: GEH - make sure root team implicit task is initialized properly. 
- // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 ); - current_task -> td_flags.executing = 0; - - // Add task to stack if tied -#ifdef BUILD_TIED_TASK_STACK - if ( taskdata -> td_flags.tiedness == TASK_TIED ) - { - __kmp_push_task_stack( gtid, thread, taskdata ); - } -#endif /* BUILD_TIED_TASK_STACK */ - - // mark starting task as executing and as current task - thread -> th.th_current_task = taskdata; - - KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 ); - taskdata -> td_flags.started = 1; - taskdata -> td_flags.executing = 1; - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); - - // GEH TODO: shouldn't we pass some sort of location identifier here? - // APT: yes, we will pass location here. - // need to store current thread state (in a thread or taskdata structure) - // before setting work_state, otherwise wrong state is set after end of task - - KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", - gtid, taskdata ) ); - -#if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_begin)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_begin)( - parent ? parent->ompt_task_info.task_id : ompt_task_id_none, - parent ? &(parent->ompt_task_info.frame) : NULL, - taskdata->ompt_task_info.task_id, - taskdata->ompt_task_info.function); - } -#endif - - return; -} - - -//---------------------------------------------------------------------- -// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution -// loc_ref: source location information; points to beginning of task block. -// gtid: global thread number. -// task: task thunk for the started task. 
- -void -__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - - KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n", - gtid, loc_ref, taskdata, current_task ) ); - - taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred. - __kmp_task_start( gtid, task, current_task ); - - KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", - gtid, loc_ref, taskdata ) ); - - return; -} - -#ifdef TASK_UNUSED -//---------------------------------------------------------------------- -// __kmpc_omp_task_begin: report that a given task has started execution -// NEVER GENERATED BY COMPILER, DEPRECATED!!! - -void -__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ) -{ - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - - KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) ); - - __kmp_task_start( gtid, task, current_task ); - - KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - return; -} -#endif // TASK_UNUSED - - -//------------------------------------------------------------------------------------- -// __kmp_free_task: free the current task space and the space for shareds -// gtid: Global thread ID of calling thread -// taskdata: task to free -// thread: thread data structure of caller - -static void -__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread ) -{ - KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", - gtid, taskdata) ); - - // Check to make sure all flags and counters have the correct values - KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT ); - 
KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 ); - KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 ); - KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 ); - KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1); - KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 ); - - taskdata->td_flags.freed = 1; - // deallocate the taskdata and shared variable blocks associated with this task - #if USE_FAST_MEMORY - __kmp_fast_free( thread, taskdata ); - #else /* ! USE_FAST_MEMORY */ - __kmp_thread_free( thread, taskdata ); - #endif - - KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", - gtid, taskdata) ); -} - -//------------------------------------------------------------------------------------- -// __kmp_free_task_and_ancestors: free the current task and ancestors without children -// -// gtid: Global thread ID of calling thread -// taskdata: task to free -// thread: thread data structure of caller - -static void -__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread ) -{ - kmp_int32 children = 0; - kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser; - - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); - - if ( !team_or_tasking_serialized ) { - children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); - } - - // Now, go up the ancestor tree to see if any ancestors can now be freed. 
- while ( children == 0 ) - { - kmp_taskdata_t * parent_taskdata = taskdata -> td_parent; - - KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete " - "and freeing itself\n", gtid, taskdata) ); - - // --- Deallocate my ancestor task --- - __kmp_free_task( gtid, taskdata, thread ); - - taskdata = parent_taskdata; - - // Stop checking ancestors at implicit task or if tasking serialized - // instead of walking up ancestor tree to avoid premature deallocation of ancestors. - if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT ) - return; - - if ( !team_or_tasking_serialized ) { - // Predecrement simulated by "- 1" calculation - children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); - } - } - - KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; " - "not freeing it yet\n", gtid, taskdata, children) ); -} - -//--------------------------------------------------------------------- -// __kmp_task_finish: bookkeeping to do when a task finishes execution -// gtid: global thread ID for calling thread -// task: task to be finished -// resumed_task: task to be resumed. 
(may be NULL if task is serialized) - -static void -__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_int32 children = 0; - -#if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_end)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_end)( - taskdata->ompt_task_info.task_id); - } -#endif - - KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n", - gtid, taskdata, resumed_task) ); - - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); - - // Pop task from stack if tied -#ifdef BUILD_TIED_TASK_STACK - if ( taskdata -> td_flags.tiedness == TASK_TIED ) - { - __kmp_pop_task_stack( gtid, thread, taskdata ); - } -#endif /* BUILD_TIED_TASK_STACK */ - - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); - taskdata -> td_flags.complete = 1; // mark the task as completed - KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); - - // Only need to keep track of count if team parallel and tasking not serialized - if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) { - // Predecrement simulated by "- 1" calculation - children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); -#if OMP_40_ENABLED - if ( taskdata->td_taskgroup ) - KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) ); - __kmp_release_deps(gtid,taskdata); -#endif - } - - // td_flags.executing must be marked as 0 after __kmp_release_deps has been called - // Othertwise, if a task is executed immediately from the release_deps code - // the flag will be reset to 1 again by this same function - KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 ); - 
taskdata -> td_flags.executing = 0; // suspend the finishing task - - KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n", - gtid, taskdata, children) ); - -#if OMP_40_ENABLED - /* If the tasks' destructor thunk flag has been set, we need to invoke the - destructor thunk that has been generated by the compiler. - The code is placed here, since at this point other tasks might have been released - hence overlapping the destructor invokations with some other work in the - released tasks. The OpenMP spec is not specific on when the destructors are - invoked, so we should be free to choose. - */ - if (taskdata->td_flags.destructors_thunk) { - kmp_routine_entry_t destr_thunk = task->destructors; - KMP_ASSERT(destr_thunk); - destr_thunk(gtid, task); - } -#endif // OMP_40_ENABLED - - // bookkeeping for resuming task: - // GEH - note tasking_ser => task_serial - KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == - taskdata->td_flags.task_serial); - if ( taskdata->td_flags.task_serial ) - { - if (resumed_task == NULL) { - resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent - } - else { - // verify resumed task passed in points to parent - KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent ); - } - } - else { - KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as arguemnt - } - - // Free this task and then ancestor tasks if they have no children. - __kmp_free_task_and_ancestors(gtid, taskdata, thread); - - // FIXME johnmc: I this statement should be before the last one so if an - // asynchronous inquiry peers into the runtime system it doesn't see the freed - // task as the current task - __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task - - // TODO: GEH - make sure root team implicit task is initialized properly. 
- // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 ); - resumed_task->td_flags.executing = 1; // resume previous task - - KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n", - gtid, taskdata, resumed_task) ); - - return; -} - -//--------------------------------------------------------------------- -// __kmpc_omp_task_complete_if0: report that a task has completed execution -// loc_ref: source location information; points to end of task block. -// gtid: global thread number. -// task: task thunk for the completed task. - -void -__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ) -{ - KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume - - KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - return; -} - -#ifdef TASK_UNUSED -//--------------------------------------------------------------------- -// __kmpc_omp_task_complete: report that a task has completed execution -// NEVER GENERATED BY COMPILER, DEPRECATED!!! - -void -__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ) -{ - KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume - - KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - return; -} -#endif // TASK_UNUSED - - -#if OMPT_SUPPORT -//---------------------------------------------------------------------------------------------------- -// __kmp_task_init_ompt: -// Initialize OMPT fields maintained by a task. This will only be called after -// ompt_tool, so we already know whether ompt is enabled or not. 
- -static inline void -__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function ) -{ - if (ompt_enabled) { - task->ompt_task_info.task_id = __ompt_task_id_new(tid); - task->ompt_task_info.function = function; - task->ompt_task_info.frame.exit_runtime_frame = NULL; - task->ompt_task_info.frame.reenter_runtime_frame = NULL; - } -} -#endif - - -//---------------------------------------------------------------------------------------------------- -// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread -// -// loc_ref: reference to source location of parallel region -// this_thr: thread data structure corresponding to implicit task -// team: team for this_thr -// tid: thread id of given thread within team -// set_curr_task: TRUE if need to push current task to thread -// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere. -// TODO: Get better loc_ref. Value passed in may be NULL - -void -__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task ) -{ - kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ]; - - KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n", - tid, team, task, set_curr_task ? 
"TRUE" : "FALSE" ) ); - - task->td_task_id = KMP_GEN_TASK_ID(); - task->td_team = team; -// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger) - task->td_ident = loc_ref; - task->td_taskwait_ident = NULL; - task->td_taskwait_counter = 0; - task->td_taskwait_thread = 0; - - task->td_flags.tiedness = TASK_TIED; - task->td_flags.tasktype = TASK_IMPLICIT; -#if OMP_41_ENABLED - task->td_flags.proxy = TASK_FULL; -#endif - - // All implicit tasks are executed immediately, not deferred - task->td_flags.task_serial = 1; - task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec ); - task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0; - - task->td_flags.started = 1; - task->td_flags.executing = 1; - task->td_flags.complete = 0; - task->td_flags.freed = 0; - -#if OMP_40_ENABLED - task->td_dephash = NULL; - task->td_depnode = NULL; -#endif - - if (set_curr_task) { // only do this initialization the first time a thread is created - task->td_incomplete_child_tasks = 0; - task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task -#if OMP_40_ENABLED - task->td_taskgroup = NULL; // An implicit task does not have taskgroup -#endif - __kmp_push_current_task_to_thread( this_thr, team, tid ); - } else { - KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0); - KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0); - } - -#if OMPT_SUPPORT - __kmp_task_init_ompt(task, tid, NULL); -#endif - - KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", - tid, team, task ) ); -} - -// Round up a size to a power of two specified by val -// Used to insert padding between structures co-allocated using a single malloc() call -static size_t -__kmp_round_up_to_val( size_t size, size_t val ) { - if ( size & ( val - 1 ) ) { - size &= ~ ( val - 1 ); - if ( size <= KMP_SIZE_T_MAX - val ) { - size += val; // Round up if there is no overflow. 
- }; // if - }; // if - return size; -} // __kmp_round_up_to_va - - -//--------------------------------------------------------------------------------- -// __kmp_task_alloc: Allocate the taskdata and task data structures for a task -// -// loc_ref: source location information -// gtid: global thread number. -// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered. -// Converted from kmp_int32 to kmp_tasking_flags_t in routine. -// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task. -// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task. -// task_entry: Pointer to task code entry point generated by compiler. -// returns: a pointer to the allocated kmp_task_t structure (task). - -kmp_task_t * -__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags, - size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ) -{ - kmp_task_t *task; - kmp_taskdata_t *taskdata; - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_team_t *team = thread->th.th_team; - kmp_taskdata_t *parent_task = thread->th.th_current_task; - size_t shareds_offset; - - KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t, - sizeof_shareds, task_entry) ); - - if ( parent_task->td_flags.final ) { - if (flags->merged_if0) { - } - flags->final = 1; - } - -#if OMP_41_ENABLED - if ( flags->proxy == TASK_PROXY ) { - flags->tiedness = TASK_UNTIED; - flags->merged_if0 = 1; - - /* are we running in a sequential parallel or tskm_immediate_exec... 
we need tasking support enabled */ - if ( (thread->th.th_task_team) == NULL ) { - /* This should only happen if the team is serialized - setup a task team and propagate it to the thread - */ - KMP_DEBUG_ASSERT(team->t.t_serialized); - KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid)); - __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads - thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state]; - } - kmp_task_team_t * task_team = thread->th.th_task_team; - - /* tasking must be enabled now as the task might not be pushed */ - if ( !KMP_TASKING_ENABLED( task_team ) ) { - KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid)); - __kmp_enable_tasking( task_team, thread ); - kmp_int32 tid = thread->th.th_info.ds.ds_tid; - kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ]; - // No lock needed since only owner can allocate - if (thread_data -> td.td_deque == NULL ) { - __kmp_alloc_task_deque( thread, thread_data ); - } - } - - if ( task_team->tt.tt_found_proxy_tasks == FALSE ) - TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE); - } -#endif - - // Calculate shared structure offset including padding after kmp_task_t struct - // to align pointers in shared struct - shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t; - shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * )); - - // Allocate a kmp_taskdata_t block and a kmp_task_t block. - KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", - gtid, shareds_offset) ); - KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", - gtid, sizeof_shareds) ); - - // Avoid double allocation here by combining shareds with taskdata - #if USE_FAST_MEMORY - taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds ); - #else /* ! 
USE_FAST_MEMORY */ - taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds ); - #endif /* USE_FAST_MEMORY */ - - task = KMP_TASKDATA_TO_TASK(taskdata); - - // Make sure task & taskdata are aligned appropriately -#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 ); - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 ); -#else - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 ); - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 ); -#endif - if (sizeof_shareds > 0) { - // Avoid double allocation here by combining shareds with taskdata - task->shareds = & ((char *) taskdata)[ shareds_offset ]; - // Make sure shareds struct is aligned to pointer size - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 ); - } else { - task->shareds = NULL; - } - task->routine = task_entry; - task->part_id = 0; // AC: Always start with 0 part id - - taskdata->td_task_id = KMP_GEN_TASK_ID(); - taskdata->td_team = team; - taskdata->td_alloc_thread = thread; - taskdata->td_parent = parent_task; - taskdata->td_level = parent_task->td_level + 1; // increment nesting level - taskdata->td_ident = loc_ref; - taskdata->td_taskwait_ident = NULL; - taskdata->td_taskwait_counter = 0; - taskdata->td_taskwait_thread = 0; - KMP_DEBUG_ASSERT( taskdata->td_parent != NULL ); -#if OMP_41_ENABLED - // avoid copying icvs for proxy tasks - if ( flags->proxy == TASK_FULL ) -#endif - copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs ); - - taskdata->td_flags.tiedness = flags->tiedness; - taskdata->td_flags.final = flags->final; - taskdata->td_flags.merged_if0 = flags->merged_if0; -#if OMP_40_ENABLED - taskdata->td_flags.destructors_thunk = flags->destructors_thunk; -#endif // OMP_40_ENABLED -#if OMP_41_ENABLED - taskdata->td_flags.proxy = flags->proxy; -#endif - taskdata->td_flags.tasktype = 
TASK_EXPLICIT; - - // GEH - TODO: fix this to copy parent task's value of tasking_ser flag - taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec ); - - // GEH - TODO: fix this to copy parent task's value of team_serial flag - taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0; - - // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region - // tasks are not left until program termination to execute. Also, it helps locality to execute - // immediately. - taskdata->td_flags.task_serial = ( parent_task->td_flags.final - || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ); - - taskdata->td_flags.started = 0; - taskdata->td_flags.executing = 0; - taskdata->td_flags.complete = 0; - taskdata->td_flags.freed = 0; - - taskdata->td_flags.native = flags->native; - - taskdata->td_incomplete_child_tasks = 0; - taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children -#if OMP_40_ENABLED - taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task - taskdata->td_dephash = NULL; - taskdata->td_depnode = NULL; -#endif - - // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task -#if OMP_41_ENABLED - if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) -#else - if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) -#endif - { - KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) ); -#if OMP_40_ENABLED - if ( parent_task->td_taskgroup ) - KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) ); -#endif - // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated - if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) { - KMP_TEST_THEN_INC32( (kmp_int32 *)(& 
taskdata->td_parent->td_allocated_child_tasks) ); - } - } - - KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n", - gtid, taskdata, taskdata->td_parent) ); - -#if OMPT_SUPPORT - __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry); -#endif - - return task; -} - - -kmp_task_t * -__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, - size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ) -{ - kmp_task_t *retval; - kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags; - - input_flags->native = FALSE; - // __kmp_task_alloc() sets up all other runtime flags - -#if OMP_41_ENABLED - KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", - input_flags->proxy ? "proxy" : "", - sizeof_kmp_task_t, sizeof_shareds, task_entry) ); -#else - KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, input_flags->tiedness ? 
"tied " : "untied", - sizeof_kmp_task_t, sizeof_shareds, task_entry) ); -#endif - - retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t, - sizeof_shareds, task_entry ); - - KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) ); - - return retval; -} - -//----------------------------------------------------------- -// __kmp_invoke_task: invoke the specified task -// -// gtid: global thread ID of caller -// task: the task to invoke -// current_task: the task to resume after task invokation - -static void -__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); -#if OMP_40_ENABLED - int discard = 0 /* false */; -#endif - KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", - gtid, taskdata, current_task) ); - KMP_DEBUG_ASSERT(task); -#if OMP_41_ENABLED - if ( taskdata->td_flags.proxy == TASK_PROXY && - taskdata->td_flags.complete == 1) - { - // This is a proxy task that was already completed but it needs to run - // its bottom-half finish - KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n", - gtid, taskdata) ); - - __kmp_bottom_half_finish_proxy(gtid,task); - - KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) ); - - return; - } -#endif - -#if OMP_41_ENABLED - // Proxy tasks are not handled by the runtime - if ( taskdata->td_flags.proxy != TASK_PROXY ) -#endif - __kmp_task_start( gtid, task, current_task ); - -#if OMPT_SUPPORT - ompt_thread_info_t oldInfo; - kmp_info_t * thread; - if (ompt_enabled) { - // Store the threads states and restore them after the task - thread = __kmp_threads[ gtid ]; - oldInfo = thread->th.ompt_thread_info; - thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; - 
taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0); - } -#endif - -#if OMP_40_ENABLED - // TODO: cancel tasks if the parallel region has also been cancelled - // TODO: check if this sequence can be hoisted above __kmp_task_start - // if cancellation has been enabled for this run ... - if (__kmp_omp_cancellation) { - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - kmp_team_t * this_team = this_thr->th.th_team; - kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; - if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) { - KMP_COUNT_BLOCK(TASK_cancelled); - // this task belongs to a task group and we need to cancel it - discard = 1 /* true */; - } - } - - // - // Invoke the task routine and pass in relevant data. - // Thunks generated by gcc take a different argument list. - // - if (!discard) { - KMP_COUNT_BLOCK(TASK_executed); - KMP_TIME_BLOCK (TASK_execution); -#endif // OMP_40_ENABLED - -#if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're about to run this task */ - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_switch)) - { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - current_task->ompt_task_info.task_id, - taskdata->ompt_task_info.task_id); - } -#endif - -#ifdef KMP_GOMP_COMPAT - if (taskdata->td_flags.native) { - ((void (*)(void *))(*(task->routine)))(task->shareds); - } - else -#endif /* KMP_GOMP_COMPAT */ - { - (*(task->routine))(gtid, task); - } - -#if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're returning to the callee task */ - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_switch)) - { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - taskdata->ompt_task_info.task_id, - current_task->ompt_task_info.task_id); - } -#endif - -#if OMP_40_ENABLED - } -#endif // OMP_40_ENABLED - - -#if OMPT_SUPPORT - if (ompt_enabled) { - thread->th.ompt_thread_info = oldInfo; - 
taskdata->ompt_task_info.frame.exit_runtime_frame = 0; - } -#endif - -#if OMP_41_ENABLED - // Proxy tasks are not handled by the runtime - if ( taskdata->td_flags.proxy != TASK_PROXY ) -#endif - __kmp_task_finish( gtid, task, current_task ); - - KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n", - gtid, taskdata, current_task) ); - return; -} - -//----------------------------------------------------------------------- -// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution -// -// loc_ref: location of original task pragma (ignored) -// gtid: Global Thread ID of encountering thread -// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' -// Returns: -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. - -kmp_int32 -__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task) -{ - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - - KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); - - /* Should we execute the new task or queue it? For now, let's just always try to - queue it. If the queue fills up, then we'll execute it. 
*/ - - if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer - { // Execute this task immediately - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - new_taskdata->td_flags.task_serial = 1; - __kmp_invoke_task( gtid, new_task, current_task ); - } - - KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " - "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, - new_taskdata ) ); - - return TASK_CURRENT_NOT_QUEUED; -} - -//--------------------------------------------------------------------- -// __kmp_omp_task: Schedule a non-thread-switchable task for execution -// gtid: Global Thread ID of encountering thread -// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() -// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized -// returns: -// -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. -kmp_int32 -__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate ) -{ - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - -#if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(0); - } -#endif - - /* Should we execute the new task or queue it? For now, let's just always try to - queue it. If the queue fills up, then we'll execute it. 
*/ -#if OMP_41_ENABLED - if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer -#else - if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer -#endif - { // Execute this task immediately - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - if ( serialize_immediate ) - new_taskdata -> td_flags.task_serial = 1; - __kmp_invoke_task( gtid, new_task, current_task ); - } - -#if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0; - } -#endif - - return TASK_CURRENT_NOT_QUEUED; -} - -//--------------------------------------------------------------------- -// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from -// the parent thread only! -// loc_ref: location of original task pragma (ignored) -// gtid: Global Thread ID of encountering thread -// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() -// returns: -// -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. 
- -kmp_int32 -__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task) -{ - kmp_int32 res; - -#if KMP_DEBUG - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); -#endif - KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); - - res = __kmp_omp_task(gtid,new_task,true); - - KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); - return res; -} - -//------------------------------------------------------------------------------------- -// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete - -kmp_int32 -__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ) -{ - kmp_taskdata_t * taskdata; - kmp_info_t * thread; - int thread_finished = FALSE; - - KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) ); - - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? - - thread = __kmp_threads[ gtid ]; - taskdata = thread -> th.th_current_task; - -#if OMPT_SUPPORT && OMPT_TRACE - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; - - if (ompt_enabled) { - kmp_team_t *team = thread->th.th_team; - my_task_id = taskdata->ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - - if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)( - my_parallel_id, my_task_id); - } - } -#endif - -#if USE_ITT_BUILD - // Note: These values are used by ITT events as well. 
-#endif /* USE_ITT_BUILD */ - taskdata->td_taskwait_counter += 1; - taskdata->td_taskwait_ident = loc_ref; - taskdata->td_taskwait_thread = gtid + 1; - -#if USE_ITT_BUILD - void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - -#if OMP_41_ENABLED - if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) ) -#else - if ( ! taskdata->td_flags.team_serial ) -#endif - { - // GEH: if team serialized, avoid reading the volatile variable below. - kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U); - while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) { - flag.execute_tasks(thread, gtid, FALSE, &thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); - } - } -#if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? 
- taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; - -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_end)( - my_parallel_id, my_task_id); - } -#endif - } - - KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " - "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); - - return TASK_CURRENT_NOT_QUEUED; -} - - -//------------------------------------------------- -// __kmpc_omp_taskyield: switch to a different task - -kmp_int32 -__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ) -{ - kmp_taskdata_t * taskdata; - kmp_info_t * thread; - int thread_finished = FALSE; - - KMP_COUNT_BLOCK(OMP_TASKYIELD); - - KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", - gtid, loc_ref, end_part) ); - - if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) { - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? - - thread = __kmp_threads[ gtid ]; - taskdata = thread -> th.th_current_task; - // Should we model this as a task wait or not? -#if USE_ITT_BUILD - // Note: These values are used by ITT events as well. -#endif /* USE_ITT_BUILD */ - taskdata->td_taskwait_counter += 1; - taskdata->td_taskwait_ident = loc_ref; - taskdata->td_taskwait_thread = gtid + 1; - -#if USE_ITT_BUILD - void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - if ( ! 
taskdata->td_flags.team_serial ) { - kmp_task_team_t * task_team = thread->th.th_task_team; - if (task_team != NULL) { - if (KMP_TASKING_ENABLED(task_team)) { - __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); - } - } - } -#if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? - taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; - } - - KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " - "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); - - return TASK_CURRENT_NOT_QUEUED; -} - - -#if OMP_40_ENABLED -//------------------------------------------------------------------------------------- -// __kmpc_taskgroup: Start a new taskgroup - -void -__kmpc_taskgroup( ident_t* loc, int gtid ) -{ - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * taskdata = thread->th.th_current_task; - kmp_taskgroup_t * tg_new = - (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) ); - KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) ); - tg_new->count = 0; - tg_new->cancel_request = cancel_noreq; - tg_new->parent = taskdata->td_taskgroup; - taskdata->td_taskgroup = tg_new; -} - - -//------------------------------------------------------------------------------------- -// __kmpc_end_taskgroup: Wait until all tasks generated by the current task -// and its descendants are complete - -void -__kmpc_end_taskgroup( ident_t* loc, int gtid ) -{ - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * taskdata = thread->th.th_current_task; - kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; - int thread_finished = FALSE; - - KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) ); - KMP_DEBUG_ASSERT( taskgroup != NULL ); - 
- if ( __kmp_tasking_mode != tskm_immediate_exec ) { -#if USE_ITT_BUILD - // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them - void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - -#if OMP_41_ENABLED - if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) ) -#else - if ( ! taskdata->td_flags.team_serial ) -#endif - { - kmp_flag_32 flag(&(taskgroup->count), 0U); - while ( TCR_4(taskgroup->count) != 0 ) { - flag.execute_tasks(thread, gtid, FALSE, &thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); - } - } - -#if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - } - KMP_DEBUG_ASSERT( taskgroup->count == 0 ); - - // Restore parent taskgroup for the current task - taskdata->td_taskgroup = taskgroup->parent; - __kmp_thread_free( thread, taskgroup ); - - KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) ); -} -#endif - - -//------------------------------------------------------ -// __kmp_remove_my_task: remove a task from my own deque - -static kmp_task_t * -__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team, - kmp_int32 is_constrained ) -{ - kmp_task_t * task; - kmp_taskdata_t * taskdata; - kmp_thread_data_t *thread_data; - kmp_uint32 tail; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition - - thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; - - KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - 
thread_data->td.td_deque_tail) ); - - if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { - KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - return NULL; - } - - __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); - - if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { - __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - return NULL; - } - - tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index. - taskdata = thread_data -> td.td_deque[ tail ]; - - if (is_constrained) { - // we need to check if the candidate obeys task scheduling constraint: - // only child of current task can be scheduled - kmp_taskdata_t * current = thread->th.th_current_task; - kmp_int32 level = current->td_level; - kmp_taskdata_t * parent = taskdata->td_parent; - while ( parent != current && parent->td_level > level ) { - parent = parent->td_parent; // check generation up to the level of the current task - KMP_DEBUG_ASSERT(parent != NULL); - } - if ( parent != current ) { - // If the tail task is not a child, then no other childs can appear in the deque. 
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - return NULL; - } - } - - thread_data -> td.td_deque_tail = tail; - TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1); - - __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock ); - - KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n", - gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - - task = KMP_TASKDATA_TO_TASK( taskdata ); - return task; -} - - -//----------------------------------------------------------- -// __kmp_steal_task: remove a task from another thread's deque -// Assume that calling thread has already checked existence of -// task_team thread_data before calling this routine. 
- -static kmp_task_t * -__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team, - volatile kmp_uint32 *unfinished_threads, int *thread_finished, - kmp_int32 is_constrained ) -{ - kmp_task_t * task; - kmp_taskdata_t * taskdata; - kmp_thread_data_t *victim_td, *threads_data; - kmp_int32 victim_tid; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - - threads_data = task_team -> tt.tt_threads_data; - KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition - - victim_tid = victim->th.th_info.ds.ds_tid; - victim_td = & threads_data[ victim_tid ]; - - KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d " - "head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - - if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition - (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? - { - KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p " - "ntasks=%d head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - return NULL; - } - - __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock ); - - // Check again after we acquire the lock - if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || - (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? 
- { - __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " - "ntasks=%d head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - return NULL; - } - - KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL ); - - if ( !is_constrained ) { - taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ]; - // Bump head pointer and Wrap. - victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK; - } else { - // While we have postponed tasks let's steal from tail of the deque (smaller tasks) - kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index. - taskdata = victim_td -> td.td_deque[ tail ]; - // we need to check if the candidate obeys task scheduling constraint: - // only child of current task can be scheduled - kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task; - kmp_int32 level = current->td_level; - kmp_taskdata_t * parent = taskdata->td_parent; - while ( parent != current && parent->td_level > level ) { - parent = parent->td_parent; // check generation up to the level of the current task - KMP_DEBUG_ASSERT(parent != NULL); - } - if ( parent != current ) { - // If the tail task is not a child, then no other childs can appear in the deque (?). - __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " - "ntasks=%d head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ), - task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - return NULL; - } - victim_td -> td.td_deque_tail = tail; - } - if (*thread_finished) { - // We need to un-mark this victim as a finished victim. 
This must be done before - // releasing the lock, or else other threads (starting with the master victim) - // might be prematurely released from the barrier!!! - kmp_uint32 count; - - count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads ); - - KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n", - gtid, count + 1, task_team) ); - - *thread_finished = FALSE; - } - TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1); - - __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); - - KMP_COUNT_BLOCK(TASK_stolen); - KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p " - "ntasks=%d head=%u tail=%u\n", - gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team, - victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, - victim_td->td.td_deque_tail) ); - - task = KMP_TASKDATA_TO_TASK( taskdata ); - return task; -} - - -//----------------------------------------------------------------------------- -// __kmp_execute_tasks_template: Choose and execute tasks until either the condition -// is statisfied (return true) or there are none left (return false). -// final_spin is TRUE if this is the spin at the release barrier. -// thread_finished indicates whether the thread is finished executing all -// the tasks it has on its deque, and is at the release barrier. -// spinner is the location on which to spin. -// spinner == NULL means only execute a single task and return. -// checker is the value to check to terminate the spin. 
-template -static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - kmp_task_team_t * task_team; - kmp_thread_data_t * threads_data; - kmp_task_t * task; - kmp_taskdata_t * current_task = thread -> th.th_current_task; - volatile kmp_uint32 * unfinished_threads; - kmp_int32 nthreads, last_stolen, k, tid; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] ); - - task_team = thread -> th.th_task_team; - if (task_team == NULL) return FALSE; - - KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n", - gtid, final_spin, *thread_finished) ); - - threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); - KMP_DEBUG_ASSERT( threads_data != NULL ); - - nthreads = task_team -> tt.tt_nproc; - unfinished_threads = &(task_team -> tt.tt_unfinished_threads); -#if OMP_41_ENABLED - KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks); -#else - KMP_DEBUG_ASSERT( nthreads > 1 ); -#endif - KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 ); - - // Choose tasks from our own work queue. 
- start: - while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) { -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) { - if ( itt_sync_obj == NULL ) { - // we are at fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); - } - __kmp_itt_task_starting( itt_sync_obj ); - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - __kmp_invoke_task( gtid, task, current_task ); -#if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_task_finished( itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - - // If this thread is only partway through the barrier and the condition - // is met, then return now, so that the barrier gather/release pattern can proceed. - // If this thread is in the last spin loop in the barrier, waiting to be - // released, we know that the termination condition will not be satisified, - // so don't waste any cycles checking it. - if (flag == NULL || (!final_spin && flag->done_check())) { - KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) ); - return TRUE; - } - if (thread->th.th_task_team == NULL) break; - KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task - } - - // This thread's work queue is empty. If we are in the final spin loop - // of the barrier, check and see if the termination condition is satisfied. -#if OMP_41_ENABLED - // The work queue may be empty but there might be proxy tasks still executing - if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0) -#else - if (final_spin) -#endif - { - // First, decrement the #unfinished threads, if that has not already - // been done. This decrement might be to the spin location, and - // result in the termination condition being satisfied. - if (! 
*thread_finished) { - kmp_uint32 count; - - count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; - KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n", - gtid, count, task_team) ); - *thread_finished = TRUE; - } - - // It is now unsafe to reference thread->th.th_team !!! - // Decrementing task_team->tt.tt_unfinished_threads can allow the master - // thread to pass through the barrier, where it might reset each thread's - // th.th_team field for the next parallel region. - // If we can steal more work, we know that this has not happened yet. - if (flag != NULL && flag->done_check()) { - KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) ); - return TRUE; - } - } - - if (thread->th.th_task_team == NULL) return FALSE; -#if OMP_41_ENABLED - // check if there are other threads to steal from, otherwise go back - if ( nthreads == 1 ) - goto start; -#endif - - // Try to steal from the last place I stole from successfully. - tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid ); - last_stolen = threads_data[ tid ].td.td_deque_last_stolen; - - if (last_stolen != -1) { - kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr; - - while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads, - thread_finished, is_constrained )) != NULL) - { -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) { - if ( itt_sync_obj == NULL ) { - // we are at fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); - } - __kmp_itt_task_starting( itt_sync_obj ); - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - __kmp_invoke_task( gtid, task, current_task ); -#if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_task_finished( itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - - // Check to see if this thread can proceed. 
- if (flag == NULL || (!final_spin && flag->done_check())) { - KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n", - gtid) ); - return TRUE; - } - - if (thread->th.th_task_team == NULL) break; - KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task - // If the execution of the stolen task resulted in more tasks being - // placed on our run queue, then restart the whole process. - if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) { - KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", - gtid) ); - goto start; - } - } - - // Don't give priority to stealing from this thread anymore. - threads_data[ tid ].td.td_deque_last_stolen = -1; - - // The victims's work queue is empty. If we are in the final spin loop - // of the barrier, check and see if the termination condition is satisfied. -#if OMP_41_ENABLED - // The work queue may be empty but there might be proxy tasks still executing - if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0) -#else - if (final_spin) -#endif - { - // First, decrement the #unfinished threads, if that has not already - // been done. This decrement might be to the spin location, and - // result in the termination condition being satisfied. - if (! *thread_finished) { - kmp_uint32 count; - - count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; - KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d " - "task_team=%p\n", gtid, count, task_team) ); - *thread_finished = TRUE; - } - - // If __kmp_tasking_mode != tskm_immediate_exec - // then it is now unsafe to reference thread->th.th_team !!! - // Decrementing task_team->tt.tt_unfinished_threads can allow the master - // thread to pass through the barrier, where it might reset each thread's - // th.th_team field for the next parallel region. - // If we can steal more work, we know that this has not happened yet. 
- if (flag != NULL && flag->done_check()) { - KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n", - gtid) ); - return TRUE; - } - } - if (thread->th.th_task_team == NULL) return FALSE; - } - - // Find a different thread to steal work from. Pick a random thread. - // My initial plan was to cycle through all the threads, and only return - // if we tried to steal from every thread, and failed. Arch says that's - // not such a great idea. - // GEH - need yield code in this loop for throughput library mode? - new_victim: - k = __kmp_get_random( thread ) % (nthreads - 1); - if ( k >= thread -> th.th_info.ds.ds_tid ) { - ++k; // Adjusts random distribution to exclude self - } - { - kmp_info_t *other_thread = threads_data[k].td.td_thr; - int first; - - // There is a slight chance that __kmp_enable_tasking() did not wake up - // all threads waiting at the barrier. If this thread is sleeping, then - // wake it up. Since we were going to pay the cache miss penalty - // for referencing another thread's kmp_info_t struct anyway, the check - // shouldn't cost too much performance at this point. - // In extra barrier mode, tasks do not sleep at the separate tasking - // barrier, so this isn't a problem. - if ( ( __kmp_tasking_mode == tskm_task_teams ) && - (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) && - (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) - { - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc); - // A sleeping thread should not have any tasks on it's queue. - // There is a slight possibility that it resumes, steals a task from - // another thread, which spawns more tasks, all in the time that it takes - // this thread to check => don't write an assertion that the victim's - // queue is empty. Try stealing from a different thread. 
- goto new_victim; - } - - // Now try to steal work from the selected thread - first = TRUE; - while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads, - thread_finished, is_constrained )) != NULL) - { -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) { - if ( itt_sync_obj == NULL ) { - // we are at fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); - } - __kmp_itt_task_starting( itt_sync_obj ); - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - __kmp_invoke_task( gtid, task, current_task ); -#if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_task_finished( itt_sync_obj ); -#endif /* USE_ITT_BUILD */ - - // Try stealing from this victim again, in the future. - if (first) { - threads_data[ tid ].td.td_deque_last_stolen = k; - first = FALSE; - } - - // Check to see if this thread can proceed. - if (flag == NULL || (!final_spin && flag->done_check())) { - KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n", - gtid) ); - return TRUE; - } - if (thread->th.th_task_team == NULL) break; - KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task - - // If the execution of the stolen task resulted in more tasks being - // placed on our run queue, then restart the whole process. - if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) { - KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", - gtid) ); - goto start; - } - } - - // The victims's work queue is empty. If we are in the final spin loop - // of the barrier, check and see if the termination condition is satisfied. - // Going on and finding a new victim to steal from is expensive, as it - // involves a lot of cache misses, so we definitely want to re-check the - // termination condition before doing that. 
-#if OMP_41_ENABLED - // The work queue may be empty but there might be proxy tasks still executing - if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0) -#else - if (final_spin) -#endif - { - // First, decrement the #unfinished threads, if that has not already - // been done. This decrement might be to the spin location, and - // result in the termination condition being satisfied. - if (! *thread_finished) { - kmp_uint32 count; - - count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; - KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; " - "task_team=%p\n", - gtid, count, task_team) ); - *thread_finished = TRUE; - } - - // If __kmp_tasking_mode != tskm_immediate_exec, - // then it is now unsafe to reference thread->th.th_team !!! - // Decrementing task_team->tt.tt_unfinished_threads can allow the master - // thread to pass through the barrier, where it might reset each thread's - // th.th_team field for the next parallel region. - // If we can steal more work, we know that this has not happened yet. 
- if (flag != NULL && flag->done_check()) { - KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) ); - return TRUE; - } - } - if (thread->th.th_task_team == NULL) return FALSE; - } - - KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) ); - return FALSE; -} - -int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); -} - -int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); -} - -int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); -} - - - -//----------------------------------------------------------------------------- -// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the -// next barrier so they can assist in executing enqueued tasks. -// First thread in allocates the task team atomically. 
- -static void -__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ) -{ - kmp_thread_data_t *threads_data; - int nthreads, i, is_init_thread; - - KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n", - __kmp_gtid_from_thread( this_thr ) ) ); - - KMP_DEBUG_ASSERT(task_team != NULL); - KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL); - - nthreads = task_team->tt.tt_nproc; - KMP_DEBUG_ASSERT(nthreads > 0); - KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc); - - // Allocate or increase the size of threads_data if necessary - is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team ); - - if (!is_init_thread) { - // Some other thread already set up the array. - KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n", - __kmp_gtid_from_thread( this_thr ) ) ); - return; - } - threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); - KMP_DEBUG_ASSERT( threads_data != NULL ); - - if ( ( __kmp_tasking_mode == tskm_task_teams ) && - ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) ) - { - // Release any threads sleeping at the barrier, so that they can steal - // tasks and execute them. In extra barrier mode, tasks do not sleep - // at the separate tasking barrier, so this isn't a problem. - for (i = 0; i < nthreads; i++) { - volatile void *sleep_loc; - kmp_info_t *thread = threads_data[i].td.td_thr; - - if (i == this_thr->th.th_info.ds.ds_tid) { - continue; - } - // Since we haven't locked the thread's suspend mutex lock at this - // point, there is a small window where a thread might be putting - // itself to sleep, but hasn't set the th_sleep_loc field yet. - // To work around this, __kmp_execute_tasks_template() periodically checks - // see if other threads are sleeping (using the same random - // mechanism that is used for task stealing) and awakens them if - // they are. 
- if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL ) - { - KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n", - __kmp_gtid_from_thread( this_thr ), - __kmp_gtid_from_thread( thread ) ) ); - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); - } - else { - KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n", - __kmp_gtid_from_thread( this_thr ), - __kmp_gtid_from_thread( thread ) ) ); - } - } - } - - KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n", - __kmp_gtid_from_thread( this_thr ) ) ); -} - - -/* ------------------------------------------------------------------------ */ -/* // TODO: Check the comment consistency - * Utility routines for "task teams". A task team (kmp_task_t) is kind of - * like a shadow of the kmp_team_t data struct, with a different lifetime. - * After a child * thread checks into a barrier and calls __kmp_release() from - * the particular variant of __kmp__barrier_gather(), it can no - * longer assume that the kmp_team_t structure is intact (at any moment, the - * master thread may exit the barrier code and free the team data structure, - * and return the threads to the thread pool). - * - * This does not work with the the tasking code, as the thread is still - * expected to participate in the execution of any tasks that may have been - * spawned my a member of the team, and the thread still needs access to all - * to each thread in the team, so that it can steal work from it. - * - * Enter the existence of the kmp_task_team_t struct. It employs a reference - * counting mechanims, and is allocated by the master thread before calling - * __kmp__release, and then is release by the last thread to - * exit __kmp__release at the next barrier. I.e. the lifetimes - * of the kmp_task_team_t structs for consecutive barriers can overlap - * (and will, unless the master thread is the last thread to exit the barrier - * release phase, which is not typical). 
- * - * The existence of such a struct is useful outside the context of tasking, - * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro, - * so that any performance differences show up when comparing the 2.5 vs. 3.0 - * libraries. - * - * We currently use the existence of the threads array as an indicator that - * tasks were spawned since the last barrier. If the structure is to be - * useful outside the context of tasking, then this will have to change, but - * not settting the field minimizes the performance impact of tasking on - * barriers, when no explicit tasks were spawned (pushed, actually). - */ - - -static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures -// Lock for task team data structures -static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock ); - - -//------------------------------------------------------------------------------ -// __kmp_alloc_task_deque: -// Allocates a task deque for a particular thread, and initialize the necessary -// data structures relating to the deque. This only happens once per thread -// per task team since task teams are recycled. -// No lock is needed during allocation since each thread allocates its own -// deque. 
- -static void -__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data ) -{ - __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL ); - - // Initialize last stolen task field to "none" - thread_data -> td.td_deque_last_stolen = -1; - - KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 ); - KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 ); - KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 ); - - KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n", - __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) ); - // Allocate space for task deque, and zero the deque - // Cannot use __kmp_thread_calloc() because threads not around for - // kmp_reap_task_team( ). - thread_data -> td.td_deque = (kmp_taskdata_t **) - __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *)); -} - - -//------------------------------------------------------------------------------ -// __kmp_free_task_deque: -// Deallocates a task deque for a particular thread. -// Happens at library deallocation so don't need to reset all thread data fields. 
- -static void -__kmp_free_task_deque( kmp_thread_data_t *thread_data ) -{ - __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); - - if ( thread_data -> td.td_deque != NULL ) { - TCW_4(thread_data -> td.td_deque_ntasks, 0); - __kmp_free( thread_data -> td.td_deque ); - thread_data -> td.td_deque = NULL; - } - __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out what to do here for td_susp_tied_tasks - if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) { - __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data ); - } -#endif // BUILD_TIED_TASK_STACK -} - - -//------------------------------------------------------------------------------ -// __kmp_realloc_task_threads_data: -// Allocates a threads_data array for a task team, either by allocating an initial -// array or enlarging an existing array. Only the first thread to get the lock -// allocs or enlarges the array and re-initializes the array eleemnts. -// That thread returns "TRUE", the rest return "FALSE". -// Assumes that the new array size is given by task_team -> tt.tt_nproc. -// The current size is given by task_team -> tt.tt_max_threads. - -static int -__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team ) -{ - kmp_thread_data_t ** threads_data_p; - kmp_int32 nthreads, maxthreads; - int is_init_thread = FALSE; - - if ( TCR_4(task_team -> tt.tt_found_tasks) ) { - // Already reallocated and initialized. - return FALSE; - } - - threads_data_p = & task_team -> tt.tt_threads_data; - nthreads = task_team -> tt.tt_nproc; - maxthreads = task_team -> tt.tt_max_threads; - - // All threads must lock when they encounter the first task of the implicit task - // region to make sure threads_data fields are (re)initialized before used. - __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - - if ( ! 
TCR_4(task_team -> tt.tt_found_tasks) ) { - // first thread to enable tasking - kmp_team_t *team = thread -> th.th_team; - int i; - - is_init_thread = TRUE; - if ( maxthreads < nthreads ) { - - if ( *threads_data_p != NULL ) { - kmp_thread_data_t *old_data = *threads_data_p; - kmp_thread_data_t *new_data = NULL; - - KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating " - "threads data for task_team %p, new_size = %d, old_size = %d\n", - __kmp_gtid_from_thread( thread ), task_team, - nthreads, maxthreads ) ); - // Reallocate threads_data to have more elements than current array - // Cannot use __kmp_thread_realloc() because threads not around for - // kmp_reap_task_team( ). Note all new array entries are initialized - // to zero by __kmp_allocate(). - new_data = (kmp_thread_data_t *) - __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); - // copy old data to new data - KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t), - (void *) old_data, - maxthreads * sizeof(kmp_taskdata_t *) ); - -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = maxthreads; i < nthreads; i++) { - kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; - __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); - } -#endif // BUILD_TIED_TASK_STACK - // Install the new data and free the old data - (*threads_data_p) = new_data; - __kmp_free( old_data ); - } - else { - KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating " - "threads data for task_team %p, size = %d\n", - __kmp_gtid_from_thread( thread ), task_team, nthreads ) ); - // Make the initial allocate for threads_data array, and zero entries - // Cannot use __kmp_thread_calloc() because threads not around for - // kmp_reap_task_team( ). 
- *threads_data_p = (kmp_thread_data_t *) - __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = 0; i < nthreads; i++) { - kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; - __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); - } -#endif // BUILD_TIED_TASK_STACK - } - task_team -> tt.tt_max_threads = nthreads; - } - else { - // If array has (more than) enough elements, go ahead and use it - KMP_DEBUG_ASSERT( *threads_data_p != NULL ); - } - - // initialize threads_data pointers back to thread_info structures - for (i = 0; i < nthreads; i++) { - kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; - thread_data -> td.td_thr = team -> t.t_threads[i]; - - if ( thread_data -> td.td_deque_last_stolen >= nthreads) { - // The last stolen field survives across teams / barrier, and the number - // of threads may have changed. It's possible (likely?) that a new - // parallel region will exhibit the same behavior as the previous region. - thread_data -> td.td_deque_last_stolen = -1; - } - } - - KMP_MB(); - TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE); - } - - __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - return is_init_thread; -} - - -//------------------------------------------------------------------------------ -// __kmp_free_task_threads_data: -// Deallocates a threads_data array for a task team, including any attached -// tasking deques. Only occurs at library shutdown. 
- -static void -__kmp_free_task_threads_data( kmp_task_team_t *task_team ) -{ - __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - if ( task_team -> tt.tt_threads_data != NULL ) { - int i; - for (i = 0; i < task_team->tt.tt_max_threads; i++ ) { - __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] ); - } - __kmp_free( task_team -> tt.tt_threads_data ); - task_team -> tt.tt_threads_data = NULL; - } - __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); -} - - -//------------------------------------------------------------------------------ -// __kmp_allocate_task_team: -// Allocates a task team associated with a specific team, taking it from -// the global task team free list if possible. Also initializes data structures. - -static kmp_task_team_t * -__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team ) -{ - kmp_task_team_t *task_team = NULL; - int nthreads; - - KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n", - (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) ); - - if (TCR_PTR(__kmp_free_task_teams) != NULL) { - // Take a task team from the task team pool - __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); - if (__kmp_free_task_teams != NULL) { - task_team = __kmp_free_task_teams; - TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next); - task_team -> tt.tt_next = NULL; - } - __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); - } - - if (task_team == NULL) { - KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating " - "task team for team %p\n", - __kmp_gtid_from_thread( thread ), team ) ); - // Allocate a new task team if one is not available. - // Cannot use __kmp_thread_malloc() because threads not around for - // kmp_reap_task_team( ). 
- task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) ); - __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory - //task_team -> tt.tt_max_threads = 0; - //task_team -> tt.tt_next = NULL; - } - - TCW_4(task_team -> tt.tt_found_tasks, FALSE); -#if OMP_41_ENABLED - TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE); -#endif - task_team -> tt.tt_nproc = nthreads = team->t.t_nproc; - - TCW_4( task_team -> tt.tt_unfinished_threads, nthreads ); - TCW_4( task_team -> tt.tt_active, TRUE ); - - KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n", - (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) ); - return task_team; -} - - -//------------------------------------------------------------------------------ -// __kmp_free_task_team: -// Frees the task team associated with a specific thread, and adds it -// to the global task team free list. - -void -__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ) -{ - KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n", - thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) ); - - // Put task team back on free list - __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock ); - - KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL ); - task_team -> tt.tt_next = __kmp_free_task_teams; - TCW_PTR(__kmp_free_task_teams, task_team); - - __kmp_release_bootstrap_lock( & __kmp_task_team_lock ); -} - - -//------------------------------------------------------------------------------ -// __kmp_reap_task_teams: -// Free all the task teams on the task team free list. -// Should only be done during library shutdown. -// Cannot do anything that needs a thread structure or gtid since they are already gone. 
- -void -__kmp_reap_task_teams( void ) -{ - kmp_task_team_t *task_team; - - if ( TCR_PTR(__kmp_free_task_teams) != NULL ) { - // Free all task_teams on the free list - __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); - while ( ( task_team = __kmp_free_task_teams ) != NULL ) { - __kmp_free_task_teams = task_team -> tt.tt_next; - task_team -> tt.tt_next = NULL; - - // Free threads_data if necessary - if ( task_team -> tt.tt_threads_data != NULL ) { - __kmp_free_task_threads_data( task_team ); - } - __kmp_free( task_team ); - } - __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); - } -} - -//------------------------------------------------------------------------------ -// __kmp_wait_to_unref_task_teams: -// Some threads could still be in the fork barrier release code, possibly -// trying to steal tasks. Wait for each thread to unreference its task team. -// -void -__kmp_wait_to_unref_task_teams(void) -{ - kmp_info_t *thread; - kmp_uint32 spins; - int done; - - KMP_INIT_YIELD( spins ); - - - for (;;) { - done = TRUE; - - // TODO: GEH - this may be is wrong because some sync would be necessary - // in case threads are added to the pool during the traversal. - // Need to verify that lock for thread pool is held when calling - // this routine. - for (thread = (kmp_info_t *)__kmp_thread_pool; - thread != NULL; - thread = thread->th.th_next_pool) - { -#if KMP_OS_WINDOWS - DWORD exit_val; -#endif - if ( TCR_PTR(thread->th.th_task_team) == NULL ) { - KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n", - __kmp_gtid_from_thread( thread ) ) ); - continue; - } -#if KMP_OS_WINDOWS - // TODO: GEH - add this check for Linux* OS / OS X* as well? 
- if (!__kmp_is_thread_alive(thread, &exit_val)) { - thread->th.th_task_team = NULL; - continue; - } -#endif - - done = FALSE; // Because th_task_team pointer is not NULL for this thread - - KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n", - __kmp_gtid_from_thread( thread ) ) ); - - if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { - volatile void *sleep_loc; - // If the thread is sleeping, awaken it. - if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) { - KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n", - __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) ); - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); - } - } - } - if (done) { - break; - } - - // If we are oversubscribed, - // or have waited a bit (and library mode is throughput), yield. - // Pause is in the following code. - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput - } - - -} - - -//------------------------------------------------------------------------------ -// __kmp_task_team_setup: Create a task_team for the current team, but use -// an already created, unused one if it already exists. -void -__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always ) -{ - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - - // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next. - // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use. 
- if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) { - team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team ); - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state], - ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); - } - - // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is - // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the - // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely - // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for - // serialized teams. - if (team->t.t_nproc > 1) { - int other_team = 1 - this_thr->th.th_task_state; - if (team->t.t_task_team[other_team] == NULL) { // setup other team as well - team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team ); - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], - ((team != NULL) ? 
team->t.t_id : -1), other_team )); - } - else { // Leave the old task team struct in place for the upcoming region; adjust as needed - kmp_task_team_t *task_team = team->t.t_task_team[other_team]; - if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) { - TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); - TCW_4(task_team->tt.tt_found_tasks, FALSE); -#if OMP_41_ENABLED - TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); -#endif - TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc ); - TCW_4(task_team->tt.tt_active, TRUE ); - } - // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], - ((team != NULL) ? team->t.t_id : -1), other_team )); - } - } -} - - -//------------------------------------------------------------------------------ -// __kmp_task_team_sync: Propagation of task team data from team to threads -// which happens just after the release phase of a team barrier. This may be -// called by any thread, but only for teams with # threads > 1. - -void -__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team ) -{ - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - - // Toggle the th_task_state field, to switch which task_team this thread refers to - this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; - // It is now safe to propagate the task team pointer from the team struct to the current thread. - TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]); - KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n", - __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team, - ((team != NULL) ? 
team->t.t_id : -1), this_thr->th.th_task_state)); -} - - -//-------------------------------------------------------------------------------------------- -// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather -// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created. -// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0 -// optionally as the last argument. When wait is zero, master thread does not wait for -// unfinished_threads to reach 0. -void -__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team - USE_ITT_BUILD_ARG(void * itt_sync_obj) - , int wait) -{ - kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team ); - - if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) { - if (wait) { - KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n", - __kmp_gtid_from_thread(this_thr), task_team)); - // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait - // here for tasks to complete. To avoid memory contention, only master thread checks termination condition. - kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - // Deactivate the old task team, so that the worker threads will stop referencing it while spinning. 
- KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: " - "setting active to false, setting local and team's pointer to NULL\n", - __kmp_gtid_from_thread(this_thr), task_team)); -#if OMP_41_ENABLED - KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE ); - TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE ); -#else - KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 ); -#endif - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); - - TCW_PTR(this_thr->th.th_task_team, NULL); - } -} - - -//------------------------------------------------------------------------------ -// __kmp_tasking_barrier: -// This routine may only called when __kmp_tasking_mode == tskm_extra_barrier. -// Internal function to execute all tasks prior to a regular barrier or a -// join barrier. It is a full barrier itself, which unfortunately turns -// regular barriers into double barriers and join barriers into 1 1/2 -// barriers. -void -__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ) -{ - volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads; - int flag = FALSE; - KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier ); - -#if USE_ITT_BUILD - KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL ); -#endif /* USE_ITT_BUILD */ - kmp_flag_32 spin_flag(spin, 0U); - while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag - USE_ITT_BUILD_ARG(NULL), 0 ) ) { -#if USE_ITT_BUILD - // TODO: What about itt_sync_obj?? 
- KMP_FSYNC_SPIN_PREPARE( spin ); -#endif /* USE_ITT_BUILD */ - - if( TCR_4(__kmp_global.g.g_done) ) { - if( __kmp_global.g.g_abort ) - __kmp_abort_thread( ); - break; - } - KMP_YIELD( TRUE ); // GH: We always yield here - } -#if USE_ITT_BUILD - KMP_FSYNC_SPIN_ACQUIRED( (void*) spin ); -#endif /* USE_ITT_BUILD */ -} - - -#if OMP_41_ENABLED - -/* __kmp_give_task puts a task into a given thread queue if: - - the queue for that thread it was created - - there's space in that queue - - Because of this, __kmp_push_task needs to check if there's space after getting the lock - */ -static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task ) -{ - kmp_task_team_t * task_team = thread->th.th_task_team; - kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ]; - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - bool result = false; - - KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) ); - - // assert tasking is enabled? what if not? - KMP_DEBUG_ASSERT( task_team != NULL ); - - if (thread_data -> td.td_deque == NULL ) { - // There's no queue in this thread, go find another one - // We're guaranteed that at least one thread has a queue - KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) ); - return result; - } - - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) - { - KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) ); - return result; - } - - __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock ); - - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) - { - KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) ); - goto release_and_exit; - } - - thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; - // Wrap index. 
- thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK; - TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); - - result = true; - KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) ); - -release_and_exit: - __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock ); - - return result; -} - - -/* The finish of the a proxy tasks is divided in two pieces: - - the top half is the one that can be done from a thread outside the team - - the bottom half must be run from a them within the team - - In order to run the bottom half the task gets queued back into one of the threads of the team. - Once the td_incomplete_child_task counter of the parent is decremented the threads can leave the barriers. - So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts: - - things that can be run before queuing the bottom half - - things that must be run after queuing the bottom half - - This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this - we use the td_incomplete_child_task of the proxy task to synchronize the top and bottom half. 
-*/ - -static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata ) -{ - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); - - taskdata -> td_flags.complete = 1; // mark the task as completed - - if ( taskdata->td_taskgroup ) - KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) ); - - // Create an imaginary children for this task so the bottom half cannot release the task before we have completed the second top half - TCR_4(taskdata->td_incomplete_child_tasks++); -} - -static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata ) -{ - kmp_int32 children = 0; - - // Predecrement simulated by "- 1" calculation - children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); - - // Remove the imaginary children - TCR_4(taskdata->td_incomplete_child_tasks--); -} - -static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); - kmp_info_t * thread = __kmp_threads[ gtid ]; - - KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half - - // We need to wait to make sure the top half is finished - // Spinning here should be ok as this should happen quickly - while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ; - - __kmp_release_deps(gtid,taskdata); - __kmp_free_task_and_ancestors(gtid, taskdata, thread); -} - -/*! -@ingroup TASKING -@param gtid Global Thread ID of encountering thread -@param ptask Task which execution is completed - -Execute the completation of a proxy task from a thread of that is part of the team. Run first and bottom halves directly. 
-*/ -void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask ) -{ - KMP_DEBUG_ASSERT( ptask != NULL ); - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); - KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) ); - - KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY ); - - __kmp_first_top_half_finish_proxy(taskdata); - __kmp_second_top_half_finish_proxy(taskdata); - __kmp_bottom_half_finish_proxy(gtid,ptask); - - KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) ); -} - -/*! -@ingroup TASKING -@param ptask Task which execution is completed - -Execute the completation of a proxy task from a thread that could not belong to the team. -*/ -void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ) -{ - KMP_DEBUG_ASSERT( ptask != NULL ); - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); - - KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) ); - - KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY ); - - __kmp_first_top_half_finish_proxy(taskdata); - - // Enqueue task to complete bottom half completation from a thread within the corresponding team - kmp_team_t * team = taskdata->td_team; - kmp_int32 nthreads = team->t.t_nproc; - kmp_info_t *thread; - kmp_int32 k = 0; - - do { - //This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here - //For now we're just linearly trying to find a thread - k = (k+1) % nthreads; - thread = team->t.t_threads[k]; - } while ( !__kmp_give_task( thread, k, ptask ) ); - - __kmp_second_top_half_finish_proxy(taskdata); - - KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) ); -} - -#endif +/* + * kmp_tasking.c -- OpenMP 3.0 tasking support. 
+ */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_itt.h" +#include "kmp_wait_release.h" +#include "kmp_stats.h" + +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + + +/* forward declaration */ +static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ); +static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data ); +static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team ); + +#ifdef OMP_41_ENABLED +static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask ); +#endif + +static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) { + if (!flag) return; + // Attempt to wake up a thread: examine its type and call appropriate template + switch (((kmp_flag_64 *)flag)->get_type()) { + case flag32: __kmp_resume_32(gtid, NULL); break; + case flag64: __kmp_resume_64(gtid, NULL); break; + case flag_oncore: __kmp_resume_oncore(gtid, NULL); break; + } +} + +#ifdef BUILD_TIED_TASK_STACK + +//--------------------------------------------------------------------------- +// __kmp_trace_task_stack: print the tied tasks from the task stack in order +// from top do bottom +// +// gtid: global thread identifier for thread containing stack +// thread_data: thread data for task team thread containing stack +// threshold: value above which the trace statement triggers +// location: string identifying call site of this function (for trace) + 
+static void +__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location ) +{ + kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; + kmp_taskdata_t **stack_top = task_stack -> ts_top; + kmp_int32 entries = task_stack -> ts_entries; + kmp_taskdata_t *tied_task; + + KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, " + "first_block = %p, stack_top = %p \n", + location, gtid, entries, task_stack->ts_first_block, stack_top ) ); + + KMP_DEBUG_ASSERT( stack_top != NULL ); + KMP_DEBUG_ASSERT( entries > 0 ); + + while ( entries != 0 ) + { + KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] ); + // fix up ts_top if we need to pop from previous block + if ( entries & TASK_STACK_INDEX_MASK == 0 ) + { + kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ; + + stack_block = stack_block -> sb_prev; + stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE]; + } + + // finish bookkeeping + stack_top--; + entries--; + + tied_task = * stack_top; + + KMP_DEBUG_ASSERT( tied_task != NULL ); + KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); + + KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, " + "stack_top=%p, tied_task=%p\n", + location, gtid, entries, stack_top, tied_task ) ); + } + KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] ); + + KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n", + location, gtid ) ); +} + +//--------------------------------------------------------------------------- +// __kmp_init_task_stack: initialize the task stack for the first time +// after a thread_data structure is created. +// It should not be necessary to do this again (assuming the stack works). 
+// +// gtid: global thread identifier of calling thread +// thread_data: thread data for task team thread containing stack + +static void +__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data ) +{ + kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; + kmp_stack_block_t *first_block; + + // set up the first block of the stack + first_block = & task_stack -> ts_first_block; + task_stack -> ts_top = (kmp_taskdata_t **) first_block; + memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *)); + + // initialize the stack to be empty + task_stack -> ts_entries = TASK_STACK_EMPTY; + first_block -> sb_next = NULL; + first_block -> sb_prev = NULL; +} + + +//--------------------------------------------------------------------------- +// __kmp_free_task_stack: free the task stack when thread_data is destroyed. +// +// gtid: global thread identifier for calling thread +// thread_data: thread info for thread containing stack + +static void +__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data ) +{ + kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; + kmp_stack_block_t *stack_block = & task_stack -> ts_first_block; + + KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY ); + // free from the second block of the stack + while ( stack_block != NULL ) { + kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL; + + stack_block -> sb_next = NULL; + stack_block -> sb_prev = NULL; + if (stack_block != & task_stack -> ts_first_block) { + __kmp_thread_free( thread, stack_block ); // free the block, if not the first + } + stack_block = next_block; + } + // initialize the stack to be empty + task_stack -> ts_entries = 0; + task_stack -> ts_top = NULL; +} + + +//--------------------------------------------------------------------------- +// __kmp_push_task_stack: Push the tied task onto the task stack. 
+// Grow the stack if necessary by allocating another block. +// +// gtid: global thread identifier for calling thread +// thread: thread info for thread containing stack +// tied_task: the task to push on the stack + +static void +__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task ) +{ + // GEH - need to consider what to do if tt_threads_data not allocated yet + kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> + tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; + kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ; + + if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) { + return; // Don't push anything on stack if team or team tasks are serialized + } + + KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); + KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL ); + + KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n", + gtid, thread, tied_task ) ); + // Store entry + * (task_stack -> ts_top) = tied_task; + + // Do bookkeeping for next push + task_stack -> ts_top++; + task_stack -> ts_entries++; + + if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 ) + { + // Find beginning of this task block + kmp_stack_block_t *stack_block = + (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE); + + // Check if we already have a block + if ( stack_block -> sb_next != NULL ) + { // reset ts_top to beginning of next block + task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0]; + } + else + { // Alloc new block and link it up + kmp_stack_block_t *new_block = (kmp_stack_block_t *) + __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t)); + + task_stack -> ts_top = & new_block -> sb_block[0]; + stack_block -> sb_next = new_block; + new_block -> sb_prev = stack_block; + new_block -> sb_next = NULL; + + KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n", + gtid, tied_task, new_block ) ); + } + } 
+ KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) ); +} + +//--------------------------------------------------------------------------- +// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return +// the task, just check to make sure it matches the ending task passed in. +// +// gtid: global thread identifier for the calling thread +// thread: thread info structure containing stack +// tied_task: the task popped off the stack +// ending_task: the task that is ending (should match popped task) + +static void +__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task ) +{ + // GEH - need to consider what to do if tt_threads_data not allocated yet + kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; + kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ; + kmp_taskdata_t *tied_task; + + if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) { + return; // Don't pop anything from stack if team or team tasks are serialized + } + + KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL ); + KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 ); + + KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) ); + + // fix up ts_top if we need to pop from previous block + if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 ) + { + kmp_stack_block_t *stack_block = + (kmp_stack_block_t *) (task_stack -> ts_top) ; + + stack_block = stack_block -> sb_prev; + task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE]; + } + + // finish bookkeeping + task_stack -> ts_top--; + task_stack -> ts_entries--; + + tied_task = * (task_stack -> ts_top ); + + KMP_DEBUG_ASSERT( tied_task != NULL ); + KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); + KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly + + KA_TRACE(20, 
("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) ); + return; +} +#endif /* BUILD_TIED_TASK_STACK */ + +//--------------------------------------------------- +// __kmp_push_task: Add a task to the thread's deque + +static kmp_int32 +__kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) +{ + kmp_info_t * thread = __kmp_threads[ gtid ]; + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_task_team_t * task_team = thread->th.th_task_team; + kmp_int32 tid = __kmp_tid_from_gtid( gtid ); + kmp_thread_data_t * thread_data; + + KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) ); + + // The first check avoids building task_team thread data if serialized + if ( taskdata->td_flags.task_serial ) { + KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n", + gtid, taskdata ) ); + return TASK_NOT_PUSHED; + } + + // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode + KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); + if ( ! 
KMP_TASKING_ENABLED(task_team) ) { + __kmp_enable_tasking( task_team, thread ); + } + KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE ); + KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL ); + + // Find tasking deque specific to encountering thread + thread_data = & task_team -> tt.tt_threads_data[ tid ]; + + // No lock needed since only owner can allocate + if (thread_data -> td.td_deque == NULL ) { + __kmp_alloc_task_deque( thread, thread_data ); + } + + // Check if deque is full + if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) + { + KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n", + gtid, taskdata ) ); + return TASK_NOT_PUSHED; + } + + // Lock the deque for the task push operation + __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); + +#if OMP_41_ENABLED + // Need to recheck as we can get a proxy task from a thread outside of OpenMP + if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) + { + __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); + KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n", + gtid, taskdata ) ); + return TASK_NOT_PUSHED; + } +#else + // Must have room since no thread can add tasks but calling thread + KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE ); +#endif + + thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata + // Wrap index. 
+ thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK; + TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count + + __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); + + KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: " + "task=%p ntasks=%d head=%u tail=%u\n", + gtid, taskdata, thread_data->td.td_deque_ntasks, + thread_data->td.td_deque_tail, thread_data->td.td_deque_head) ); + + return TASK_SUCCESSFULLY_PUSHED; +} + + +//----------------------------------------------------------------------------------------- +// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends +// this_thr: thread structure to set current_task in. + +void +__kmp_pop_current_task_from_thread( kmp_info_t *this_thr ) +{ + KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, " + "curtask_parent=%p\n", + 0, this_thr, this_thr -> th.th_current_task, + this_thr -> th.th_current_task -> td_parent ) ); + + this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent; + + KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, " + "curtask_parent=%p\n", + 0, this_thr, this_thr -> th.th_current_task, + this_thr -> th.th_current_task -> td_parent ) ); +} + + +//--------------------------------------------------------------------------------------- +// __kmp_push_current_task_to_thread: set up current task in called thread for a new team +// this_thr: thread structure to set up +// team: team for implicit task data +// tid: thread within team to set up + +void +__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid ) +{ + // current task of the thread is a parent of the new just created implicit tasks of new team + KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p " + 
"parent_task=%p\n", + tid, this_thr, this_thr->th.th_current_task, + team->t.t_implicit_task_taskdata[tid].td_parent ) ); + + KMP_DEBUG_ASSERT (this_thr != NULL); + + if( tid == 0 ) { + if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) { + team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task; + this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ]; + } + } else { + team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent; + this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ]; + } + + KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p " + "parent_task=%p\n", + tid, this_thr, this_thr->th.th_current_task, + team->t.t_implicit_task_taskdata[tid].td_parent ) ); +} + + +//---------------------------------------------------------------------- +// __kmp_task_start: bookkeeping for a task starting execution +// GTID: global thread id of calling thread +// task: task starting execution +// current_task: task suspending + +static void +__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task ) +{ + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_info_t * thread = __kmp_threads[ gtid ]; + + KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n", + gtid, taskdata, current_task) ); + + KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); + + // mark currently executing task as suspended + // TODO: GEH - make sure root team implicit task is initialized properly. 
+ // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 ); + current_task -> td_flags.executing = 0; + + // Add task to stack if tied +#ifdef BUILD_TIED_TASK_STACK + if ( taskdata -> td_flags.tiedness == TASK_TIED ) + { + __kmp_push_task_stack( gtid, thread, taskdata ); + } +#endif /* BUILD_TIED_TASK_STACK */ + + // mark starting task as executing and as current task + thread -> th.th_current_task = taskdata; + + KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 ); + KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 ); + taskdata -> td_flags.started = 1; + taskdata -> td_flags.executing = 1; + KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); + KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); + + // GEH TODO: shouldn't we pass some sort of location identifier here? + // APT: yes, we will pass location here. + // need to store current thread state (in a thread or taskdata structure) + // before setting work_state, otherwise wrong state is set after end of task + + KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", + gtid, taskdata ) ); + +#if OMPT_SUPPORT + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_task_begin)) { + kmp_taskdata_t *parent = taskdata->td_parent; + ompt_callbacks.ompt_callback(ompt_event_task_begin)( + parent ? parent->ompt_task_info.task_id : ompt_task_id_none, + parent ? &(parent->ompt_task_info.frame) : NULL, + taskdata->ompt_task_info.task_id, + taskdata->ompt_task_info.function); + } +#endif + + return; +} + + +//---------------------------------------------------------------------- +// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution +// loc_ref: source location information; points to beginning of task block. +// gtid: global thread number. +// task: task thunk for the started task. 
+ +void +__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ) +{ + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; + + KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n", + gtid, loc_ref, taskdata, current_task ) ); + + taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred. + __kmp_task_start( gtid, task, current_task ); + + KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", + gtid, loc_ref, taskdata ) ); + + return; +} + +#ifdef TASK_UNUSED +//---------------------------------------------------------------------- +// __kmpc_omp_task_begin: report that a given task has started execution +// NEVER GENERATED BY COMPILER, DEPRECATED!!! + +void +__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ) +{ + kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; + + KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) ); + + __kmp_task_start( gtid, task, current_task ); + + KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); + + return; +} +#endif // TASK_UNUSED + + +//------------------------------------------------------------------------------------- +// __kmp_free_task: free the current task space and the space for shareds +// gtid: Global thread ID of calling thread +// taskdata: task to free +// thread: thread data structure of caller + +static void +__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread ) +{ + KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", + gtid, taskdata) ); + + // Check to make sure all flags and counters have the correct values + KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT ); + 
KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 ); + KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 ); + KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 ); + KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1); + KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 ); + + taskdata->td_flags.freed = 1; + // deallocate the taskdata and shared variable blocks associated with this task + #if USE_FAST_MEMORY + __kmp_fast_free( thread, taskdata ); + #else /* ! USE_FAST_MEMORY */ + __kmp_thread_free( thread, taskdata ); + #endif + + KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", + gtid, taskdata) ); +} + +//------------------------------------------------------------------------------------- +// __kmp_free_task_and_ancestors: free the current task and ancestors without children +// +// gtid: Global thread ID of calling thread +// taskdata: task to free +// thread: thread data structure of caller + +static void +__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread ) +{ + kmp_int32 children = 0; + kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser; + + KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); + + if ( !team_or_tasking_serialized ) { + children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1; + KMP_DEBUG_ASSERT( children >= 0 ); + } + + // Now, go up the ancestor tree to see if any ancestors can now be freed. 
+ while ( children == 0 ) + { + kmp_taskdata_t * parent_taskdata = taskdata -> td_parent; + + KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete " + "and freeing itself\n", gtid, taskdata) ); + + // --- Deallocate my ancestor task --- + __kmp_free_task( gtid, taskdata, thread ); + + taskdata = parent_taskdata; + + // Stop checking ancestors at implicit task or if tasking serialized + // instead of walking up ancestor tree to avoid premature deallocation of ancestors. + if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT ) + return; + + if ( !team_or_tasking_serialized ) { + // Predecrement simulated by "- 1" calculation + children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1; + KMP_DEBUG_ASSERT( children >= 0 ); + } + } + + KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; " + "not freeing it yet\n", gtid, taskdata, children) ); +} + +//--------------------------------------------------------------------- +// __kmp_task_finish: bookkeeping to do when a task finishes execution +// gtid: global thread ID for calling thread +// task: task to be finished +// resumed_task: task to be resumed. 
(may be NULL if task is serialized) + +static void +__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task ) +{ + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_info_t * thread = __kmp_threads[ gtid ]; + kmp_int32 children = 0; + +#if OMPT_SUPPORT + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_task_end)) { + kmp_taskdata_t *parent = taskdata->td_parent; + ompt_callbacks.ompt_callback(ompt_event_task_end)( + taskdata->ompt_task_info.task_id); + } +#endif + + KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n", + gtid, taskdata, resumed_task) ); + + KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); + + // Pop task from stack if tied +#ifdef BUILD_TIED_TASK_STACK + if ( taskdata -> td_flags.tiedness == TASK_TIED ) + { + __kmp_pop_task_stack( gtid, thread, taskdata ); + } +#endif /* BUILD_TIED_TASK_STACK */ + + KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); + taskdata -> td_flags.complete = 1; // mark the task as completed + KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 ); + KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); + + // Only need to keep track of count if team parallel and tasking not serialized + if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) { + // Predecrement simulated by "- 1" calculation + children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1; + KMP_DEBUG_ASSERT( children >= 0 ); +#if OMP_40_ENABLED + if ( taskdata->td_taskgroup ) + KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) ); + __kmp_release_deps(gtid,taskdata); +#endif + } + + // td_flags.executing must be marked as 0 after __kmp_release_deps has been called + // Othertwise, if a task is executed immediately from the release_deps code + // the flag will be reset to 1 again by this same function + KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 ); + 
taskdata -> td_flags.executing = 0; // suspend the finishing task + + KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n", + gtid, taskdata, children) ); + +#if OMP_40_ENABLED + /* If the tasks' destructor thunk flag has been set, we need to invoke the + destructor thunk that has been generated by the compiler. + The code is placed here, since at this point other tasks might have been released + hence overlapping the destructor invokations with some other work in the + released tasks. The OpenMP spec is not specific on when the destructors are + invoked, so we should be free to choose. + */ + if (taskdata->td_flags.destructors_thunk) { + kmp_routine_entry_t destr_thunk = task->destructors; + KMP_ASSERT(destr_thunk); + destr_thunk(gtid, task); + } +#endif // OMP_40_ENABLED + + // bookkeeping for resuming task: + // GEH - note tasking_ser => task_serial + KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == + taskdata->td_flags.task_serial); + if ( taskdata->td_flags.task_serial ) + { + if (resumed_task == NULL) { + resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent + } + else { + // verify resumed task passed in points to parent + KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent ); + } + } + else { + KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as arguemnt + } + + // Free this task and then ancestor tasks if they have no children. + __kmp_free_task_and_ancestors(gtid, taskdata, thread); + + // FIXME johnmc: I this statement should be before the last one so if an + // asynchronous inquiry peers into the runtime system it doesn't see the freed + // task as the current task + __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task + + // TODO: GEH - make sure root team implicit task is initialized properly. 
+ // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 ); + resumed_task->td_flags.executing = 1; // resume previous task + + KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n", + gtid, taskdata, resumed_task) ); + + return; +} + +//--------------------------------------------------------------------- +// __kmpc_omp_task_complete_if0: report that a task has completed execution +// loc_ref: source location information; points to end of task block. +// gtid: global thread number. +// task: task thunk for the completed task. + +void +__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ) +{ + KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); + + __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume + + KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); + + return; +} + +#ifdef TASK_UNUSED +//--------------------------------------------------------------------- +// __kmpc_omp_task_complete: report that a task has completed execution +// NEVER GENERATED BY COMPILER, DEPRECATED!!! + +void +__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ) +{ + KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); + + __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume + + KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); + return; +} +#endif // TASK_UNUSED + + +#if OMPT_SUPPORT +//---------------------------------------------------------------------------------------------------- +// __kmp_task_init_ompt: +// Initialize OMPT fields maintained by a task. This will only be called after +// ompt_tool, so we already know whether ompt is enabled or not. 
+ +static inline void +__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function ) +{ + if (ompt_enabled) { + task->ompt_task_info.task_id = __ompt_task_id_new(tid); + task->ompt_task_info.function = function; + task->ompt_task_info.frame.exit_runtime_frame = NULL; + task->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +} +#endif + + +//---------------------------------------------------------------------------------------------------- +// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread +// +// loc_ref: reference to source location of parallel region +// this_thr: thread data structure corresponding to implicit task +// team: team for this_thr +// tid: thread id of given thread within team +// set_curr_task: TRUE if need to push current task to thread +// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere. +// TODO: Get better loc_ref. Value passed in may be NULL + +void +__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task ) +{ + kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ]; + + KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n", + tid, team, task, set_curr_task ? 
"TRUE" : "FALSE" ) ); + + task->td_task_id = KMP_GEN_TASK_ID(); + task->td_team = team; +// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger) + task->td_ident = loc_ref; + task->td_taskwait_ident = NULL; + task->td_taskwait_counter = 0; + task->td_taskwait_thread = 0; + + task->td_flags.tiedness = TASK_TIED; + task->td_flags.tasktype = TASK_IMPLICIT; +#if OMP_41_ENABLED + task->td_flags.proxy = TASK_FULL; +#endif + + // All implicit tasks are executed immediately, not deferred + task->td_flags.task_serial = 1; + task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec ); + task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0; + + task->td_flags.started = 1; + task->td_flags.executing = 1; + task->td_flags.complete = 0; + task->td_flags.freed = 0; + +#if OMP_40_ENABLED + task->td_dephash = NULL; + task->td_depnode = NULL; +#endif + + if (set_curr_task) { // only do this initialization the first time a thread is created + task->td_incomplete_child_tasks = 0; + task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task +#if OMP_40_ENABLED + task->td_taskgroup = NULL; // An implicit task does not have taskgroup +#endif + __kmp_push_current_task_to_thread( this_thr, team, tid ); + } else { + KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0); + KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0); + } + +#if OMPT_SUPPORT + __kmp_task_init_ompt(task, tid, NULL); +#endif + + KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", + tid, team, task ) ); +} + +// Round up a size to a power of two specified by val +// Used to insert padding between structures co-allocated using a single malloc() call +static size_t +__kmp_round_up_to_val( size_t size, size_t val ) { + if ( size & ( val - 1 ) ) { + size &= ~ ( val - 1 ); + if ( size <= KMP_SIZE_T_MAX - val ) { + size += val; // Round up if there is no overflow. 
+ }; // if + }; // if + return size; +} // __kmp_round_up_to_va + + +//--------------------------------------------------------------------------------- +// __kmp_task_alloc: Allocate the taskdata and task data structures for a task +// +// loc_ref: source location information +// gtid: global thread number. +// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered. +// Converted from kmp_int32 to kmp_tasking_flags_t in routine. +// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task. +// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task. +// task_entry: Pointer to task code entry point generated by compiler. +// returns: a pointer to the allocated kmp_task_t structure (task). + +kmp_task_t * +__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry ) +{ + kmp_task_t *task; + kmp_taskdata_t *taskdata; + kmp_info_t *thread = __kmp_threads[ gtid ]; + kmp_team_t *team = thread->th.th_team; + kmp_taskdata_t *parent_task = thread->th.th_current_task; + size_t shareds_offset; + + KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " + "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", + gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t, + sizeof_shareds, task_entry) ); + + if ( parent_task->td_flags.final ) { + if (flags->merged_if0) { + } + flags->final = 1; + } + +#if OMP_41_ENABLED + if ( flags->proxy == TASK_PROXY ) { + flags->tiedness = TASK_UNTIED; + flags->merged_if0 = 1; + + /* are we running in a sequential parallel or tskm_immediate_exec... 
we need tasking support enabled */ + if ( (thread->th.th_task_team) == NULL ) { + /* This should only happen if the team is serialized + setup a task team and propagate it to the thread + */ + KMP_DEBUG_ASSERT(team->t.t_serialized); + KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid)); + __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads + thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state]; + } + kmp_task_team_t * task_team = thread->th.th_task_team; + + /* tasking must be enabled now as the task might not be pushed */ + if ( !KMP_TASKING_ENABLED( task_team ) ) { + KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid)); + __kmp_enable_tasking( task_team, thread ); + kmp_int32 tid = thread->th.th_info.ds.ds_tid; + kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ]; + // No lock needed since only owner can allocate + if (thread_data -> td.td_deque == NULL ) { + __kmp_alloc_task_deque( thread, thread_data ); + } + } + + if ( task_team->tt.tt_found_proxy_tasks == FALSE ) + TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE); + } +#endif + + // Calculate shared structure offset including padding after kmp_task_t struct + // to align pointers in shared struct + shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t; + shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * )); + + // Allocate a kmp_taskdata_t block and a kmp_task_t block. + KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", + gtid, shareds_offset) ); + KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", + gtid, sizeof_shareds) ); + + // Avoid double allocation here by combining shareds with taskdata + #if USE_FAST_MEMORY + taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds ); + #else /* ! 
USE_FAST_MEMORY */ + taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds ); + #endif /* USE_FAST_MEMORY */ + + task = KMP_TASKDATA_TO_TASK(taskdata); + + // Make sure task & taskdata are aligned appropriately +#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD + KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 ); + KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 ); +#else + KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 ); + KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 ); +#endif + if (sizeof_shareds > 0) { + // Avoid double allocation here by combining shareds with taskdata + task->shareds = & ((char *) taskdata)[ shareds_offset ]; + // Make sure shareds struct is aligned to pointer size + KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 ); + } else { + task->shareds = NULL; + } + task->routine = task_entry; + task->part_id = 0; // AC: Always start with 0 part id + + taskdata->td_task_id = KMP_GEN_TASK_ID(); + taskdata->td_team = team; + taskdata->td_alloc_thread = thread; + taskdata->td_parent = parent_task; + taskdata->td_level = parent_task->td_level + 1; // increment nesting level + taskdata->td_ident = loc_ref; + taskdata->td_taskwait_ident = NULL; + taskdata->td_taskwait_counter = 0; + taskdata->td_taskwait_thread = 0; + KMP_DEBUG_ASSERT( taskdata->td_parent != NULL ); +#if OMP_41_ENABLED + // avoid copying icvs for proxy tasks + if ( flags->proxy == TASK_FULL ) +#endif + copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs ); + + taskdata->td_flags.tiedness = flags->tiedness; + taskdata->td_flags.final = flags->final; + taskdata->td_flags.merged_if0 = flags->merged_if0; +#if OMP_40_ENABLED + taskdata->td_flags.destructors_thunk = flags->destructors_thunk; +#endif // OMP_40_ENABLED +#if OMP_41_ENABLED + taskdata->td_flags.proxy = flags->proxy; +#endif + taskdata->td_flags.tasktype = 
TASK_EXPLICIT; + + // GEH - TODO: fix this to copy parent task's value of tasking_ser flag + taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec ); + + // GEH - TODO: fix this to copy parent task's value of team_serial flag + taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0; + + // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region + // tasks are not left until program termination to execute. Also, it helps locality to execute + // immediately. + taskdata->td_flags.task_serial = ( parent_task->td_flags.final + || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ); + + taskdata->td_flags.started = 0; + taskdata->td_flags.executing = 0; + taskdata->td_flags.complete = 0; + taskdata->td_flags.freed = 0; + + taskdata->td_flags.native = flags->native; + + taskdata->td_incomplete_child_tasks = 0; + taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children +#if OMP_40_ENABLED + taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task + taskdata->td_dephash = NULL; + taskdata->td_depnode = NULL; +#endif + + // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task +#if OMP_41_ENABLED + if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) +#else + if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) +#endif + { + KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) ); +#if OMP_40_ENABLED + if ( parent_task->td_taskgroup ) + KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) ); +#endif + // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated + if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) { + KMP_TEST_THEN_INC32( (kmp_int32 *)(& 
taskdata->td_parent->td_allocated_child_tasks) ); + } + } + + KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n", + gtid, taskdata, taskdata->td_parent) ); + +#if OMPT_SUPPORT + __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry); +#endif + + return task; +} + + +kmp_task_t * +__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry ) +{ + kmp_task_t *retval; + kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags; + + input_flags->native = FALSE; + // __kmp_task_alloc() sets up all other runtime flags + +#if OMP_41_ENABLED + KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) " + "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", + gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", + input_flags->proxy ? "proxy" : "", + sizeof_kmp_task_t, sizeof_shareds, task_entry) ); +#else + KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) " + "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", + gtid, loc_ref, input_flags->tiedness ? 
"tied " : "untied", + sizeof_kmp_task_t, sizeof_shareds, task_entry) ); +#endif + + retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t, + sizeof_shareds, task_entry ); + + KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) ); + + return retval; +} + +//----------------------------------------------------------- +// __kmp_invoke_task: invoke the specified task +// +// gtid: global thread ID of caller +// task: the task to invoke +// current_task: the task to resume after task invokation + +static void +__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task ) +{ + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); +#if OMP_40_ENABLED + int discard = 0 /* false */; +#endif + KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", + gtid, taskdata, current_task) ); + KMP_DEBUG_ASSERT(task); +#if OMP_41_ENABLED + if ( taskdata->td_flags.proxy == TASK_PROXY && + taskdata->td_flags.complete == 1) + { + // This is a proxy task that was already completed but it needs to run + // its bottom-half finish + KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n", + gtid, taskdata) ); + + __kmp_bottom_half_finish_proxy(gtid,task); + + KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) ); + + return; + } +#endif + +#if OMP_41_ENABLED + // Proxy tasks are not handled by the runtime + if ( taskdata->td_flags.proxy != TASK_PROXY ) +#endif + __kmp_task_start( gtid, task, current_task ); + +#if OMPT_SUPPORT + ompt_thread_info_t oldInfo; + kmp_info_t * thread; + if (ompt_enabled) { + // Store the threads states and restore them after the task + thread = __kmp_threads[ gtid ]; + oldInfo = thread->th.ompt_thread_info; + thread->th.ompt_thread_info.wait_id = 0; + thread->th.ompt_thread_info.state = ompt_state_work_parallel; + 
taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0); + } +#endif + +#if OMP_40_ENABLED + // TODO: cancel tasks if the parallel region has also been cancelled + // TODO: check if this sequence can be hoisted above __kmp_task_start + // if cancellation has been enabled for this run ... + if (__kmp_omp_cancellation) { + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + kmp_team_t * this_team = this_thr->th.th_team; + kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; + if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) { + KMP_COUNT_BLOCK(TASK_cancelled); + // this task belongs to a task group and we need to cancel it + discard = 1 /* true */; + } + } + + // + // Invoke the task routine and pass in relevant data. + // Thunks generated by gcc take a different argument list. + // + if (!discard) { + KMP_COUNT_BLOCK(TASK_executed); + KMP_TIME_BLOCK (TASK_execution); +#endif // OMP_40_ENABLED + +#if OMPT_SUPPORT && OMPT_TRACE + /* let OMPT know that we're about to run this task */ + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_task_switch)) + { + ompt_callbacks.ompt_callback(ompt_event_task_switch)( + current_task->ompt_task_info.task_id, + taskdata->ompt_task_info.task_id); + } +#endif + +#ifdef KMP_GOMP_COMPAT + if (taskdata->td_flags.native) { + ((void (*)(void *))(*(task->routine)))(task->shareds); + } + else +#endif /* KMP_GOMP_COMPAT */ + { + (*(task->routine))(gtid, task); + } + +#if OMPT_SUPPORT && OMPT_TRACE + /* let OMPT know that we're returning to the callee task */ + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_task_switch)) + { + ompt_callbacks.ompt_callback(ompt_event_task_switch)( + taskdata->ompt_task_info.task_id, + current_task->ompt_task_info.task_id); + } +#endif + +#if OMP_40_ENABLED + } +#endif // OMP_40_ENABLED + + +#if OMPT_SUPPORT + if (ompt_enabled) { + thread->th.ompt_thread_info = oldInfo; + 
taskdata->ompt_task_info.frame.exit_runtime_frame = 0; + } +#endif + +#if OMP_41_ENABLED + // Proxy tasks are not handled by the runtime + if ( taskdata->td_flags.proxy != TASK_PROXY ) +#endif + __kmp_task_finish( gtid, task, current_task ); + + KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n", + gtid, taskdata, current_task) ); + return; +} + +//----------------------------------------------------------------------- +// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution +// +// loc_ref: location of original task pragma (ignored) +// gtid: Global Thread ID of encountering thread +// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' +// Returns: +// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. +// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. + +kmp_int32 +__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task) +{ + kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); + + KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", + gtid, loc_ref, new_taskdata ) ); + + /* Should we execute the new task or queue it? For now, let's just always try to + queue it. If the queue fills up, then we'll execute it. 
*/ + + if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer + { // Execute this task immediately + kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; + new_taskdata->td_flags.task_serial = 1; + __kmp_invoke_task( gtid, new_task, current_task ); + } + + KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " + "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, + new_taskdata ) ); + + return TASK_CURRENT_NOT_QUEUED; +} + +//--------------------------------------------------------------------- +// __kmp_omp_task: Schedule a non-thread-switchable task for execution +// gtid: Global Thread ID of encountering thread +// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() +// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized +// returns: +// +// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. +// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. +kmp_int32 +__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate ) +{ + kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); + +#if OMPT_SUPPORT + if (ompt_enabled) { + new_taskdata->ompt_task_info.frame.reenter_runtime_frame = + __builtin_frame_address(0); + } +#endif + + /* Should we execute the new task or queue it? For now, let's just always try to + queue it. If the queue fills up, then we'll execute it. 
*/ +#if OMP_41_ENABLED + if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer +#else + if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer +#endif + { // Execute this task immediately + kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; + if ( serialize_immediate ) + new_taskdata -> td_flags.task_serial = 1; + __kmp_invoke_task( gtid, new_task, current_task ); + } + +#if OMPT_SUPPORT + if (ompt_enabled) { + new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0; + } +#endif + + return TASK_CURRENT_NOT_QUEUED; +} + +//--------------------------------------------------------------------- +// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from +// the parent thread only! +// loc_ref: location of original task pragma (ignored) +// gtid: Global Thread ID of encountering thread +// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() +// returns: +// +// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. +// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. 
+ +kmp_int32 +__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task) +{ + kmp_int32 res; + +#if KMP_DEBUG + kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); +#endif + KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", + gtid, loc_ref, new_taskdata ) ); + + res = __kmp_omp_task(gtid,new_task,true); + + KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", + gtid, loc_ref, new_taskdata ) ); + return res; +} + +//------------------------------------------------------------------------------------- +// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete (always returns TASK_CURRENT_NOT_QUEUED) + +kmp_int32 +__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ) +{ + kmp_taskdata_t * taskdata = NULL; // stays NULL in tskm_immediate_exec mode; the exit trace below prints it unconditionally + kmp_info_t * thread; + int thread_finished = FALSE; + + KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) ); + + if ( __kmp_tasking_mode != tskm_immediate_exec ) { + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? + + thread = __kmp_threads[ gtid ]; + taskdata = thread -> th.th_current_task; + +#if OMPT_SUPPORT && OMPT_TRACE + ompt_task_id_t my_task_id; + ompt_parallel_id_t my_parallel_id; + + if (ompt_enabled) { + kmp_team_t *team = thread->th.th_team; + my_task_id = taskdata->ompt_task_info.task_id; + my_parallel_id = team->t.ompt_team_info.parallel_id; + + if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) { + ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)( + my_parallel_id, my_task_id); + } + } +#endif + +#if USE_ITT_BUILD + // Note: These values are used by ITT events as well. 
+#endif /* USE_ITT_BUILD */ + taskdata->td_taskwait_counter += 1; + taskdata->td_taskwait_ident = loc_ref; + taskdata->td_taskwait_thread = gtid + 1; + +#if USE_ITT_BUILD + void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); + if ( itt_sync_obj != NULL ) + __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + +#if OMP_41_ENABLED + if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) ) +#else + if ( ! taskdata->td_flags.team_serial ) +#endif + { + // GEH: if team serialized, avoid reading the volatile variable below. + kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U); + while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) { + flag.execute_tasks(thread, gtid, FALSE, &thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); + } + } +#if USE_ITT_BUILD + if ( itt_sync_obj != NULL ) + __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? 
+ taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; + +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) { + ompt_callbacks.ompt_callback(ompt_event_taskwait_end)( + my_parallel_id, my_task_id); + } +#endif + } + + KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " + "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); + + return TASK_CURRENT_NOT_QUEUED; +} + + +//------------------------------------------------- +// __kmpc_omp_taskyield: switch to a different task (runs at most one other task; end_part is only traced; always returns TASK_CURRENT_NOT_QUEUED) + +kmp_int32 +__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ) +{ + kmp_taskdata_t * taskdata = NULL; // stays NULL when the body below is skipped; the exit trace prints it unconditionally + kmp_info_t * thread; + int thread_finished = FALSE; + + KMP_COUNT_BLOCK(OMP_TASKYIELD); + + KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", + gtid, loc_ref, end_part) ); + + if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) { + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? + + thread = __kmp_threads[ gtid ]; + taskdata = thread -> th.th_current_task; + // Should we model this as a task wait or not? +#if USE_ITT_BUILD + // Note: These values are used by ITT events as well. +#endif /* USE_ITT_BUILD */ + taskdata->td_taskwait_counter += 1; + taskdata->td_taskwait_ident = loc_ref; + taskdata->td_taskwait_thread = gtid + 1; + +#if USE_ITT_BUILD + void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); + if ( itt_sync_obj != NULL ) + __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + if ( ! 
taskdata->td_flags.team_serial ) { + kmp_task_team_t * task_team = thread->th.th_task_team; + if (task_team != NULL) { + if (KMP_TASKING_ENABLED(task_team)) { // NULL flag below: execute at most one task, then return + __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); + } + } + } +#if USE_ITT_BUILD + if ( itt_sync_obj != NULL ) + __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? + taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; + } + + KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " + "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); + + return TASK_CURRENT_NOT_QUEUED; +} + + +#if OMP_40_ENABLED +//------------------------------------------------------------------------------------- +// __kmpc_taskgroup: Start a new taskgroup (pushed on top of the current task's td_taskgroup chain) + +void +__kmpc_taskgroup( ident_t* loc, int gtid ) +{ + kmp_info_t * thread = __kmp_threads[ gtid ]; + kmp_taskdata_t * taskdata = thread->th.th_current_task; + kmp_taskgroup_t * tg_new = + (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) ); + KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) ); + tg_new->count = 0; + tg_new->cancel_request = cancel_noreq; + tg_new->parent = taskdata->td_taskgroup; + taskdata->td_taskgroup = tg_new; +} + + +//------------------------------------------------------------------------------------- +// __kmpc_end_taskgroup: Wait until all tasks generated by the current task +// and its descendants are complete + +void +__kmpc_end_taskgroup( ident_t* loc, int gtid ) +{ + kmp_info_t * thread = __kmp_threads[ gtid ]; + kmp_taskdata_t * taskdata = thread->th.th_current_task; + kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; + int thread_finished = FALSE; + + KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) ); + KMP_DEBUG_ASSERT( taskgroup != NULL ); + 
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) { +#if USE_ITT_BUILD + // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them + void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); + if ( itt_sync_obj != NULL ) + __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + +#if OMP_41_ENABLED + if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) ) +#else + if ( ! taskdata->td_flags.team_serial ) +#endif + { + kmp_flag_32 flag(&(taskgroup->count), 0U); + while ( TCR_4(taskgroup->count) != 0 ) { + flag.execute_tasks(thread, gtid, FALSE, &thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); + } + } + +#if USE_ITT_BUILD + if ( itt_sync_obj != NULL ) + __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + } + KMP_DEBUG_ASSERT( taskgroup->count == 0 ); + + // Restore parent taskgroup for the current task + taskdata->td_taskgroup = taskgroup->parent; + __kmp_thread_free( thread, taskgroup ); + + KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) ); +} +#endif + + +//------------------------------------------------------ +// __kmp_remove_my_task: remove a task from my own deque (taken from the tail; returns NULL if the deque is empty or the tail task violates the scheduling constraint) + +static kmp_task_t * +__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team, + kmp_int32 is_constrained ) +{ + kmp_task_t * task; + kmp_taskdata_t * taskdata; + kmp_thread_data_t *thread_data; + kmp_uint32 tail; + + KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); + KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition + + thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; + + KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, + 
thread_data->td.td_deque_tail) ); + + if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { + KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, + thread_data->td.td_deque_tail) ); + return NULL; + } + + __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); + + if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { + __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); + KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, + thread_data->td.td_deque_tail) ); + return NULL; + } + + tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index. + taskdata = thread_data -> td.td_deque[ tail ]; + + if (is_constrained) { + // we need to check if the candidate obeys task scheduling constraint: + // only a descendant of the current task can be scheduled + kmp_taskdata_t * current = thread->th.th_current_task; + kmp_int32 level = current->td_level; + kmp_taskdata_t * parent = taskdata->td_parent; + while ( parent != current && parent->td_level > level ) { + parent = parent->td_parent; // check generation up to the level of the current task + KMP_DEBUG_ASSERT(parent != NULL); + } + if ( parent != current ) { + // If the tail task is not a descendant, then no other children can appear in the deque. 
+ __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); + KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, + thread_data->td.td_deque_tail) ); + return NULL; + } + } + + thread_data -> td.td_deque_tail = tail; + TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1); + + __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock ); + + KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n", + gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, + thread_data->td.td_deque_tail) ); + + task = KMP_TASKDATA_TO_TASK( taskdata ); + return task; +} + + +//----------------------------------------------------------- +// __kmp_steal_task: remove a task from another thread's deque +// Assume that calling thread has already checked existence of +// task_team thread_data before calling this routine. 
+ +static kmp_task_t * +__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team, + volatile kmp_uint32 *unfinished_threads, int *thread_finished, + kmp_int32 is_constrained ) +{ + kmp_task_t * task; + kmp_taskdata_t * taskdata; + kmp_thread_data_t *victim_td, *threads_data; + kmp_int32 victim_tid; + + KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); + + threads_data = task_team -> tt.tt_threads_data; + KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition + + victim_tid = victim->th.th_info.ds.ds_tid; + victim_td = & threads_data[ victim_tid ]; + + KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d " + "head=%u tail=%u\n", + gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, + victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); + + if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition + (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? + { + KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p " + "ntasks=%d head=%u tail=%u\n", + gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, + victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); + return NULL; + } + + __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock ); + + // Check again after we acquire the lock (the test above ran without holding the deque lock) + if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || + (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? 
+ { + __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); + KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " + "ntasks=%d head=%u tail=%u\n", + gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, + victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); + return NULL; + } + + KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL ); + + if ( !is_constrained ) { + taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ]; + // Unconstrained steal takes the task at the head: bump head pointer and wrap. + victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK; + } else { + // While we have postponed tasks let's steal from tail of the deque (smaller tasks) + kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index. + taskdata = victim_td -> td.td_deque[ tail ]; + // we need to check if the candidate obeys task scheduling constraint: + // only a descendant of the stealing thread's current task can be scheduled + kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task; + kmp_int32 level = current->td_level; + kmp_taskdata_t * parent = taskdata->td_parent; + while ( parent != current && parent->td_level > level ) { + parent = parent->td_parent; // check generation up to the level of the current task + KMP_DEBUG_ASSERT(parent != NULL); + } + if ( parent != current ) { + // If the tail task is not a descendant, then no other children can appear in the deque (?). + __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); + KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " + "ntasks=%d head=%u tail=%u\n", + gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ), + task_team, victim_td->td.td_deque_ntasks, + victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); + return NULL; + } + victim_td -> td.td_deque_tail = tail; + } + if (*thread_finished) { + // We need to un-mark the calling (stealing) thread as a finished thread. 
This must be done before + // releasing the lock, or else other threads (starting with the master thread) + // might be prematurely released from the barrier!!! + kmp_uint32 count; + + count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads ); + + KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n", + gtid, count + 1, task_team) ); + + *thread_finished = FALSE; + } + TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1); + + __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); + + KMP_COUNT_BLOCK(TASK_stolen); + KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p " + "ntasks=%d head=%u tail=%u\n", + gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team, + victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, + victim_td->td.td_deque_tail) ); + + task = KMP_TASKDATA_TO_TASK( taskdata ); + return task; +} + + +//----------------------------------------------------------------------------- +// __kmp_execute_tasks_template: Choose and execute tasks until either the condition +// is satisfied (return true) or there are none left (return false). +// final_spin is TRUE if this is the spin at the release barrier. +// thread_finished indicates whether the thread is finished executing all +// the tasks it has on its deque, and is at the release barrier. +// spinner (the flag argument) is the location on which to spin. +// spinner == NULL (flag == NULL) means only execute a single task and return. +// checker is the value to check to terminate the spin (tested via flag->done_check()). 
+template <class C> +static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin, + int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) +{ + kmp_task_team_t * task_team; + kmp_thread_data_t * threads_data; + kmp_task_t * task; + kmp_taskdata_t * current_task = thread -> th.th_current_task; + volatile kmp_uint32 * unfinished_threads; + kmp_int32 nthreads, last_stolen, k, tid; + + KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); + KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] ); + + task_team = thread -> th.th_task_team; + if (task_team == NULL) return FALSE; + + KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n", + gtid, final_spin, *thread_finished) ); + + threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); + KMP_DEBUG_ASSERT( threads_data != NULL ); + + nthreads = task_team -> tt.tt_nproc; + unfinished_threads = &(task_team -> tt.tt_unfinished_threads); +#if OMP_41_ENABLED + KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks); +#else + KMP_DEBUG_ASSERT( nthreads > 1 ); +#endif + KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 ); + + // Choose tasks from our own work queue. 
+ start: + while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) { +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) { + if ( itt_sync_obj == NULL ) { + // we are at fork barrier where we could not get the object reliably + itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); + } + __kmp_itt_task_starting( itt_sync_obj ); + } +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + __kmp_invoke_task( gtid, task, current_task ); +#if USE_ITT_BUILD + if ( itt_sync_obj != NULL ) + __kmp_itt_task_finished( itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + + // If this thread is only partway through the barrier and the condition + // is met, then return now, so that the barrier gather/release pattern can proceed. + // If this thread is in the last spin loop in the barrier, waiting to be + // released, we know that the termination condition will not be satisfied, + // so don't waste any cycles checking it. + if (flag == NULL || (!final_spin && flag->done_check())) { + KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) ); + return TRUE; + } + if (thread->th.th_task_team == NULL) break; + KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task + } + + // This thread's work queue is empty. If we are in the final spin loop + // of the barrier, check and see if the termination condition is satisfied. +#if OMP_41_ENABLED + // The work queue may be empty but there might be proxy tasks still executing + if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0) +#else + if (final_spin) +#endif + { + // First, decrement the #unfinished threads, if that has not already + // been done. This decrement might be to the spin location, and + // result in the termination condition being satisfied. + if (! 
*thread_finished) { + kmp_uint32 count; + + count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; + KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n", + gtid, count, task_team) ); + *thread_finished = TRUE; + } + + // It is now unsafe to reference thread->th.th_team !!! + // Decrementing task_team->tt.tt_unfinished_threads can allow the master + // thread to pass through the barrier, where it might reset each thread's + // th.th_team field for the next parallel region. + // If we can steal more work, we know that this has not happened yet. + if (flag != NULL && flag->done_check()) { + KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) ); + return TRUE; + } + } + + if (thread->th.th_task_team == NULL) return FALSE; +#if OMP_41_ENABLED + // check if there are other threads to steal from, otherwise go back + if ( nthreads == 1 ) + goto start; +#endif + + // Try to steal from the last place I stole from successfully. + tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid ); + last_stolen = threads_data[ tid ].td.td_deque_last_stolen; + + if (last_stolen != -1) { + kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr; + + while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads, + thread_finished, is_constrained )) != NULL) + { +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) { + if ( itt_sync_obj == NULL ) { + // we are at fork barrier where we could not get the object reliably + itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); + } + __kmp_itt_task_starting( itt_sync_obj ); + } +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + __kmp_invoke_task( gtid, task, current_task ); +#if USE_ITT_BUILD + if ( itt_sync_obj != NULL ) + __kmp_itt_task_finished( itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + + // Check to see if this thread can proceed. 
+ if (flag == NULL || (!final_spin && flag->done_check())) { + KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n", + gtid) ); + return TRUE; + } + + if (thread->th.th_task_team == NULL) break; + KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task + // If the execution of the stolen task resulted in more tasks being + // placed on our run queue, then restart the whole process. + if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) { + KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", + gtid) ); + goto start; + } + } + + // Don't give priority to stealing from this thread anymore. + threads_data[ tid ].td.td_deque_last_stolen = -1; + + // The victim's work queue is empty. If we are in the final spin loop + // of the barrier, check and see if the termination condition is satisfied. +#if OMP_41_ENABLED + // The work queue may be empty but there might be proxy tasks still executing + if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0) +#else + if (final_spin) +#endif + { + // First, decrement the #unfinished threads, if that has not already + // been done. This decrement might be to the spin location, and + // result in the termination condition being satisfied. + if (! *thread_finished) { + kmp_uint32 count; + + count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; + KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d " + "task_team=%p\n", gtid, count, task_team) ); + *thread_finished = TRUE; + } + + // If __kmp_tasking_mode != tskm_immediate_exec + // then it is now unsafe to reference thread->th.th_team !!! + // Decrementing task_team->tt.tt_unfinished_threads can allow the master + // thread to pass through the barrier, where it might reset each thread's + // th.th_team field for the next parallel region. + // If we can steal more work, we know that this has not happened yet. 
+ if (flag != NULL && flag->done_check()) { + KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n", + gtid) ); + return TRUE; + } + } + if (thread->th.th_task_team == NULL) return FALSE; + } + + // Find a different thread to steal work from. Pick a random thread. + // My initial plan was to cycle through all the threads, and only return + // if we tried to steal from every thread, and failed. Arch says that's + // not such a great idea. + // GEH - need yield code in this loop for throughput library mode? + new_victim: + k = __kmp_get_random( thread ) % (nthreads - 1); + if ( k >= thread -> th.th_info.ds.ds_tid ) { + ++k; // Adjusts random distribution to exclude self + } + { + kmp_info_t *other_thread = threads_data[k].td.td_thr; + int first; + + // There is a slight chance that __kmp_enable_tasking() did not wake up + // all threads waiting at the barrier. If this thread is sleeping, then + // wake it up. Since we were going to pay the cache miss penalty + // for referencing another thread's kmp_info_t struct anyway, the check + // shouldn't cost too much performance at this point. + // In extra barrier mode, tasks do not sleep at the separate tasking + // barrier, so this isn't a problem. + if ( ( __kmp_tasking_mode == tskm_task_teams ) && + (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) && + (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) + { + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc); + // A sleeping thread should not have any tasks on its queue. + // There is a slight possibility that it resumes, steals a task from + // another thread, which spawns more tasks, all in the time that it takes + // this thread to check => don't write an assertion that the victim's + // queue is empty. Try stealing from a different thread. 
+ goto new_victim; + } + + // Now try to steal work from the selected thread + first = TRUE; + while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads, + thread_finished, is_constrained )) != NULL) + { +#if USE_ITT_BUILD && USE_ITT_NOTIFY + if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) { + if ( itt_sync_obj == NULL ) { + // we are at fork barrier where we could not get the object reliably + itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); + } + __kmp_itt_task_starting( itt_sync_obj ); + } +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + __kmp_invoke_task( gtid, task, current_task ); +#if USE_ITT_BUILD + if ( itt_sync_obj != NULL ) + __kmp_itt_task_finished( itt_sync_obj ); +#endif /* USE_ITT_BUILD */ + + // Try stealing from this victim again, in the future. + if (first) { + threads_data[ tid ].td.td_deque_last_stolen = k; + first = FALSE; + } + + // Check to see if this thread can proceed. + if (flag == NULL || (!final_spin && flag->done_check())) { + KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n", + gtid) ); + return TRUE; + } + if (thread->th.th_task_team == NULL) break; + KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task + + // If the execution of the stolen task resulted in more tasks being + // placed on our run queue, then restart the whole process. + if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) { + KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", + gtid) ); + goto start; + } + } + + // The victim's work queue is empty. If we are in the final spin loop + // of the barrier, check and see if the termination condition is satisfied. + // Going on and finding a new victim to steal from is expensive, as it + // involves a lot of cache misses, so we definitely want to re-check the + // termination condition before doing that. 
+#if OMP_41_ENABLED + // The work queue may be empty but there might be proxy tasks still executing + if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0) +#else + if (final_spin) +#endif + { + // First, decrement the #unfinished threads, if that has not already + // been done. This decrement might be to the spin location, and + // result in the termination condition being satisfied. + if (! *thread_finished) { + kmp_uint32 count; + + count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; + KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; " + "task_team=%p\n", + gtid, count, task_team) ); + *thread_finished = TRUE; + } + + // If __kmp_tasking_mode != tskm_immediate_exec, + // then it is now unsafe to reference thread->th.th_team !!! + // Decrementing task_team->tt.tt_unfinished_threads can allow the master + // thread to pass through the barrier, where it might reset each thread's + // th.th_team field for the next parallel region. + // If we can steal more work, we know that this has not happened yet. 
+ if (flag != NULL && flag->done_check()) { + KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) ); + return TRUE; + } + } + if (thread->th.th_task_team == NULL) return FALSE; + } + + KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) ); + return FALSE; +} + +int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, + int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) +{ + return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); +} + +int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, + int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) +{ + return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); +} + +int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, + int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) +{ + return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); +} + + + +//----------------------------------------------------------------------------- +// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the +// next barrier so they can assist in executing enqueued tasks. +// First thread in allocates the task team atomically. 
+ +static void +__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ) +{ + kmp_thread_data_t *threads_data; + int nthreads, i, is_init_thread; + + KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n", + __kmp_gtid_from_thread( this_thr ) ) ); + + KMP_DEBUG_ASSERT(task_team != NULL); + KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL); + + nthreads = task_team->tt.tt_nproc; + KMP_DEBUG_ASSERT(nthreads > 0); + KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc); + + // Allocate or increase the size of threads_data if necessary + is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team ); + + if (!is_init_thread) { + // Some other thread already set up the array. + KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n", + __kmp_gtid_from_thread( this_thr ) ) ); + return; + } + threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); + KMP_DEBUG_ASSERT( threads_data != NULL ); + + if ( ( __kmp_tasking_mode == tskm_task_teams ) && + ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) ) + { + // Release any threads sleeping at the barrier, so that they can steal + // tasks and execute them. In extra barrier mode, tasks do not sleep + // at the separate tasking barrier, so this isn't a problem. + for (i = 0; i < nthreads; i++) { + volatile void *sleep_loc; + kmp_info_t *thread = threads_data[i].td.td_thr; + + if (i == this_thr->th.th_info.ds.ds_tid) { + continue; + } + // Since we haven't locked the thread's suspend mutex lock at this + // point, there is a small window where a thread might be putting + // itself to sleep, but hasn't set the th_sleep_loc field yet. + // To work around this, __kmp_execute_tasks_template() periodically checks + // to see if other threads are sleeping (using the same random + // mechanism that is used for task stealing) and awakens them if + // they are. 
+ if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL ) + { + KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n", + __kmp_gtid_from_thread( this_thr ), + __kmp_gtid_from_thread( thread ) ) ); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); + } + else { + KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n", + __kmp_gtid_from_thread( this_thr ), + __kmp_gtid_from_thread( thread ) ) ); + } + } + } + + KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n", + __kmp_gtid_from_thread( this_thr ) ) ); +} + + +/* ------------------------------------------------------------------------ */ +/* // TODO: Check the comment consistency + * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of + * like a shadow of the kmp_team_t data struct, with a different lifetime. + * After a child thread checks into a barrier and calls __kmp_release() from + * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no + * longer assume that the kmp_team_t structure is intact (at any moment, the + * master thread may exit the barrier code and free the team data structure, + * and return the threads to the thread pool). + * + * This does not work with the tasking code, as the thread is still + * expected to participate in the execution of any tasks that may have been + * spawned by a member of the team, and the thread still needs access to + * each thread in the team, so that it can steal work from it. + * + * Enter the existence of the kmp_task_team_t struct. It employs a reference + * counting mechanism, and is allocated by the master thread before calling + * __kmp_<barrier_kind>_release, and then is released by the last thread to + * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes + * of the kmp_task_team_t structs for consecutive barriers can overlap + * (and will, unless the master thread is the last thread to exit the barrier + * release phase, which is not typical). 
+ * + * The existence of such a struct is useful outside the context of tasking, + * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro, + * so that any performance differences show up when comparing the 2.5 vs. 3.0 + * libraries. + * + * We currently use the existence of the threads array as an indicator that + * tasks were spawned since the last barrier. If the structure is to be + * useful outside the context of tasking, then this will have to change, but + * not setting the field minimizes the performance impact of tasking on + * barriers, when no explicit tasks were spawned (pushed, actually). + */ + + +static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures +// Lock for task team data structures +static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock ); + + +//------------------------------------------------------------------------------ +// __kmp_alloc_task_deque: +// Allocates a task deque for a particular thread, and initializes the necessary +// data structures relating to the deque. This only happens once per thread +// per task team since task teams are recycled. +// No lock is needed during allocation since each thread allocates its own +// deque.
+ +static void +__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data ) +{ + __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock ); + KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL ); + + // Initialize last stolen task field to "none" + thread_data -> td.td_deque_last_stolen = -1; + + KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 ); + KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 ); + KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 ); + + KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n", + __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) ); + // Allocate space for task deque, and zero the deque + // Cannot use __kmp_thread_calloc() because threads not around for + // kmp_reap_task_team( ). + thread_data -> td.td_deque = (kmp_taskdata_t **) + __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *)); +} + + +//------------------------------------------------------------------------------ +// __kmp_free_task_deque: +// Deallocates a task deque for a particular thread. +// Happens at library deallocation so don't need to reset all thread data fields. 
+ +static void +__kmp_free_task_deque( kmp_thread_data_t *thread_data ) +{ + __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); + + if ( thread_data -> td.td_deque != NULL ) { + TCW_4(thread_data -> td.td_deque_ntasks, 0); + __kmp_free( thread_data -> td.td_deque ); + thread_data -> td.td_deque = NULL; + } + __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); + +#ifdef BUILD_TIED_TASK_STACK + // GEH: Figure out what to do here for td_susp_tied_tasks + if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) { + __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data ); + } +#endif // BUILD_TIED_TASK_STACK +} + + +//------------------------------------------------------------------------------ +// __kmp_realloc_task_threads_data: +// Allocates a threads_data array for a task team, either by allocating an initial +// array or enlarging an existing array. Only the first thread to get the lock +// allocs or enlarges the array and re-initializes the array eleemnts. +// That thread returns "TRUE", the rest return "FALSE". +// Assumes that the new array size is given by task_team -> tt.tt_nproc. +// The current size is given by task_team -> tt.tt_max_threads. + +static int +__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team ) +{ + kmp_thread_data_t ** threads_data_p; + kmp_int32 nthreads, maxthreads; + int is_init_thread = FALSE; + + if ( TCR_4(task_team -> tt.tt_found_tasks) ) { + // Already reallocated and initialized. + return FALSE; + } + + threads_data_p = & task_team -> tt.tt_threads_data; + nthreads = task_team -> tt.tt_nproc; + maxthreads = task_team -> tt.tt_max_threads; + + // All threads must lock when they encounter the first task of the implicit task + // region to make sure threads_data fields are (re)initialized before used. + __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); + + if ( ! 
TCR_4(task_team -> tt.tt_found_tasks) ) { + // first thread to enable tasking + kmp_team_t *team = thread -> th.th_team; + int i; + + is_init_thread = TRUE; + if ( maxthreads < nthreads ) { + + if ( *threads_data_p != NULL ) { + kmp_thread_data_t *old_data = *threads_data_p; + kmp_thread_data_t *new_data = NULL; + + KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating " + "threads data for task_team %p, new_size = %d, old_size = %d\n", + __kmp_gtid_from_thread( thread ), task_team, + nthreads, maxthreads ) ); + // Reallocate threads_data to have more elements than current array + // Cannot use __kmp_thread_realloc() because threads not around for + // kmp_reap_task_team( ). Note all new array entries are initialized + // to zero by __kmp_allocate(). + new_data = (kmp_thread_data_t *) + __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); + // copy old data to new data + KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t), + (void *) old_data, + maxthreads * sizeof(kmp_taskdata_t *) ); + +#ifdef BUILD_TIED_TASK_STACK + // GEH: Figure out if this is the right thing to do + for (i = maxthreads; i < nthreads; i++) { + kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; + __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); + } +#endif // BUILD_TIED_TASK_STACK + // Install the new data and free the old data + (*threads_data_p) = new_data; + __kmp_free( old_data ); + } + else { + KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating " + "threads data for task_team %p, size = %d\n", + __kmp_gtid_from_thread( thread ), task_team, nthreads ) ); + // Make the initial allocate for threads_data array, and zero entries + // Cannot use __kmp_thread_calloc() because threads not around for + // kmp_reap_task_team( ). 
+ *threads_data_p = (kmp_thread_data_t *) + __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); +#ifdef BUILD_TIED_TASK_STACK + // GEH: Figure out if this is the right thing to do + for (i = 0; i < nthreads; i++) { + kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; + __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); + } +#endif // BUILD_TIED_TASK_STACK + } + task_team -> tt.tt_max_threads = nthreads; + } + else { + // If array has (more than) enough elements, go ahead and use it + KMP_DEBUG_ASSERT( *threads_data_p != NULL ); + } + + // initialize threads_data pointers back to thread_info structures + for (i = 0; i < nthreads; i++) { + kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; + thread_data -> td.td_thr = team -> t.t_threads[i]; + + if ( thread_data -> td.td_deque_last_stolen >= nthreads) { + // The last stolen field survives across teams / barrier, and the number + // of threads may have changed. It's possible (likely?) that a new + // parallel region will exhibit the same behavior as the previous region. + thread_data -> td.td_deque_last_stolen = -1; + } + } + + KMP_MB(); + TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE); + } + + __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); + return is_init_thread; +} + + +//------------------------------------------------------------------------------ +// __kmp_free_task_threads_data: +// Deallocates a threads_data array for a task team, including any attached +// tasking deques. Only occurs at library shutdown. 
+ +static void +__kmp_free_task_threads_data( kmp_task_team_t *task_team ) +{ + __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); + if ( task_team -> tt.tt_threads_data != NULL ) { + int i; + for (i = 0; i < task_team->tt.tt_max_threads; i++ ) { + __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] ); + } + __kmp_free( task_team -> tt.tt_threads_data ); + task_team -> tt.tt_threads_data = NULL; + } + __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); +} + + +//------------------------------------------------------------------------------ +// __kmp_allocate_task_team: +// Allocates a task team associated with a specific team, taking it from +// the global task team free list if possible. Also initializes data structures. + +static kmp_task_team_t * +__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team ) +{ + kmp_task_team_t *task_team = NULL; + int nthreads; + + KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n", + (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) ); + + if (TCR_PTR(__kmp_free_task_teams) != NULL) { + // Take a task team from the task team pool + __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); + if (__kmp_free_task_teams != NULL) { + task_team = __kmp_free_task_teams; + TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next); + task_team -> tt.tt_next = NULL; + } + __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); + } + + if (task_team == NULL) { + KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating " + "task team for team %p\n", + __kmp_gtid_from_thread( thread ), team ) ); + // Allocate a new task team if one is not available. + // Cannot use __kmp_thread_malloc() because threads not around for + // kmp_reap_task_team( ). 
+ task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) ); + __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock ); + //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory + //task_team -> tt.tt_max_threads = 0; + //task_team -> tt.tt_next = NULL; + } + + TCW_4(task_team -> tt.tt_found_tasks, FALSE); +#if OMP_41_ENABLED + TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE); +#endif + task_team -> tt.tt_nproc = nthreads = team->t.t_nproc; + + TCW_4( task_team -> tt.tt_unfinished_threads, nthreads ); + TCW_4( task_team -> tt.tt_active, TRUE ); + + KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n", + (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) ); + return task_team; +} + + +//------------------------------------------------------------------------------ +// __kmp_free_task_team: +// Frees the task team associated with a specific thread, and adds it +// to the global task team free list. + +void +__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ) +{ + KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n", + thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) ); + + // Put task team back on free list + __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock ); + + KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL ); + task_team -> tt.tt_next = __kmp_free_task_teams; + TCW_PTR(__kmp_free_task_teams, task_team); + + __kmp_release_bootstrap_lock( & __kmp_task_team_lock ); +} + + +//------------------------------------------------------------------------------ +// __kmp_reap_task_teams: +// Free all the task teams on the task team free list. +// Should only be done during library shutdown. +// Cannot do anything that needs a thread structure or gtid since they are already gone. 
+ +void +__kmp_reap_task_teams( void ) +{ + kmp_task_team_t *task_team; + + if ( TCR_PTR(__kmp_free_task_teams) != NULL ) { + // Free all task_teams on the free list + __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); + while ( ( task_team = __kmp_free_task_teams ) != NULL ) { + __kmp_free_task_teams = task_team -> tt.tt_next; + task_team -> tt.tt_next = NULL; + + // Free threads_data if necessary + if ( task_team -> tt.tt_threads_data != NULL ) { + __kmp_free_task_threads_data( task_team ); + } + __kmp_free( task_team ); + } + __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); + } +} + +//------------------------------------------------------------------------------ +// __kmp_wait_to_unref_task_teams: +// Some threads could still be in the fork barrier release code, possibly +// trying to steal tasks. Wait for each thread to unreference its task team. +// +void +__kmp_wait_to_unref_task_teams(void) +{ + kmp_info_t *thread; + kmp_uint32 spins; + int done; + + KMP_INIT_YIELD( spins ); + + + for (;;) { + done = TRUE; + + // TODO: GEH - this may be is wrong because some sync would be necessary + // in case threads are added to the pool during the traversal. + // Need to verify that lock for thread pool is held when calling + // this routine. + for (thread = (kmp_info_t *)__kmp_thread_pool; + thread != NULL; + thread = thread->th.th_next_pool) + { +#if KMP_OS_WINDOWS + DWORD exit_val; +#endif + if ( TCR_PTR(thread->th.th_task_team) == NULL ) { + KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n", + __kmp_gtid_from_thread( thread ) ) ); + continue; + } +#if KMP_OS_WINDOWS + // TODO: GEH - add this check for Linux* OS / OS X* as well? 
+ if (!__kmp_is_thread_alive(thread, &exit_val)) { + thread->th.th_task_team = NULL; + continue; + } +#endif + + done = FALSE; // Because th_task_team pointer is not NULL for this thread + + KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n", + __kmp_gtid_from_thread( thread ) ) ); + + if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { + volatile void *sleep_loc; + // If the thread is sleeping, awaken it. + if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) { + KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n", + __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) ); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); + } + } + } + if (done) { + break; + } + + // If we are oversubscribed, + // or have waited a bit (and library mode is throughput), yield. + // Pause is in the following code. + KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); + KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput + } + + +} + + +//------------------------------------------------------------------------------ +// __kmp_task_team_setup: Create a task_team for the current team, but use +// an already created, unused one if it already exists. +void +__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always ) +{ + KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); + + // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next. + // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use. 
+ if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) { + team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team ); + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n", + __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state], + ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); + } + + // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is + // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the + // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely + // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for + // serialized teams. + if (team->t.t_nproc > 1) { + int other_team = 1 - this_thr->th.th_task_state; + if (team->t.t_task_team[other_team] == NULL) { // setup other team as well + team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team ); + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n", + __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], + ((team != NULL) ? 
team->t.t_id : -1), other_team )); + } + else { // Leave the old task team struct in place for the upcoming region; adjust as needed + kmp_task_team_t *task_team = team->t.t_task_team[other_team]; + if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) { + TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); + TCW_4(task_team->tt.tt_found_tasks, FALSE); +#if OMP_41_ENABLED + TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); +#endif + TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc ); + TCW_4(task_team->tt.tt_active, TRUE ); + } + // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n", + __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], + ((team != NULL) ? team->t.t_id : -1), other_team )); + } + } +} + + +//------------------------------------------------------------------------------ +// __kmp_task_team_sync: Propagation of task team data from team to threads +// which happens just after the release phase of a team barrier. This may be +// called by any thread, but only for teams with # threads > 1. + +void +__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team ) +{ + KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); + + // Toggle the th_task_state field, to switch which task_team this thread refers to + this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; + // It is now safe to propagate the task team pointer from the team struct to the current thread. + TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]); + KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n", + __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team, + ((team != NULL) ? 
team->t.t_id : -1), this_thr->th.th_task_state)); +} + + +//-------------------------------------------------------------------------------------------- +// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather +// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created. +// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0 +// optionally as the last argument. When wait is zero, master thread does not wait for +// unfinished_threads to reach 0. +void +__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team + USE_ITT_BUILD_ARG(void * itt_sync_obj) + , int wait) +{ + kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; + + KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); + KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team ); + + if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) { + if (wait) { + KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n", + __kmp_gtid_from_thread(this_thr), task_team)); + // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait + // here for tasks to complete. To avoid memory contention, only master thread checks termination condition. + kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); + flag.wait(this_thr, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj)); + } + // Deactivate the old task team, so that the worker threads will stop referencing it while spinning. 
+ KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: " + "setting active to false, setting local and team's pointer to NULL\n", + __kmp_gtid_from_thread(this_thr), task_team)); +#if OMP_41_ENABLED + KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE ); + TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE ); +#else + KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 ); +#endif + TCW_SYNC_4( task_team->tt.tt_active, FALSE ); + KMP_MB(); + + TCW_PTR(this_thr->th.th_task_team, NULL); + } +} + + +//------------------------------------------------------------------------------ +// __kmp_tasking_barrier: +// This routine may only called when __kmp_tasking_mode == tskm_extra_barrier. +// Internal function to execute all tasks prior to a regular barrier or a +// join barrier. It is a full barrier itself, which unfortunately turns +// regular barriers into double barriers and join barriers into 1 1/2 +// barriers. +void +__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ) +{ + volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads; + int flag = FALSE; + KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier ); + +#if USE_ITT_BUILD + KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL ); +#endif /* USE_ITT_BUILD */ + kmp_flag_32 spin_flag(spin, 0U); + while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag + USE_ITT_BUILD_ARG(NULL), 0 ) ) { +#if USE_ITT_BUILD + // TODO: What about itt_sync_obj?? 
+ KMP_FSYNC_SPIN_PREPARE( spin ); +#endif /* USE_ITT_BUILD */ + + if( TCR_4(__kmp_global.g.g_done) ) { + if( __kmp_global.g.g_abort ) + __kmp_abort_thread( ); + break; + } + KMP_YIELD( TRUE ); // GH: We always yield here + } +#if USE_ITT_BUILD + KMP_FSYNC_SPIN_ACQUIRED( (void*) spin ); +#endif /* USE_ITT_BUILD */ +} + + +#if OMP_41_ENABLED + +/* __kmp_give_task puts a task into a given thread queue if: + - the queue for that thread it was created + - there's space in that queue + + Because of this, __kmp_push_task needs to check if there's space after getting the lock + */ +static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task ) +{ + kmp_task_team_t * task_team = thread->th.th_task_team; + kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ]; + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); + bool result = false; + + KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) ); + + // assert tasking is enabled? what if not? + KMP_DEBUG_ASSERT( task_team != NULL ); + + if (thread_data -> td.td_deque == NULL ) { + // There's no queue in this thread, go find another one + // We're guaranteed that at least one thread has a queue + KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) ); + return result; + } + + if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) + { + KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) ); + return result; + } + + __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock ); + + if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) + { + KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) ); + goto release_and_exit; + } + + thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; + // Wrap index. 
+ thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK; + TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); + + result = true; + KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) ); + +release_and_exit: + __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock ); + + return result; +} + + +/* The finish of the a proxy tasks is divided in two pieces: + - the top half is the one that can be done from a thread outside the team + - the bottom half must be run from a them within the team + + In order to run the bottom half the task gets queued back into one of the threads of the team. + Once the td_incomplete_child_task counter of the parent is decremented the threads can leave the barriers. + So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts: + - things that can be run before queuing the bottom half + - things that must be run after queuing the bottom half + + This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this + we use the td_incomplete_child_task of the proxy task to synchronize the top and bottom half. 
+*/ + +static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata ) +{ + KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); + KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY ); + KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); + KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); + + taskdata -> td_flags.complete = 1; // mark the task as completed + + if ( taskdata->td_taskgroup ) + KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) ); + + // Create an imaginary children for this task so the bottom half cannot release the task before we have completed the second top half + TCR_4(taskdata->td_incomplete_child_tasks++); +} + +static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata ) +{ + kmp_int32 children = 0; + + // Predecrement simulated by "- 1" calculation + children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1; + KMP_DEBUG_ASSERT( children >= 0 ); + + // Remove the imaginary children + TCR_4(taskdata->td_incomplete_child_tasks--); +} + +static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask ) +{ + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); + kmp_info_t * thread = __kmp_threads[ gtid ]; + + KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY ); + KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half + + // We need to wait to make sure the top half is finished + // Spinning here should be ok as this should happen quickly + while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ; + + __kmp_release_deps(gtid,taskdata); + __kmp_free_task_and_ancestors(gtid, taskdata, thread); +} + +/*! +@ingroup TASKING +@param gtid Global Thread ID of encountering thread +@param ptask Task which execution is completed + +Execute the completation of a proxy task from a thread of that is part of the team. Run first and bottom halves directly. 
+*/ +void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask ) +{ + KMP_DEBUG_ASSERT( ptask != NULL ); + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); + KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) ); + + KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY ); + + __kmp_first_top_half_finish_proxy(taskdata); + __kmp_second_top_half_finish_proxy(taskdata); + __kmp_bottom_half_finish_proxy(gtid,ptask); + + KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) ); +} + +/*! +@ingroup TASKING +@param ptask Task which execution is completed + +Execute the completation of a proxy task from a thread that could not belong to the team. +*/ +void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ) +{ + KMP_DEBUG_ASSERT( ptask != NULL ); + kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); + + KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) ); + + KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY ); + + __kmp_first_top_half_finish_proxy(taskdata); + + // Enqueue task to complete bottom half completation from a thread within the corresponding team + kmp_team_t * team = taskdata->td_team; + kmp_int32 nthreads = team->t.t_nproc; + kmp_info_t *thread; + kmp_int32 k = 0; + + do { + //This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here + //For now we're just linearly trying to find a thread + k = (k+1) % nthreads; + thread = team->t.t_threads[k]; + } while ( !__kmp_give_task( thread, k, ptask ) ); + + __kmp_second_top_half_finish_proxy(taskdata); + + KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) ); +} + +#endif diff --git a/contrib/libs/cxxsupp/openmp/kmp_taskq.c b/contrib/libs/cxxsupp/openmp/kmp_taskq.c index 57564a308b3..3079d45974b 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_taskq.c 
+++ b/contrib/libs/cxxsupp/openmp/kmp_taskq.c @@ -1,2032 +1,2032 @@ -/* - * kmp_taskq.c -- TASKQ support for OpenMP. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_error.h" - -#define MAX_MESSAGE 512 - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* - * Taskq routines and global variables - */ - -#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x); - -#define THREAD_ALLOC_FOR_TASKQ - -static int -in_parallel_context( kmp_team_t *team ) -{ - return ! team -> t.t_serialized; -} - -static void -__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; - int tid = __kmp_tid_from_gtid( gtid ); - kmp_uint32 my_token; - kmpc_task_queue_t *taskq; - kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq; - - if ( __kmp_env_consistency_check ) -#if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 ); -#else - __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL ); -#endif - - if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) { - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - /* GEH - need check here under stats to make sure */ - /* inside task (curr_thunk[*tid_ref] != NULL) */ - - my_token =tq->tq_curr_thunk[ tid ]-> th_tasknum; - - taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue; - - KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL); - KMP_MB(); - } -} - -static void -__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; - int tid = __kmp_tid_from_gtid( gtid ); - kmp_uint32 my_token; - kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq; - - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref ); - - if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* GEH - need check here under stats to make sure */ - /* inside task (curr_thunk[tid] != NULL) */ - - my_token = tq->tq_curr_thunk[ tid ]->th_tasknum; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - } -} - -static void -__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk ) -{ - kmp_uint32 my_token; - kmpc_task_queue_t *taskq; - - /* assume we are always called from an active parallel context */ - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - my_token = thunk -> th_tasknum; - - taskq = thunk -> th.th_shareds -> sv_queue; - - if(taskq->tq_tasknum_serving <= my_token) { - KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL); - KMP_MB(); - taskq->tq_tasknum_serving = my_token +1; - KMP_MB(); - } -} - -#ifdef KMP_DEBUG - -static void -__kmp_dump_TQF(kmp_int32 flags) -{ - if (flags & TQF_IS_ORDERED) - __kmp_printf("ORDERED "); - if (flags & TQF_IS_LASTPRIVATE) - __kmp_printf("LAST_PRIV "); - if (flags & TQF_IS_NOWAIT) - __kmp_printf("NOWAIT "); - if (flags & TQF_HEURISTICS) - __kmp_printf("HEURIST "); - if (flags & TQF_INTERFACE_RESERVED1) - __kmp_printf("RESERV1 "); - if (flags & TQF_INTERFACE_RESERVED2) - __kmp_printf("RESERV2 "); - if (flags & TQF_INTERFACE_RESERVED3) - __kmp_printf("RESERV3 "); - if (flags & TQF_INTERFACE_RESERVED4) - __kmp_printf("RESERV4 "); - if (flags & TQF_IS_LAST_TASK) - __kmp_printf("LAST_TASK "); - if (flags & TQF_TASKQ_TASK) - __kmp_printf("TASKQ_TASK "); - if (flags & TQF_RELEASE_WORKERS) - __kmp_printf("RELEASE "); - if (flags & TQF_ALL_TASKS_QUEUED) - __kmp_printf("ALL_QUEUED "); - if (flags & TQF_PARALLEL_CONTEXT) - __kmp_printf("PARALLEL "); - if (flags & TQF_DEALLOCATED) - __kmp_printf("DEALLOC "); - if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS))) - __kmp_printf("(NONE)"); -} - -static void -__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid ) -{ - int i; - int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; - - __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid); - - if (thunk != NULL) { - for (i = 0; i < nproc; i++) { - if( tq->tq_curr_thunk[i] == thunk ) { - __kmp_printf("[%i] ", i); - } - } - __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds); - __kmp_printf("th_task=%p, ", thunk->th_task); - __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk); - __kmp_printf("th_status=%d, ", thunk->th_status); - __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum); - __kmp_printf("th_flags="); 
__kmp_dump_TQF(thunk->th_flags); - } - - __kmp_printf("\n"); -} - -static void -__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) -{ - kmpc_thunk_t *th; - - __kmp_printf(" Thunk stack for T#%d: ", thread_num); - - for (th = thunk; th != NULL; th = th->th_encl_thunk ) - __kmp_printf("%p ", th); - - __kmp_printf("\n"); -} - -static void -__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid ) -{ - int qs, count, i; - kmpc_thunk_t *thunk; - kmpc_task_queue_t *taskq; - - __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid); - - if (queue != NULL) { - int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT; - - if ( __kmp_env_consistency_check ) { - __kmp_printf(" tq_loc : "); - } - if (in_parallel) { - - //if (queue->tq.tq_parent != 0) - //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - //__kmp_acquire_lock(& queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent); - __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child); - __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child); - __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child); - __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count); - - //__kmp_release_lock(& queue->tq_link_lck, global_tid); - - //if (queue->tq.tq_parent != 0) - //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); - //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } - - __kmp_printf(" tq_shareds : "); - for (i=0; i<((queue == tq->tq_root) ? 
queue->tq_nproc : 1); i++) - __kmp_printf("%p ", queue->tq_shareds[i].ai_data); - __kmp_printf("\n"); - - if (in_parallel) { - __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing); - __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving); - } - - __kmp_printf(" tq_queue : %p\n", queue->tq_queue); - __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space); - __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot); - - __kmp_printf(" tq_free_thunks : "); - for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free ) - __kmp_printf("%p ", thunk); - __kmp_printf("\n"); - - __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots); - __kmp_printf(" tq_head : %d\n", queue->tq_head); - __kmp_printf(" tq_tail : %d\n", queue->tq_tail); - __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull); - __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat); - __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags); - __kmp_printf("\n"); - - if (in_parallel) { - __kmp_printf(" tq_th_thunks : "); - for (i = 0; i < queue->tq_nproc; i++) { - __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data); - } - __kmp_printf("\n"); - } - - __kmp_printf("\n"); - __kmp_printf(" Queue slots:\n"); - - - qs = queue->tq_tail; - for ( count = 0; count < queue->tq_nfull; ++count ) { - __kmp_printf("(%d)", qs); - __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid ); - qs = (qs+1) % queue->tq_nslots; - } - - __kmp_printf("\n"); - - if (in_parallel) { - if (queue->tq_taskq_slot != NULL) { - __kmp_printf(" TaskQ slot:\n"); - __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid ); - __kmp_printf("\n"); - } - //__kmp_release_lock(& queue->tq_queue_lck, global_tid); - //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); - } - } - - __kmp_printf(" Taskq freelist: "); - - //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - 
/* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free ) - __kmp_printf("%p ", taskq); - - //__kmp_release_lock( & tq->tq_freelist_lck, global_tid ); - - __kmp_printf("\n\n"); -} - -static void -__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid ) -{ - int i, count, qs; - int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue == NULL) - return; - - __kmp_printf(" "); - - for (i=0; itq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) { - __kmp_printf(" [%i]", i); - } - } - - __kmp_printf(":"); - - //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - qs = curr_queue->tq_tail; - - for ( count = 0; count < curr_queue->tq_nfull; ++count ) { - __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk); - qs = (qs+1) % curr_queue->tq_nslots; - } - - //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid); - - __kmp_printf("\n"); - - if (curr_queue->tq_first_child) { - //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - if (curr_queue->tq_first_child) { - for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child; - queue != NULL; - queue = queue->tq_next_child) { - __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid ); - } - } - - //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - } -} - -static void -__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid) -{ - 
__kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid); - - __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid ); - - __kmp_printf("\n"); -} -#endif - -/* --------------------------------------------------------------------------- */ - -/* - New taskq storage routines that try to minimize overhead of mallocs but - still provide cache line alignment. -*/ - - -static void * -__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) -{ - void *addr, *orig_addr; - size_t bytes; - - KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) ); - - bytes = sizeof(void *) + CACHE_LINE + size; - -#ifdef THREAD_ALLOC_FOR_TASKQ - orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes ); -#else - KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) ); - orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes ); -#endif /* THREAD_ALLOC_FOR_TASKQ */ - - if (orig_addr == 0) - KMP_FATAL( OutOfHeapMemory ); - - addr = orig_addr; - - if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) { - KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) ); - addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 )); - } - - (* (void **) addr) = orig_addr; - - KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n", - orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1, - (int) size, global_tid )); - - return ( ((void **) addr) + 1 ); -} - -static void -__kmpc_taskq_free(void *p, kmp_int32 global_tid) -{ - KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) ); - - KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid )); - -#ifdef THREAD_ALLOC_FOR_TASKQ - __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) ); -#else - KMP_INTERNAL_FREE( *( ((void **) p)-1) ); -#endif /* THREAD_ALLOC_FOR_TASKQ */ -} - -/* 
--------------------------------------------------------------------------- */ - -/* - * Keep freed kmpc_task_queue_t on an internal freelist and recycle since - * they're of constant size. - */ - -static kmpc_task_queue_t * -__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks, - kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk, - size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid ) -{ - kmp_int32 i; - size_t bytes; - kmpc_task_queue_t *new_queue; - kmpc_aligned_shared_vars_t *shared_var_array; - char *shared_var_storage; - char *pt; /* for doing byte-adjusted address computations */ - - __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - if( tq->tq_freelist ) { - new_queue = tq -> tq_freelist; - tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free; - - KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED); - - new_queue->tq_flags = 0; - - __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); - } - else { - __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); - - new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid); - new_queue->tq_flags = 0; - } - - /* space in the task queue for queue slots (allocate as one big chunk */ - /* of storage including new_taskq_task space) */ - - sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */ - pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid); - new_queue->tq_thunk_space = (kmpc_thunk_t *)pt; - *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk); - - /* chain the allocated thunks into a freelist for this queue */ - - new_queue->tq_free_thunks = (kmpc_thunk_t *)pt; - - for (i = 0; i < (nthunks - 2); i++) { - ((kmpc_thunk_t 
*)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk); -#ifdef KMP_DEBUG - ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED; -#endif - } - - ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL; -#ifdef KMP_DEBUG - ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED; -#endif - - /* initialize the locks */ - - if (in_parallel) { - __kmp_init_lock( & new_queue->tq_link_lck ); - __kmp_init_lock( & new_queue->tq_free_thunks_lck ); - __kmp_init_lock( & new_queue->tq_queue_lck ); - } - - /* now allocate the slots */ - - bytes = nslots * sizeof (kmpc_aligned_queue_slot_t); - new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid ); - - /* space for array of pointers to shared variable structures */ - sizeof_shareds += sizeof(kmpc_task_queue_t *); - sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */ - - bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t); - shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid); - - bytes = nshareds * sizeof_shareds; - shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid); - - for (i=0; isv_queue = new_queue; - } - new_queue->tq_shareds = shared_var_array; - - - /* array for number of outstanding thunks per thread */ - - if (in_parallel) { - bytes = nproc * sizeof(kmpc_aligned_int32_t); - new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid); - new_queue->tq_nproc = nproc; - - for (i=0; itq_th_thunks[i].ai_data = 0; - } - - return new_queue; -} - -static void -__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid) -{ - __kmpc_taskq_free(p->tq_thunk_space, global_tid); - __kmpc_taskq_free(p->tq_queue, global_tid); - - /* free shared var structure storage */ - __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid); - - /* free 
array of pointers to shared vars storage */ - __kmpc_taskq_free(p->tq_shareds, global_tid); - -#ifdef KMP_DEBUG - p->tq_first_child = NULL; - p->tq_next_child = NULL; - p->tq_prev_child = NULL; - p->tq_ref_count = -10; - p->tq_shareds = NULL; - p->tq_tasknum_queuing = 0; - p->tq_tasknum_serving = 0; - p->tq_queue = NULL; - p->tq_thunk_space = NULL; - p->tq_taskq_slot = NULL; - p->tq_free_thunks = NULL; - p->tq_nslots = 0; - p->tq_head = 0; - p->tq_tail = 0; - p->tq_nfull = 0; - p->tq_hiwat = 0; - - if (in_parallel) { - int i; - - for (i=0; itq_nproc; i++) - p->tq_th_thunks[i].ai_data = 0; - } - if ( __kmp_env_consistency_check ) - p->tq_loc = NULL; - KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED ); - p->tq_flags = TQF_DEALLOCATED; -#endif /* KMP_DEBUG */ - - if (in_parallel) { - __kmpc_taskq_free(p->tq_th_thunks, global_tid); - __kmp_destroy_lock(& p->tq_link_lck); - __kmp_destroy_lock(& p->tq_queue_lck); - __kmp_destroy_lock(& p->tq_free_thunks_lck); - } -#ifdef KMP_DEBUG - p->tq_th_thunks = NULL; -#endif /* KMP_DEBUG */ - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); - p->tq.tq_next_free = tq->tq_freelist; - - tq->tq_freelist = p; - __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); -} - -/* - * Once a group of thunks has been allocated for use in a particular queue, - * these are managed via a per-queue freelist. - * We force a check that there's always a thunk free if we need one. 
- */ - -static kmpc_thunk_t * -__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid) -{ - kmpc_thunk_t *fl; - - if (in_parallel) { - __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } - - fl = queue->tq_free_thunks; - - KMP_DEBUG_ASSERT (fl != NULL); - - queue->tq_free_thunks = fl->th.th_next_free; - fl->th_flags = 0; - - if (in_parallel) - __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); - - return fl; -} - -static void -__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid) -{ -#ifdef KMP_DEBUG - p->th_task = 0; - p->th_encl_thunk = 0; - p->th_status = 0; - p->th_tasknum = 0; - /* Also could zero pointers to private vars */ -#endif - - if (in_parallel) { - __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } - - p->th.th_next_free = queue->tq_free_thunks; - queue->tq_free_thunks = p; - -#ifdef KMP_DEBUG - p->th_flags = TQF_DEALLOCATED; -#endif - - if (in_parallel) - __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); -} - -/* --------------------------------------------------------------------------- */ - -/* returns nonzero if the queue just became full after the enqueue */ - -static kmp_int32 -__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel ) -{ - kmp_int32 ret; - - /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing then) */ - if (in_parallel) { - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before 
querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } - - KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots); /* check queue not full */ - - queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk; - - if (queue->tq_head >= queue->tq_nslots) - queue->tq_head = 0; - - (queue->tq_nfull)++; - - KMP_MB(); /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */ - - ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE; - - if (in_parallel) { - /* don't need to wait until workers are released before unlocking */ - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - - if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) { - /* If just creating the root queue, the worker threads are waiting at */ - /* a join barrier until now, when there's something in the queue for */ - /* them to do; release them now to do work. */ - /* This should only be done when this is the first task enqueued, */ - /* so reset the flag here also. 
*/ - - tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers are still in spin mode */ - - KMP_MB(); /* avoid releasing barrier twice if taskq_task switches threads */ - - __kmpc_end_barrier_master( NULL, global_tid); - } - } - - return ret; -} - -static kmpc_thunk_t * -__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel) -{ - kmpc_thunk_t *pt; - int tid = __kmp_tid_from_gtid( global_tid ); - - KMP_DEBUG_ASSERT (queue->tq_nfull > 0); /* check queue not empty */ - - if (queue->tq.tq_parent != NULL && in_parallel) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - ct = ++(queue->tq_ref_count); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); - } - - pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk; - - if (queue->tq_tail >= queue->tq_nslots) - queue->tq_tail = 0; - - if (in_parallel) { - queue->tq_th_thunks[tid].ai_data++; - - KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */ - - KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n", - global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue)); - } - - (queue->tq_nfull)--; - -#ifdef KMP_DEBUG - KMP_MB(); - - /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */ - - KMP_DEBUG_ASSERT(queue->tq_nfull >= 0); - - if (in_parallel) { - KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH); - } -#endif - - return pt; -} - -/* - * Find the next (non-null) task to dequeue and return it. - * This is never called unless in_parallel=TRUE - * - * Here are the rules for deciding which queue to take the task from: - * 1. Walk up the task queue tree from the current queue's parent and look - * on the way up (for loop, below). - * 2. 
Do a depth-first search back down the tree from the root and - * look (find_task_in_descendant_queue()). - * - * Here are the rules for deciding which task to take from a queue - * (__kmp_find_task_in_queue ()): - * 1. Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task - * must be staged to make sure we execute the last one with - * TQF_IS_LAST_TASK at the end of task queue execution. - * 2. If the queue length is below some high water mark and the taskq task - * is enqueued, prefer running the taskq task. - * 3. Otherwise, take a (normal) task from the queue. - * - * If we do all this and return pt == NULL at the bottom of this routine, - * this means there are no more tasks to execute (except possibly for - * TQF_IS_LASTPRIVATE). - */ - -static kmpc_thunk_t * -__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue) -{ - kmpc_thunk_t *pt = NULL; - int tid = __kmp_tid_from_gtid( global_tid ); - - /* To prevent deadlock from tq_queue_lck if queue already deallocated */ - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - /* Check again to avoid race in __kmpc_end_taskq() */ - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) { - /* if there's enough room in the queue and the dispatcher */ - /* (taskq task) is available, schedule more tasks */ - pt = (kmpc_thunk_t *) queue->tq_taskq_slot; - queue->tq_taskq_slot = NULL; - } - else if (queue->tq_nfull == 0 || - queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) { - /* do nothing if no thunks available or this thread can't */ - /* run any because it already is executing too many */ - - pt = NULL; - } - else if (queue->tq_nfull > 1) { - /* always safe to schedule a 
task even if TQF_IS_LASTPRIVATE */ - - pt = __kmp_dequeue_task (global_tid, queue, TRUE); - } - else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) { - /* one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE */ - - pt = __kmp_dequeue_task (global_tid, queue, TRUE); - } - else if (queue->tq_flags & TQF_IS_LAST_TASK) { - /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */ - /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ - /* instrumentation does copy-out. */ - - pt = __kmp_dequeue_task (global_tid, queue, TRUE); - pt->th_flags |= TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */ - } - } - - /* GEH - What happens here if is lastprivate, but not last task? */ - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - } - - return pt; -} - -/* - * Walk a tree of queues starting at queue's first child - * and return a non-NULL thunk if one can be scheduled. - * Must only be called when in_parallel=TRUE - */ - -static kmpc_thunk_t * -__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) -{ - kmpc_thunk_t *pt = NULL; - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue->tq_first_child != NULL) { - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - queue = (kmpc_task_queue_t *) curr_queue->tq_first_child; - if (queue == NULL) { - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - return NULL; - } - - while (queue != NULL) { - int ct; - kmpc_task_queue_t *next; - - ct= ++(queue->tq_ref_count); - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); - - pt = __kmp_find_task_in_queue (global_tid, queue); - - if (pt != NULL) { - int ct; - - __kmp_acquire_lock(& 
curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 ); - - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - - return pt; - } - - /* although reference count stays active during descendant walk, shouldn't matter */ - /* since if children still exist, reference counts aren't being monitored anyway */ - - pt = __kmp_find_task_in_descendant_queue (global_tid, queue); - - if (pt != NULL) { - int ct; - - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - - return pt; - } - - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - next = queue->tq_next_child; - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - - queue = next; - } - - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - } - - return pt; -} - -/* - * Walk up the taskq tree looking for a task to execute. - * If we get to the root, search the tree for a descendent queue task. 
- * Must only be called when in_parallel=TRUE - */ - -static kmpc_thunk_t * -__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) -{ - kmpc_task_queue_t *queue; - kmpc_thunk_t *pt; - - pt = NULL; - - if (curr_queue->tq.tq_parent != NULL) { - queue = curr_queue->tq.tq_parent; - - while (queue != NULL) { - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - ct = ++(queue->tq_ref_count); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); - } - - pt = __kmp_find_task_in_queue (global_tid, queue); - if (pt != NULL) { - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */ - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - } - - return pt; - } - - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - } - queue = queue->tq.tq_parent; - - if (queue != NULL) - __kmp_release_lock(& 
queue->tq_link_lck, global_tid); - } - - } - - pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root ); - - return pt; -} - -static int -__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue) -{ - int i; - - /* KMP_MB(); *//* is this really necessary? */ - - for (i=0; itq_nproc; i++) { - if (queue->tq_th_thunks[i].ai_data != 0) - return FALSE; - } - - return TRUE; -} - -static int -__kmp_taskq_has_any_children (kmpc_task_queue_t *queue) -{ - return (queue->tq_first_child != NULL); -} - -static void -__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel ) -{ -#ifdef KMP_DEBUG - kmp_int32 i; - kmpc_thunk_t *thunk; -#endif - - KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid )); - - /* sub-queue in a recursion, not the root task queue */ - KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL); - - if (in_parallel) { - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } - - KMP_DEBUG_ASSERT (queue->tq_first_child == NULL); - - /* unlink queue from its siblings if any at this level */ - if (queue->tq_prev_child != NULL) - queue->tq_prev_child->tq_next_child = queue->tq_next_child; - if (queue->tq_next_child != NULL) - queue->tq_next_child->tq_prev_child = queue->tq_prev_child; - if (queue->tq.tq_parent->tq_first_child == queue) - queue->tq.tq_parent->tq_first_child = queue->tq_next_child; - - queue->tq_prev_child = NULL; - queue->tq_next_child = NULL; - - if (in_parallel) { - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n", - __LINE__, global_tid, queue, queue->tq_ref_count)); - - /* wait until all other threads have stopped accessing this queue */ - while (queue->tq_ref_count > 1) { - 
__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL); - - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } - - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - } - - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n", - __LINE__, global_tid, queue)); - -#ifdef KMP_DEBUG - KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED); - KMP_DEBUG_ASSERT(queue->tq_nfull == 0); - - for (i=0; itq_nproc; i++) { - KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); - } - - i = 0; - for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free) - ++i; - - KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH)); -#endif - - /* release storage for queue entry */ - __kmp_free_taskq ( tq, queue, TRUE, global_tid ); - - KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); -} - -/* - * Starting from indicated queue, proceed downward through tree and - * remove all taskqs which are finished, but only go down to taskqs - * which have the "nowait" clause present. Assume this is only called - * when in_parallel=TRUE. 
- */ - -static void -__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue ) -{ - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue->tq_first_child != NULL) { - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - queue = (kmpc_task_queue_t *) curr_queue->tq_first_child; - if (queue != NULL) { - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - return; - } - - while (queue != NULL) { - kmpc_task_queue_t *next; - int ct = ++(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); - - - /* although reference count stays active during descendant walk, */ - /* shouldn't matter since if children still exist, reference */ - /* counts aren't being monitored anyway */ - - if (queue->tq_flags & TQF_IS_NOWAIT) { - __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue ); - - if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) && - __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) { - - /* - Only remove this if we have not already marked it for deallocation. - This should prevent multiple threads from trying to free this. 
- */ - - if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) { - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - - __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE ); - - /* Can't do any more here since can't be sure where sibling queue is so just exit this level */ - return; - } - else { - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - } - } - /* otherwise, just fall through and decrement reference count */ - } - } - - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - next = queue->tq_next_child; - - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - - queue = next; - } - - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - } -} - -/* - * Starting from indicated queue, proceed downward through tree and - * remove all taskq's assuming all are finished and - * assuming NO other threads are executing at this point. 
- */ - -static void -__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue ) -{ - kmpc_task_queue_t *next_child; - - queue = (kmpc_task_queue_t *) queue->tq_first_child; - - while (queue != NULL) { - __kmp_remove_all_child_taskq ( tq, global_tid, queue ); - - next_child = queue->tq_next_child; - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE ); - queue = next_child; - } -} - -static void -__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel ) -{ - kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue; - kmp_int32 tid = __kmp_tid_from_gtid( global_tid ); - - KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); - KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); - - /* - * For the taskq task, the curr_thunk pushes and pop pairs are set up as follows: - * - * happens exactly once: - * 1) __kmpc_taskq : push (if returning thunk only) - * 4) __kmpc_end_taskq_task : pop - * - * optionally happens *each* time taskq task is dequeued/enqueued: - * 2) __kmpc_taskq_task : pop - * 3) __kmp_execute_task_from_queue : push - * - * execution ordering: 1,(2,3)*,4 - */ - - if (!(thunk->th_flags & TQF_TASKQ_TASK)) { - kmp_int32 index = (queue == tq->tq_root) ? tid : 0; - thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data; - - if ( __kmp_env_consistency_check ) { - __kmp_push_workshare( global_tid, - (queue->tq_flags & TQF_IS_ORDERED) ? 
ct_task_ordered : ct_task, - queue->tq_loc ); - } - } - else { - if ( __kmp_env_consistency_check ) - __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc ); - } - - if (in_parallel) { - thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; - tq->tq_curr_thunk[tid] = thunk; - - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); - } - - KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid)); - thunk->th_task (global_tid, thunk); - KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid)); - - if (!(thunk->th_flags & TQF_TASKQ_TASK)) { - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task, - queue->tq_loc ); - - if (in_parallel) { - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); - } - - if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) { - __kmp_taskq_check_ordered(global_tid, thunk); - } - - __kmp_free_thunk (queue, thunk, in_parallel, global_tid); - - KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); - - if (in_parallel) { - KMP_MB(); /* needed so thunk put on free list before outstanding thunk count is decremented */ - - KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1); - - KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n", - global_tid, queue->tq_th_thunks[tid].ai_data-1, queue)); - - queue->tq_th_thunks[tid].ai_data--; - - /* KMP_MB(); */ /* is MB really necessary ? 
*/ - } - - if (queue->tq.tq_parent != NULL && in_parallel) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - ct = --(queue->tq_ref_count); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - } - } -} - -/* --------------------------------------------------------------------------- */ - -/* starts a taskq; creates and returns a thunk for the taskq_task */ -/* also, returns pointer to shared vars for this thread in "shareds" arg */ - -kmpc_thunk_t * -__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, - size_t sizeof_thunk, size_t sizeof_shareds, - kmp_int32 flags, kmpc_shared_vars_t **shareds ) -{ - int in_parallel; - kmp_int32 nslots, nthunks, nshareds, nproc; - kmpc_task_queue_t *new_queue, *curr_queue; - kmpc_thunk_t *new_taskq_thunk; - kmp_info_t *th; - kmp_team_t *team; - kmp_taskq_t *tq; - kmp_int32 tid; - - KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid)); - - th = __kmp_threads[ global_tid ]; - team = th -> th.th_team; - tq = & team -> t.t_taskq; - nproc = team -> t.t_nproc; - tid = __kmp_tid_from_gtid( global_tid ); - - /* find out whether this is a parallel taskq or serialized one. */ - in_parallel = in_parallel_context( team ); - - if( ! tq->tq_root ) { - if (in_parallel) { - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; - } - - if (in_parallel) { - /* This shouldn't be a barrier region boundary, it will confuse the user. */ - /* Need the boundary to be at the end taskq instead. */ - if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) { - /* Creating the active root queue, and we are not the master thread. 
*/ - /* The master thread below created the queue and tasks have been */ - /* enqueued, and the master thread released this barrier. This */ - /* worker thread can now proceed and execute tasks. See also the */ - /* TQF_RELEASE_WORKERS which is used to handle this case. */ - - *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data; - - KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid)); - - return NULL; - } - } - - /* master thread only executes this code */ - - if( tq->tq_curr_thunk_capacity < nproc ) { - if(tq->tq_curr_thunk) - __kmp_free(tq->tq_curr_thunk); - else { - /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */ - __kmp_init_lock( & tq->tq_freelist_lck ); - } - - tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) ); - tq -> tq_curr_thunk_capacity = nproc; - } - - if (in_parallel) - tq->tq_global_flags = TQF_RELEASE_WORKERS; - } - - /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */ - /* on some heuristics (e.g., depth of queue nesting?). */ - - nslots = (in_parallel) ? (2 * nproc) : 1; - - /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */ - /* jobs being executed by other threads, and one extra for taskq slot */ - - nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2; - - /* Only the root taskq gets a per-thread array of shareds. */ - /* The rest of the taskq's only get one copy of the shared vars. */ - - nshareds = ( !tq->tq_root && in_parallel) ? 
nproc : 1; - - /* create overall queue data structure and its components that require allocation */ - - new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc, - sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid ); - - /* rest of new_queue initializations */ - - new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS; - - if (in_parallel) { - new_queue->tq_tasknum_queuing = 0; - new_queue->tq_tasknum_serving = 0; - new_queue->tq_flags |= TQF_PARALLEL_CONTEXT; - } - - new_queue->tq_taskq_slot = NULL; - new_queue->tq_nslots = nslots; - new_queue->tq_hiwat = HIGH_WATER_MARK (nslots); - new_queue->tq_nfull = 0; - new_queue->tq_head = 0; - new_queue->tq_tail = 0; - new_queue->tq_loc = loc; - - if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) { - /* prepare to serve the first-queued task's ORDERED directive */ - new_queue->tq_tasknum_serving = 1; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; - } - - /* create a new thunk for the taskq_task in the new_queue */ - *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data; - - new_taskq_thunk->th.th_shareds = *shareds; - new_taskq_thunk->th_task = taskq_task; - new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK; - new_taskq_thunk->th_status = 0; - - KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK); - - /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */ - - /* insert the new task queue into the tree, but only after all fields initialized */ - - if (in_parallel) { - if( ! 
tq->tq_root ) { - new_queue->tq.tq_parent = NULL; - new_queue->tq_first_child = NULL; - new_queue->tq_next_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; - tq->tq_root = new_queue; - } - else { - curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; - new_queue->tq.tq_parent = curr_queue; - new_queue->tq_first_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; /* for this the thread that built the queue */ - - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", - __LINE__, global_tid, new_queue, new_queue->tq_ref_count)); - - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child; - - if (curr_queue->tq_first_child != NULL) - curr_queue->tq_first_child->tq_prev_child = new_queue; - - curr_queue->tq_first_child = new_queue; - - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - } - - /* set up thunk stack only after code that determines curr_queue above */ - new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; - tq->tq_curr_thunk[tid] = new_taskq_thunk; - - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); - } - else { - new_taskq_thunk->th_encl_thunk = 0; - new_queue->tq.tq_parent = NULL; - new_queue->tq_first_child = NULL; - new_queue->tq_next_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; - } - -#ifdef KMP_DEBUG - KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid)); - KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid )); - - if (in_parallel) { - KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid)); - } else { - KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid)); - } - - 
KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid )); - - if (in_parallel) { - KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); - } -#endif /* KMP_DEBUG */ - - if ( __kmp_env_consistency_check ) - __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc ); - - KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid)); - - return new_taskq_thunk; -} - - -/* ends a taskq; last thread out destroys the queue */ - -void -__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk) -{ -#ifdef KMP_DEBUG - kmp_int32 i; -#endif - kmp_taskq_t *tq; - int in_parallel; - kmp_info_t *th; - kmp_int32 is_outermost; - kmpc_task_queue_t *queue; - kmpc_thunk_t *thunk; - int nproc; - - KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid)); - - tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; - - /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */ - queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue; - - KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid)); - is_outermost = (queue == tq->tq_root); - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - - if (in_parallel) { - kmp_uint32 spins; - - /* this is just a safeguard to release the waiting threads if */ - /* the outermost taskq never queues a task */ - - if (is_outermost && (KMP_MASTER_GTID( global_tid ))) { - if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) { - /* no lock needed, workers are still in spin mode */ - tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; - - __kmp_end_split_barrier( bs_plain_barrier, global_tid ); - } - } - - /* keep dequeueing work until all tasks are queued and dequeued */ - - do { - /* wait until something is available to dequeue */ - KMP_INIT_YIELD(spins); - - while ( (queue->tq_nfull == 0) - && (queue->tq_taskq_slot == NULL) - && (! 
__kmp_taskq_has_any_children(queue) ) - && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) ) - ) { - KMP_YIELD_WHEN( TRUE, spins ); - } - - /* check to see if we can execute tasks in the queue */ - while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) ) - && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL - ) { - KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } - - /* see if work found can be found in a descendant queue */ - if ( (__kmp_taskq_has_any_children(queue)) - && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL - ) { - - KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid )); - - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } - - } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED)) - || (queue->tq_nfull != 0) - ); - - KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid)); - - /* wait while all tasks are not finished and more work found - in descendant queues */ - - while ( (!__kmp_taskq_tasks_finished(queue)) - && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL - ) { - - KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } - - KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid)); - - if (!is_outermost) { - /* need to return if NOWAIT present and not outermost taskq */ - - if (queue->tq_flags & TQF_IS_NOWAIT) { - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - queue->tq_ref_count--; - KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 ); - 
__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid)); - - return; - } - - __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue ); - - /* WAIT until all tasks are finished and no child queues exist before proceeding */ - KMP_INIT_YIELD(spins); - - while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) { - thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue ); - - if (thunk != NULL) { - KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } - - KMP_YIELD_WHEN( thunk == NULL, spins ); - - __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue ); - } - - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - queue->tq_flags |= TQF_DEALLOCATED; - } - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - - /* only the allocating thread can deallocate the queue */ - if (taskq_thunk != NULL) { - __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE ); - } - - KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid)); - - return; - } - - /* Outermost Queue: steal work from descendants until all tasks are finished */ - - KMP_INIT_YIELD(spins); - - while (!__kmp_taskq_tasks_finished(queue)) { - thunk = __kmp_find_task_in_descendant_queue(global_tid, queue); - - if (thunk != NULL) { - KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } - - KMP_YIELD_WHEN( thunk == NULL, spins ); - } - - /* Need this barrier to prevent destruction of queue before threads have all executed 
above code */ - /* This may need to be done earlier when NOWAIT is implemented for the outermost level */ - - if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) { - /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */ - /* for right now, everybody waits, and the master thread destroys the */ - /* remaining queues. */ - - __kmp_remove_all_child_taskq( tq, global_tid, queue ); - - /* Now destroy the root queue */ - KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue )); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); - -#ifdef KMP_DEBUG - /* the root queue entry */ - KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL)); - - /* children must all be gone by now because of barrier above */ - KMP_DEBUG_ASSERT (queue->tq_first_child == NULL); - - for (i=0; itq_th_thunks[i].ai_data == 0); - } - - for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free); - - KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH)); - - for (i = 0; i < nproc; i++) { - KMP_DEBUG_ASSERT( ! 
tq->tq_curr_thunk[i] ); - } -#endif - /* unlink the root queue entry */ - tq -> tq_root = NULL; - - /* release storage for root queue entry */ - KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid)); - - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_free_taskq ( tq, queue, in_parallel, global_tid ); - - KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); - - /* release the workers now that the data structures are up to date */ - __kmp_end_split_barrier( bs_plain_barrier, global_tid ); - } - - th = __kmp_threads[ global_tid ]; - - /* Reset ORDERED SECTION to parallel version */ - th->th.th_dispatch->th_deo_fcn = 0; - - /* Reset ORDERED SECTION to parallel version */ - th->th.th_dispatch->th_dxo_fcn = 0; - } - else { - /* in serial execution context, dequeue the last task */ - /* and execute it, if there were any tasks encountered */ - - if (queue->tq_nfull > 0) { - KMP_DEBUG_ASSERT(queue->tq_nfull == 1); - - thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); - - if (queue->tq_flags & TQF_IS_LAST_TASK) { - /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */ - /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ - /* instrumentation does copy-out. 
*/ - - /* no need for test_then_or call since already locked */ - thunk->th_flags |= TQF_IS_LAST_TASK; - } - - KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue)); - - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } - - /* destroy the unattached serial queue now that there is no more work to do */ - KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); - -#ifdef KMP_DEBUG - i = 0; - for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free) - ++i; - KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1); -#endif - /* release storage for unattached serial queue */ - KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid)); - - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_free_taskq ( tq, queue, in_parallel, global_tid ); - } - - KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid)); -} - -/* Enqueues a task for thunk previously created by __kmpc_task_buffer. */ -/* Returns nonzero if just filled up queue */ - -kmp_int32 -__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) -{ - kmp_int32 ret; - kmpc_task_queue_t *queue; - int in_parallel; - kmp_taskq_t *tq; - - KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid)); - - KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */ - - tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - queue = thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - - if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED)) - thunk->th_tasknum = ++queue->tq_tasknum_queuing; - - /* For serial execution dequeue the preceding task and execute it, if one exists */ - /* This cannot be the last task. 
That one is handled in __kmpc_end_taskq */ - - if (!in_parallel && queue->tq_nfull > 0) { - kmpc_thunk_t *prev_thunk; - - KMP_DEBUG_ASSERT(queue->tq_nfull == 1); - - prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); - - KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue)); - - __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel ); - } - - /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */ - /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */ - /* task queue is not full and allocates a thunk (which is then passed to */ - /* __kmpc_task()). So, the enqueue below should never fail due to a full queue. */ - - KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); - - ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel ); - - KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); - - KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid)); - - return ret; -} - -/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */ -/* this should never be called unless in a parallel context */ - -void -__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status) -{ - kmpc_task_queue_t *queue; - kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - int tid = __kmp_tid_from_gtid( global_tid ); - - KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid)); - KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); - - queue = thunk->th.th_shareds->sv_queue; - - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, ct_taskq, loc ); - - /* thunk->th_task is the taskq_task */ - KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK); - 
- /* not supposed to call __kmpc_taskq_task if it's already enqueued */ - KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL); - - /* dequeue taskq thunk from curr_thunk stack */ - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; - - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); - - thunk->th_status = status; - - KMP_MB(); /* flush thunk->th_status before taskq_task enqueued to avoid race condition */ - - /* enqueue taskq_task in thunk into special slot in queue */ - /* GEH - probably don't need to lock taskq slot since only one */ - /* thread enqueues & already a lock set at dequeue point */ - - queue->tq_taskq_slot = thunk; - - KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid)); -} - -/* ends a taskq_task; done generating tasks */ - -void -__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) -{ - kmp_taskq_t *tq; - kmpc_task_queue_t *queue; - int in_parallel; - int tid; - - KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid)); - - tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - queue = thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - tid = __kmp_tid_from_gtid( global_tid ); - - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, ct_taskq, loc ); - - if (in_parallel) { -#if KMP_ARCH_X86 || \ - KMP_ARCH_X86_64 - - KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED ); -#else - { - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - queue->tq_flags |= TQF_ALL_TASKS_QUEUED; - - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - } -#endif - } - - if (thunk->th_flags & TQF_IS_LASTPRIVATE) { - /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */ - /* 
queue if TQF_IS_LASTPRIVATE so we can positively identify that last task */ - /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. When */ - /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so */ - /* we know the last one in the queue is the lastprivate task. Mark the queue */ - /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that */ - /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */ - /* bit signals the instrumented code to do copy-outs after execution). */ - - if (! in_parallel) { - /* No synchronization needed for serial context */ - queue->tq_flags |= TQF_IS_LAST_TASK; - } - else { -#if KMP_ARCH_X86 || \ - KMP_ARCH_X86_64 - - KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK ); -#else - { - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */ - - queue->tq_flags |= TQF_IS_LAST_TASK; - - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - } -#endif - /* to prevent race condition where last task is dequeued but */ - /* flag isn't visible yet (not sure about this) */ - KMP_MB(); - } - } - - /* dequeue taskq thunk from curr_thunk stack */ - if (in_parallel) { - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; - - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); - } - - KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid)); -} - -/* returns thunk for a regular task based on taskq_thunk */ -/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */ - -kmpc_thunk_t * -__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task) -{ - kmp_taskq_t *tq; - kmpc_task_queue_t *queue; - kmpc_thunk_t *new_thunk; - int in_parallel; - - KE_TRACE( 10, 
("__kmpc_task_buffer called (%d)\n", global_tid)); - - KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */ - - tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - queue = taskq_thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - - /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */ - /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */ - /* task queue is not full and allocates a thunk (which is then passed to */ - /* __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be */ - /* the next to be enqueued in __kmpc_task(). */ - - new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid); - new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data; - new_thunk->th_encl_thunk = NULL; - new_thunk->th_task = task; - - /* GEH - shouldn't need to lock the read of tq_flags here */ - new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS; - - new_thunk->th_status = 0; - - KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK)); - - KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid )); - - KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid)); - - return new_thunk; -} - -/* --------------------------------------------------------------------------- */ +/* + * kmp_taskq.c -- TASKQ support for OpenMP. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_io.h" +#include "kmp_error.h" + +#define MAX_MESSAGE 512 + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* + * Taskq routines and global variables + */ + +#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x); + +#define THREAD_ALLOC_FOR_TASKQ + +static int +in_parallel_context( kmp_team_t *team ) +{ + return ! team -> t.t_serialized; +} + +static void +__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + int gtid = *gtid_ref; + int tid = __kmp_tid_from_gtid( gtid ); + kmp_uint32 my_token; + kmpc_task_queue_t *taskq; + kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq; + + if ( __kmp_env_consistency_check ) +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 ); +#else + __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL ); +#endif + + if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) { + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* GEH - need check here under stats to make sure */ + /* inside task (curr_thunk[*tid_ref] != NULL) */ + + my_token =tq->tq_curr_thunk[ tid ]-> th_tasknum; + + taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue; + + KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL); + KMP_MB(); + } +} + +static void +__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) +{ + int gtid = *gtid_ref; + int tid = __kmp_tid_from_gtid( gtid ); + kmp_uint32 my_token; + kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq; + + if ( __kmp_env_consistency_check ) + __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref ); + + if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) { + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + /* GEH - need check here under stats to make sure */ + /* inside task (curr_thunk[tid] != NULL) */ + + my_token = tq->tq_curr_thunk[ tid ]->th_tasknum; + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1; + + KMP_MB(); /* Flush all pending memory write invalidates. */ + } +} + +static void +__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk ) +{ + kmp_uint32 my_token; + kmpc_task_queue_t *taskq; + + /* assume we are always called from an active parallel context */ + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + my_token = thunk -> th_tasknum; + + taskq = thunk -> th.th_shareds -> sv_queue; + + if(taskq->tq_tasknum_serving <= my_token) { + KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL); + KMP_MB(); + taskq->tq_tasknum_serving = my_token +1; + KMP_MB(); + } +} + +#ifdef KMP_DEBUG + +static void +__kmp_dump_TQF(kmp_int32 flags) +{ + if (flags & TQF_IS_ORDERED) + __kmp_printf("ORDERED "); + if (flags & TQF_IS_LASTPRIVATE) + __kmp_printf("LAST_PRIV "); + if (flags & TQF_IS_NOWAIT) + __kmp_printf("NOWAIT "); + if (flags & TQF_HEURISTICS) + __kmp_printf("HEURIST "); + if (flags & TQF_INTERFACE_RESERVED1) + __kmp_printf("RESERV1 "); + if (flags & TQF_INTERFACE_RESERVED2) + __kmp_printf("RESERV2 "); + if (flags & TQF_INTERFACE_RESERVED3) + __kmp_printf("RESERV3 "); + if (flags & TQF_INTERFACE_RESERVED4) + __kmp_printf("RESERV4 "); + if (flags & TQF_IS_LAST_TASK) + __kmp_printf("LAST_TASK "); + if (flags & TQF_TASKQ_TASK) + __kmp_printf("TASKQ_TASK "); + if (flags & TQF_RELEASE_WORKERS) + __kmp_printf("RELEASE "); + if (flags & TQF_ALL_TASKS_QUEUED) + __kmp_printf("ALL_QUEUED "); + if (flags & TQF_PARALLEL_CONTEXT) + __kmp_printf("PARALLEL "); + if (flags & TQF_DEALLOCATED) + __kmp_printf("DEALLOC "); + if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS))) + __kmp_printf("(NONE)"); +} + +static void 
+__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid ) +{ + int i; + int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; + + __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid); + + if (thunk != NULL) { + for (i = 0; i < nproc; i++) { + if( tq->tq_curr_thunk[i] == thunk ) { + __kmp_printf("[%i] ", i); + } + } + __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds); + __kmp_printf("th_task=%p, ", thunk->th_task); + __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk); + __kmp_printf("th_status=%d, ", thunk->th_status); + __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum); + __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags); + } + + __kmp_printf("\n"); +} + +static void +__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) +{ + kmpc_thunk_t *th; + + __kmp_printf(" Thunk stack for T#%d: ", thread_num); + + for (th = thunk; th != NULL; th = th->th_encl_thunk ) + __kmp_printf("%p ", th); + + __kmp_printf("\n"); +} + +static void +__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid ) +{ + int qs, count, i; + kmpc_thunk_t *thunk; + kmpc_task_queue_t *taskq; + + __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid); + + if (queue != NULL) { + int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT; + + if ( __kmp_env_consistency_check ) { + __kmp_printf(" tq_loc : "); + } + if (in_parallel) { + + //if (queue->tq.tq_parent != 0) + //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + //__kmp_acquire_lock(& queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent); + __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child); + __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child); + __kmp_printf(" tq_prev_child : %p\n", 
queue->tq_prev_child); + __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count); + + //__kmp_release_lock(& queue->tq_link_lck, global_tid); + + //if (queue->tq.tq_parent != 0) + //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); + //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + } + + __kmp_printf(" tq_shareds : "); + for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++) + __kmp_printf("%p ", queue->tq_shareds[i].ai_data); + __kmp_printf("\n"); + + if (in_parallel) { + __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing); + __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving); + } + + __kmp_printf(" tq_queue : %p\n", queue->tq_queue); + __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space); + __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot); + + __kmp_printf(" tq_free_thunks : "); + for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free ) + __kmp_printf("%p ", thunk); + __kmp_printf("\n"); + + __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots); + __kmp_printf(" tq_head : %d\n", queue->tq_head); + __kmp_printf(" tq_tail : %d\n", queue->tq_tail); + __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull); + __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat); + __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags); + __kmp_printf("\n"); + + if (in_parallel) { + __kmp_printf(" tq_th_thunks : "); + for (i = 0; i < queue->tq_nproc; i++) { + __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data); + } + __kmp_printf("\n"); + } + + __kmp_printf("\n"); + __kmp_printf(" Queue slots:\n"); + + + qs = queue->tq_tail; + for ( count = 0; count < queue->tq_nfull; ++count ) { + __kmp_printf("(%d)", qs); + __kmp_dump_thunk( 
tq, queue->tq_queue[qs].qs_thunk, global_tid ); + qs = (qs+1) % queue->tq_nslots; + } + + __kmp_printf("\n"); + + if (in_parallel) { + if (queue->tq_taskq_slot != NULL) { + __kmp_printf(" TaskQ slot:\n"); + __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid ); + __kmp_printf("\n"); + } + //__kmp_release_lock(& queue->tq_queue_lck, global_tid); + //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); + } + } + + __kmp_printf(" Taskq freelist: "); + + //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free ) + __kmp_printf("%p ", taskq); + + //__kmp_release_lock( & tq->tq_freelist_lck, global_tid ); + + __kmp_printf("\n\n"); +} + +static void +__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid ) +{ + int i, count, qs; + int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; + kmpc_task_queue_t *queue = curr_queue; + + if (curr_queue == NULL) + return; + + __kmp_printf(" "); + + for (i=0; itq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) { + __kmp_printf(" [%i]", i); + } + } + + __kmp_printf(":"); + + //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + qs = curr_queue->tq_tail; + + for ( count = 0; count < curr_queue->tq_nfull; ++count ) { + __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk); + qs = (qs+1) % curr_queue->tq_nslots; + } + + //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid); + + __kmp_printf("\n"); + + if (curr_queue->tq_first_child) { + //__kmp_acquire_lock(& 
curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + if (curr_queue->tq_first_child) { + for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child; + queue != NULL; + queue = queue->tq_next_child) { + __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid ); + } + } + + //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + } +} + +static void +__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid) +{ + __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid); + + __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid ); + + __kmp_printf("\n"); +} +#endif + +/* --------------------------------------------------------------------------- */ + +/* + New taskq storage routines that try to minimize overhead of mallocs but + still provide cache line alignment. 
+*/ + + +static void * +__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) +{ + void *addr, *orig_addr; + size_t bytes; + + KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) ); + + bytes = sizeof(void *) + CACHE_LINE + size; + +#ifdef THREAD_ALLOC_FOR_TASKQ + orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes ); +#else + KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) ); + orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes ); +#endif /* THREAD_ALLOC_FOR_TASKQ */ + + if (orig_addr == 0) + KMP_FATAL( OutOfHeapMemory ); + + addr = orig_addr; + + if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) { + KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) ); + addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 )); + } + + (* (void **) addr) = orig_addr; + + KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n", + orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1, + (int) size, global_tid )); + + return ( ((void **) addr) + 1 ); +} + +static void +__kmpc_taskq_free(void *p, kmp_int32 global_tid) +{ + KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) ); + + KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid )); + +#ifdef THREAD_ALLOC_FOR_TASKQ + __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) ); +#else + KMP_INTERNAL_FREE( *( ((void **) p)-1) ); +#endif /* THREAD_ALLOC_FOR_TASKQ */ +} + +/* --------------------------------------------------------------------------- */ + +/* + * Keep freed kmpc_task_queue_t on an internal freelist and recycle since + * they're of constant size. 
+ */ + +static kmpc_task_queue_t * +__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks, + kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk, + size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid ) +{ + kmp_int32 i; + size_t bytes; + kmpc_task_queue_t *new_queue; + kmpc_aligned_shared_vars_t *shared_var_array; + char *shared_var_storage; + char *pt; /* for doing byte-adjusted address computations */ + + __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + if( tq->tq_freelist ) { + new_queue = tq -> tq_freelist; + tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free; + + KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED); + + new_queue->tq_flags = 0; + + __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); + } + else { + __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); + + new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid); + new_queue->tq_flags = 0; + } + + /* space in the task queue for queue slots (allocate as one big chunk */ + /* of storage including new_taskq_task space) */ + + sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */ + pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid); + new_queue->tq_thunk_space = (kmpc_thunk_t *)pt; + *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk); + + /* chain the allocated thunks into a freelist for this queue */ + + new_queue->tq_free_thunks = (kmpc_thunk_t *)pt; + + for (i = 0; i < (nthunks - 2); i++) { + ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk); +#ifdef KMP_DEBUG + ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED; +#endif + } + + ((kmpc_thunk_t 
*)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL; +#ifdef KMP_DEBUG + ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED; +#endif + + /* initialize the locks */ + + if (in_parallel) { + __kmp_init_lock( & new_queue->tq_link_lck ); + __kmp_init_lock( & new_queue->tq_free_thunks_lck ); + __kmp_init_lock( & new_queue->tq_queue_lck ); + } + + /* now allocate the slots */ + + bytes = nslots * sizeof (kmpc_aligned_queue_slot_t); + new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid ); + + /* space for array of pointers to shared variable structures */ + sizeof_shareds += sizeof(kmpc_task_queue_t *); + sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */ + + bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t); + shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid); + + bytes = nshareds * sizeof_shareds; + shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid); + + for (i=0; isv_queue = new_queue; + } + new_queue->tq_shareds = shared_var_array; + + + /* array for number of outstanding thunks per thread */ + + if (in_parallel) { + bytes = nproc * sizeof(kmpc_aligned_int32_t); + new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid); + new_queue->tq_nproc = nproc; + + for (i=0; itq_th_thunks[i].ai_data = 0; + } + + return new_queue; +} + +static void +__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid) +{ + __kmpc_taskq_free(p->tq_thunk_space, global_tid); + __kmpc_taskq_free(p->tq_queue, global_tid); + + /* free shared var structure storage */ + __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid); + + /* free array of pointers to shared vars storage */ + __kmpc_taskq_free(p->tq_shareds, global_tid); + +#ifdef KMP_DEBUG + p->tq_first_child = NULL; + p->tq_next_child = NULL; + p->tq_prev_child = NULL; + 
p->tq_ref_count = -10; + p->tq_shareds = NULL; + p->tq_tasknum_queuing = 0; + p->tq_tasknum_serving = 0; + p->tq_queue = NULL; + p->tq_thunk_space = NULL; + p->tq_taskq_slot = NULL; + p->tq_free_thunks = NULL; + p->tq_nslots = 0; + p->tq_head = 0; + p->tq_tail = 0; + p->tq_nfull = 0; + p->tq_hiwat = 0; + + if (in_parallel) { + int i; + + for (i=0; itq_nproc; i++) + p->tq_th_thunks[i].ai_data = 0; + } + if ( __kmp_env_consistency_check ) + p->tq_loc = NULL; + KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED ); + p->tq_flags = TQF_DEALLOCATED; +#endif /* KMP_DEBUG */ + + if (in_parallel) { + __kmpc_taskq_free(p->tq_th_thunks, global_tid); + __kmp_destroy_lock(& p->tq_link_lck); + __kmp_destroy_lock(& p->tq_queue_lck); + __kmp_destroy_lock(& p->tq_free_thunks_lck); + } +#ifdef KMP_DEBUG + p->tq_th_thunks = NULL; +#endif /* KMP_DEBUG */ + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); + p->tq.tq_next_free = tq->tq_freelist; + + tq->tq_freelist = p; + __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); +} + +/* + * Once a group of thunks has been allocated for use in a particular queue, + * these are managed via a per-queue freelist. + * We force a check that there's always a thunk free if we need one. 
+ */ + +static kmpc_thunk_t * +__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid) +{ + kmpc_thunk_t *fl; + + if (in_parallel) { + __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + } + + fl = queue->tq_free_thunks; + + KMP_DEBUG_ASSERT (fl != NULL); + + queue->tq_free_thunks = fl->th.th_next_free; + fl->th_flags = 0; + + if (in_parallel) + __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); + + return fl; +} + +static void +__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid) +{ +#ifdef KMP_DEBUG + p->th_task = 0; + p->th_encl_thunk = 0; + p->th_status = 0; + p->th_tasknum = 0; + /* Also could zero pointers to private vars */ +#endif + + if (in_parallel) { + __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + } + + p->th.th_next_free = queue->tq_free_thunks; + queue->tq_free_thunks = p; + +#ifdef KMP_DEBUG + p->th_flags = TQF_DEALLOCATED; +#endif + + if (in_parallel) + __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); +} + +/* --------------------------------------------------------------------------- */ + +/* returns nonzero if the queue just became full after the enqueue */ + +static kmp_int32 +__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel ) +{ + kmp_int32 ret; + + /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing then) */ + if (in_parallel) { + __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before 
querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + } + + KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots); /* check queue not full */ + + queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk; + + if (queue->tq_head >= queue->tq_nslots) + queue->tq_head = 0; + + (queue->tq_nfull)++; + + KMP_MB(); /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */ + + ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE; + + if (in_parallel) { + /* don't need to wait until workers are released before unlocking */ + __kmp_release_lock(& queue->tq_queue_lck, global_tid); + + if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) { + /* If just creating the root queue, the worker threads are waiting at */ + /* a join barrier until now, when there's something in the queue for */ + /* them to do; release them now to do work. */ + /* This should only be done when this is the first task enqueued, */ + /* so reset the flag here also. 
*/ + + tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers are still in spin mode */ + + KMP_MB(); /* avoid releasing barrier twice if taskq_task switches threads */ + + __kmpc_end_barrier_master( NULL, global_tid); + } + } + + return ret; +} + +static kmpc_thunk_t * +__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel) +{ + kmpc_thunk_t *pt; + int tid = __kmp_tid_from_gtid( global_tid ); + + KMP_DEBUG_ASSERT (queue->tq_nfull > 0); /* check queue not empty */ + + if (queue->tq.tq_parent != NULL && in_parallel) { + int ct; + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + ct = ++(queue->tq_ref_count); + __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", + __LINE__, global_tid, queue, ct)); + } + + pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk; + + if (queue->tq_tail >= queue->tq_nslots) + queue->tq_tail = 0; + + if (in_parallel) { + queue->tq_th_thunks[tid].ai_data++; + + KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */ + + KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n", + global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue)); + } + + (queue->tq_nfull)--; + +#ifdef KMP_DEBUG + KMP_MB(); + + /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */ + + KMP_DEBUG_ASSERT(queue->tq_nfull >= 0); + + if (in_parallel) { + KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH); + } +#endif + + return pt; +} + +/* + * Find the next (non-null) task to dequeue and return it. + * This is never called unless in_parallel=TRUE + * + * Here are the rules for deciding which queue to take the task from: + * 1. Walk up the task queue tree from the current queue's parent and look + * on the way up (for loop, below). + * 2. 
Do a depth-first search back down the tree from the root and + * look (find_task_in_descendant_queue()). + * + * Here are the rules for deciding which task to take from a queue + * (__kmp_find_task_in_queue ()): + * 1. Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task + * must be staged to make sure we execute the last one with + * TQF_IS_LAST_TASK at the end of task queue execution. + * 2. If the queue length is below some high water mark and the taskq task + * is enqueued, prefer running the taskq task. + * 3. Otherwise, take a (normal) task from the queue. + * + * If we do all this and return pt == NULL at the bottom of this routine, + * this means there are no more tasks to execute (except possibly for + * TQF_IS_LASTPRIVATE). + */ + +static kmpc_thunk_t * +__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue) +{ + kmpc_thunk_t *pt = NULL; + int tid = __kmp_tid_from_gtid( global_tid ); + + /* To prevent deadlock from tq_queue_lck if queue already deallocated */ + if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { + + __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); + + /* Check again to avoid race in __kmpc_end_taskq() */ + if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) { + /* if there's enough room in the queue and the dispatcher */ + /* (taskq task) is available, schedule more tasks */ + pt = (kmpc_thunk_t *) queue->tq_taskq_slot; + queue->tq_taskq_slot = NULL; + } + else if (queue->tq_nfull == 0 || + queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) { + /* do nothing if no thunks available or this thread can't */ + /* run any because it already is executing too many */ + + pt = NULL; + } + else if (queue->tq_nfull > 1) { + /* always safe to schedule a 
task even if TQF_IS_LASTPRIVATE */ + + pt = __kmp_dequeue_task (global_tid, queue, TRUE); + } + else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) { + /* one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE */ + + pt = __kmp_dequeue_task (global_tid, queue, TRUE); + } + else if (queue->tq_flags & TQF_IS_LAST_TASK) { + /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */ + /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ + /* instrumentation does copy-out. */ + + pt = __kmp_dequeue_task (global_tid, queue, TRUE); + pt->th_flags |= TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */ + } + } + + /* GEH - What happens here if is lastprivate, but not last task? */ + __kmp_release_lock(& queue->tq_queue_lck, global_tid); + } + + return pt; +} + +/* + * Walk a tree of queues starting at queue's first child + * and return a non-NULL thunk if one can be scheduled. + * Must only be called when in_parallel=TRUE + */ + +static kmpc_thunk_t * +__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) +{ + kmpc_thunk_t *pt = NULL; + kmpc_task_queue_t *queue = curr_queue; + + if (curr_queue->tq_first_child != NULL) { + __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + queue = (kmpc_task_queue_t *) curr_queue->tq_first_child; + if (queue == NULL) { + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + return NULL; + } + + while (queue != NULL) { + int ct; + kmpc_task_queue_t *next; + + ct= ++(queue->tq_ref_count); + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", + __LINE__, global_tid, queue, ct)); + + pt = __kmp_find_task_in_queue (global_tid, queue); + + if (pt != NULL) { + int ct; + + __kmp_acquire_lock(& 
curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", + __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 ); + + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + + return pt; + } + + /* although reference count stays active during descendant walk, shouldn't matter */ + /* since if children still exist, reference counts aren't being monitored anyway */ + + pt = __kmp_find_task_in_descendant_queue (global_tid, queue); + + if (pt != NULL) { + int ct; + + __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", + __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT( ct >= 0 ); + + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + + return pt; + } + + __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + next = queue->tq_next_child; + + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", + __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT( ct >= 0 ); + + queue = next; + } + + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + } + + return pt; +} + +/* + * Walk up the taskq tree looking for a task to execute. + * If we get to the root, search the tree for a descendent queue task. 
+ * Must only be called when in_parallel=TRUE + */ + +static kmpc_thunk_t * +__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) +{ + kmpc_task_queue_t *queue; + kmpc_thunk_t *pt; + + pt = NULL; + + if (curr_queue->tq.tq_parent != NULL) { + queue = curr_queue->tq.tq_parent; + + while (queue != NULL) { + if (queue->tq.tq_parent != NULL) { + int ct; + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + ct = ++(queue->tq_ref_count); + __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", + __LINE__, global_tid, queue, ct)); + } + + pt = __kmp_find_task_in_queue (global_tid, queue); + if (pt != NULL) { + if (queue->tq.tq_parent != NULL) { + int ct; + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */ + + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", + __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT( ct >= 0 ); + + __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + } + + return pt; + } + + if (queue->tq.tq_parent != NULL) { + int ct; + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", + __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT( ct >= 0 ); + } + queue = queue->tq.tq_parent; + + if (queue != NULL) + __kmp_release_lock(& 
queue->tq_link_lck, global_tid); + } + + } + + pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root ); + + return pt; +} + +static int +__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue) +{ + int i; + + /* KMP_MB(); *//* is this really necessary? */ + + for (i=0; itq_nproc; i++) { + if (queue->tq_th_thunks[i].ai_data != 0) + return FALSE; + } + + return TRUE; +} + +static int +__kmp_taskq_has_any_children (kmpc_task_queue_t *queue) +{ + return (queue->tq_first_child != NULL); +} + +static void +__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel ) +{ +#ifdef KMP_DEBUG + kmp_int32 i; + kmpc_thunk_t *thunk; +#endif + + KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); + KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid )); + + /* sub-queue in a recursion, not the root task queue */ + KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL); + + if (in_parallel) { + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + } + + KMP_DEBUG_ASSERT (queue->tq_first_child == NULL); + + /* unlink queue from its siblings if any at this level */ + if (queue->tq_prev_child != NULL) + queue->tq_prev_child->tq_next_child = queue->tq_next_child; + if (queue->tq_next_child != NULL) + queue->tq_next_child->tq_prev_child = queue->tq_prev_child; + if (queue->tq.tq_parent->tq_first_child == queue) + queue->tq.tq_parent->tq_first_child = queue->tq_next_child; + + queue->tq_prev_child = NULL; + queue->tq_next_child = NULL; + + if (in_parallel) { + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n", + __LINE__, global_tid, queue, queue->tq_ref_count)); + + /* wait until all other threads have stopped accessing this queue */ + while (queue->tq_ref_count > 1) { + 
__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL); + + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + } + + __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + } + + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n", + __LINE__, global_tid, queue)); + +#ifdef KMP_DEBUG + KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED); + KMP_DEBUG_ASSERT(queue->tq_nfull == 0); + + for (i=0; itq_nproc; i++) { + KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); + } + + i = 0; + for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free) + ++i; + + KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH)); +#endif + + /* release storage for queue entry */ + __kmp_free_taskq ( tq, queue, TRUE, global_tid ); + + KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); + KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); +} + +/* + * Starting from indicated queue, proceed downward through tree and + * remove all taskqs which are finished, but only go down to taskqs + * which have the "nowait" clause present. Assume this is only called + * when in_parallel=TRUE. 
+ */ + +static void +__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue ) +{ + kmpc_task_queue_t *queue = curr_queue; + + if (curr_queue->tq_first_child != NULL) { + __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + queue = (kmpc_task_queue_t *) curr_queue->tq_first_child; + if (queue != NULL) { + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + return; + } + + while (queue != NULL) { + kmpc_task_queue_t *next; + int ct = ++(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", + __LINE__, global_tid, queue, ct)); + + + /* although reference count stays active during descendant walk, */ + /* shouldn't matter since if children still exist, reference */ + /* counts aren't being monitored anyway */ + + if (queue->tq_flags & TQF_IS_NOWAIT) { + __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue ); + + if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) && + __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) { + + /* + Only remove this if we have not already marked it for deallocation. + This should prevent multiple threads from trying to free this. 
+ */ + + if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) { + if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_release_lock(& queue->tq_queue_lck, global_tid); + + __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE ); + + /* Can't do any more here since can't be sure where sibling queue is so just exit this level */ + return; + } + else { + __kmp_release_lock(& queue->tq_queue_lck, global_tid); + } + } + /* otherwise, just fall through and decrement reference count */ + } + } + + __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + next = queue->tq_next_child; + + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", + __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT( ct >= 0 ); + + queue = next; + } + + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + } +} + +/* + * Starting from indicated queue, proceed downward through tree and + * remove all taskq's assuming all are finished and + * assuming NO other threads are executing at this point. 
+ */ + +static void +__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue ) +{ + kmpc_task_queue_t *next_child; + + queue = (kmpc_task_queue_t *) queue->tq_first_child; + + while (queue != NULL) { + __kmp_remove_all_child_taskq ( tq, global_tid, queue ); + + next_child = queue->tq_next_child; + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE ); + queue = next_child; + } +} + +static void +__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel ) +{ + kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue; + kmp_int32 tid = __kmp_tid_from_gtid( global_tid ); + + KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); + KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid)); + KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + + /* + * For the taskq task, the curr_thunk pushes and pop pairs are set up as follows: + * + * happens exactly once: + * 1) __kmpc_taskq : push (if returning thunk only) + * 4) __kmpc_end_taskq_task : pop + * + * optionally happens *each* time taskq task is dequeued/enqueued: + * 2) __kmpc_taskq_task : pop + * 3) __kmp_execute_task_from_queue : push + * + * execution ordering: 1,(2,3)*,4 + */ + + if (!(thunk->th_flags & TQF_TASKQ_TASK)) { + kmp_int32 index = (queue == tq->tq_root) ? tid : 0; + thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data; + + if ( __kmp_env_consistency_check ) { + __kmp_push_workshare( global_tid, + (queue->tq_flags & TQF_IS_ORDERED) ? 
ct_task_ordered : ct_task, + queue->tq_loc ); + } + } + else { + if ( __kmp_env_consistency_check ) + __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc ); + } + + if (in_parallel) { + thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; + tq->tq_curr_thunk[tid] = thunk; + + KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); + } + + KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid)); + thunk->th_task (global_tid, thunk); + KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid)); + + if (!(thunk->th_flags & TQF_TASKQ_TASK)) { + if ( __kmp_env_consistency_check ) + __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task, + queue->tq_loc ); + + if (in_parallel) { + tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; + thunk->th_encl_thunk = NULL; + KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); + } + + if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) { + __kmp_taskq_check_ordered(global_tid, thunk); + } + + __kmp_free_thunk (queue, thunk, in_parallel, global_tid); + + KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk)); + KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + + if (in_parallel) { + KMP_MB(); /* needed so thunk put on free list before outstanding thunk count is decremented */ + + KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1); + + KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n", + global_tid, queue->tq_th_thunks[tid].ai_data-1, queue)); + + queue->tq_th_thunks[tid].ai_data--; + + /* KMP_MB(); */ /* is MB really necessary ? 
*/ + } + + if (queue->tq.tq_parent != NULL && in_parallel) { + int ct; + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + ct = --(queue->tq_ref_count); + __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", + __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT( ct >= 0 ); + } + } +} + +/* --------------------------------------------------------------------------- */ + +/* starts a taskq; creates and returns a thunk for the taskq_task */ +/* also, returns pointer to shared vars for this thread in "shareds" arg */ + +kmpc_thunk_t * +__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, + size_t sizeof_thunk, size_t sizeof_shareds, + kmp_int32 flags, kmpc_shared_vars_t **shareds ) +{ + int in_parallel; + kmp_int32 nslots, nthunks, nshareds, nproc; + kmpc_task_queue_t *new_queue, *curr_queue; + kmpc_thunk_t *new_taskq_thunk; + kmp_info_t *th; + kmp_team_t *team; + kmp_taskq_t *tq; + kmp_int32 tid; + + KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid)); + + th = __kmp_threads[ global_tid ]; + team = th -> th.th_team; + tq = & team -> t.t_taskq; + nproc = team -> t.t_nproc; + tid = __kmp_tid_from_gtid( global_tid ); + + /* find out whether this is a parallel taskq or serialized one. */ + in_parallel = in_parallel_context( team ); + + if( ! tq->tq_root ) { + if (in_parallel) { + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; + + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; + } + + if (in_parallel) { + /* This shouldn't be a barrier region boundary, it will confuse the user. */ + /* Need the boundary to be at the end taskq instead. */ + if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) { + /* Creating the active root queue, and we are not the master thread. 
*/ + /* The master thread below created the queue and tasks have been */ + /* enqueued, and the master thread released this barrier. This */ + /* worker thread can now proceed and execute tasks. See also the */ + /* TQF_RELEASE_WORKERS which is used to handle this case. */ + + *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data; + + KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid)); + + return NULL; + } + } + + /* master thread only executes this code */ + + if( tq->tq_curr_thunk_capacity < nproc ) { + if(tq->tq_curr_thunk) + __kmp_free(tq->tq_curr_thunk); + else { + /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */ + __kmp_init_lock( & tq->tq_freelist_lck ); + } + + tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) ); + tq -> tq_curr_thunk_capacity = nproc; + } + + if (in_parallel) + tq->tq_global_flags = TQF_RELEASE_WORKERS; + } + + /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */ + /* on some heuristics (e.g., depth of queue nesting?). */ + + nslots = (in_parallel) ? (2 * nproc) : 1; + + /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */ + /* jobs being executed by other threads, and one extra for taskq slot */ + + nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2; + + /* Only the root taskq gets a per-thread array of shareds. */ + /* The rest of the taskq's only get one copy of the shared vars. */ + + nshareds = ( !tq->tq_root && in_parallel) ? 
nproc : 1; + + /* create overall queue data structure and its components that require allocation */ + + new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc, + sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid ); + + /* rest of new_queue initializations */ + + new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS; + + if (in_parallel) { + new_queue->tq_tasknum_queuing = 0; + new_queue->tq_tasknum_serving = 0; + new_queue->tq_flags |= TQF_PARALLEL_CONTEXT; + } + + new_queue->tq_taskq_slot = NULL; + new_queue->tq_nslots = nslots; + new_queue->tq_hiwat = HIGH_WATER_MARK (nslots); + new_queue->tq_nfull = 0; + new_queue->tq_head = 0; + new_queue->tq_tail = 0; + new_queue->tq_loc = loc; + + if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) { + /* prepare to serve the first-queued task's ORDERED directive */ + new_queue->tq_tasknum_serving = 1; + + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; + + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; + } + + /* create a new thunk for the taskq_task in the new_queue */ + *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data; + + new_taskq_thunk->th.th_shareds = *shareds; + new_taskq_thunk->th_task = taskq_task; + new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK; + new_taskq_thunk->th_status = 0; + + KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK); + + /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */ + + /* insert the new task queue into the tree, but only after all fields initialized */ + + if (in_parallel) { + if( ! 
tq->tq_root ) { + new_queue->tq.tq_parent = NULL; + new_queue->tq_first_child = NULL; + new_queue->tq_next_child = NULL; + new_queue->tq_prev_child = NULL; + new_queue->tq_ref_count = 1; + tq->tq_root = new_queue; + } + else { + curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; + new_queue->tq.tq_parent = curr_queue; + new_queue->tq_first_child = NULL; + new_queue->tq_prev_child = NULL; + new_queue->tq_ref_count = 1; /* for this the thread that built the queue */ + + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", + __LINE__, global_tid, new_queue, new_queue->tq_ref_count)); + + __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child; + + if (curr_queue->tq_first_child != NULL) + curr_queue->tq_first_child->tq_prev_child = new_queue; + + curr_queue->tq_first_child = new_queue; + + __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + } + + /* set up thunk stack only after code that determines curr_queue above */ + new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; + tq->tq_curr_thunk[tid] = new_taskq_thunk; + + KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); + } + else { + new_taskq_thunk->th_encl_thunk = 0; + new_queue->tq.tq_parent = NULL; + new_queue->tq_first_child = NULL; + new_queue->tq_next_child = NULL; + new_queue->tq_prev_child = NULL; + new_queue->tq_ref_count = 1; + } + +#ifdef KMP_DEBUG + KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid)); + KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid )); + + if (in_parallel) { + KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid)); + } else { + KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid)); + } + + 
KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid )); + + if (in_parallel) { + KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); + } +#endif /* KMP_DEBUG */ + + if ( __kmp_env_consistency_check ) + __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc ); + + KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid)); + + return new_taskq_thunk; +} + + +/* ends a taskq; last thread out destroys the queue */ + +void +__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk) +{ +#ifdef KMP_DEBUG + kmp_int32 i; +#endif + kmp_taskq_t *tq; + int in_parallel; + kmp_info_t *th; + kmp_int32 is_outermost; + kmpc_task_queue_t *queue; + kmpc_thunk_t *thunk; + int nproc; + + KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid)); + + tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq; + nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; + + /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */ + queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue; + + KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid)); + is_outermost = (queue == tq->tq_root); + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + + if (in_parallel) { + kmp_uint32 spins; + + /* this is just a safeguard to release the waiting threads if */ + /* the outermost taskq never queues a task */ + + if (is_outermost && (KMP_MASTER_GTID( global_tid ))) { + if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) { + /* no lock needed, workers are still in spin mode */ + tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; + + __kmp_end_split_barrier( bs_plain_barrier, global_tid ); + } + } + + /* keep dequeueing work until all tasks are queued and dequeued */ + + do { + /* wait until something is available to dequeue */ + KMP_INIT_YIELD(spins); + + while ( (queue->tq_nfull == 0) + && (queue->tq_taskq_slot == NULL) + && (! 
__kmp_taskq_has_any_children(queue) ) + && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) ) + ) { + KMP_YIELD_WHEN( TRUE, spins ); + } + + /* check to see if we can execute tasks in the queue */ + while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) ) + && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL + ) { + KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid)); + __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); + } + + /* see if work found can be found in a descendant queue */ + if ( (__kmp_taskq_has_any_children(queue)) + && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL + ) { + + KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid )); + + __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); + } + + } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED)) + || (queue->tq_nfull != 0) + ); + + KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid)); + + /* wait while all tasks are not finished and more work found + in descendant queues */ + + while ( (!__kmp_taskq_tasks_finished(queue)) + && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL + ) { + + KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); + + __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); + } + + KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid)); + + if (!is_outermost) { + /* need to return if NOWAIT present and not outermost taskq */ + + if (queue->tq_flags & TQF_IS_NOWAIT) { + __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + queue->tq_ref_count--; + KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 ); + 
__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + + KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid)); + + return; + } + + __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue ); + + /* WAIT until all tasks are finished and no child queues exist before proceeding */ + KMP_INIT_YIELD(spins); + + while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) { + thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue ); + + if (thunk != NULL) { + KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); + __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); + } + + KMP_YIELD_WHEN( thunk == NULL, spins ); + + __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue ); + } + + __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); + if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { + queue->tq_flags |= TQF_DEALLOCATED; + } + __kmp_release_lock(& queue->tq_queue_lck, global_tid); + + /* only the allocating thread can deallocate the queue */ + if (taskq_thunk != NULL) { + __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE ); + } + + KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid)); + + return; + } + + /* Outermost Queue: steal work from descendants until all tasks are finished */ + + KMP_INIT_YIELD(spins); + + while (!__kmp_taskq_tasks_finished(queue)) { + thunk = __kmp_find_task_in_descendant_queue(global_tid, queue); + + if (thunk != NULL) { + KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); + + __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); + } + + KMP_YIELD_WHEN( thunk == NULL, spins ); + } + + /* Need this barrier to prevent destruction of queue before threads have all executed 
above code */ + /* This may need to be done earlier when NOWAIT is implemented for the outermost level */ + + if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) { + /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */ + /* for right now, everybody waits, and the master thread destroys the */ + /* remaining queues. */ + + __kmp_remove_all_child_taskq( tq, global_tid, queue ); + + /* Now destroy the root queue */ + KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue )); + KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + +#ifdef KMP_DEBUG + /* the root queue entry */ + KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL)); + + /* children must all be gone by now because of barrier above */ + KMP_DEBUG_ASSERT (queue->tq_first_child == NULL); + + for (i=0; i<nproc; i++) { + KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); + } + + for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free); + + KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH)); + + for (i = 0; i < nproc; i++) { + KMP_DEBUG_ASSERT( ! 
tq->tq_curr_thunk[i] ); + } +#endif + /* unlink the root queue entry */ + tq -> tq_root = NULL; + + /* release storage for root queue entry */ + KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid)); + + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_free_taskq ( tq, queue, in_parallel, global_tid ); + + KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); + + /* release the workers now that the data structures are up to date */ + __kmp_end_split_barrier( bs_plain_barrier, global_tid ); + } + + th = __kmp_threads[ global_tid ]; + + /* Reset ORDERED SECTION to parallel version */ + th->th.th_dispatch->th_deo_fcn = 0; + + /* Reset ORDERED SECTION to parallel version */ + th->th.th_dispatch->th_dxo_fcn = 0; + } + else { + /* in serial execution context, dequeue the last task */ + /* and execute it, if there were any tasks encountered */ + + if (queue->tq_nfull > 0) { + KMP_DEBUG_ASSERT(queue->tq_nfull == 1); + + thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); + + if (queue->tq_flags & TQF_IS_LAST_TASK) { + /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */ + /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ + /* instrumentation does copy-out. 
*/ + + /* no need for test_then_or call since already locked */ + thunk->th_flags |= TQF_IS_LAST_TASK; + } + + KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue)); + + __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); + } + + /* destroy the unattached serial queue now that there is no more work to do */ + KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid)); + KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + +#ifdef KMP_DEBUG + i = 0; + for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free) + ++i; + KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1); +#endif + /* release storage for unattached serial queue */ + KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid)); + + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_free_taskq ( tq, queue, in_parallel, global_tid ); + } + + KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid)); +} + +/* Enqueues a task for thunk previously created by __kmpc_task_buffer. */ +/* Returns nonzero if just filled up queue */ + +kmp_int32 +__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) +{ + kmp_int32 ret; + kmpc_task_queue_t *queue; + int in_parallel; + kmp_taskq_t *tq; + + KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid)); + + KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */ + + tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; + queue = thunk->th.th_shareds->sv_queue; + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + + if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED)) + thunk->th_tasknum = ++queue->tq_tasknum_queuing; + + /* For serial execution dequeue the preceding task and execute it, if one exists */ + /* This cannot be the last task. 
That one is handled in __kmpc_end_taskq */ + + if (!in_parallel && queue->tq_nfull > 0) { + kmpc_thunk_t *prev_thunk; + + KMP_DEBUG_ASSERT(queue->tq_nfull == 1); + + prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); + + KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue)); + + __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel ); + } + + /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */ + /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */ + /* task queue is not full and allocates a thunk (which is then passed to */ + /* __kmpc_task()). So, the enqueue below should never fail due to a full queue. */ + + KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); + + ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel ); + + KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + + KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid)); + + return ret; +} + +/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */ +/* this should never be called unless in a parallel context */ + +void +__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status) +{ + kmpc_task_queue_t *queue; + kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; + int tid = __kmp_tid_from_gtid( global_tid ); + + KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid)); + KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); + + queue = thunk->th.th_shareds->sv_queue; + + if ( __kmp_env_consistency_check ) + __kmp_pop_workshare( global_tid, ct_taskq, loc ); + + /* thunk->th_task is the taskq_task */ + KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK); + 
+ /* not supposed to call __kmpc_taskq_task if it's already enqueued */ + KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL); + + /* dequeue taskq thunk from curr_thunk stack */ + tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; + thunk->th_encl_thunk = NULL; + + KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); + + thunk->th_status = status; + + KMP_MB(); /* flush thunk->th_status before taskq_task enqueued to avoid race condition */ + + /* enqueue taskq_task in thunk into special slot in queue */ + /* GEH - probably don't need to lock taskq slot since only one */ + /* thread enqueues & already a lock set at dequeue point */ + + queue->tq_taskq_slot = thunk; + + KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid)); +} + +/* ends a taskq_task; done generating tasks */ + +void +__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) +{ + kmp_taskq_t *tq; + kmpc_task_queue_t *queue; + int in_parallel; + int tid; + + KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid)); + + tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; + queue = thunk->th.th_shareds->sv_queue; + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + tid = __kmp_tid_from_gtid( global_tid ); + + if ( __kmp_env_consistency_check ) + __kmp_pop_workshare( global_tid, ct_taskq, loc ); + + if (in_parallel) { +#if KMP_ARCH_X86 || \ + KMP_ARCH_X86_64 + + KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED ); +#else + { + __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + + queue->tq_flags |= TQF_ALL_TASKS_QUEUED; + + __kmp_release_lock(& queue->tq_queue_lck, global_tid); + } +#endif + } + + if (thunk->th_flags & TQF_IS_LASTPRIVATE) { + /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */ + /* 
queue if TQF_IS_LASTPRIVATE so we can positively identify that last task */ + /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. When */ + /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so */ + /* we know the last one in the queue is the lastprivate task. Mark the queue */ + /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that */ + /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */ + /* bit signals the instrumented code to do copy-outs after execution). */ + + if (! in_parallel) { + /* No synchronization needed for serial context */ + queue->tq_flags |= TQF_IS_LAST_TASK; + } + else { +#if KMP_ARCH_X86 || \ + KMP_ARCH_X86_64 + + KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK ); +#else + { + __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); + + KMP_MB(); /* make sure data structures are in consistent state before querying them */ + /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */ + + queue->tq_flags |= TQF_IS_LAST_TASK; + + __kmp_release_lock(& queue->tq_queue_lck, global_tid); + } +#endif + /* to prevent race condition where last task is dequeued but */ + /* flag isn't visible yet (not sure about this) */ + KMP_MB(); + } + } + + /* dequeue taskq thunk from curr_thunk stack */ + if (in_parallel) { + tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; + thunk->th_encl_thunk = NULL; + + KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); + } + + KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid)); +} + +/* returns thunk for a regular task based on taskq_thunk */ +/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */ + +kmpc_thunk_t * +__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task) +{ + kmp_taskq_t *tq; + kmpc_task_queue_t *queue; + kmpc_thunk_t *new_thunk; + int in_parallel; + + KE_TRACE( 10, 
("__kmpc_task_buffer called (%d)\n", global_tid)); + + KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */ + + tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; + queue = taskq_thunk->th.th_shareds->sv_queue; + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + + /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */ + /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */ + /* task queue is not full and allocates a thunk (which is then passed to */ + /* __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be */ + /* the next to be enqueued in __kmpc_task(). */ + + new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid); + new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data; + new_thunk->th_encl_thunk = NULL; + new_thunk->th_task = task; + + /* GEH - shouldn't need to lock the read of tq_flags here */ + new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS; + + new_thunk->th_status = 0; + + KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK)); + + KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid )); + + KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid)); + + return new_thunk; +} + +/* --------------------------------------------------------------------------- */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c b/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c index 58010e25963..240319fd9f2 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c +++ b/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c @@ -1,733 +1,733 @@ -/* - * kmp_threadprivate.c -- OpenMP threadprivate support library - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of 
Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_itt.h" -#include "kmp_i18n.h" - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#define USE_CHECKS_COMMON - -#define KMP_INLINE_SUBR 1 - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); -struct private_common * -kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); - -struct shared_table __kmp_threadprivate_d_table; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -static -#ifdef KMP_INLINE_SUBR -__forceinline -#endif -struct private_common * -__kmp_threadprivate_find_task_common( struct common_table *tbl, int gtid, void *pc_addr ) - -{ - struct private_common *tn; - -#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, called with address %p\n", - gtid, pc_addr ) ); - dump_list(); -#endif - - for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) { - if (tn->gbl_addr == pc_addr) { -#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, found node %p on list\n", - gtid, pc_addr ) ); -#endif - return tn; - } - } - return 0; -} - -static -#ifdef KMP_INLINE_SUBR -__forceinline -#endif -struct shared_common * -__kmp_find_shared_task_common( struct shared_table *tbl, int gtid, void *pc_addr ) -{ - struct shared_common *tn; - - for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) { - if (tn->gbl_addr == pc_addr) { 
-#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_find_shared_task_common: thread#%d, found node %p on list\n", - gtid, pc_addr ) ); -#endif - return tn; - } - } - return 0; -} - - -/* - * Create a template for the data initialized storage. - * Either the template is NULL indicating zero fill, - * or the template is a copy of the original data. - */ - -static struct private_data * -__kmp_init_common_data( void *pc_addr, size_t pc_size ) -{ - struct private_data *d; - size_t i; - char *p; - - d = (struct private_data *) __kmp_allocate( sizeof( struct private_data ) ); -/* - d->data = 0; // AC: commented out because __kmp_allocate zeroes the memory - d->next = 0; -*/ - d->size = pc_size; - d->more = 1; - - p = (char*)pc_addr; - - for (i = pc_size; i > 0; --i) { - if (*p++ != '\0') { - d->data = __kmp_allocate( pc_size ); - KMP_MEMCPY( d->data, pc_addr, pc_size ); - break; - } - } - - return d; -} - -/* - * Initialize the data area from the template. - */ - -static void -__kmp_copy_common_data( void *pc_addr, struct private_data *d ) -{ - char *addr = (char *) pc_addr; - int i, offset; - - for (offset = 0; d != 0; d = d->next) { - for (i = d->more; i > 0; --i) { - if (d->data == 0) - memset( & addr[ offset ], '\0', d->size ); - else - KMP_MEMCPY( & addr[ offset ], d->data, d->size ); - offset += d->size; - } - } -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */ -void -__kmp_common_initialize( void ) -{ - if( ! 
TCR_4(__kmp_init_common) ) { - int q; -#ifdef KMP_DEBUG - int gtid; -#endif - - __kmp_threadpriv_cache_list = NULL; - -#ifdef KMP_DEBUG - /* verify the uber masters were initialized */ - for(gtid = 0 ; gtid < __kmp_threads_capacity; gtid++ ) - if( __kmp_root[gtid] ) { - KMP_DEBUG_ASSERT( __kmp_root[gtid]->r.r_uber_thread ); - for ( q = 0; q< KMP_HASH_TABLE_SIZE; ++q) - KMP_DEBUG_ASSERT( !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] ); -/* __kmp_root[ gitd ]-> r.r_uber_thread -> th.th_pri_common -> data[ q ] = 0;*/ - } -#endif /* KMP_DEBUG */ - - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) - __kmp_threadprivate_d_table.data[ q ] = 0; - - TCW_4(__kmp_init_common, TRUE); - } -} - -/* Call all destructors for threadprivate data belonging to all threads. - Currently unused! */ -void -__kmp_common_destroy( void ) -{ - if( TCR_4(__kmp_init_common) ) { - int q; - - TCW_4(__kmp_init_common, FALSE); - - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { - int gtid; - struct private_common *tn; - struct shared_common *d_tn; - - /* C++ destructors need to be called once per thread before exiting */ - /* don't call destructors for master thread though unless we used copy constructor */ - - for (d_tn = __kmp_threadprivate_d_table.data[ q ]; d_tn; d_tn = d_tn->next) { - if (d_tn->is_vec) { - if (d_tn->dt.dtorv != 0) { - for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { - if( __kmp_threads[gtid] ) { - if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : - (! KMP_UBER_GTID (gtid)) ) { - tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common, - gtid, d_tn->gbl_addr ); - if (tn) { - (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len); - } - } - } - } - if (d_tn->obj_init != 0) { - (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len); - } - } - } else { - if (d_tn->dt.dtor != 0) { - for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { - if( __kmp_threads[gtid] ) { - if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : - (! 
KMP_UBER_GTID (gtid)) ) { - tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common, - gtid, d_tn->gbl_addr ); - if (tn) { - (*d_tn->dt.dtor) (tn->par_addr); - } - } - } - } - if (d_tn->obj_init != 0) { - (*d_tn->dt.dtor) (d_tn->obj_init); - } - } - } - } - __kmp_threadprivate_d_table.data[ q ] = 0; - } - } -} - -/* Call all destructors for threadprivate data belonging to this thread */ -void -__kmp_common_destroy_gtid( int gtid ) -{ - struct private_common *tn; - struct shared_common *d_tn; - - KC_TRACE( 10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid ) ); - if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : - (! KMP_UBER_GTID (gtid)) ) { - - if( TCR_4(__kmp_init_common) ) { - - /* Cannot do this here since not all threads have destroyed their data */ - /* TCW_4(__kmp_init_common, FALSE); */ - - for (tn = __kmp_threads[ gtid ]->th.th_pri_head; tn; tn = tn->link) { - - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - gtid, tn->gbl_addr ); - - KMP_DEBUG_ASSERT( d_tn ); - - if (d_tn->is_vec) { - if (d_tn->dt.dtorv != 0) { - (void) (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len); - } - if (d_tn->obj_init != 0) { - (void) (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len); - } - } else { - if (d_tn->dt.dtor != 0) { - (void) (*d_tn->dt.dtor) (tn->par_addr); - } - if (d_tn->obj_init != 0) { - (void) (*d_tn->dt.dtor) (d_tn->obj_init); - } - } - } - KC_TRACE( 30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors complete\n", - gtid ) ); - } - } -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#ifdef KMP_TASK_COMMON_DEBUG -static void -dump_list( void ) -{ - int p, q; - - for (p = 0; p < __kmp_all_nth; ++p) { - if( !__kmp_threads[p] ) continue; - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { - if (__kmp_threads[ p ]->th.th_pri_common->data[ q ]) { - struct private_common *tn; - - KC_TRACE( 10, 
( "\tdump_list: gtid:%d addresses\n", p ) ); - - for (tn = __kmp_threads[ p ]->th.th_pri_common->data[ q ]; tn; tn = tn->next) { - KC_TRACE( 10, ( "\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n", - tn->gbl_addr, tn->par_addr ) ); - } - } - } - } -} -#endif /* KMP_TASK_COMMON_DEBUG */ - - -/* - * NOTE: this routine is to be called only from the serial part of the program. - */ - -void -kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ) -{ - struct shared_common **lnk_tn, *d_tn; - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] && - __kmp_threads[ gtid ] -> th.th_root -> r.r_active == 0 ); - - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - gtid, pc_addr ); - - if (d_tn == 0) { - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); - - d_tn->gbl_addr = pc_addr; - d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size ); -/* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->ct.ctor = 0; - d_tn->cct.cctor = 0;; - d_tn->dt.dtor = 0; - d_tn->is_vec = FALSE; - d_tn->vec_len = 0L; -*/ - d_tn->cmn_size = pc_size; - - __kmp_acquire_lock( &__kmp_global_lock, gtid ); - - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - - __kmp_release_lock( &__kmp_global_lock, gtid ); - } -} - -struct private_common * -kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ) -{ - struct private_common *tn, **tt; - struct shared_common *d_tn; - - /* +++++++++ START OF CRITICAL SECTION +++++++++ */ - - __kmp_acquire_lock( & __kmp_global_lock, gtid ); - - tn = (struct private_common *) __kmp_allocate( sizeof (struct private_common) ); - - tn->gbl_addr = pc_addr; - - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - gtid, pc_addr ); /* Only the MASTER data table exists. 
*/ - - if (d_tn != 0) { - /* This threadprivate variable has already been seen. */ - - if ( d_tn->pod_init == 0 && d_tn->obj_init == 0 ) { - d_tn->cmn_size = pc_size; - - if (d_tn->is_vec) { - if (d_tn->ct.ctorv != 0) { - /* Construct from scratch so no prototype exists */ - d_tn->obj_init = 0; - } - else if (d_tn->cct.cctorv != 0) { - /* Now data initialize the prototype since it was previously registered */ - d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size ); - (void) (*d_tn->cct.cctorv) (d_tn->obj_init, pc_addr, d_tn->vec_len); - } - else { - d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size ); - } - } else { - if (d_tn->ct.ctor != 0) { - /* Construct from scratch so no prototype exists */ - d_tn->obj_init = 0; - } - else if (d_tn->cct.cctor != 0) { - /* Now data initialize the prototype since it was previously registered */ - d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size ); - (void) (*d_tn->cct.cctor) (d_tn->obj_init, pc_addr); - } - else { - d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size ); - } - } - } - } - else { - struct shared_common **lnk_tn; - - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); - d_tn->gbl_addr = pc_addr; - d_tn->cmn_size = pc_size; - d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size ); -/* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->ct.ctor = 0; - d_tn->cct.cctor = 0; - d_tn->dt.dtor = 0; - d_tn->is_vec = FALSE; - d_tn->vec_len = 0L; -*/ - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } - - tn->cmn_size = d_tn->cmn_size; - - if ( (__kmp_foreign_tp) ? 
(KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) { - tn->par_addr = (void *) pc_addr; - } - else { - tn->par_addr = (void *) __kmp_allocate( tn->cmn_size ); - } - - __kmp_release_lock( & __kmp_global_lock, gtid ); - - /* +++++++++ END OF CRITICAL SECTION +++++++++ */ - -#ifdef USE_CHECKS_COMMON - if (pc_size > d_tn->cmn_size) { - KC_TRACE( 10, ( "__kmp_threadprivate_insert: THREADPRIVATE: %p (%" - KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n", - pc_addr, pc_size, d_tn->cmn_size ) ); - KMP_FATAL( TPCommonBlocksInconsist ); - } -#endif /* USE_CHECKS_COMMON */ - - tt = &(__kmp_threads[ gtid ]->th.th_pri_common->data[ KMP_HASH(pc_addr) ]); - -#ifdef KMP_TASK_COMMON_DEBUG - if (*tt != 0) { - KC_TRACE( 10, ( "__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n", - gtid, pc_addr ) ); - } -#endif - tn->next = *tt; - *tt = tn; - -#ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n", - gtid, pc_addr ) ); - dump_list( ); -#endif - - /* Link the node into a simple list */ - - tn->link = __kmp_threads[ gtid ]->th.th_pri_head; - __kmp_threads[ gtid ]->th.th_pri_head = tn; - -#ifdef BUILD_TV - __kmp_tv_threadprivate_store( __kmp_threads[ gtid ], tn->gbl_addr, tn->par_addr ); -#endif - - if( (__kmp_foreign_tp) ? 
(KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) - return tn; - - /* - * if C++ object with copy constructor, use it; - * else if C++ object with constructor, use it for the non-master copies only; - * else use pod_init and memcpy - * - * C++ constructors need to be called once for each non-master thread on allocate - * C++ copy constructors need to be called once for each thread on allocate - */ - - /* - * C++ object with constructors/destructors; - * don't call constructors for master thread though - */ - if (d_tn->is_vec) { - if ( d_tn->ct.ctorv != 0) { - (void) (*d_tn->ct.ctorv) (tn->par_addr, d_tn->vec_len); - } else if (d_tn->cct.cctorv != 0) { - (void) (*d_tn->cct.cctorv) (tn->par_addr, d_tn->obj_init, d_tn->vec_len); - } else if (tn->par_addr != tn->gbl_addr) { - __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); - } - } else { - if ( d_tn->ct.ctor != 0 ) { - (void) (*d_tn->ct.ctor) (tn->par_addr); - } else if (d_tn->cct.cctor != 0) { - (void) (*d_tn->cct.cctor) (tn->par_addr, d_tn->obj_init); - } else if (tn->par_addr != tn->gbl_addr) { - __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); - } - } -/* !BUILD_OPENMP_C - if (tn->par_addr != tn->gbl_addr) - __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */ - - return tn; -} - -/* ------------------------------------------------------------------------ */ -/* We are currently parallel, and we know the thread id. */ -/* ------------------------------------------------------------------------ */ - -/*! - @ingroup THREADPRIVATE - - @param loc source location information - @param data pointer to data being privatized - @param ctor pointer to constructor function for data - @param cctor pointer to copy constructor function for data - @param dtor pointer to destructor function for data - - Register constructors and destructors for thread private data. - This function is called when executing in parallel, when we know the thread id. 
-*/ -void -__kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor) -{ - struct shared_common *d_tn, **lnk_tn; - - KC_TRACE( 10, ("__kmpc_threadprivate_register: called\n" ) ); - -#ifdef USE_CHECKS_COMMON - /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ - KMP_ASSERT( cctor == 0); -#endif /* USE_CHECKS_COMMON */ - - /* Only the global data table exists. */ - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, -1, data ); - - if (d_tn == 0) { - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); - d_tn->gbl_addr = data; - - d_tn->ct.ctor = ctor; - d_tn->cct.cctor = cctor; - d_tn->dt.dtor = dtor; -/* - d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->vec_len = 0L; - d_tn->obj_init = 0; - d_tn->pod_init = 0; -*/ - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } -} - -void * -__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size) -{ - void *ret; - struct private_common *tn; - - KC_TRACE( 10, ("__kmpc_threadprivate: T#%d called\n", global_tid ) ); - -#ifdef USE_CHECKS_COMMON - if (! __kmp_init_serial) - KMP_FATAL( RTLNotInitialized ); -#endif /* USE_CHECKS_COMMON */ - - if ( ! __kmp_threads[global_tid] -> th.th_root -> r.r_active && ! 
__kmp_foreign_tp ) { - /* The parallel address will NEVER overlap with the data_address */ - /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the data_address; use data_address = data */ - - KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting private data\n", global_tid ) ); - kmp_threadprivate_insert_private_data( global_tid, data, data, size ); - - ret = data; - } - else { - KC_TRACE( 50, ("__kmpc_threadprivate: T#%d try to find private data at address %p\n", - global_tid, data ) ); - tn = __kmp_threadprivate_find_task_common( __kmp_threads[ global_tid ]->th.th_pri_common, global_tid, data ); - - if ( tn ) { - KC_TRACE( 20, ("__kmpc_threadprivate: T#%d found data\n", global_tid ) ); -#ifdef USE_CHECKS_COMMON - if ((size_t) size > tn->cmn_size) { - KC_TRACE( 10, ( "THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n", - data, size, tn->cmn_size ) ); - KMP_FATAL( TPCommonBlocksInconsist ); - } -#endif /* USE_CHECKS_COMMON */ - } - else { - /* The parallel address will NEVER overlap with the data_address */ - /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use data_address = data */ - KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid ) ); - tn = kmp_threadprivate_insert( global_tid, data, data, size ); - } - - ret = tn->par_addr; - } - KC_TRACE( 10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n", - global_tid, ret ) ); - - return ret; -} - -/*! - @ingroup THREADPRIVATE - @param loc source location information - @param global_tid global thread number - @param data pointer to data to privatize - @param size size of data to privatize - @param cache pointer to cache - @return pointer to private storage - - Allocate private storage for threadprivate data. -*/ -void * -__kmpc_threadprivate_cached( - ident_t * loc, - kmp_int32 global_tid, // gtid. - void * data, // Pointer to original global variable. - size_t size, // Size of original global variable. 
- void *** cache -) { - KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, address: %p, size: %" - KMP_SIZE_T_SPEC "\n", - global_tid, *cache, data, size ) ); - - if ( TCR_PTR(*cache) == 0) { - __kmp_acquire_lock( & __kmp_global_lock, global_tid ); - - if ( TCR_PTR(*cache) == 0) { - __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); - __kmp_tp_cached = 1; - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - void ** my_cache; - KMP_ITT_IGNORE( - my_cache = (void**) - __kmp_allocate(sizeof( void * ) * __kmp_tp_capacity + sizeof ( kmp_cached_addr_t )); - ); - // No need to zero the allocated memory; __kmp_allocate does that. - KC_TRACE( 50, ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n", - global_tid, my_cache ) ); - - /* TODO: free all this memory in __kmp_common_destroy using __kmp_threadpriv_cache_list */ - /* Add address of mycache to linked list for cleanup later */ - kmp_cached_addr_t *tp_cache_addr; - - tp_cache_addr = (kmp_cached_addr_t *) & my_cache[__kmp_tp_capacity]; - tp_cache_addr -> addr = my_cache; - tp_cache_addr -> next = __kmp_threadpriv_cache_list; - __kmp_threadpriv_cache_list = tp_cache_addr; - - KMP_MB(); - - TCW_PTR( *cache, my_cache); - - KMP_MB(); - } - - __kmp_release_lock( & __kmp_global_lock, global_tid ); - } - - void *ret; - if ((ret = TCR_PTR((*cache)[ global_tid ])) == 0) { - ret = __kmpc_threadprivate( loc, global_tid, data, (size_t) size); - - TCW_PTR( (*cache)[ global_tid ], ret); - } - KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n", - global_tid, ret ) ); - - return ret; -} - -/*! - @ingroup THREADPRIVATE - @param loc source location information - @param data pointer to data being privatized - @param ctor pointer to constructor function for data - @param cctor pointer to copy constructor function for data - @param dtor pointer to destructor function for data - @param vector_length length of the vector (bytes or elements?) 
- Register vector constructors and destructors for thread private data. -*/ -void -__kmpc_threadprivate_register_vec( ident_t *loc, void *data, kmpc_ctor_vec ctor, - kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, - size_t vector_length ) -{ - struct shared_common *d_tn, **lnk_tn; - - KC_TRACE( 10, ("__kmpc_threadprivate_register_vec: called\n" ) ); - -#ifdef USE_CHECKS_COMMON - /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ - KMP_ASSERT( cctor == 0); -#endif /* USE_CHECKS_COMMON */ - - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - -1, data ); /* Only the global data table exists. */ - - if (d_tn == 0) { - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); - d_tn->gbl_addr = data; - - d_tn->ct.ctorv = ctor; - d_tn->cct.cctorv = cctor; - d_tn->dt.dtorv = dtor; - d_tn->is_vec = TRUE; - d_tn->vec_len = (size_t) vector_length; -/* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->pod_init = 0; -*/ - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } -} +/* + * kmp_threadprivate.c -- OpenMP threadprivate support library + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_itt.h" +#include "kmp_i18n.h" + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#define USE_CHECKS_COMMON + +#define KMP_INLINE_SUBR 1 + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); +struct private_common * +kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); + +struct shared_table __kmp_threadprivate_d_table; + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +static +#ifdef KMP_INLINE_SUBR +__forceinline +#endif +struct private_common * +__kmp_threadprivate_find_task_common( struct common_table *tbl, int gtid, void *pc_addr ) + +{ + struct private_common *tn; + +#ifdef KMP_TASK_COMMON_DEBUG + KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, called with address %p\n", + gtid, pc_addr ) ); + dump_list(); +#endif + + for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) { + if (tn->gbl_addr == pc_addr) { +#ifdef KMP_TASK_COMMON_DEBUG + KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, found node %p on list\n", + gtid, pc_addr ) ); +#endif + return tn; + } + } + return 0; +} + +static +#ifdef KMP_INLINE_SUBR +__forceinline +#endif +struct shared_common * +__kmp_find_shared_task_common( struct shared_table *tbl, int gtid, void *pc_addr ) +{ + struct shared_common *tn; + + for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) { + if (tn->gbl_addr == pc_addr) { +#ifdef KMP_TASK_COMMON_DEBUG + KC_TRACE( 10, ( 
"__kmp_find_shared_task_common: thread#%d, found node %p on list\n", + gtid, pc_addr ) ); +#endif + return tn; + } + } + return 0; +} + + +/* + * Create a template for the data initialized storage. + * Either the template is NULL indicating zero fill, + * or the template is a copy of the original data. + */ + +static struct private_data * +__kmp_init_common_data( void *pc_addr, size_t pc_size ) +{ + struct private_data *d; + size_t i; + char *p; + + d = (struct private_data *) __kmp_allocate( sizeof( struct private_data ) ); +/* + d->data = 0; // AC: commented out because __kmp_allocate zeroes the memory + d->next = 0; +*/ + d->size = pc_size; + d->more = 1; + + p = (char*)pc_addr; + + for (i = pc_size; i > 0; --i) { + if (*p++ != '\0') { + d->data = __kmp_allocate( pc_size ); + KMP_MEMCPY( d->data, pc_addr, pc_size ); + break; + } + } + + return d; +} + +/* + * Initialize the data area from the template. + */ + +static void +__kmp_copy_common_data( void *pc_addr, struct private_data *d ) +{ + char *addr = (char *) pc_addr; + int i, offset; + + for (offset = 0; d != 0; d = d->next) { + for (i = d->more; i > 0; --i) { + if (d->data == 0) + memset( & addr[ offset ], '\0', d->size ); + else + KMP_MEMCPY( & addr[ offset ], d->data, d->size ); + offset += d->size; + } + } +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */ +void +__kmp_common_initialize( void ) +{ + if( ! 
TCR_4(__kmp_init_common) ) { + int q; +#ifdef KMP_DEBUG + int gtid; +#endif + + __kmp_threadpriv_cache_list = NULL; + +#ifdef KMP_DEBUG + /* verify the uber masters were initialized */ + for(gtid = 0 ; gtid < __kmp_threads_capacity; gtid++ ) + if( __kmp_root[gtid] ) { + KMP_DEBUG_ASSERT( __kmp_root[gtid]->r.r_uber_thread ); + for ( q = 0; q< KMP_HASH_TABLE_SIZE; ++q) + KMP_DEBUG_ASSERT( !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] ); +/* __kmp_root[ gitd ]-> r.r_uber_thread -> th.th_pri_common -> data[ q ] = 0;*/ + } +#endif /* KMP_DEBUG */ + + for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) + __kmp_threadprivate_d_table.data[ q ] = 0; + + TCW_4(__kmp_init_common, TRUE); + } +} + +/* Call all destructors for threadprivate data belonging to all threads. + Currently unused! */ +void +__kmp_common_destroy( void ) +{ + if( TCR_4(__kmp_init_common) ) { + int q; + + TCW_4(__kmp_init_common, FALSE); + + for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { + int gtid; + struct private_common *tn; + struct shared_common *d_tn; + + /* C++ destructors need to be called once per thread before exiting */ + /* don't call destructors for master thread though unless we used copy constructor */ + + for (d_tn = __kmp_threadprivate_d_table.data[ q ]; d_tn; d_tn = d_tn->next) { + if (d_tn->is_vec) { + if (d_tn->dt.dtorv != 0) { + for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { + if( __kmp_threads[gtid] ) { + if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : + (! KMP_UBER_GTID (gtid)) ) { + tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common, + gtid, d_tn->gbl_addr ); + if (tn) { + (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len); + } + } + } + } + if (d_tn->obj_init != 0) { + (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len); + } + } + } else { + if (d_tn->dt.dtor != 0) { + for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { + if( __kmp_threads[gtid] ) { + if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : + (! 
KMP_UBER_GTID (gtid)) ) { + tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common, + gtid, d_tn->gbl_addr ); + if (tn) { + (*d_tn->dt.dtor) (tn->par_addr); + } + } + } + } + if (d_tn->obj_init != 0) { + (*d_tn->dt.dtor) (d_tn->obj_init); + } + } + } + } + __kmp_threadprivate_d_table.data[ q ] = 0; + } + } +} + +/* Call all destructors for threadprivate data belonging to this thread */ +void +__kmp_common_destroy_gtid( int gtid ) +{ + struct private_common *tn; + struct shared_common *d_tn; + + KC_TRACE( 10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid ) ); + if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : + (! KMP_UBER_GTID (gtid)) ) { + + if( TCR_4(__kmp_init_common) ) { + + /* Cannot do this here since not all threads have destroyed their data */ + /* TCW_4(__kmp_init_common, FALSE); */ + + for (tn = __kmp_threads[ gtid ]->th.th_pri_head; tn; tn = tn->link) { + + d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, + gtid, tn->gbl_addr ); + + KMP_DEBUG_ASSERT( d_tn ); + + if (d_tn->is_vec) { + if (d_tn->dt.dtorv != 0) { + (void) (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len); + } + if (d_tn->obj_init != 0) { + (void) (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len); + } + } else { + if (d_tn->dt.dtor != 0) { + (void) (*d_tn->dt.dtor) (tn->par_addr); + } + if (d_tn->obj_init != 0) { + (void) (*d_tn->dt.dtor) (d_tn->obj_init); + } + } + } + KC_TRACE( 30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors complete\n", + gtid ) ); + } + } +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#ifdef KMP_TASK_COMMON_DEBUG +static void +dump_list( void ) +{ + int p, q; + + for (p = 0; p < __kmp_all_nth; ++p) { + if( !__kmp_threads[p] ) continue; + for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { + if (__kmp_threads[ p ]->th.th_pri_common->data[ q ]) { + struct private_common *tn; + + KC_TRACE( 10, 
( "\tdump_list: gtid:%d addresses\n", p ) ); + + for (tn = __kmp_threads[ p ]->th.th_pri_common->data[ q ]; tn; tn = tn->next) { + KC_TRACE( 10, ( "\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n", + tn->gbl_addr, tn->par_addr ) ); + } + } + } + } +} +#endif /* KMP_TASK_COMMON_DEBUG */ + + +/* + * NOTE: this routine is to be called only from the serial part of the program. + */ + +void +kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ) +{ + struct shared_common **lnk_tn, *d_tn; + KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] && + __kmp_threads[ gtid ] -> th.th_root -> r.r_active == 0 ); + + d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, + gtid, pc_addr ); + + if (d_tn == 0) { + d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); + + d_tn->gbl_addr = pc_addr; + d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size ); +/* + d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory + d_tn->ct.ctor = 0; + d_tn->cct.cctor = 0;; + d_tn->dt.dtor = 0; + d_tn->is_vec = FALSE; + d_tn->vec_len = 0L; +*/ + d_tn->cmn_size = pc_size; + + __kmp_acquire_lock( &__kmp_global_lock, gtid ); + + lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]); + + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; + + __kmp_release_lock( &__kmp_global_lock, gtid ); + } +} + +struct private_common * +kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ) +{ + struct private_common *tn, **tt; + struct shared_common *d_tn; + + /* +++++++++ START OF CRITICAL SECTION +++++++++ */ + + __kmp_acquire_lock( & __kmp_global_lock, gtid ); + + tn = (struct private_common *) __kmp_allocate( sizeof (struct private_common) ); + + tn->gbl_addr = pc_addr; + + d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, + gtid, pc_addr ); /* Only the MASTER data table exists. 
*/ + + if (d_tn != 0) { + /* This threadprivate variable has already been seen. */ + + if ( d_tn->pod_init == 0 && d_tn->obj_init == 0 ) { + d_tn->cmn_size = pc_size; + + if (d_tn->is_vec) { + if (d_tn->ct.ctorv != 0) { + /* Construct from scratch so no prototype exists */ + d_tn->obj_init = 0; + } + else if (d_tn->cct.cctorv != 0) { + /* Now data initialize the prototype since it was previously registered */ + d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size ); + (void) (*d_tn->cct.cctorv) (d_tn->obj_init, pc_addr, d_tn->vec_len); + } + else { + d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size ); + } + } else { + if (d_tn->ct.ctor != 0) { + /* Construct from scratch so no prototype exists */ + d_tn->obj_init = 0; + } + else if (d_tn->cct.cctor != 0) { + /* Now data initialize the prototype since it was previously registered */ + d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size ); + (void) (*d_tn->cct.cctor) (d_tn->obj_init, pc_addr); + } + else { + d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size ); + } + } + } + } + else { + struct shared_common **lnk_tn; + + d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); + d_tn->gbl_addr = pc_addr; + d_tn->cmn_size = pc_size; + d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size ); +/* + d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory + d_tn->ct.ctor = 0; + d_tn->cct.cctor = 0; + d_tn->dt.dtor = 0; + d_tn->is_vec = FALSE; + d_tn->vec_len = 0L; +*/ + lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]); + + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; + } + + tn->cmn_size = d_tn->cmn_size; + + if ( (__kmp_foreign_tp) ? 
(KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) { + tn->par_addr = (void *) pc_addr; + } + else { + tn->par_addr = (void *) __kmp_allocate( tn->cmn_size ); + } + + __kmp_release_lock( & __kmp_global_lock, gtid ); + + /* +++++++++ END OF CRITICAL SECTION +++++++++ */ + +#ifdef USE_CHECKS_COMMON + if (pc_size > d_tn->cmn_size) { + KC_TRACE( 10, ( "__kmp_threadprivate_insert: THREADPRIVATE: %p (%" + KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n", + pc_addr, pc_size, d_tn->cmn_size ) ); + KMP_FATAL( TPCommonBlocksInconsist ); + } +#endif /* USE_CHECKS_COMMON */ + + tt = &(__kmp_threads[ gtid ]->th.th_pri_common->data[ KMP_HASH(pc_addr) ]); + +#ifdef KMP_TASK_COMMON_DEBUG + if (*tt != 0) { + KC_TRACE( 10, ( "__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n", + gtid, pc_addr ) ); + } +#endif + tn->next = *tt; + *tt = tn; + +#ifdef KMP_TASK_COMMON_DEBUG + KC_TRACE( 10, ( "__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n", + gtid, pc_addr ) ); + dump_list( ); +#endif + + /* Link the node into a simple list */ + + tn->link = __kmp_threads[ gtid ]->th.th_pri_head; + __kmp_threads[ gtid ]->th.th_pri_head = tn; + +#ifdef BUILD_TV + __kmp_tv_threadprivate_store( __kmp_threads[ gtid ], tn->gbl_addr, tn->par_addr ); +#endif + + if( (__kmp_foreign_tp) ? 
(KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) + return tn; + + /* + * if C++ object with copy constructor, use it; + * else if C++ object with constructor, use it for the non-master copies only; + * else use pod_init and memcpy + * + * C++ constructors need to be called once for each non-master thread on allocate + * C++ copy constructors need to be called once for each thread on allocate + */ + + /* + * C++ object with constructors/destructors; + * don't call constructors for master thread though + */ + if (d_tn->is_vec) { + if ( d_tn->ct.ctorv != 0) { + (void) (*d_tn->ct.ctorv) (tn->par_addr, d_tn->vec_len); + } else if (d_tn->cct.cctorv != 0) { + (void) (*d_tn->cct.cctorv) (tn->par_addr, d_tn->obj_init, d_tn->vec_len); + } else if (tn->par_addr != tn->gbl_addr) { + __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); + } + } else { + if ( d_tn->ct.ctor != 0 ) { + (void) (*d_tn->ct.ctor) (tn->par_addr); + } else if (d_tn->cct.cctor != 0) { + (void) (*d_tn->cct.cctor) (tn->par_addr, d_tn->obj_init); + } else if (tn->par_addr != tn->gbl_addr) { + __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); + } + } +/* !BUILD_OPENMP_C + if (tn->par_addr != tn->gbl_addr) + __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */ + + return tn; +} + +/* ------------------------------------------------------------------------ */ +/* We are currently parallel, and we know the thread id. */ +/* ------------------------------------------------------------------------ */ + +/*! + @ingroup THREADPRIVATE + + @param loc source location information + @param data pointer to data being privatized + @param ctor pointer to constructor function for data + @param cctor pointer to copy constructor function for data + @param dtor pointer to destructor function for data + + Register constructors and destructors for thread private data. + This function is called when executing in parallel, when we know the thread id. 
+*/ +void +__kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor) +{ + struct shared_common *d_tn, **lnk_tn; + + KC_TRACE( 10, ("__kmpc_threadprivate_register: called\n" ) ); + +#ifdef USE_CHECKS_COMMON + /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ + KMP_ASSERT( cctor == 0); +#endif /* USE_CHECKS_COMMON */ + + /* Only the global data table exists. */ + d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, -1, data ); + + if (d_tn == 0) { + d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); + d_tn->gbl_addr = data; + + d_tn->ct.ctor = ctor; + d_tn->cct.cctor = cctor; + d_tn->dt.dtor = dtor; +/* + d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate zeroes the memory + d_tn->vec_len = 0L; + d_tn->obj_init = 0; + d_tn->pod_init = 0; +*/ + lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]); + + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; + } +} + +void * +__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size) +{ + void *ret; + struct private_common *tn; + + KC_TRACE( 10, ("__kmpc_threadprivate: T#%d called\n", global_tid ) ); + +#ifdef USE_CHECKS_COMMON + if (! __kmp_init_serial) + KMP_FATAL( RTLNotInitialized ); +#endif /* USE_CHECKS_COMMON */ + + if ( ! __kmp_threads[global_tid] -> th.th_root -> r.r_active && ! 
__kmp_foreign_tp ) { + /* The parallel address will NEVER overlap with the data_address */ + /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the data_address; use data_address = data */ + + KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting private data\n", global_tid ) ); + kmp_threadprivate_insert_private_data( global_tid, data, data, size ); + + ret = data; + } + else { + KC_TRACE( 50, ("__kmpc_threadprivate: T#%d try to find private data at address %p\n", + global_tid, data ) ); + tn = __kmp_threadprivate_find_task_common( __kmp_threads[ global_tid ]->th.th_pri_common, global_tid, data ); + + if ( tn ) { + KC_TRACE( 20, ("__kmpc_threadprivate: T#%d found data\n", global_tid ) ); +#ifdef USE_CHECKS_COMMON + if ((size_t) size > tn->cmn_size) { + KC_TRACE( 10, ( "THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n", + data, size, tn->cmn_size ) ); + KMP_FATAL( TPCommonBlocksInconsist ); + } +#endif /* USE_CHECKS_COMMON */ + } + else { + /* The parallel address will NEVER overlap with the data_address */ + /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use data_address = data */ + KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid ) ); + tn = kmp_threadprivate_insert( global_tid, data, data, size ); + } + + ret = tn->par_addr; + } + KC_TRACE( 10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n", + global_tid, ret ) ); + + return ret; +} + +/*! + @ingroup THREADPRIVATE + @param loc source location information + @param global_tid global thread number + @param data pointer to data to privatize + @param size size of data to privatize + @param cache pointer to cache + @return pointer to private storage + + Allocate private storage for threadprivate data. +*/ +void * +__kmpc_threadprivate_cached( + ident_t * loc, + kmp_int32 global_tid, // gtid. + void * data, // Pointer to original global variable. + size_t size, // Size of original global variable. 
+ void *** cache +) { + KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, address: %p, size: %" + KMP_SIZE_T_SPEC "\n", + global_tid, *cache, data, size ) ); + + if ( TCR_PTR(*cache) == 0) { + __kmp_acquire_lock( & __kmp_global_lock, global_tid ); + + if ( TCR_PTR(*cache) == 0) { + __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); + __kmp_tp_cached = 1; + __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); + void ** my_cache; + KMP_ITT_IGNORE( + my_cache = (void**) + __kmp_allocate(sizeof( void * ) * __kmp_tp_capacity + sizeof ( kmp_cached_addr_t )); + ); + // No need to zero the allocated memory; __kmp_allocate does that. + KC_TRACE( 50, ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n", + global_tid, my_cache ) ); + + /* TODO: free all this memory in __kmp_common_destroy using __kmp_threadpriv_cache_list */ + /* Add address of mycache to linked list for cleanup later */ + kmp_cached_addr_t *tp_cache_addr; + + tp_cache_addr = (kmp_cached_addr_t *) & my_cache[__kmp_tp_capacity]; + tp_cache_addr -> addr = my_cache; + tp_cache_addr -> next = __kmp_threadpriv_cache_list; + __kmp_threadpriv_cache_list = tp_cache_addr; + + KMP_MB(); + + TCW_PTR( *cache, my_cache); + + KMP_MB(); + } + + __kmp_release_lock( & __kmp_global_lock, global_tid ); + } + + void *ret; + if ((ret = TCR_PTR((*cache)[ global_tid ])) == 0) { + ret = __kmpc_threadprivate( loc, global_tid, data, (size_t) size); + + TCW_PTR( (*cache)[ global_tid ], ret); + } + KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n", + global_tid, ret ) ); + + return ret; +} + +/*! + @ingroup THREADPRIVATE + @param loc source location information + @param data pointer to data being privatized + @param ctor pointer to constructor function for data + @param cctor pointer to copy constructor function for data + @param dtor pointer to destructor function for data + @param vector_length length of the vector (bytes or elements?) 
+ Register vector constructors and destructors for thread private data. +*/ +void +__kmpc_threadprivate_register_vec( ident_t *loc, void *data, kmpc_ctor_vec ctor, + kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, + size_t vector_length ) +{ + struct shared_common *d_tn, **lnk_tn; + + KC_TRACE( 10, ("__kmpc_threadprivate_register_vec: called\n" ) ); + +#ifdef USE_CHECKS_COMMON + /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ + KMP_ASSERT( cctor == 0); +#endif /* USE_CHECKS_COMMON */ + + d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, + -1, data ); /* Only the global data table exists. */ + + if (d_tn == 0) { + d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); + d_tn->gbl_addr = data; + + d_tn->ct.ctorv = ctor; + d_tn->cct.cctorv = cctor; + d_tn->dt.dtorv = dtor; + d_tn->is_vec = TRUE; + d_tn->vec_len = (size_t) vector_length; +/* + d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory + d_tn->pod_init = 0; +*/ + lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]); + + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; + } +} diff --git a/contrib/libs/cxxsupp/openmp/kmp_utility.c b/contrib/libs/cxxsupp/openmp/kmp_utility.c index 311b2031d8e..c777d7dc0c3 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_utility.c +++ b/contrib/libs/cxxsupp/openmp/kmp_utility.c @@ -1,440 +1,440 @@ -/* - * kmp_utility.c -- Utility routines for the OpenMP support library. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_str.h" -#include -#include "kmp_i18n.h" - +/* + * kmp_utility.c -- Utility routines for the OpenMP support library. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_wrapper_getpid.h" +#include "kmp_str.h" +#include +#include "kmp_i18n.h" + #include const char* CpuBrand(ui32 store[12]) noexcept; //defined in -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -static const char *unknown = "unknown"; - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then */ -/* the debugging package has not been initialized yet, and only "0" will print */ -/* debugging output since the environment variables have not been read. 
*/ - -#ifdef KMP_DEBUG -static int trace_level = 5; -#endif - -/* - * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 )))) - * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID - * PHY_ID = APIC_ID >> LOG_ID_BITS - */ -int -__kmp_get_physical_id( int log_per_phy, int apic_id ) -{ - int index_lsb, index_msb, temp; - - if (log_per_phy > 1) { - index_lsb = 0; - index_msb = 31; - - temp = log_per_phy; - while ( (temp & 1) == 0 ) { - temp >>= 1; - index_lsb++; - } - - temp = log_per_phy; - while ( (temp & 0x80000000)==0 ) { - temp <<= 1; - index_msb--; - } - - /* If >1 bits were set in log_per_phy, choose next higher power of 2 */ - if (index_lsb != index_msb) index_msb++; - - return ( (int) (apic_id >> index_msb) ); - } - - return apic_id; -} - - -/* - * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 )))) - * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID - * LOG_ID = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 ) - */ -int -__kmp_get_logical_id( int log_per_phy, int apic_id ) -{ - unsigned current_bit; - int bits_seen; - - if (log_per_phy <= 1) return ( 0 ); - - bits_seen = 0; - - for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) { - if ( log_per_phy & current_bit ) { - log_per_phy &= ~current_bit; - bits_seen++; - } - } - - /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */ - if (bits_seen == 1) { - current_bit >>= 1; - } - - return ( (int) ((current_bit - 1) & apic_id) ); -} - - -static -kmp_uint64 -__kmp_parse_frequency( // R: Frequency in Hz. - char const * frequency // I: Float number and unit: MHz, GHz, or TGz. -) { - - double value = 0.0; - char const * unit = NULL; - kmp_uint64 result = ~ 0; - - if ( frequency == NULL ) { - return result; - }; // if - value = strtod( frequency, (char * *) & unit ); // strtod() does not like "char conts *". - if ( 0 < value && value <= DBL_MAX ) { // Good value (not overflow, underflow, etc). 
- if ( strcmp( unit, "MHz" ) == 0 ) { - value = value * 1.0E+6; - } else if ( strcmp( unit, "GHz" ) == 0 ) { - value = value * 1.0E+9; - } else if ( strcmp( unit, "THz" ) == 0 ) { - value = value * 1.0E+12; - } else { // Wrong unit. - return result; - }; // if - result = value; - }; // if - return result; - -}; // func __kmp_parse_cpu_frequency - -void -__kmp_query_cpuid( kmp_cpuinfo_t *p ) -{ - struct kmp_cpuid buf; - int max_arg; - int log_per_phy; -#ifdef KMP_DEBUG - int cflush_size; -#endif - - memset(&buf, 0, sizeof(buf)); - - p->initialized = 1; - - p->sse2 = 1; // Assume SSE2 by default. - - __kmp_x86_cpuid( 0, 0, &buf ); - - KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", - 0, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); - - max_arg = buf.eax; - - p->apic_id = -1; - - if (max_arg >= 1) { - int i; - kmp_uint32 t, data[ 4 ]; - - __kmp_x86_cpuid( 1, 0, &buf ); - KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", - 1, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); - - { -#define get_value(reg,lo,mask) ( ( ( reg ) >> ( lo ) ) & ( mask ) ) - - p->signature = buf.eax; - p->family = get_value( buf.eax, 20, 0xff ) + get_value( buf.eax, 8, 0x0f ); - p->model = ( get_value( buf.eax, 16, 0x0f ) << 4 ) + get_value( buf.eax, 4, 0x0f ); - p->stepping = get_value( buf.eax, 0, 0x0f ); - -#undef get_value - - KA_TRACE( trace_level, (" family = %d, model = %d, stepping = %d\n", p->family, p->model, p->stepping ) ); - } - - for ( t = buf.ebx, i = 0; i < 4; t >>= 8, ++i ) { - data[ i ] = (t & 0xff); - }; // for - - p->sse2 = ( buf.edx >> 26 ) & 1; - -#ifdef KMP_DEBUG - - if ( (buf.edx >> 4) & 1 ) { - /* TSC - Timestamp Counter Available */ - KA_TRACE( trace_level, (" TSC" ) ); - } - if ( (buf.edx >> 8) & 1 ) { - /* CX8 - CMPXCHG8B Instruction Available */ - KA_TRACE( trace_level, (" CX8" ) ); - } - if ( (buf.edx >> 9) & 1 ) { - /* APIC - Local APIC Present (multi-processor operation support */ - KA_TRACE( 
trace_level, (" APIC" ) ); - } - if ( (buf.edx >> 15) & 1 ) { - /* CMOV - Conditional MOVe Instruction Available */ - KA_TRACE( trace_level, (" CMOV" ) ); - } - if ( (buf.edx >> 18) & 1 ) { - /* PSN - Processor Serial Number Available */ - KA_TRACE( trace_level, (" PSN" ) ); - } - if ( (buf.edx >> 19) & 1 ) { - /* CLFULSH - Cache Flush Instruction Available */ - cflush_size = data[ 1 ] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */ - KA_TRACE( trace_level, (" CLFLUSH(%db)", cflush_size ) ); - - } - if ( (buf.edx >> 21) & 1 ) { - /* DTES - Debug Trace & EMON Store */ - KA_TRACE( trace_level, (" DTES" ) ); - } - if ( (buf.edx >> 22) & 1 ) { - /* ACPI - ACPI Support Available */ - KA_TRACE( trace_level, (" ACPI" ) ); - } - if ( (buf.edx >> 23) & 1 ) { - /* MMX - Multimedia Extensions */ - KA_TRACE( trace_level, (" MMX" ) ); - } - if ( (buf.edx >> 25) & 1 ) { - /* SSE - SSE Instructions */ - KA_TRACE( trace_level, (" SSE" ) ); - } - if ( (buf.edx >> 26) & 1 ) { - /* SSE2 - SSE2 Instructions */ - KA_TRACE( trace_level, (" SSE2" ) ); - } - if ( (buf.edx >> 27) & 1 ) { - /* SLFSNP - Self-Snooping Cache */ - KA_TRACE( trace_level, (" SLFSNP" ) ); - } -#endif /* KMP_DEBUG */ - - if ( (buf.edx >> 28) & 1 ) { - /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */ - log_per_phy = data[ 2 ]; - p->apic_id = data[ 3 ]; /* Bits 31-24: Processor Initial APIC ID (X) */ - KA_TRACE( trace_level, (" HT(%d TPUs)", log_per_phy ) ); - - if( log_per_phy > 1 ) { - /* default to 1k FOR JT-enabled processors (4k on OS X*) */ -#if KMP_OS_DARWIN - p->cpu_stackoffset = 4 * 1024; -#else - p->cpu_stackoffset = 1 * 1024; -#endif - } - - p->physical_id = __kmp_get_physical_id( log_per_phy, p->apic_id ); - p->logical_id = __kmp_get_logical_id( log_per_phy, p->apic_id ); - } -#ifdef KMP_DEBUG - if ( (buf.edx >> 29) & 1 ) { - /* ATHROTL - Automatic Throttle Control */ - KA_TRACE( trace_level, (" ATHROTL" ) ); - } - KA_TRACE( trace_level, (" ]\n" ) ); - - for (i = 2; i <= 
max_arg; ++i) { - __kmp_x86_cpuid( i, 0, &buf ); - KA_TRACE( trace_level, - ( "INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", - i, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); - } -#endif -#if KMP_USE_ADAPTIVE_LOCKS - p->rtm = 0; - if (max_arg > 7) - { - /* RTM bit CPUID.07:EBX, bit 11 */ - __kmp_x86_cpuid(7, 0, &buf); - p->rtm = (buf.ebx >> 11) & 1; - KA_TRACE( trace_level, (" RTM" ) ); - } -#endif - }; // if - - { // Parse CPU brand string for frequency. - - union kmp_cpu_brand_string { +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +static const char *unknown = "unknown"; + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then */ +/* the debugging package has not been initialized yet, and only "0" will print */ +/* debugging output since the environment variables have not been read. */ + +#ifdef KMP_DEBUG +static int trace_level = 5; +#endif + +/* + * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 )))) + * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID + * PHY_ID = APIC_ID >> LOG_ID_BITS + */ +int +__kmp_get_physical_id( int log_per_phy, int apic_id ) +{ + int index_lsb, index_msb, temp; + + if (log_per_phy > 1) { + index_lsb = 0; + index_msb = 31; + + temp = log_per_phy; + while ( (temp & 1) == 0 ) { + temp >>= 1; + index_lsb++; + } + + temp = log_per_phy; + while ( (temp & 0x80000000)==0 ) { + temp <<= 1; + index_msb--; + } + + /* If >1 bits were set in log_per_phy, choose next higher power of 2 */ + if (index_lsb != index_msb) index_msb++; + + return ( (int) (apic_id >> index_msb) ); + } + + return apic_id; +} + + +/* + * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 )))) + * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID + * LOG_ID = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 ) + */ +int +__kmp_get_logical_id( int log_per_phy, int apic_id ) +{ + unsigned 
current_bit; + int bits_seen; + + if (log_per_phy <= 1) return ( 0 ); + + bits_seen = 0; + + for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) { + if ( log_per_phy & current_bit ) { + log_per_phy &= ~current_bit; + bits_seen++; + } + } + + /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */ + if (bits_seen == 1) { + current_bit >>= 1; + } + + return ( (int) ((current_bit - 1) & apic_id) ); +} + + +static +kmp_uint64 +__kmp_parse_frequency( // R: Frequency in Hz. + char const * frequency // I: Float number and unit: MHz, GHz, or TGz. +) { + + double value = 0.0; + char const * unit = NULL; + kmp_uint64 result = ~ 0; + + if ( frequency == NULL ) { + return result; + }; // if + value = strtod( frequency, (char * *) & unit ); // strtod() does not like "char conts *". + if ( 0 < value && value <= DBL_MAX ) { // Good value (not overflow, underflow, etc). + if ( strcmp( unit, "MHz" ) == 0 ) { + value = value * 1.0E+6; + } else if ( strcmp( unit, "GHz" ) == 0 ) { + value = value * 1.0E+9; + } else if ( strcmp( unit, "THz" ) == 0 ) { + value = value * 1.0E+12; + } else { // Wrong unit. + return result; + }; // if + result = value; + }; // if + return result; + +}; // func __kmp_parse_cpu_frequency + +void +__kmp_query_cpuid( kmp_cpuinfo_t *p ) +{ + struct kmp_cpuid buf; + int max_arg; + int log_per_phy; +#ifdef KMP_DEBUG + int cflush_size; +#endif + + memset(&buf, 0, sizeof(buf)); + + p->initialized = 1; + + p->sse2 = 1; // Assume SSE2 by default. 
+ + __kmp_x86_cpuid( 0, 0, &buf ); + + KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", + 0, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); + + max_arg = buf.eax; + + p->apic_id = -1; + + if (max_arg >= 1) { + int i; + kmp_uint32 t, data[ 4 ]; + + __kmp_x86_cpuid( 1, 0, &buf ); + KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", + 1, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); + + { +#define get_value(reg,lo,mask) ( ( ( reg ) >> ( lo ) ) & ( mask ) ) + + p->signature = buf.eax; + p->family = get_value( buf.eax, 20, 0xff ) + get_value( buf.eax, 8, 0x0f ); + p->model = ( get_value( buf.eax, 16, 0x0f ) << 4 ) + get_value( buf.eax, 4, 0x0f ); + p->stepping = get_value( buf.eax, 0, 0x0f ); + +#undef get_value + + KA_TRACE( trace_level, (" family = %d, model = %d, stepping = %d\n", p->family, p->model, p->stepping ) ); + } + + for ( t = buf.ebx, i = 0; i < 4; t >>= 8, ++i ) { + data[ i ] = (t & 0xff); + }; // for + + p->sse2 = ( buf.edx >> 26 ) & 1; + +#ifdef KMP_DEBUG + + if ( (buf.edx >> 4) & 1 ) { + /* TSC - Timestamp Counter Available */ + KA_TRACE( trace_level, (" TSC" ) ); + } + if ( (buf.edx >> 8) & 1 ) { + /* CX8 - CMPXCHG8B Instruction Available */ + KA_TRACE( trace_level, (" CX8" ) ); + } + if ( (buf.edx >> 9) & 1 ) { + /* APIC - Local APIC Present (multi-processor operation support */ + KA_TRACE( trace_level, (" APIC" ) ); + } + if ( (buf.edx >> 15) & 1 ) { + /* CMOV - Conditional MOVe Instruction Available */ + KA_TRACE( trace_level, (" CMOV" ) ); + } + if ( (buf.edx >> 18) & 1 ) { + /* PSN - Processor Serial Number Available */ + KA_TRACE( trace_level, (" PSN" ) ); + } + if ( (buf.edx >> 19) & 1 ) { + /* CLFULSH - Cache Flush Instruction Available */ + cflush_size = data[ 1 ] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */ + KA_TRACE( trace_level, (" CLFLUSH(%db)", cflush_size ) ); + + } + if ( (buf.edx >> 21) & 1 ) { + /* DTES - Debug Trace & EMON Store */ + KA_TRACE( trace_level, 
(" DTES" ) ); + } + if ( (buf.edx >> 22) & 1 ) { + /* ACPI - ACPI Support Available */ + KA_TRACE( trace_level, (" ACPI" ) ); + } + if ( (buf.edx >> 23) & 1 ) { + /* MMX - Multimedia Extensions */ + KA_TRACE( trace_level, (" MMX" ) ); + } + if ( (buf.edx >> 25) & 1 ) { + /* SSE - SSE Instructions */ + KA_TRACE( trace_level, (" SSE" ) ); + } + if ( (buf.edx >> 26) & 1 ) { + /* SSE2 - SSE2 Instructions */ + KA_TRACE( trace_level, (" SSE2" ) ); + } + if ( (buf.edx >> 27) & 1 ) { + /* SLFSNP - Self-Snooping Cache */ + KA_TRACE( trace_level, (" SLFSNP" ) ); + } +#endif /* KMP_DEBUG */ + + if ( (buf.edx >> 28) & 1 ) { + /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */ + log_per_phy = data[ 2 ]; + p->apic_id = data[ 3 ]; /* Bits 31-24: Processor Initial APIC ID (X) */ + KA_TRACE( trace_level, (" HT(%d TPUs)", log_per_phy ) ); + + if( log_per_phy > 1 ) { + /* default to 1k FOR JT-enabled processors (4k on OS X*) */ +#if KMP_OS_DARWIN + p->cpu_stackoffset = 4 * 1024; +#else + p->cpu_stackoffset = 1 * 1024; +#endif + } + + p->physical_id = __kmp_get_physical_id( log_per_phy, p->apic_id ); + p->logical_id = __kmp_get_logical_id( log_per_phy, p->apic_id ); + } +#ifdef KMP_DEBUG + if ( (buf.edx >> 29) & 1 ) { + /* ATHROTL - Automatic Throttle Control */ + KA_TRACE( trace_level, (" ATHROTL" ) ); + } + KA_TRACE( trace_level, (" ]\n" ) ); + + for (i = 2; i <= max_arg; ++i) { + __kmp_x86_cpuid( i, 0, &buf ); + KA_TRACE( trace_level, + ( "INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", + i, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); + } +#endif +#if KMP_USE_ADAPTIVE_LOCKS + p->rtm = 0; + if (max_arg > 7) + { + /* RTM bit CPUID.07:EBX, bit 11 */ + __kmp_x86_cpuid(7, 0, &buf); + p->rtm = (buf.ebx >> 11) & 1; + KA_TRACE( trace_level, (" RTM" ) ); + } +#endif + }; // if + + { // Parse CPU brand string for frequency. 
+ + union kmp_cpu_brand_string { ui32 buf[ 12 ]; char string[ sizeof( ui32 ) * 12 + 1 ]; - }; // union kmp_cpu_brand_string - union kmp_cpu_brand_string brand; - - memset(&brand, 0, sizeof(brand)); - - p->frequency = 0; - - // Get CPU brand string. + }; // union kmp_cpu_brand_string + union kmp_cpu_brand_string brand; + + memset(&brand, 0, sizeof(brand)); + + p->frequency = 0; + + // Get CPU brand string. CpuBrand(brand.buf); - brand.string[ sizeof( brand.string ) - 1 ] = 0; // Just in case. ;-) - KA_TRACE( trace_level, ( "cpu brand string: \"%s\"\n", brand.string ) ); - - // Parse frequency. - p->frequency = __kmp_parse_frequency( strrchr( brand.string, ' ' ) ); - KA_TRACE( trace_level, ( "cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", p->frequency ) ); - } -} - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -/* ------------------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------------------ */ - -void -__kmp_expand_host_name( char *buffer, size_t size ) -{ - KMP_DEBUG_ASSERT(size >= sizeof(unknown)); -#if KMP_OS_WINDOWS - { - DWORD s = size; - - if (! GetComputerNameA( buffer, & s )) - KMP_STRCPY_S( buffer, size, unknown ); - } -#else - buffer[size - 2] = 0; - if (gethostname( buffer, size ) || buffer[size - 2] != 0) - KMP_STRCPY_S( buffer, size, unknown ); -#endif -} - -/* Expand the meta characters in the filename: - * - * Currently defined characters are: - * - * %H the hostname - * %P the number of threads used. - * %I the unique identifier for this run. 
- */ - -void -__kmp_expand_file_name( char *result, size_t rlen, char *pattern ) -{ - char *pos = result, *end = result + rlen - 1; - char buffer[256]; - int default_cpu_width = 1; - int snp_result; - - KMP_DEBUG_ASSERT(rlen > 0); - *end = 0; - { - int i; - for(i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width); - } - - if (pattern != NULL) { - while (*pattern != '\0' && pos < end) { - if (*pattern != '%') { - *pos++ = *pattern++; - } else { - char *old_pattern = pattern; - int width = 1; - int cpu_width = default_cpu_width; - - ++pattern; - - if (*pattern >= '0' && *pattern <= '9') { - width = 0; - do { - width = (width * 10) + *pattern++ - '0'; - } while (*pattern >= '0' && *pattern <= '9'); - if (width < 0 || width > 1024) - width = 1; - - cpu_width = width; - } - - switch (*pattern) { - case 'H': - case 'h': - { - __kmp_expand_host_name( buffer, sizeof( buffer ) ); - KMP_STRNCPY( pos, buffer, end - pos + 1); - if(*end == 0) { - while ( *pos ) - ++pos; - ++pattern; - } else - pos = end; - } - break; - case 'P': - case 'p': - { - snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", cpu_width, __kmp_dflt_team_nth ); - if(snp_result >= 0 && snp_result <= end - pos) { - while ( *pos ) - ++pos; - ++pattern; - } else - pos = end; - } - break; - case 'I': - case 'i': - { - pid_t id = getpid(); - snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", width, id ); - if(snp_result >= 0 && snp_result <= end - pos) { - while ( *pos ) - ++pos; - ++pattern; - } else - pos = end; - break; - } - case '%': - { - *pos++ = '%'; - ++pattern; - break; - } - default: - { - *pos++ = '%'; - pattern = old_pattern + 1; - break; - } - } - } - } - /* TODO: How do we get rid of this? */ - if(*pattern != '\0') - KMP_FATAL( FileNameTooLong ); - } - - *pos = '\0'; -} + brand.string[ sizeof( brand.string ) - 1 ] = 0; // Just in case. ;-) + KA_TRACE( trace_level, ( "cpu brand string: \"%s\"\n", brand.string ) ); + + // Parse frequency. 
+ p->frequency = __kmp_parse_frequency( strrchr( brand.string, ' ' ) ); + KA_TRACE( trace_level, ( "cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", p->frequency ) ); + } +} + +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +/* ------------------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------------------ */ + +void +__kmp_expand_host_name( char *buffer, size_t size ) +{ + KMP_DEBUG_ASSERT(size >= sizeof(unknown)); +#if KMP_OS_WINDOWS + { + DWORD s = size; + + if (! GetComputerNameA( buffer, & s )) + KMP_STRCPY_S( buffer, size, unknown ); + } +#else + buffer[size - 2] = 0; + if (gethostname( buffer, size ) || buffer[size - 2] != 0) + KMP_STRCPY_S( buffer, size, unknown ); +#endif +} + +/* Expand the meta characters in the filename: + * + * Currently defined characters are: + * + * %H the hostname + * %P the number of threads used. + * %I the unique identifier for this run. + */ + +void +__kmp_expand_file_name( char *result, size_t rlen, char *pattern ) +{ + char *pos = result, *end = result + rlen - 1; + char buffer[256]; + int default_cpu_width = 1; + int snp_result; + + KMP_DEBUG_ASSERT(rlen > 0); + *end = 0; + { + int i; + for(i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width); + } + + if (pattern != NULL) { + while (*pattern != '\0' && pos < end) { + if (*pattern != '%') { + *pos++ = *pattern++; + } else { + char *old_pattern = pattern; + int width = 1; + int cpu_width = default_cpu_width; + + ++pattern; + + if (*pattern >= '0' && *pattern <= '9') { + width = 0; + do { + width = (width * 10) + *pattern++ - '0'; + } while (*pattern >= '0' && *pattern <= '9'); + if (width < 0 || width > 1024) + width = 1; + + cpu_width = width; + } + + switch (*pattern) { + case 'H': + case 'h': + { + __kmp_expand_host_name( buffer, sizeof( buffer ) ); + KMP_STRNCPY( pos, buffer, end - pos + 1); + if(*end == 0) { + while ( *pos ) + ++pos; + ++pattern; + } else + pos = 
end; + } + break; + case 'P': + case 'p': + { + snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", cpu_width, __kmp_dflt_team_nth ); + if(snp_result >= 0 && snp_result <= end - pos) { + while ( *pos ) + ++pos; + ++pattern; + } else + pos = end; + } + break; + case 'I': + case 'i': + { + pid_t id = getpid(); + snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", width, id ); + if(snp_result >= 0 && snp_result <= end - pos) { + while ( *pos ) + ++pos; + ++pattern; + } else + pos = end; + break; + } + case '%': + { + *pos++ = '%'; + ++pattern; + break; + } + default: + { + *pos++ = '%'; + pattern = old_pattern + 1; + break; + } + } + } + } + /* TODO: How do we get rid of this? */ + if(*pattern != '\0') + KMP_FATAL( FileNameTooLong ); + } + + *pos = '\0'; +} diff --git a/contrib/libs/cxxsupp/openmp/kmp_version.c b/contrib/libs/cxxsupp/openmp/kmp_version.c index 8b7598c46b6..2ddd76d3ad7 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_version.c +++ b/contrib/libs/cxxsupp/openmp/kmp_version.c @@ -1,211 +1,211 @@ -/* - * kmp_version.c - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_io.h" -#include "kmp_version.h" - -// Replace with snapshot date YYYYMMDD for promotion build. -#define KMP_VERSION_BUILD 20140926 - -// Helper macros to convert value of macro to string literal. -#define _stringer( x ) #x -#define stringer( x ) _stringer( x ) - -// Detect compiler. 
-#if KMP_COMPILER_ICC - #if __INTEL_COMPILER == 1010 - #define KMP_COMPILER "Intel C++ Compiler 10.1" - #elif __INTEL_COMPILER == 1100 - #define KMP_COMPILER "Intel C++ Compiler 11.0" - #elif __INTEL_COMPILER == 1110 - #define KMP_COMPILER "Intel C++ Compiler 11.1" - #elif __INTEL_COMPILER == 1200 - #define KMP_COMPILER "Intel C++ Compiler 12.0" - #elif __INTEL_COMPILER == 1210 - #define KMP_COMPILER "Intel C++ Compiler 12.1" - #elif __INTEL_COMPILER == 1300 - #define KMP_COMPILER "Intel C++ Compiler 13.0" - #elif __INTEL_COMPILER == 1310 - #define KMP_COMPILER "Intel C++ Compiler 13.1" - #elif __INTEL_COMPILER == 1400 - #define KMP_COMPILER "Intel C++ Compiler 14.0" - #elif __INTEL_COMPILER == 1410 - #define KMP_COMPILER "Intel C++ Compiler 14.1" - #elif __INTEL_COMPILER == 1500 - #define KMP_COMPILER "Intel C++ Compiler 15.0" - #elif __INTEL_COMPILER == 1600 - #define KMP_COMPILER "Intel C++ Compiler 16.0" - #elif __INTEL_COMPILER == 9999 - #define KMP_COMPILER "Intel C++ Compiler mainline" - #endif -#elif KMP_COMPILER_CLANG - #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ ) -#elif KMP_COMPILER_GCC - #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ ) -#elif KMP_COMPILER_MSVC - #define KMP_COMPILER "MSVC " stringer( _MSC_FULL_VER ) -#endif -#ifndef KMP_COMPILER - #warning "Unknown compiler" - #define KMP_COMPILER "unknown compiler" -#endif - -// Detect librray type (perf, stub). -#ifdef KMP_STUB - #define KMP_LIB_TYPE "stub" -#else - #define KMP_LIB_TYPE "performance" -#endif // KMP_LIB_TYPE - -// Detect link type (static, dynamic). -#ifdef KMP_DYNAMIC_LIB - #define KMP_LINK_TYPE "dynamic" -#else - #define KMP_LINK_TYPE "static" -#endif // KMP_LINK_TYPE - -// Finally, define strings. 
-#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")" -#define KMP_COPYRIGHT "" - -int const __kmp_version_major = KMP_VERSION_MAJOR; -int const __kmp_version_minor = KMP_VERSION_MINOR; -int const __kmp_version_build = KMP_VERSION_BUILD; -int const __kmp_openmp_version = - #if OMP_40_ENABLED - 201307; - #else - 201107; - #endif - -/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for a - specific format some changes in the recognition routine there need to - be made before this is changed. -*/ -char const __kmp_copyright[] = - KMP_VERSION_PREFIX KMP_LIBRARY - " ver. " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) - "." stringer( KMP_VERSION_BUILD ) " " - KMP_COPYRIGHT; - -char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT; -char const __kmp_version_lib_ver[] = KMP_VERSION_PREFIX "version: " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) "." stringer( KMP_VERSION_BUILD ); -char const __kmp_version_lib_type[] = KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE; -char const __kmp_version_link_type[] = KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE; -char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " __DATE__ " " __TIME__; -#if KMP_MIC2 - char const __kmp_version_target_env[] = KMP_VERSION_PREFIX "target environment: MIC2"; -#endif -char const __kmp_version_build_compiler[] = KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER; - -// -// Called at serial initialization time. -// -static int __kmp_version_1_printed = FALSE; - -void -__kmp_print_version_1( void ) -{ - if ( __kmp_version_1_printed ) { - return; - }; // if - __kmp_version_1_printed = TRUE; - - #ifndef KMP_STUB - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - // Print version strings skipping initial magic. 
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_ver[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_type[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_link_type[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_time[ KMP_VERSION_MAGIC_LEN ] ); - #if KMP_MIC - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_target_env[ KMP_VERSION_MAGIC_LEN ] ); - #endif - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_compiler[ KMP_VERSION_MAGIC_LEN ] ); - #if defined(KMP_GOMP_COMPAT) - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_alt_comp[ KMP_VERSION_MAGIC_LEN ] ); - #endif /* defined(KMP_GOMP_COMPAT) */ - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_omp_api[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%sdynamic error checking: %s\n", KMP_VERSION_PREF_STR, ( __kmp_env_consistency_check ? "yes" : "no" ) ); - #ifdef KMP_DEBUG - for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) { - __kmp_str_buf_print( - & buffer, - "%s%s barrier branch bits: gather=%u, release=%u\n", - KMP_VERSION_PREF_STR, - __kmp_barrier_type_name[ i ], - __kmp_barrier_gather_branch_bits[ i ], - __kmp_barrier_release_branch_bits[ i ] - ); // __kmp_str_buf_print - }; // for i - for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) { - __kmp_str_buf_print( - & buffer, - "%s%s barrier pattern: gather=%s, release=%s\n", - KMP_VERSION_PREF_STR, - __kmp_barrier_type_name[ i ], - __kmp_barrier_pattern_name[ __kmp_barrier_gather_pattern[ i ] ], - __kmp_barrier_pattern_name[ __kmp_barrier_release_pattern[ i ] ] - ); // __kmp_str_buf_print - }; // for i - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lock[ KMP_VERSION_MAGIC_LEN ] ); - #endif - __kmp_str_buf_print( - & buffer, - "%sthread affinity support: %s\n", - KMP_VERSION_PREF_STR, - #if KMP_AFFINITY_SUPPORTED - ( - KMP_AFFINITY_CAPABLE() - ? 
- ( - __kmp_affinity_type == affinity_none - ? - "not used" - : - "yes" - ) - : - "no" - ) - #else - "no" - #endif - ); - __kmp_printf( "%s", buffer.str ); - __kmp_str_buf_free( & buffer ); - K_DIAG( 1, ( "KMP_VERSION is true\n" ) ); - #endif // KMP_STUB -} // __kmp_print_version_1 - -// -// Called at parallel initialization time. -// -static int __kmp_version_2_printed = FALSE; - -void -__kmp_print_version_2( void ) { - if ( __kmp_version_2_printed ) { - return; - }; // if - __kmp_version_2_printed = TRUE; - - #ifndef KMP_STUB - #endif // KMP_STUB -} // __kmp_print_version_2 - -// end of file // +/* + * kmp_version.c + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_io.h" +#include "kmp_version.h" + +// Replace with snapshot date YYYYMMDD for promotion build. +#define KMP_VERSION_BUILD 20140926 + +// Helper macros to convert value of macro to string literal. +#define _stringer( x ) #x +#define stringer( x ) _stringer( x ) + +// Detect compiler. 
+#if KMP_COMPILER_ICC + #if __INTEL_COMPILER == 1010 + #define KMP_COMPILER "Intel C++ Compiler 10.1" + #elif __INTEL_COMPILER == 1100 + #define KMP_COMPILER "Intel C++ Compiler 11.0" + #elif __INTEL_COMPILER == 1110 + #define KMP_COMPILER "Intel C++ Compiler 11.1" + #elif __INTEL_COMPILER == 1200 + #define KMP_COMPILER "Intel C++ Compiler 12.0" + #elif __INTEL_COMPILER == 1210 + #define KMP_COMPILER "Intel C++ Compiler 12.1" + #elif __INTEL_COMPILER == 1300 + #define KMP_COMPILER "Intel C++ Compiler 13.0" + #elif __INTEL_COMPILER == 1310 + #define KMP_COMPILER "Intel C++ Compiler 13.1" + #elif __INTEL_COMPILER == 1400 + #define KMP_COMPILER "Intel C++ Compiler 14.0" + #elif __INTEL_COMPILER == 1410 + #define KMP_COMPILER "Intel C++ Compiler 14.1" + #elif __INTEL_COMPILER == 1500 + #define KMP_COMPILER "Intel C++ Compiler 15.0" + #elif __INTEL_COMPILER == 1600 + #define KMP_COMPILER "Intel C++ Compiler 16.0" + #elif __INTEL_COMPILER == 9999 + #define KMP_COMPILER "Intel C++ Compiler mainline" + #endif +#elif KMP_COMPILER_CLANG + #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ ) +#elif KMP_COMPILER_GCC + #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ ) +#elif KMP_COMPILER_MSVC + #define KMP_COMPILER "MSVC " stringer( _MSC_FULL_VER ) +#endif +#ifndef KMP_COMPILER + #warning "Unknown compiler" + #define KMP_COMPILER "unknown compiler" +#endif + +// Detect librray type (perf, stub). +#ifdef KMP_STUB + #define KMP_LIB_TYPE "stub" +#else + #define KMP_LIB_TYPE "performance" +#endif // KMP_LIB_TYPE + +// Detect link type (static, dynamic). +#ifdef KMP_DYNAMIC_LIB + #define KMP_LINK_TYPE "dynamic" +#else + #define KMP_LINK_TYPE "static" +#endif // KMP_LINK_TYPE + +// Finally, define strings. 
+#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")" +#define KMP_COPYRIGHT "" + +int const __kmp_version_major = KMP_VERSION_MAJOR; +int const __kmp_version_minor = KMP_VERSION_MINOR; +int const __kmp_version_build = KMP_VERSION_BUILD; +int const __kmp_openmp_version = + #if OMP_40_ENABLED + 201307; + #else + 201107; + #endif + +/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for a + specific format some changes in the recognition routine there need to + be made before this is changed. +*/ +char const __kmp_copyright[] = + KMP_VERSION_PREFIX KMP_LIBRARY + " ver. " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) + "." stringer( KMP_VERSION_BUILD ) " " + KMP_COPYRIGHT; + +char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT; +char const __kmp_version_lib_ver[] = KMP_VERSION_PREFIX "version: " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) "." stringer( KMP_VERSION_BUILD ); +char const __kmp_version_lib_type[] = KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE; +char const __kmp_version_link_type[] = KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE; +char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " __DATE__ " " __TIME__; +#if KMP_MIC2 + char const __kmp_version_target_env[] = KMP_VERSION_PREFIX "target environment: MIC2"; +#endif +char const __kmp_version_build_compiler[] = KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER; + +// +// Called at serial initialization time. +// +static int __kmp_version_1_printed = FALSE; + +void +__kmp_print_version_1( void ) +{ + if ( __kmp_version_1_printed ) { + return; + }; // if + __kmp_version_1_printed = TRUE; + + #ifndef KMP_STUB + kmp_str_buf_t buffer; + __kmp_str_buf_init( & buffer ); + // Print version strings skipping initial magic. 
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_ver[ KMP_VERSION_MAGIC_LEN ] ); + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_type[ KMP_VERSION_MAGIC_LEN ] ); + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_link_type[ KMP_VERSION_MAGIC_LEN ] ); + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_time[ KMP_VERSION_MAGIC_LEN ] ); + #if KMP_MIC + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_target_env[ KMP_VERSION_MAGIC_LEN ] ); + #endif + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_compiler[ KMP_VERSION_MAGIC_LEN ] ); + #if defined(KMP_GOMP_COMPAT) + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_alt_comp[ KMP_VERSION_MAGIC_LEN ] ); + #endif /* defined(KMP_GOMP_COMPAT) */ + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_omp_api[ KMP_VERSION_MAGIC_LEN ] ); + __kmp_str_buf_print( & buffer, "%sdynamic error checking: %s\n", KMP_VERSION_PREF_STR, ( __kmp_env_consistency_check ? "yes" : "no" ) ); + #ifdef KMP_DEBUG + for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) { + __kmp_str_buf_print( + & buffer, + "%s%s barrier branch bits: gather=%u, release=%u\n", + KMP_VERSION_PREF_STR, + __kmp_barrier_type_name[ i ], + __kmp_barrier_gather_branch_bits[ i ], + __kmp_barrier_release_branch_bits[ i ] + ); // __kmp_str_buf_print + }; // for i + for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) { + __kmp_str_buf_print( + & buffer, + "%s%s barrier pattern: gather=%s, release=%s\n", + KMP_VERSION_PREF_STR, + __kmp_barrier_type_name[ i ], + __kmp_barrier_pattern_name[ __kmp_barrier_gather_pattern[ i ] ], + __kmp_barrier_pattern_name[ __kmp_barrier_release_pattern[ i ] ] + ); // __kmp_str_buf_print + }; // for i + __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lock[ KMP_VERSION_MAGIC_LEN ] ); + #endif + __kmp_str_buf_print( + & buffer, + "%sthread affinity support: %s\n", + KMP_VERSION_PREF_STR, + #if KMP_AFFINITY_SUPPORTED + ( + KMP_AFFINITY_CAPABLE() + ? 
+ ( + __kmp_affinity_type == affinity_none + ? + "not used" + : + "yes" + ) + : + "no" + ) + #else + "no" + #endif + ); + __kmp_printf( "%s", buffer.str ); + __kmp_str_buf_free( & buffer ); + K_DIAG( 1, ( "KMP_VERSION is true\n" ) ); + #endif // KMP_STUB +} // __kmp_print_version_1 + +// +// Called at parallel initialization time. +// +static int __kmp_version_2_printed = FALSE; + +void +__kmp_print_version_2( void ) { + if ( __kmp_version_2_printed ) { + return; + }; // if + __kmp_version_2_printed = TRUE; + + #ifndef KMP_STUB + #endif // KMP_STUB +} // __kmp_print_version_2 + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_version.h b/contrib/libs/cxxsupp/openmp/kmp_version.h index ba7c1b949a8..212853b8e2b 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_version.h +++ b/contrib/libs/cxxsupp/openmp/kmp_version.h @@ -1,68 +1,68 @@ -/* - * kmp_version.h -- version number for this release - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_VERSION_H -#define KMP_VERSION_H - -#ifdef __cplusplus - extern "C" { -#endif // __cplusplus - -#ifndef KMP_VERSION_MAJOR - #error KMP_VERSION_MAJOR macro is not defined. -#endif -#define KMP_VERSION_MINOR 0 -/* - Using "magic" prefix in all the version strings is rather convenient to get static version info - from binaries by using standard utilities "strings" and "grep", e. g.: - $ strings libomp.so | grep "@(#)" - gives clean list of all version strings in the library. Leading zero helps to keep version - string separate from printable characters which may occurs just before version string. -*/ -#define KMP_VERSION_MAGIC_STR "\x00@(#) " -#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR. 
-#define KMP_VERSION_PREF_STR "Intel(R) OMP " -#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR - -/* declare all the version string constants for KMP_VERSION env. variable */ -extern int const __kmp_version_major; -extern int const __kmp_version_minor; -extern int const __kmp_version_build; -extern int const __kmp_openmp_version; -extern char const __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP. -extern char const __kmp_version_copyright[]; -extern char const __kmp_version_lib_ver[]; -extern char const __kmp_version_lib_type[]; -extern char const __kmp_version_link_type[]; -extern char const __kmp_version_build_time[]; -extern char const __kmp_version_target_env[]; -extern char const __kmp_version_build_compiler[]; -extern char const __kmp_version_alt_comp[]; -extern char const __kmp_version_omp_api[]; -// ??? extern char const __kmp_version_debug[]; -extern char const __kmp_version_lock[]; -extern char const __kmp_version_nested_stats_reporting[]; -extern char const __kmp_version_ftnstdcall[]; -extern char const __kmp_version_ftncdecl[]; -extern char const __kmp_version_ftnextra[]; - -void __kmp_print_version_1( void ); -void __kmp_print_version_2( void ); - -#ifdef __cplusplus - } // extern "C" -#endif // __cplusplus - -#endif /* KMP_VERSION_H */ +/* + * kmp_version.h -- version number for this release + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_VERSION_H +#define KMP_VERSION_H + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +#ifndef KMP_VERSION_MAJOR + #error KMP_VERSION_MAJOR macro is not defined. 
+#endif +#define KMP_VERSION_MINOR 0 +/* + Using "magic" prefix in all the version strings is rather convenient to get static version info + from binaries by using standard utilities "strings" and "grep", e. g.: + $ strings libomp.so | grep "@(#)" + gives clean list of all version strings in the library. Leading zero helps to keep version + string separate from printable characters which may occurs just before version string. +*/ +#define KMP_VERSION_MAGIC_STR "\x00@(#) " +#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR. +#define KMP_VERSION_PREF_STR "Intel(R) OMP " +#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR + +/* declare all the version string constants for KMP_VERSION env. variable */ +extern int const __kmp_version_major; +extern int const __kmp_version_minor; +extern int const __kmp_version_build; +extern int const __kmp_openmp_version; +extern char const __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP. +extern char const __kmp_version_copyright[]; +extern char const __kmp_version_lib_ver[]; +extern char const __kmp_version_lib_type[]; +extern char const __kmp_version_link_type[]; +extern char const __kmp_version_build_time[]; +extern char const __kmp_version_target_env[]; +extern char const __kmp_version_build_compiler[]; +extern char const __kmp_version_alt_comp[]; +extern char const __kmp_version_omp_api[]; +// ??? 
extern char const __kmp_version_debug[]; +extern char const __kmp_version_lock[]; +extern char const __kmp_version_nested_stats_reporting[]; +extern char const __kmp_version_ftnstdcall[]; +extern char const __kmp_version_ftncdecl[]; +extern char const __kmp_version_ftnextra[]; + +void __kmp_print_version_1( void ); +void __kmp_print_version_2( void ); + +#ifdef __cplusplus + } // extern "C" +#endif // __cplusplus + +#endif /* KMP_VERSION_H */ diff --git a/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp b/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp index 7758e18991a..d865bf6d46f 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp @@ -1,50 +1,50 @@ -/* - * kmp_wait_release.cpp -- Wait/Release implementation - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#include "kmp_wait_release.h" - -void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - __kmp_wait_template(this_thr, flag, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj) ); -} - -void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - __kmp_wait_template(this_thr, flag, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj) ); -} - -void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - __kmp_wait_template(this_thr, flag, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj) ); -} - - - -void __kmp_release_32(kmp_flag_32 *flag) { - __kmp_release_template(flag); -} - -void __kmp_release_64(kmp_flag_64 *flag) { - __kmp_release_template(flag); -} - -void __kmp_release_oncore(kmp_flag_oncore *flag) { - __kmp_release_template(flag); -} +/* + * kmp_wait_release.cpp -- Wait/Release implementation + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + +#include "kmp_wait_release.h" + +void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) +{ + __kmp_wait_template(this_thr, flag, final_spin + USE_ITT_BUILD_ARG(itt_sync_obj) ); +} + +void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) +{ + __kmp_wait_template(this_thr, flag, final_spin + USE_ITT_BUILD_ARG(itt_sync_obj) ); +} + +void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) +{ + __kmp_wait_template(this_thr, flag, final_spin + USE_ITT_BUILD_ARG(itt_sync_obj) ); +} + + + +void __kmp_release_32(kmp_flag_32 *flag) { + __kmp_release_template(flag); +} + +void __kmp_release_64(kmp_flag_64 *flag) { + __kmp_release_template(flag); +} + +void __kmp_release_oncore(kmp_flag_oncore *flag) { + __kmp_release_template(flag); +} diff --git a/contrib/libs/cxxsupp/openmp/kmp_wait_release.h b/contrib/libs/cxxsupp/openmp/kmp_wait_release.h index 60a0f45f3db..92db155eb5a 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_wait_release.h +++ b/contrib/libs/cxxsupp/openmp/kmp_wait_release.h @@ -1,564 +1,564 @@ -/* - * kmp_wait_release.h -- Wait/Release implementation - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_WAIT_RELEASE_H -#define KMP_WAIT_RELEASE_H - -#include "kmp.h" -#include "kmp_itt.h" - +/* + * kmp_wait_release.h -- Wait/Release implementation + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_WAIT_RELEASE_H +#define KMP_WAIT_RELEASE_H + +#include "kmp.h" +#include "kmp_itt.h" + +/*! +@defgroup WAIT_RELEASE Wait/Release operations + +The definitions and functions here implement the lowest level thread +synchronizations of suspending a thread and awaking it. They are used +to build higher level operations such as barriers and fork/join. +*/ + +/*! +@ingroup WAIT_RELEASE +@{ +*/ + /*! -@defgroup WAIT_RELEASE Wait/Release operations - -The definitions and functions here implement the lowest level thread -synchronizations of suspending a thread and awaking it. They are used -to build higher level operations such as barriers and fork/join. -*/ - -/*! -@ingroup WAIT_RELEASE -@{ -*/ - -/*! - * The flag_type describes the storage used for the flag. - */ -enum flag_type { - flag32, /**< 32 bit flags */ - flag64, /**< 64 bit flags */ - flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */ -}; - -/*! - * Base class for wait/release volatile flag - */ -template -class kmp_flag { - volatile P * loc; /**< Pointer to the flag storage that is modified by another thread */ - flag_type t; /**< "Type" of the flag in loc */ - public: - typedef P flag_t; - kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {} - /*! - * @result the pointer to the actual flag - */ - volatile P * get() { return loc; } - /*! 
- * @param new_loc in set loc to point at new_loc - */ - void set(volatile P *new_loc) { loc = new_loc; } - /*! - * @result the flag_type - */ - flag_type get_type() { return t; } - // Derived classes must provide the following: - /* - kmp_info_t * get_waiter(kmp_uint32 i); - kmp_uint32 get_num_waiters(); - bool done_check(); - bool done_check_val(P old_loc); - bool notdone_check(); - P internal_release(); - void suspend(int th_gtid); - void resume(int th_gtid); - P set_sleeping(); - P unset_sleeping(); - bool is_sleeping(); - bool is_any_sleeping(); - bool is_sleeping_val(P old_loc); - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained); - */ -}; - -/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_* - must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */ -template -static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - // NOTE: We may not belong to a team at this point. 
- volatile typename C::flag_t *spin = flag->get(); - kmp_uint32 spins; - kmp_uint32 hibernate; - int th_gtid; - int tasks_completed = FALSE; - - KMP_FSYNC_SPIN_INIT(spin, NULL); - if (flag->done_check()) { - KMP_FSYNC_SPIN_ACQUIRED(spin); - return; - } - th_gtid = this_thr->th.th_info.ds.ds_gtid; - KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag)); - -#if OMPT_SUPPORT && OMPT_BLAME - ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state; - if (ompt_enabled && - ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) { - ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - - ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; - if (team){ - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; - } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; - } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId); - } - } -#endif - - // Setup for waiting - KMP_INIT_YIELD(spins); - - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - // The worker threads cannot rely on the team struct existing at this point. - // Use the bt values cached in the thread struct instead. 
-#ifdef KMP_ADJUST_BLOCKTIME - if (__kmp_zero_bt && !this_thr->th.th_team_bt_set) - // Force immediate suspend if not set by user and more threads than available procs - hibernate = 0; - else - hibernate = this_thr->th.th_team_bt_intervals; -#else - hibernate = this_thr->th.th_team_bt_intervals; -#endif /* KMP_ADJUST_BLOCKTIME */ - - /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety - of the specified #intervals, plus up to one interval more. This increment make - certain that this thread doesn't go to sleep too soon. */ - if (hibernate != 0) - hibernate++; - - // Add in the current time value. - hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value); - KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n", - th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, - hibernate - __kmp_global.g.g_time.dt.t_value)); - } - - KMP_MB(); - - // Main wait spin loop - while (flag->notdone_check()) { - int in_pool; - - /* If the task team is NULL, it means one of things: - 1) A newly-created thread is first being released by __kmp_fork_barrier(), and - its task team has not been set up yet. - 2) All tasks have been executed to completion, this thread has decremented the task - team's ref ct and possibly deallocated it, and should no longer reference it. - 3) Tasking is off for this region. This could be because we are in a serialized region - (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). 
*/ - kmp_task_team_t * task_team = NULL; - if (__kmp_tasking_mode != tskm_immediate_exec) { - task_team = this_thr->th.th_task_team; - if (task_team != NULL) { - if (TCR_SYNC_4(task_team->tt.tt_active)) { - if (KMP_TASKING_ENABLED(task_team)) - flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed - USE_ITT_BUILD_ARG(itt_sync_obj), 0); - } - else { - KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); - this_thr->th.th_task_team = NULL; - } - } // if - } // if - - KMP_FSYNC_SPIN_PREPARE(spin); - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } - - // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield - KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); - // TODO: Should it be number of cores instead of thread contexts? Like: - // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores); - // Need performance improvement data to make the change... - KMP_YIELD_SPIN(spins); - - // Check if this thread was transferred from a team - // to the thread pool (or vice-versa) while spinning. - in_pool = !!TCR_4(this_thr->th.th_in_pool); - if (in_pool != !!this_thr->th.th_active_in_pool) { - if (in_pool) { // Recently transferred from team to pool - KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth); - this_thr->th.th_active_in_pool = TRUE; - /* Here, we cannot assert that: - KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth); - __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join - lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously - by the workers. The two can get out of sync for brief periods of time. 
*/ - } - else { // Recently transferred from pool to team - KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth); - KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); - this_thr->th.th_active_in_pool = FALSE; - } - } - - // Don't suspend if KMP_BLOCKTIME is set to "infinite" - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) - continue; - - // Don't suspend if there is a likelihood of new tasks being spawned. - if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks)) - continue; - - // If we have waited a bit more, fall asleep - if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate) - continue; - - KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid)); - - flag->suspend(th_gtid); - - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } - // TODO: If thread is done with work and times out, disband/free - } - -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) { - ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - - ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; - if (team){ - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; - } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; - } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId); - } - } -#endif - - KMP_FSYNC_SPIN_ACQUIRED(spin); -} - -/* Release any threads specified as waiting on the flag by releasing the flag and resume the 
waiting thread - if indicated by the sleep bit(s). A thread that calls __kmp_wait_template must call this function to wake - up the potentially sleeping thread and prevent deadlocks! */ -template -static inline void __kmp_release_template(C *flag) -{ -#ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; -#endif - KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get())); - KMP_DEBUG_ASSERT(flag->get()); - KMP_FSYNC_RELEASING(flag->get()); - - flag->internal_release(); - - KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), *(flag->get()))); - - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - // Only need to check sleep stuff if infinite block time not set - if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping? - for (unsigned int i=0; iget_num_waiters(); ++i) { - kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag - if (waiter) { - int wait_gtid = waiter->th.th_info.ds.ds_gtid; - // Wake up thread if needed - KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n", - gtid, wait_gtid, flag->get())); - flag->resume(wait_gtid); // unsets flag's current_waiter when done - } - } - } - } -} - -template -struct flag_traits {}; - -template <> -struct flag_traits { - typedef kmp_uint32 flag_t; - static const flag_type t = flag32; - static inline flag_t tcr(flag_t f) { return TCR_4(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); } -}; - -template <> -struct flag_traits { - typedef kmp_uint64 flag_t; - static const flag_type t = flag64; - static inline 
flag_t tcr(flag_t f) { return TCR_8(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); } -}; - -template -class kmp_basic_flag : public kmp_flag { - typedef flag_traits traits_type; - FlagType checker; /**< Value to compare flag to to check if flag has been released. */ - kmp_info_t * waiting_threads[1]; /**< Array of threads sleeping on this thread. */ - kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this thread. */ - public: - kmp_basic_flag(volatile FlagType *p) : kmp_flag(p, traits_type::t), num_waiting_threads(0) {} - kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag(p, traits_type::t), num_waiting_threads(1) { - waiting_threads[0] = thr; - } - kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag(p, traits_type::t), checker(c), num_waiting_threads(0) {} - /*! - * param i in index into waiting_threads - * @result the thread that is waiting at index i - */ - kmp_info_t * get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(iget())) == checker; } - /*! - * @param old_loc in old value of flag - * @result true if the flag's old value indicates it was released. - */ - bool done_check_val(FlagType old_loc) { return old_loc == checker; } - /*! - * @result true if the flag object is not yet released. - * Used in __kmp_wait_template like: - * @code - * while (flag.notdone_check()) { pause(); } - * @endcode - */ - bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; } - /*! - * @result Actual flag value before release was applied. - * Trigger all waiting threads to run by modifying flag to release state. 
- */ - void internal_release() { - (void) traits_type::test_then_add4((volatile FlagType *)this->get()); - } - /*! - * @result Actual flag value before sleep bit(s) set. - * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s). - */ - FlagType set_sleeping() { - return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE); - } - /*! - * @result Actual flag value before sleep bit(s) cleared. - * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s). - */ - FlagType unset_sleeping() { - return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE); - } - /*! - * @param old_loc in old value of flag - * Test whether there are threads sleeping on the flag's old value in old_loc. - */ - bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; } - /*! - * Test whether there are threads sleeping on the flag. - */ - bool is_sleeping() { return is_sleeping_val(*(this->get())); } - bool is_any_sleeping() { return is_sleeping_val(*(this->get())); } - kmp_uint8 *get_stolen() { return NULL; } - enum barrier_type get_bt() { return bs_last_barrier; } -}; - -class kmp_flag_32 : public kmp_basic_flag { - public: - kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag(p) {} - kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag(p, thr) {} - kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag(p, c) {} - void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { - return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - void wait(kmp_info_t *this_thr, int final_spin - USE_ITT_BUILD_ARG(void * 
itt_sync_obj)) { - __kmp_wait_template(this_thr, this, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - flag_type get_ptr_type() { return flag32; } -}; - -class kmp_flag_64 : public kmp_basic_flag { - public: - kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag(p) {} - kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag(p, thr) {} - kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag(p, c) {} - void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { - return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - void wait(kmp_info_t *this_thr, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj)) { - __kmp_wait_template(this_thr, this, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - flag_type get_ptr_type() { return flag64; } -}; - -// Hierarchical 64-bit on-core barrier instantiation -class kmp_flag_oncore : public kmp_flag { - kmp_uint64 checker; - kmp_info_t * waiting_threads[1]; - kmp_uint32 num_waiting_threads; - kmp_uint32 offset; /**< Portion of flag that is of interest for an operation. */ - bool flag_switch; /**< Indicates a switch in flag location. */ - enum barrier_type bt; /**< Barrier type. */ - kmp_info_t * this_thr; /**< Thread that may be redirected to different flag location. */ -#if USE_ITT_BUILD - void *itt_sync_obj; /**< ITT object that must be passed to new flag location. 
*/ -#endif - unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; } -public: - kmp_flag_oncore(volatile kmp_uint64 *p) - : kmp_flag(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {} - kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) - : kmp_flag(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {} - kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t, - kmp_info_t * thr -#if USE_ITT_BUILD - , void *itt -#endif - ) - : kmp_flag(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx), - flag_switch(false), bt(bar_t), this_thr(thr) -#if USE_ITT_BUILD - , itt_sync_obj(itt) -#endif - {} + * The flag_type describes the storage used for the flag. + */ +enum flag_type { + flag32, /**< 32 bit flags */ + flag64, /**< 64 bit flags */ + flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */ +}; + +/*! + * Base class for wait/release volatile flag + */ +template +class kmp_flag { + volatile P * loc; /**< Pointer to the flag storage that is modified by another thread */ + flag_type t; /**< "Type" of the flag in loc */ + public: + typedef P flag_t; + kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {} + /*! + * @result the pointer to the actual flag + */ + volatile P * get() { return loc; } + /*! + * @param new_loc in set loc to point at new_loc + */ + void set(volatile P *new_loc) { loc = new_loc; } + /*! 
+ * @result the flag_type + */ + flag_type get_type() { return t; } + // Derived classes must provide the following: + /* + kmp_info_t * get_waiter(kmp_uint32 i); + kmp_uint32 get_num_waiters(); + bool done_check(); + bool done_check_val(P old_loc); + bool notdone_check(); + P internal_release(); + void suspend(int th_gtid); + void resume(int th_gtid); + P set_sleeping(); + P unset_sleeping(); + bool is_sleeping(); + bool is_any_sleeping(); + bool is_sleeping_val(P old_loc); + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained); + */ +}; + +/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_* + must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */ +template +static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) +{ + // NOTE: We may not belong to a team at this point. 
+ volatile typename C::flag_t *spin = flag->get(); + kmp_uint32 spins; + kmp_uint32 hibernate; + int th_gtid; + int tasks_completed = FALSE; + + KMP_FSYNC_SPIN_INIT(spin, NULL); + if (flag->done_check()) { + KMP_FSYNC_SPIN_ACQUIRED(spin); + return; + } + th_gtid = this_thr->th.th_info.ds.ds_gtid; + KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag)); + +#if OMPT_SUPPORT && OMPT_BLAME + ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state; + if (ompt_enabled && + ompt_state != ompt_state_undefined) { + if (ompt_state == ompt_state_idle) { + if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) { + ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1); + } + } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) { + KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || + ompt_state == ompt_state_wait_barrier_implicit || + ompt_state == ompt_state_wait_barrier_explicit); + + ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info; + ompt_parallel_id_t pId; + ompt_task_id_t tId; + if (team){ + pId = team->ompt_team_info.parallel_id; + tId = team->ompt_task_info.task_id; + } else { + pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; + tId = this_thr->th.th_current_task->ompt_task_info.task_id; + } + ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId); + } + } +#endif + + // Setup for waiting + KMP_INIT_YIELD(spins); + + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + // The worker threads cannot rely on the team struct existing at this point. + // Use the bt values cached in the thread struct instead. 
+#ifdef KMP_ADJUST_BLOCKTIME + if (__kmp_zero_bt && !this_thr->th.th_team_bt_set) + // Force immediate suspend if not set by user and more threads than available procs + hibernate = 0; + else + hibernate = this_thr->th.th_team_bt_intervals; +#else + hibernate = this_thr->th.th_team_bt_intervals; +#endif /* KMP_ADJUST_BLOCKTIME */ + + /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety + of the specified #intervals, plus up to one interval more. This increment make + certain that this thread doesn't go to sleep too soon. */ + if (hibernate != 0) + hibernate++; + + // Add in the current time value. + hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value); + KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n", + th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, + hibernate - __kmp_global.g.g_time.dt.t_value)); + } + + KMP_MB(); + + // Main wait spin loop + while (flag->notdone_check()) { + int in_pool; + + /* If the task team is NULL, it means one of things: + 1) A newly-created thread is first being released by __kmp_fork_barrier(), and + its task team has not been set up yet. + 2) All tasks have been executed to completion, this thread has decremented the task + team's ref ct and possibly deallocated it, and should no longer reference it. + 3) Tasking is off for this region. This could be because we are in a serialized region + (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). 
*/ + kmp_task_team_t * task_team = NULL; + if (__kmp_tasking_mode != tskm_immediate_exec) { + task_team = this_thr->th.th_task_team; + if (task_team != NULL) { + if (TCR_SYNC_4(task_team->tt.tt_active)) { + if (KMP_TASKING_ENABLED(task_team)) + flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed + USE_ITT_BUILD_ARG(itt_sync_obj), 0); + } + else { + KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); + this_thr->th.th_task_team = NULL; + } + } // if + } // if + + KMP_FSYNC_SPIN_PREPARE(spin); + if (TCR_4(__kmp_global.g.g_done)) { + if (__kmp_global.g.g_abort) + __kmp_abort_thread(); + break; + } + + // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield + KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); + // TODO: Should it be number of cores instead of thread contexts? Like: + // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores); + // Need performance improvement data to make the change... + KMP_YIELD_SPIN(spins); + + // Check if this thread was transferred from a team + // to the thread pool (or vice-versa) while spinning. + in_pool = !!TCR_4(this_thr->th.th_in_pool); + if (in_pool != !!this_thr->th.th_active_in_pool) { + if (in_pool) { // Recently transferred from team to pool + KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + this_thr->th.th_active_in_pool = TRUE; + /* Here, we cannot assert that: + KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth); + __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join + lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously + by the workers. The two can get out of sync for brief periods of time. 
*/ + } + else { // Recently transferred from pool to team + KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth); + KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); + this_thr->th.th_active_in_pool = FALSE; + } + } + + // Don't suspend if KMP_BLOCKTIME is set to "infinite" + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) + continue; + + // Don't suspend if there is a likelihood of new tasks being spawned. + if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks)) + continue; + + // If we have waited a bit more, fall asleep + if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate) + continue; + + KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid)); + + flag->suspend(th_gtid); + + if (TCR_4(__kmp_global.g.g_done)) { + if (__kmp_global.g.g_abort) + __kmp_abort_thread(); + break; + } + // TODO: If thread is done with work and times out, disband/free + } + +#if OMPT_SUPPORT && OMPT_BLAME + if (ompt_enabled && + ompt_state != ompt_state_undefined) { + if (ompt_state == ompt_state_idle) { + if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) { + ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1); + } + } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) { + KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || + ompt_state == ompt_state_wait_barrier_implicit || + ompt_state == ompt_state_wait_barrier_explicit); + + ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info; + ompt_parallel_id_t pId; + ompt_task_id_t tId; + if (team){ + pId = team->ompt_team_info.parallel_id; + tId = team->ompt_task_info.task_id; + } else { + pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; + tId = this_thr->th.th_current_task->ompt_task_info.task_id; + } + ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId); + } + } +#endif + + KMP_FSYNC_SPIN_ACQUIRED(spin); +} + +/* Release any threads specified as waiting on the flag by releasing the flag and resume the 
waiting thread + if indicated by the sleep bit(s). A thread that calls __kmp_wait_template must call this function to wake + up the potentially sleeping thread and prevent deadlocks! */ +template +static inline void __kmp_release_template(C *flag) +{ +#ifdef KMP_DEBUG + int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; +#endif + KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get())); + KMP_DEBUG_ASSERT(flag->get()); + KMP_FSYNC_RELEASING(flag->get()); + + flag->internal_release(); + + KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), *(flag->get()))); + + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + // Only need to check sleep stuff if infinite block time not set + if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping? + for (unsigned int i=0; iget_num_waiters(); ++i) { + kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag + if (waiter) { + int wait_gtid = waiter->th.th_info.ds.ds_gtid; + // Wake up thread if needed + KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n", + gtid, wait_gtid, flag->get())); + flag->resume(wait_gtid); // unsets flag's current_waiter when done + } + } + } + } +} + +template +struct flag_traits {}; + +template <> +struct flag_traits { + typedef kmp_uint32 flag_t; + static const flag_type t = flag32; + static inline flag_t tcr(flag_t f) { return TCR_4(f); } + static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); } + static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); } + static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); } +}; + +template <> +struct flag_traits { + typedef kmp_uint64 flag_t; + static const flag_type t = flag64; + static inline 
flag_t tcr(flag_t f) { return TCR_8(f); } + static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); } + static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); } + static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); } +}; + +template +class kmp_basic_flag : public kmp_flag { + typedef flag_traits traits_type; + FlagType checker; /**< Value to compare flag to to check if flag has been released. */ + kmp_info_t * waiting_threads[1]; /**< Array of threads sleeping on this thread. */ + kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this thread. */ + public: + kmp_basic_flag(volatile FlagType *p) : kmp_flag(p, traits_type::t), num_waiting_threads(0) {} + kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag(p, traits_type::t), num_waiting_threads(1) { + waiting_threads[0] = thr; + } + kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag(p, traits_type::t), checker(c), num_waiting_threads(0) {} + /*! 
+ * param i in index into waiting_threads + * @result the thread that is waiting at index i + */ kmp_info_t * get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(ith.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG) - flag_switch = true; - if (byteref(get(),offset) != 1 && !flag_switch) - return true; - else if (flag_switch) { - this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING; - kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP); - __kmp_wait_64(this_thr, &flag, TRUE -#if USE_ITT_BUILD - , itt_sync_obj -#endif - ); - } - return false; - } - void internal_release() { - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { - byteref(get(),offset) = 1; - } - else { - kmp_uint64 mask=0; - byteref(&mask,offset) = 1; - (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask); - } - } - kmp_uint64 set_sleeping() { - return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE); - } - kmp_uint64 unset_sleeping() { - return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE); - } - bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; } - bool is_sleeping() { return is_sleeping_val(*get()); } - bool is_any_sleeping() { return is_sleeping_val(*get()); } - void wait(kmp_info_t *this_thr, int final_spin) { - __kmp_wait_template(this_thr, this, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { - return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - kmp_uint8 *get_stolen() { return NULL; } - enum barrier_type get_bt() { 
return bt; } - flag_type get_ptr_type() { return flag_oncore; } -}; - - -/*! -@} -*/ - -#endif // KMP_WAIT_RELEASE_H + num_waiting_threads = 1; + } + /*! + * @result true if the flag object has been released. + */ + bool done_check() { return traits_type::tcr(*(this->get())) == checker; } + /*! + * @param old_loc in old value of flag + * @result true if the flag's old value indicates it was released. + */ + bool done_check_val(FlagType old_loc) { return old_loc == checker; } + /*! + * @result true if the flag object is not yet released. + * Used in __kmp_wait_template like: + * @code + * while (flag.notdone_check()) { pause(); } + * @endcode + */ + bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; } + /*! + * @result Actual flag value before release was applied. + * Trigger all waiting threads to run by modifying flag to release state. + */ + void internal_release() { + (void) traits_type::test_then_add4((volatile FlagType *)this->get()); + } + /*! + * @result Actual flag value before sleep bit(s) set. + * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s). + */ + FlagType set_sleeping() { + return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE); + } + /*! + * @result Actual flag value before sleep bit(s) cleared. + * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s). + */ + FlagType unset_sleeping() { + return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE); + } + /*! + * @param old_loc in old value of flag + * Test whether there are threads sleeping on the flag's old value in old_loc. + */ + bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; } + /*! + * Test whether there are threads sleeping on the flag. 
+ */ + bool is_sleeping() { return is_sleeping_val(*(this->get())); } + bool is_any_sleeping() { return is_sleeping_val(*(this->get())); } + kmp_uint8 *get_stolen() { return NULL; } + enum barrier_type get_bt() { return bs_last_barrier; } +}; + +class kmp_flag_32 : public kmp_basic_flag { + public: + kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag(p) {} + kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag(p, thr) {} + kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag(p, c) {} + void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } + void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { + return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); + } + void wait(kmp_info_t *this_thr, int final_spin + USE_ITT_BUILD_ARG(void * itt_sync_obj)) { + __kmp_wait_template(this_thr, this, final_spin + USE_ITT_BUILD_ARG(itt_sync_obj)); + } + void release() { __kmp_release_template(this); } + flag_type get_ptr_type() { return flag32; } +}; + +class kmp_flag_64 : public kmp_basic_flag { + public: + kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag(p) {} + kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag(p, thr) {} + kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag(p, c) {} + void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } + void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { + return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); + } + void wait(kmp_info_t *this_thr, int final_spin + 
USE_ITT_BUILD_ARG(void * itt_sync_obj)) { + __kmp_wait_template(this_thr, this, final_spin + USE_ITT_BUILD_ARG(itt_sync_obj)); + } + void release() { __kmp_release_template(this); } + flag_type get_ptr_type() { return flag64; } +}; + +// Hierarchical 64-bit on-core barrier instantiation +class kmp_flag_oncore : public kmp_flag { + kmp_uint64 checker; + kmp_info_t * waiting_threads[1]; + kmp_uint32 num_waiting_threads; + kmp_uint32 offset; /**< Portion of flag that is of interest for an operation. */ + bool flag_switch; /**< Indicates a switch in flag location. */ + enum barrier_type bt; /**< Barrier type. */ + kmp_info_t * this_thr; /**< Thread that may be redirected to different flag location. */ +#if USE_ITT_BUILD + void *itt_sync_obj; /**< ITT object that must be passed to new flag location. */ +#endif + unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; } +public: + kmp_flag_oncore(volatile kmp_uint64 *p) + : kmp_flag(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {} + kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) + : kmp_flag(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {} + kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t, + kmp_info_t * thr +#if USE_ITT_BUILD + , void *itt +#endif + ) + : kmp_flag(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx), + flag_switch(false), bt(bar_t), this_thr(thr) +#if USE_ITT_BUILD + , itt_sync_obj(itt) +#endif + {} + kmp_info_t * get_waiter(kmp_uint32 i) { + KMP_DEBUG_ASSERT(ith.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG) + flag_switch = true; + if (byteref(get(),offset) != 1 && !flag_switch) + return true; + else if (flag_switch) { + this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING; + kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP); + __kmp_wait_64(this_thr, &flag, TRUE +#if USE_ITT_BUILD + , 
itt_sync_obj +#endif + ); + } + return false; + } + void internal_release() { + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { + byteref(get(),offset) = 1; + } + else { + kmp_uint64 mask=0; + byteref(&mask,offset) = 1; + (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask); + } + } + kmp_uint64 set_sleeping() { + return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE); + } + kmp_uint64 unset_sleeping() { + return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE); + } + bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; } + bool is_sleeping() { return is_sleeping_val(*get()); } + bool is_any_sleeping() { return is_sleeping_val(*get()); } + void wait(kmp_info_t *this_thr, int final_spin) { + __kmp_wait_template(this_thr, this, final_spin + USE_ITT_BUILD_ARG(itt_sync_obj)); + } + void release() { __kmp_release_template(this); } + void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); } + void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); } + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { + return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); + } + kmp_uint8 *get_stolen() { return NULL; } + enum barrier_type get_bt() { return bt; } + flag_type get_ptr_type() { return flag_oncore; } +}; + + +/*! +@} +*/ + +#endif // KMP_WAIT_RELEASE_H diff --git a/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h b/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h index 87c6f37f161..61a046c37d2 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h +++ b/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h @@ -1,56 +1,56 @@ -/* - * kmp_wrapper_getpid.h -- getpid() declaration. 
- */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_WRAPPER_GETPID_H -#define KMP_WRAPPER_GETPID_H - -#if KMP_OS_UNIX - - // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard headers. - #include - #include - -#elif KMP_OS_WINDOWS - - // On Windows* OS _getpid() returns int (not pid_t) and is declared in "process.h". - #include - // Let us simulate Unix. - typedef int pid_t; - #define getpid _getpid - -#else - - #error Unknown or unsupported OS. - -#endif - -/* - TODO: All the libomp source code uses pid_t type for storing the result of getpid(), it is good. - But often it printed as "%d", that is not good, because it ignores pid_t definition (may pid_t - be longer that int?). It seems all pid prints should be rewritten as - - printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid ); - - or (at least) as - - printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid ); - - (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UNIT32_SPEC are defined in "kmp_os.h".) - -*/ - -#endif // KMP_WRAPPER_GETPID_H - -// end of file // +/* + * kmp_wrapper_getpid.h -- getpid() declaration. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_WRAPPER_GETPID_H +#define KMP_WRAPPER_GETPID_H + +#if KMP_OS_UNIX + + // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard headers. 
+ #include + #include + +#elif KMP_OS_WINDOWS + + // On Windows* OS _getpid() returns int (not pid_t) and is declared in "process.h". + #include + // Let us simulate Unix. + typedef int pid_t; + #define getpid _getpid + +#else + + #error Unknown or unsupported OS. + +#endif + +/* + TODO: All the libomp source code uses pid_t type for storing the result of getpid(), it is good. + But often it printed as "%d", that is not good, because it ignores pid_t definition (may pid_t + be longer that int?). It seems all pid prints should be rewritten as + + printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid ); + + or (at least) as + + printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid ); + + (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UNIT32_SPEC are defined in "kmp_os.h".) + +*/ + +#endif // KMP_WRAPPER_GETPID_H + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h b/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h index ce9ae3f40a1..453d1ef5e7c 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h +++ b/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h @@ -1,205 +1,205 @@ -/* - * kmp_wrapper_malloc.h -- Wrappers for memory allocation routines - * (malloc(), free(), and others). - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_WRAPPER_MALLOC_H -#define KMP_WRAPPER_MALLOC_H - -/* - This header serves for 3 purposes: - - 1. Declaring standard memory allocation rourines in OS-independent way. - 2. Passing source location info through memory allocation wrappers. - 3. Enabling native memory debugging capabilities. - - - 1. Declaring standard memory allocation rourines in OS-independent way. 
- ----------------------------------------------------------------------- - - On Linux* OS, alloca() function is declared in header, while on Windows* OS there is no - header, function _alloca() (note underscore!) is declared in . This header - eliminates these differences, so client code incluiding "kmp_wrapper_malloc.h" can rely on - following routines: - - malloc - calloc - realloc - free - alloca - - in OS-independent way. It also enables memory tracking capabilities in debug build. (Currently - it is available only on Windows* OS.) - - - 2. Passing source location info through memory allocation wrappers. - ------------------------------------------------------------------- - - Some tools may help debugging memory errors, for example, report memory leaks. However, memory - allocation wrappers may hinder source location. - - For example: - - void * aligned_malloc( int size ) { - void * ptr = malloc( size ); // All the memory leaks will be reported at this line. - // some adjustments... - return ptr; - }; - - ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-( - - To overcome the problem, information about original source location should be passed through all - the memory allocation wrappers, for example: - - void * aligned_malloc( int size, char const * file, int line ) { - void * ptr = _malloc_dbg( size, file, line ); - // some adjustments... - return ptr; - }; - - void * ptr = aligned_malloc( size, __FILE__, __LINE__ ); - - This is a good idea for debug, but passing additional arguments impacts performance. Disabling - extra arguments in release version of the software introduces too many conditional compilation, - which makes code unreadable. This header defines few macros and functions facilitating it: - - void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { - void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); - // some adjustments... 
- return ptr; - }; - #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) - // Use macro instead of direct call to function. - - void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be reported at this line. - - - 3. Enabling native memory debugging capabilities. - ------------------------------------------------- - - Some platforms may offer memory debugging capabilities. For example, debug version of Microsoft - RTL tracks all memory allocations and can report memory leaks. This header enables this, and - makes report more useful (see "Passing source location info through memory allocation - wrappers"). - -*/ - -#include - -#include "kmp_os.h" - -// Include alloca() declaration. -#if KMP_OS_WINDOWS - #include // Windows* OS: _alloca() declared in "malloc.h". - #define alloca _alloca // Allow to use alloca() with no underscore. -#elif KMP_OS_FREEBSD || KMP_OS_NETBSD - // Declared in "stdlib.h". -#elif KMP_OS_UNIX - #include // Linux* OS and OS X*: alloc() declared in "alloca". -#else - #error Unknown or unsupported OS. -#endif - -/* - KMP_SRC_LOC_DECL -- Declaring source location paramemters, to be used in function declaration. - KMP_SRC_LOC_PARM -- Source location paramemters, to be used to pass parameters to underlying - levels. - KMP_SRC_LOC_CURR -- Source location arguments describing current location, to be used at - top-level. - - Typical usage: - - void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { - // Note: Comma is missed before KMP_SRC_LOC_DECL. - KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) ); - ... - } - #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) - // Use macro instead of direct call to function -- macro passes info about current - // source location to the func. 
-*/ -#if KMP_DEBUG - #define KMP_SRC_LOC_DECL , char const * _file_, int _line_ - #define KMP_SRC_LOC_PARM , _file_, _line_ - #define KMP_SRC_LOC_CURR , __FILE__, __LINE__ -#else - #define KMP_SRC_LOC_DECL - #define KMP_SRC_LOC_PARM - #define KMP_SRC_LOC_CURR -#endif // KMP_DEBUG - -/* - malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) with accepts extra - arguments (source location info) in debug mode. They should be used in place of malloc() and - free(), this allows enabling native memory debugging capabilities (if any). - - Typical usage: - - ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); - // Inside memory allocation wrapper, or - ptr = malloc_src_loc( size KMP_SRC_LOC_CURR ); - // Outside of memory allocation wrapper. - - -*/ -#define malloc_src_loc( args ) _malloc_src_loc( args ) -#define free_src_loc( args ) _free_src_loc( args ) - /* - Depending on build mode (debug or release), malloc_src_loc is declared with 1 or 3 - parameters, but calls to malloc_src_loc() are always the same: - - ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR - - Compiler issues warning/error "too few arguments in macro invocation". Declaring two - macroses, malloc_src_loc() and _malloc_src_loc() overcomes the problem. - */ - -#if KMP_DEBUG - - #if KMP_OS_WINDOWS && _DEBUG - // KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined. - - // Windows* OS has native memory debugging capabilities. Enable them. 
- - #include <crtdbg.h> - - #define KMP_MEM_BLOCK _CLIENT_BLOCK - #define malloc( size ) _malloc_dbg( (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) - #define calloc( num, size ) _calloc_dbg( (num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) - #define realloc( ptr, size ) _realloc_dbg( (ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) - #define free( ptr ) _free_dbg( (ptr), KMP_MEM_BLOCK ) - - #define _malloc_src_loc( size, file, line ) _malloc_dbg( (size), KMP_MEM_BLOCK, (file), (line) ) - #define _free_src_loc( ptr, file, line ) _free_dbg( (ptr), KMP_MEM_BLOCK ) - - #else - - // Linux* OS, OS X*, or non-debug Windows* OS. - - #define _malloc_src_loc( size, file, line ) malloc( (size) ) - #define _free_src_loc( ptr, file, line ) free( (ptr) ) - - #endif - -#else - - // In release build malloc_src_loc() and free_src_loc() do not have extra parameters. - #define _malloc_src_loc( size ) malloc( (size) ) - #define _free_src_loc( ptr ) free( (ptr) ) - -#endif // KMP_DEBUG - -#endif // KMP_WRAPPER_MALLOC_H - -// end of file // +/* + * kmp_wrapper_malloc.h -- Wrappers for memory allocation routines + * (malloc(), free(), and others). + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_WRAPPER_MALLOC_H +#define KMP_WRAPPER_MALLOC_H + +/* + This header serves for 3 purposes: + + 1. Declaring standard memory allocation rourines in OS-independent way. + 2. Passing source location info through memory allocation wrappers. + 3. Enabling native memory debugging capabilities. + + + 1. Declaring standard memory allocation rourines in OS-independent way. 
+ ----------------------------------------------------------------------- + + On Linux* OS, alloca() function is declared in <alloca.h> header, while on Windows* OS there is no + <alloca.h> header, function _alloca() (note underscore!) is declared in <malloc.h>. This header + eliminates these differences, so client code incluiding "kmp_wrapper_malloc.h" can rely on + following routines: + + malloc + calloc + realloc + free + alloca + + in OS-independent way. It also enables memory tracking capabilities in debug build. (Currently + it is available only on Windows* OS.) + + + 2. Passing source location info through memory allocation wrappers. + ------------------------------------------------------------------- + + Some tools may help debugging memory errors, for example, report memory leaks. However, memory + allocation wrappers may hinder source location. + + For example: + + void * aligned_malloc( int size ) { + void * ptr = malloc( size ); // All the memory leaks will be reported at this line. + // some adjustments... + return ptr; + }; + + ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-( + + To overcome the problem, information about original source location should be passed through all + the memory allocation wrappers, for example: + + void * aligned_malloc( int size, char const * file, int line ) { + void * ptr = _malloc_dbg( size, file, line ); + // some adjustments... + return ptr; + }; + + void * ptr = aligned_malloc( size, __FILE__, __LINE__ ); + + This is a good idea for debug, but passing additional arguments impacts performance. Disabling + extra arguments in release version of the software introduces too many conditional compilation, + which makes code unreadable. This header defines few macros and functions facilitating it: + + void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { + void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); + // some adjustments... 
+ return ptr; + }; + #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) + // Use macro instead of direct call to function. + + void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be reported at this line. + + + 3. Enabling native memory debugging capabilities. + ------------------------------------------------- + + Some platforms may offer memory debugging capabilities. For example, debug version of Microsoft + RTL tracks all memory allocations and can report memory leaks. This header enables this, and + makes report more useful (see "Passing source location info through memory allocation + wrappers"). + +*/ + +#include <stdlib.h> + +#include "kmp_os.h" + +// Include alloca() declaration. +#if KMP_OS_WINDOWS + #include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h". + #define alloca _alloca // Allow to use alloca() with no underscore. +#elif KMP_OS_FREEBSD || KMP_OS_NETBSD + // Declared in "stdlib.h". +#elif KMP_OS_UNIX + #include <alloca.h> // Linux* OS and OS X*: alloc() declared in "alloca". +#else + #error Unknown or unsupported OS. +#endif + +/* + KMP_SRC_LOC_DECL -- Declaring source location paramemters, to be used in function declaration. + KMP_SRC_LOC_PARM -- Source location paramemters, to be used to pass parameters to underlying + levels. + KMP_SRC_LOC_CURR -- Source location arguments describing current location, to be used at + top-level. + + Typical usage: + + void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { + // Note: Comma is missed before KMP_SRC_LOC_DECL. + KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) ); + ... + } + #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) + // Use macro instead of direct call to function -- macro passes info about current + // source location to the func. 
+*/ +#if KMP_DEBUG + #define KMP_SRC_LOC_DECL , char const * _file_, int _line_ + #define KMP_SRC_LOC_PARM , _file_, _line_ + #define KMP_SRC_LOC_CURR , __FILE__, __LINE__ +#else + #define KMP_SRC_LOC_DECL + #define KMP_SRC_LOC_PARM + #define KMP_SRC_LOC_CURR +#endif // KMP_DEBUG + +/* + malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) with accepts extra + arguments (source location info) in debug mode. They should be used in place of malloc() and + free(), this allows enabling native memory debugging capabilities (if any). + + Typical usage: + + ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); + // Inside memory allocation wrapper, or + ptr = malloc_src_loc( size KMP_SRC_LOC_CURR ); + // Outside of memory allocation wrapper. + + +*/ +#define malloc_src_loc( args ) _malloc_src_loc( args ) +#define free_src_loc( args ) _free_src_loc( args ) + /* + Depending on build mode (debug or release), malloc_src_loc is declared with 1 or 3 + parameters, but calls to malloc_src_loc() are always the same: + + ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR + + Compiler issues warning/error "too few arguments in macro invocation". Declaring two + macroses, malloc_src_loc() and _malloc_src_loc() overcomes the problem. + */ + +#if KMP_DEBUG + + #if KMP_OS_WINDOWS && _DEBUG + // KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined. + + // Windows* OS has native memory debugging capabilities. Enable them. 
+ + #include <crtdbg.h> + + #define KMP_MEM_BLOCK _CLIENT_BLOCK + #define malloc( size ) _malloc_dbg( (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) + #define calloc( num, size ) _calloc_dbg( (num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) + #define realloc( ptr, size ) _realloc_dbg( (ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) + #define free( ptr ) _free_dbg( (ptr), KMP_MEM_BLOCK ) + + #define _malloc_src_loc( size, file, line ) _malloc_dbg( (size), KMP_MEM_BLOCK, (file), (line) ) + #define _free_src_loc( ptr, file, line ) _free_dbg( (ptr), KMP_MEM_BLOCK ) + + #else + + // Linux* OS, OS X*, or non-debug Windows* OS. + + #define _malloc_src_loc( size, file, line ) malloc( (size) ) + #define _free_src_loc( ptr, file, line ) free( (ptr) ) + + #endif + +#else + + // In release build malloc_src_loc() and free_src_loc() do not have extra parameters. + #define _malloc_src_loc( size ) malloc( (size) ) + #define _free_src_loc( ptr ) free( (ptr) ) + +#endif // KMP_DEBUG + +#endif // KMP_WRAPPER_MALLOC_H + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/libomp.rc.var b/contrib/libs/cxxsupp/openmp/libomp.rc.var index fcc64c9f2f1..cf6a9c9efa2 100644 --- a/contrib/libs/cxxsupp/openmp/libomp.rc.var +++ b/contrib/libs/cxxsupp/openmp/libomp.rc.var @@ -1,70 +1,70 @@ -// libomp.rc.var - -// -////===----------------------------------------------------------------------===// -//// -//// The LLVM Compiler Infrastructure -//// -//// This file is dual licensed under the MIT and the University of Illinois Open -//// Source Licenses. See LICENSE.txt for details. -//// -////===----------------------------------------------------------------------===// -// - -#include "winres.h" -#include "kmp_config.h" - -LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // English (U.S.) 
resources -#pragma code_page(1252) - -VS_VERSION_INFO VERSIONINFO - // Parts of FILEVERSION and PRODUCTVERSION are 16-bit fields, entire build date yyyymmdd - // does not fit into one version part, so we need to split it into yyyy and mmdd: - FILEVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@ - PRODUCTVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@ - FILEFLAGSMASK VS_FFI_FILEFLAGSMASK - FILEFLAGS 0 -#if KMP_DEBUG - | VS_FF_DEBUG -#endif -#if @LIBOMP_VERSION_BUILD@ == 0 - | VS_FF_PRIVATEBUILD | VS_FF_PRERELEASE -#endif - FILEOS VOS_NT_WINDOWS32 // Windows* Server* 2003, XP*, 2000, or NT* - FILETYPE VFT_DLL - BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200) - BEGIN - - // FileDescription and LegalCopyright should be short. - VALUE "FileDescription", "LLVM* OpenMP* Runtime Library\0" - // Following values may be relatively long. - VALUE "CompanyName", "LLVM\0" - // VALUE "LegalTrademarks", "\0" // Not used for now. - VALUE "ProductName", "LLVM* OpenMP* Runtime Library\0" - VALUE "ProductVersion", "@LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@\0" - VALUE "FileVersion", "@LIBOMP_VERSION_BUILD@\0" - VALUE "InternalName", "@LIBOMP_LIB_FILE@\0" - VALUE "OriginalFilename", "@LIBOMP_LIB_FILE@\0" - VALUE "Comments", - "LLVM* OpenMP* @LIBOMP_LEGAL_TYPE@ Library " - "version @LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@.@LIBOMP_VERSION_BUILD@ " - "for @LIBOMP_LEGAL_ARCH@ architecture built on @LIBOMP_BUILD_DATE@.\0" -#if @LIBOMP_VERSION_BUILD@ == 0 - VALUE "PrivateBuild", - "This is a development build.\0" -#endif - // VALUE "SpecialBuild", "\0" // Not used for now. - - END - END - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 1033, 1200 - // 1033 -- U.S. 
English, 1200 -- Unicode - END - END - -// end of file // +// libomp.rc.var + +// +////===----------------------------------------------------------------------===// +//// +//// The LLVM Compiler Infrastructure +//// +//// This file is dual licensed under the MIT and the University of Illinois Open +//// Source Licenses. See LICENSE.txt for details. +//// +////===----------------------------------------------------------------------===// +// + +#include "winres.h" +#include "kmp_config.h" + +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // English (U.S.) resources +#pragma code_page(1252) + +VS_VERSION_INFO VERSIONINFO + // Parts of FILEVERSION and PRODUCTVERSION are 16-bit fields, entire build date yyyymmdd + // does not fit into one version part, so we need to split it into yyyy and mmdd: + FILEVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@ + PRODUCTVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@ + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK + FILEFLAGS 0 +#if KMP_DEBUG + | VS_FF_DEBUG +#endif +#if @LIBOMP_VERSION_BUILD@ == 0 + | VS_FF_PRIVATEBUILD | VS_FF_PRERELEASE +#endif + FILEOS VOS_NT_WINDOWS32 // Windows* Server* 2003, XP*, 2000, or NT* + FILETYPE VFT_DLL + BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200) + BEGIN + + // FileDescription and LegalCopyright should be short. + VALUE "FileDescription", "LLVM* OpenMP* Runtime Library\0" + // Following values may be relatively long. + VALUE "CompanyName", "LLVM\0" + // VALUE "LegalTrademarks", "\0" // Not used for now. 
+ VALUE "ProductName", "LLVM* OpenMP* Runtime Library\0" + VALUE "ProductVersion", "@LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@\0" + VALUE "FileVersion", "@LIBOMP_VERSION_BUILD@\0" + VALUE "InternalName", "@LIBOMP_LIB_FILE@\0" + VALUE "OriginalFilename", "@LIBOMP_LIB_FILE@\0" + VALUE "Comments", + "LLVM* OpenMP* @LIBOMP_LEGAL_TYPE@ Library " + "version @LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@.@LIBOMP_VERSION_BUILD@ " + "for @LIBOMP_LEGAL_ARCH@ architecture built on @LIBOMP_BUILD_DATE@.\0" +#if @LIBOMP_VERSION_BUILD@ == 0 + VALUE "PrivateBuild", + "This is a development build.\0" +#endif + // VALUE "SpecialBuild", "\0" // Not used for now. + + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 1033, 1200 + // 1033 -- U.S. English, 1200 -- Unicode + END + END + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/omp.h b/contrib/libs/cxxsupp/openmp/omp.h index eadd69db434..2dee5600ecd 100644 --- a/contrib/libs/cxxsupp/openmp/omp.h +++ b/contrib/libs/cxxsupp/openmp/omp.h @@ -1,183 +1,183 @@ -/* - * include/41/omp.h.var - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef __OMP_H -# define __OMP_H - -# define KMP_VERSION_MAJOR 5 -# define KMP_VERSION_MINOR 0 -# define KMP_VERSION_BUILD 20140926 -# define KMP_BUILD_DATE "No_Timestamp" - -# ifdef __cplusplus - extern "C" { -# endif - -# if defined(_WIN32) -# define __KAI_KMPC_CONVENTION __cdecl -# else -# define __KAI_KMPC_CONVENTION -# endif - - /* schedule kind constants */ - typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 - } omp_sched_t; - - /* set API functions */ - extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); - extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); - extern void __KAI_KMPC_CONVENTION omp_set_nested (int); - extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); - extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); - - /* query API functions */ - extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); - extern int __KAI_KMPC_CONVENTION omp_get_nested (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); - extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); - extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); - extern int __KAI_KMPC_CONVENTION omp_in_final (void); - extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_level (void); - extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); - extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); - extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); - extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); - extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); - - /* lock API functions */ - typedef struct omp_lock_t { - void * _lk; - } 
omp_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); - - /* nested lock API functions */ - typedef struct omp_nest_lock_t { - void * _lk; - } omp_nest_lock_t; - - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); - extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); - extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); - - /* lock hint type for dynamic user lock */ - typedef enum omp_lock_hint_t { - omp_lock_hint_none = 0, - omp_lock_hint_uncontended = 1, - omp_lock_hint_contended = (1<<1 ), - omp_lock_hint_nonspeculative = (1<<2 ), - omp_lock_hint_speculative = (1<<3 ), - kmp_lock_hint_hle = (1<<16), - kmp_lock_hint_rtm = (1<<17), - kmp_lock_hint_adaptive = (1<<18) - } omp_lock_hint_t; - - /* hinted lock initializers */ - extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t); - extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t); - - /* time API functions */ - extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); - extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); - - /* OpenMP 4.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); - extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); - extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); - extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); - extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); - extern 
int __KAI_KMPC_CONVENTION omp_get_cancellation (void); - +/* + * include/41/omp.h.var + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef __OMP_H +# define __OMP_H + +# define KMP_VERSION_MAJOR 5 +# define KMP_VERSION_MINOR 0 +# define KMP_VERSION_BUILD 20140926 +# define KMP_BUILD_DATE "No_Timestamp" + +# ifdef __cplusplus + extern "C" { +# endif + +# if defined(_WIN32) +# define __KAI_KMPC_CONVENTION __cdecl +# else +# define __KAI_KMPC_CONVENTION +# endif + + /* schedule kind constants */ + typedef enum omp_sched_t { + omp_sched_static = 1, + omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4 + } omp_sched_t; + + /* set API functions */ + extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); + extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); + extern void __KAI_KMPC_CONVENTION omp_set_nested (int); + extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); + extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); + + /* query API functions */ + extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); + extern int __KAI_KMPC_CONVENTION omp_get_nested (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); + extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); + extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); + extern int __KAI_KMPC_CONVENTION omp_in_final (void); + extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_level (void); + extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num 
(int); + extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); + extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); + extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); + extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); + + /* lock API functions */ + typedef struct omp_lock_t { + void * _lk; + } omp_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); + + /* nested lock API functions */ + typedef struct omp_nest_lock_t { + void * _lk; + } omp_nest_lock_t; + + extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); + extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); + extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); + + /* lock hint type for dynamic user lock */ + typedef enum omp_lock_hint_t { + omp_lock_hint_none = 0, + omp_lock_hint_uncontended = 1, + omp_lock_hint_contended = (1<<1 ), + omp_lock_hint_nonspeculative = (1<<2 ), + omp_lock_hint_speculative = (1<<3 ), + kmp_lock_hint_hle = (1<<16), + kmp_lock_hint_rtm = (1<<17), + kmp_lock_hint_adaptive = (1<<18) + } omp_lock_hint_t; + + /* hinted lock initializers */ + extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t); + extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t); + + /* time API functions */ + extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); + extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); + + /* OpenMP 4.0 */ + extern int __KAI_KMPC_CONVENTION 
omp_get_default_device (void); + extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); + extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); + extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); + extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); + #if 0 // !defined(NORUNTIME) && !defined(USE_STL_SYSTEM) // We need to put all possible dependencies to prevent blinking: // on all stdlib.h that can be mentioned here within a platform. # include #else -# include +# include #endif - /* kmp API functions */ - extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); - extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); - extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); - extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); - extern int __KAI_KMPC_CONVENTION kmp_get_library (void); - extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library (int); - extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); - extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); - extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); - - /* Intel affinity API */ - typedef void * kmp_affinity_mask_t; - - extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); - extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); - extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); 
- extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); - extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); - - /* OpenMP 4.0 affinity API */ - typedef enum omp_proc_bind_t { - omp_proc_bind_false = 0, - omp_proc_bind_true = 1, - omp_proc_bind_master = 2, - omp_proc_bind_close = 3, - omp_proc_bind_spread = 4 - } omp_proc_bind_t; - - extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); - - extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); - extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); - extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); - extern void __KAI_KMPC_CONVENTION kmp_free (void *); - - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); - extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); - -# undef __KAI_KMPC_CONVENTION - - /* Warning: - The following typedefs are not standard, deprecated and will be removed in a future release. 
- */ - typedef int omp_int_t; - typedef double omp_wtime_t; - -# ifdef __cplusplus - } -# endif - -#endif /* __OMP_H */ - + /* kmp API functions */ + extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); + extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); + extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); + extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); + extern int __KAI_KMPC_CONVENTION kmp_get_library (void); + extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); + extern void __KAI_KMPC_CONVENTION kmp_set_library (int); + extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); + extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); + extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); + + /* Intel affinity API */ + typedef void * kmp_affinity_mask_t; + + extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); + extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); + extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); + extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); + + /* OpenMP 4.0 affinity API */ + typedef enum omp_proc_bind_t { + omp_proc_bind_false = 0, + omp_proc_bind_true = 1, + omp_proc_bind_master = 2, + omp_proc_bind_close = 3, + omp_proc_bind_spread = 4 + } omp_proc_bind_t; + + extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); + + extern void * 
__KAI_KMPC_CONVENTION kmp_malloc (size_t); + extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); + extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); + extern void __KAI_KMPC_CONVENTION kmp_free (void *); + + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); + extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); + +# undef __KAI_KMPC_CONVENTION + + /* Warning: + The following typedefs are not standard, deprecated and will be removed in a future release. + */ + typedef int omp_int_t; + typedef double omp_wtime_t; + +# ifdef __cplusplus + } +# endif + +#endif /* __OMP_H */ + diff --git a/contrib/libs/cxxsupp/openmp/ompt-event-specific.h b/contrib/libs/cxxsupp/openmp/ompt-event-specific.h index 71e3cf2aa31..28c1512ac26 100644 --- a/contrib/libs/cxxsupp/openmp/ompt-event-specific.h +++ b/contrib/libs/cxxsupp/openmp/ompt-event-specific.h @@ -1,144 +1,144 @@ -#ifndef __OMPT_EVENT_SPECIFIC_H__ -#define __OMPT_EVENT_SPECIFIC_H__ - -/****************************************************************************** - * File: ompt-event-specific.h - * - * Description: - * - * specify which of the OMPT events are implemented by this runtime system - * and the level of their implementation by a runtime system. - *****************************************************************************/ - -#define _ompt_tokenpaste_helper(x,y) x ## y -#define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y) -#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented) - - -/*---------------------------------------------------------------------------- - | Specify whether an event may occur or not, and whether event callbacks - | never, sometimes, or always occur. - | - | The values for these constants are defined in section 6.1.2 of - | the OMPT TR. They are exposed to tools through ompt_set_callback. 
- +--------------------------------------------------------------------------*/ - -#define ompt_event_NEVER ompt_set_result_event_never_occurs -#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback -#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some -#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always - -#if OMPT_TRACE -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS -#else -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED -#endif - -#if OMPT_BLAME -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS -#else -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED -#endif - -/*---------------------------------------------------------------------------- - | Mandatory Events - +--------------------------------------------------------------------------*/ - -#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS - -#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS - -#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS - -#define ompt_event_control_implemented ompt_event_MAY_ALWAYS - -#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS - - -/*---------------------------------------------------------------------------- - | Optional Events (blame shifting) - +--------------------------------------------------------------------------*/ - -#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME - -#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME - -#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED -#define 
ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_nest_lock_last_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME - - -/*---------------------------------------------------------------------------- - | Optional Events (synchronous events) - +--------------------------------------------------------------------------*/ - -#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_master_begin_implemented 
ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_release_nest_lock_prev_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_nest_lock_first_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_nest_lock_next_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED - -#endif +#ifndef __OMPT_EVENT_SPECIFIC_H__ +#define __OMPT_EVENT_SPECIFIC_H__ + +/****************************************************************************** + * File: 
ompt-event-specific.h + * + * Description: + * + * specify which of the OMPT events are implemented by this runtime system + * and the level of their implementation by a runtime system. + *****************************************************************************/ + +#define _ompt_tokenpaste_helper(x,y) x ## y +#define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y) +#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented) + + +/*---------------------------------------------------------------------------- + | Specify whether an event may occur or not, and whether event callbacks + | never, sometimes, or always occur. + | + | The values for these constants are defined in section 6.1.2 of + | the OMPT TR. They are exposed to tools through ompt_set_callback. + +--------------------------------------------------------------------------*/ + +#define ompt_event_NEVER ompt_set_result_event_never_occurs +#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback +#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some +#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always + +#if OMPT_TRACE +#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS +#else +#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED +#endif + +#if OMPT_BLAME +#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS +#else +#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED +#endif + +/*---------------------------------------------------------------------------- + | Mandatory Events + +--------------------------------------------------------------------------*/ + +#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS 
+#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_control_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS + + +/*---------------------------------------------------------------------------- + | Optional Events (blame shifting) + +--------------------------------------------------------------------------*/ + +#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME + +#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME + +#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_nest_lock_last_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME + + +/*---------------------------------------------------------------------------- + | Optional Events (synchronous events) + +--------------------------------------------------------------------------*/ + +#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE + 
+#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_release_nest_lock_prev_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_nest_lock_first_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_nest_lock_next_implemented 
ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED + +#endif diff --git a/contrib/libs/cxxsupp/openmp/ompt-general.c b/contrib/libs/cxxsupp/openmp/ompt-general.c index 6dda24418ae..4daae81917a 100644 --- a/contrib/libs/cxxsupp/openmp/ompt-general.c +++ b/contrib/libs/cxxsupp/openmp/ompt-general.c @@ -1,535 +1,535 @@ -/***************************************************************************** - * system include files - ****************************************************************************/ - -#include - -#include -#include -#include -#include - - - -/***************************************************************************** - * ompt include files - ****************************************************************************/ - -#include "ompt-specific.c" - - - -/***************************************************************************** - * macros - ****************************************************************************/ - -#define ompt_get_callback_success 1 -#define ompt_get_callback_failure 0 - -#define no_tool_present 0 - -#define OMPT_API_ROUTINE static - -#ifndef OMPT_STR_MATCH -#define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle)) -#endif - - -/***************************************************************************** - * types - ****************************************************************************/ - -typedef struct { - const char *state_name; - 
ompt_state_t state_id; -} ompt_state_info_t; - - -enum tool_setting_e { - omp_tool_error, - omp_tool_unset, - omp_tool_disabled, - omp_tool_enabled -}; - - -typedef void (*ompt_initialize_t) ( - ompt_function_lookup_t ompt_fn_lookup, - const char *version, - unsigned int ompt_version -); - - - -/***************************************************************************** - * global variables - ****************************************************************************/ - -int ompt_enabled = 0; - -ompt_state_info_t ompt_state_info[] = { -#define ompt_state_macro(state, code) { # state, state }, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -}; - -ompt_callbacks_t ompt_callbacks; - -static ompt_initialize_t ompt_initialize_fn = NULL; - - - -/***************************************************************************** - * forward declarations - ****************************************************************************/ - -static ompt_interface_fn_t ompt_fn_lookup(const char *s); - -OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void); - - -/***************************************************************************** - * initialization and finalization (private operations) - ****************************************************************************/ - -/* On Unix-like systems that support weak symbols the following implementation - * of ompt_tool() will be used in case no tool-supplied implementation of - * this function is present in the address space of a process. - * - * On Windows, the ompt_tool_windows function is used to find the - * ompt_tool symbol across all modules loaded by a process. If ompt_tool is - * found, ompt_tool's return value is used to initialize the tool. 
Otherwise, - * NULL is returned and OMPT won't be enabled */ -#if OMPT_HAVE_WEAK_ATTRIBUTE -_OMP_EXTERN -__attribute__ (( weak )) -ompt_initialize_t ompt_tool() -{ -#if OMPT_DEBUG - printf("ompt_tool() is called from the RTL\n"); -#endif - return NULL; -} - -#elif OMPT_HAVE_PSAPI - -#include -#pragma comment(lib, "psapi.lib") -#define ompt_tool ompt_tool_windows - -// The number of loaded modules to start enumeration with EnumProcessModules() -#define NUM_MODULES 128 - -static -ompt_initialize_t ompt_tool_windows() -{ - int i; - DWORD needed, new_size; - HMODULE *modules; - HANDLE process = GetCurrentProcess(); - modules = (HMODULE*)malloc( NUM_MODULES * sizeof(HMODULE) ); - ompt_initialize_t (*ompt_tool_p)() = NULL; - -#if OMPT_DEBUG - printf("ompt_tool_windows(): looking for ompt_tool\n"); -#endif - if (!EnumProcessModules( process, modules, NUM_MODULES * sizeof(HMODULE), - &needed)) { - // Regardless of the error reason use the stub initialization function - free(modules); - return NULL; - } - // Check if NUM_MODULES is enough to list all modules - new_size = needed / sizeof(HMODULE); - if (new_size > NUM_MODULES) { -#if OMPT_DEBUG - printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed); -#endif - modules = (HMODULE*)realloc( modules, needed ); - // If resizing failed use the stub function. 
- if (!EnumProcessModules(process, modules, needed, &needed)) { - free(modules); - return NULL; - } - } - for (i = 0; i < new_size; ++i) { - (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool"); - if (ompt_tool_p) { -#if OMPT_DEBUG - TCHAR modName[MAX_PATH]; - if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool found in module %s\n", - modName); -#endif - free(modules); - return ompt_tool_p(); - } -#if OMPT_DEBUG - else { - TCHAR modName[MAX_PATH]; - if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool not found in module %s\n", - modName); - } -#endif - } - free(modules); - return NULL; -} -#else -# error Either __attribute__((weak)) or psapi.dll are required for OMPT support -#endif // OMPT_HAVE_WEAK_ATTRIBUTE - -void ompt_pre_init() -{ - //-------------------------------------------------- - // Execute the pre-initialization logic only once. - //-------------------------------------------------- - static int ompt_pre_initialized = 0; - - if (ompt_pre_initialized) return; - - ompt_pre_initialized = 1; - - //-------------------------------------------------- - // Use a tool iff a tool is enabled and available. 
- //-------------------------------------------------- - const char *ompt_env_var = getenv("OMP_TOOL"); - tool_setting_e tool_setting = omp_tool_error; - - if (!ompt_env_var || !strcmp(ompt_env_var, "")) - tool_setting = omp_tool_unset; - else if (OMPT_STR_MATCH(ompt_env_var, "disabled")) - tool_setting = omp_tool_disabled; - else if (OMPT_STR_MATCH(ompt_env_var, "enabled")) - tool_setting = omp_tool_enabled; - -#if OMPT_DEBUG - printf("ompt_pre_init(): tool_setting = %d\n", tool_setting); -#endif - switch(tool_setting) { - case omp_tool_disabled: - break; - - case omp_tool_unset: - case omp_tool_enabled: - ompt_initialize_fn = ompt_tool(); - if (ompt_initialize_fn) { - ompt_enabled = 1; - } - break; - - case omp_tool_error: - fprintf(stderr, - "Warning: OMP_TOOL has invalid value \"%s\".\n" - " legal values are (NULL,\"\",\"disabled\"," - "\"enabled\").\n", ompt_env_var); - break; - } -#if OMPT_DEBUG - printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); -#endif -} - - -void ompt_post_init() -{ - //-------------------------------------------------- - // Execute the post-initialization logic only once. - //-------------------------------------------------- - static int ompt_post_initialized = 0; - - if (ompt_post_initialized) return; - - ompt_post_initialized = 1; - - //-------------------------------------------------- - // Initialize the tool if so indicated. 
- //-------------------------------------------------- - if (ompt_enabled) { - ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(), - OMPT_VERSION); - - ompt_thread_t *root_thread = ompt_get_thread(); - - ompt_set_thread_state(root_thread, ompt_state_overhead); - - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - ompt_callbacks.ompt_callback(ompt_event_thread_begin) - (ompt_thread_initial, ompt_get_thread_id()); - } - - ompt_set_thread_state(root_thread, ompt_state_work_serial); - } -} - - -void ompt_fini() -{ - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) { - ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)(); - } - } - - ompt_enabled = 0; -} - - -/***************************************************************************** - * interface operations - ****************************************************************************/ - -/***************************************************************************** - * state - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state, - const char **next_state_name) -{ - const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); - int i = 0; - - for (i = 0; i < len - 1; i++) { - if (ompt_state_info[i].state_id == current_state) { - *next_state = ompt_state_info[i+1].state_id; - *next_state_name = ompt_state_info[i+1].state_name; - return 1; - } - } - - return 0; -} - - - -/***************************************************************************** - * callbacks - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) -{ - switch (evid) { - -#define ompt_event_macro(event_name, callback_type, event_id) \ - case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callbacks.ompt_callback(event_name) = 
(callback_type) cb; \ - } \ - return ompt_event_implementation_status(event_name); - - FOREACH_OMPT_EVENT(ompt_event_macro) - -#undef ompt_event_macro - - default: return ompt_set_result_registration_error; - } -} - - -OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) -{ - switch (evid) { - -#define ompt_event_macro(event_name, callback_type, event_id) \ - case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callback_t mycb = \ - (ompt_callback_t) ompt_callbacks.ompt_callback(event_name); \ - if (mycb) { \ - *cb = mycb; \ - return ompt_get_callback_success; \ - } \ - } \ - return ompt_get_callback_failure; - - FOREACH_OMPT_EVENT(ompt_event_macro) - -#undef ompt_event_macro - - default: return ompt_get_callback_failure; - } -} - - -/***************************************************************************** - * parallel regions - ****************************************************************************/ - -OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) -{ - return __ompt_get_parallel_id_internal(ancestor_level); -} - - -OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) -{ - return __ompt_get_parallel_team_size_internal(ancestor_level); -} - - -OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) -{ - return __ompt_get_parallel_function_internal(ancestor_level); -} - - -OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) -{ - ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id); - - if (thread_state == ompt_state_undefined) { - thread_state = ompt_state_work_serial; - } - - return thread_state; -} - - - -/***************************************************************************** - * threads - ****************************************************************************/ - - -OMPT_API_ROUTINE void *ompt_get_idle_frame() -{ - return __ompt_get_idle_frame_internal(); -} - - - 
-/***************************************************************************** - * tasks - ****************************************************************************/ - - -OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) -{ - return __ompt_get_thread_id_internal(); -} - -OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) -{ - return __ompt_get_task_id_internal(depth); -} - - -OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) -{ - return __ompt_get_task_frame_internal(depth); -} - - -OMPT_API_ROUTINE void *ompt_get_task_function(int depth) -{ - return __ompt_get_task_function_internal(depth); -} - - -/***************************************************************************** - * placeholders - ****************************************************************************/ - -// Don't define this as static. The loader may choose to eliminate the symbol -// even though it is needed by tools. -#define OMPT_API_PLACEHOLDER - -// Ensure that placeholders don't have mangled names in the symbol table. -#ifdef __cplusplus -extern "C" { -#endif - - -OMPT_API_PLACEHOLDER void ompt_idle(void) -{ - // This function is a placeholder used to represent the calling context of - // idle OpenMP worker threads. It is not meant to be invoked. - assert(0); -} - - -OMPT_API_PLACEHOLDER void ompt_overhead(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads working in the OpenMP runtime. It is not meant to be invoked. - assert(0); -} - - -OMPT_API_PLACEHOLDER void ompt_barrier_wait(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a barrier in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); -} - - -OMPT_API_PLACEHOLDER void ompt_task_wait(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a task in the OpenMP runtime. It is not meant - // to be invoked. 
- assert(0); -} - - -OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a mutex in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); -} - -#ifdef __cplusplus -}; -#endif - - -/***************************************************************************** - * compatability - ****************************************************************************/ - -OMPT_API_ROUTINE int ompt_get_ompt_version() -{ - return OMPT_VERSION; -} - - - -/***************************************************************************** - * application-facing API - ****************************************************************************/ - - -/*---------------------------------------------------------------------------- - | control - ---------------------------------------------------------------------------*/ - -_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) -{ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) { - ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier); - } -} - - - -/***************************************************************************** - * API inquiry for tool - ****************************************************************************/ - -static ompt_interface_fn_t ompt_fn_lookup(const char *s) -{ - -#define ompt_interface_fn(fn) \ - if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn; - - FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) - - FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn) - - return (ompt_interface_fn_t) 0; -} +/***************************************************************************** + * system include files + ****************************************************************************/ + +#include + +#include +#include +#include +#include + + + +/***************************************************************************** + * ompt include files + 
****************************************************************************/ + +#include "ompt-specific.c" + + + +/***************************************************************************** + * macros + ****************************************************************************/ + +#define ompt_get_callback_success 1 +#define ompt_get_callback_failure 0 + +#define no_tool_present 0 + +#define OMPT_API_ROUTINE static + +#ifndef OMPT_STR_MATCH +#define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle)) +#endif + + +/***************************************************************************** + * types + ****************************************************************************/ + +typedef struct { + const char *state_name; + ompt_state_t state_id; +} ompt_state_info_t; + + +enum tool_setting_e { + omp_tool_error, + omp_tool_unset, + omp_tool_disabled, + omp_tool_enabled +}; + + +typedef void (*ompt_initialize_t) ( + ompt_function_lookup_t ompt_fn_lookup, + const char *version, + unsigned int ompt_version +); + + + +/***************************************************************************** + * global variables + ****************************************************************************/ + +int ompt_enabled = 0; + +ompt_state_info_t ompt_state_info[] = { +#define ompt_state_macro(state, code) { # state, state }, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +}; + +ompt_callbacks_t ompt_callbacks; + +static ompt_initialize_t ompt_initialize_fn = NULL; + + + +/***************************************************************************** + * forward declarations + ****************************************************************************/ + +static ompt_interface_fn_t ompt_fn_lookup(const char *s); + +OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void); + + +/***************************************************************************** + * initialization and finalization (private operations) + 
****************************************************************************/ + +/* On Unix-like systems that support weak symbols the following implementation + * of ompt_tool() will be used in case no tool-supplied implementation of + * this function is present in the address space of a process. + * + * On Windows, the ompt_tool_windows function is used to find the + * ompt_tool symbol across all modules loaded by a process. If ompt_tool is + * found, ompt_tool's return value is used to initialize the tool. Otherwise, + * NULL is returned and OMPT won't be enabled */ +#if OMPT_HAVE_WEAK_ATTRIBUTE +_OMP_EXTERN +__attribute__ (( weak )) +ompt_initialize_t ompt_tool() +{ +#if OMPT_DEBUG + printf("ompt_tool() is called from the RTL\n"); +#endif + return NULL; +} + +#elif OMPT_HAVE_PSAPI + +#include +#pragma comment(lib, "psapi.lib") +#define ompt_tool ompt_tool_windows + +// The number of loaded modules to start enumeration with EnumProcessModules() +#define NUM_MODULES 128 + +static +ompt_initialize_t ompt_tool_windows() +{ + int i; + DWORD needed, new_size; + HMODULE *modules; + HANDLE process = GetCurrentProcess(); + modules = (HMODULE*)malloc( NUM_MODULES * sizeof(HMODULE) ); + ompt_initialize_t (*ompt_tool_p)() = NULL; + +#if OMPT_DEBUG + printf("ompt_tool_windows(): looking for ompt_tool\n"); +#endif + if (!EnumProcessModules( process, modules, NUM_MODULES * sizeof(HMODULE), + &needed)) { + // Regardless of the error reason use the stub initialization function + free(modules); + return NULL; + } + // Check if NUM_MODULES is enough to list all modules + new_size = needed / sizeof(HMODULE); + if (new_size > NUM_MODULES) { +#if OMPT_DEBUG + printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed); +#endif + modules = (HMODULE*)realloc( modules, needed ); + // If resizing failed use the stub function. 
+ if (!EnumProcessModules(process, modules, needed, &needed)) { + free(modules); + return NULL; + } + } + for (i = 0; i < new_size; ++i) { + (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool"); + if (ompt_tool_p) { +#if OMPT_DEBUG + TCHAR modName[MAX_PATH]; + if (GetModuleFileName(modules[i], modName, MAX_PATH)) + printf("ompt_tool_windows(): ompt_tool found in module %s\n", + modName); +#endif + free(modules); + return ompt_tool_p(); + } +#if OMPT_DEBUG + else { + TCHAR modName[MAX_PATH]; + if (GetModuleFileName(modules[i], modName, MAX_PATH)) + printf("ompt_tool_windows(): ompt_tool not found in module %s\n", + modName); + } +#endif + } + free(modules); + return NULL; +} +#else +# error Either __attribute__((weak)) or psapi.dll are required for OMPT support +#endif // OMPT_HAVE_WEAK_ATTRIBUTE + +void ompt_pre_init() +{ + //-------------------------------------------------- + // Execute the pre-initialization logic only once. + //-------------------------------------------------- + static int ompt_pre_initialized = 0; + + if (ompt_pre_initialized) return; + + ompt_pre_initialized = 1; + + //-------------------------------------------------- + // Use a tool iff a tool is enabled and available. 
+ //-------------------------------------------------- + const char *ompt_env_var = getenv("OMP_TOOL"); + tool_setting_e tool_setting = omp_tool_error; + + if (!ompt_env_var || !strcmp(ompt_env_var, "")) + tool_setting = omp_tool_unset; + else if (OMPT_STR_MATCH(ompt_env_var, "disabled")) + tool_setting = omp_tool_disabled; + else if (OMPT_STR_MATCH(ompt_env_var, "enabled")) + tool_setting = omp_tool_enabled; + +#if OMPT_DEBUG + printf("ompt_pre_init(): tool_setting = %d\n", tool_setting); +#endif + switch(tool_setting) { + case omp_tool_disabled: + break; + + case omp_tool_unset: + case omp_tool_enabled: + ompt_initialize_fn = ompt_tool(); + if (ompt_initialize_fn) { + ompt_enabled = 1; + } + break; + + case omp_tool_error: + fprintf(stderr, + "Warning: OMP_TOOL has invalid value \"%s\".\n" + " legal values are (NULL,\"\",\"disabled\"," + "\"enabled\").\n", ompt_env_var); + break; + } +#if OMPT_DEBUG + printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); +#endif +} + + +void ompt_post_init() +{ + //-------------------------------------------------- + // Execute the post-initialization logic only once. + //-------------------------------------------------- + static int ompt_post_initialized = 0; + + if (ompt_post_initialized) return; + + ompt_post_initialized = 1; + + //-------------------------------------------------- + // Initialize the tool if so indicated. 
+ //-------------------------------------------------- + if (ompt_enabled) { + ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(), + OMPT_VERSION); + + ompt_thread_t *root_thread = ompt_get_thread(); + + ompt_set_thread_state(root_thread, ompt_state_overhead); + + if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { + ompt_callbacks.ompt_callback(ompt_event_thread_begin) + (ompt_thread_initial, ompt_get_thread_id()); + } + + ompt_set_thread_state(root_thread, ompt_state_work_serial); + } +} + + +void ompt_fini() +{ + if (ompt_enabled) { + if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) { + ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)(); + } + } + + ompt_enabled = 0; +} + + +/***************************************************************************** + * interface operations + ****************************************************************************/ + +/***************************************************************************** + * state + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state, + const char **next_state_name) +{ + const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); + int i = 0; + + for (i = 0; i < len - 1; i++) { + if (ompt_state_info[i].state_id == current_state) { + *next_state = ompt_state_info[i+1].state_id; + *next_state_name = ompt_state_info[i+1].state_name; + return 1; + } + } + + return 0; +} + + + +/***************************************************************************** + * callbacks + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) +{ + switch (evid) { + +#define ompt_event_macro(event_name, callback_type, event_id) \ + case event_name: \ + if (ompt_event_implementation_status(event_name)) { \ + ompt_callbacks.ompt_callback(event_name) = 
(callback_type) cb; \ + } \ + return ompt_event_implementation_status(event_name); + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro + + default: return ompt_set_result_registration_error; + } +} + + +OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) +{ + switch (evid) { + +#define ompt_event_macro(event_name, callback_type, event_id) \ + case event_name: \ + if (ompt_event_implementation_status(event_name)) { \ + ompt_callback_t mycb = \ + (ompt_callback_t) ompt_callbacks.ompt_callback(event_name); \ + if (mycb) { \ + *cb = mycb; \ + return ompt_get_callback_success; \ + } \ + } \ + return ompt_get_callback_failure; + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro + + default: return ompt_get_callback_failure; + } +} + + +/***************************************************************************** + * parallel regions + ****************************************************************************/ + +OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) +{ + return __ompt_get_parallel_id_internal(ancestor_level); +} + + +OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) +{ + return __ompt_get_parallel_team_size_internal(ancestor_level); +} + + +OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) +{ + return __ompt_get_parallel_function_internal(ancestor_level); +} + + +OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) +{ + ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id); + + if (thread_state == ompt_state_undefined) { + thread_state = ompt_state_work_serial; + } + + return thread_state; +} + + + +/***************************************************************************** + * threads + ****************************************************************************/ + + +OMPT_API_ROUTINE void *ompt_get_idle_frame() +{ + return __ompt_get_idle_frame_internal(); +} + + + 
+/***************************************************************************** + * tasks + ****************************************************************************/ + + +OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) +{ + return __ompt_get_thread_id_internal(); +} + +OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) +{ + return __ompt_get_task_id_internal(depth); +} + + +OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) +{ + return __ompt_get_task_frame_internal(depth); +} + + +OMPT_API_ROUTINE void *ompt_get_task_function(int depth) +{ + return __ompt_get_task_function_internal(depth); +} + + +/***************************************************************************** + * placeholders + ****************************************************************************/ + +// Don't define this as static. The loader may choose to eliminate the symbol +// even though it is needed by tools. +#define OMPT_API_PLACEHOLDER + +// Ensure that placeholders don't have mangled names in the symbol table. +#ifdef __cplusplus +extern "C" { +#endif + + +OMPT_API_PLACEHOLDER void ompt_idle(void) +{ + // This function is a placeholder used to represent the calling context of + // idle OpenMP worker threads. It is not meant to be invoked. + assert(0); +} + + +OMPT_API_PLACEHOLDER void ompt_overhead(void) +{ + // This function is a placeholder used to represent the OpenMP context of + // threads working in the OpenMP runtime. It is not meant to be invoked. + assert(0); +} + + +OMPT_API_PLACEHOLDER void ompt_barrier_wait(void) +{ + // This function is a placeholder used to represent the OpenMP context of + // threads waiting for a barrier in the OpenMP runtime. It is not meant + // to be invoked. + assert(0); +} + + +OMPT_API_PLACEHOLDER void ompt_task_wait(void) +{ + // This function is a placeholder used to represent the OpenMP context of + // threads waiting for a task in the OpenMP runtime. It is not meant + // to be invoked. 
+ assert(0); +} + + +OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) +{ + // This function is a placeholder used to represent the OpenMP context of + // threads waiting for a mutex in the OpenMP runtime. It is not meant + // to be invoked. + assert(0); +} + +#ifdef __cplusplus +}; +#endif + + +/***************************************************************************** + * compatability + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_get_ompt_version() +{ + return OMPT_VERSION; +} + + + +/***************************************************************************** + * application-facing API + ****************************************************************************/ + + +/*---------------------------------------------------------------------------- + | control + ---------------------------------------------------------------------------*/ + +_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) +{ + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) { + ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier); + } +} + + + +/***************************************************************************** + * API inquiry for tool + ****************************************************************************/ + +static ompt_interface_fn_t ompt_fn_lookup(const char *s) +{ + +#define ompt_interface_fn(fn) \ + if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn; + + FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) + + FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn) + + return (ompt_interface_fn_t) 0; +} diff --git a/contrib/libs/cxxsupp/openmp/ompt-internal.h b/contrib/libs/cxxsupp/openmp/ompt-internal.h index 6466c3bc22c..64e8d2e8fd6 100644 --- a/contrib/libs/cxxsupp/openmp/ompt-internal.h +++ b/contrib/libs/cxxsupp/openmp/ompt-internal.h @@ -1,79 +1,79 @@ -#ifndef __OMPT_INTERNAL_H__ -#define __OMPT_INTERNAL_H__ - -#include "ompt.h" -#include 
"ompt-event-specific.h" - -#define OMPT_VERSION 1 - -#define _OMP_EXTERN extern "C" - -#define OMPT_INVOKER(x) \ - ((x == fork_context_gnu) ? ompt_invoker_program : ompt_invoker_runtime) - - -#define ompt_callback(e) e ## _callback - - -typedef struct ompt_callbacks_s { -#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event); - - FOREACH_OMPT_EVENT(ompt_event_macro) - -#undef ompt_event_macro -} ompt_callbacks_t; - - - -typedef struct { - ompt_frame_t frame; - void* function; - ompt_task_id_t task_id; -} ompt_task_info_t; - - -typedef struct { - ompt_parallel_id_t parallel_id; - void *microtask; -} ompt_team_info_t; - - -typedef struct ompt_lw_taskteam_s { - ompt_team_info_t ompt_team_info; - ompt_task_info_t ompt_task_info; - struct ompt_lw_taskteam_s *parent; -} ompt_lw_taskteam_t; - - -typedef struct ompt_parallel_info_s { - ompt_task_id_t parent_task_id; /* id of parent task */ - ompt_parallel_id_t parallel_id; /* id of parallel region */ - ompt_frame_t *parent_task_frame; /* frame data of parent task */ - void *parallel_function; /* pointer to outlined function */ -} ompt_parallel_info_t; - - -typedef struct { - ompt_state_t state; - ompt_wait_id_t wait_id; - void *idle_frame; -} ompt_thread_info_t; - - -extern ompt_callbacks_t ompt_callbacks; - -#ifdef __cplusplus -extern "C" { -#endif - -void ompt_pre_init(void); -void ompt_post_init(void); -void ompt_fini(void); - -extern int ompt_enabled; - -#ifdef __cplusplus -}; -#endif - -#endif +#ifndef __OMPT_INTERNAL_H__ +#define __OMPT_INTERNAL_H__ + +#include "ompt.h" +#include "ompt-event-specific.h" + +#define OMPT_VERSION 1 + +#define _OMP_EXTERN extern "C" + +#define OMPT_INVOKER(x) \ + ((x == fork_context_gnu) ? 
ompt_invoker_program : ompt_invoker_runtime) + + +#define ompt_callback(e) e ## _callback + + +typedef struct ompt_callbacks_s { +#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event); + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_callbacks_t; + + + +typedef struct { + ompt_frame_t frame; + void* function; + ompt_task_id_t task_id; +} ompt_task_info_t; + + +typedef struct { + ompt_parallel_id_t parallel_id; + void *microtask; +} ompt_team_info_t; + + +typedef struct ompt_lw_taskteam_s { + ompt_team_info_t ompt_team_info; + ompt_task_info_t ompt_task_info; + struct ompt_lw_taskteam_s *parent; +} ompt_lw_taskteam_t; + + +typedef struct ompt_parallel_info_s { + ompt_task_id_t parent_task_id; /* id of parent task */ + ompt_parallel_id_t parallel_id; /* id of parallel region */ + ompt_frame_t *parent_task_frame; /* frame data of parent task */ + void *parallel_function; /* pointer to outlined function */ +} ompt_parallel_info_t; + + +typedef struct { + ompt_state_t state; + ompt_wait_id_t wait_id; + void *idle_frame; +} ompt_thread_info_t; + + +extern ompt_callbacks_t ompt_callbacks; + +#ifdef __cplusplus +extern "C" { +#endif + +void ompt_pre_init(void); +void ompt_post_init(void); +void ompt_fini(void); + +extern int ompt_enabled; + +#ifdef __cplusplus +}; +#endif + +#endif diff --git a/contrib/libs/cxxsupp/openmp/ompt-specific.c b/contrib/libs/cxxsupp/openmp/ompt-specific.c index f718470c640..49f668af100 100644 --- a/contrib/libs/cxxsupp/openmp/ompt-specific.c +++ b/contrib/libs/cxxsupp/openmp/ompt-specific.c @@ -1,332 +1,332 @@ -//****************************************************************************** -// include files -//****************************************************************************** - -#include "kmp.h" -#include "ompt-internal.h" -#include "ompt-specific.h" - -//****************************************************************************** -// macros 
-//****************************************************************************** - -#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (id >=0) ? id + 1: 0) - -#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info; - -#define OMPT_THREAD_ID_BITS 16 - -// 2013 08 24 - John Mellor-Crummey -// ideally, a thread should assign its own ids based on thread private data. -// however, the way the intel runtime reinitializes thread data structures -// when it creates teams makes it difficult to maintain persistent thread -// data. using a shared variable instead is simple. I leave it to intel to -// sort out how to implement a higher performance version in their runtime. - -// when using fetch_and_add to generate the IDs, there isn't any reason to waste -// bits for thread id. -#if 0 -#define NEXT_ID(id_ptr,tid) \ - ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid)) -#else -#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr)) -#endif - -//****************************************************************************** -// private operations -//****************************************************************************** - -//---------------------------------------------------------- -// traverse the team and task hierarchy -// note: __ompt_get_teaminfo and __ompt_get_taskinfo -// traverse the hierarchy similarly and need to be -// kept consistent -//---------------------------------------------------------- - -ompt_team_info_t * -__ompt_get_teaminfo(int depth, int *size) -{ - kmp_info_t *thr = ompt_get_thread(); - - if (thr) { - kmp_team *team = thr->th.th_team; - if (team == NULL) return NULL; - - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team); - - while(depth > 0) { - // next lightweight team (if any) - if (lwt) lwt = lwt->parent; - - // next heavyweight team (if any) after - // lightweight teams are exhausted - if (!lwt && team) team=team->t.t_parent; - - depth--; - } - - if (lwt) { - // lightweight teams have one task - 
if (size) *size = 1; - - // return team info for lightweight team - return &lwt->ompt_team_info; - } else if (team) { - // extract size from heavyweight team - if (size) *size = team->t.t_nproc; - - // return team info for heavyweight team - return &team->t.ompt_team_info; - } - } - - return NULL; -} - - -ompt_task_info_t * -__ompt_get_taskinfo(int depth) -{ - ompt_task_info_t *info = NULL; - kmp_info_t *thr = ompt_get_thread(); - - if (thr) { - kmp_taskdata_t *taskdata = thr->th.th_current_task; - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team); - - while (depth > 0) { - // next lightweight team (if any) - if (lwt) lwt = lwt->parent; - - // next heavyweight team (if any) after - // lightweight teams are exhausted - if (!lwt && taskdata) { - taskdata = taskdata->td_parent; - if (taskdata) { - lwt = LWT_FROM_TEAM(taskdata->td_team); - } - } - depth--; - } - - if (lwt) { - info = &lwt->ompt_task_info; - } else if (taskdata) { - info = &taskdata->ompt_task_info; - } - } - - return info; -} - - - -//****************************************************************************** -// interface operations -//****************************************************************************** - -//---------------------------------------------------------- -// thread support -//---------------------------------------------------------- - -ompt_parallel_id_t -__ompt_thread_id_new() -{ - static uint64_t ompt_thread_id = 1; - return NEXT_ID(&ompt_thread_id, 0); -} - -void -__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) -{ - ompt_callbacks.ompt_callback(ompt_event_thread_begin)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); -} - - -void -__ompt_thread_end(ompt_thread_type_t thread_type, int gtid) -{ - ompt_callbacks.ompt_callback(ompt_event_thread_end)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); -} - - -ompt_thread_id_t -__ompt_get_thread_id_internal() -{ - // FIXME - // until we have a better way of assigning ids, use __kmp_get_gtid - // since the return 
value might be negative, we need to test that before - // assigning it to an ompt_thread_id_t, which is unsigned. - int id = __kmp_get_gtid(); - assert(id >= 0); - - return GTID_TO_OMPT_THREAD_ID(id); -} - -//---------------------------------------------------------- -// state support -//---------------------------------------------------------- - -void -__ompt_thread_assign_wait_id(void *variable) -{ - int gtid = __kmp_gtid_get_specific(); - kmp_info_t *ti = ompt_get_thread_gtid(gtid); - - ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable; -} - -ompt_state_t -__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) -{ - kmp_info_t *ti = ompt_get_thread(); - - if (ti) { - if (ompt_wait_id) - *ompt_wait_id = ti->th.ompt_thread_info.wait_id; - return ti->th.ompt_thread_info.state; - } - return ompt_state_undefined; -} - -//---------------------------------------------------------- -// idle frame support -//---------------------------------------------------------- - -void * -__ompt_get_idle_frame_internal(void) -{ - kmp_info_t *ti = ompt_get_thread(); - return ti ? ti->th.ompt_thread_info.idle_frame : NULL; -} - - -//---------------------------------------------------------- -// parallel region support -//---------------------------------------------------------- - -ompt_parallel_id_t -__ompt_parallel_id_new(int gtid) -{ - static uint64_t ompt_parallel_id = 1; - return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0; -} - - -void * -__ompt_get_parallel_function_internal(int depth) -{ - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - void *function = info ? info->microtask : NULL; - return function; -} - - -ompt_parallel_id_t -__ompt_get_parallel_id_internal(int depth) -{ - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - ompt_parallel_id_t id = info ? info->parallel_id : 0; - return id; -} - - -int -__ompt_get_parallel_team_size_internal(int depth) -{ - // initialize the return value with the error value. 
- // if there is a team at the specified depth, the default - // value will be overwritten the size of that team. - int size = -1; - (void) __ompt_get_teaminfo(depth, &size); - return size; -} - - -//---------------------------------------------------------- -// lightweight task team support -//---------------------------------------------------------- - -void -__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int gtid, void *microtask, - ompt_parallel_id_t ompt_pid) -{ - lwt->ompt_team_info.parallel_id = ompt_pid; - lwt->ompt_team_info.microtask = microtask; - lwt->ompt_task_info.task_id = 0; - lwt->ompt_task_info.frame.reenter_runtime_frame = 0; - lwt->ompt_task_info.frame.exit_runtime_frame = 0; - lwt->ompt_task_info.function = NULL; - lwt->parent = 0; -} - - -void -__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) -{ - ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info; - lwt->parent = my_parent; - thr->th.th_team->t.ompt_serialized_team_info = lwt; -} - - -ompt_lw_taskteam_t * -__ompt_lw_taskteam_unlink(kmp_info_t *thr) -{ - ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; - if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; - return lwtask; -} - - -//---------------------------------------------------------- -// task support -//---------------------------------------------------------- - -ompt_task_id_t -__ompt_task_id_new(int gtid) -{ - static uint64_t ompt_task_id = 1; - return NEXT_ID(&ompt_task_id, gtid); -} - - -ompt_task_id_t -__ompt_get_task_id_internal(int depth) -{ - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_task_id_t task_id = info ? info->task_id : 0; - return task_id; -} - - -void * -__ompt_get_task_function_internal(int depth) -{ - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - void *function = info ? 
info->function : NULL; - return function; -} - - -ompt_frame_t * -__ompt_get_task_frame_internal(int depth) -{ - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_frame_t *frame = info ? frame = &info->frame : NULL; - return frame; -} - - -//---------------------------------------------------------- -// team support -//---------------------------------------------------------- - -void -__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) -{ - team->t.ompt_team_info.parallel_id = ompt_pid; -} +//****************************************************************************** +// include files +//****************************************************************************** + +#include "kmp.h" +#include "ompt-internal.h" +#include "ompt-specific.h" + +//****************************************************************************** +// macros +//****************************************************************************** + +#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (id >=0) ? id + 1: 0) + +#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info; + +#define OMPT_THREAD_ID_BITS 16 + +// 2013 08 24 - John Mellor-Crummey +// ideally, a thread should assign its own ids based on thread private data. +// however, the way the intel runtime reinitializes thread data structures +// when it creates teams makes it difficult to maintain persistent thread +// data. using a shared variable instead is simple. I leave it to intel to +// sort out how to implement a higher performance version in their runtime. + +// when using fetch_and_add to generate the IDs, there isn't any reason to waste +// bits for thread id. 
+#if 0 +#define NEXT_ID(id_ptr,tid) \ + ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid)) +#else +#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr)) +#endif + +//****************************************************************************** +// private operations +//****************************************************************************** + +//---------------------------------------------------------- +// traverse the team and task hierarchy +// note: __ompt_get_teaminfo and __ompt_get_taskinfo +// traverse the hierarchy similarly and need to be +// kept consistent +//---------------------------------------------------------- + +ompt_team_info_t * +__ompt_get_teaminfo(int depth, int *size) +{ + kmp_info_t *thr = ompt_get_thread(); + + if (thr) { + kmp_team *team = thr->th.th_team; + if (team == NULL) return NULL; + + ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team); + + while(depth > 0) { + // next lightweight team (if any) + if (lwt) lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && team) team=team->t.t_parent; + + depth--; + } + + if (lwt) { + // lightweight teams have one task + if (size) *size = 1; + + // return team info for lightweight team + return &lwt->ompt_team_info; + } else if (team) { + // extract size from heavyweight team + if (size) *size = team->t.t_nproc; + + // return team info for heavyweight team + return &team->t.ompt_team_info; + } + } + + return NULL; +} + + +ompt_task_info_t * +__ompt_get_taskinfo(int depth) +{ + ompt_task_info_t *info = NULL; + kmp_info_t *thr = ompt_get_thread(); + + if (thr) { + kmp_taskdata_t *taskdata = thr->th.th_current_task; + ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team); + + while (depth > 0) { + // next lightweight team (if any) + if (lwt) lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && taskdata) { + taskdata = 
taskdata->td_parent; + if (taskdata) { + lwt = LWT_FROM_TEAM(taskdata->td_team); + } + } + depth--; + } + + if (lwt) { + info = &lwt->ompt_task_info; + } else if (taskdata) { + info = &taskdata->ompt_task_info; + } + } + + return info; +} + + + +//****************************************************************************** +// interface operations +//****************************************************************************** + +//---------------------------------------------------------- +// thread support +//---------------------------------------------------------- + +ompt_parallel_id_t +__ompt_thread_id_new() +{ + static uint64_t ompt_thread_id = 1; + return NEXT_ID(&ompt_thread_id, 0); +} + +void +__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) +{ + ompt_callbacks.ompt_callback(ompt_event_thread_begin)( + thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); +} + + +void +__ompt_thread_end(ompt_thread_type_t thread_type, int gtid) +{ + ompt_callbacks.ompt_callback(ompt_event_thread_end)( + thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); +} + + +ompt_thread_id_t +__ompt_get_thread_id_internal() +{ + // FIXME + // until we have a better way of assigning ids, use __kmp_get_gtid + // since the return value might be negative, we need to test that before + // assigning it to an ompt_thread_id_t, which is unsigned. 
+ int id = __kmp_get_gtid(); + assert(id >= 0); + + return GTID_TO_OMPT_THREAD_ID(id); +} + +//---------------------------------------------------------- +// state support +//---------------------------------------------------------- + +void +__ompt_thread_assign_wait_id(void *variable) +{ + int gtid = __kmp_gtid_get_specific(); + kmp_info_t *ti = ompt_get_thread_gtid(gtid); + + ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable; +} + +ompt_state_t +__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) +{ + kmp_info_t *ti = ompt_get_thread(); + + if (ti) { + if (ompt_wait_id) + *ompt_wait_id = ti->th.ompt_thread_info.wait_id; + return ti->th.ompt_thread_info.state; + } + return ompt_state_undefined; +} + +//---------------------------------------------------------- +// idle frame support +//---------------------------------------------------------- + +void * +__ompt_get_idle_frame_internal(void) +{ + kmp_info_t *ti = ompt_get_thread(); + return ti ? ti->th.ompt_thread_info.idle_frame : NULL; +} + + +//---------------------------------------------------------- +// parallel region support +//---------------------------------------------------------- + +ompt_parallel_id_t +__ompt_parallel_id_new(int gtid) +{ + static uint64_t ompt_parallel_id = 1; + return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0; +} + + +void * +__ompt_get_parallel_function_internal(int depth) +{ + ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); + void *function = info ? info->microtask : NULL; + return function; +} + + +ompt_parallel_id_t +__ompt_get_parallel_id_internal(int depth) +{ + ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); + ompt_parallel_id_t id = info ? info->parallel_id : 0; + return id; +} + + +int +__ompt_get_parallel_team_size_internal(int depth) +{ + // initialize the return value with the error value. + // if there is a team at the specified depth, the default + // value will be overwritten the size of that team. 
+ int size = -1; + (void) __ompt_get_teaminfo(depth, &size); + return size; +} + + +//---------------------------------------------------------- +// lightweight task team support +//---------------------------------------------------------- + +void +__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, + int gtid, void *microtask, + ompt_parallel_id_t ompt_pid) +{ + lwt->ompt_team_info.parallel_id = ompt_pid; + lwt->ompt_team_info.microtask = microtask; + lwt->ompt_task_info.task_id = 0; + lwt->ompt_task_info.frame.reenter_runtime_frame = 0; + lwt->ompt_task_info.frame.exit_runtime_frame = 0; + lwt->ompt_task_info.function = NULL; + lwt->parent = 0; +} + + +void +__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) +{ + ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info; + lwt->parent = my_parent; + thr->th.th_team->t.ompt_serialized_team_info = lwt; +} + + +ompt_lw_taskteam_t * +__ompt_lw_taskteam_unlink(kmp_info_t *thr) +{ + ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; + if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; + return lwtask; +} + + +//---------------------------------------------------------- +// task support +//---------------------------------------------------------- + +ompt_task_id_t +__ompt_task_id_new(int gtid) +{ + static uint64_t ompt_task_id = 1; + return NEXT_ID(&ompt_task_id, gtid); +} + + +ompt_task_id_t +__ompt_get_task_id_internal(int depth) +{ + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + ompt_task_id_t task_id = info ? info->task_id : 0; + return task_id; +} + + +void * +__ompt_get_task_function_internal(int depth) +{ + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + void *function = info ? info->function : NULL; + return function; +} + + +ompt_frame_t * +__ompt_get_task_frame_internal(int depth) +{ + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + ompt_frame_t *frame = info ? 
frame = &info->frame : NULL; + return frame; +} + + +//---------------------------------------------------------- +// team support +//---------------------------------------------------------- + +void +__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) +{ + team->t.ompt_team_info.parallel_id = ompt_pid; +} diff --git a/contrib/libs/cxxsupp/openmp/ompt-specific.h b/contrib/libs/cxxsupp/openmp/ompt-specific.h index 2cc213ff140..e8f84a9a585 100644 --- a/contrib/libs/cxxsupp/openmp/ompt-specific.h +++ b/contrib/libs/cxxsupp/openmp/ompt-specific.h @@ -1,90 +1,90 @@ -#ifndef OMPT_SPECIFIC_H -#define OMPT_SPECIFIC_H - -#include "kmp.h" - -/***************************************************************************** - * types - ****************************************************************************/ - -typedef kmp_info_t ompt_thread_t; - - - -/***************************************************************************** - * forward declarations - ****************************************************************************/ - -void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid); -void __ompt_thread_assign_wait_id(void *variable); - -void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr, - int gtid, void *microtask, - ompt_parallel_id_t ompt_pid); - -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr); - -ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(ompt_thread_t *thr); - -ompt_parallel_id_t __ompt_parallel_id_new(int gtid); -ompt_task_id_t __ompt_task_id_new(int gtid); - -ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); - -ompt_task_info_t *__ompt_get_taskinfo(int depth); - -void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid); - -void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid); - -int __ompt_get_parallel_team_size_internal(int ancestor_level); - -ompt_task_id_t __ompt_get_task_id_internal(int depth); - -ompt_frame_t 
*__ompt_get_task_frame_internal(int depth); - - - -/***************************************************************************** - * macros - ****************************************************************************/ - -#define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE -#define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI -#define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) - - - -//****************************************************************************** -// inline functions -//****************************************************************************** - -inline ompt_thread_t * -ompt_get_thread_gtid(int gtid) -{ - return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL; -} - - -inline ompt_thread_t * -ompt_get_thread() -{ - int gtid = __kmp_gtid_get_specific(); - return ompt_get_thread_gtid(gtid); -} - - -inline void -ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) -{ - thread->th.ompt_thread_info.state = state; -} - - -inline const char * -ompt_get_runtime_version() -{ - return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]; -} - -#endif +#ifndef OMPT_SPECIFIC_H +#define OMPT_SPECIFIC_H + +#include "kmp.h" + +/***************************************************************************** + * types + ****************************************************************************/ + +typedef kmp_info_t ompt_thread_t; + + + +/***************************************************************************** + * forward declarations + ****************************************************************************/ + +void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid); +void __ompt_thread_assign_wait_id(void *variable); + +void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr, + int gtid, void *microtask, + ompt_parallel_id_t ompt_pid); + +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr); + +ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(ompt_thread_t *thr); + 
+ompt_parallel_id_t __ompt_parallel_id_new(int gtid); +ompt_task_id_t __ompt_task_id_new(int gtid); + +ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); + +ompt_task_info_t *__ompt_get_taskinfo(int depth); + +void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid); + +void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid); + +int __ompt_get_parallel_team_size_internal(int ancestor_level); + +ompt_task_id_t __ompt_get_task_id_internal(int depth); + +ompt_frame_t *__ompt_get_task_frame_internal(int depth); + + + +/***************************************************************************** + * macros + ****************************************************************************/ + +#define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE +#define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI +#define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) + + + +//****************************************************************************** +// inline functions +//****************************************************************************** + +inline ompt_thread_t * +ompt_get_thread_gtid(int gtid) +{ + return (gtid >= 0) ? 
__kmp_thread_from_gtid(gtid) : NULL; +} + + +inline ompt_thread_t * +ompt_get_thread() +{ + int gtid = __kmp_gtid_get_specific(); + return ompt_get_thread_gtid(gtid); +} + + +inline void +ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) +{ + thread->th.ompt_thread_info.state = state; +} + + +inline const char * +ompt_get_runtime_version() +{ + return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]; +} + +#endif diff --git a/contrib/libs/cxxsupp/openmp/test-touch.c b/contrib/libs/cxxsupp/openmp/test-touch.c index f724702df59..6ce529ae23a 100644 --- a/contrib/libs/cxxsupp/openmp/test-touch.c +++ b/contrib/libs/cxxsupp/openmp/test-touch.c @@ -1,31 +1,31 @@ -// test-touch.c // - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifdef __cplusplus -extern "C" { -#endif -extern double omp_get_wtime(); -extern int omp_get_num_threads(); -extern int omp_get_max_threads(); -#ifdef __cplusplus -} -#endif - -int main() { - omp_get_wtime(); - omp_get_num_threads(); - omp_get_max_threads(); - return 0; -} - -// end of file // +// test-touch.c // + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#ifdef __cplusplus +extern "C" { +#endif +extern double omp_get_wtime(); +extern int omp_get_num_threads(); +extern int omp_get_max_threads(); +#ifdef __cplusplus +} +#endif + +int main() { + omp_get_wtime(); + omp_get_num_threads(); + omp_get_max_threads(); + return 0; +} + +// end of file // diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h index 50ccc374d99..4b242fdd8f8 100644 --- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h +++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h @@ -1,29 +1,29 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "ittnotify_config.h" - -#if ITT_PLATFORM==ITT_PLATFORM_WIN - -#pragma warning (disable: 593) /* parameter "XXXX" was set but never used */ -#pragma warning (disable: 344) /* typedef name has already been declared (with same type) */ -#pragma warning (disable: 174) /* expression has no effect */ -#pragma warning (disable: 4127) /* conditional expression is constant */ -#pragma warning (disable: 4306) /* conversion from '?' to '?' 
of greater size */ - -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if defined __INTEL_COMPILER - -#pragma warning (disable: 869) /* parameter "XXXXX" was never referenced */ -#pragma warning (disable: 1418) /* external function definition with no prior declaration */ -#pragma warning (disable: 1419) /* external declaration in primary source file */ - -#endif /* __INTEL_COMPILER */ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + +#pragma warning (disable: 593) /* parameter "XXXX" was set but never used */ +#pragma warning (disable: 344) /* typedef name has already been declared (with same type) */ +#pragma warning (disable: 174) /* expression has no effect */ +#pragma warning (disable: 4127) /* conditional expression is constant */ +#pragma warning (disable: 4306) /* conversion from '?' to '?' 
of greater size */ + +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if defined __INTEL_COMPILER + +#pragma warning (disable: 869) /* parameter "XXXXX" was never referenced */ +#pragma warning (disable: 1418) /* external function definition with no prior declaration */ +#pragma warning (disable: 1419) /* external declaration in primary source file */ + +#endif /* __INTEL_COMPILER */ diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h index 0d0ad7938ae..c3792f30a00 100644 --- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h +++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h @@ -1,3804 +1,3804 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef _ITTNOTIFY_H_ -#define _ITTNOTIFY_H_ - -/** -@file -@brief Public User API functions and types -@mainpage - -The ITT API is used to annotate a user's program with additional information -that can be used by correctness and performance tools. The user inserts -calls in their program. Those calls generate information that is collected -at runtime, and used by Intel(R) Threading Tools. - -@section API Concepts -The following general concepts are used throughout the API. - -@subsection Unicode Support -Many API functions take character string arguments. On Windows, there -are two versions of each such function. The function name is suffixed -by W if Unicode support is enabled, and by A otherwise. Any API function -that takes a character string argument adheres to this convention. - -@subsection Conditional Compilation -Many users prefer having an option to modify ITT API code when linking it -inside their runtimes. 
ITT API header file provides a mechanism to replace -ITT API function names inside your code with empty strings. To do this, -define the macros INTEL_NO_ITTNOTIFY_API during compilation and remove the -static library from the linker script. - -@subsection Domains -[see domains] -Domains provide a way to separate notification for different modules or -libraries in a program. Domains are specified by dotted character strings, -e.g. TBB.Internal.Control. - -A mechanism (to be specified) is provided to enable and disable -domains. By default, all domains are enabled. -@subsection Named Entities and Instances -Named entities (frames, regions, tasks, and markers) communicate -information about the program to the analysis tools. A named entity often -refers to a section of program code, or to some set of logical concepts -that the programmer wants to group together. - -Named entities relate to the programmer's static view of the program. When -the program actually executes, many instances of a given named entity -may be created. - -The API annotations denote instances of named entities. The actual -named entities are displayed using the analysis tools. In other words, -the named entities come into existence when instances are created. - -Instances of named entities may have instance identifiers (IDs). Some -API calls use instance identifiers to create relationships between -different instances of named entities. Other API calls associate data -with instances of named entities. - -Some named entities must always have instance IDs. In particular, regions -and frames always have IDs. Task and markers need IDs only if the ID is -needed in another API call (such as adding a relation or metadata). - -The lifetime of instance IDs is distinct from the lifetime of -instances. This allows various relationships to be specified separate -from the actual execution of instances. This flexibility comes at the -expense of extra API calls. 
- -The same ID may not be reused for different instances, unless a previous -[ref] __itt_id_destroy call for that ID has been issued. -*/ - -/** @cond exclude_from_documentation */ -#ifndef ITT_OS_WIN -# define ITT_OS_WIN 1 -#endif /* ITT_OS_WIN */ - -#ifndef ITT_OS_LINUX -# define ITT_OS_LINUX 2 -#endif /* ITT_OS_LINUX */ - -#ifndef ITT_OS_MAC -# define ITT_OS_MAC 3 -#endif /* ITT_OS_MAC */ - -#ifndef ITT_OS -# if defined WIN32 || defined _WIN32 -# define ITT_OS ITT_OS_WIN -# elif defined( __APPLE__ ) && defined( __MACH__ ) -# define ITT_OS ITT_OS_MAC -# else -# define ITT_OS ITT_OS_LINUX -# endif -#endif /* ITT_OS */ - -#ifndef ITT_PLATFORM_WIN -# define ITT_PLATFORM_WIN 1 -#endif /* ITT_PLATFORM_WIN */ - -#ifndef ITT_PLATFORM_POSIX -# define ITT_PLATFORM_POSIX 2 -#endif /* ITT_PLATFORM_POSIX */ - -#ifndef ITT_PLATFORM_MAC -# define ITT_PLATFORM_MAC 3 -#endif /* ITT_PLATFORM_MAC */ - -#ifndef ITT_PLATFORM -# if ITT_OS==ITT_OS_WIN -# define ITT_PLATFORM ITT_PLATFORM_WIN -# elif ITT_OS==ITT_OS_MAC -# define ITT_PLATFORM ITT_PLATFORM_MAC -# else -# define ITT_PLATFORM ITT_PLATFORM_POSIX -# endif -#endif /* ITT_PLATFORM */ - -#if defined(_UNICODE) && !defined(UNICODE) -#define UNICODE -#endif - -#include -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#include -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#if defined(UNICODE) || defined(_UNICODE) -#include -#endif /* UNICODE || _UNICODE */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef CDECL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define CDECL __cdecl -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ -# define CDECL __attribute__ ((cdecl)) -# else /* _M_IX86 || __i386__ */ -# define CDECL /* actual only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* CDECL */ - -#ifndef STDCALL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define STDCALL __stdcall -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + 
+//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _ITTNOTIFY_H_ +#define _ITTNOTIFY_H_ + +/** +@file +@brief Public User API functions and types +@mainpage + +The ITT API is used to annotate a user's program with additional information +that can be used by correctness and performance tools. The user inserts +calls in their program. Those calls generate information that is collected +at runtime, and used by Intel(R) Threading Tools. + +@section API Concepts +The following general concepts are used throughout the API. + +@subsection Unicode Support +Many API functions take character string arguments. On Windows, there +are two versions of each such function. The function name is suffixed +by W if Unicode support is enabled, and by A otherwise. Any API function +that takes a character string argument adheres to this convention. + +@subsection Conditional Compilation +Many users prefer having an option to modify ITT API code when linking it +inside their runtimes. ITT API header file provides a mechanism to replace +ITT API function names inside your code with empty strings. To do this, +define the macros INTEL_NO_ITTNOTIFY_API during compilation and remove the +static library from the linker script. + +@subsection Domains +[see domains] +Domains provide a way to separate notification for different modules or +libraries in a program. Domains are specified by dotted character strings, +e.g. TBB.Internal.Control. + +A mechanism (to be specified) is provided to enable and disable +domains. By default, all domains are enabled. 
+@subsection Named Entities and Instances +Named entities (frames, regions, tasks, and markers) communicate +information about the program to the analysis tools. A named entity often +refers to a section of program code, or to some set of logical concepts +that the programmer wants to group together. + +Named entities relate to the programmer's static view of the program. When +the program actually executes, many instances of a given named entity +may be created. + +The API annotations denote instances of named entities. The actual +named entities are displayed using the analysis tools. In other words, +the named entities come into existence when instances are created. + +Instances of named entities may have instance identifiers (IDs). Some +API calls use instance identifiers to create relationships between +different instances of named entities. Other API calls associate data +with instances of named entities. + +Some named entities must always have instance IDs. In particular, regions +and frames always have IDs. Task and markers need IDs only if the ID is +needed in another API call (such as adding a relation or metadata). + +The lifetime of instance IDs is distinct from the lifetime of +instances. This allows various relationships to be specified separate +from the actual execution of instances. This flexibility comes at the +expense of extra API calls. + +The same ID may not be reused for different instances, unless a previous +[ref] __itt_id_destroy call for that ID has been issued. 
+*/ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include +#if defined(UNICODE) || defined(_UNICODE) +#include +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # if defined _M_IX86 || defined __i386__ -# define STDCALL __attribute__ ((stdcall)) -# else /* _M_IX86 || __i386__ */ -# define STDCALL /* supported only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* STDCALL */ - -#define ITTAPI CDECL -#define LIBITTAPI CDECL - -/* TODO: Temporary for compatibility! 
*/ -#define ITTAPI_CALL CDECL -#define LIBITTAPI_CALL CDECL - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -/* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline -#define ITT_INLINE_ATTRIBUTE /* nothing */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/* - * Generally, functions are not inlined unless optimization is specified. - * For functions declared inline, this attribute inlines the function even - * if no optimization level was specified. - */ -#ifdef __STRICT_ANSI__ -#define ITT_INLINE static -#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) -#else /* __STRICT_ANSI__ */ -#define ITT_INLINE static inline -#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) -#endif /* __STRICT_ANSI__ */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/** @endcond */ - -#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# warning "Deprecated API is used. 
Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# include "legacy/ittnotify.h" -#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ - -/** @cond exclude_from_documentation */ -/* Helper macro for joining tokens */ -#define ITT_JOIN_AUX(p,n) p##n -#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) - -#ifdef ITT_MAJOR -#undef ITT_MAJOR -#endif -#ifdef ITT_MINOR -#undef ITT_MINOR -#endif -#define ITT_MAJOR 3 -#define ITT_MINOR 0 - -/* Standard versioning of a token with major and minor version numbers */ -#define ITT_VERSIONIZE(x) \ - ITT_JOIN(x, \ - ITT_JOIN(_, \ - ITT_JOIN(ITT_MAJOR, \ - ITT_JOIN(_, ITT_MINOR)))) - -#ifndef INTEL_ITTNOTIFY_PREFIX -# define INTEL_ITTNOTIFY_PREFIX __itt_ -#endif /* INTEL_ITTNOTIFY_PREFIX */ -#ifndef INTEL_ITTNOTIFY_POSTFIX -# define INTEL_ITTNOTIFY_POSTFIX _ptr_ -#endif /* INTEL_ITTNOTIFY_POSTFIX */ - -#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) -#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) - -#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) -#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) - -#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 
(void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) -#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) - -#ifdef ITT_STUB -#undef ITT_STUB -#endif -#ifdef ITT_STUBV -#undef ITT_STUBV -#endif -#define ITT_STUBV(api,type,name,args) \ - typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ - extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); -#define ITT_STUB ITT_STUBV -/** @endcond */ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** @cond exclude_from_gpa_documentation */ -/** - * @defgroup public Public API - * @{ - * @} - */ - -/** - * @defgroup control Collection Control - * @ingroup public - * General behavior: application continues to run, but no profiling information is being collected - * - * Pausing occurs not only for the current thread but for all process as well as spawned processes - * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: - * - Does not analyze or report errors that involve memory access. - * - Other errors are reported as usual. Pausing data collection in - * Intel(R) Parallel Inspector and Intel(R) Inspector XE - * only pauses tracing and analyzing memory access. - * It does not pause tracing or analyzing threading APIs. - * . 
- * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: - * - Does continue to record when new threads are started. - * . - * - Other effects: - * - Possible reduction of runtime overhead. - * . - * @{ - */ -/** @brief Pause collection */ -void ITTAPI __itt_pause(void); -/** @brief Resume collection */ -void ITTAPI __itt_resume(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, pause, (void)) -ITT_STUBV(ITTAPI, void, resume, (void)) -#define __itt_pause ITTNOTIFY_VOID(pause) -#define __itt_pause_ptr ITTNOTIFY_NAME(pause) -#define __itt_resume ITTNOTIFY_VOID(resume) -#define __itt_resume_ptr ITTNOTIFY_NAME(resume) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_pause() -#define __itt_pause_ptr 0 -#define __itt_resume() -#define __itt_resume_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_pause_ptr 0 -#define __itt_resume_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} control group */ -/** @endcond */ - -/** - * @defgroup threads Threads - * @ingroup public - * Give names to threads - * @{ - */ -/** - * @brief Sets thread name of calling thread - * @param[in] name - name of thread - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_thread_set_nameA(const char *name); -void ITTAPI __itt_thread_set_nameW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_thread_set_name __itt_thread_set_nameW -# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr -#else /* UNICODE */ -# define __itt_thread_set_name __itt_thread_set_nameA -# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_thread_set_name(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if 
ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name)) -ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA) -#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA) -#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW) -#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name) -#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thread_set_nameA(name) -#define __itt_thread_set_nameA_ptr 0 -#define __itt_thread_set_nameW(name) -#define __itt_thread_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thread_set_name(name) -#define __itt_thread_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thread_set_nameA_ptr 0 -#define __itt_thread_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thread_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @cond exclude_from_gpa_documentation */ - -/** - * @brief Mark current thread as ignored from this point on, for the duration of its existence. 
- */ -void ITTAPI __itt_thread_ignore(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, thread_ignore, (void)) -#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore) -#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_thread_ignore() -#define __itt_thread_ignore_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_thread_ignore_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} threads group */ - -/** - * @defgroup suppress Error suppression - * @ingroup public - * General behavior: application continues to run, but errors are suppressed - * - * @{ - */ - -/*****************************************************************//** - * @name group of functions used for error suppression in correctness tools - *********************************************************************/ -/** @{ */ -/** - * @hideinitializer - * @brief possible value for suppression mask - */ -#define __itt_suppress_all_errors 0x7fffffff - -/** - * @hideinitializer - * @brief possible value for suppression mask (suppresses errors from threading analysis) - */ -#define __itt_suppress_threading_errors 0x000000ff - -/** - * @hideinitializer - * @brief possible value for suppression mask (suppresses errors from memory analysis) - */ -#define __itt_suppress_memory_errors 0x0000ff00 - -/** - * @brief Start suppressing errors identified in mask on this thread - */ -void ITTAPI __itt_suppress_push(unsigned int mask); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) -#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) -#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_suppress_push(mask) -#define __itt_suppress_push_ptr 0 
-#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_suppress_push_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Undo the effects of the matching call to __itt_suppress_push - */ -void ITTAPI __itt_suppress_pop(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, suppress_pop, (void)) -#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop) -#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_suppress_pop() -#define __itt_suppress_pop_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_suppress_pop_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @enum __itt_model_disable - * @brief Enumerator for the disable methods - */ -typedef enum __itt_suppress_mode { - __itt_unsuppress_range, - __itt_suppress_range -} __itt_suppress_mode_t; - -/** - * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask - */ -void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) -#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range) -#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_suppress_mark_range(mask) -#define __itt_suppress_mark_range_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_suppress_mark_range_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Undo the effect of a matching call to __itt_suppress_mark_range. 
If not matching - * call is found, nothing is changed. - */ -void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) -#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range) -#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_suppress_clear_range(mask) -#define __itt_suppress_clear_range_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_suppress_clear_range_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} */ -/** @} suppress group */ - -/** - * @defgroup sync Synchronization - * @ingroup public - * Indicate user-written synchronization code - * @{ - */ -/** +# define CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI CDECL +#define LIBITTAPI CDECL + +/* TODO: Temporary for compatibility! 
*/ +#define ITTAPI_CALL CDECL +#define LIBITTAPI_CALL CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. + */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# warning "Deprecated API is used. 
Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# include "legacy/ittnotify.h" +#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ + +/** @cond exclude_from_documentation */ +/* Helper macro for joining tokens */ +#define ITT_JOIN_AUX(p,n) p##n +#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) + +#ifdef ITT_MAJOR +#undef ITT_MAJOR +#endif +#ifdef ITT_MINOR +#undef ITT_MINOR +#endif +#define ITT_MAJOR 3 +#define ITT_MINOR 0 + +/* Standard versioning of a token with major and minor version numbers */ +#define ITT_VERSIONIZE(x) \ + ITT_JOIN(x, \ + ITT_JOIN(_, \ + ITT_JOIN(ITT_MAJOR, \ + ITT_JOIN(_, ITT_MINOR)))) + +#ifndef INTEL_ITTNOTIFY_PREFIX +# define INTEL_ITTNOTIFY_PREFIX __itt_ +#endif /* INTEL_ITTNOTIFY_PREFIX */ +#ifndef INTEL_ITTNOTIFY_POSTFIX +# define INTEL_ITTNOTIFY_POSTFIX _ptr_ +#endif /* INTEL_ITTNOTIFY_POSTFIX */ + +#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) + +#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) +#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) + +#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 
(void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) + +#ifdef ITT_STUB +#undef ITT_STUB +#endif +#ifdef ITT_STUBV +#undef ITT_STUBV +#endif +#define ITT_STUBV(api,type,name,args) \ + typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ + extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); +#define ITT_STUB ITT_STUBV +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup public Public API + * @{ + * @} + */ + +/** + * @defgroup control Collection Control + * @ingroup public + * General behavior: application continues to run, but no profiling information is being collected + * + * Pausing occurs not only for the current thread but for all process as well as spawned processes + * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: + * - Does not analyze or report errors that involve memory access. + * - Other errors are reported as usual. Pausing data collection in + * Intel(R) Parallel Inspector and Intel(R) Inspector XE + * only pauses tracing and analyzing memory access. + * It does not pause tracing or analyzing threading APIs. + * . 
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Does continue to record when new threads are started. + * . + * - Other effects: + * - Possible reduction of runtime overhead. + * . + * @{ + */ +/** @brief Pause collection */ +void ITTAPI __itt_pause(void); +/** @brief Resume collection */ +void ITTAPI __itt_resume(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, resume, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_pause() +#define __itt_pause_ptr 0 +#define __itt_resume() +#define __itt_resume_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_pause_ptr 0 +#define __itt_resume_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} control group */ +/** @endcond */ + +/** + * @defgroup threads Threads + * @ingroup public + * Give names to threads + * @{ + */ +/** + * @brief Sets thread name of calling thread + * @param[in] name - name of thread + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_thread_set_nameA(const char *name); +void ITTAPI __itt_thread_set_nameW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_thread_set_name __itt_thread_set_nameW +# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr +#else /* UNICODE */ +# define __itt_thread_set_name __itt_thread_set_nameA +# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_thread_set_name(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if 
ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name)) +ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA) +#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA) +#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW) +#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name) +#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA(name) +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW(name) +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name(name) +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @brief Mark current thread as ignored from this point on, for the duration of its existence. 
+ */ +void ITTAPI __itt_thread_ignore(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, thread_ignore, (void)) +#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore) +#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thread_ignore() +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} threads group */ + +/** + * @defgroup suppress Error suppression + * @ingroup public + * General behavior: application continues to run, but errors are suppressed + * + * @{ + */ + +/*****************************************************************//** + * @name group of functions used for error suppression in correctness tools + *********************************************************************/ +/** @{ */ +/** * @hideinitializer - * @brief possible value of attribute argument for sync object type - */ -#define __itt_attr_barrier 1 - -/** + * @brief possible value for suppression mask + */ +#define __itt_suppress_all_errors 0x7fffffff + +/** * @hideinitializer - * @brief possible value of attribute argument for sync object type - */ -#define __itt_attr_mutex 2 - -/** -@brief Name a synchronization object -@param[in] addr Handle for the synchronization object. You should -use a real address to uniquely identify the synchronization object. -@param[in] objtype null-terminated object type string. If NULL is -passed, the name will be "User Synchronization". -@param[in] objname null-terminated object name string. If NULL, -no name will be assigned to the object. 
-@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex] - */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute); -void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_sync_create __itt_sync_createW -# define __itt_sync_create_ptr __itt_sync_createW_ptr -#else /* UNICODE */ -# define __itt_sync_create __itt_sync_createA -# define __itt_sync_create_ptr __itt_sync_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute)) -ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA) -#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA) -#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW) -#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_create ITTNOTIFY_VOID(sync_create) -#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_createA(addr, objtype, objname, attribute) -#define 
__itt_sync_createA_ptr 0 -#define __itt_sync_createW(addr, objtype, objname, attribute) -#define __itt_sync_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_create(addr, objtype, objname, attribute) -#define __itt_sync_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_createA_ptr 0 -#define __itt_sync_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** -@brief Rename a synchronization object - -You can use the rename call to assign or reassign a name to a given -synchronization object. -@param[in] addr handle for the synchronization object. -@param[in] name null-terminated object name string. -*/ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_sync_renameA(void *addr, const char *name); -void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_sync_rename __itt_sync_renameW -# define __itt_sync_rename_ptr __itt_sync_renameW_ptr -#else /* UNICODE */ -# define __itt_sync_rename __itt_sync_renameA -# define __itt_sync_rename_ptr __itt_sync_renameA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_sync_rename(void *addr, const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name)) -ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN 
-#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA) -#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA) -#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW) -#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename) -#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_renameA(addr, name) -#define __itt_sync_renameA_ptr 0 -#define __itt_sync_renameW(addr, name) -#define __itt_sync_renameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_rename(addr, name) -#define __itt_sync_rename_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_renameA_ptr 0 -#define __itt_sync_renameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_rename_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - @brief Destroy a synchronization object. - @param addr Handle for the synchronization object. 
- */ -void ITTAPI __itt_sync_destroy(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr)) -#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy) -#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_sync_destroy(addr) -#define __itt_sync_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_sync_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/*****************************************************************//** - * @name group of functions is used for performance measurement tools - *********************************************************************/ -/** @{ */ -/** - * @brief Enter spin loop on user-defined sync object - */ -void ITTAPI __itt_sync_prepare(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr)) -#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare) -#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_sync_prepare(addr) -#define __itt_sync_prepare_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_sync_prepare_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Quit spin loop without acquiring spin object - */ -void ITTAPI __itt_sync_cancel(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr)) -#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel) -#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_sync_cancel(addr) -#define __itt_sync_cancel_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* 
INTEL_NO_MACRO_BODY */ -#define __itt_sync_cancel_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Successful spin loop completion (sync object acquired) - */ -void ITTAPI __itt_sync_acquired(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr)) -#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired) -#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_sync_acquired(addr) -#define __itt_sync_acquired_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_sync_acquired_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Start sync object releasing code. Is called before the lock release call. - */ -void ITTAPI __itt_sync_releasing(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr)) -#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing) -#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_sync_releasing(addr) -#define __itt_sync_releasing_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_sync_releasing_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} */ - -/** @} sync group */ - -/**************************************************************//** - * @name group of functions is used for correctness checking tools - ******************************************************************/ -/** @{ */ -/** - * @ingroup legacy - * @deprecated Legacy API - * @brief Fast synchronization which does no require spinning. 
- * - This special function is to be used by TBB and OpenMP libraries only when they know - * there is no spin but they need to suppress TC warnings about shared variable modifications. - * - It only has corresponding pointers in static library and does not have corresponding function - * in dynamic library. - * @see void __itt_sync_prepare(void* addr); - */ -void ITTAPI __itt_fsync_prepare(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr)) -#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare) -#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_fsync_prepare(addr) -#define __itt_fsync_prepare_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_fsync_prepare_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup legacy - * @deprecated Legacy API - * @brief Fast synchronization which does no require spinning. - * - This special function is to be used by TBB and OpenMP libraries only when they know - * there is no spin but they need to suppress TC warnings about shared variable modifications. - * - It only has corresponding pointers in static library and does not have corresponding function - * in dynamic library. 
- * @see void __itt_sync_cancel(void *addr); - */ -void ITTAPI __itt_fsync_cancel(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr)) -#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel) -#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_fsync_cancel(addr) -#define __itt_fsync_cancel_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_fsync_cancel_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup legacy - * @deprecated Legacy API - * @brief Fast synchronization which does no require spinning. - * - This special function is to be used by TBB and OpenMP libraries only when they know - * there is no spin but they need to suppress TC warnings about shared variable modifications. - * - It only has corresponding pointers in static library and does not have corresponding function - * in dynamic library. - * @see void __itt_sync_acquired(void *addr); - */ -void ITTAPI __itt_fsync_acquired(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr)) -#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired) -#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_fsync_acquired(addr) -#define __itt_fsync_acquired_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_fsync_acquired_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup legacy - * @deprecated Legacy API - * @brief Fast synchronization which does no require spinning. 
- * - This special function is to be used by TBB and OpenMP libraries only when they know - * there is no spin but they need to suppress TC warnings about shared variable modifications. - * - It only has corresponding pointers in static library and does not have corresponding function - * in dynamic library. - * @see void __itt_sync_releasing(void* addr); - */ -void ITTAPI __itt_fsync_releasing(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr)) -#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing) -#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_fsync_releasing(addr) -#define __itt_fsync_releasing_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_fsync_releasing_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} */ - -/** - * @defgroup model Modeling by Intel(R) Parallel Advisor - * @ingroup public - * This is the subset of itt used for modeling by Intel(R) Parallel Advisor. - * This API is called ONLY using annotate.h, by "Annotation" macros - * the user places in their sources during the parallelism modeling steps. - * - * site_begin/end and task_begin/end take the address of handle variables, - * which are writeable by the API. Handles must be 0 initialized prior - * to the first call to begin, or may cause a run-time failure. - * The handles are initialized in a multi-thread safe way by the API if - * the handle is 0. The commonly expected idiom is one static handle to - * identify a site or task. If a site or task of the same name has already - * been started during this collection, the same handle MAY be returned, - * but is not required to be - it is unspecified if data merging is done - * based on name. These routines also take an instance variable. 
Like - * the lexical instance, these must be 0 initialized. Unlike the lexical - * instance, this is used to track a single dynamic instance. - * - * API used by the Intel(R) Parallel Advisor to describe potential concurrency - * and related activities. User-added source annotations expand to calls - * to these procedures to enable modeling of a hypothetical concurrent - * execution serially. - * @{ - */ -#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL) - -typedef void* __itt_model_site; /*!< @brief handle for lexical site */ -typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */ -typedef void* __itt_model_task; /*!< @brief handle for lexical site */ -typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */ - -/** - * @enum __itt_model_disable - * @brief Enumerator for the disable methods - */ -typedef enum { - __itt_model_disable_observation, - __itt_model_disable_collection -} __itt_model_disable; - -#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */ - -/** - * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support. - * - * site_begin/end model a potential concurrency site. - * site instances may be recursively nested with themselves. - * site_end exits the most recently started but unended site for the current - * thread. The handle passed to end may be used to validate structure. - * Instances of a site encountered on different threads concurrently - * are considered completely distinct. If the site name for two different - * lexical sites match, it is unspecified whether they are treated as the - * same or different for data presentation. 
- */ -void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name); -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_model_site_beginW(const wchar_t *name); -#endif -void ITTAPI __itt_model_site_beginA(const char *name); -void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen); -void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance); -void ITTAPI __itt_model_site_end_2(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name)) -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name)) -#endif -ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name)) -ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen)) -ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance)) -ITT_STUBV(ITTAPI, void, model_site_end_2, (void)) -#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin) -#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin) -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW) -#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW) -#endif -#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA) -#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA) -#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL) -#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL) -#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end) -#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end) -#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2) -#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2) 
-#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_site_begin(site, instance, name) -#define __itt_model_site_begin_ptr 0 -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_site_beginW(name) -#define __itt_model_site_beginW_ptr 0 -#endif -#define __itt_model_site_beginA(name) -#define __itt_model_site_beginA_ptr 0 -#define __itt_model_site_beginAL(name, siteNameLen) -#define __itt_model_site_beginAL_ptr 0 -#define __itt_model_site_end(site, instance) -#define __itt_model_site_end_ptr 0 -#define __itt_model_site_end_2() -#define __itt_model_site_end_2_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_site_begin_ptr 0 -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_site_beginW_ptr 0 -#endif -#define __itt_model_site_beginA_ptr 0 -#define __itt_model_site_beginAL_ptr 0 -#define __itt_model_site_end_ptr 0 -#define __itt_model_site_end_2_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support - * - * task_begin/end model a potential task, which is contained within the most - * closely enclosing dynamic site. task_end exits the most recently started - * but unended task. The handle passed to end may be used to validate - * structure. It is unspecified if bad dynamic nesting is detected. If it - * is, it should be encoded in the resulting data collection. The collector - * should not fail due to construct nesting issues, nor attempt to directly - * indicate the problem. 
- */ -void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_model_task_beginW(const wchar_t *name); -void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); -#endif -void ITTAPI __itt_model_task_beginA(const char *name); -void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); -void ITTAPI __itt_model_iteration_taskA(const char *name); -void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); -void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); -void ITTAPI __itt_model_task_end_2(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) -ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) -#endif -ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) -ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) -ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) -ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) -ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) -ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) -#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) -#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) -#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) -#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) -#define __itt_model_iteration_taskW_ptr 
ITTNOTIFY_NAME(model_iteration_taskW) -#endif -#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) -#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) -#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) -#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) -#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) -#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) -#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) -#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) -#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) -#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end) -#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) -#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_task_begin(task, instance, name) -#define __itt_model_task_begin_ptr 0 -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_task_beginW(name) -#define __itt_model_task_beginW_ptr 0 -#endif -#define __itt_model_task_beginA(name) -#define __itt_model_task_beginA_ptr 0 -#define __itt_model_task_beginAL(name, siteNameLen) -#define __itt_model_task_beginAL_ptr 0 -#define __itt_model_iteration_taskA(name) -#define __itt_model_iteration_taskA_ptr 0 -#define __itt_model_iteration_taskAL(name, siteNameLen) -#define __itt_model_iteration_taskAL_ptr 0 -#define __itt_model_task_end(task, instance) -#define __itt_model_task_end_ptr 0 -#define __itt_model_task_end_2() -#define __itt_model_task_end_2_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_task_begin_ptr 0 -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_model_task_beginW_ptr 0 -#endif -#define __itt_model_task_beginA_ptr 0 -#define __itt_model_task_beginAL_ptr 0 -#define __itt_model_iteration_taskA_ptr 0 -#define 
__itt_model_iteration_taskAL_ptr 0 -#define __itt_model_task_end_ptr 0 -#define __itt_model_task_end_2_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support - * - * lock_acquire/release model a potential lock for both lockset and - * performance modeling. Each unique address is modeled as a separate - * lock, with invalid addresses being valid lock IDs. Specifically: - * no storage is accessed by the API at the specified address - it is only - * used for lock identification. Lock acquires may be self-nested and are - * unlocked by a corresponding number of releases. - * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing, - * but may not have identical semantics.) - */ -void ITTAPI __itt_model_lock_acquire(void *lock); -void ITTAPI __itt_model_lock_acquire_2(void *lock); -void ITTAPI __itt_model_lock_release(void *lock); -void ITTAPI __itt_model_lock_release_2(void *lock); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) -ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) -ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) -ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) -#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) -#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) -#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) -#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) -#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) -#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) -#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) -#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_lock_acquire(lock) -#define 
__itt_model_lock_acquire_ptr 0 -#define __itt_model_lock_acquire_2(lock) -#define __itt_model_lock_acquire_2_ptr 0 -#define __itt_model_lock_release(lock) -#define __itt_model_lock_release_ptr 0 -#define __itt_model_lock_release_2(lock) -#define __itt_model_lock_release_2_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_lock_acquire_ptr 0 -#define __itt_model_lock_acquire_2_ptr 0 -#define __itt_model_lock_release_ptr 0 -#define __itt_model_lock_release_2_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support - * - * record_allocation/deallocation describe user-defined memory allocator - * behavior, which may be required for correctness modeling to understand - * when storage is not expected to be actually reused across threads. - */ -void ITTAPI __itt_model_record_allocation (void *addr, size_t size); -void ITTAPI __itt_model_record_deallocation(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size)) -ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr)) -#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation) -#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation) -#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation) -#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_record_allocation(addr, size) -#define __itt_model_record_allocation_ptr 0 -#define __itt_model_record_deallocation(addr) -#define __itt_model_record_deallocation_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_record_allocation_ptr 0 -#define __itt_model_record_deallocation_ptr 0 -#endif /* 
INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_INDUCTION_USES support - * - * Note particular storage is inductive through the end of the current site - */ -void ITTAPI __itt_model_induction_uses(void* addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size)) -#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses) -#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_induction_uses(addr, size) -#define __itt_model_induction_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_induction_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_REDUCTION_USES support - * - * Note particular storage is used for reduction through the end - * of the current site - */ -void ITTAPI __itt_model_reduction_uses(void* addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size)) -#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses) -#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_reduction_uses(addr, size) -#define __itt_model_reduction_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_reduction_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_OBSERVE_USES support - * - * Have correctness modeling record observations about uses of storage - * through the end of the current site - */ -void ITTAPI __itt_model_observe_uses(void* addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY 
-#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size)) -#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses) -#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_observe_uses(addr, size) -#define __itt_model_observe_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_observe_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_CLEAR_USES support - * - * Clear the special handling of a piece of storage related to induction, - * reduction or observe_uses - */ -void ITTAPI __itt_model_clear_uses(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) -#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses) -#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_clear_uses(addr) -#define __itt_model_clear_uses_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_clear_uses_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support - * - * disable_push/disable_pop push and pop disabling based on a parameter. - * Disabling observations stops processing of memory references during - * correctness modeling, and all annotations that occur in the disabled - * region. This allows description of code that is expected to be handled - * specially during conversion to parallelism or that is not recognized - * by tools (e.g. some kinds of synchronization operations.) - * This mechanism causes all annotations in the disabled region, other - * than disable_push and disable_pop, to be ignored. 
(For example, this - * might validly be used to disable an entire parallel site and the contained - * tasks and locking in it for data collection purposes.) - * The disable for collection is a more expensive operation, but reduces - * collector overhead significantly. This applies to BOTH correctness data - * collection and performance data collection. For example, a site - * containing a task might only enable data collection for the first 10 - * iterations. Both performance and correctness data should reflect this, - * and the program should run as close to full speed as possible when - * collection is disabled. - */ -void ITTAPI __itt_model_disable_push(__itt_model_disable x); -void ITTAPI __itt_model_disable_pop(void); -void ITTAPI __itt_model_aggregate_task(size_t x); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) -ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) -ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) -#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) -#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) -#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) -#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) -#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) -#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_model_disable_push(x) -#define __itt_model_disable_push_ptr 0 -#define __itt_model_disable_pop() -#define __itt_model_disable_pop_ptr 0 -#define __itt_model_aggregate_task(x) -#define __itt_model_aggregate_task_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_model_disable_push_ptr 0 -#define __itt_model_disable_pop_ptr 0 -#define __itt_model_aggregate_task_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** 
@endcond */ -/** @} model group */ - -/** - * @defgroup heap Heap - * @ingroup public - * Heap group - * @{ - */ - -typedef void* __itt_heap_function; - -/** - * @brief Create an identification for heap function - * @return non-zero identifier or NULL - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain); -__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_heap_function_create __itt_heap_function_createW -# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr -#else -# define __itt_heap_function_create __itt_heap_function_createA -# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain)) -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA) -#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA) -#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW) -#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_heap_function_create 
ITTNOTIFY_DATA(heap_function_create) -#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0 -#define __itt_heap_function_createA_ptr 0 -#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0 -#define __itt_heap_function_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_heap_function_create(name, domain) (__itt_heap_function)0 -#define __itt_heap_function_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_heap_function_createA_ptr 0 -#define __itt_heap_function_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_heap_function_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an allocation begin occurrence. - */ -void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized)) -#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin) -#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_allocate_begin(h, size, initialized) -#define __itt_heap_allocate_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_allocate_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an allocation end occurrence. 
- */ -void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized)) -#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end) -#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_allocate_end(h, addr, size, initialized) -#define __itt_heap_allocate_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_allocate_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an free begin occurrence. - */ -void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) -#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin) -#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_free_begin(h, addr) -#define __itt_heap_free_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_free_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an free end occurrence. 
- */ -void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) -#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end) -#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_free_end(h, addr) -#define __itt_heap_free_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_free_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an reallocation begin occurrence. - */ -void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized)) -#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin) -#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_reallocate_begin(h, addr, new_size, initialized) -#define __itt_heap_reallocate_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_reallocate_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an reallocation end occurrence. 
- */ -void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized)) -#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end) -#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized) -#define __itt_heap_reallocate_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_reallocate_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief internal access begin */ -void ITTAPI __itt_heap_internal_access_begin(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void)) -#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin) -#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_internal_access_begin() -#define __itt_heap_internal_access_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_internal_access_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief internal access end */ -void ITTAPI __itt_heap_internal_access_end(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void)) -#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end) -#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end) -#else /* 
INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_internal_access_end() -#define __itt_heap_internal_access_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_internal_access_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief record memory growth begin */ -void ITTAPI __itt_heap_record_memory_growth_begin(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) -#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) -#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_record_memory_growth_begin() -#define __itt_heap_record_memory_growth_begin_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_record_memory_growth_begin_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief record memory growth end */ -void ITTAPI __itt_heap_record_memory_growth_end(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) -#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) -#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_record_memory_growth_end() -#define __itt_heap_record_memory_growth_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_record_memory_growth_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Specify the type of heap detection/reporting to modify. - */ -/** - * @hideinitializer - * @brief Report on memory leaks. 
- */ -#define __itt_heap_leaks 0x00000001 - -/** - * @hideinitializer - * @brief Report on memory growth. - */ -#define __itt_heap_growth 0x00000002 - - -/** @brief heap reset detection */ -void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) -#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) -#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_reset_detection() -#define __itt_heap_reset_detection_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_reset_detection_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief report */ -void ITTAPI __itt_heap_record(unsigned int record_mask); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) -#define __itt_heap_record ITTNOTIFY_VOID(heap_record) -#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_heap_record() -#define __itt_heap_record_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_heap_record_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} heap group */ -/** @endcond */ -/* ========================================================================== */ - -/** - * @defgroup domains Domains - * @ingroup public - * Domains group - * @{ - */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_domain -{ - volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */ - const char* nameA; /*!< Copy of original name in ASCII. 
*/ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* nameW; -#endif /* UNICODE || _UNICODE */ - int extra1; /*!< Reserved to the runtime */ - void* extra2; /*!< Reserved to the runtime */ - struct ___itt_domain* next; -} __itt_domain; - -#pragma pack(pop) -/** @endcond */ - -/** - * @ingroup domains - * @brief Create a domain. - * Create domain using some domain name: the URI naming style is recommended. - * Because the set of domains is expected to be static over the application's - * execution time, there is no mechanism to destroy a domain. - * Any domain can be accessed by any thread in the process, regardless of - * which thread created the domain. This call is thread-safe. - * @param[in] name name of domain - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_domain* ITTAPI __itt_domain_createA(const char *name); -__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_domain_create __itt_domain_createW -# define __itt_domain_create_ptr __itt_domain_createW_ptr -#else /* UNICODE */ -# define __itt_domain_create __itt_domain_createA -# define __itt_domain_create_ptr __itt_domain_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_domain* ITTAPI __itt_domain_create(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name)) -ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA) -#define 
__itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA) -#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW) -#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_domain_create ITTNOTIFY_DATA(domain_create) -#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_domain_createA(name) (__itt_domain*)0 -#define __itt_domain_createA_ptr 0 -#define __itt_domain_createW(name) (__itt_domain*)0 -#define __itt_domain_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_domain_create(name) (__itt_domain*)0 -#define __itt_domain_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_domain_createA_ptr 0 -#define __itt_domain_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_domain_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} domains group */ - -/** - * @defgroup ids IDs - * @ingroup public - * IDs group - * @{ - */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_id -{ - unsigned long long d1, d2, d3; -} __itt_id; - -#pragma pack(pop) -/** @endcond */ - -static const __itt_id __itt_null = { 0, 0, 0 }; - -/** - * @ingroup ids - * @brief A convenience function is provided to create an ID without domain control. - * @brief This is a convenience function to initialize an __itt_id structure. - * After you make the ID with this function, you still must create it with the - * __itt_id_create function before using the ID to identify a named entity. - * @param[in] addr The address of object; high QWORD of the ID value. 
- * @param[in] extra The extra data to unique identify object; low QWORD of the ID value. - */ - -ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; -ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) -{ - __itt_id id = __itt_null; - id.d1 = (unsigned long long)((uintptr_t)addr); - id.d2 = (unsigned long long)extra; - id.d3 = (unsigned long long)0; /* Reserved. Must be zero */ - return id; -} - -/** - * @ingroup ids - * @brief Create an instance of identifier. - * This establishes the beginning of the lifetime of an instance of - * the given ID in the trace. Once this lifetime starts, the ID - * can be used to tag named entity instances in calls such as - * __itt_task_begin, and to specify relationships among - * identified named entity instances, using the \ref relations APIs. - * Instance IDs are not domain specific! - * @param[in] domain The domain controlling the execution of this call. - * @param[in] id The ID to create. - */ -void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id)) -#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x) -#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_id_create(domain,id) -#define __itt_id_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_id_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup ids - * @brief Destroy an instance of identifier. - * This ends the lifetime of the current instance of the given ID value in the trace. - * Any relationships that are established after this lifetime ends are invalid. 
- * This call must be performed before the given ID value can be reused for a different - * named entity instance. - * @param[in] domain The domain controlling the execution of this call. - * @param[in] id The ID to destroy. - */ -void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id)) -#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x) -#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_id_destroy(domain,id) -#define __itt_id_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_id_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} ids group */ - -/** - * @defgroup handless String Handles - * @ingroup public - * String Handles group - * @{ - */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_string_handle -{ - const char* strA; /*!< Copy of original string in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* strW; /*!< Copy of original string in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* strW; -#endif /* UNICODE || _UNICODE */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_string_handle* next; -} __itt_string_handle; - -#pragma pack(pop) -/** @endcond */ - -/** - * @ingroup handles - * @brief Create a string handle. - * Create and return handle value that can be associated with a string. - * Consecutive calls to __itt_string_handle_create with the same name - * return the same value. Because the set of string handles is expected to remain - * static during the application's execution time, there is no mechanism to destroy a string handle. 
- * Any string handle can be accessed by any thread in the process, regardless of which thread created - * the string handle. This call is thread-safe. - * @param[in] name The input string - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name); -__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_string_handle_create __itt_string_handle_createW -# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr -#else /* UNICODE */ -# define __itt_string_handle_create __itt_string_handle_createA -# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name)) -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA) -#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA) -#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW) -#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create) -#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* 
INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_string_handle_createA(name) (__itt_string_handle*)0 -#define __itt_string_handle_createA_ptr 0 -#define __itt_string_handle_createW(name) (__itt_string_handle*)0 -#define __itt_string_handle_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_string_handle_create(name) (__itt_string_handle*)0 -#define __itt_string_handle_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_string_handle_createA_ptr 0 -#define __itt_string_handle_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_string_handle_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} handles group */ - -/** @cond exclude_from_documentation */ -typedef unsigned long long __itt_timestamp; -/** @endcond */ - -static const __itt_timestamp __itt_timestamp_none = (__itt_timestamp)-1LL; - -/** @cond exclude_from_gpa_documentation */ - -/** - * @ingroup timestamps - * @brief Return timestamp corresponding to the current moment. - * This returns the timestamp in the format that is the most relevant for the current - * host or platform (RDTSC, QPC, and others). You can use the "<" operator to - * compare __itt_timestamp values. 
- */ -__itt_timestamp ITTAPI __itt_get_timestamp(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) -#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) -#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_get_timestamp() -#define __itt_get_timestamp_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_get_timestamp_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} timestamps */ -/** @endcond */ - -/** @cond exclude_from_gpa_documentation */ - -/** - * @defgroup regions Regions - * @ingroup public - * Regions group - * @{ - */ -/** - * @ingroup regions - * @brief Begin of region instance. - * Successive calls to __itt_region_begin with the same ID are ignored - * until a call to __itt_region_end with the same ID - * @param[in] domain The domain for this region instance - * @param[in] id The instance ID for this region instance. Must not be __itt_null - * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null - * @param[in] name The name of this region - */ -void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); - -/** - * @ingroup regions - * @brief End of region instance. - * The first call to __itt_region_end with a given ID ends the - * region. Successive calls with the same ID are ignored, as are - * calls that do not have a matching __itt_region_begin call. 
- * @param[in] domain The domain for this region instance - * @param[in] id The instance ID for this region instance - */ -void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id)) -#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z) -#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin) -#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x) -#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_region_begin(d,x,y,z) -#define __itt_region_begin_ptr 0 -#define __itt_region_end(d,x) -#define __itt_region_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_region_begin_ptr 0 -#define __itt_region_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} regions group */ - -/** - * @defgroup frames Frames - * @ingroup public - * Frames are similar to regions, but are intended to be easier to use and to implement. - * In particular: - * - Frames always represent periods of elapsed time - * - By default, frames have no nesting relationships - * @{ - */ - -/** - * @ingroup frames - * @brief Begin a frame instance. - * Successive calls to __itt_frame_begin with the - * same ID are ignored until a call to __itt_frame_end with the same ID. - * @param[in] domain The domain for this frame instance - * @param[in] id The instance ID for this frame instance or NULL - */ -void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); - -/** - * @ingroup frames - * @brief End a frame instance. - * The first call to __itt_frame_end with a given ID - * ends the frame. 
Successive calls with the same ID are ignored, as are - * calls that do not have a matching __itt_frame_begin call. - * @param[in] domain The domain for this frame instance - * @param[in] id The instance ID for this frame instance or NULL for current - */ -void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); - -/** - * @ingroup frames - * @brief Submits a frame instance. - * Successive calls to __itt_frame_begin or __itt_frame_submit with the - * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit - * with the same ID. - * Passing special __itt_timestamp_none value as "end" argument means - * take the current timestamp as the end timestamp. - * @param[in] domain The domain for this frame instance - * @param[in] id The instance ID for this frame instance or NULL - * @param[in] begin Timestamp of the beginning of the frame - * @param[in] end Timestamp of the end of the frame - */ -void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, - __itt_timestamp begin, __itt_timestamp end); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) -ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) -ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) -#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) -#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) -#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) -#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) -#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) -#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_frame_begin_v3(domain,id) -#define __itt_frame_begin_v3_ptr 0 -#define 
__itt_frame_end_v3(domain,id) -#define __itt_frame_end_v3_ptr 0 -#define __itt_frame_submit_v3(domain,id,begin,end) -#define __itt_frame_submit_v3_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_frame_begin_v3_ptr 0 -#define __itt_frame_end_v3_ptr 0 -#define __itt_frame_submit_v3_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} frames group */ -/** @endcond */ - -/** - * @defgroup taskgroup Task Group - * @ingroup public - * Task Group - * @{ - */ -/** - * @ingroup task_groups - * @brief Denotes a task_group instance. - * Successive calls to __itt_task_group with the same ID are ignored. - * @param[in] domain The domain for this task_group instance - * @param[in] id The instance ID for this task_group instance. Must not be __itt_null. - * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null. - * @param[in] name The name of this task_group - */ -void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z) -#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_group(d,x,y,z) -#define __itt_task_group_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_group_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} taskgroup group */ - -/** - * @defgroup tasks Tasks - * @ingroup public - * A task instance represents a piece of work performed by a particular - * thread for a period of time. A call to __itt_task_begin creates a - * task instance. This becomes the current instance for that task on that - * thread. 
A following call to __itt_task_end on the same thread ends the - * instance. There may be multiple simultaneous instances of tasks with the - * same name on different threads. If an ID is specified, the task instance - * receives that ID. Nested tasks are allowed. - * - * Note: The task is defined by the bracketing of __itt_task_begin and - * __itt_task_end on the same thread. If some scheduling mechanism causes - * task switching (the thread executes a different user task) or task - * switching (the user task switches to a different thread) then this breaks - * the notion of current instance. Additional API calls are required to - * deal with that possibility. - * @{ - */ - -/** - * @ingroup tasks - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] taskid The instance ID for this task instance, or __itt_null - * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null - * @param[in] name The name of this task - */ -void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name); - -/** - * @ingroup tasks - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] taskid The identifier for this task instance (may be 0) - * @param[in] parentid The parent of this task (may be 0) - * @param[in] fn The pointer to the function you are tracing - */ -void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn); - -/** - * @ingroup tasks - * @brief End the current task instance. 
- * @param[in] domain The domain for this task - */ -void ITTAPI __itt_task_end(const __itt_domain *domain); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn)) -ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain)) -#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z) -#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin) -#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z) -#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn) -#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d) -#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_begin(domain,id,parentid,name) -#define __itt_task_begin_ptr 0 -#define __itt_task_begin_fn(domain,id,parentid,fn) -#define __itt_task_begin_fn_ptr 0 -#define __itt_task_end(domain) -#define __itt_task_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_begin_ptr 0 -#define __itt_task_begin_fn_ptr 0 -#define __itt_task_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} tasks group */ - -/** - * @defgroup counters Counters - * @ingroup public - * Counters are user-defined objects with a monotonically increasing - * value. Counter values are 64-bit unsigned integers. Counter values - * are tracked per-thread. Counters have names that can be displayed in - * the tools. - * @{ - */ - -/** - * @ingroup counters - * @brief Increment a counter by one. - * The first call with a given name creates a counter by that name and sets its - * value to zero on every thread. Successive calls increment the counter value - * on the thread on which the call is issued. 
- * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. - * @param[in] name The name of the counter - */ -void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); - -/** - * @ingroup counters - * @brief Increment a counter by the value specified in delta. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. - * @param[in] name The name of the counter - * @param[in] delta The amount by which to increment the counter - */ -void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) -#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) -#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) -#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) -#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc_v3(domain,name) -#define __itt_counter_inc_v3_ptr 0 -#define __itt_counter_inc_delta_v3(domain,name,delta) -#define __itt_counter_inc_delta_v3_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_v3_ptr 0 -#define __itt_counter_inc_delta_v3_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} counters group */ - -/** - * @defgroup markers Markers - * Markers represent a single discreet event in time. 
Markers have a scope, - * described by an enumerated type __itt_scope. Markers are created by - * the API call __itt_marker. A marker instance can be given an ID for use in - * adding metadata. - * @{ - */ - -/** - * @brief Describes the scope of an event object in the trace. - */ -typedef enum -{ - __itt_scope_unknown = 0, - __itt_scope_global, - __itt_scope_track_group, - __itt_scope_track, - __itt_scope_task, - __itt_scope_marker -} __itt_scope; - -/** @cond exclude_from_documentation */ -#define __itt_marker_scope_unknown __itt_scope_unknown -#define __itt_marker_scope_global __itt_scope_global -#define __itt_marker_scope_process __itt_scope_track_group -#define __itt_marker_scope_thread __itt_scope_track -#define __itt_marker_scope_task __itt_scope_task -/** @endcond */ - -/** - * @ingroup markers - * @brief Create a marker instance - * @param[in] domain The domain for this marker - * @param[in] id The instance ID for this marker or __itt_null - * @param[in] name The name for this marker - * @param[in] scope The scope for this marker - */ -void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope)) -#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z) -#define __itt_marker_ptr ITTNOTIFY_NAME(marker) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_marker(domain,id,name,scope) -#define __itt_marker_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_marker_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} markers group */ - -/** - * @defgroup metadata Metadata - * The metadata API is used to attach extra information to named - * entities. 
Metadata can be attached to an identified named entity by ID, - * or to the current entity (which is always a task). - * - * Conceptually metadata has a type (what kind of metadata), a key (the - * name of the metadata), and a value (the actual data). The encoding of - * the value depends on the type of the metadata. - * - * The type of metadata is specified by an enumerated type __itt_metdata_type. - * @{ - */ - -/** - * @ingroup parameters - * @brief describes the type of metadata - */ -typedef enum { - __itt_metadata_unknown = 0, - __itt_metadata_u64, /**< Unsigned 64-bit integer */ - __itt_metadata_s64, /**< Signed 64-bit integer */ - __itt_metadata_u32, /**< Unsigned 32-bit integer */ - __itt_metadata_s32, /**< Signed 32-bit integer */ - __itt_metadata_u16, /**< Unsigned 16-bit integer */ - __itt_metadata_s16, /**< Signed 16-bit integer */ - __itt_metadata_float, /**< Signed 32-bit floating-point */ - __itt_metadata_double /**< SIgned 64-bit floating-point */ -} __itt_metadata_type; - -/** - * @ingroup parameters - * @brief Add metadata to an instance of a named entity. - * @param[in] domain The domain controlling the call - * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task - * @param[in] key The name of the metadata - * @param[in] type The type of the metadata - * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. 
- * @param[in] data The metadata itself -*/ -void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) -#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b) -#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_metadata_add(d,x,y,z,a,b) -#define __itt_metadata_add_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_metadata_add_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup parameters - * @brief Add string metadata to an instance of a named entity. - * @param[in] domain The domain controlling the call - * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task - * @param[in] key The name of the metadata - * @param[in] data The metadata itself - * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated -*/ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); -void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_metadata_str_add __itt_metadata_str_addW -# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr -#else /* UNICODE */ -# define __itt_metadata_str_add __itt_metadata_str_addA -# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr -#endif /* 
UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); -#endif - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) -ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a) -#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA) -#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a) -#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a) -#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_addA(d,x,y,z,a) -#define __itt_metadata_str_addA_ptr 0 -#define __itt_metadata_str_addW(d,x,y,z,a) -#define __itt_metadata_str_addW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add(d,x,y,z,a) -#define __itt_metadata_str_add_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define 
__itt_metadata_str_addA_ptr 0 -#define __itt_metadata_str_addW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup parameters - * @brief Add metadata to an instance of a named entity. - * @param[in] domain The domain controlling the call - * @param[in] scope The scope of the instance to which the metadata is to be added - - * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task - - * @param[in] key The name of the metadata - * @param[in] type The type of the metadata - * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. - * @param[in] data The metadata itself -*/ -void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) -#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b) -#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_metadata_add_with_scope(d,x,y,z,a,b) -#define __itt_metadata_add_with_scope_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_metadata_add_with_scope_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup parameters - * @brief Add string metadata to an instance of a named entity. 
- * @param[in] domain The domain controlling the call - * @param[in] scope The scope of the instance to which the metadata is to be added - - * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task - - * @param[in] key The name of the metadata - * @param[in] data The metadata itself - * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated -*/ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); -void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW -# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr -#else /* UNICODE */ -# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA -# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); -#endif - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, 
metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA) -#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a) -#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeA_ptr 0 -#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) -#define __itt_metadata_str_add_with_scopeW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_with_scope(d,x,y,z,a) -#define __itt_metadata_str_add_with_scope_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_metadata_str_add_with_scopeA_ptr 0 -#define __itt_metadata_str_add_with_scopeW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_metadata_str_add_with_scope_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} metadata group */ - -/** - * @defgroup relations Relations - * Instances of named entities can be explicitly associated with other - * instances using instance IDs and the relationship API calls. 
- * - * @{ - */ - -/** - * @ingroup relations - * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation. - * Relations between instances can be added with an API call. The relation - * API uses instance IDs. Relations can be added before or after the actual - * instances are created and persist independently of the instances. This - * is the motivation for having different lifetimes for instance IDs and - * the actual instances. - */ + * @brief possible value for suppression mask (suppresses errors from threading analysis) + */ +#define __itt_suppress_threading_errors 0x000000ff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from memory analysis) + */ +#define __itt_suppress_memory_errors 0x0000ff00 + +/** + * @brief Start suppressing errors identified in mask on this thread + */ +void ITTAPI __itt_suppress_push(unsigned int mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) +#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) +#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_push(mask) +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effects of the matching call to __itt_suppress_push + */ +void ITTAPI __itt_suppress_pop(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_pop, (void)) +#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop) +#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_pop() +#define __itt_suppress_pop_ptr 0 +#endif /* 
INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum __itt_suppress_mode { + __itt_unsuppress_range, + __itt_suppress_range +} __itt_suppress_mode_t; + +/** + * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask + */ +void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range) +#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_mark_range(mask) +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If not matching + * call is found, nothing is changed. 
+ */ +void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range) +#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_clear_range(mask) +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ +/** @} suppress group */ + +/** + * @defgroup sync Synchronization + * @ingroup public + * Indicate user-written synchronization code + * @{ + */ +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_barrier 1 + +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_mutex 2 + +/** +@brief Name a synchronization object +@param[in] addr Handle for the synchronization object. You should +use a real address to uniquely identify the synchronization object. +@param[in] objtype null-terminated object type string. If NULL is +passed, the name will be "User Synchronization". +@param[in] objname null-terminated object name string. If NULL, +no name will be assigned to the object. 
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex] + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute); +void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_create __itt_sync_createW +# define __itt_sync_create_ptr __itt_sync_createW_ptr +#else /* UNICODE */ +# define __itt_sync_create __itt_sync_createA +# define __itt_sync_create_ptr __itt_sync_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute)) +ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA) +#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA) +#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW) +#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create ITTNOTIFY_VOID(sync_create) +#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA(addr, objtype, objname, attribute) +#define 
__itt_sync_createA_ptr 0 +#define __itt_sync_createW(addr, objtype, objname, attribute) +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create(addr, objtype, objname, attribute) +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA_ptr 0 +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** +@brief Rename a synchronization object + +You can use the rename call to assign or reassign a name to a given +synchronization object. +@param[in] addr handle for the synchronization object. +@param[in] name null-terminated object name string. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_renameA(void *addr, const char *name); +void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_rename __itt_sync_renameW +# define __itt_sync_rename_ptr __itt_sync_renameW_ptr +#else /* UNICODE */ +# define __itt_sync_rename __itt_sync_renameA +# define __itt_sync_rename_ptr __itt_sync_renameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_rename(void *addr, const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name)) +ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN 
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA) +#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA) +#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW) +#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename) +#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA(addr, name) +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW(addr, name) +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename(addr, name) +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + @brief Destroy a synchronization object. + @param addr Handle for the synchronization object. 
+ */ +void ITTAPI __itt_sync_destroy(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr)) +#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy) +#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_destroy(addr) +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/*****************************************************************//** + * @name group of functions is used for performance measurement tools + *********************************************************************/ +/** @{ */ +/** + * @brief Enter spin loop on user-defined sync object + */ +void ITTAPI __itt_sync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr)) +#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare) +#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_prepare(addr) +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Quit spin loop without acquiring spin object + */ +void ITTAPI __itt_sync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr)) +#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel) +#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_cancel(addr) +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* 
INTEL_NO_MACRO_BODY */ +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Successful spin loop completion (sync object acquired) + */ +void ITTAPI __itt_sync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr)) +#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired) +#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_acquired(addr) +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Start sync object releasing code. Is called before the lock release call. + */ +void ITTAPI __itt_sync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr)) +#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing) +#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_releasing(addr) +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** @} sync group */ + +/**************************************************************//** + * @name group of functions is used for correctness checking tools + ******************************************************************/ +/** @{ */ +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_prepare(void* addr); + */ +void ITTAPI __itt_fsync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr)) +#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare) +#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_prepare(addr) +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library.
+ * @see void __itt_sync_cancel(void *addr); + */ +void ITTAPI __itt_fsync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr)) +#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel) +#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_cancel(addr) +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_acquired(void *addr); + */ +void ITTAPI __itt_fsync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr)) +#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired) +#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_acquired(addr) +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_releasing(void* addr); + */ +void ITTAPI __itt_fsync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr)) +#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing) +#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_releasing(addr) +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** + * @defgroup model Modeling by Intel(R) Parallel Advisor + * @ingroup public + * This is the subset of itt used for modeling by Intel(R) Parallel Advisor. + * This API is called ONLY using annotate.h, by "Annotation" macros + * the user places in their sources during the parallelism modeling steps. + * + * site_begin/end and task_begin/end take the address of handle variables, + * which are writeable by the API. Handles must be 0 initialized prior + * to the first call to begin, or may cause a run-time failure. + * The handles are initialized in a multi-thread safe way by the API if + * the handle is 0. The commonly expected idiom is one static handle to + * identify a site or task. If a site or task of the same name has already + * been started during this collection, the same handle MAY be returned, + * but is not required to be - it is unspecified if data merging is done + * based on name. These routines also take an instance variable. 
Like + * the lexical instance, these must be 0 initialized. Unlike the lexical + * instance, this is used to track a single dynamic instance. + * + * API used by the Intel(R) Parallel Advisor to describe potential concurrency + * and related activities. User-added source annotations expand to calls + * to these procedures to enable modeling of a hypothetical concurrent + * execution serially. + * @{ + */ +#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL) + +typedef void* __itt_model_site; /*!< @brief handle for lexical site */ +typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */ +typedef void* __itt_model_task; /*!< @brief handle for lexical task */ +typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum { + __itt_model_disable_observation, + __itt_model_disable_collection +} __itt_model_disable; + +#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */ + +/** + * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support. + * + * site_begin/end model a potential concurrency site. + * site instances may be recursively nested with themselves. + * site_end exits the most recently started but unended site for the current + * thread. The handle passed to end may be used to validate structure. + * Instances of a site encountered on different threads concurrently + * are considered completely distinct. If the site name for two different + * lexical sites match, it is unspecified whether they are treated as the + * same or different for data presentation.
+ */ +void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_site_beginW(const wchar_t *name); +#endif +void ITTAPI __itt_model_site_beginA(const char *name); +void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen); +void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance); +void ITTAPI __itt_model_site_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen)) +ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance)) +ITT_STUBV(ITTAPI, void, model_site_end_2, (void)) +#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin) +#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW) +#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW) +#endif +#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA) +#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA) +#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL) +#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL) +#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end) +#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end) +#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2) +#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2) 
+#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_site_begin(site, instance, name) +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW(name) +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA(name) +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL(name, siteNameLen) +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end(site, instance) +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2() +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support + * + * task_begin/end model a potential task, which is contained within the most + * closely enclosing dynamic site. task_end exits the most recently started + * but unended task. The handle passed to end may be used to validate + * structure. It is unspecified if bad dynamic nesting is detected. If it + * is, it should be encoded in the resulting data collection. The collector + * should not fail due to construct nesting issues, nor attempt to directly + * indicate the problem. 
+ */ +void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_task_beginW(const wchar_t *name); +void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); +#endif +void ITTAPI __itt_model_task_beginA(const char *name); +void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_iteration_taskA(const char *name); +void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); +void ITTAPI __itt_model_task_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) +ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) +#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) +#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) +#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) +#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) +#define __itt_model_iteration_taskW_ptr 
ITTNOTIFY_NAME(model_iteration_taskW) +#endif +#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) +#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) +#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) +#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) +#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) +#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) +#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) +#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) +#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) +#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end) +#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) +#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_task_begin(task, instance, name) +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW(name) +#define __itt_model_task_beginW_ptr 0 +#endif +#define __itt_model_task_beginA(name) +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL(name, siteNameLen) +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA(name) +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL(name, siteNameLen) +#define __itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end(task, instance) +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2() +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW_ptr 0 +#endif +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA_ptr 0 +#define 
__itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support + * + * lock_acquire/release model a potential lock for both lockset and + * performance modeling. Each unique address is modeled as a separate + * lock, with invalid addresses being valid lock IDs. Specifically: + * no storage is accessed by the API at the specified address - it is only + * used for lock identification. Lock acquires may be self-nested and are + * unlocked by a corresponding number of releases. + * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing, + * but may not have identical semantics.) + */ +void ITTAPI __itt_model_lock_acquire(void *lock); +void ITTAPI __itt_model_lock_acquire_2(void *lock); +void ITTAPI __itt_model_lock_release(void *lock); +void ITTAPI __itt_model_lock_release_2(void *lock); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) +#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) +#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) +#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) +#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) +#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) +#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) +#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) +#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_lock_acquire(lock) +#define 
__itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2(lock) +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release(lock) +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2(lock) +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support + * + * record_allocation/deallocation describe user-defined memory allocator + * behavior, which may be required for correctness modeling to understand + * when storage is not expected to be actually reused across threads. + */ +void ITTAPI __itt_model_record_allocation (void *addr, size_t size); +void ITTAPI __itt_model_record_deallocation(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size)) +ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr)) +#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation) +#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation) +#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation) +#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_record_allocation(addr, size) +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation(addr) +#define __itt_model_record_deallocation_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation_ptr 0 +#endif /* 
INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_INDUCTION_USES support + * + * Note particular storage is inductive through the end of the current site + */ +void ITTAPI __itt_model_induction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size)) +#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses) +#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_induction_uses(addr, size) +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_REDUCTION_USES support + * + * Note particular storage is used for reduction through the end + * of the current site + */ +void ITTAPI __itt_model_reduction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size)) +#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses) +#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_reduction_uses(addr, size) +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_OBSERVE_USES support + * + * Have correctness modeling record observations about uses of storage + * through the end of the current site + */ +void ITTAPI __itt_model_observe_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY 
+#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size)) +#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses) +#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_observe_uses(addr, size) +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_CLEAR_USES support + * + * Clear the special handling of a piece of storage related to induction, + * reduction or observe_uses + */ +void ITTAPI __itt_model_clear_uses(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) +#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses) +#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_clear_uses(addr) +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support + * + * disable_push/disable_pop push and pop disabling based on a parameter. + * Disabling observations stops processing of memory references during + * correctness modeling, and all annotations that occur in the disabled + * region. This allows description of code that is expected to be handled + * specially during conversion to parallelism or that is not recognized + * by tools (e.g. some kinds of synchronization operations.) + * This mechanism causes all annotations in the disabled region, other + * than disable_push and disable_pop, to be ignored. 
(For example, this + * might validly be used to disable an entire parallel site and the contained + * tasks and locking in it for data collection purposes.) + * The disable for collection is a more expensive operation, but reduces + * collector overhead significantly. This applies to BOTH correctness data + * collection and performance data collection. For example, a site + * containing a task might only enable data collection for the first 10 + * iterations. Both performance and correctness data should reflect this, + * and the program should run as close to full speed as possible when + * collection is disabled. + */ +void ITTAPI __itt_model_disable_push(__itt_model_disable x); +void ITTAPI __itt_model_disable_pop(void); +void ITTAPI __itt_model_aggregate_task(size_t x); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) +ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) +#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) +#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) +#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) +#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) +#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) +#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_disable_push(x) +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop() +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task(x) +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** 
@endcond */ +/** @} model group */ + +/** + * @defgroup heap Heap + * @ingroup public + * Heap group + * @{ + */ + +typedef void* __itt_heap_function; + +/** + * @brief Create an identification for heap function + * @return non-zero identifier or NULL + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain); +__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_heap_function_create __itt_heap_function_createW +# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr +#else +# define __itt_heap_function_create __itt_heap_function_createA +# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain)) +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA) +#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA) +#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW) +#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create 
ITTNOTIFY_DATA(heap_function_create) +#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation begin occurrence. + */ +void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized)) +#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin) +#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_begin(h, size, initialized) +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation end occurrence. 
+ */ +void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized)) +#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end) +#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_end(h, addr, size, initialized) +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free begin occurrence. + */ +void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin) +#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_begin(h, addr) +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free end occurrence.
+ */ +void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end) +#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_end(h, addr) +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation begin occurrence. + */ +void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin) +#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_begin(h, addr, new_size, initialized) +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation end occurrence.
+ */ +void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end) +#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized) +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access begin */ +void ITTAPI __itt_heap_internal_access_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void)) +#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin) +#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_begin() +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access end */ +void ITTAPI __itt_heap_internal_access_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void)) +#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end) +#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end) +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_end() +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth begin */ +void ITTAPI __itt_heap_record_memory_growth_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) +#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) +#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_begin() +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth end */ +void ITTAPI __itt_heap_record_memory_growth_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) +#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) +#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_end() +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Specify the type of heap detection/reporting to modify. + */ +/** + * @hideinitializer + * @brief Report on memory leaks. 
+ */ +#define __itt_heap_leaks 0x00000001 + +/** + * @hideinitializer + * @brief Report on memory growth. + */ +#define __itt_heap_growth 0x00000002 + + +/** @brief heap reset detection */ +void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) +#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) +#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reset_detection(reset_mask) +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief report */ +void ITTAPI __itt_heap_record(unsigned int record_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) +#define __itt_heap_record ITTNOTIFY_VOID(heap_record) +#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record(record_mask) +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} heap group */ +/** @endcond */ +/* ========================================================================== */ + +/** + * @defgroup domains Domains + * @ingroup public + * Domains group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_domain +{ + volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */ + const char* nameA; /*!< Copy of original name in ASCII.
*/ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_domain* next; +} __itt_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup domains + * @brief Create a domain. + * Create domain using some domain name: the URI naming style is recommended. + * Because the set of domains is expected to be static over the application's + * execution time, there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of + * which thread created the domain. This call is thread-safe. + * @param[in] name name of domain + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_domain* ITTAPI __itt_domain_createA(const char *name); +__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_domain_create __itt_domain_createW +# define __itt_domain_create_ptr __itt_domain_createW_ptr +#else /* UNICODE */ +# define __itt_domain_create __itt_domain_createA +# define __itt_domain_create_ptr __itt_domain_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_domain* ITTAPI __itt_domain_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA) +#define 
__itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA) +#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW) +#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create ITTNOTIFY_DATA(domain_create) +#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA(name) (__itt_domain*)0 +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW(name) (__itt_domain*)0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create(name) (__itt_domain*)0 +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} domains group */ + +/** + * @defgroup ids IDs + * @ingroup public + * IDs group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_id +{ + unsigned long long d1, d2, d3; +} __itt_id; + +#pragma pack(pop) +/** @endcond */ + +static const __itt_id __itt_null = { 0, 0, 0 }; + +/** + * @ingroup ids + * @brief A convenience function is provided to create an ID without domain control. + * @brief This is a convenience function to initialize an __itt_id structure. + * After you make the ID with this function, you still must create it with the + * __itt_id_create function before using the ID to identify a named entity. + * @param[in] addr The address of object; high QWORD of the ID value. 
+ * @param[in] extra The extra data to unique identify object; low QWORD of the ID value. + */ + +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) +{ + __itt_id id = __itt_null; + id.d1 = (unsigned long long)((uintptr_t)addr); + id.d2 = (unsigned long long)extra; + id.d3 = (unsigned long long)0; /* Reserved. Must be zero */ + return id; +} + +/** + * @ingroup ids + * @brief Create an instance of identifier. + * This establishes the beginning of the lifetime of an instance of + * the given ID in the trace. Once this lifetime starts, the ID + * can be used to tag named entity instances in calls such as + * __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * Instance IDs are not domain specific! + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x) +#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create(domain,id) +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup ids + * @brief Destroy an instance of identifier. + * This ends the lifetime of the current instance of the given ID value in the trace. + * Any relationships that are established after this lifetime ends are invalid. 
+ * This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x) +#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_destroy(domain,id) +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} ids group */ + +/** + * @defgroup handles String Handles + * @ingroup public + * String Handles group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_string_handle +{ + const char* strA; /*!< Copy of original string in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* strW; /*!< Copy of original string in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* strW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_string_handle* next; +} __itt_string_handle; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup handles + * @brief Create a string handle. + * Create and return handle value that can be associated with a string. + * Consecutive calls to __itt_string_handle_create with the same name + * return the same value. Because the set of string handles is expected to remain + * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created + * the string handle. This call is thread-safe. + * @param[in] name The input string + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name); +__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_string_handle_create __itt_string_handle_createW +# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr +#else /* UNICODE */ +# define __itt_string_handle_create __itt_string_handle_createA +# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA) +#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA) +#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW) +#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create) +#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA(name) (__itt_string_handle*)0 +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW(name) (__itt_string_handle*)0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create(name) (__itt_string_handle*)0 +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} handles group */ + +/** @cond exclude_from_documentation */ +typedef unsigned long long __itt_timestamp; +/** @endcond */ + +static const __itt_timestamp __itt_timestamp_none = (__itt_timestamp)-1LL; + +/** @cond exclude_from_gpa_documentation */ + +/** + * @ingroup timestamps + * @brief Return timestamp corresponding to the current moment. + * This returns the timestamp in the format that is the most relevant for the current + * host or platform (RDTSC, QPC, and others). You can use the "<" operator to + * compare __itt_timestamp values. 
+ */ +__itt_timestamp ITTAPI __itt_get_timestamp(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) +#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) +#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_get_timestamp() (__itt_timestamp)0 +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} timestamps */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @defgroup regions Regions + * @ingroup public + * Regions group + * @{ + */ +/** + * @ingroup regions + * @brief Begin of region instance. + * Successive calls to __itt_region_begin with the same ID are ignored + * until a call to __itt_region_end with the same ID + * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance. Must not be __itt_null + * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null + * @param[in] name The name of this region + */ +void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup regions + * @brief End of region instance. + * The first call to __itt_region_end with a given ID ends the + * region. Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance + */ +void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id)) +#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z) +#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin) +#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x) +#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_region_begin(d,x,y,z) +#define __itt_region_begin_ptr 0 +#define __itt_region_end(d,x) +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_region_begin_ptr 0 +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} regions group */ + +/** + * @defgroup frames Frames + * @ingroup public + * Frames are similar to regions, but are intended to be easier to use and to implement. + * In particular: + * - Frames always represent periods of elapsed time + * - By default, frames have no nesting relationships + * @{ + */ + +/** + * @ingroup frames + * @brief Begin a frame instance. + * Successive calls to __itt_frame_begin with the + * same ID are ignored until a call to __itt_frame_end with the same ID. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + */ +void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief End a frame instance. + * The first call to __itt_frame_end with a given ID + * ends the frame. 
Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_frame_begin call. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL for current + */ +void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief Submits a frame instance. + * Successive calls to __itt_frame_begin or __itt_frame_submit with the + * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit + * with the same ID. + * Passing special __itt_timestamp_none value as "end" argument means + * take the current timestamp as the end timestamp. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + * @param[in] begin Timestamp of the beginning of the frame + * @param[in] end Timestamp of the end of the frame + */ +void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, + __itt_timestamp begin, __itt_timestamp end); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) +#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) +#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) +#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) +#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) +#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) +#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_frame_begin_v3(domain,id) +#define __itt_frame_begin_v3_ptr 0 +#define 
__itt_frame_end_v3(domain,id) +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3(domain,id,begin,end) +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_frame_begin_v3_ptr 0 +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} frames group */ +/** @endcond */ + +/** + * @defgroup taskgroup Task Group + * @ingroup public + * Task Group + * @{ + */ +/** + * @ingroup taskgroup + * @brief Denotes a task_group instance. + * Successive calls to __itt_task_group with the same ID are ignored. + * @param[in] domain The domain for this task_group instance + * @param[in] id The instance ID for this task_group instance. Must not be __itt_null. + * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null. + * @param[in] name The name of this task_group + */ +void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z) +#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_group(d,x,y,z) +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} taskgroup group */ + +/** + * @defgroup tasks Tasks + * @ingroup public + * A task instance represents a piece of work performed by a particular + * thread for a period of time. A call to __itt_task_begin creates a + * task instance. This becomes the current instance for that task on that + * thread.
A following call to __itt_task_end on the same thread ends the + * instance. There may be multiple simultaneous instances of tasks with the + * same name on different threads. If an ID is specified, the task instance + * receives that ID. Nested tasks are allowed. + * + * Note: The task is defined by the bracketing of __itt_task_begin and + * __itt_task_end on the same thread. If some scheduling mechanism causes + * task switching (the thread executes a different user task) or task + * switching (the user task switches to a different thread) then this breaks + * the notion of current instance. Additional API calls are required to + * deal with that possibility. + * @{ + */ + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The identifier for this task instance (may be 0) + * @param[in] parentid The parent of this task (may be 0) + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup tasks + * @brief End the current task instance. 
+ * @param[in] domain The domain for this task + */ +void ITTAPI __itt_task_end(const __itt_domain *domain); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain)) +#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z) +#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin) +#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z) +#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn) +#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d) +#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin(domain,id,parentid,name) +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn(domain,id,parentid,fn) +#define __itt_task_begin_fn_ptr 0 +#define __itt_task_end(domain) +#define __itt_task_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn_ptr 0 +#define __itt_task_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} tasks group */ + +/** + * @defgroup counters Counters + * @ingroup public + * Counters are user-defined objects with a monotonically increasing + * value. Counter values are 64-bit unsigned integers. Counter values + * are tracked per-thread. Counters have names that can be displayed in + * the tools. + * @{ + */ + +/** + * @ingroup counters + * @brief Increment a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero on every thread. Successive calls increment the counter value + * on the thread on which the call is issued. 
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Increment a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + * @param[in] delta The amount by which to increment the counter + */ +void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) +#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) +#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) +#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_v3(domain,name) +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3(domain,name,delta) +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} counters group */ + +/** + * @defgroup markers Markers + * Markers represent a single discrete event in time.
Markers have a scope, + * described by an enumerated type __itt_scope. Markers are created by + * the API call __itt_marker. A marker instance can be given an ID for use in + * adding metadata. + * @{ + */ + +/** + * @brief Describes the scope of an event object in the trace. + */ +typedef enum +{ + __itt_scope_unknown = 0, + __itt_scope_global, + __itt_scope_track_group, + __itt_scope_track, + __itt_scope_task, + __itt_scope_marker +} __itt_scope; + +/** @cond exclude_from_documentation */ +#define __itt_marker_scope_unknown __itt_scope_unknown +#define __itt_marker_scope_global __itt_scope_global +#define __itt_marker_scope_process __itt_scope_track_group +#define __itt_marker_scope_thread __itt_scope_track +#define __itt_marker_scope_task __itt_scope_task +/** @endcond */ + +/** + * @ingroup markers + * @brief Create a marker instance + * @param[in] domain The domain for this marker + * @param[in] id The instance ID for this marker or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z) +#define __itt_marker_ptr ITTNOTIFY_NAME(marker) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker(domain,id,name,scope) +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} markers group */ + +/** + * @defgroup metadata Metadata + * The metadata API is used to attach extra information to named + * entities. 
Metadata can be attached to an identified named entity by ID, + * or to the current entity (which is always a task). + * + * Conceptually metadata has a type (what kind of metadata), a key (the + * name of the metadata), and a value (the actual data). The encoding of + * the value depends on the type of the metadata. + * + * The type of metadata is specified by an enumerated type __itt_metadata_type. + * @{ + */ + +/** + * @ingroup parameters + * @brief describes the type of metadata + */ +typedef enum { + __itt_metadata_unknown = 0, + __itt_metadata_u64, /**< Unsigned 64-bit integer */ + __itt_metadata_s64, /**< Signed 64-bit integer */ + __itt_metadata_u32, /**< Unsigned 32-bit integer */ + __itt_metadata_s32, /**< Signed 32-bit integer */ + __itt_metadata_u16, /**< Unsigned 16-bit integer */ + __itt_metadata_s16, /**< Signed 16-bit integer */ + __itt_metadata_float, /**< Signed 32-bit floating-point */ + __itt_metadata_double /**< Signed 64-bit floating-point */ +} __itt_metadata_type; + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b) +#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add(d,x,y,z,a,b) +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add __itt_metadata_str_addW +# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add __itt_metadata_str_addA +# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr +#endif /* 
UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a) +#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA) +#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a) +#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a) +#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW(d,x,y,z,a) +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define 
__itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. + * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, 
metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA) +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} metadata group */ + +/** + * @defgroup relations Relations + * Instances of named entities can be explicitly associated with other + * instances using instance IDs and the relationship API calls. 
+ * + * @{ + */ + +/** + * @ingroup relations + * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation. + * Relations between instances can be added with an API call. The relation + * API uses instance IDs. Relations can be added before or after the actual + * instances are created and persist independently of the instances. This + * is the motivation for having different lifetimes for instance IDs and + * the actual instances. + */ +typedef enum +{ + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ +} __itt_relation; + +/** + * @ingroup relations + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail); + +/** + * @ingroup relations + * @brief Add a relation between two instance identifiers. 
+ * @param[in] domain The domain controlling this call + * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y) +#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current) +#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z) +#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current(d,x,y) +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add(d,x,y,z) +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} relations group */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_info +{ + unsigned long long clock_freq; /*!< Clock domain frequency */ + unsigned long long clock_base; /*!< Clock domain base timestamp */ +} __itt_clock_info; + +#pragma pack(pop) +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_domain +{ + __itt_clock_info 
info; /*!< Most recent clock domain info */ + __itt_get_clock_info_fn fn; /*!< Callback function pointer */ + void* fn_data; /*!< Input argument for the callback function */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_clock_domain* next; +} __itt_clock_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Create a clock domain. + * Certain applications require the capability to trace their application using + * a clock domain different than the CPU, for instance the instrumentation of events + * that occur on a GPU. + * Because the set of domains is expected to be static over the application's execution time, + * there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of which thread created + * the domain. This call is thread-safe. + * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps + * @param[in] fn_data Argument for a callback function; may be NULL + */ +__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data)) +#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create) +#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0 +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Recalculate clock domains frequencies and clock base timestamps.
+ */ +void ITTAPI __itt_clock_domain_reset(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, clock_domain_reset, (void)) +#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset) +#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_reset() +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Create an instance of identifier. This establishes the beginning of the lifetime of + * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to + * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** + * @ingroup clockdomain + * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the + * given ID value in the trace. Any relationships that are established after this lifetime ends are + * invalid. This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z) +#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex) +#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z) +#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create_ex(domain,clock_domain,timestamp,id) +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id) +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, or __itt_null + * @param[in] parentid The parent of this task, or __itt_null + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup clockdomain + * @brief End the current task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ */ +void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp)) +#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b) +#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex) +#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b) +#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex) +#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y) +#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name) +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn) +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex(domain,clock_domain,timestamp) +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup markers + * @brief Create a marker instance. + * @param[in] domain The domain for this marker + * @param[in] clock_domain The clock domain controlling the execution of this call. 
+ * @param[in] timestamp The user defined timestamp. + * @param[in] id The instance ID for this marker, or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b) +#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope) +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail); + +/** + * @ingroup clockdomain + * @brief Add a relation between two instance identifiers. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. 
+ * @param[in] timestamp The user defined timestamp. + * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a) +#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex) +#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b) +#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail) +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail) +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef enum ___itt_track_group_type +{ + __itt_track_group_type_normal = 0 +} __itt_track_group_type; +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track_group
+{ + __itt_string_handle* name; /*!< Name of the track group */ + struct ___itt_track* track; /*!< List of child tracks */ + __itt_track_group_type tgtype; /*!< Type of the track group */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track_group* next; +} __itt_track_group; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Placeholder for custom track types. Currently, "normal" custom track + * is the only available track type. + */ +typedef enum ___itt_track_type +{ + __itt_track_type_normal = 0 +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + , __itt_track_type_queue +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ +} __itt_track_type; + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track +{ + __itt_string_handle* name; /*!< Name of the track group */ + __itt_track_group* group; /*!< Parent group to a track */ + __itt_track_type ttype; /*!< Type of the track */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track* next; +} __itt_track; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Create logical track group. 
+ */ +__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type)) +#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create) +#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_group_create(name) (__itt_track_group*)0 +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create logical track. + */ +__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type)) +#define __itt_track_create ITTNOTIFY_DATA(track_create) +#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_create(track_group,name,track_type) (__itt_track*)0 +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the logical track. 
+ */ +void ITTAPI __itt_set_track(__itt_track* track); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track)) +#define __itt_set_track ITTNOTIFY_VOID(set_track) +#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_set_track(track) +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/* ========================================================================== */ +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup events Events + * @ingroup public + * Events group + * @{ + */ +/** @brief user event type */ +typedef int __itt_event; + +/** + * @brief Create an event notification + * @note name or namelen being null/name and namelen not matching, user event feature not enabled + * @return non-zero event identifier upon success and __itt_err otherwise + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); +__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_event_create __itt_event_createW +# define __itt_event_create_ptr __itt_event_createW_ptr +#else +# define __itt_event_create __itt_event_createA +# define __itt_event_create_ptr __itt_event_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, 
int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA ITTNOTIFY_DATA(event_createA) +#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) +#define __itt_event_createW ITTNOTIFY_DATA(event_createW) +#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create ITTNOTIFY_DATA(event_create) +#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA(name, namelen) (__itt_event)0 +#define __itt_event_createA_ptr 0 +#define __itt_event_createW(name, namelen) (__itt_event)0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create(name, namelen) (__itt_event)0 +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA_ptr 0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event occurrence. 
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_start(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) +#define __itt_event_start ITTNOTIFY_DATA(event_start) +#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_start(event) (int)0 +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event end occurrence. + * @note It is optional if events do not have durations. + * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_end(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) +#define __itt_event_end ITTNOTIFY_DATA(event_end) +#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_end(event) (int)0 +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} events group */ + + +/** + * @defgroup arrays Arrays Visualizer + * @ingroup public + * Visualize arrays + * @{ + */ + +/** + * @enum __itt_av_data_type + * @brief Defines types of arrays data (for C/C++ intrinsic types) + */ typedef enum -{ - __itt_relation_is_unknown = 0, - __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ - __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ - __itt_relation_is_parent_of, /**< "A is 
parent of B" means that A created B */ - __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ - __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ - __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ - __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ -} __itt_relation; - -/** - * @ingroup relations - * @brief Add a relation to the current task instance. - * The current task instance is the head of the relation. - * @param[in] domain The domain controlling this call - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail); - -/** - * @ingroup relations - * @brief Add a relation between two instance identifiers. 
- * @param[in] domain The domain controlling this call - * @param[in] head The ID for the head of the relation - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail)) -ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail)) -#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y) -#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current) -#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z) -#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_relation_add_to_current(d,x,y) -#define __itt_relation_add_to_current_ptr 0 -#define __itt_relation_add(d,x,y,z) -#define __itt_relation_add_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_relation_add_to_current_ptr 0 -#define __itt_relation_add_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} relations group */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_clock_info -{ - unsigned long long clock_freq; /*!< Clock domain frequency */ - unsigned long long clock_base; /*!< Clock domain base timestamp */ -} __itt_clock_info; - -#pragma pack(pop) -/** @endcond */ - -/** @cond exclude_from_documentation */ -typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); -/** @endcond */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_clock_domain -{ - __itt_clock_info 
info; /*!< Most recent clock domain info */ - __itt_get_clock_info_fn fn; /*!< Callback function pointer */ - void* fn_data; /*!< Input argument for the callback function */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_clock_domain* next; -} __itt_clock_domain; - -#pragma pack(pop) -/** @endcond */ - -/** - * @ingroup clockdomains - * @brief Create a clock domain. - * Certain applications require the capability to trace their application using - * a clock domain different than the CPU, for instance the instrumentation of events - * that occur on a GPU. - * Because the set of domains is expected to be static over the application's execution time, - * there is no mechanism to destroy a domain. - * Any domain can be accessed by any thread in the process, regardless of which thread created - * the domain. This call is thread-safe. - * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps - * @param[in] fn_data Argument for a callback function; may be NULL - */ -__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data)) -#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create) -#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0 -#define __itt_clock_domain_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_clock_domain_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomains - * @brief Recalculate clock domains frequences and clock base timestamps. 
- */ -void ITTAPI __itt_clock_domain_reset(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, clock_domain_reset, (void)) -#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset) -#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_clock_domain_reset() -#define __itt_clock_domain_reset_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_clock_domain_reset_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomain - * @brief Create an instance of identifier. This establishes the beginning of the lifetime of - * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to - * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among - * identified named entity instances, using the \ref relations APIs. - * @param[in] domain The domain controlling the execution of this call. - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] id The ID to create. - */ -void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); - -/** - * @ingroup clockdomain - * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the - * given ID value in the trace. Any relationships that are established after this lifetime ends are - * invalid. This call must be performed before the given ID value can be reused for a different - * named entity instance. - * @param[in] domain The domain controlling the execution of this call. - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. 
- * @param[in] id The ID to destroy. - */ -void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) -ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) -#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z) -#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex) -#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z) -#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_id_create_ex(domain,clock_domain,timestamp,id) -#define __itt_id_create_ex_ptr 0 -#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id) -#define __itt_id_destroy_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_id_create_ex_ptr 0 -#define __itt_id_destroy_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomain - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. 
- * @param[in] taskid The instance ID for this task instance, or __itt_null - * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null - * @param[in] name The name of this task - */ -void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); - -/** - * @ingroup clockdomain - * @brief Begin a task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] taskid The identifier for this task instance, or __itt_null - * @param[in] parentid The parent of this task, or __itt_null - * @param[in] fn The pointer to the function you are tracing - */ -void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn); - -/** - * @ingroup clockdomain - * @brief End the current task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. 
- */ -void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn)) -ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp)) -#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b) -#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex) -#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b) -#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex) -#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y) -#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name) -#define __itt_task_begin_ex_ptr 0 -#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn) -#define __itt_task_begin_fn_ex_ptr 0 -#define __itt_task_end_ex(domain,clock_domain,timestamp) -#define __itt_task_end_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_begin_ex_ptr 0 -#define __itt_task_begin_fn_ex_ptr 0 -#define __itt_task_end_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup markers - * @brief Create a marker instance. - * @param[in] domain The domain for this marker - * @param[in] clock_domain The clock domain controlling the execution of this call. 
- * @param[in] timestamp The user defined timestamp. - * @param[in] id The instance ID for this marker, or __itt_null - * @param[in] name The name for this marker - * @param[in] scope The scope for this marker - */ -void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope)) -#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b) -#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope) -#define __itt_marker_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_marker_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup clockdomain - * @brief Add a relation to the current task instance. - * The current task instance is the head of the relation. - * @param[in] domain The domain controlling this call - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail); - -/** - * @ingroup clockdomain - * @brief Add a relation between two instance identifiers. - * @param[in] domain The domain controlling this call - * @param[in] clock_domain The clock domain controlling the execution of this call. 
- * @param[in] timestamp The user defined timestamp. - * @param[in] head The ID for the head of the relation - * @param[in] relation The kind of relation - * @param[in] tail The ID for the tail of the relation - */ -void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail)) -ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail)) -#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a) -#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex) -#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b) -#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_relation_add_to_current_ex(domain,clock_domain,timestame,relation,tail) -#define __itt_relation_add_to_current_ex_ptr 0 -#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail) -#define __itt_relation_add_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_relation_add_to_current_ex_ptr 0 -#define __itt_relation_add_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @cond exclude_from_documentation */ -typedef enum ___itt_track_group_type -{ - __itt_track_group_type_normal = 0 -} __itt_track_group_type; -/** @endcond */ - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_track_group 
-{ - __itt_string_handle* name; /*!< Name of the track group */ - struct ___itt_track* track; /*!< List of child tracks */ - __itt_track_group_type tgtype; /*!< Type of the track group */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_track_group* next; -} __itt_track_group; - -#pragma pack(pop) -/** @endcond */ - -/** - * @brief Placeholder for custom track types. Currently, "normal" custom track - * is the only available track type. - */ -typedef enum ___itt_track_type -{ - __itt_track_type_normal = 0 -#ifdef INTEL_ITTNOTIFY_API_PRIVATE - , __itt_track_type_queue -#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ -} __itt_track_type; - -/** @cond exclude_from_documentation */ -#pragma pack(push, 8) - -typedef struct ___itt_track -{ - __itt_string_handle* name; /*!< Name of the track group */ - __itt_track_group* group; /*!< Parent group to a track */ - __itt_track_type ttype; /*!< Type of the track */ - int extra1; /*!< Reserved. Must be zero */ - void* extra2; /*!< Reserved. Must be zero */ - struct ___itt_track* next; -} __itt_track; - -#pragma pack(pop) -/** @endcond */ - -/** - * @brief Create logical track group. 
- */ -__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type)) -#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create) -#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_track_group_create(name) (__itt_track_group*)0 -#define __itt_track_group_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_track_group_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Create logical track. - */ -__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type)) -#define __itt_track_create ITTNOTIFY_DATA(track_create) -#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_track_create(track_group,name,track_type) (__itt_track*)0 -#define __itt_track_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_track_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Set the logical track. 
- */ -void ITTAPI __itt_set_track(__itt_track* track); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track)) -#define __itt_set_track ITTNOTIFY_VOID(set_track) -#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_set_track(track) -#define __itt_set_track_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_set_track_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/* ========================================================================== */ -/** @cond exclude_from_gpa_documentation */ -/** - * @defgroup events Events - * @ingroup public - * Events group - * @{ - */ -/** @brief user event type */ -typedef int __itt_event; - -/** - * @brief Create an event notification - * @note name or namelen being null/name and namelen not matching, user event feature not enabled - * @return non-zero event identifier upon success and __itt_err otherwise - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); -__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_event_create __itt_event_createW -# define __itt_event_create_ptr __itt_event_createW_ptr -#else -# define __itt_event_create __itt_event_createA -# define __itt_event_create_ptr __itt_event_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) -ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, 
int namelen)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA ITTNOTIFY_DATA(event_createA) -#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) -#define __itt_event_createW ITTNOTIFY_DATA(event_createW) -#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create ITTNOTIFY_DATA(event_create) -#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA(name, namelen) (__itt_event)0 -#define __itt_event_createA_ptr 0 -#define __itt_event_createW(name, namelen) (__itt_event)0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create(name, namelen) (__itt_event)0 -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA_ptr 0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event occurrence. 
- * @return __itt_err upon failure (invalid event id/user event feature not enabled) - */ -int LIBITTAPI __itt_event_start(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) -#define __itt_event_start ITTNOTIFY_DATA(event_start) -#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_start(event) (int)0 -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event end occurrence. - * @note It is optional if events do not have durations. - * @return __itt_err upon failure (invalid event id/user event feature not enabled) - */ -int LIBITTAPI __itt_event_end(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) -#define __itt_event_end ITTNOTIFY_DATA(event_end) -#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_end(event) (int)0 -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} events group */ - - -/** - * @defgroup arrays Arrays Visualizer - * @ingroup public - * Visualize arrays - * @{ - */ - -/** - * @enum __itt_av_data_type - * @brief Defines types of arrays data (for C/C++ intrinsic types) - */ -typedef enum -{ - __itt_e_first = 0, - __itt_e_char = 0, /* 1-byte integer */ - __itt_e_uchar, /* 1-byte unsigned integer */ - __itt_e_int16, /* 2-byte integer */ - __itt_e_uint16, /* 2-byte unsigned integer */ - __itt_e_int32, /* 4-byte integer */ - __itt_e_uint32, /* 4-byte unsigned integer */ - 
__itt_e_int64, /* 8-byte integer */ - __itt_e_uint64, /* 8-byte unsigned integer */ - __itt_e_float, /* 4-byte floating */ - __itt_e_double, /* 8-byte floating */ - __itt_e_last = __itt_e_double -} __itt_av_data_type; - -/** - * @brief Save an array data to a file. - * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only). - * @param[in] data - pointer to the array data - * @param[in] rank - the rank of the array - * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. - * The size of dimensions must be equal to the rank - * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) - * @param[in] filePath - the file path; the output format is defined by the file extension - * @param[in] columnOrder - defines how the array is stored in the linear memory. - * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C). 
- */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); -int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_av_save __itt_av_saveW -# define __itt_av_save_ptr __itt_av_saveW_ptr -#else /* UNICODE */ -# define __itt_av_save __itt_av_saveA -# define __itt_av_save_ptr __itt_av_saveA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) -ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) -#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) -#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW) -#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_av_save ITTNOTIFY_DATA(av_save) -#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_av_saveA(name) -#define __itt_av_saveA_ptr 0 -#define __itt_av_saveW(name) -#define __itt_av_saveW_ptr 0 
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_av_save(name) -#define __itt_av_save_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_av_saveA_ptr 0 -#define __itt_av_saveW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_av_save_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -void ITTAPI __itt_enable_attach(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, enable_attach, (void)) -#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) -#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_enable_attach() -#define __itt_enable_attach_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_enable_attach_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @cond exclude_from_gpa_documentation */ - -/** @} arrays group */ - -/** @endcond */ - - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _ITTNOTIFY_H_ */ - -#ifdef INTEL_ITTNOTIFY_API_PRIVATE - -#ifndef _ITTNOTIFY_PRIVATE_ -#define _ITTNOTIFY_PRIVATE_ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * @ingroup tasks - * @brief Begin an overlapped task instance. - * @param[in] domain The domain for this task. - * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. - * @param[in] parentid The parent of this task, or __itt_null. - * @param[in] name The name of this task. - */ -void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); - -/** - * @ingroup clockdomain - * @brief Begin an overlapped task instance. 
- * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. - * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. - * @param[in] parentid The parent of this task, or __itt_null. - * @param[in] name The name of this task. - */ -void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); - -/** - * @ingroup tasks - * @brief End an overlapped task instance. - * @param[in] domain The domain for this task - * @param[in] taskid Explicit ID of finished task - */ -void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid); - -/** - * @ingroup clockdomain - * @brief End an overlapped task instance. - * @param[in] domain The domain for this task - * @param[in] clock_domain The clock domain controlling the execution of this call. - * @param[in] timestamp The user defined timestamp. 
- * @param[in] taskid Explicit ID of finished task - */ -void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name)) -ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid)) -ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid)) -#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z) -#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped) -#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b) -#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex) -#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x) -#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped) -#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z) -#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_task_begin_overlapped(domain,taskid,parentid,name) -#define __itt_task_begin_overlapped_ptr 0 -#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name) -#define __itt_task_begin_overlapped_ex_ptr 0 -#define __itt_task_end_overlapped(domain,taskid) -#define __itt_task_end_overlapped_ptr 0 
-#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid) -#define __itt_task_end_overlapped_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_task_begin_overlapped_ptr 0 -#define __itt_task_begin_overlapped_ex_ptr 0 -#define __itt_task_end_overlapped_ptr 0 -#define __itt_task_end_overlapped_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @defgroup makrs_internal Marks - * @ingroup internal - * Marks group - * @warning Internal API: - * - It is not shipped to outside of Intel - * - It is delivered to internal Intel teams using e-mail or SVN access only - * @{ - */ -/** @brief user mark type */ -typedef int __itt_mark_type; - -/** - * @brief Creates a user mark type with the specified name using char or Unicode string. - * @param[in] name - name of mark to create - * @return Returns a handle to the mark type - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_mark_type ITTAPI __itt_mark_createA(const char *name); -__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_mark_create __itt_mark_createW -# define __itt_mark_create_ptr __itt_mark_createW_ptr -#else /* UNICODE */ -# define __itt_mark_create __itt_mark_createA -# define __itt_mark_create_ptr __itt_mark_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_mark_type ITTAPI __itt_mark_create(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name)) -ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define 
__itt_mark_createA ITTNOTIFY_DATA(mark_createA) -#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA) -#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW) -#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_create ITTNOTIFY_DATA(mark_create) -#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_createA(name) (__itt_mark_type)0 -#define __itt_mark_createA_ptr 0 -#define __itt_mark_createW(name) (__itt_mark_type)0 -#define __itt_mark_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_create(name) (__itt_mark_type)0 -#define __itt_mark_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_createA_ptr 0 -#define __itt_mark_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string. - * - * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign. - * - The call is "synchronous" - function returns after mark is actually added to results. - * - This function is useful, for example, to mark different phases of application - * (beginning of the next mark automatically meand end of current region). 
- * - Can be used together with "continuous" marks (see below) at the same collection session - * @param[in] mt - mark, created by __itt_mark_create(const char* name) function - * @param[in] parameter - string parameter of mark - * @return Returns zero value in case of success, non-zero value otherwise. - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter); -int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_mark __itt_markW -# define __itt_mark_ptr __itt_markW_ptr -#else /* UNICODE */ -# define __itt_mark __itt_markA -# define __itt_mark_ptr __itt_markA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter)) -ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_markA ITTNOTIFY_DATA(markA) -#define __itt_markA_ptr ITTNOTIFY_NAME(markA) -#define __itt_markW ITTNOTIFY_DATA(markW) -#define __itt_markW_ptr ITTNOTIFY_NAME(markW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark ITTNOTIFY_DATA(mark) -#define __itt_mark_ptr ITTNOTIFY_NAME(mark) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_markA(mt, parameter) (int)0 -#define __itt_markA_ptr 0 -#define __itt_markW(mt, parameter) (int)0 -#define __itt_markW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define 
__itt_mark(mt, parameter) (int)0 -#define __itt_mark_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_markA_ptr 0 -#define __itt_markW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Use this if necessary to create a "discrete" user event type (mark) for process - * rather then for one thread - * @see int __itt_mark(__itt_mark_type mt, const char* parameter); - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter); -int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_mark_global __itt_mark_globalW -# define __itt_mark_global_ptr __itt_mark_globalW_ptr -#else /* UNICODE */ -# define __itt_mark_global __itt_mark_globalA -# define __itt_mark_global_ptr __itt_mark_globalA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter)) -ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA) -#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA) -#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW) -#define __itt_mark_globalW_ptr 
ITTNOTIFY_NAME(mark_globalW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_global ITTNOTIFY_DATA(mark_global) -#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_globalA(mt, parameter) (int)0 -#define __itt_mark_globalA_ptr 0 -#define __itt_mark_globalW(mt, parameter) (int)0 -#define __itt_mark_globalW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_global(mt, parameter) (int)0 -#define __itt_mark_global_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_mark_globalA_ptr 0 -#define __itt_mark_globalW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_mark_global_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Creates an "end" point for "continuous" mark with specified name. - * - * - Returns zero value in case of success, non-zero value otherwise. - * Also returns non-zero value when preceding "begin" point for the - * mark with the same name failed to be created or not created. - * - The mark of "continuous" type is placed to collection results in - * case of success. It appears in overtime view(s) as a special tick - * sign (different from "discrete" mark) together with line from - * corresponding "begin" mark to "end" mark. - * @note Continuous marks can overlap and be nested inside each other. - * Discrete mark can be nested inside marked region - * @param[in] mt - mark, created by __itt_mark_create(const char* name) function - * @return Returns zero value in case of success, non-zero value otherwise. 
- */ -int ITTAPI __itt_mark_off(__itt_mark_type mt); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt)) -#define __itt_mark_off ITTNOTIFY_DATA(mark_off) -#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_mark_off(mt) (int)0 -#define __itt_mark_off_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_mark_off_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Use this if necessary to create an "end" point for mark of process - * @see int __itt_mark_off(__itt_mark_type mt); - */ -int ITTAPI __itt_mark_global_off(__itt_mark_type mt); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt)) -#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off) -#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_mark_global_off(mt) (int)0 -#define __itt_mark_global_off_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_mark_global_off_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} marks group */ - -/** - * @defgroup counters_internal Counters - * @ingroup internal - * Counters group - * @{ - */ -/** - * @brief opaque structure for counter identification - */ -typedef struct ___itt_counter *__itt_counter; - -/** - * @brief Create a counter with given name/domain for the calling thread - * - * After __itt_counter_create() is called, __itt_counter_inc() / __itt_counter_inc_delta() can be used - * to increment the counter on any thread - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); -__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t 
*domain); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_counter_create __itt_counter_createW -# define __itt_counter_create_ptr __itt_counter_createW_ptr -#else /* UNICODE */ -# define __itt_counter_create __itt_counter_createA -# define __itt_counter_create_ptr __itt_counter_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) -ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) -#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) -#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) -#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create ITTNOTIFY_DATA(counter_create) -#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA(name, domain) -#define __itt_counter_createA_ptr 0 -#define __itt_counter_createW(name, domain) -#define __itt_counter_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create(name, domain) -#define __itt_counter_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if 
ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA_ptr 0 -#define __itt_counter_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() - */ -void ITTAPI __itt_counter_destroy(__itt_counter id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) -#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) -#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_destroy(id) -#define __itt_counter_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Increment the counter value - */ -void ITTAPI __itt_counter_inc(__itt_counter id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) -#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) -#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc(id) -#define __itt_counter_inc_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Increment the counter value with x - */ -void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) -#define 
__itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) -#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc_delta(id, value) -#define __itt_counter_inc_delta_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_delta_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} counters group */ - -/** - * @defgroup stitch Stack Stitching - * @ingroup internal - * Stack Stitching group - * @{ - */ -/** - * @brief opaque structure for counter identification - */ -typedef struct ___itt_caller *__itt_caller; - -/** - * @brief Create the stitch point e.g. a point in call stack where other stacks should be stitched to. - * The function returns a unique identifier which is used to match the cut points with corresponding stitch points. - */ -__itt_caller ITTAPI __itt_stack_caller_create(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) -#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create) -#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_caller_create() (__itt_caller)0 -#define __itt_stack_caller_create_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_caller_create_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Destroy the inforamtion about stitch point identified by the pointer previously returned by __itt_stack_caller_create() - */ -void ITTAPI __itt_stack_caller_destroy(__itt_caller id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id)) -#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy) -#define 
__itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_caller_destroy(id) -#define __itt_stack_caller_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_caller_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Sets the cut point. Stack from each event which occurs after this call will be cut - * at the same stack level the function was called and stitched to the corresponding stitch point. - */ -void ITTAPI __itt_stack_callee_enter(__itt_caller id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id)) -#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter) -#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_callee_enter(id) -#define __itt_stack_callee_enter_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_callee_enter_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter(). 
- */ -void ITTAPI __itt_stack_callee_leave(__itt_caller id); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id)) -#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave) -#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_stack_callee_leave(id) -#define __itt_stack_callee_leave_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_stack_callee_leave_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} stitch group */ - -/* ***************************************************************************************************************************** */ - -#include - -/** @cond exclude_from_documentation */ -typedef enum __itt_error_code -{ - __itt_error_success = 0, /*!< no error */ - __itt_error_no_module = 1, /*!< module can't be loaded */ - /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ - __itt_error_no_symbol = 2, /*!< symbol not found */ - /* %1$s -- library name, %2$s -- symbol name. */ - __itt_error_unknown_group = 3, /*!< unknown group specified */ - /* %1$s -- env var name, %2$s -- group name. */ - __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */ - /* %1$s -- env var name, %2$d -- system error. */ - __itt_error_env_too_long = 5, /*!< variable value too long */ - /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */ - __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */ - /* %1$s -- function name, %2$d -- errno. 
*/ -} __itt_error_code; - -typedef void (__itt_error_handler_t)(__itt_error_code code, va_list); -__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*); - -const char* ITTAPI __itt_api_version(void); -/** @endcond */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) -void __itt_error_handler(__itt_error_code code, va_list args); -extern const int ITTNOTIFY_NAME(err); -#define __itt_err ITTNOTIFY_NAME(err) -ITT_STUB(ITTAPI, const char*, api_version, (void)) -#define __itt_api_version ITTNOTIFY_DATA(api_version) -#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_api_version() (const char*)0 -#define __itt_api_version_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_api_version_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _ITTNOTIFY_PRIVATE_ */ - -#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ +{ + __itt_e_first = 0, + __itt_e_char = 0, /* 1-byte integer */ + __itt_e_uchar, /* 1-byte unsigned integer */ + __itt_e_int16, /* 2-byte integer */ + __itt_e_uint16, /* 2-byte unsigned integer */ + __itt_e_int32, /* 4-byte integer */ + __itt_e_uint32, /* 4-byte unsigned integer */ + __itt_e_int64, /* 8-byte integer */ + __itt_e_uint64, /* 8-byte unsigned integer */ + __itt_e_float, /* 4-byte floating */ + __itt_e_double, /* 8-byte floating */ + __itt_e_last = __itt_e_double +} __itt_av_data_type; + +/** + * @brief Save an array data to a file. + * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only). + * @param[in] data - pointer to the array data + * @param[in] rank - the rank of the array + * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. 
+ * The size of dimensions must be equal to the rank + * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) + * @param[in] filePath - the file path; the output format is defined by the file extension + * @param[in] columnOrder - defines how the array is stored in the linear memory. + * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C). + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_av_save __itt_av_saveW +# define __itt_av_save_ptr __itt_av_saveW_ptr +#else /* UNICODE */ +# define __itt_av_save __itt_av_saveA +# define __itt_av_save_ptr __itt_av_saveA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) +#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) +#define __itt_av_saveW 
ITTNOTIFY_DATA(av_saveW) +#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save ITTNOTIFY_DATA(av_save) +#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA(name) +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW(name) +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save(name) +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +void ITTAPI __itt_enable_attach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, enable_attach, (void)) +#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) +#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_enable_attach() +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** @} arrays group */ + +/** @endcond */ + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_H_ */ + +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + +#ifndef _ITTNOTIFY_PRIVATE_ +#define _ITTNOTIFY_PRIVATE_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @ingroup tasks + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task. 
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. + * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. + * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup tasks + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid); + +/** + * @ingroup clockdomain + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid)) +ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid)) +#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z) +#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped) +#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b) +#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex) +#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x) +#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped) +#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z) +#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_overlapped(domain,taskid,parentid,name) +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name) +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped(domain,taskid) +#define __itt_task_end_overlapped_ptr 0 
+#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid) +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup makrs_internal Marks + * @ingroup internal + * Marks group + * @warning Internal API: + * - It is not shipped to outside of Intel + * - It is delivered to internal Intel teams using e-mail or SVN access only + * @{ + */ +/** @brief user mark type */ +typedef int __itt_mark_type; + +/** + * @brief Creates a user mark type with the specified name using char or Unicode string. + * @param[in] name - name of mark to create + * @return Returns a handle to the mark type + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_mark_type ITTAPI __itt_mark_createA(const char *name); +__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_create __itt_mark_createW +# define __itt_mark_create_ptr __itt_mark_createW_ptr +#else /* UNICODE */ +# define __itt_mark_create __itt_mark_createA +# define __itt_mark_create_ptr __itt_mark_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_mark_type ITTAPI __itt_mark_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define 
__itt_mark_createA ITTNOTIFY_DATA(mark_createA) +#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA) +#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW) +#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create ITTNOTIFY_DATA(mark_create) +#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA(name) (__itt_mark_type)0 +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW(name) (__itt_mark_type)0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create(name) (__itt_mark_type)0 +#define __itt_mark_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string. + * + * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign. + * - The call is "synchronous" - function returns after mark is actually added to results. + * - This function is useful, for example, to mark different phases of application + * (beginning of the next mark automatically meand end of current region). 
+ * - Can be used together with "continuous" marks (see below) at the same collection session + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @param[in] parameter - string parameter of mark + * @return Returns zero value in case of success, non-zero value otherwise. + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark __itt_markW +# define __itt_mark_ptr __itt_markW_ptr +#else /* UNICODE */ +# define __itt_mark __itt_markA +# define __itt_mark_ptr __itt_markA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA ITTNOTIFY_DATA(markA) +#define __itt_markA_ptr ITTNOTIFY_NAME(markA) +#define __itt_markW ITTNOTIFY_DATA(markW) +#define __itt_markW_ptr ITTNOTIFY_NAME(markW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark ITTNOTIFY_DATA(mark) +#define __itt_mark_ptr ITTNOTIFY_NAME(mark) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA(mt, parameter) (int)0 +#define __itt_markA_ptr 0 +#define __itt_markW(mt, parameter) (int)0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define 
__itt_mark(mt, parameter) (int)0 +#define __itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA_ptr 0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create a "discrete" user event type (mark) for process + * rather then for one thread + * @see int __itt_mark(__itt_mark_type mt, const char* parameter); + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_global __itt_mark_globalW +# define __itt_mark_global_ptr __itt_mark_globalW_ptr +#else /* UNICODE */ +# define __itt_mark_global __itt_mark_globalA +# define __itt_mark_global_ptr __itt_mark_globalA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA) +#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA) +#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW) +#define __itt_mark_globalW_ptr 
ITTNOTIFY_NAME(mark_globalW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global ITTNOTIFY_DATA(mark_global) +#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA(mt, parameter) (int)0 +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW(mt, parameter) (int)0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global(mt, parameter) (int)0 +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates an "end" point for "continuous" mark with specified name. + * + * - Returns zero value in case of success, non-zero value otherwise. + * Also returns non-zero value when preceding "begin" point for the + * mark with the same name failed to be created or not created. + * - The mark of "continuous" type is placed to collection results in + * case of success. It appears in overtime view(s) as a special tick + * sign (different from "discrete" mark) together with line from + * corresponding "begin" mark to "end" mark. + * @note Continuous marks can overlap and be nested inside each other. + * Discrete mark can be nested inside marked region + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @return Returns zero value in case of success, non-zero value otherwise. 
+ */ +int ITTAPI __itt_mark_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt)) +#define __itt_mark_off ITTNOTIFY_DATA(mark_off) +#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_off(mt) (int)0 +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create an "end" point for mark of process + * @see int __itt_mark_off(__itt_mark_type mt); + */ +int ITTAPI __itt_mark_global_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt)) +#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off) +#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_global_off(mt) (int)0 +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} marks group */ + +/** + * @defgroup counters_internal Counters + * @ingroup internal + * Counters group + * @{ + */ +/** + * @brief opaque structure for counter identification + */ +typedef struct ___itt_counter *__itt_counter; + +/** + * @brief Create a counter with given name/domain for the calling thread + * + * After __itt_counter_create() is called, __itt_counter_inc() / __itt_counter_inc_delta() can be used + * to increment the counter on any thread + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); +__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t 
*domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create __itt_counter_createW +# define __itt_counter_create_ptr __itt_counter_createW_ptr +#else /* UNICODE */ +# define __itt_counter_create __itt_counter_createA +# define __itt_counter_create_ptr __itt_counter_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) +#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) +#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) +#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create ITTNOTIFY_DATA(counter_create) +#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA(name, domain) +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW(name, domain) +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create(name, domain) +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if 
ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() + */ +void ITTAPI __itt_counter_destroy(__itt_counter id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) +#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) +#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_destroy(id) +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Increment the counter value + */ +void ITTAPI __itt_counter_inc(__itt_counter id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) +#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) +#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc(id) +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Increment the counter value with x + */ +void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) +#define 
__itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) +#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_delta(id, value) +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} counters group */ + +/** + * @defgroup stitch Stack Stitching + * @ingroup internal + * Stack Stitching group + * @{ + */ +/** + * @brief opaque structure for counter identification + */ +typedef struct ___itt_caller *__itt_caller; + +/** + * @brief Create the stitch point e.g. a point in call stack where other stacks should be stitched to. + * The function returns a unique identifier which is used to match the cut points with corresponding stitch points. + */ +__itt_caller ITTAPI __itt_stack_caller_create(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) +#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create) +#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_create() (__itt_caller)0 +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the inforamtion about stitch point identified by the pointer previously returned by __itt_stack_caller_create() + */ +void ITTAPI __itt_stack_caller_destroy(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id)) +#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy) +#define 
__itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_destroy(id) +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Sets the cut point. Stack from each event which occurs after this call will be cut + * at the same stack level the function was called and stitched to the corresponding stitch point. + */ +void ITTAPI __itt_stack_callee_enter(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id)) +#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter) +#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_enter(id) +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter(). 
+ */ +void ITTAPI __itt_stack_callee_leave(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id)) +#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave) +#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_leave(id) +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} stitch group */ + +/* ***************************************************************************************************************************** */ + +#include + +/** @cond exclude_from_documentation */ +typedef enum __itt_error_code +{ + __itt_error_success = 0, /*!< no error */ + __itt_error_no_module = 1, /*!< module can't be loaded */ + /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ + __itt_error_no_symbol = 2, /*!< symbol not found */ + /* %1$s -- library name, %2$s -- symbol name. */ + __itt_error_unknown_group = 3, /*!< unknown group specified */ + /* %1$s -- env var name, %2$s -- group name. */ + __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */ + /* %1$s -- env var name, %2$d -- system error. */ + __itt_error_env_too_long = 5, /*!< variable value too long */ + /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */ + __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */ + /* %1$s -- function name, %2$d -- errno. 
*/ +} __itt_error_code; + +typedef void (__itt_error_handler_t)(__itt_error_code code, va_list); +__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*); + +const char* ITTAPI __itt_api_version(void); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) +void __itt_error_handler(__itt_error_code code, va_list args); +extern const int ITTNOTIFY_NAME(err); +#define __itt_err ITTNOTIFY_NAME(err) +ITT_STUB(ITTAPI, const char*, api_version, (void)) +#define __itt_api_version ITTNOTIFY_DATA(api_version) +#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_api_version() (const char*)0 +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_PRIVATE_ */ + +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h index 3a2aee76cfd..710bbe92264 100644 --- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h +++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h @@ -1,478 +1,478 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef _ITTNOTIFY_CONFIG_H_ -#define _ITTNOTIFY_CONFIG_H_ - -/** @cond exclude_from_documentation */ -#ifndef ITT_OS_WIN -# define ITT_OS_WIN 1 -#endif /* ITT_OS_WIN */ - -#ifndef ITT_OS_LINUX -# define ITT_OS_LINUX 2 -#endif /* ITT_OS_LINUX */ - -#ifndef ITT_OS_MAC -# define ITT_OS_MAC 3 -#endif /* ITT_OS_MAC */ - -#ifndef ITT_OS -# if defined WIN32 || defined _WIN32 -# define ITT_OS ITT_OS_WIN -# elif defined( __APPLE__ ) && defined( __MACH__ ) -# define ITT_OS ITT_OS_MAC -# else -# define ITT_OS ITT_OS_LINUX -# endif -#endif /* ITT_OS */ - -#ifndef ITT_PLATFORM_WIN -# define ITT_PLATFORM_WIN 1 -#endif /* ITT_PLATFORM_WIN */ - -#ifndef ITT_PLATFORM_POSIX -# define ITT_PLATFORM_POSIX 2 -#endif /* ITT_PLATFORM_POSIX */ - -#ifndef ITT_PLATFORM_MAC -# define ITT_PLATFORM_MAC 3 -#endif /* ITT_PLATFORM_MAC */ - -#ifndef ITT_PLATFORM -# if ITT_OS==ITT_OS_WIN -# define ITT_PLATFORM ITT_PLATFORM_WIN -# elif ITT_OS==ITT_OS_MAC -# define ITT_PLATFORM ITT_PLATFORM_MAC -# else -# define ITT_PLATFORM ITT_PLATFORM_POSIX -# endif -#endif /* ITT_PLATFORM */ - -#if defined(_UNICODE) && !defined(UNICODE) -#define UNICODE -#endif - -#include -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#include -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#if defined(UNICODE) || defined(_UNICODE) -#include -#endif /* UNICODE || _UNICODE */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef CDECL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define CDECL __cdecl -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ -# define CDECL __attribute__ ((cdecl)) -# else /* _M_IX86 || __i386__ */ -# define CDECL /* actual only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* CDECL */ - -#ifndef STDCALL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define STDCALL __stdcall -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + 
+//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _ITTNOTIFY_CONFIG_H_ +#define _ITTNOTIFY_CONFIG_H_ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include +#if defined(UNICODE) || defined(_UNICODE) +#include +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # if defined _M_IX86 || defined __i386__ -# define STDCALL __attribute__ ((stdcall)) -# else /* _M_IX86 || __i386__ */ -# define STDCALL /* supported only 
on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* STDCALL */ - -#define ITTAPI CDECL -#define LIBITTAPI CDECL - -/* TODO: Temporary for compatibility! */ -#define ITTAPI_CALL CDECL -#define LIBITTAPI_CALL CDECL - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -/* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline -#define ITT_INLINE_ATTRIBUTE /* nothing */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/* - * Generally, functions are not inlined unless optimization is specified. - * For functions declared inline, this attribute inlines the function even - * if no optimization level was specified. - */ -#ifdef __STRICT_ANSI__ -#define ITT_INLINE static -#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) -#else /* __STRICT_ANSI__ */ -#define ITT_INLINE static inline -#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) -#endif /* __STRICT_ANSI__ */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/** @endcond */ - -#ifndef ITT_ARCH_IA32 -# define ITT_ARCH_IA32 1 -#endif /* ITT_ARCH_IA32 */ - -#ifndef ITT_ARCH_IA32E -# define ITT_ARCH_IA32E 2 -#endif /* ITT_ARCH_IA32E */ - -/* Was there a magical reason we didn't have 3 here before? 
*/ -#ifndef ITT_ARCH_AARCH64 -# define ITT_ARCH_AARCH64 3 -#endif /* ITT_ARCH_AARCH64 */ - -#ifndef ITT_ARCH_ARM -# define ITT_ARCH_ARM 4 -#endif /* ITT_ARCH_ARM */ - -#ifndef ITT_ARCH_PPC64 -# define ITT_ARCH_PPC64 5 -#endif /* ITT_ARCH_PPC64 */ - - -#ifndef ITT_ARCH -# if defined _M_IX86 || defined __i386__ -# define ITT_ARCH ITT_ARCH_IA32 -# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__ -# define ITT_ARCH ITT_ARCH_IA32E -# elif defined _M_IA64 || defined __ia64__ -# define ITT_ARCH ITT_ARCH_IA64 -# elif defined _M_ARM || __arm__ -# define ITT_ARCH ITT_ARCH_ARM -# elif defined __powerpc64__ -# define ITT_ARCH ITT_ARCH_PPC64 -# elif defined __aarch64__ -# define ITT_ARCH ITT_ARCH_AARCH64 -# endif -#endif - -#ifdef __cplusplus -# define ITT_EXTERN_C extern "C" -# define ITT_EXTERN_C_BEGIN extern "C" { -# define ITT_EXTERN_C_END } -#else -# define ITT_EXTERN_C /* nothing */ -# define ITT_EXTERN_C_BEGIN /* nothing */ -# define ITT_EXTERN_C_END /* nothing */ -#endif /* __cplusplus */ - -#define ITT_TO_STR_AUX(x) #x -#define ITT_TO_STR(x) ITT_TO_STR_AUX(x) - -#define __ITT_BUILD_ASSERT(expr, suffix) do { \ - static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \ - __itt_build_check_##suffix[0] = 0; \ -} while(0) -#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix) -#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__) - -#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } - -/* Replace with snapshot date YYYYMMDD for promotion build. 
*/ -#define API_VERSION_BUILD 20111111 - -#ifndef API_VERSION_NUM -#define API_VERSION_NUM 0.0.0 -#endif /* API_VERSION_NUM */ - -#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ - " (" ITT_TO_STR(API_VERSION_BUILD) ")" - -/* OS communication functions */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#include -typedef HMODULE lib_t; -typedef DWORD TIDT; -typedef CRITICAL_SECTION mutex_t; -#define MUTEX_INITIALIZER { 0 } -#define strong_alias(name, aliasname) /* empty for Windows */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#if defined(UNICODE) || defined(_UNICODE) -#include -#endif /* UNICODE */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */ -#endif /* _GNU_SOURCE */ -#ifndef __USE_UNIX98 -#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */ -#endif /*__USE_UNIX98*/ -#include -typedef void* lib_t; -typedef pthread_t TIDT; -typedef pthread_mutex_t mutex_t; -#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -#define _strong_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((alias (#name))); -#define strong_alias(name, aliasname) _strong_alias(name, aliasname) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_get_proc(lib, name) GetProcAddress(lib, name) -#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex) -#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex) -#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex) -#define __itt_load_lib(name) LoadLibraryA(name) -#define __itt_unload_lib(handle) FreeLibrary(handle) -#define __itt_system_error() (int)GetLastError() -#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2) -#define __itt_fstrlen(s) lstrlenA(s) -#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l) -#define __itt_fstrdup(s) _strdup(s) -#define __itt_thread_id() GetCurrentThreadId() -#define __itt_thread_yield() 
SwitchToThread() -#ifndef ITT_SIMPLE_INIT -ITT_INLINE long -__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; -ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) -{ - return InterlockedIncrement(ptr); -} -#endif /* ITT_SIMPLE_INIT */ -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -#define __itt_get_proc(lib, name) dlsym(lib, name) -#define __itt_mutex_init(mutex) {\ - pthread_mutexattr_t mutex_attr; \ - int error_code = pthread_mutexattr_init(&mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ - error_code); \ - error_code = pthread_mutexattr_settype(&mutex_attr, \ - PTHREAD_MUTEX_RECURSIVE); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ - error_code); \ - error_code = pthread_mutex_init(mutex, &mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutex_init", \ - error_code); \ - error_code = pthread_mutexattr_destroy(&mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ - error_code); \ -} -#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) -#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) -#define __itt_load_lib(name) dlopen(name, RTLD_LAZY) -#define __itt_unload_lib(handle) dlclose(handle) -#define __itt_system_error() errno -#define __itt_fstrcmp(s1, s2) strcmp(s1, s2) -#define __itt_fstrlen(s) strlen(s) -#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l) -#define __itt_fstrdup(s) strdup(s) -#define __itt_thread_id() pthread_self() -#define __itt_thread_yield() sched_yield() -#if ITT_ARCH==ITT_ARCH_IA64 -#ifdef __INTEL_COMPILER -#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) -#else /* __INTEL_COMPILER */ -/* TODO: Add Support for not Intel compilers for IA-64 architecture */ -#endif /* __INTEL_COMPILER */ -#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */ 
-ITT_INLINE long -__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; -ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) -{ - long result; - __asm__ __volatile__("lock\nxadd %0,%1" - : "=r"(result),"=m"(*(int*)ptr) - : "0"(addend), "m"(*(int*)ptr) - : "memory"); - return result; -} -#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64 -#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) -#endif /* ITT_ARCH==ITT_ARCH_IA64 */ -#ifndef ITT_SIMPLE_INIT -ITT_INLINE long -__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; -ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) -{ - return __TBB_machine_fetchadd4(ptr, 1) + 1L; -} -#endif /* ITT_SIMPLE_INIT */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -typedef enum { - __itt_collection_normal = 0, - __itt_collection_paused = 1 -} __itt_collection_state; - -typedef enum { - __itt_thread_normal = 0, - __itt_thread_ignored = 1 -} __itt_thread_state; - -#pragma pack(push, 8) - -typedef struct ___itt_thread_info -{ - const char* nameA; /*!< Copy of original name in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* nameW; /*!< Copy of original name in UNICODE. 
*/ -#else /* UNICODE || _UNICODE */ - void* nameW; -#endif /* UNICODE || _UNICODE */ - TIDT tid; - __itt_thread_state state; /*!< Thread state (paused or normal) */ - int extra1; /*!< Reserved to the runtime */ - void* extra2; /*!< Reserved to the runtime */ - struct ___itt_thread_info* next; -} __itt_thread_info; - -#include "ittnotify_types.h" /* For __itt_group_id definition */ - -typedef struct ___itt_api_info_20101001 -{ - const char* name; - void** func_ptr; - void* init_func; - __itt_group_id group; -} __itt_api_info_20101001; - -typedef struct ___itt_api_info -{ - const char* name; - void** func_ptr; - void* init_func; - void* null_func; - __itt_group_id group; -} __itt_api_info; - -struct ___itt_domain; -struct ___itt_string_handle; - -typedef struct ___itt_global -{ - unsigned char magic[8]; - unsigned long version_major; - unsigned long version_minor; - unsigned long version_build; - volatile long api_initialized; - volatile long mutex_initialized; - volatile long atomic_counter; - mutex_t mutex; - lib_t lib; - void* error_handler; - const char** dll_path_ptr; - __itt_api_info* api_list_ptr; - struct ___itt_global* next; - /* Joinable structures below */ - __itt_thread_info* thread_list; - struct ___itt_domain* domain_list; - struct ___itt_string_handle* string_list; - __itt_collection_state state; -} __itt_global; - -#pragma pack(pop) - -#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \ - h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ - if (h != NULL) { \ - h->tid = t; \ - h->nameA = NULL; \ - h->nameW = n ? _wcsdup(n) : NULL; \ - h->state = s; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->thread_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \ - h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ - if (h != NULL) { \ - h->tid = t; \ - h->nameA = n ? 
__itt_fstrdup(n) : NULL; \ - h->nameW = NULL; \ - h->state = s; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->thread_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \ - h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ - if (h != NULL) { \ - h->flags = 0; /* domain is disabled by default */ \ - h->nameA = NULL; \ - h->nameW = name ? _wcsdup(name) : NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->domain_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \ - h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ - if (h != NULL) { \ - h->flags = 0; /* domain is disabled by default */ \ - h->nameA = name ? __itt_fstrdup(name) : NULL; \ - h->nameW = NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->domain_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \ - h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ - if (h != NULL) { \ - h->strA = NULL; \ - h->strW = name ? _wcsdup(name) : NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->string_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ - h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ - if (h != NULL) { \ - h->strA = name ? 
__itt_fstrdup(name) : NULL; \ - h->strW = NULL; \ - h->extra1 = 0; /* reserved */ \ - h->extra2 = NULL; /* reserved */ \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->string_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#endif /* _ITTNOTIFY_CONFIG_H_ */ +# define CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI CDECL +#define LIBITTAPI CDECL + +/* TODO: Temporary for compatibility! */ +#define ITTAPI_CALL CDECL +#define LIBITTAPI_CALL CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. 
+ */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifndef ITT_ARCH_IA32 +# define ITT_ARCH_IA32 1 +#endif /* ITT_ARCH_IA32 */ + +#ifndef ITT_ARCH_IA32E +# define ITT_ARCH_IA32E 2 +#endif /* ITT_ARCH_IA32E */ + +/* Was there a magical reason we didn't have 3 here before? */ +#ifndef ITT_ARCH_AARCH64 +# define ITT_ARCH_AARCH64 3 +#endif /* ITT_ARCH_AARCH64 */ + +#ifndef ITT_ARCH_ARM +# define ITT_ARCH_ARM 4 +#endif /* ITT_ARCH_ARM */ + +#ifndef ITT_ARCH_PPC64 +# define ITT_ARCH_PPC64 5 +#endif /* ITT_ARCH_PPC64 */ + + +#ifndef ITT_ARCH +# if defined _M_IX86 || defined __i386__ +# define ITT_ARCH ITT_ARCH_IA32 +# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define ITT_ARCH ITT_ARCH_IA32E +# elif defined _M_IA64 || defined __ia64__ +# define ITT_ARCH ITT_ARCH_IA64 +# elif defined _M_ARM || __arm__ +# define ITT_ARCH ITT_ARCH_ARM +# elif defined __powerpc64__ +# define ITT_ARCH ITT_ARCH_PPC64 +# elif defined __aarch64__ +# define ITT_ARCH ITT_ARCH_AARCH64 +# endif +#endif + +#ifdef __cplusplus +# define ITT_EXTERN_C extern "C" +# define ITT_EXTERN_C_BEGIN extern "C" { +# define ITT_EXTERN_C_END } +#else +# define ITT_EXTERN_C /* nothing */ +# define ITT_EXTERN_C_BEGIN /* nothing */ +# define ITT_EXTERN_C_END /* nothing */ +#endif /* __cplusplus */ + +#define ITT_TO_STR_AUX(x) #x +#define ITT_TO_STR(x) ITT_TO_STR_AUX(x) + +#define __ITT_BUILD_ASSERT(expr, suffix) do { \ + static char __itt_build_check_##suffix[(expr) ? 
1 : -1]; \ + __itt_build_check_##suffix[0] = 0; \ +} while(0) +#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix) +#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__) + +#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } + +/* Replace with snapshot date YYYYMMDD for promotion build. */ +#define API_VERSION_BUILD 20111111 + +#ifndef API_VERSION_NUM +#define API_VERSION_NUM 0.0.0 +#endif /* API_VERSION_NUM */ + +#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ + " (" ITT_TO_STR(API_VERSION_BUILD) ")" + +/* OS communication functions */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include +typedef HMODULE lib_t; +typedef DWORD TIDT; +typedef CRITICAL_SECTION mutex_t; +#define MUTEX_INITIALIZER { 0 } +#define strong_alias(name, aliasname) /* empty for Windows */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include +#if defined(UNICODE) || defined(_UNICODE) +#include +#endif /* UNICODE */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */ +#endif /* _GNU_SOURCE */ +#ifndef __USE_UNIX98 +#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */ +#endif /*__USE_UNIX98*/ +#include +typedef void* lib_t; +typedef pthread_t TIDT; +typedef pthread_mutex_t mutex_t; +#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); +#define strong_alias(name, aliasname) _strong_alias(name, aliasname) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_get_proc(lib, name) GetProcAddress(lib, name) +#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex) +#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex) +#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex) +#define __itt_load_lib(name) LoadLibraryA(name) +#define 
__itt_unload_lib(handle) FreeLibrary(handle) +#define __itt_system_error() (int)GetLastError() +#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2) +#define __itt_fstrlen(s) lstrlenA(s) +#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l) +#define __itt_fstrdup(s) _strdup(s) +#define __itt_thread_id() GetCurrentThreadId() +#define __itt_thread_yield() SwitchToThread() +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return InterlockedIncrement(ptr); +} +#endif /* ITT_SIMPLE_INIT */ +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +#define __itt_get_proc(lib, name) dlsym(lib, name) +#define __itt_mutex_init(mutex) {\ + pthread_mutexattr_t mutex_attr; \ + int error_code = pthread_mutexattr_init(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ + error_code); \ + error_code = pthread_mutexattr_settype(&mutex_attr, \ + PTHREAD_MUTEX_RECURSIVE); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ + error_code); \ + error_code = pthread_mutex_init(mutex, &mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutex_init", \ + error_code); \ + error_code = pthread_mutexattr_destroy(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ + error_code); \ +} +#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) +#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) +#define __itt_load_lib(name) dlopen(name, RTLD_LAZY) +#define __itt_unload_lib(handle) dlclose(handle) +#define __itt_system_error() errno +#define __itt_fstrcmp(s1, s2) strcmp(s1, s2) +#define __itt_fstrlen(s) strlen(s) +#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l) +#define __itt_fstrdup(s) strdup(s) +#define __itt_thread_id() pthread_self() +#define __itt_thread_yield() sched_yield() +#if 
ITT_ARCH==ITT_ARCH_IA64 +#ifdef __INTEL_COMPILER +#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) +#else /* __INTEL_COMPILER */ +/* TODO: Add Support for not Intel compilers for IA-64 architecture */ +#endif /* __INTEL_COMPILER */ +#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */ +ITT_INLINE long +__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) +{ + long result; + __asm__ __volatile__("lock\nxadd %0,%1" + : "=r"(result),"=m"(*(int*)ptr) + : "0"(addend), "m"(*(int*)ptr) + : "memory"); + return result; +} +#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64 +#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) +#endif /* ITT_ARCH==ITT_ARCH_IA64 */ +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return __TBB_machine_fetchadd4(ptr, 1) + 1L; +} +#endif /* ITT_SIMPLE_INIT */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +typedef enum { + __itt_collection_normal = 0, + __itt_collection_paused = 1 +} __itt_collection_state; + +typedef enum { + __itt_thread_normal = 0, + __itt_thread_ignored = 1 +} __itt_thread_state; + +#pragma pack(push, 8) + +typedef struct ___itt_thread_info +{ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. 
*/ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + TIDT tid; + __itt_thread_state state; /*!< Thread state (paused or normal) */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_thread_info* next; +} __itt_thread_info; + +#include "ittnotify_types.h" /* For __itt_group_id definition */ + +typedef struct ___itt_api_info_20101001 +{ + const char* name; + void** func_ptr; + void* init_func; + __itt_group_id group; +} __itt_api_info_20101001; + +typedef struct ___itt_api_info +{ + const char* name; + void** func_ptr; + void* init_func; + void* null_func; + __itt_group_id group; +} __itt_api_info; + +struct ___itt_domain; +struct ___itt_string_handle; + +typedef struct ___itt_global +{ + unsigned char magic[8]; + unsigned long version_major; + unsigned long version_minor; + unsigned long version_build; + volatile long api_initialized; + volatile long mutex_initialized; + volatile long atomic_counter; + mutex_t mutex; + lib_t lib; + void* error_handler; + const char** dll_path_ptr; + __itt_api_info* api_list_ptr; + struct ___itt_global* next; + /* Joinable structures below */ + __itt_thread_info* thread_list; + struct ___itt_domain* domain_list; + struct ___itt_string_handle* string_list; + __itt_collection_state state; +} __itt_global; + +#pragma pack(pop) + +#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = NULL; \ + h->nameW = n ? _wcsdup(n) : NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = n ? 
__itt_fstrdup(n) : NULL; \ + h->nameW = NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = NULL; \ + h->nameW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = name ? __itt_fstrdup(name) : NULL; \ + h->nameW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = NULL; \ + h->strW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = name ? 
__itt_fstrdup(name) : NULL; \ + h->strW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c index 27f84c50e88..a2723aa670f 100644 --- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c +++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c @@ -1,1051 +1,1051 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "kmp_config.h" -#include "ittnotify_config.h" - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define PATH_MAX 512 -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -#include -#include -#include -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#include -#include -#include - -#define INTEL_NO_MACRO_BODY -#define INTEL_ITTNOTIFY_API_PRIVATE -#include "ittnotify.h" -#include "legacy/ittnotify.h" - -#include "disable_warnings.h" - -static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 43375 $\n"; - -#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) - -#if ITT_OS==ITT_OS_WIN -static const char* ittnotify_lib_name = "libittnotify.dll"; -#elif ITT_OS==ITT_OS_LINUX -static const char* ittnotify_lib_name = "libittnotify.so"; -#elif ITT_OS==ITT_OS_MAC -static const char* ittnotify_lib_name = "libittnotify.dylib"; -#else -#error Unsupported or unknown OS. 
-#endif - -#ifdef __ANDROID__ -#include -#include -#include -#include -#include -#include -#include - -#ifdef ITT_ANDROID_LOG - #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI" - #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) -#else - #define ITT_ANDROID_LOGI(...) - #define ITT_ANDROID_LOGW(...) - #define ITT_ANDROID_LOGE(...) - #define ITT_ANDROID_LOGD(...) -#endif - -/* default location of userapi collector on Android */ -#define ANDROID_ITTNOTIFY_DEFAULT_PATH "/data/data/com.intel.vtune/intel/libittnotify.so" -#endif - - -#ifndef LIB_VAR_NAME -#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM -#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32 -#else -#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64 -#endif -#endif /* LIB_VAR_NAME */ - -#define ITT_MUTEX_INIT_AND_LOCK(p) { \ - if (!p.mutex_initialized) \ - { \ - if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ - { \ - __itt_mutex_init(&p.mutex); \ - p.mutex_initialized = 1; \ - } \ - else \ - while (!p.mutex_initialized) \ - __itt_thread_yield(); \ - } \ - __itt_mutex_lock(&p.mutex); \ -} - -const int _N_(err) = 0; - -typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); - -/* this define used to control initialization function name. */ -#ifndef __itt_init_ittlib_name -ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id); -static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib); -#define __itt_init_ittlib_name __itt_init_ittlib_ptr -#endif /* __itt_init_ittlib_name */ - -typedef void (__itt_fini_ittlib_t)(void); - -/* this define used to control finalization function name. 
*/ -#ifndef __itt_fini_ittlib_name -ITT_EXTERN_C void _N_(fini_ittlib)(void); -static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib); -#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr -#endif /* __itt_fini_ittlib_name */ - -/* building pointers to imported funcs */ -#undef ITT_STUBV -#undef ITT_STUB -#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ -{ \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ - if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ - return ITTNOTIFY_NAME(name) params; \ - else \ - return (type)0; \ -} - -#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ -{ \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ - if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ - ITTNOTIFY_NAME(name) params; \ - else \ - return; \ -} - -#undef __ITT_INTERNAL_INIT -#include "ittnotify_static.h" - -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END - -#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ -static type api 
ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END - -#define __ITT_INTERNAL_INIT -#include "ittnotify_static.h" -#undef __ITT_INTERNAL_INIT - -ITT_GROUP_LIST(group_list); - -#pragma pack(push, 8) - -typedef struct ___itt_group_alias -{ - const char* env_var; - __itt_group_id groups; -} __itt_group_alias; - -static __itt_group_alias group_alias[] = { - { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) }, - { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) }, - { NULL, (__itt_group_none) }, - { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */ -}; - -#pragma pack(pop) - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#pragma warning(push) -#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static __itt_api_info api_list[] = { -/* Define functions with static implementation */ -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, -#define ITT_STUBV ITT_STUB -#define __ITT_INTERNAL_INIT -#include "ittnotify_static.h" -#undef __ITT_INTERNAL_INIT -/* Define functions without static implementation */ -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), 
(void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, -#define ITT_STUBV ITT_STUB -#include "ittnotify_static.h" - {NULL, NULL, NULL, NULL, __itt_group_none} -}; - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/* private, init thread info item. used for internal purposes */ -static __itt_thread_info init_thread_info = { - (const char*)NULL, /* nameA */ -#if defined(UNICODE) || defined(_UNICODE) - (const wchar_t*)NULL, /* nameW */ -#else - (void*)NULL, /* nameW */ -#endif - 0, /* tid */ - __itt_thread_normal, /* state */ - 0, /* extra1 */ - (void*)NULL, /* extra2 */ - (__itt_thread_info*)NULL /* next */ -}; - -/* private, NULL domain item. used for internal purposes */ -static __itt_domain null_domain = { - 0, /* flags: disabled by default */ - (const char*)NULL, /* nameA */ -#if defined(UNICODE) || defined(_UNICODE) - (const wchar_t*)NULL, /* nameW */ -#else - (void*)NULL, /* nameW */ -#endif - 0, /* extra1 */ - (void*)NULL, /* extra2 */ - (__itt_domain*)NULL /* next */ -}; - -/* private, NULL string handle item. used for internal purposes */ -static __itt_string_handle null_string_handle = { - (const char*)NULL, /* strA */ -#if defined(UNICODE) || defined(_UNICODE) - (const wchar_t*)NULL, /* strW */ -#else - (void*)NULL, /* strW */ -#endif - 0, /* extra1 */ - (void*)NULL, /* extra2 */ - (__itt_string_handle*)NULL /* next */ -}; - -static const char dll_path[PATH_MAX] = { 0 }; - -/* static part descriptor which handles. all notification api attributes. 
*/ -__itt_global _N_(_ittapi_global) = { - ITT_MAGIC, /* identification info */ - ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */ - 0, /* api_initialized */ - 0, /* mutex_initialized */ - 0, /* atomic_counter */ - MUTEX_INITIALIZER, /* mutex */ - NULL, /* dynamic library handle */ - NULL, /* error_handler */ - (const char**)&dll_path, /* dll_path_ptr */ - (__itt_api_info*)&api_list, /* api_list_ptr */ - NULL, /* next __itt_global */ - (__itt_thread_info*)&init_thread_info, /* thread_list */ - (__itt_domain*)&null_domain, /* domain_list */ - (__itt_string_handle*)&null_string_handle, /* string_list */ - __itt_collection_normal /* collection state */ -}; - -typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); -typedef void (__itt_api_fini_t)(__itt_global*); - -/* ========================================================================= */ - -#ifdef ITT_NOTIFY_EXT_REPORT -ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); -#endif /* ITT_NOTIFY_EXT_REPORT */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#pragma warning(push) -#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static void __itt_report_error(__itt_error_code code, ...) 
-{ - va_list args; - va_start(args, code); - if (_N_(_ittapi_global).error_handler != NULL) - { - __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; - handler(code, args); - } -#ifdef ITT_NOTIFY_EXT_REPORT - _N_(error_handler)(code, args); -#endif /* ITT_NOTIFY_EXT_REPORT */ - va_end(args); -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name) -{ - __itt_domain *h_tail, *h; - - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))) - return ITTNOTIFY_NAME(domain_createW)(name); - } - - if (name == NULL) - return _N_(_ittapi_global).domain_list; - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - if (h->nameW != NULL && !wcscmp(h->nameW, name)) - break; - if (h == NULL) { - NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_domain *h_tail, *h; - - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))) - return 
ITTNOTIFY_NAME(domain_createA)(name); -#else - if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))) - return ITTNOTIFY_NAME(domain_create)(name); -#endif - } - - if (name == NULL) - return _N_(_ittapi_global).domain_list; - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) - break; - if (h == NULL) { - NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name) -{ - __itt_string_handle *h_tail, *h; - - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))) - return ITTNOTIFY_NAME(string_handle_createW)(name); - } - - if (name == NULL) - return _N_(_ittapi_global).string_list; - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - if (h->strW != NULL && !wcscmp(h->strW, name)) - break; - if (h == NULL) { - NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_string_handle *h_tail, *h; - - if (!_N_(_ittapi_global).api_initialized 
&& _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))) - return ITTNOTIFY_NAME(string_handle_createA)(name); -#else - if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))) - return ITTNOTIFY_NAME(string_handle_create)(name); -#endif - } - - if (name == NULL) - return _N_(_ittapi_global).string_list; - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) - break; - if (h == NULL) { - NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -/* -------------------------------------------------------------------------- */ - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))) - { - ITTNOTIFY_NAME(pause)(); - return; - } - } - _N_(_ittapi_global).state = __itt_collection_paused; -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))) - { - ITTNOTIFY_NAME(resume)(); - return; - } - } - _N_(_ittapi_global).state = __itt_collection_normal; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static void ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name) -{ - TIDT tid = __itt_thread_id(); - __itt_thread_info *h_tail, *h; - - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))) - { - ITTNOTIFY_NAME(thread_set_nameW)(name); - return; - } - } - - __itt_mutex_lock(&_N_(_ittapi_global).mutex); - for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) - if (h->tid == tid) - break; - if (h == NULL) { - NEW_THREAD_INFO_W(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name); - } - else - { - h->nameW = name ? _wcsdup(name) : NULL; - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); -} - -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen) -{ - namelen = namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name); - return 0; -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - TIDT tid = __itt_thread_id(); - __itt_thread_info *h_tail, *h; - - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))) - { - ITTNOTIFY_NAME(thread_set_nameA)(name); - return; - } -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))) - { - 
ITTNOTIFY_NAME(thread_set_name)(name); - return; - } -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - } - - __itt_mutex_lock(&_N_(_ittapi_global).mutex); - for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) - if (h->tid == tid) - break; - if (h == NULL) { - NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name); - } - else - { - h->nameA = name ? __itt_fstrdup(name) : NULL; - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen) -{ - namelen = namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name); - return 0; -} -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen) -{ - namelen = namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name); - return 0; -} -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) -{ - TIDT tid = __itt_thread_id(); - __itt_thread_info *h_tail, *h; - - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))) - { - ITTNOTIFY_NAME(thread_ignore)(); - return; - } - } - - __itt_mutex_lock(&_N_(_ittapi_global).mutex); - for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) - if (h->tid == tid) - break; - if (h == NULL) { - static const char* name = "unknown"; - NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_ignored, name); - } - else - { - h->state = __itt_thread_ignored; - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); -} - -static void ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void) -{ - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(); -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void) -{ -#ifdef __ANDROID__ - /* - * if LIB_VAR_NAME env variable were set before then stay previous value - * else set default path - */ - setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0); -#endif -} - -/* -------------------------------------------------------------------------- */ - -static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len) -{ - int i; - int j; - - if (!s || !sep || !out || !len) - return NULL; - - for (i = 0; s[i]; i++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (!b) - break; - } - - if (!s[i]) - return NULL; - - *len = 0; - *out = &s[i]; - - for (; s[i]; i++, (*len)++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (b) - break; - } - - for (; s[i]; i++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (!b) - break; - } - - return &s[i]; -} - -/* This function return value of env variable that placed into static buffer. - * !!! The same static buffer is used for subsequent calls. !!! - * This was done to aviod dynamic allocation for few calls. - * Actually we need this function only four times. 
- */ -static const char* __itt_get_env_var(const char* name) -{ -#define MAX_ENV_VALUE_SIZE 4086 - static char env_buff[MAX_ENV_VALUE_SIZE]; - static char* env_value = (char*)env_buff; - - if (name != NULL) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); - DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len); - if (rc >= max_len) - __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1)); - else if (rc > 0) - { - const char* ret = (const char*)env_value; - env_value += rc + 1; - return ret; - } - else - { - /* If environment variable is empty, GetEnvirornmentVariables() - * returns zero (number of characters (not including terminating null), - * and GetLastError() returns ERROR_SUCCESS. */ - DWORD err = GetLastError(); - if (err == ERROR_SUCCESS) - return env_value; - - if (err != ERROR_ENVVAR_NOT_FOUND) - __itt_report_error(__itt_error_cant_read_env, name, (int)err); - } -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ - char* env = getenv(name); - if (env != NULL) - { - size_t len = strlen(env); - size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); - if (len < max_len) - { - const char* ret = (const char*)env_value; - strncpy(env_value, env, len + 1); - env_value += len + 1; - return ret; - } else - __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1)); - } -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - } - return NULL; -} - -static const char* __itt_get_lib_name(void) -{ - const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); - -#ifdef __ANDROID__ - if (lib_name == NULL) - { - const char* const system_wide_marker_filename = "/data/local/tmp/com.intel.itt.collector_lib"; - int itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY); - ssize_t res = 0; - - if (itt_marker_file_fd == -1) - { - const pid_t my_pid = getpid(); - char cmdline_path[PATH_MAX] = {0}; - char 
package_name[PATH_MAX] = {0}; - char app_sandbox_file[PATH_MAX] = {0}; - int cmdline_fd = 0; - - ITT_ANDROID_LOGI("Unable to open system-wide marker file."); - snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid); - ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path); - cmdline_fd = open(cmdline_path, O_RDONLY); - if (cmdline_fd == -1) - { - ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path); - return lib_name; - } - res = read(cmdline_fd, package_name, PATH_MAX - 1); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path); - res = close(cmdline_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); - } - return lib_name; - } - res = close(cmdline_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); - return lib_name; - } - ITT_ANDROID_LOGI("Package name: %s\n", package_name); - snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/com.intel.itt.collector_lib", package_name); - ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file); - itt_marker_file_fd = open(app_sandbox_file, O_RDONLY); - if (itt_marker_file_fd == -1) - { - ITT_ANDROID_LOGE("Unable to open app marker file!"); - return lib_name; - } - } - - { - char itt_lib_name[PATH_MAX] = {0}; - - res = read(itt_marker_file_fd, itt_lib_name, PATH_MAX - 1); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to read %s file!", itt_marker_file_fd); - res = close(itt_marker_file_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); - } - return lib_name; - } - ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name); - res = close(itt_marker_file_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); - return lib_name; - } - ITT_ANDROID_LOGI("Set env"); - res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to set env var!"); - return lib_name; - } - lib_name = 
__itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); - ITT_ANDROID_LOGI("ITT Lib path from env: %s", itt_lib_name); - } - } -#endif - - return lib_name; -} - -#ifndef min -#define min(a,b) (a) < (b) ? (a) : (b) -#endif /* min */ - -static __itt_group_id __itt_get_groups(void) -{ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#include "kmp_config.h" +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define PATH_MAX 512 +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +#include +#include +#include +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include +#include +#include +#include + +#define INTEL_NO_MACRO_BODY +#define INTEL_ITTNOTIFY_API_PRIVATE +#include "ittnotify.h" +#include "legacy/ittnotify.h" + +#include "disable_warnings.h" + +static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 43375 $\n"; + +#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) + +#if ITT_OS==ITT_OS_WIN +static const char* ittnotify_lib_name = "libittnotify.dll"; +#elif ITT_OS==ITT_OS_LINUX +static const char* ittnotify_lib_name = "libittnotify.so"; +#elif ITT_OS==ITT_OS_MAC +static const char* ittnotify_lib_name = "libittnotify.dylib"; +#else +#error Unsupported or unknown OS. +#endif + +#ifdef __ANDROID__ +#include +#include +#include +#include +#include +#include +#include + +#ifdef ITT_ANDROID_LOG + #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI" + #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGE(...) 
((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) +#else + #define ITT_ANDROID_LOGI(...) + #define ITT_ANDROID_LOGW(...) + #define ITT_ANDROID_LOGE(...) + #define ITT_ANDROID_LOGD(...) +#endif + +/* default location of userapi collector on Android */ +#define ANDROID_ITTNOTIFY_DEFAULT_PATH "/data/data/com.intel.vtune/intel/libittnotify.so" +#endif + + +#ifndef LIB_VAR_NAME +#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM +#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32 +#else +#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64 +#endif +#endif /* LIB_VAR_NAME */ + +#define ITT_MUTEX_INIT_AND_LOCK(p) { \ + if (!p.mutex_initialized) \ + { \ + if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ + { \ + __itt_mutex_init(&p.mutex); \ + p.mutex_initialized = 1; \ + } \ + else \ + while (!p.mutex_initialized) \ + __itt_thread_yield(); \ + } \ + __itt_mutex_lock(&p.mutex); \ +} + +const int _N_(err) = 0; + +typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); + +/* this define used to control initialization function name. */ +#ifndef __itt_init_ittlib_name +ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id); +static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib); +#define __itt_init_ittlib_name __itt_init_ittlib_ptr +#endif /* __itt_init_ittlib_name */ + +typedef void (__itt_fini_ittlib_t)(void); + +/* this define used to control finalization function name. 
*/ +#ifndef __itt_fini_ittlib_name +ITT_EXTERN_C void _N_(fini_ittlib)(void); +static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib); +#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr +#endif /* __itt_fini_ittlib_name */ + +/* building pointers to imported funcs */ +#undef ITT_STUBV +#undef ITT_STUB +#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ +{ \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ + return ITTNOTIFY_NAME(name) params; \ + else \ + return (type)0; \ +} + +#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ +{ \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ + ITTNOTIFY_NAME(name) params; \ + else \ + return; \ +} + +#undef __ITT_INTERNAL_INIT +#include "ittnotify_static.h" + +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END + +#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ +static type api 
ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END + +#define __ITT_INTERNAL_INIT +#include "ittnotify_static.h" +#undef __ITT_INTERNAL_INIT + +ITT_GROUP_LIST(group_list); + +#pragma pack(push, 8) + +typedef struct ___itt_group_alias +{ + const char* env_var; + __itt_group_id groups; +} __itt_group_alias; + +static __itt_group_alias group_alias[] = { + { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) }, + { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) }, + { NULL, (__itt_group_none) }, + { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */ +}; + +#pragma pack(pop) + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(push) +#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static __itt_api_info api_list[] = { +/* Define functions with static implementation */ +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, +#define ITT_STUBV ITT_STUB +#define __ITT_INTERNAL_INIT +#include "ittnotify_static.h" +#undef __ITT_INTERNAL_INIT +/* Define functions without static implementation */ +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), 
(void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, +#define ITT_STUBV ITT_STUB +#include "ittnotify_static.h" + {NULL, NULL, NULL, NULL, __itt_group_none} +}; + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/* private, init thread info item. used for internal purposes */ +static __itt_thread_info init_thread_info = { + (const char*)NULL, /* nameA */ +#if defined(UNICODE) || defined(_UNICODE) + (const wchar_t*)NULL, /* nameW */ +#else + (void*)NULL, /* nameW */ +#endif + 0, /* tid */ + __itt_thread_normal, /* state */ + 0, /* extra1 */ + (void*)NULL, /* extra2 */ + (__itt_thread_info*)NULL /* next */ +}; + +/* private, NULL domain item. used for internal purposes */ +static __itt_domain null_domain = { + 0, /* flags: disabled by default */ + (const char*)NULL, /* nameA */ +#if defined(UNICODE) || defined(_UNICODE) + (const wchar_t*)NULL, /* nameW */ +#else + (void*)NULL, /* nameW */ +#endif + 0, /* extra1 */ + (void*)NULL, /* extra2 */ + (__itt_domain*)NULL /* next */ +}; + +/* private, NULL string handle item. used for internal purposes */ +static __itt_string_handle null_string_handle = { + (const char*)NULL, /* strA */ +#if defined(UNICODE) || defined(_UNICODE) + (const wchar_t*)NULL, /* strW */ +#else + (void*)NULL, /* strW */ +#endif + 0, /* extra1 */ + (void*)NULL, /* extra2 */ + (__itt_string_handle*)NULL /* next */ +}; + +static const char dll_path[PATH_MAX] = { 0 }; + +/* static part descriptor which handles. all notification api attributes. 
*/ +__itt_global _N_(_ittapi_global) = { + ITT_MAGIC, /* identification info */ + ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */ + 0, /* api_initialized */ + 0, /* mutex_initialized */ + 0, /* atomic_counter */ + MUTEX_INITIALIZER, /* mutex */ + NULL, /* dynamic library handle */ + NULL, /* error_handler */ + (const char**)&dll_path, /* dll_path_ptr */ + (__itt_api_info*)&api_list, /* api_list_ptr */ + NULL, /* next __itt_global */ + (__itt_thread_info*)&init_thread_info, /* thread_list */ + (__itt_domain*)&null_domain, /* domain_list */ + (__itt_string_handle*)&null_string_handle, /* string_list */ + __itt_collection_normal /* collection state */ +}; + +typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); +typedef void (__itt_api_fini_t)(__itt_global*); + +/* ========================================================================= */ + +#ifdef ITT_NOTIFY_EXT_REPORT +ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); +#endif /* ITT_NOTIFY_EXT_REPORT */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(push) +#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static void __itt_report_error(__itt_error_code code, ...) 
+{ + va_list args; + va_start(args, code); + if (_N_(_ittapi_global).error_handler != NULL) + { + __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; + handler(code, args); + } +#ifdef ITT_NOTIFY_EXT_REPORT + _N_(error_handler)(code, args); +#endif /* ITT_NOTIFY_EXT_REPORT */ + va_end(args); +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name) +{ + __itt_domain *h_tail, *h; + + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))) + return ITTNOTIFY_NAME(domain_createW)(name); + } + + if (name == NULL) + return _N_(_ittapi_global).domain_list; + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + if (h->nameW != NULL && !wcscmp(h->nameW, name)) + break; + if (h == NULL) { + NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_domain *h_tail, *h; + + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))) + return 
ITTNOTIFY_NAME(domain_createA)(name); +#else + if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))) + return ITTNOTIFY_NAME(domain_create)(name); +#endif + } + + if (name == NULL) + return _N_(_ittapi_global).domain_list; + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) + break; + if (h == NULL) { + NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name) +{ + __itt_string_handle *h_tail, *h; + + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))) + return ITTNOTIFY_NAME(string_handle_createW)(name); + } + + if (name == NULL) + return _N_(_ittapi_global).string_list; + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + if (h->strW != NULL && !wcscmp(h->strW, name)) + break; + if (h == NULL) { + NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_string_handle *h_tail, *h; + + if (!_N_(_ittapi_global).api_initialized 
&& _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))) + return ITTNOTIFY_NAME(string_handle_createA)(name); +#else + if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))) + return ITTNOTIFY_NAME(string_handle_create)(name); +#endif + } + + if (name == NULL) + return _N_(_ittapi_global).string_list; + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) + break; + if (h == NULL) { + NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +/* -------------------------------------------------------------------------- */ + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))) + { + ITTNOTIFY_NAME(pause)(); + return; + } + } + _N_(_ittapi_global).state = __itt_collection_paused; +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))) + { + ITTNOTIFY_NAME(resume)(); + return; + } + } + _N_(_ittapi_global).state = __itt_collection_normal; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static void ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name) +{ + TIDT tid = __itt_thread_id(); + __itt_thread_info *h_tail, *h; + + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))) + { + ITTNOTIFY_NAME(thread_set_nameW)(name); + return; + } + } + + __itt_mutex_lock(&_N_(_ittapi_global).mutex); + for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) + if (h->tid == tid) + break; + if (h == NULL) { + NEW_THREAD_INFO_W(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name); + } + else + { + h->nameW = name ? _wcsdup(name) : NULL; + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +} + +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen) +{ + namelen = namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name); + return 0; +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + TIDT tid = __itt_thread_id(); + __itt_thread_info *h_tail, *h; + + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))) + { + ITTNOTIFY_NAME(thread_set_nameA)(name); + return; + } +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))) + { + 
ITTNOTIFY_NAME(thread_set_name)(name); + return; + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + __itt_mutex_lock(&_N_(_ittapi_global).mutex); + for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) + if (h->tid == tid) + break; + if (h == NULL) { + NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name); + } + else + { + h->nameA = name ? __itt_fstrdup(name) : NULL; + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen) +{ + namelen = namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name); + return 0; +} +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen) +{ + namelen = namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name); + return 0; +} +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) +{ + TIDT tid = __itt_thread_id(); + __itt_thread_info *h_tail, *h; + + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))) + { + ITTNOTIFY_NAME(thread_ignore)(); + return; + } + } + + __itt_mutex_lock(&_N_(_ittapi_global).mutex); + for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) + if (h->tid == tid) + break; + if (h == NULL) { + static const char* name = "unknown"; + NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_ignored, name); + } + else + { + h->state = __itt_thread_ignored; + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +} + +static void ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void) +{ + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(); +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void) +{ +#ifdef __ANDROID__ + /* + * if LIB_VAR_NAME env variable were set before then stay previous value + * else set default path + */ + setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0); +#endif +} + +/* -------------------------------------------------------------------------- */ + +static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len) +{ int i; - __itt_group_id res = __itt_group_none; - const char* var_name = "INTEL_ITTNOTIFY_GROUPS"; - const char* group_str = __itt_get_env_var(var_name); - - if (group_str != NULL) - { - int len; - char gr[255]; - const char* chunk; - while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL) - { - __itt_fstrcpyn(gr, chunk, sizeof(gr) - 1); - gr[min(len, (int)(sizeof(gr) - 1))] = 0; - - for (i = 0; group_list[i].name != NULL; i++) - { - if (!__itt_fstrcmp(gr, group_list[i].name)) - { - res = (__itt_group_id)(res | group_list[i].id); - break; - } - } - } - /* TODO: !!! Workaround for bug with warning for unknown group !!! - * Should be fixed in new initialization scheme. - * Now the following groups should be set always. 
*/ - for (i = 0; group_list[i].id != __itt_group_none; i++) - if (group_list[i].id != __itt_group_all && - group_list[i].id > __itt_group_splitter_min && - group_list[i].id < __itt_group_splitter_max) - res = (__itt_group_id)(res | group_list[i].id); - return res; - } - else - { - for (i = 0; group_alias[i].env_var != NULL; i++) - if (__itt_get_env_var(group_alias[i].env_var) != NULL) - return group_alias[i].groups; - } - - return res; -} - -static int __itt_lib_version(lib_t lib) -{ - if (lib == NULL) - return 0; - if (__itt_get_proc(lib, "__itt_api_init")) - return 2; - if (__itt_get_proc(lib, "__itt_api_version")) - return 1; - return 0; -} - -/* It's not used right now! Comment it out to avoid warnings. -static void __itt_reinit_all_pointers(void) -{ + int j; + + if (!s || !sep || !out || !len) + return NULL; + + for (i = 0; s[i]; i++) + { + int b = 0; + for (j = 0; sep[j]; j++) + if (s[i] == sep[j]) + { + b = 1; + break; + } + if (!b) + break; + } + + if (!s[i]) + return NULL; + + *len = 0; + *out = &s[i]; + + for (; s[i]; i++, (*len)++) + { + int b = 0; + for (j = 0; sep[j]; j++) + if (s[i] == sep[j]) + { + b = 1; + break; + } + if (b) + break; + } + + for (; s[i]; i++) + { + int b = 0; + for (j = 0; sep[j]; j++) + if (s[i] == sep[j]) + { + b = 1; + break; + } + if (!b) + break; + } + + return &s[i]; +} + +/* This function return value of env variable that placed into static buffer. + * !!! The same static buffer is used for subsequent calls. !!! + * This was done to aviod dynamic allocation for few calls. + * Actually we need this function only four times. 
+ */ +static const char* __itt_get_env_var(const char* name) +{ +#define MAX_ENV_VALUE_SIZE 4086 + static char env_buff[MAX_ENV_VALUE_SIZE]; + static char* env_value = (char*)env_buff; + + if (name != NULL) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); + DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len); + if (rc >= max_len) + __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1)); + else if (rc > 0) + { + const char* ret = (const char*)env_value; + env_value += rc + 1; + return ret; + } + else + { + /* If environment variable is empty, GetEnvirornmentVariables() + * returns zero (number of characters (not including terminating null), + * and GetLastError() returns ERROR_SUCCESS. */ + DWORD err = GetLastError(); + if (err == ERROR_SUCCESS) + return env_value; + + if (err != ERROR_ENVVAR_NOT_FOUND) + __itt_report_error(__itt_error_cant_read_env, name, (int)err); + } +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ + char* env = getenv(name); + if (env != NULL) + { + size_t len = strlen(env); + size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); + if (len < max_len) + { + const char* ret = (const char*)env_value; + strncpy(env_value, env, len + 1); + env_value += len + 1; + return ret; + } else + __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1)); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + return NULL; +} + +static const char* __itt_get_lib_name(void) +{ + const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); + +#ifdef __ANDROID__ + if (lib_name == NULL) + { + const char* const system_wide_marker_filename = "/data/local/tmp/com.intel.itt.collector_lib"; + int itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY); + ssize_t res = 0; + + if (itt_marker_file_fd == -1) + { + const pid_t my_pid = getpid(); + char cmdline_path[PATH_MAX] = {0}; + char 
package_name[PATH_MAX] = {0}; + char app_sandbox_file[PATH_MAX] = {0}; + int cmdline_fd = 0; + + ITT_ANDROID_LOGI("Unable to open system-wide marker file."); + snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid); + ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path); + cmdline_fd = open(cmdline_path, O_RDONLY); + if (cmdline_fd == -1) + { + ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path); + return lib_name; + } + res = read(cmdline_fd, package_name, PATH_MAX - 1); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path); + res = close(cmdline_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); + } + return lib_name; + } + res = close(cmdline_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); + return lib_name; + } + ITT_ANDROID_LOGI("Package name: %s\n", package_name); + snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/com.intel.itt.collector_lib", package_name); + ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file); + itt_marker_file_fd = open(app_sandbox_file, O_RDONLY); + if (itt_marker_file_fd == -1) + { + ITT_ANDROID_LOGE("Unable to open app marker file!"); + return lib_name; + } + } + + { + char itt_lib_name[PATH_MAX] = {0}; + + res = read(itt_marker_file_fd, itt_lib_name, PATH_MAX - 1); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to read %s file!", itt_marker_file_fd); + res = close(itt_marker_file_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); + } + return lib_name; + } + ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name); + res = close(itt_marker_file_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); + return lib_name; + } + ITT_ANDROID_LOGI("Set env"); + res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to set env var!"); + return lib_name; + } + lib_name = 
__itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); + ITT_ANDROID_LOGI("ITT Lib path from env: %s", itt_lib_name); + } + } +#endif + + return lib_name; +} + +#ifndef min +#define min(a,b) (a) < (b) ? (a) : (b) +#endif /* min */ + +static __itt_group_id __itt_get_groups(void) +{ int i; - // Fill all pointers with initial stubs - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; -} -*/ - -static void __itt_nullify_all_pointers(void) -{ + __itt_group_id res = __itt_group_none; + const char* var_name = "INTEL_ITTNOTIFY_GROUPS"; + const char* group_str = __itt_get_env_var(var_name); + + if (group_str != NULL) + { + int len; + char gr[255]; + const char* chunk; + while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL) + { + __itt_fstrcpyn(gr, chunk, sizeof(gr) - 1); + gr[min(len, (int)(sizeof(gr) - 1))] = 0; + + for (i = 0; group_list[i].name != NULL; i++) + { + if (!__itt_fstrcmp(gr, group_list[i].name)) + { + res = (__itt_group_id)(res | group_list[i].id); + break; + } + } + } + /* TODO: !!! Workaround for bug with warning for unknown group !!! + * Should be fixed in new initialization scheme. + * Now the following groups should be set always. */ + for (i = 0; group_list[i].id != __itt_group_none; i++) + if (group_list[i].id != __itt_group_all && + group_list[i].id > __itt_group_splitter_min && + group_list[i].id < __itt_group_splitter_max) + res = (__itt_group_id)(res | group_list[i].id); + return res; + } + else + { + for (i = 0; group_alias[i].env_var != NULL; i++) + if (__itt_get_env_var(group_alias[i].env_var) != NULL) + return group_alias[i].groups; + } + + return res; +} + +static int __itt_lib_version(lib_t lib) +{ + if (lib == NULL) + return 0; + if (__itt_get_proc(lib, "__itt_api_init")) + return 2; + if (__itt_get_proc(lib, "__itt_api_version")) + return 1; + return 0; +} + +/* It's not used right now! 
Comment it out to avoid warnings. +static void __itt_reinit_all_pointers(void) +{ int i; - /* Nulify all pointers except domain_create and string_handle_create */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#pragma warning(push) -#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ -#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_EXTERN_C void _N_(fini_ittlib)(void) -{ - __itt_api_fini_t* __itt_api_fini_ptr; - static volatile TIDT current_thread = 0; - - if (_N_(_ittapi_global).api_initialized) - { - __itt_mutex_lock(&_N_(_ittapi_global).mutex); - if (_N_(_ittapi_global).api_initialized) - { - if (current_thread == 0) - { - current_thread = __itt_thread_id(); - __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini"); - if (__itt_api_fini_ptr) - __itt_api_fini_ptr(&_N_(_ittapi_global)); - - __itt_nullify_all_pointers(); - - /* TODO: !!! not safe !!! don't support unload so far. 
- * if (_N_(_ittapi_global).lib != NULL) - * __itt_unload_lib(_N_(_ittapi_global).lib); - * _N_(_ittapi_global).lib = NULL; - */ - _N_(_ittapi_global).api_initialized = 0; - current_thread = 0; - } - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - } -} - -ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) -{ + // Fill all pointers with initial stubs + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; +} +*/ + +static void __itt_nullify_all_pointers(void) +{ int i; - __itt_group_id groups; -#ifdef ITT_COMPLETE_GROUP - __itt_group_id zero_group = __itt_group_none; -#endif /* ITT_COMPLETE_GROUP */ - static volatile TIDT current_thread = 0; - - if (!_N_(_ittapi_global).api_initialized) - { -#ifndef ITT_SIMPLE_INIT - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); -#endif /* ITT_SIMPLE_INIT */ - - if (!_N_(_ittapi_global).api_initialized) - { - if (current_thread == 0) - { - current_thread = __itt_thread_id(); - _N_(_ittapi_global).thread_list->tid = current_thread; - if (lib_name == NULL) - lib_name = __itt_get_lib_name(); - groups = __itt_get_groups(); - if (groups != __itt_group_none || lib_name != NULL) - { - _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? 
ittnotify_lib_name : lib_name); - - if (_N_(_ittapi_global).lib != NULL) - { - __itt_api_init_t* __itt_api_init_ptr; - int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); - - switch (lib_version) { - case 0: - groups = __itt_group_legacy; - case 1: - /* Fill all pointers from dynamic library */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - { - if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups) - { - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name); - if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL) - { - /* Restore pointers for function with static implementation */ - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; - __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name); -#ifdef ITT_COMPLETE_GROUP - zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group); -#endif /* ITT_COMPLETE_GROUP */ - } - } - else - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; - } - - if (groups == __itt_group_legacy) - { - /* Compatibility with legacy tools */ - ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore); -#if ITT_PLATFORM==ITT_PLATFORM_WIN - ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA); - ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW); -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ - ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare); - ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel); - ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired); - ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing); - } - -#ifdef ITT_COMPLETE_GROUP - for (i = 0; 
_N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; -#endif /* ITT_COMPLETE_GROUP */ - break; - case 2: - __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init"); - if (__itt_api_init_ptr) - __itt_api_init_ptr(&_N_(_ittapi_global), init_groups); - break; - } - } - else - { - __itt_nullify_all_pointers(); - - __itt_report_error(__itt_error_no_module, lib_name, -#if ITT_PLATFORM==ITT_PLATFORM_WIN - __itt_system_error() -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - dlerror() -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - ); - } - } - else - { - __itt_nullify_all_pointers(); - } - _N_(_ittapi_global).api_initialized = 1; - current_thread = 0; - /* !!! Just to avoid unused code elimination !!! */ - if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0; - } - } - -#ifndef ITT_SIMPLE_INIT - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); -#endif /* ITT_SIMPLE_INIT */ - } - - /* Evaluating if any function ptr is non empty and it's in init_groups */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func && - _N_(_ittapi_global).api_list_ptr[i].group & init_groups) - return 1; - return 0; -} - -ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler) -{ - __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; - _N_(_ittapi_global).error_handler = (void*)(size_t)handler; - return prev; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - + /* Nulify all pointers except domain_create and string_handle_create */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + 
*_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(push) +#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ +#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_EXTERN_C void _N_(fini_ittlib)(void) +{ + __itt_api_fini_t* __itt_api_fini_ptr; + static volatile TIDT current_thread = 0; + + if (_N_(_ittapi_global).api_initialized) + { + __itt_mutex_lock(&_N_(_ittapi_global).mutex); + if (_N_(_ittapi_global).api_initialized) + { + if (current_thread == 0) + { + current_thread = __itt_thread_id(); + __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini"); + if (__itt_api_fini_ptr) + __itt_api_fini_ptr(&_N_(_ittapi_global)); + + __itt_nullify_all_pointers(); + + /* TODO: !!! not safe !!! don't support unload so far. 
+ * if (_N_(_ittapi_global).lib != NULL) + * __itt_unload_lib(_N_(_ittapi_global).lib); + * _N_(_ittapi_global).lib = NULL; + */ + _N_(_ittapi_global).api_initialized = 0; + current_thread = 0; + } + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + } +} + +ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) +{ + int i; + __itt_group_id groups; +#ifdef ITT_COMPLETE_GROUP + __itt_group_id zero_group = __itt_group_none; +#endif /* ITT_COMPLETE_GROUP */ + static volatile TIDT current_thread = 0; + + if (!_N_(_ittapi_global).api_initialized) + { +#ifndef ITT_SIMPLE_INIT + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); +#endif /* ITT_SIMPLE_INIT */ + + if (!_N_(_ittapi_global).api_initialized) + { + if (current_thread == 0) + { + current_thread = __itt_thread_id(); + _N_(_ittapi_global).thread_list->tid = current_thread; + if (lib_name == NULL) + lib_name = __itt_get_lib_name(); + groups = __itt_get_groups(); + if (groups != __itt_group_none || lib_name != NULL) + { + _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? 
ittnotify_lib_name : lib_name); + + if (_N_(_ittapi_global).lib != NULL) + { + __itt_api_init_t* __itt_api_init_ptr; + int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); + + switch (lib_version) { + case 0: + groups = __itt_group_legacy; + case 1: + /* Fill all pointers from dynamic library */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + { + if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups) + { + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name); + if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL) + { + /* Restore pointers for function with static implementation */ + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; + __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name); +#ifdef ITT_COMPLETE_GROUP + zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group); +#endif /* ITT_COMPLETE_GROUP */ + } + } + else + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; + } + + if (groups == __itt_group_legacy) + { + /* Compatibility with legacy tools */ + ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA); + ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW); +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ + ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare); + ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel); + ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired); + ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing); + } + +#ifdef ITT_COMPLETE_GROUP + for (i = 0; 
_N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; +#endif /* ITT_COMPLETE_GROUP */ + break; + case 2: + __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init"); + if (__itt_api_init_ptr) + __itt_api_init_ptr(&_N_(_ittapi_global), init_groups); + break; + } + } + else + { + __itt_nullify_all_pointers(); + + __itt_report_error(__itt_error_no_module, lib_name, +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_system_error() +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlerror() +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + ); + } + } + else + { + __itt_nullify_all_pointers(); + } + _N_(_ittapi_global).api_initialized = 1; + current_thread = 0; + /* !!! Just to avoid unused code elimination !!! */ + if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0; + } + } + +#ifndef ITT_SIMPLE_INIT + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif /* ITT_SIMPLE_INIT */ + } + + /* Evaluating if any function ptr is non empty and it's in init_groups */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func && + _N_(_ittapi_global).api_list_ptr[i].group & init_groups) + return 1; + return 0; +} + +ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler) +{ + __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; + _N_(_ittapi_global).error_handler = (void*)(size_t)handler; + return prev; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h index 
d49dfc6daee..a218cc87bf1 100644 --- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h +++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h @@ -1,316 +1,316 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#include "ittnotify_config.h" - -#ifndef ITT_FORMAT_DEFINED -# ifndef ITT_FORMAT -# define ITT_FORMAT -# endif /* ITT_FORMAT */ -# ifndef ITT_NO_PARAMS -# define ITT_NO_PARAMS -# endif /* ITT_NO_PARAMS */ -#endif /* ITT_FORMAT_DEFINED */ - -/* - * parameters for macro expected: - * ITT_STUB(api, type, func_name, arguments, params, func_name_in_dll, group, printf_fmt) - */ -#ifdef __ITT_INTERNAL_INIT - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name), (ITT_FORMAT name), domain_createA, __itt_group_structure, "\"%s\"") -ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORMAT name), domain_createW, __itt_group_structure, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"") -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char 
*name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args") -ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args") - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"") -ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name), (ITT_FORMAT name), thread_set_nameW, __itt_group_thread, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name), (ITT_FORMAT name), thread_set_name, __itt_group_thread, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, thread_ignore, (void), (ITT_NO_PARAMS), thread_ignore, __itt_group_thread, "no args") - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setA, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") -ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setW, __itt_group_thread | __itt_group_legacy, "\"%S\", %d") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args") -#endif /* __ITT_INTERNAL_BODY */ - -ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args") - -#else /* __ITT_INTERNAL_INIT */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN 
-ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") -ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\", \"%S\", %x") -ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_renameA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") -ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name), (ITT_FORMAT addr, name), sync_renameW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_create, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") -ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_rename, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr), (ITT_FORMAT addr), sync_destroy, __itt_group_sync | __itt_group_fsync, "%p") - -ITT_STUBV(ITTAPI, void, sync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_sync, "%p") -ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_sync, "%p") -ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p") -ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p") - -ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p") 
-ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args") -ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d") -ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d") - -ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p") -ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p") -ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p") -ITT_STUBV(ITTAPI, void, fsync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_fsync, "%p") - -ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name), (ITT_FORMAT site, instance, name), model_site_begin, __itt_group_model, "%p, %p, \"%s\"") -ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance), (ITT_FORMAT site, instance), model_site_end, __itt_group_model, "%p, %p") -ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name), (ITT_FORMAT task, instance, name), model_task_begin, __itt_group_model, "%p, %p, \"%s\"") -ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance), (ITT_FORMAT task, instance), model_task_end, __itt_group_model, "%p, %p") -ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock), (ITT_FORMAT lock), model_lock_acquire, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock), (ITT_FORMAT lock), 
model_lock_release, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size), (ITT_FORMAT addr, size), model_record_allocation, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr), (ITT_FORMAT addr), model_record_deallocation, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_induction_uses, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d") -ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d") -ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d") 
-ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"") -ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d") -ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args") -ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args") -ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d") -ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p") -ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args") -#endif /* __ITT_INTERNAL_BODY */ - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_createA, __itt_group_heap, "\"%s\", \"%s\"") -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), heap_function_createW, __itt_group_heap, "\"%s\", \"%s\"") -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_create, __itt_group_heap, "\"%s\", \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, 
heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized), (ITT_FORMAT h, size, initialized), heap_allocate_begin, __itt_group_heap, "%p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized), (ITT_FORMAT h, addr, size, initialized), heap_allocate_end, __itt_group_heap, "%p, %p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_begin, __itt_group_heap, "%p, %p") -ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_end, __itt_group_heap, "%p, %p") -ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_size, initialized), heap_reallocate_begin, __itt_group_heap, "%p, %p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d") -ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args") -ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u") -ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u") - -ITT_STUBV(ITTAPI, void, 
id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu") -ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu") - -ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args") - -ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu") - -#ifndef __ITT_INTERNAL_BODY -ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu") -#endif /* __ITT_INTERNAL_BODY */ - -ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p") - -ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin, __itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parent, void* fn), (ITT_FORMAT domain, id, parent, fn), task_begin_fn, 
__itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), (ITT_FORMAT domain), task_end, __itt_group_structure, "%p") - -ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu") - -ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d") - -ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, id, key, type, count, data), metadata_add, __itt_group_structure, "%p, %lu, %p, %d, %lu, %p") -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addA, __itt_group_structure, "%p, %lu, %p, %p, %lu") -ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addW, __itt_group_structure, "%p, %lu, %p, %p, %lu") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_add, __itt_group_structure, "%p, %lu, %p, %p, %lu") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_STUBV(ITTAPI, void, 
relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, relation, tail), relation_add_to_current, __itt_group_structure, "%p, %lu, %p") -ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, head, relation, tail), relation_add, __itt_group_structure, "%p, %p, %lu, %p") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_createA, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") -ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), event_createW, __itt_group_mark | __itt_group_legacy, "\"%S\", %d") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_create, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event), (ITT_FORMAT event), event_start, __itt_group_mark | __itt_group_legacy, "%d") -ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event), (ITT_FORMAT event), event_end, __itt_group_mark | __itt_group_legacy, "%d") -#endif /* __ITT_INTERNAL_BODY */ - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x") -ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", 
\"%S\", %x") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "p, \"%s\", \"%s\", %x") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") -ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *p, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", %d, \"%S\", %d, %x") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *p), (ITT_FORMAT p), notify_sync_prepare, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *p), (ITT_FORMAT p), notify_sync_cancel, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *p), (ITT_FORMAT p), notify_sync_acquired, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *p), (ITT_FORMAT p), 
notify_sync_releasing, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") -#endif /* __ITT_INTERNAL_BODY */ - -ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_read, __itt_group_legacy, "%p, %lu") -ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_write, __itt_group_legacy, "%p, %lu") -ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_update, __itt_group_legacy, "%p, %lu") - -ITT_STUB(LIBITTAPI, __itt_state_t, state_get, (void), (ITT_NO_PARAMS), state_get, __itt_group_legacy, "no args") -ITT_STUB(LIBITTAPI, __itt_state_t, state_set, (__itt_state_t s), (ITT_FORMAT s), state_set, __itt_group_legacy, "%d") -ITT_STUB(LIBITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s), (ITT_FORMAT p, s), obj_mode_set, __itt_group_legacy, "%d, %d") -ITT_STUB(LIBITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s), (ITT_FORMAT p, s), thr_mode_set, __itt_group_legacy, "%d, %d") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain), (ITT_FORMAT domain), frame_createA, __itt_group_frame, "\"%s\"") -ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMAT domain), frame_createW, __itt_group_frame, "\"%s\"") -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain), (ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p") -ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN 
-ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"") -ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"") -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu") - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"") -ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name), (ITT_FORMAT name), mark_createW, __itt_group_mark, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name), (ITT_FORMAT name), mark_create, __itt_group_mark, "\"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), markA, __itt_group_mark, "%d, \"%s\"") -ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), markW, __itt_group_mark, "%d, \"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ 
-ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark, __itt_group_mark, "%d, \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_off, __itt_group_mark, "%d") -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_globalA, __itt_group_mark, "%d, \"%s\"") -ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), mark_globalW, __itt_group_mark, "%d, \"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_global, __itt_group_mark, "%d, \"%S\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_global_off, __itt_group_mark, "%d") - -#ifndef __ITT_INTERNAL_BODY -ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void), (ITT_NO_PARAMS), stack_caller_create, __itt_group_stitch, "no args") -#endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id), (ITT_FORMAT id), stack_caller_destroy, __itt_group_stitch, "%p") -ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id), (ITT_FORMAT id), stack_callee_enter, __itt_group_stitch, "%p") -ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id), (ITT_FORMAT id), stack_callee_leave, __itt_group_stitch, "%p") - -ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data), (ITT_FORMAT fn, fn_data), clock_domain_create, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, clock_domain_reset, (void), (ITT_NO_PARAMS), clock_domain_reset, __itt_group_structure, "no args") -ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, 
unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_create_ex, __itt_group_structure, "%p, %p, %lu, %lu") -ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_destroy_ex, __itt_group_structure, "%p, %p, %lu, %lu") -ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, fn), task_begin_fn_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp), (ITT_FORMAT domain, clock_domain, timestamp), task_end_ex, __itt_group_structure, "%p, %p, %lu") -ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin_overlapped, __itt_group_structure, "%p, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") -ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), task_end_overlapped, 
__itt_group_structure, "%p, %lu") -ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), task_end_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu") -ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, clock_domain, timestamp, id, name, scope), marker_ex, __itt_group_structure, "%p, %p, %lu, %lu, %p, %d") -ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, scope, key, type, count, data), metadata_add_with_scope, __itt_group_structure, "%p, %d, %p, %d, %lu, %p") -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeA, __itt_group_structure, "%p, %d, %p, %p, %lu") -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeW, __itt_group_structure, "%p, %d, %p, %p, %lu") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scope, __itt_group_structure, "%p, %d, %p, %p, %lu") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, 
__itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, relation, tail), relation_add_to_current_ex, __itt_group_structure, "%p, %p, %lu, %d, %lu") -ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, head, relation, tail), relation_add_ex, __itt_group_structure, "%p, %p, %lu, %lu, %d, %lu") -ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type), (ITT_FORMAT name, track_group_type), track_group_create, __itt_group_structure, "%p, %d") -ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type), (ITT_FORMAT track_group, name, track_type), track_create, __itt_group_structure, "%p, %p, %d") -ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track), (ITT_FORMAT track), set_track, __itt_group_structure, "%p") - -#ifndef __ITT_INTERNAL_BODY -ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args") -#endif /* __ITT_INTERNAL_BODY */ - -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d") -ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, int, av_save, (void 
*data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ - -#endif /* __ITT_INTERNAL_INIT */ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#include "ittnotify_config.h" + +#ifndef ITT_FORMAT_DEFINED +# ifndef ITT_FORMAT +# define ITT_FORMAT +# endif /* ITT_FORMAT */ +# ifndef ITT_NO_PARAMS +# define ITT_NO_PARAMS +# endif /* ITT_NO_PARAMS */ +#endif /* ITT_FORMAT_DEFINED */ + +/* + * parameters for macro expected: + * ITT_STUB(api, type, func_name, arguments, params, func_name_in_dll, group, printf_fmt) + */ +#ifdef __ITT_INTERNAL_INIT + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name), (ITT_FORMAT name), domain_createA, __itt_group_structure, "\"%s\"") +ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORMAT name), domain_createW, __itt_group_structure, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"") +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, 
"\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args") +ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args") + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"") +ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name), (ITT_FORMAT name), thread_set_nameW, __itt_group_thread, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name), (ITT_FORMAT name), thread_set_name, __itt_group_thread, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_ignore, (void), (ITT_NO_PARAMS), thread_ignore, __itt_group_thread, "no args") + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setA, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") +ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setW, __itt_group_thread | __itt_group_legacy, "\"%S\", %d") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args") +#endif /* __ITT_INTERNAL_BODY */ + +ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), 
enable_attach, __itt_group_all, "no args") + +#else /* __ITT_INTERNAL_INIT */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") +ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\", \"%S\", %x") +ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_renameA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") +ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name), (ITT_FORMAT addr, name), sync_renameW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_create, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") +ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_rename, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr), (ITT_FORMAT addr), sync_destroy, __itt_group_sync | __itt_group_fsync, "%p") + +ITT_STUBV(ITTAPI, void, sync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_sync, "%p") +ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_sync, "%p") +ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p") +ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p") + 
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p") +ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args") +ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d") +ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d") + +ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p") +ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p") +ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p") +ITT_STUBV(ITTAPI, void, fsync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_fsync, "%p") + +ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name), (ITT_FORMAT site, instance, name), model_site_begin, __itt_group_model, "%p, %p, \"%s\"") +ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance), (ITT_FORMAT site, instance), model_site_end, __itt_group_model, "%p, %p") +ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name), (ITT_FORMAT task, instance, name), model_task_begin, __itt_group_model, "%p, %p, \"%s\"") +ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance), (ITT_FORMAT task, instance), model_task_end, __itt_group_model, "%p, %p") +ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock), (ITT_FORMAT lock), 
model_lock_acquire, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock), (ITT_FORMAT lock), model_lock_release, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size), (ITT_FORMAT addr, size), model_record_allocation, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr), (ITT_FORMAT addr), model_record_deallocation, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_induction_uses, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d") +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, 
model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d") +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d") +ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args") +ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args") +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d") +ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args") +#endif /* __ITT_INTERNAL_BODY */ + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_createA, __itt_group_heap, "\"%s\", \"%s\"") +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), heap_function_createW, __itt_group_heap, "\"%s\", \"%s\"") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_create, __itt_group_heap, "\"%s\", 
\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized), (ITT_FORMAT h, size, initialized), heap_allocate_begin, __itt_group_heap, "%p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized), (ITT_FORMAT h, addr, size, initialized), heap_allocate_end, __itt_group_heap, "%p, %p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_begin, __itt_group_heap, "%p, %p") +ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_end, __itt_group_heap, "%p, %p") +ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_size, initialized), heap_reallocate_begin, __itt_group_heap, "%p, %p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u") +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int 
record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u") + +ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu") +ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu") + +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args") + +ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu") + +#ifndef __ITT_INTERNAL_BODY +ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu") +#endif /* __ITT_INTERNAL_BODY */ + +ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p") + +ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, 
__itt_id id, __itt_id parent, void* fn), (ITT_FORMAT domain, id, parent, fn), task_begin_fn, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), (ITT_FORMAT domain), task_end, __itt_group_structure, "%p") + +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu") + +ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d") + +ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, id, key, type, count, data), metadata_add, __itt_group_structure, "%p, %lu, %p, %d, %lu, %p") +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addA, __itt_group_structure, "%p, %lu, %p, %p, %lu") +ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addW, __itt_group_structure, "%p, %lu, %p, %p, %lu") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_add, __itt_group_structure, "%p, %lu, %p, %p, %lu") 
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, relation, tail), relation_add_to_current, __itt_group_structure, "%p, %lu, %p") +ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, head, relation, tail), relation_add, __itt_group_structure, "%p, %p, %lu, %p") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_createA, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), event_createW, __itt_group_mark | __itt_group_legacy, "\"%S\", %d") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_create, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event), (ITT_FORMAT event), event_start, __itt_group_mark | __itt_group_legacy, "%d") +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event), (ITT_FORMAT event), event_end, __itt_group_mark | __itt_group_legacy, "%d") +#endif /* __ITT_INTERNAL_BODY */ + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x") +ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameW, 
__itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", \"%S\", %x") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "p, \"%s\", \"%s\", %x") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") +ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *p, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", %d, \"%S\", %d, %x") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *p), (ITT_FORMAT p), notify_sync_prepare, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") +ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *p), (ITT_FORMAT p), notify_sync_cancel, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") +ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *p), (ITT_FORMAT p), notify_sync_acquired, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") 
+ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *p), (ITT_FORMAT p), notify_sync_releasing, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") +#endif /* __ITT_INTERNAL_BODY */ + +ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_read, __itt_group_legacy, "%p, %lu") +ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_write, __itt_group_legacy, "%p, %lu") +ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_update, __itt_group_legacy, "%p, %lu") + +ITT_STUB(LIBITTAPI, __itt_state_t, state_get, (void), (ITT_NO_PARAMS), state_get, __itt_group_legacy, "no args") +ITT_STUB(LIBITTAPI, __itt_state_t, state_set, (__itt_state_t s), (ITT_FORMAT s), state_set, __itt_group_legacy, "%d") +ITT_STUB(LIBITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s), (ITT_FORMAT p, s), obj_mode_set, __itt_group_legacy, "%d, %d") +ITT_STUB(LIBITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s), (ITT_FORMAT p, s), thr_mode_set, __itt_group_legacy, "%d, %d") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain), (ITT_FORMAT domain), frame_createA, __itt_group_frame, "\"%s\"") +ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMAT domain), frame_createW, __itt_group_frame, "\"%s\"") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain), (ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p") +ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p") + 
+#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"") +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"") +ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name), (ITT_FORMAT name), mark_createW, __itt_group_mark, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name), (ITT_FORMAT name), mark_create, __itt_group_mark, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), markA, __itt_group_mark, "%d, \"%s\"") +ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), markW, 
__itt_group_mark, "%d, \"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark, __itt_group_mark, "%d, \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_off, __itt_group_mark, "%d") +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_globalA, __itt_group_mark, "%d, \"%s\"") +ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), mark_globalW, __itt_group_mark, "%d, \"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_global, __itt_group_mark, "%d, \"%S\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_global_off, __itt_group_mark, "%d") + +#ifndef __ITT_INTERNAL_BODY +ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void), (ITT_NO_PARAMS), stack_caller_create, __itt_group_stitch, "no args") +#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id), (ITT_FORMAT id), stack_caller_destroy, __itt_group_stitch, "%p") +ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id), (ITT_FORMAT id), stack_callee_enter, __itt_group_stitch, "%p") +ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id), (ITT_FORMAT id), stack_callee_leave, __itt_group_stitch, "%p") + +ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data), (ITT_FORMAT fn, fn_data), clock_domain_create, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, clock_domain_reset, (void), (ITT_NO_PARAMS), clock_domain_reset, __itt_group_structure, "no args") +ITT_STUBV(ITTAPI, void, 
id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_create_ex, __itt_group_structure, "%p, %p, %lu, %lu") +ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_destroy_ex, __itt_group_structure, "%p, %p, %lu, %lu") +ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, fn), task_begin_fn_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp), (ITT_FORMAT domain, clock_domain, timestamp), task_end_ex, __itt_group_structure, "%p, %p, %lu") +ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin_overlapped, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_end_overlapped, (const 
__itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), task_end_overlapped, __itt_group_structure, "%p, %lu") +ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), task_end_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu") +ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, clock_domain, timestamp, id, name, scope), marker_ex, __itt_group_structure, "%p, %p, %lu, %lu, %p, %d") +ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, scope, key, type, count, data), metadata_add_with_scope, __itt_group_structure, "%p, %d, %p, %d, %lu, %p") +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeA, __itt_group_structure, "%p, %d, %p, %p, %lu") +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeW, __itt_group_structure, "%p, %d, %p, %p, %lu") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scope, __itt_group_structure, "%p, %d, %p, %p, %lu") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ 
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, relation, tail), relation_add_to_current_ex, __itt_group_structure, "%p, %p, %lu, %d, %lu") +ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, head, relation, tail), relation_add_ex, __itt_group_structure, "%p, %p, %lu, %lu, %d, %lu") +ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type), (ITT_FORMAT name, track_group_type), track_group_create, __itt_group_structure, "%p, %d") +ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type), (ITT_FORMAT track_group, name, track_type), track_create, __itt_group_structure, "%p, %p, %d") +ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track), (ITT_FORMAT track), set_track, __itt_group_structure, "%p") + +#ifndef __ITT_INTERNAL_BODY +ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args") +#endif /* __ITT_INTERNAL_BODY */ + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d") +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d") 
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ + +#endif /* __ITT_INTERNAL_INIT */ diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h index 56efda53331..3695a67089b 100644 --- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h +++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h @@ -1,67 +1,67 @@ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef _ITTNOTIFY_TYPES_H_ -#define _ITTNOTIFY_TYPES_H_ - -typedef enum ___itt_group_id -{ - __itt_group_none = 0, - __itt_group_legacy = 1<<0, - __itt_group_control = 1<<1, - __itt_group_thread = 1<<2, - __itt_group_mark = 1<<3, - __itt_group_sync = 1<<4, - __itt_group_fsync = 1<<5, - __itt_group_jit = 1<<6, - __itt_group_model = 1<<7, - __itt_group_splitter_min = 1<<7, - __itt_group_counter = 1<<8, - __itt_group_frame = 1<<9, - __itt_group_stitch = 1<<10, - __itt_group_heap = 1<<11, - __itt_group_splitter_max = 1<<12, - __itt_group_structure = 1<<12, - __itt_group_suppress = 1<<13, - __itt_group_arrays = 1<<14, - __itt_group_all = -1 -} __itt_group_id; - -#pragma pack(push, 8) - -typedef struct ___itt_group_list -{ - __itt_group_id id; - const char* name; -} __itt_group_list; - -#pragma pack(pop) - -#define ITT_GROUP_LIST(varname) \ - static __itt_group_list varname[] = { \ - { __itt_group_all, "all" }, \ - { __itt_group_control, "control" }, \ - { __itt_group_thread, "thread" }, \ - { __itt_group_mark, "mark" }, \ - { __itt_group_sync, "sync" }, \ - { __itt_group_fsync, "fsync" }, \ - { __itt_group_jit, "jit" }, \ - { __itt_group_model, "model" }, \ - { __itt_group_counter, "counter" }, \ - { __itt_group_frame, "frame" }, \ - { __itt_group_stitch, "stitch" }, \ - { __itt_group_heap, "heap" }, \ - { __itt_group_structure, "structure" }, \ - { __itt_group_suppress, "suppress" }, \ - { __itt_group_arrays, "arrays" }, \ - { __itt_group_none, NULL } \ - } - -#endif /* _ITTNOTIFY_TYPES_H_ */ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _ITTNOTIFY_TYPES_H_ +#define _ITTNOTIFY_TYPES_H_ + +typedef enum ___itt_group_id +{ + __itt_group_none = 0, + __itt_group_legacy = 1<<0, + __itt_group_control = 1<<1, + __itt_group_thread = 1<<2, + __itt_group_mark = 1<<3, + __itt_group_sync = 1<<4, + __itt_group_fsync = 1<<5, + __itt_group_jit = 1<<6, + __itt_group_model = 1<<7, + __itt_group_splitter_min = 1<<7, + __itt_group_counter = 1<<8, + __itt_group_frame = 1<<9, + __itt_group_stitch = 1<<10, + __itt_group_heap = 1<<11, + __itt_group_splitter_max = 1<<12, + __itt_group_structure = 1<<12, + __itt_group_suppress = 1<<13, + __itt_group_arrays = 1<<14, + __itt_group_all = -1 +} __itt_group_id; + +#pragma pack(push, 8) + +typedef struct ___itt_group_list +{ + __itt_group_id id; + const char* name; +} __itt_group_list; + +#pragma pack(pop) + +#define ITT_GROUP_LIST(varname) \ + static __itt_group_list varname[] = { \ + { __itt_group_all, "all" }, \ + { __itt_group_control, "control" }, \ + { __itt_group_thread, "thread" }, \ + { __itt_group_mark, "mark" }, \ + { __itt_group_sync, "sync" }, \ + { __itt_group_fsync, "fsync" }, \ + { __itt_group_jit, "jit" }, \ + { __itt_group_model, "model" }, \ + { __itt_group_counter, "counter" }, \ + { __itt_group_frame, "frame" }, \ + { __itt_group_stitch, "stitch" }, \ + { __itt_group_heap, "heap" }, \ + { __itt_group_structure, "structure" }, \ + { __itt_group_suppress, "suppress" }, \ + { __itt_group_arrays, "arrays" }, \ + { __itt_group_none, NULL } \ + } + +#endif /* _ITTNOTIFY_TYPES_H_ */ diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h index e0410a76f26..4cf81db6348 100644 --- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h +++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h @@ -1,972 +1,972 @@ - 
-//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef _LEGACY_ITTNOTIFY_H_ -#define _LEGACY_ITTNOTIFY_H_ - -/** - * @file - * @brief Legacy User API functions and types - */ - -/** @cond exclude_from_documentation */ -#ifndef ITT_OS_WIN -# define ITT_OS_WIN 1 -#endif /* ITT_OS_WIN */ - -#ifndef ITT_OS_LINUX -# define ITT_OS_LINUX 2 -#endif /* ITT_OS_LINUX */ - -#ifndef ITT_OS_MAC -# define ITT_OS_MAC 3 -#endif /* ITT_OS_MAC */ - -#ifndef ITT_OS -# if defined WIN32 || defined _WIN32 -# define ITT_OS ITT_OS_WIN -# elif defined( __APPLE__ ) && defined( __MACH__ ) -# define ITT_OS ITT_OS_MAC -# else -# define ITT_OS ITT_OS_LINUX -# endif -#endif /* ITT_OS */ - -#ifndef ITT_PLATFORM_WIN -# define ITT_PLATFORM_WIN 1 -#endif /* ITT_PLATFORM_WIN */ - -#ifndef ITT_PLATFORM_POSIX -# define ITT_PLATFORM_POSIX 2 -#endif /* ITT_PLATFORM_POSIX */ - -#ifndef ITT_PLATFORM_MAC -# define ITT_PLATFORM_MAC 3 -#endif /* ITT_PLATFORM_MAC */ - -#ifndef ITT_PLATFORM -# if ITT_OS==ITT_OS_WIN -# define ITT_PLATFORM ITT_PLATFORM_WIN -# elif ITT_OS==ITT_OS_MAC -# define ITT_PLATFORM ITT_PLATFORM_MAC -# else -# define ITT_PLATFORM ITT_PLATFORM_POSIX -# endif -#endif /* ITT_PLATFORM */ - -#if defined(_UNICODE) && !defined(UNICODE) -#define UNICODE -#endif - -#include -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#include -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#if defined(UNICODE) || defined(_UNICODE) -#include -#endif /* UNICODE || _UNICODE */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef CDECL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define CDECL __cdecl -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ -# define CDECL __attribute__ ((cdecl)) -# 
else /* _M_IX86 || __i386__ */ -# define CDECL /* actual only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* CDECL */ - -#ifndef STDCALL -# if ITT_PLATFORM==ITT_PLATFORM_WIN -# define STDCALL __stdcall -# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _LEGACY_ITTNOTIFY_H_ +#define _LEGACY_ITTNOTIFY_H_ + +/** + * @file + * @brief Legacy User API functions and types + */ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include +#if defined(UNICODE) || 
defined(_UNICODE) +#include +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # if defined _M_IX86 || defined __i386__ -# define STDCALL __attribute__ ((stdcall)) -# else /* _M_IX86 || __i386__ */ -# define STDCALL /* supported only on x86 platform */ -# endif /* _M_IX86 || __i386__ */ -# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* STDCALL */ - -#define ITTAPI CDECL -#define LIBITTAPI CDECL - -/* TODO: Temporary for compatibility! */ -#define ITTAPI_CALL CDECL -#define LIBITTAPI_CALL CDECL - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -/* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline -#define ITT_INLINE_ATTRIBUTE /* nothing */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/* - * Generally, functions are not inlined unless optimization is specified. - * For functions declared inline, this attribute inlines the function even - * if no optimization level was specified. 
- */ -#ifdef __STRICT_ANSI__ -#define ITT_INLINE static -#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) -#else /* __STRICT_ANSI__ */ -#define ITT_INLINE static inline -#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) -#endif /* __STRICT_ANSI__ */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/** @endcond */ - -/** @cond exclude_from_documentation */ -/* Helper macro for joining tokens */ -#define ITT_JOIN_AUX(p,n) p##n -#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) - -#ifdef ITT_MAJOR -#undef ITT_MAJOR -#endif -#ifdef ITT_MINOR -#undef ITT_MINOR -#endif -#define ITT_MAJOR 3 -#define ITT_MINOR 0 - -/* Standard versioning of a token with major and minor version numbers */ -#define ITT_VERSIONIZE(x) \ - ITT_JOIN(x, \ - ITT_JOIN(_, \ - ITT_JOIN(ITT_MAJOR, \ - ITT_JOIN(_, ITT_MINOR)))) - -#ifndef INTEL_ITTNOTIFY_PREFIX -# define INTEL_ITTNOTIFY_PREFIX __itt_ -#endif /* INTEL_ITTNOTIFY_PREFIX */ -#ifndef INTEL_ITTNOTIFY_POSTFIX -# define INTEL_ITTNOTIFY_POSTFIX _ptr_ -#endif /* INTEL_ITTNOTIFY_POSTFIX */ - -#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) -#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) - -#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) -#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) - -#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? 
(void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) -#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) -#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) -#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) -#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) -#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) -#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) -#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) - -#ifdef ITT_STUB -#undef ITT_STUB -#endif -#ifdef ITT_STUBV -#undef ITT_STUBV -#endif -#define ITT_STUBV(api,type,name,args) \ - typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ - extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); -#define ITT_STUB ITT_STUBV -/** @endcond */ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * @defgroup legacy Legacy API - * @{ - * @} - */ - -/** - * @defgroup legacy_control Collection Control - * @ingroup legacy - * General behavior: application continues to run, but no profiling information is being collected - * - * Pausing occurs not only for the current thread but for all process as well as spawned processes - * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: - * - Does not analyze or report errors that involve memory access. - * - Other errors are reported as usual. 
Pausing data collection in - * Intel(R) Parallel Inspector and Intel(R) Inspector XE - * only pauses tracing and analyzing memory access. - * It does not pause tracing or analyzing threading APIs. - * . - * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: - * - Does continue to record when new threads are started. - * . - * - Other effects: - * - Possible reduction of runtime overhead. - * . - * @{ - */ -#ifndef _ITTNOTIFY_H_ -/** @brief Pause collection */ -void ITTAPI __itt_pause(void); -/** @brief Resume collection */ -void ITTAPI __itt_resume(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, pause, (void)) -ITT_STUBV(ITTAPI, void, resume, (void)) -#define __itt_pause ITTNOTIFY_VOID(pause) -#define __itt_pause_ptr ITTNOTIFY_NAME(pause) -#define __itt_resume ITTNOTIFY_VOID(resume) -#define __itt_resume_ptr ITTNOTIFY_NAME(resume) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_pause() -#define __itt_pause_ptr 0 -#define __itt_resume() -#define __itt_resume_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_pause_ptr 0 -#define __itt_resume_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -#endif /* _ITTNOTIFY_H_ */ -/** @} legacy_control group */ - -/** - * @defgroup legacy_threads Threads - * @ingroup legacy - * Threads group - * @warning Legacy API - * @{ - */ -/** - * @deprecated Legacy API - * @brief Set name to be associated with thread in analysis GUI. 
- * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int LIBITTAPI __itt_thr_name_setA(const char *name, int namelen); -int LIBITTAPI __itt_thr_name_setW(const wchar_t *name, int namelen); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_thr_name_set __itt_thr_name_setW -# define __itt_thr_name_set_ptr __itt_thr_name_setW_ptr -#else -# define __itt_thr_name_set __itt_thr_name_setA -# define __itt_thr_name_set_ptr __itt_thr_name_setA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int LIBITTAPI __itt_thr_name_set(const char *name, int namelen); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen)) -ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thr_name_setA ITTNOTIFY_DATA(thr_name_setA) -#define __itt_thr_name_setA_ptr ITTNOTIFY_NAME(thr_name_setA) -#define __itt_thr_name_setW ITTNOTIFY_DATA(thr_name_setW) -#define __itt_thr_name_setW_ptr ITTNOTIFY_NAME(thr_name_setW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thr_name_set ITTNOTIFY_DATA(thr_name_set) -#define __itt_thr_name_set_ptr ITTNOTIFY_NAME(thr_name_set) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thr_name_setA(name, namelen) -#define __itt_thr_name_setA_ptr 0 -#define __itt_thr_name_setW(name, namelen) -#define __itt_thr_name_setW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thr_name_set(name, namelen) -#define __itt_thr_name_set_ptr 0 
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_thr_name_setA_ptr 0 -#define __itt_thr_name_setW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_thr_name_set_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Mark current thread as ignored from this point on, for the duration of its existence. - */ -void LIBITTAPI __itt_thr_ignore(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, thr_ignore, (void)) -#define __itt_thr_ignore ITTNOTIFY_VOID(thr_ignore) -#define __itt_thr_ignore_ptr ITTNOTIFY_NAME(thr_ignore) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_thr_ignore() -#define __itt_thr_ignore_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_thr_ignore_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_threads group */ - -/** - * @defgroup legacy_sync Synchronization - * @ingroup legacy - * Synchronization group - * @warning Legacy API - * @{ - */ -/** - * @hideinitializer - * @brief possible value of attribute argument for sync object type - */ -#define __itt_attr_barrier 1 - -/** - * @hideinitializer - * @brief possible value of attribute argument for sync object type - */ -#define __itt_attr_mutex 2 - -/** - * @deprecated Legacy API - * @brief Assign a name to a sync object using char or Unicode string - * @param[in] addr - pointer to the sync object. You should use a real pointer to your object - * to make sure that the values don't clash with other object addresses - * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will - * be assumed to be of generic "User Synchronization" type - * @param[in] objname - null-terminated object name string. 
If NULL, no name will be assigned - * to the object -- you can use the __itt_sync_rename call later to assign - * the name - * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the - * exact semantics of how prepare/acquired/releasing calls work. - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_sync_set_nameA(void *addr, const char *objtype, const char *objname, int attribute); -void ITTAPI __itt_sync_set_nameW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_sync_set_name __itt_sync_set_nameW -# define __itt_sync_set_name_ptr __itt_sync_set_nameW_ptr -#else /* UNICODE */ -# define __itt_sync_set_name __itt_sync_set_nameA -# define __itt_sync_set_name_ptr __itt_sync_set_nameA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_sync_set_name(void *addr, const char* objtype, const char* objname, int attribute); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute)) -ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_set_nameA ITTNOTIFY_VOID(sync_set_nameA) -#define __itt_sync_set_nameA_ptr ITTNOTIFY_NAME(sync_set_nameA) -#define __itt_sync_set_nameW ITTNOTIFY_VOID(sync_set_nameW) -#define __itt_sync_set_nameW_ptr ITTNOTIFY_NAME(sync_set_nameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_set_name 
ITTNOTIFY_VOID(sync_set_name) -#define __itt_sync_set_name_ptr ITTNOTIFY_NAME(sync_set_name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_set_nameA(addr, objtype, objname, attribute) -#define __itt_sync_set_nameA_ptr 0 -#define __itt_sync_set_nameW(addr, objtype, objname, attribute) -#define __itt_sync_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_set_name(addr, objtype, objname, attribute) -#define __itt_sync_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_sync_set_nameA_ptr 0 -#define __itt_sync_set_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_sync_set_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Assign a name and type to a sync object using char or Unicode string - * @param[in] addr - pointer to the sync object. You should use a real pointer to your object - * to make sure that the values don't clash with other object addresses - * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will - * be assumed to be of generic "User Synchronization" type - * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned - * to the object -- you can use the __itt_sync_rename call later to assign - * the name - * @param[in] typelen, namelen - a length of string for appropriate objtype and objname parameter - * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the - * exact semantics of how prepare/acquired/releasing calls work. 
- * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -int LIBITTAPI __itt_notify_sync_nameA(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); -int LIBITTAPI __itt_notify_sync_nameW(void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_notify_sync_name __itt_notify_sync_nameW -#else -# define __itt_notify_sync_name __itt_notify_sync_nameA -#endif -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -int LIBITTAPI __itt_notify_sync_name(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) -ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_notify_sync_nameA ITTNOTIFY_DATA(notify_sync_nameA) -#define __itt_notify_sync_nameA_ptr ITTNOTIFY_NAME(notify_sync_nameA) -#define __itt_notify_sync_nameW ITTNOTIFY_DATA(notify_sync_nameW) -#define __itt_notify_sync_nameW_ptr ITTNOTIFY_NAME(notify_sync_nameW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_notify_sync_name ITTNOTIFY_DATA(notify_sync_name) -#define __itt_notify_sync_name_ptr ITTNOTIFY_NAME(notify_sync_name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ 
-#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_notify_sync_nameA(addr, objtype, typelen, objname, namelen, attribute) -#define __itt_notify_sync_nameA_ptr 0 -#define __itt_notify_sync_nameW(addr, objtype, typelen, objname, namelen, attribute) -#define __itt_notify_sync_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_notify_sync_name(addr, objtype, typelen, objname, namelen, attribute) -#define __itt_notify_sync_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_notify_sync_nameA_ptr 0 -#define __itt_notify_sync_nameW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_notify_sync_name_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Enter spin loop on user-defined sync object - */ -void LIBITTAPI __itt_notify_sync_prepare(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *addr)) -#define __itt_notify_sync_prepare ITTNOTIFY_VOID(notify_sync_prepare) -#define __itt_notify_sync_prepare_ptr ITTNOTIFY_NAME(notify_sync_prepare) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_prepare(addr) -#define __itt_notify_sync_prepare_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_prepare_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Quit spin loop without acquiring spin object - */ -void LIBITTAPI __itt_notify_sync_cancel(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *addr)) -#define __itt_notify_sync_cancel 
ITTNOTIFY_VOID(notify_sync_cancel) -#define __itt_notify_sync_cancel_ptr ITTNOTIFY_NAME(notify_sync_cancel) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_cancel(addr) -#define __itt_notify_sync_cancel_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_cancel_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Successful spin loop completion (sync object acquired) - */ -void LIBITTAPI __itt_notify_sync_acquired(void *addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *addr)) -#define __itt_notify_sync_acquired ITTNOTIFY_VOID(notify_sync_acquired) -#define __itt_notify_sync_acquired_ptr ITTNOTIFY_NAME(notify_sync_acquired) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_acquired(addr) -#define __itt_notify_sync_acquired_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_acquired_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Start sync object releasing code. Is called before the lock release call. 
- */ -void LIBITTAPI __itt_notify_sync_releasing(void* addr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *addr)) -#define __itt_notify_sync_releasing ITTNOTIFY_VOID(notify_sync_releasing) -#define __itt_notify_sync_releasing_ptr ITTNOTIFY_NAME(notify_sync_releasing) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_notify_sync_releasing(addr) -#define __itt_notify_sync_releasing_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_notify_sync_releasing_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_sync group */ - -#ifndef _ITTNOTIFY_H_ -/** - * @defgroup legacy_events Events - * @ingroup legacy - * Events group - * @{ - */ - -/** @brief user event type */ -typedef int __itt_event; - -/** - * @brief Create an event notification - * @note name or namelen being null/name and namelen not matching, user event feature not enabled - * @return non-zero event identifier upon success and __itt_err otherwise - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); -__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_event_create __itt_event_createW -# define __itt_event_create_ptr __itt_event_createW_ptr -#else -# define __itt_event_create __itt_event_createA -# define __itt_event_create_ptr __itt_event_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) -ITT_STUB(LIBITTAPI, __itt_event, 
event_createW, (const wchar_t *name, int namelen)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA ITTNOTIFY_DATA(event_createA) -#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) -#define __itt_event_createW ITTNOTIFY_DATA(event_createW) -#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create ITTNOTIFY_DATA(event_create) -#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA(name, namelen) (__itt_event)0 -#define __itt_event_createA_ptr 0 -#define __itt_event_createW(name, namelen) (__itt_event)0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create(name, namelen) (__itt_event)0 -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_event_createA_ptr 0 -#define __itt_event_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_event_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event occurrence. 
- * @return __itt_err upon failure (invalid event id/user event feature not enabled) - */ -int LIBITTAPI __itt_event_start(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) -#define __itt_event_start ITTNOTIFY_DATA(event_start) -#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_start(event) (int)0 -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_start_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Record an event end occurrence. - * @note It is optional if events do not have durations. - * @return __itt_err upon failure (invalid event id/user event feature not enabled) - */ -int LIBITTAPI __itt_event_end(__itt_event event); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) -#define __itt_event_end ITTNOTIFY_DATA(event_end) -#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_event_end(event) (int)0 -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_event_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_events group */ -#endif /* _ITTNOTIFY_H_ */ - -/** - * @defgroup legacy_memory Memory Accesses - * @ingroup legacy - */ - -/** - * @deprecated Legacy API - * @brief Inform the tool of memory accesses on reading - */ -void LIBITTAPI __itt_memory_read(void *addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size)) -#define __itt_memory_read ITTNOTIFY_VOID(memory_read) -#define 
__itt_memory_read_ptr ITTNOTIFY_NAME(memory_read) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_memory_read(addr, size) -#define __itt_memory_read_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_memory_read_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Inform the tool of memory accesses on writing - */ -void LIBITTAPI __itt_memory_write(void *addr, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size)) -#define __itt_memory_write ITTNOTIFY_VOID(memory_write) -#define __itt_memory_write_ptr ITTNOTIFY_NAME(memory_write) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_memory_write(addr, size) -#define __itt_memory_write_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_memory_write_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief Inform the tool of memory accesses on updating - */ -void LIBITTAPI __itt_memory_update(void *address, size_t size); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size)) -#define __itt_memory_update ITTNOTIFY_VOID(memory_update) -#define __itt_memory_update_ptr ITTNOTIFY_NAME(memory_update) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_memory_update(addr, size) -#define __itt_memory_update_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_memory_update_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_memory group */ - -/** - * @defgroup legacy_state Thread and Object States - * @ingroup legacy - */ - -/** @brief state type */ -typedef int __itt_state_t; - -/** @cond exclude_from_documentation */ -typedef enum __itt_obj_state { - 
__itt_obj_state_err = 0, - __itt_obj_state_clr = 1, - __itt_obj_state_set = 2, - __itt_obj_state_use = 3 -} __itt_obj_state_t; - -typedef enum __itt_thr_state { - __itt_thr_state_err = 0, - __itt_thr_state_clr = 1, - __itt_thr_state_set = 2 -} __itt_thr_state_t; - -typedef enum __itt_obj_prop { - __itt_obj_prop_watch = 1, - __itt_obj_prop_ignore = 2, - __itt_obj_prop_sharable = 3 -} __itt_obj_prop_t; - -typedef enum __itt_thr_prop { - __itt_thr_prop_quiet = 1 -} __itt_thr_prop_t; -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object states - */ -__itt_state_t LIBITTAPI __itt_state_get(void); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_state_t, state_get, (void)) -#define __itt_state_get ITTNOTIFY_DATA(state_get) -#define __itt_state_get_ptr ITTNOTIFY_NAME(state_get) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_state_get(void) (__itt_state_t)0 -#define __itt_state_get_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_state_get_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object states - */ -__itt_state_t LIBITTAPI __itt_state_set(__itt_state_t s); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_state_t, state_set, (__itt_state_t s)) -#define __itt_state_set ITTNOTIFY_DATA(state_set) -#define __itt_state_set_ptr ITTNOTIFY_NAME(state_set) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_state_set(s) (__itt_state_t)0 -#define __itt_state_set_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_state_set_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object modes - */ -__itt_thr_state_t LIBITTAPI __itt_thr_mode_set(__itt_thr_prop_t p, 
__itt_thr_state_t s); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s)) -#define __itt_thr_mode_set ITTNOTIFY_DATA(thr_mode_set) -#define __itt_thr_mode_set_ptr ITTNOTIFY_NAME(thr_mode_set) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_thr_mode_set(p, s) (__itt_thr_state_t)0 -#define __itt_thr_mode_set_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_thr_mode_set_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @deprecated Legacy API - * @brief managing thread and object modes - */ -__itt_obj_state_t LIBITTAPI __itt_obj_mode_set(__itt_obj_prop_t p, __itt_obj_state_t s); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUB(ITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s)) -#define __itt_obj_mode_set ITTNOTIFY_DATA(obj_mode_set) -#define __itt_obj_mode_set_ptr ITTNOTIFY_NAME(obj_mode_set) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_obj_mode_set(p, s) (__itt_obj_state_t)0 -#define __itt_obj_mode_set_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_obj_mode_set_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} legacy_state group */ - -/** - * @defgroup frames Frames - * @ingroup legacy - * Frames group - * @{ - */ -/** - * @brief opaque structure for frame identification - */ -typedef struct __itt_frame_t *__itt_frame; - -/** - * @brief Create a global frame with given domain - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_frame ITTAPI __itt_frame_createA(const char *domain); -__itt_frame ITTAPI __itt_frame_createW(const wchar_t *domain); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_frame_create __itt_frame_createW -# define __itt_frame_create_ptr __itt_frame_createW_ptr -#else /* UNICODE 
*/ -# define __itt_frame_create __itt_frame_createA -# define __itt_frame_create_ptr __itt_frame_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_frame ITTAPI __itt_frame_create(const char *domain); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain)) -ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_frame_createA ITTNOTIFY_DATA(frame_createA) -#define __itt_frame_createA_ptr ITTNOTIFY_NAME(frame_createA) -#define __itt_frame_createW ITTNOTIFY_DATA(frame_createW) -#define __itt_frame_createW_ptr ITTNOTIFY_NAME(frame_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_frame_create ITTNOTIFY_DATA(frame_create) -#define __itt_frame_create_ptr ITTNOTIFY_NAME(frame_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_frame_createA(domain) -#define __itt_frame_createA_ptr 0 -#define __itt_frame_createW(domain) -#define __itt_frame_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_frame_create(domain) -#define __itt_frame_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_frame_createA_ptr 0 -#define __itt_frame_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_frame_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @brief Record an frame begin occurrence. 
*/ -void ITTAPI __itt_frame_begin(__itt_frame frame); -/** @brief Record an frame end occurrence. */ -void ITTAPI __itt_frame_end (__itt_frame frame); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame)) -ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame)) -#define __itt_frame_begin ITTNOTIFY_VOID(frame_begin) -#define __itt_frame_begin_ptr ITTNOTIFY_NAME(frame_begin) -#define __itt_frame_end ITTNOTIFY_VOID(frame_end) -#define __itt_frame_end_ptr ITTNOTIFY_NAME(frame_end) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_frame_begin(frame) -#define __itt_frame_begin_ptr 0 -#define __itt_frame_end(frame) -#define __itt_frame_end_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_frame_begin_ptr 0 -#define __itt_frame_end_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} frames group */ - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _LEGACY_ITTNOTIFY_H_ */ +# define CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI CDECL +#define LIBITTAPI CDECL + +/* TODO: Temporary for compatibility! 
*/ +#define ITTAPI_CALL CDECL +#define LIBITTAPI_CALL CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. + */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +/* Helper macro for joining tokens */ +#define ITT_JOIN_AUX(p,n) p##n +#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) + +#ifdef ITT_MAJOR +#undef ITT_MAJOR +#endif +#ifdef ITT_MINOR +#undef ITT_MINOR +#endif +#define ITT_MAJOR 3 +#define ITT_MINOR 0 + +/* Standard versioning of a token with major and minor version numbers */ +#define ITT_VERSIONIZE(x) \ + ITT_JOIN(x, \ + ITT_JOIN(_, \ + ITT_JOIN(ITT_MAJOR, \ + ITT_JOIN(_, ITT_MINOR)))) + +#ifndef INTEL_ITTNOTIFY_PREFIX +# define INTEL_ITTNOTIFY_PREFIX __itt_ +#endif /* INTEL_ITTNOTIFY_PREFIX */ +#ifndef INTEL_ITTNOTIFY_POSTFIX +# define INTEL_ITTNOTIFY_POSTFIX _ptr_ +#endif /* INTEL_ITTNOTIFY_POSTFIX */ + +#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) + +#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) +#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) + +#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? 
(void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) + +#ifdef ITT_STUB +#undef ITT_STUB +#endif +#ifdef ITT_STUBV +#undef ITT_STUBV +#endif +#define ITT_STUBV(api,type,name,args) \ + typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ + extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); +#define ITT_STUB ITT_STUBV +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @defgroup legacy Legacy API + * @{ + * @} + */ + +/** + * @defgroup legacy_control Collection Control + * @ingroup legacy + * General behavior: application continues to run, but no profiling information is being collected + * + * Pausing occurs not only for the current thread but for all process as well as spawned processes + * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: + * - Does not analyze or report errors that involve memory access. + * - Other errors are reported as usual. Pausing data collection in + * Intel(R) Parallel Inspector and Intel(R) Inspector XE + * only pauses tracing and analyzing memory access. + * It does not pause tracing or analyzing threading APIs. + * . + * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Does continue to record when new threads are started. + * . + * - Other effects: + * - Possible reduction of runtime overhead. + * . 
+ * @{ + */ +#ifndef _ITTNOTIFY_H_ +/** @brief Pause collection */ +void ITTAPI __itt_pause(void); +/** @brief Resume collection */ +void ITTAPI __itt_resume(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, resume, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_pause() +#define __itt_pause_ptr 0 +#define __itt_resume() +#define __itt_resume_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_pause_ptr 0 +#define __itt_resume_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +#endif /* _ITTNOTIFY_H_ */ +/** @} legacy_control group */ + +/** + * @defgroup legacy_threads Threads + * @ingroup legacy + * Threads group + * @warning Legacy API + * @{ + */ +/** + * @deprecated Legacy API + * @brief Set name to be associated with thread in analysis GUI. 
+ * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int LIBITTAPI __itt_thr_name_setA(const char *name, int namelen); +int LIBITTAPI __itt_thr_name_setW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_thr_name_set __itt_thr_name_setW +# define __itt_thr_name_set_ptr __itt_thr_name_setW_ptr +#else +# define __itt_thr_name_set __itt_thr_name_setA +# define __itt_thr_name_set_ptr __itt_thr_name_setA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int LIBITTAPI __itt_thr_name_set(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thr_name_setA ITTNOTIFY_DATA(thr_name_setA) +#define __itt_thr_name_setA_ptr ITTNOTIFY_NAME(thr_name_setA) +#define __itt_thr_name_setW ITTNOTIFY_DATA(thr_name_setW) +#define __itt_thr_name_setW_ptr ITTNOTIFY_NAME(thr_name_setW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thr_name_set ITTNOTIFY_DATA(thr_name_set) +#define __itt_thr_name_set_ptr ITTNOTIFY_NAME(thr_name_set) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thr_name_setA(name, namelen) +#define __itt_thr_name_setA_ptr 0 +#define __itt_thr_name_setW(name, namelen) +#define __itt_thr_name_setW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thr_name_set(name, namelen) +#define __itt_thr_name_set_ptr 0 
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thr_name_setA_ptr 0 +#define __itt_thr_name_setW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thr_name_set_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Mark current thread as ignored from this point on, for the duration of its existence. + */ +void LIBITTAPI __itt_thr_ignore(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, thr_ignore, (void)) +#define __itt_thr_ignore ITTNOTIFY_VOID(thr_ignore) +#define __itt_thr_ignore_ptr ITTNOTIFY_NAME(thr_ignore) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thr_ignore() +#define __itt_thr_ignore_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thr_ignore_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_threads group */ + +/** + * @defgroup legacy_sync Synchronization + * @ingroup legacy + * Synchronization group + * @warning Legacy API + * @{ + */ +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_barrier 1 + +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_mutex 2 + +/** + * @deprecated Legacy API + * @brief Assign a name to a sync object using char or Unicode string + * @param[in] addr - pointer to the sync object. You should use a real pointer to your object + * to make sure that the values don't clash with other object addresses + * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will + * be assumed to be of generic "User Synchronization" type + * @param[in] objname - null-terminated object name string. 
If NULL, no name will be assigned + * to the object -- you can use the __itt_sync_rename call later to assign + * the name + * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the + * exact semantics of how prepare/acquired/releasing calls work. + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_set_nameA(void *addr, const char *objtype, const char *objname, int attribute); +void ITTAPI __itt_sync_set_nameW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_set_name __itt_sync_set_nameW +# define __itt_sync_set_name_ptr __itt_sync_set_nameW_ptr +#else /* UNICODE */ +# define __itt_sync_set_name __itt_sync_set_nameA +# define __itt_sync_set_name_ptr __itt_sync_set_nameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_set_name(void *addr, const char* objtype, const char* objname, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute)) +ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_set_nameA ITTNOTIFY_VOID(sync_set_nameA) +#define __itt_sync_set_nameA_ptr ITTNOTIFY_NAME(sync_set_nameA) +#define __itt_sync_set_nameW ITTNOTIFY_VOID(sync_set_nameW) +#define __itt_sync_set_nameW_ptr ITTNOTIFY_NAME(sync_set_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_set_name 
ITTNOTIFY_VOID(sync_set_name) +#define __itt_sync_set_name_ptr ITTNOTIFY_NAME(sync_set_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_set_nameA(addr, objtype, objname, attribute) +#define __itt_sync_set_nameA_ptr 0 +#define __itt_sync_set_nameW(addr, objtype, objname, attribute) +#define __itt_sync_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_set_name(addr, objtype, objname, attribute) +#define __itt_sync_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_set_nameA_ptr 0 +#define __itt_sync_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Assign a name and type to a sync object using char or Unicode string + * @param[in] addr - pointer to the sync object. You should use a real pointer to your object + * to make sure that the values don't clash with other object addresses + * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will + * be assumed to be of generic "User Synchronization" type + * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned + * to the object -- you can use the __itt_sync_rename call later to assign + * the name + * @param[in] typelen, namelen - a length of string for appropriate objtype and objname parameter + * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the + * exact semantics of how prepare/acquired/releasing calls work. 
+ * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int LIBITTAPI __itt_notify_sync_nameA(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); +int LIBITTAPI __itt_notify_sync_nameW(void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_notify_sync_name __itt_notify_sync_nameW +#else +# define __itt_notify_sync_name __itt_notify_sync_nameA +#endif +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int LIBITTAPI __itt_notify_sync_name(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) +ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_notify_sync_nameA ITTNOTIFY_DATA(notify_sync_nameA) +#define __itt_notify_sync_nameA_ptr ITTNOTIFY_NAME(notify_sync_nameA) +#define __itt_notify_sync_nameW ITTNOTIFY_DATA(notify_sync_nameW) +#define __itt_notify_sync_nameW_ptr ITTNOTIFY_NAME(notify_sync_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_notify_sync_name ITTNOTIFY_DATA(notify_sync_name) +#define __itt_notify_sync_name_ptr ITTNOTIFY_NAME(notify_sync_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ 
+#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_notify_sync_nameA(addr, objtype, typelen, objname, namelen, attribute) +#define __itt_notify_sync_nameA_ptr 0 +#define __itt_notify_sync_nameW(addr, objtype, typelen, objname, namelen, attribute) +#define __itt_notify_sync_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_notify_sync_name(addr, objtype, typelen, objname, namelen, attribute) +#define __itt_notify_sync_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_notify_sync_nameA_ptr 0 +#define __itt_notify_sync_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_notify_sync_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Enter spin loop on user-defined sync object + */ +void LIBITTAPI __itt_notify_sync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *addr)) +#define __itt_notify_sync_prepare ITTNOTIFY_VOID(notify_sync_prepare) +#define __itt_notify_sync_prepare_ptr ITTNOTIFY_NAME(notify_sync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_prepare(addr) +#define __itt_notify_sync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Quit spin loop without acquiring spin object + */ +void LIBITTAPI __itt_notify_sync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *addr)) +#define __itt_notify_sync_cancel 
ITTNOTIFY_VOID(notify_sync_cancel) +#define __itt_notify_sync_cancel_ptr ITTNOTIFY_NAME(notify_sync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_cancel(addr) +#define __itt_notify_sync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Successful spin loop completion (sync object acquired) + */ +void LIBITTAPI __itt_notify_sync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *addr)) +#define __itt_notify_sync_acquired ITTNOTIFY_VOID(notify_sync_acquired) +#define __itt_notify_sync_acquired_ptr ITTNOTIFY_NAME(notify_sync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_acquired(addr) +#define __itt_notify_sync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Start sync object releasing code. Is called before the lock release call. 
+ */ +void LIBITTAPI __itt_notify_sync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *addr)) +#define __itt_notify_sync_releasing ITTNOTIFY_VOID(notify_sync_releasing) +#define __itt_notify_sync_releasing_ptr ITTNOTIFY_NAME(notify_sync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_releasing(addr) +#define __itt_notify_sync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_sync group */ + +#ifndef _ITTNOTIFY_H_ +/** + * @defgroup legacy_events Events + * @ingroup legacy + * Events group + * @{ + */ + +/** @brief user event type */ +typedef int __itt_event; + +/** + * @brief Create an event notification + * @note name or namelen being null/name and namelen not matching, user event feature not enabled + * @return non-zero event identifier upon success and __itt_err otherwise + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); +__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_event_create __itt_event_createW +# define __itt_event_create_ptr __itt_event_createW_ptr +#else +# define __itt_event_create __itt_event_createA +# define __itt_event_create_ptr __itt_event_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, __itt_event, 
event_createW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA ITTNOTIFY_DATA(event_createA) +#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) +#define __itt_event_createW ITTNOTIFY_DATA(event_createW) +#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create ITTNOTIFY_DATA(event_create) +#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA(name, namelen) (__itt_event)0 +#define __itt_event_createA_ptr 0 +#define __itt_event_createW(name, namelen) (__itt_event)0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create(name, namelen) (__itt_event)0 +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA_ptr 0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event occurrence. 
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_start(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) +#define __itt_event_start ITTNOTIFY_DATA(event_start) +#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_start(event) (int)0 +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event end occurrence. + * @note It is optional if events do not have durations. + * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_end(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) +#define __itt_event_end ITTNOTIFY_DATA(event_end) +#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_end(event) (int)0 +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_events group */ +#endif /* _ITTNOTIFY_H_ */ + +/** + * @defgroup legacy_memory Memory Accesses + * @ingroup legacy + */ + +/** + * @deprecated Legacy API + * @brief Inform the tool of memory accesses on reading + */ +void LIBITTAPI __itt_memory_read(void *addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size)) +#define __itt_memory_read ITTNOTIFY_VOID(memory_read) +#define 
__itt_memory_read_ptr ITTNOTIFY_NAME(memory_read) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_memory_read(addr, size) +#define __itt_memory_read_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_memory_read_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Inform the tool of memory accesses on writing + */ +void LIBITTAPI __itt_memory_write(void *addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size)) +#define __itt_memory_write ITTNOTIFY_VOID(memory_write) +#define __itt_memory_write_ptr ITTNOTIFY_NAME(memory_write) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_memory_write(addr, size) +#define __itt_memory_write_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_memory_write_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Inform the tool of memory accesses on updating + */ +void LIBITTAPI __itt_memory_update(void *address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size)) +#define __itt_memory_update ITTNOTIFY_VOID(memory_update) +#define __itt_memory_update_ptr ITTNOTIFY_NAME(memory_update) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_memory_update(addr, size) +#define __itt_memory_update_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_memory_update_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_memory group */ + +/** + * @defgroup legacy_state Thread and Object States + * @ingroup legacy + */ + +/** @brief state type */ +typedef int __itt_state_t; + +/** @cond exclude_from_documentation */ +typedef enum __itt_obj_state { + 
__itt_obj_state_err = 0, + __itt_obj_state_clr = 1, + __itt_obj_state_set = 2, + __itt_obj_state_use = 3 +} __itt_obj_state_t; + +typedef enum __itt_thr_state { + __itt_thr_state_err = 0, + __itt_thr_state_clr = 1, + __itt_thr_state_set = 2 +} __itt_thr_state_t; + +typedef enum __itt_obj_prop { + __itt_obj_prop_watch = 1, + __itt_obj_prop_ignore = 2, + __itt_obj_prop_sharable = 3 +} __itt_obj_prop_t; + +typedef enum __itt_thr_prop { + __itt_thr_prop_quiet = 1 +} __itt_thr_prop_t; +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object states + */ +__itt_state_t LIBITTAPI __itt_state_get(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_state_t, state_get, (void)) +#define __itt_state_get ITTNOTIFY_DATA(state_get) +#define __itt_state_get_ptr ITTNOTIFY_NAME(state_get) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_state_get(void) (__itt_state_t)0 +#define __itt_state_get_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_state_get_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object states + */ +__itt_state_t LIBITTAPI __itt_state_set(__itt_state_t s); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_state_t, state_set, (__itt_state_t s)) +#define __itt_state_set ITTNOTIFY_DATA(state_set) +#define __itt_state_set_ptr ITTNOTIFY_NAME(state_set) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_state_set(s) (__itt_state_t)0 +#define __itt_state_set_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_state_set_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object modes + */ +__itt_thr_state_t LIBITTAPI __itt_thr_mode_set(__itt_thr_prop_t p, 
__itt_thr_state_t s); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s)) +#define __itt_thr_mode_set ITTNOTIFY_DATA(thr_mode_set) +#define __itt_thr_mode_set_ptr ITTNOTIFY_NAME(thr_mode_set) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thr_mode_set(p, s) (__itt_thr_state_t)0 +#define __itt_thr_mode_set_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thr_mode_set_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object modes + */ +__itt_obj_state_t LIBITTAPI __itt_obj_mode_set(__itt_obj_prop_t p, __itt_obj_state_t s); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s)) +#define __itt_obj_mode_set ITTNOTIFY_DATA(obj_mode_set) +#define __itt_obj_mode_set_ptr ITTNOTIFY_NAME(obj_mode_set) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_obj_mode_set(p, s) (__itt_obj_state_t)0 +#define __itt_obj_mode_set_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_obj_mode_set_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_state group */ + +/** + * @defgroup frames Frames + * @ingroup legacy + * Frames group + * @{ + */ +/** + * @brief opaque structure for frame identification + */ +typedef struct __itt_frame_t *__itt_frame; + +/** + * @brief Create a global frame with given domain + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_frame ITTAPI __itt_frame_createA(const char *domain); +__itt_frame ITTAPI __itt_frame_createW(const wchar_t *domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_frame_create __itt_frame_createW +# define __itt_frame_create_ptr __itt_frame_createW_ptr +#else /* UNICODE 
*/ +# define __itt_frame_create __itt_frame_createA +# define __itt_frame_create_ptr __itt_frame_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_frame ITTAPI __itt_frame_create(const char *domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain)) +ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_frame_createA ITTNOTIFY_DATA(frame_createA) +#define __itt_frame_createA_ptr ITTNOTIFY_NAME(frame_createA) +#define __itt_frame_createW ITTNOTIFY_DATA(frame_createW) +#define __itt_frame_createW_ptr ITTNOTIFY_NAME(frame_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_frame_create ITTNOTIFY_DATA(frame_create) +#define __itt_frame_create_ptr ITTNOTIFY_NAME(frame_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_frame_createA(domain) +#define __itt_frame_createA_ptr 0 +#define __itt_frame_createW(domain) +#define __itt_frame_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_frame_create(domain) +#define __itt_frame_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_frame_createA_ptr 0 +#define __itt_frame_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_frame_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief Record an frame begin occurrence. 
*/ +void ITTAPI __itt_frame_begin(__itt_frame frame); +/** @brief Record an frame end occurrence. */ +void ITTAPI __itt_frame_end (__itt_frame frame); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame)) +ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame)) +#define __itt_frame_begin ITTNOTIFY_VOID(frame_begin) +#define __itt_frame_begin_ptr ITTNOTIFY_NAME(frame_begin) +#define __itt_frame_end ITTNOTIFY_VOID(frame_end) +#define __itt_frame_end_ptr ITTNOTIFY_NAME(frame_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_frame_begin(frame) +#define __itt_frame_begin_ptr 0 +#define __itt_frame_end(frame) +#define __itt_frame_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_frame_begin_ptr 0 +#define __itt_frame_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} frames group */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _LEGACY_ITTNOTIFY_H_ */ diff --git a/contrib/libs/cxxsupp/openmp/ya.make b/contrib/libs/cxxsupp/openmp/ya.make index 6b5090ce683..2c644683751 100644 --- a/contrib/libs/cxxsupp/openmp/ya.make +++ b/contrib/libs/cxxsupp/openmp/ya.make @@ -1,5 +1,5 @@ -LIBRARY() - +LIBRARY() + LICENSE( MIT AND NCSA @@ -14,13 +14,13 @@ OWNER( g:contrib g:cpp-contrib ) - -NO_UTIL() -NO_PLATFORM() +NO_UTIL() + +NO_PLATFORM() + +NO_COMPILER_WARNINGS() -NO_COMPILER_WARNINGS() - IF (SANITIZER_TYPE == thread) NO_SANITIZE() CFLAGS( @@ -29,62 +29,62 @@ IF (SANITIZER_TYPE == thread) ENDIF() IF (SANITIZER_TYPE == memory) - NO_SANITIZE() + NO_SANITIZE() CFLAGS( -fPIC ) ENDIF() - + COMPILE_C_AS_CXX() - + CXXFLAGS(-fno-exceptions) - + SET_APPEND(CFLAGS -fno-lto) ADDINCL( GLOBAL contrib/libs/cxxsupp/openmp ) - -ADDINCL( - contrib/libs/cxxsupp/openmp/i18n - contrib/libs/cxxsupp/openmp/include/41 - contrib/libs/cxxsupp/openmp/thirdparty/ittnotify -) - -SRCS( - kmp_alloc.c - kmp_atomic.c - 
kmp_csupport.c - kmp_debug.c - kmp_itt.c - kmp_environment.c - kmp_error.c - kmp_global.c - kmp_i18n.c - kmp_io.c - kmp_runtime.c - kmp_settings.c - kmp_str.c - kmp_tasking.c - kmp_taskq.c - kmp_threadprivate.c - kmp_utility.c - z_Linux_util.c - kmp_gsupport.c - asm.S - thirdparty/ittnotify/ittnotify_static.c - kmp_barrier.cpp - kmp_wait_release.cpp - kmp_affinity.cpp - kmp_dispatch.cpp - kmp_lock.cpp - kmp_sched.cpp - kmp_taskdeps.cpp - kmp_cancel.cpp - kmp_ftn_cdecl.c - kmp_ftn_extra.c - kmp_version.c - #ompt-general.c -) - -END() + +ADDINCL( + contrib/libs/cxxsupp/openmp/i18n + contrib/libs/cxxsupp/openmp/include/41 + contrib/libs/cxxsupp/openmp/thirdparty/ittnotify +) + +SRCS( + kmp_alloc.c + kmp_atomic.c + kmp_csupport.c + kmp_debug.c + kmp_itt.c + kmp_environment.c + kmp_error.c + kmp_global.c + kmp_i18n.c + kmp_io.c + kmp_runtime.c + kmp_settings.c + kmp_str.c + kmp_tasking.c + kmp_taskq.c + kmp_threadprivate.c + kmp_utility.c + z_Linux_util.c + kmp_gsupport.c + asm.S + thirdparty/ittnotify/ittnotify_static.c + kmp_barrier.cpp + kmp_wait_release.cpp + kmp_affinity.cpp + kmp_dispatch.cpp + kmp_lock.cpp + kmp_sched.cpp + kmp_taskdeps.cpp + kmp_cancel.cpp + kmp_ftn_cdecl.c + kmp_ftn_extra.c + kmp_version.c + #ompt-general.c +) + +END() diff --git a/contrib/libs/cxxsupp/openmp/z_Linux_asm.s b/contrib/libs/cxxsupp/openmp/z_Linux_asm.s index c56182f900e..7f649b849e4 100644 --- a/contrib/libs/cxxsupp/openmp/z_Linux_asm.s +++ b/contrib/libs/cxxsupp/openmp/z_Linux_asm.s @@ -1,1445 +1,1445 @@ -// z_Linux_asm.s: - microtasking routines specifically -// written for Intel platforms running Linux* OS - -// -////===----------------------------------------------------------------------===// -//// -//// The LLVM Compiler Infrastructure -//// -//// This file is dual licensed under the MIT and the University of Illinois Open -//// Source Licenses. See LICENSE.txt for details. 
-//// -////===----------------------------------------------------------------------===// -// - -// ----------------------------------------------------------------------- -// macros -// ----------------------------------------------------------------------- - -#include "kmp_config.h" - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -# if KMP_MIC -// -// the 'delay r16/r32/r64' should be used instead of the 'pause'. -// The delay operation has the effect of removing the current thread from -// the round-robin HT mechanism, and therefore speeds up the issue rate of -// the other threads on the same core. -// -// A value of 0 works fine for <= 2 threads per core, but causes the EPCC -// barrier time to increase greatly for 3 or more threads per core. -// -// A value of 100 works pretty well for up to 4 threads per core, but isn't -// quite as fast as 0 for 2 threads per core. -// -// We need to check what happens for oversubscription / > 4 threads per core. -// It is possible that we need to pass the delay value in as a parameter -// that the caller determines based on the total # threads / # cores. -// -//.macro pause_op -// mov $100, %rax -// delay %rax -//.endm -# else -# define pause_op .byte 0xf3,0x90 -# endif // KMP_MIC - -# if KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols -# define KMP_LABEL(x) L_##x // form the name of label -.macro KMP_CFI_DEF_OFFSET -.endmacro -.macro KMP_CFI_OFFSET -.endmacro -.macro KMP_CFI_REGISTER -.endmacro -.macro KMP_CFI_DEF -.endmacro -.macro ALIGN - .align $0 -.endmacro -.macro DEBUG_INFO -/* Not sure what .size does in icc, not sure if we need to do something - similar for OS X*. 
-*/ -.endmacro -.macro PROC - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE($0) -KMP_PREFIX_UNDERSCORE($0): -.endmacro -# else // KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols -// Format labels so that they don't override function names in gdb's backtraces -// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*) -# if KMP_MIC -# define KMP_LABEL(x) L_##x // local label -# else -# define KMP_LABEL(x) .L_##x // local label hidden from backtraces -# endif // KMP_MIC -.macro ALIGN size - .align 1<<(\size) -.endm -.macro DEBUG_INFO proc - .cfi_endproc -// Not sure why we need .type and .size for the functions - .align 16 - .type \proc,@function - .size \proc,.-\proc -.endm -.macro PROC proc - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE(\proc) -KMP_PREFIX_UNDERSCORE(\proc): - .cfi_startproc -.endm -.macro KMP_CFI_DEF_OFFSET sz - .cfi_def_cfa_offset \sz -.endm -.macro KMP_CFI_OFFSET reg, sz - .cfi_offset \reg,\sz -.endm -.macro KMP_CFI_REGISTER reg - .cfi_def_cfa_register \reg -.endm -.macro KMP_CFI_DEF reg, sz - .cfi_def_cfa \reg,\sz -.endm -# endif // KMP_OS_DARWIN -#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 - - -// ----------------------------------------------------------------------- -// data -// ----------------------------------------------------------------------- - -#ifdef KMP_GOMP_COMPAT - -// -// Support for unnamed common blocks. -// -// Because the symbol ".gomp_critical_user_" contains a ".", we have to -// put this stuff in assembly. 
-// - -# if KMP_ARCH_X86 -# if KMP_OS_DARWIN - .data - .comm .gomp_critical_user_,32 - .data - .globl ___kmp_unnamed_critical_addr -___kmp_unnamed_critical_addr: - .long .gomp_critical_user_ -# else /* Linux* OS */ - .data - .comm .gomp_critical_user_,32,8 - .data - ALIGN 4 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .4byte .gomp_critical_user_ - .type __kmp_unnamed_critical_addr,@object - .size __kmp_unnamed_critical_addr,4 -# endif /* KMP_OS_DARWIN */ -# endif /* KMP_ARCH_X86 */ - -# if KMP_ARCH_X86_64 -# if KMP_OS_DARWIN - .data - .comm .gomp_critical_user_,32 - .data - .globl ___kmp_unnamed_critical_addr -___kmp_unnamed_critical_addr: - .quad .gomp_critical_user_ -# else /* Linux* OS */ - .data - .comm .gomp_critical_user_,32,8 - .data - ALIGN 8 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .8byte .gomp_critical_user_ - .type __kmp_unnamed_critical_addr,@object - .size __kmp_unnamed_critical_addr,8 -# endif /* KMP_OS_DARWIN */ -# endif /* KMP_ARCH_X86_64 */ - -#endif /* KMP_GOMP_COMPAT */ - - -#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 - -// ----------------------------------------------------------------------- -// microtasking routines specifically written for IA-32 architecture -// running Linux* OS -// ----------------------------------------------------------------------- -// - - .ident "Intel Corporation" - .data - ALIGN 4 -// void -// __kmp_x86_pause( void ); -// - - .text - PROC __kmp_x86_pause - - pause_op - ret - - DEBUG_INFO __kmp_x86_pause - -// -// void -// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); -// - PROC __kmp_x86_cpuid - - pushl %ebp - movl %esp,%ebp - pushl %edi - pushl %ebx - pushl %ecx - pushl %edx - - movl 8(%ebp), %eax - movl 12(%ebp), %ecx - cpuid // Query the CPUID for the current processor - - movl 16(%ebp), %edi - movl %eax, 0(%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx, 12(%edi) - - popl %edx - popl %ecx - popl %ebx - popl %edi - movl %ebp, %esp - popl %ebp 
- ret - - DEBUG_INFO __kmp_x86_cpuid - - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// -// kmp_int32 -// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -// - - PROC __kmp_test_then_add32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - lock - xaddl %eax,(%ecx) - ret - - DEBUG_INFO __kmp_test_then_add32 - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_fixed8 -// -// kmp_int32 -// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// -// return: %al - - PROC __kmp_xchg_fixed8 - - movl 4(%esp), %ecx // "p" - movb 8(%esp), %al // "d" - - lock - xchgb %al,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed8 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_fixed16 -// -// kmp_int16 -// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// return: %ax - - PROC __kmp_xchg_fixed16 - - movl 4(%esp), %ecx // "p" - movw 8(%esp), %ax // "d" - - lock - xchgw %ax,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed16 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_fixed32 -// -// kmp_int32 -// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// -// return: %eax - - PROC __kmp_xchg_fixed32 - - movl 4(%esp), %ecx // "p" - movl 8(%esp), %eax // "d" - - lock - xchgl %eax,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed32 - - -// -// kmp_int8 -// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// - - PROC __kmp_compare_and_store8 - - movl 4(%esp), %ecx - movb 8(%esp), %al - movb 12(%esp), %dl - lock - cmpxchgb %dl,(%ecx) - sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO 
__kmp_compare_and_store8 - -// -// kmp_int16 -// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// - - PROC __kmp_compare_and_store16 - - movl 4(%esp), %ecx - movw 8(%esp), %ax - movw 12(%esp), %dx - lock - cmpxchgw %dx,(%ecx) - sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store16 - -// -// kmp_int32 -// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// - - PROC __kmp_compare_and_store32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - lock - cmpxchgl %edx,(%ecx) - sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store32 - -// -// kmp_int32 -// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// - PROC __kmp_compare_and_store64 - - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %edi - movl 8(%ebp), %edi - movl 12(%ebp), %eax // "cv" low order word - movl 16(%ebp), %edx // "cv" high order word - movl 20(%ebp), %ebx // "sv" low order word - movl 24(%ebp), %ecx // "sv" high order word - lock - cmpxchg8b (%edi) - sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - popl %edi - popl %ebx - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_compare_and_store64 - -// -// kmp_int8 -// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// - - PROC __kmp_compare_and_store_ret8 - - movl 4(%esp), %ecx - movb 8(%esp), %al - movb 12(%esp), %dl - lock - cmpxchgb %dl,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret8 - -// -// kmp_int16 -// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// - - PROC __kmp_compare_and_store_ret16 - - movl 4(%esp), %ecx - movw 8(%esp), %ax - movw 12(%esp), %dx - lock - cmpxchgw %dx,(%ecx) - 
ret - - DEBUG_INFO __kmp_compare_and_store_ret16 - -// -// kmp_int32 -// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// - - PROC __kmp_compare_and_store_ret32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - lock - cmpxchgl %edx,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret32 - -// -// kmp_int64 -// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// - PROC __kmp_compare_and_store_ret64 - - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %edi - movl 8(%ebp), %edi - movl 12(%ebp), %eax // "cv" low order word - movl 16(%ebp), %edx // "cv" high order word - movl 20(%ebp), %ebx // "sv" low order word - movl 24(%ebp), %ecx // "sv" high order word - lock - cmpxchg8b (%edi) - popl %edi - popl %ebx - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_compare_and_store_ret64 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_real32 -// -// kmp_real32 -// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); -// -// parameters: -// addr: 4(%esp) -// data: 8(%esp) -// -// return: %eax - - - PROC __kmp_xchg_real32 - - pushl %ebp - movl %esp, %ebp - subl $4, %esp - pushl %esi - - movl 4(%ebp), %esi - flds (%esi) - // load - fsts -4(%ebp) - // store old value - - movl 8(%ebp), %eax - - lock - xchgl %eax, (%esi) - - flds -4(%ebp) - // return old value - - popl %esi - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_xchg_real32 - -# endif /* !KMP_ASM_INTRINS */ - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_load_x87_fpu_control_word -// -// void -// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: 4(%esp) -// - - PROC __kmp_load_x87_fpu_control_word - - movl 4(%esp), %eax - fldcw (%eax) - ret - - DEBUG_INFO __kmp_load_x87_fpu_control_word - - -//------------------------------------------------------------------------ 
-// -// FUNCTION __kmp_store_x87_fpu_control_word -// -// void -// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: 4(%esp) -// - - PROC __kmp_store_x87_fpu_control_word - - movl 4(%esp), %eax - fstcw (%eax) - ret - - DEBUG_INFO __kmp_store_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_clear_x87_fpu_status_word -// -// void -// __kmp_clear_x87_fpu_status_word(); -// -// - - PROC __kmp_clear_x87_fpu_status_word - - fnclex - ret - - DEBUG_INFO __kmp_clear_x87_fpu_status_word - - -//------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & gtid, argv[0], ... ); -// return 1; -// } - -// -- Begin __kmp_invoke_microtask -// mark_begin; - PROC __kmp_invoke_microtask - - pushl %ebp - KMP_CFI_DEF_OFFSET 8 - KMP_CFI_OFFSET ebp,-8 - movl %esp,%ebp // establish the base pointer for this routine. - KMP_CFI_REGISTER ebp - subl $8,%esp // allocate space for two local variables. 
- // These varibales are: - // argv: -4(%ebp) - // temp: -8(%ebp) - // - pushl %ebx // save %ebx to use during this routine - // -#if OMPT_SUPPORT - movl 28(%ebp),%ebx // get exit_frame address - movl %ebp,(%ebx) // save exit_frame -#endif - - movl 20(%ebp),%ebx // Stack alignment - # args - addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) - shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 - movl %esp,%eax // - subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this - movl %eax,%ebx // Save to %ebx - andl $0xFFFFFF80,%eax // mask off 7 bits - subl %eax,%ebx // Amount to subtract from %esp - subl %ebx,%esp // Prepare the stack ptr -- - // now it will be aligned on 128-byte boundary at the call - - movl 24(%ebp),%eax // copy from p_argv[] - movl %eax,-4(%ebp) // into the local variable *argv. - - movl 20(%ebp),%ebx // argc is 20(%ebp) - shll $2,%ebx - -KMP_LABEL(invoke_2): - cmpl $0,%ebx - jg KMP_LABEL(invoke_4) - jmp KMP_LABEL(invoke_3) - ALIGN 2 -KMP_LABEL(invoke_4): - movl -4(%ebp),%eax - subl $4,%ebx // decrement argc. - addl %ebx,%eax // index into argv. 
- movl (%eax),%edx - pushl %edx - - jmp KMP_LABEL(invoke_2) - ALIGN 2 -KMP_LABEL(invoke_3): - leal 16(%ebp),%eax // push & tid - pushl %eax - - leal 12(%ebp),%eax // push & gtid - pushl %eax - - movl 8(%ebp),%ebx - call *%ebx // call (*pkfn)(); - - movl $1,%eax // return 1; - - movl -12(%ebp),%ebx // restore %ebx - leave - KMP_CFI_DEF esp,4 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - - -// kmp_uint64 -// __kmp_hardware_timestamp(void) - PROC __kmp_hardware_timestamp - rdtsc - ret - - DEBUG_INFO __kmp_hardware_timestamp -// -- End __kmp_hardware_timestamp - -// ----------------------------------------------------------------------- -#endif /* KMP_ARCH_X86 */ - - -#if KMP_ARCH_X86_64 - -// ----------------------------------------------------------------------- -// microtasking routines specifically written for IA-32 architecture and -// Intel(R) 64 running Linux* OS -// ----------------------------------------------------------------------- - -// -- Machine type P -// mark_description "Intel Corporation"; - .ident "Intel Corporation" -// -- .file "z_Linux_asm.s" - .data - ALIGN 4 - -// To prevent getting our code into .data section .text added to every routine definition for x86_64. 
-//------------------------------------------------------------------------ -// -// FUNCTION __kmp_x86_cpuid -// -// void -// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); -// -// parameters: -// mode: %edi -// mode2: %esi -// cpuid_buffer: %rdx - - .text - PROC __kmp_x86_cpuid - - pushq %rbp - movq %rsp,%rbp - pushq %rbx // callee-save register - - movl %esi, %ecx // "mode2" - movl %edi, %eax // "mode" - movq %rdx, %rsi // cpuid_buffer - cpuid // Query the CPUID for the current processor - - movl %eax, 0(%rsi) // store results into buffer - movl %ebx, 4(%rsi) - movl %ecx, 8(%rsi) - movl %edx, 12(%rsi) - - popq %rbx // callee-save register - movq %rbp, %rsp - popq %rbp - ret - - DEBUG_INFO __kmp_x86_cpuid - - - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_test_then_add32 -// -// kmp_int32 -// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: %rdi -// d: %esi -// -// return: %eax - - .text - PROC __kmp_test_then_add32 - - movl %esi, %eax // "d" - lock - xaddl %eax,(%rdi) - ret - - DEBUG_INFO __kmp_test_then_add32 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_test_then_add64 -// -// kmp_int64 -// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); -// -// parameters: -// p: %rdi -// d: %rsi -// return: %rax - - .text - PROC __kmp_test_then_add64 - - movq %rsi, %rax // "d" - lock - xaddq %rax,(%rdi) - ret - - DEBUG_INFO __kmp_test_then_add64 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_fixed8 -// -// kmp_int32 -// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -// -// parameters: -// p: %rdi -// d: %sil -// -// return: %al - - .text - PROC __kmp_xchg_fixed8 - - movb %sil, %al // "d" - - lock - xchgb %al,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed8 - - 
-//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_fixed16 -// -// kmp_int16 -// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -// -// parameters: -// p: %rdi -// d: %si -// return: %ax - - .text - PROC __kmp_xchg_fixed16 - - movw %si, %ax // "d" - - lock - xchgw %ax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed16 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_fixed32 -// -// kmp_int32 -// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: %rdi -// d: %esi -// -// return: %eax - - .text - PROC __kmp_xchg_fixed32 - - movl %esi, %eax // "d" - - lock - xchgl %eax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed32 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_fixed64 -// -// kmp_int64 -// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); -// -// parameters: -// p: %rdi -// d: %rsi -// return: %rax - - .text - PROC __kmp_xchg_fixed64 - - movq %rsi, %rax // "d" - - lock - xchgq %rax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed64 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store8 -// -// kmp_int8 -// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - - .text - PROC __kmp_compare_and_store8 - - movb %sil, %al // "cv" - lock - cmpxchgb %dl,(%rdi) - sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store8 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store16 -// -// kmp_int16 -// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// -// parameters: -// p: %rdi -// cv: 
%si -// sv: %dx -// -// return: %eax - - .text - PROC __kmp_compare_and_store16 - - movw %si, %ax // "cv" - lock - cmpxchgw %dx,(%rdi) - sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store16 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store32 -// -// kmp_int32 -// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - - .text - PROC __kmp_compare_and_store32 - - movl %esi, %eax // "cv" - lock - cmpxchgl %edx,(%rdi) - sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store32 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store64 -// -// kmp_int32 -// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// -// parameters: -// p: %rdi -// cv: %rsi -// sv: %rdx -// return: %eax - - .text - PROC __kmp_compare_and_store64 - - movq %rsi, %rax // "cv" - lock - cmpxchgq %rdx,(%rdi) - sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store64 - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store_ret8 -// -// kmp_int8 -// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - - .text - PROC __kmp_compare_and_store_ret8 - - movb %sil, %al // "cv" - lock - cmpxchgb %dl,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret8 - - 
-//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store_ret16 -// -// kmp_int16 -// __kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// -// parameters: -// p: %rdi -// cv: %si -// sv: %dx -// -// return: %eax - - .text - PROC __kmp_compare_and_store_ret16 - - movw %si, %ax // "cv" - lock - cmpxchgw %dx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret16 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store_ret32 -// -// kmp_int32 -// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - - .text - PROC __kmp_compare_and_store_ret32 - - movl %esi, %eax // "cv" - lock - cmpxchgl %edx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret32 - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_compare_and_store_ret64 -// -// kmp_int64 -// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// -// parameters: -// p: %rdi -// cv: %rsi -// sv: %rdx -// return: %eax - - .text - PROC __kmp_compare_and_store_ret64 - - movq %rsi, %rax // "cv" - lock - cmpxchgq %rdx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret64 - -# endif /* !KMP_ASM_INTRINS */ - - -# if !KMP_MIC - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_real32 -// -// kmp_real32 -// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); -// -// parameters: -// addr: %rdi -// data: %xmm0 (lower 4 bytes) -// -// return: %xmm0 (lower 4 bytes) - - .text - PROC __kmp_xchg_real32 - - movd %xmm0, %eax // load "data" to eax - - lock - xchgl %eax, (%rdi) - - movd %eax, %xmm0 // load old value into return register - - ret - - DEBUG_INFO __kmp_xchg_real32 - - 
-//------------------------------------------------------------------------ -// -// FUNCTION __kmp_xchg_real64 -// -// kmp_real64 -// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); -// -// parameters: -// addr: %rdi -// data: %xmm0 (lower 8 bytes) -// return: %xmm0 (lower 8 bytes) -// - - .text - PROC __kmp_xchg_real64 - - movd %xmm0, %rax // load "data" to rax - - lock - xchgq %rax, (%rdi) - - movd %rax, %xmm0 // load old value into return register - ret - - DEBUG_INFO __kmp_xchg_real64 - - -# endif /* !KMP_MIC */ - -# endif /* !KMP_ASM_INTRINS */ - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_load_x87_fpu_control_word -// -// void -// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: %rdi -// - - .text - PROC __kmp_load_x87_fpu_control_word - - fldcw (%rdi) - ret - - DEBUG_INFO __kmp_load_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_store_x87_fpu_control_word -// -// void -// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: %rdi -// - - .text - PROC __kmp_store_x87_fpu_control_word - - fstcw (%rdi) - ret - - DEBUG_INFO __kmp_store_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_clear_x87_fpu_status_word -// -// void -// __kmp_clear_x87_fpu_status_word(); -// -// - - .text - PROC __kmp_clear_x87_fpu_status_word - -#if KMP_MIC -// TODO: remove the workaround for problem with fnclex instruction (no CQ known) - fstenv -32(%rsp) // store FP env - andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW - fldenv -32(%rsp) // load FP env back - ret -#else - fnclex - ret -#endif - - DEBUG_INFO __kmp_clear_x87_fpu_status_word - - -//------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... 
); -// -// int -// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; -// } -// -// note: -// at call to pkfn must have %rsp 128-byte aligned for compiler -// -// parameters: -// %rdi: pkfn -// %esi: gtid -// %edx: tid -// %ecx: argc -// %r8: p_argv -// %r9: &exit_frame -// -// locals: -// __gtid: gtid parm pushed on stack so can pass >id to pkfn -// __tid: tid parm pushed on stack so can pass &tid to pkfn -// -// reg temps: -// %rax: used all over the place -// %rdx: used in stack pointer alignment calculation -// %r11: used to traverse p_argv array -// %rsi: used as temporary for stack parameters -// used as temporary for number of pkfn parms to push -// %rbx: used to hold pkfn address, and zero constant, callee-save -// -// return: %eax (always 1/TRUE) -// - -__gtid = -16 -__tid = -24 - -// -- Begin __kmp_invoke_microtask -// mark_begin; - .text - PROC __kmp_invoke_microtask - - pushq %rbp // save base pointer - KMP_CFI_DEF_OFFSET 16 - KMP_CFI_OFFSET rbp,-16 - movq %rsp,%rbp // establish the base pointer for this routine. 
- KMP_CFI_REGISTER rbp - -#if OMPT_SUPPORT - movq %rbp, (%r9) // save exit_frame -#endif - - pushq %rbx // %rbx is callee-saved register - pushq %rsi // Put gtid on stack so can pass &tgid to pkfn - pushq %rdx // Put tid on stack so can pass &tid to pkfn - - movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax - movq $0, %rbx // constant for cmovs later - subq $4, %rax // subtract four args passed in registers to pkfn -#if KMP_MIC - js KMP_LABEL(kmp_0) // jump to movq - jmp KMP_LABEL(kmp_0_exit) // jump ahead -KMP_LABEL(kmp_0): - movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) -KMP_LABEL(kmp_0_exit): -#else - cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) -#endif // KMP_MIC - - movq %rax, %rsi // save max(0, argc-4) -> %rsi for later - shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 - - movq %rsp, %rdx // - subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- - // without align, stack ptr would be this - movq %rdx, %rax // Save to %rax - - andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) - subq %rax, %rdx // Amount to subtract from %rsp - subq %rdx, %rsp // Prepare the stack ptr -- - // now %rsp will align to 128-byte boundary at call site - - // setup pkfn parameter reg and stack - movq %rcx, %rax // argc -> %rax - cmpq $0, %rsi - je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push - shlq $3, %rcx // argc*8 -> %rcx - movq %r8, %rdx // p_argv -> %rdx - addq %rcx, %rdx // &p_argv[argc] -> %rdx - - movq %rsi, %rcx // max (0, argc-4) -> %rcx - -KMP_LABEL(kmp_invoke_push_parms): - // push nth - 7th parms to pkfn on stack - subq $8, %rdx // decrement p_argv pointer to previous parm - movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi - pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) - subl $1, %ecx - -// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e -// if the name of the label that is an operand of this jecxz starts with a dot 
("."); -// Apple's linker does not support 1-byte length relocation; -// Resolution: replace all .labelX entries with L_labelX. - - jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left - jmp KMP_LABEL(kmp_invoke_push_parms) - ALIGN 3 -KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. - // order here is important to avoid trashing - // registers used for both input and output parms! - movq %rdi, %rbx // pkfn -> %rbx - leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn) - leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) - - movq %r8, %r11 // p_argv -> %r11 - -#if KMP_MIC - cmpq $4, %rax // argc >= 4? - jns KMP_LABEL(kmp_4) // jump to movq - jmp KMP_LABEL(kmp_4_exit) // jump ahead -KMP_LABEL(kmp_4): - movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) -KMP_LABEL(kmp_4_exit): - - cmpq $3, %rax // argc >= 3? - jns KMP_LABEL(kmp_3) // jump to movq - jmp KMP_LABEL(kmp_3_exit) // jump ahead -KMP_LABEL(kmp_3): - movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) -KMP_LABEL(kmp_3_exit): - - cmpq $2, %rax // argc >= 2? - jns KMP_LABEL(kmp_2) // jump to movq - jmp KMP_LABEL(kmp_2_exit) // jump ahead -KMP_LABEL(kmp_2): - movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) -KMP_LABEL(kmp_2_exit): - - cmpq $1, %rax // argc >= 1? - jns KMP_LABEL(kmp_1) // jump to movq - jmp KMP_LABEL(kmp_1_exit) // jump ahead -KMP_LABEL(kmp_1): - movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) -KMP_LABEL(kmp_1_exit): -#else - cmpq $4, %rax // argc >= 4? - cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) - - cmpq $3, %rax // argc >= 3? - cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) - - cmpq $2, %rax // argc >= 2? - cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) - - cmpq $1, %rax // argc >= 1? 
- cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) -#endif // KMP_MIC - - call *%rbx // call (*pkfn)(); - movq $1, %rax // move 1 into return register; - - movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified - movq %rbp, %rsp // restore stack pointer - popq %rbp // restore frame pointer - KMP_CFI_DEF rsp,8 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - -// kmp_uint64 -// __kmp_hardware_timestamp(void) - .text - PROC __kmp_hardware_timestamp - rdtsc - shlq $32, %rdx - orq %rdx, %rax - ret - - DEBUG_INFO __kmp_hardware_timestamp -// -- End __kmp_hardware_timestamp - -//------------------------------------------------------------------------ -// -// FUNCTION __kmp_bsr32 -// -// int -// __kmp_bsr32( int ); -// - - .text - PROC __kmp_bsr32 - - bsr %edi,%eax - ret - - DEBUG_INFO __kmp_bsr32 - - -// ----------------------------------------------------------------------- -#endif /* KMP_ARCH_X86_64 */ - -#if KMP_ARCH_ARM - .data - .comm .gomp_critical_user_,32,8 - .data - .align 4 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .4byte .gomp_critical_user_ - .size __kmp_unnamed_critical_addr,4 -#endif /* KMP_ARCH_ARM */ - -#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 - .data - .comm .gomp_critical_user_,32,8 - .data - .align 8 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .8byte .gomp_critical_user_ - .size __kmp_unnamed_critical_addr,8 -#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */ - -#if KMP_OS_LINUX -# if KMP_ARCH_ARM -.section .note.GNU-stack,"",%progbits -# else -.section .note.GNU-stack,"",@progbits -# endif -#endif +// z_Linux_asm.s: - microtasking routines specifically +// written for Intel platforms running Linux* OS + +// +////===----------------------------------------------------------------------===// +//// +//// The LLVM Compiler Infrastructure +//// +//// This file is dual licensed under the MIT and the University of Illinois Open +//// Source 
Licenses. See LICENSE.txt for details. +//// +////===----------------------------------------------------------------------===// +// + +// ----------------------------------------------------------------------- +// macros +// ----------------------------------------------------------------------- + +#include "kmp_config.h" + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +# if KMP_MIC +// +// the 'delay r16/r32/r64' should be used instead of the 'pause'. +// The delay operation has the effect of removing the current thread from +// the round-robin HT mechanism, and therefore speeds up the issue rate of +// the other threads on the same core. +// +// A value of 0 works fine for <= 2 threads per core, but causes the EPCC +// barrier time to increase greatly for 3 or more threads per core. +// +// A value of 100 works pretty well for up to 4 threads per core, but isn't +// quite as fast as 0 for 2 threads per core. +// +// We need to check what happens for oversubscription / > 4 threads per core. +// It is possible that we need to pass the delay value in as a parameter +// that the caller determines based on the total # threads / # cores. +// +//.macro pause_op +// mov $100, %rax +// delay %rax +//.endm +# else +# define pause_op .byte 0xf3,0x90 +# endif // KMP_MIC + +# if KMP_OS_DARWIN +# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols +# define KMP_LABEL(x) L_##x // form the name of label +.macro KMP_CFI_DEF_OFFSET +.endmacro +.macro KMP_CFI_OFFSET +.endmacro +.macro KMP_CFI_REGISTER +.endmacro +.macro KMP_CFI_DEF +.endmacro +.macro ALIGN + .align $0 +.endmacro +.macro DEBUG_INFO +/* Not sure what .size does in icc, not sure if we need to do something + similar for OS X*. 
+*/ +.endmacro +.macro PROC + ALIGN 4 + .globl KMP_PREFIX_UNDERSCORE($0) +KMP_PREFIX_UNDERSCORE($0): +.endmacro +# else // KMP_OS_DARWIN +# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols +// Format labels so that they don't override function names in gdb's backtraces +// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*) +# if KMP_MIC +# define KMP_LABEL(x) L_##x // local label +# else +# define KMP_LABEL(x) .L_##x // local label hidden from backtraces +# endif // KMP_MIC +.macro ALIGN size + .align 1<<(\size) +.endm +.macro DEBUG_INFO proc + .cfi_endproc +// Not sure why we need .type and .size for the functions + .align 16 + .type \proc,@function + .size \proc,.-\proc +.endm +.macro PROC proc + ALIGN 4 + .globl KMP_PREFIX_UNDERSCORE(\proc) +KMP_PREFIX_UNDERSCORE(\proc): + .cfi_startproc +.endm +.macro KMP_CFI_DEF_OFFSET sz + .cfi_def_cfa_offset \sz +.endm +.macro KMP_CFI_OFFSET reg, sz + .cfi_offset \reg,\sz +.endm +.macro KMP_CFI_REGISTER reg + .cfi_def_cfa_register \reg +.endm +.macro KMP_CFI_DEF reg, sz + .cfi_def_cfa \reg,\sz +.endm +# endif // KMP_OS_DARWIN +#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 + + +// ----------------------------------------------------------------------- +// data +// ----------------------------------------------------------------------- + +#ifdef KMP_GOMP_COMPAT + +// +// Support for unnamed common blocks. +// +// Because the symbol ".gomp_critical_user_" contains a ".", we have to +// put this stuff in assembly. 
+// + +# if KMP_ARCH_X86 +# if KMP_OS_DARWIN + .data + .comm .gomp_critical_user_,32 + .data + .globl ___kmp_unnamed_critical_addr +___kmp_unnamed_critical_addr: + .long .gomp_critical_user_ +# else /* Linux* OS */ + .data + .comm .gomp_critical_user_,32,8 + .data + ALIGN 4 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .4byte .gomp_critical_user_ + .type __kmp_unnamed_critical_addr,@object + .size __kmp_unnamed_critical_addr,4 +# endif /* KMP_OS_DARWIN */ +# endif /* KMP_ARCH_X86 */ + +# if KMP_ARCH_X86_64 +# if KMP_OS_DARWIN + .data + .comm .gomp_critical_user_,32 + .data + .globl ___kmp_unnamed_critical_addr +___kmp_unnamed_critical_addr: + .quad .gomp_critical_user_ +# else /* Linux* OS */ + .data + .comm .gomp_critical_user_,32,8 + .data + ALIGN 8 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .8byte .gomp_critical_user_ + .type __kmp_unnamed_critical_addr,@object + .size __kmp_unnamed_critical_addr,8 +# endif /* KMP_OS_DARWIN */ +# endif /* KMP_ARCH_X86_64 */ + +#endif /* KMP_GOMP_COMPAT */ + + +#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 + +// ----------------------------------------------------------------------- +// microtasking routines specifically written for IA-32 architecture +// running Linux* OS +// ----------------------------------------------------------------------- +// + + .ident "Intel Corporation" + .data + ALIGN 4 +// void +// __kmp_x86_pause( void ); +// + + .text + PROC __kmp_x86_pause + + pause_op + ret + + DEBUG_INFO __kmp_x86_pause + +// +// void +// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); +// + PROC __kmp_x86_cpuid + + pushl %ebp + movl %esp,%ebp + pushl %edi + pushl %ebx + pushl %ecx + pushl %edx + + movl 8(%ebp), %eax + movl 12(%ebp), %ecx + cpuid // Query the CPUID for the current processor + + movl 16(%ebp), %edi + movl %eax, 0(%edi) + movl %ebx, 4(%edi) + movl %ecx, 8(%edi) + movl %edx, 12(%edi) + + popl %edx + popl %ecx + popl %ebx + popl %edi + movl %ebp, %esp + popl %ebp 
+ ret + + DEBUG_INFO __kmp_x86_cpuid + + +# if !KMP_ASM_INTRINS + +//------------------------------------------------------------------------ +// +// kmp_int32 +// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); +// + + PROC __kmp_test_then_add32 + + movl 4(%esp), %ecx + movl 8(%esp), %eax + lock + xaddl %eax,(%ecx) + ret + + DEBUG_INFO __kmp_test_then_add32 + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_fixed8 +// +// kmp_int32 +// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); +// +// parameters: +// p: 4(%esp) +// d: 8(%esp) +// +// return: %al + + PROC __kmp_xchg_fixed8 + + movl 4(%esp), %ecx // "p" + movb 8(%esp), %al // "d" + + lock + xchgb %al,(%ecx) + ret + + DEBUG_INFO __kmp_xchg_fixed8 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_fixed16 +// +// kmp_int16 +// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); +// +// parameters: +// p: 4(%esp) +// d: 8(%esp) +// return: %ax + + PROC __kmp_xchg_fixed16 + + movl 4(%esp), %ecx // "p" + movw 8(%esp), %ax // "d" + + lock + xchgw %ax,(%ecx) + ret + + DEBUG_INFO __kmp_xchg_fixed16 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_fixed32 +// +// kmp_int32 +// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); +// +// parameters: +// p: 4(%esp) +// d: 8(%esp) +// +// return: %eax + + PROC __kmp_xchg_fixed32 + + movl 4(%esp), %ecx // "p" + movl 8(%esp), %eax // "d" + + lock + xchgl %eax,(%ecx) + ret + + DEBUG_INFO __kmp_xchg_fixed32 + + +// +// kmp_int8 +// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +// + + PROC __kmp_compare_and_store8 + + movl 4(%esp), %ecx + movb 8(%esp), %al + movb 12(%esp), %dl + lock + cmpxchgb %dl,(%ecx) + sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + ret + + DEBUG_INFO 
__kmp_compare_and_store8 + +// +// kmp_int16 +// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +// + + PROC __kmp_compare_and_store16 + + movl 4(%esp), %ecx + movw 8(%esp), %ax + movw 12(%esp), %dx + lock + cmpxchgw %dx,(%ecx) + sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + ret + + DEBUG_INFO __kmp_compare_and_store16 + +// +// kmp_int32 +// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +// + + PROC __kmp_compare_and_store32 + + movl 4(%esp), %ecx + movl 8(%esp), %eax + movl 12(%esp), %edx + lock + cmpxchgl %edx,(%ecx) + sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + ret + + DEBUG_INFO __kmp_compare_and_store32 + +// +// kmp_int32 +// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +// + PROC __kmp_compare_and_store64 + + pushl %ebp + movl %esp, %ebp + pushl %ebx + pushl %edi + movl 8(%ebp), %edi + movl 12(%ebp), %eax // "cv" low order word + movl 16(%ebp), %edx // "cv" high order word + movl 20(%ebp), %ebx // "sv" low order word + movl 24(%ebp), %ecx // "sv" high order word + lock + cmpxchg8b (%edi) + sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + popl %edi + popl %ebx + movl %ebp, %esp + popl %ebp + ret + + DEBUG_INFO __kmp_compare_and_store64 + +// +// kmp_int8 +// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +// + + PROC __kmp_compare_and_store_ret8 + + movl 4(%esp), %ecx + movb 8(%esp), %al + movb 12(%esp), %dl + lock + cmpxchgb %dl,(%ecx) + ret + + DEBUG_INFO __kmp_compare_and_store_ret8 + +// +// kmp_int16 +// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +// + + PROC __kmp_compare_and_store_ret16 + + movl 4(%esp), %ecx + movw 8(%esp), %ax + movw 12(%esp), %dx + lock + cmpxchgw %dx,(%ecx) + 
ret + + DEBUG_INFO __kmp_compare_and_store_ret16 + +// +// kmp_int32 +// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +// + + PROC __kmp_compare_and_store_ret32 + + movl 4(%esp), %ecx + movl 8(%esp), %eax + movl 12(%esp), %edx + lock + cmpxchgl %edx,(%ecx) + ret + + DEBUG_INFO __kmp_compare_and_store_ret32 + +// +// kmp_int64 +// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +// + PROC __kmp_compare_and_store_ret64 + + pushl %ebp + movl %esp, %ebp + pushl %ebx + pushl %edi + movl 8(%ebp), %edi + movl 12(%ebp), %eax // "cv" low order word + movl 16(%ebp), %edx // "cv" high order word + movl 20(%ebp), %ebx // "sv" low order word + movl 24(%ebp), %ecx // "sv" high order word + lock + cmpxchg8b (%edi) + popl %edi + popl %ebx + movl %ebp, %esp + popl %ebp + ret + + DEBUG_INFO __kmp_compare_and_store_ret64 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_real32 +// +// kmp_real32 +// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); +// +// parameters: +// addr: 4(%esp) +// data: 8(%esp) +// +// return: %eax + + + PROC __kmp_xchg_real32 + + pushl %ebp + movl %esp, %ebp + subl $4, %esp + pushl %esi + + movl 4(%ebp), %esi + flds (%esi) + // load + fsts -4(%ebp) + // store old value + + movl 8(%ebp), %eax + + lock + xchgl %eax, (%esi) + + flds -4(%ebp) + // return old value + + popl %esi + movl %ebp, %esp + popl %ebp + ret + + DEBUG_INFO __kmp_xchg_real32 + +# endif /* !KMP_ASM_INTRINS */ + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_load_x87_fpu_control_word +// +// void +// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: 4(%esp) +// + + PROC __kmp_load_x87_fpu_control_word + + movl 4(%esp), %eax + fldcw (%eax) + ret + + DEBUG_INFO __kmp_load_x87_fpu_control_word + + +//------------------------------------------------------------------------ 
+// +// FUNCTION __kmp_store_x87_fpu_control_word +// +// void +// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: 4(%esp) +// + + PROC __kmp_store_x87_fpu_control_word + + movl 4(%esp), %eax + fstcw (%eax) + ret + + DEBUG_INFO __kmp_store_x87_fpu_control_word + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_clear_x87_fpu_status_word +// +// void +// __kmp_clear_x87_fpu_status_word(); +// +// + + PROC __kmp_clear_x87_fpu_status_word + + fnclex + ret + + DEBUG_INFO __kmp_clear_x87_fpu_status_word + + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)( int *gtid, int *tid, ... ); +// +// int +// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, +// int argc, void *p_argv[] ) { +// (*pkfn)( & gtid, & gtid, argv[0], ... ); +// return 1; +// } + +// -- Begin __kmp_invoke_microtask +// mark_begin; + PROC __kmp_invoke_microtask + + pushl %ebp + KMP_CFI_DEF_OFFSET 8 + KMP_CFI_OFFSET ebp,-8 + movl %esp,%ebp // establish the base pointer for this routine. + KMP_CFI_REGISTER ebp + subl $8,%esp // allocate space for two local variables. 
+ // These varibales are: + // argv: -4(%ebp) + // temp: -8(%ebp) + // + pushl %ebx // save %ebx to use during this routine + // +#if OMPT_SUPPORT + movl 28(%ebp),%ebx // get exit_frame address + movl %ebp,(%ebx) // save exit_frame +#endif + + movl 20(%ebp),%ebx // Stack alignment - # args + addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) + shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 + movl %esp,%eax // + subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this + movl %eax,%ebx // Save to %ebx + andl $0xFFFFFF80,%eax // mask off 7 bits + subl %eax,%ebx // Amount to subtract from %esp + subl %ebx,%esp // Prepare the stack ptr -- + // now it will be aligned on 128-byte boundary at the call + + movl 24(%ebp),%eax // copy from p_argv[] + movl %eax,-4(%ebp) // into the local variable *argv. + + movl 20(%ebp),%ebx // argc is 20(%ebp) + shll $2,%ebx + +KMP_LABEL(invoke_2): + cmpl $0,%ebx + jg KMP_LABEL(invoke_4) + jmp KMP_LABEL(invoke_3) + ALIGN 2 +KMP_LABEL(invoke_4): + movl -4(%ebp),%eax + subl $4,%ebx // decrement argc. + addl %ebx,%eax // index into argv. 
+ movl (%eax),%edx + pushl %edx + + jmp KMP_LABEL(invoke_2) + ALIGN 2 +KMP_LABEL(invoke_3): + leal 16(%ebp),%eax // push & tid + pushl %eax + + leal 12(%ebp),%eax // push & gtid + pushl %eax + + movl 8(%ebp),%ebx + call *%ebx // call (*pkfn)(); + + movl $1,%eax // return 1; + + movl -12(%ebp),%ebx // restore %ebx + leave + KMP_CFI_DEF esp,4 + ret + + DEBUG_INFO __kmp_invoke_microtask +// -- End __kmp_invoke_microtask + + +// kmp_uint64 +// __kmp_hardware_timestamp(void) + PROC __kmp_hardware_timestamp + rdtsc + ret + + DEBUG_INFO __kmp_hardware_timestamp +// -- End __kmp_hardware_timestamp + +// ----------------------------------------------------------------------- +#endif /* KMP_ARCH_X86 */ + + +#if KMP_ARCH_X86_64 + +// ----------------------------------------------------------------------- +// microtasking routines specifically written for IA-32 architecture and +// Intel(R) 64 running Linux* OS +// ----------------------------------------------------------------------- + +// -- Machine type P +// mark_description "Intel Corporation"; + .ident "Intel Corporation" +// -- .file "z_Linux_asm.s" + .data + ALIGN 4 + +// To prevent getting our code into .data section .text added to every routine definition for x86_64. 
+//------------------------------------------------------------------------ +// +// FUNCTION __kmp_x86_cpuid +// +// void +// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); +// +// parameters: +// mode: %edi +// mode2: %esi +// cpuid_buffer: %rdx + + .text + PROC __kmp_x86_cpuid + + pushq %rbp + movq %rsp,%rbp + pushq %rbx // callee-save register + + movl %esi, %ecx // "mode2" + movl %edi, %eax // "mode" + movq %rdx, %rsi // cpuid_buffer + cpuid // Query the CPUID for the current processor + + movl %eax, 0(%rsi) // store results into buffer + movl %ebx, 4(%rsi) + movl %ecx, 8(%rsi) + movl %edx, 12(%rsi) + + popq %rbx // callee-save register + movq %rbp, %rsp + popq %rbp + ret + + DEBUG_INFO __kmp_x86_cpuid + + + +# if !KMP_ASM_INTRINS + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_test_then_add32 +// +// kmp_int32 +// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); +// +// parameters: +// p: %rdi +// d: %esi +// +// return: %eax + + .text + PROC __kmp_test_then_add32 + + movl %esi, %eax // "d" + lock + xaddl %eax,(%rdi) + ret + + DEBUG_INFO __kmp_test_then_add32 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_test_then_add64 +// +// kmp_int64 +// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); +// +// parameters: +// p: %rdi +// d: %rsi +// return: %rax + + .text + PROC __kmp_test_then_add64 + + movq %rsi, %rax // "d" + lock + xaddq %rax,(%rdi) + ret + + DEBUG_INFO __kmp_test_then_add64 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_fixed8 +// +// kmp_int32 +// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); +// +// parameters: +// p: %rdi +// d: %sil +// +// return: %al + + .text + PROC __kmp_xchg_fixed8 + + movb %sil, %al // "d" + + lock + xchgb %al,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed8 + + 
+//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_fixed16 +// +// kmp_int16 +// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); +// +// parameters: +// p: %rdi +// d: %si +// return: %ax + + .text + PROC __kmp_xchg_fixed16 + + movw %si, %ax // "d" + + lock + xchgw %ax,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed16 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_fixed32 +// +// kmp_int32 +// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); +// +// parameters: +// p: %rdi +// d: %esi +// +// return: %eax + + .text + PROC __kmp_xchg_fixed32 + + movl %esi, %eax // "d" + + lock + xchgl %eax,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed32 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_fixed64 +// +// kmp_int64 +// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); +// +// parameters: +// p: %rdi +// d: %rsi +// return: %rax + + .text + PROC __kmp_xchg_fixed64 + + movq %rsi, %rax // "d" + + lock + xchgq %rax,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed64 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store8 +// +// kmp_int8 +// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + + .text + PROC __kmp_compare_and_store8 + + movb %sil, %al // "cv" + lock + cmpxchgb %dl,(%rdi) + sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store8 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store16 +// +// kmp_int16 +// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +// +// parameters: +// p: %rdi +// cv: 
%si +// sv: %dx +// +// return: %eax + + .text + PROC __kmp_compare_and_store16 + + movw %si, %ax // "cv" + lock + cmpxchgw %dx,(%rdi) + sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store16 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store32 +// +// kmp_int32 +// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + + .text + PROC __kmp_compare_and_store32 + + movl %esi, %eax // "cv" + lock + cmpxchgl %edx,(%rdi) + sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store32 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store64 +// +// kmp_int32 +// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +// +// parameters: +// p: %rdi +// cv: %rsi +// sv: %rdx +// return: %eax + + .text + PROC __kmp_compare_and_store64 + + movq %rsi, %rax // "cv" + lock + cmpxchgq %rdx,(%rdi) + sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store64 + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store_ret8 +// +// kmp_int8 +// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + + .text + PROC __kmp_compare_and_store_ret8 + + movb %sil, %al // "cv" + lock + cmpxchgb %dl,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret8 + + 
+//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store_ret16 +// +// kmp_int16 +// __kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +// +// parameters: +// p: %rdi +// cv: %si +// sv: %dx +// +// return: %eax + + .text + PROC __kmp_compare_and_store_ret16 + + movw %si, %ax // "cv" + lock + cmpxchgw %dx,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret16 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store_ret32 +// +// kmp_int32 +// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + + .text + PROC __kmp_compare_and_store_ret32 + + movl %esi, %eax // "cv" + lock + cmpxchgl %edx,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret32 + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_compare_and_store_ret64 +// +// kmp_int64 +// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +// +// parameters: +// p: %rdi +// cv: %rsi +// sv: %rdx +// return: %eax + + .text + PROC __kmp_compare_and_store_ret64 + + movq %rsi, %rax // "cv" + lock + cmpxchgq %rdx,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret64 + +# endif /* !KMP_ASM_INTRINS */ + + +# if !KMP_MIC + +# if !KMP_ASM_INTRINS + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_real32 +// +// kmp_real32 +// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); +// +// parameters: +// addr: %rdi +// data: %xmm0 (lower 4 bytes) +// +// return: %xmm0 (lower 4 bytes) + + .text + PROC __kmp_xchg_real32 + + movd %xmm0, %eax // load "data" to eax + + lock + xchgl %eax, (%rdi) + + movd %eax, %xmm0 // load old value into return register + + ret + + DEBUG_INFO __kmp_xchg_real32 + + 
+//------------------------------------------------------------------------ +// +// FUNCTION __kmp_xchg_real64 +// +// kmp_real64 +// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); +// +// parameters: +// addr: %rdi +// data: %xmm0 (lower 8 bytes) +// return: %xmm0 (lower 8 bytes) +// + + .text + PROC __kmp_xchg_real64 + + movd %xmm0, %rax // load "data" to rax + + lock + xchgq %rax, (%rdi) + + movd %rax, %xmm0 // load old value into return register + ret + + DEBUG_INFO __kmp_xchg_real64 + + +# endif /* !KMP_MIC */ + +# endif /* !KMP_ASM_INTRINS */ + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_load_x87_fpu_control_word +// +// void +// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: %rdi +// + + .text + PROC __kmp_load_x87_fpu_control_word + + fldcw (%rdi) + ret + + DEBUG_INFO __kmp_load_x87_fpu_control_word + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_store_x87_fpu_control_word +// +// void +// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: %rdi +// + + .text + PROC __kmp_store_x87_fpu_control_word + + fstcw (%rdi) + ret + + DEBUG_INFO __kmp_store_x87_fpu_control_word + + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_clear_x87_fpu_status_word +// +// void +// __kmp_clear_x87_fpu_status_word(); +// +// + + .text + PROC __kmp_clear_x87_fpu_status_word + +#if KMP_MIC +// TODO: remove the workaround for problem with fnclex instruction (no CQ known) + fstenv -32(%rsp) // store FP env + andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW + fldenv -32(%rsp) // load FP env back + ret +#else + fnclex + ret +#endif + + DEBUG_INFO __kmp_clear_x87_fpu_status_word + + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)( int *gtid, int *tid, ... 
); +// +// int +// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] ) { +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// return 1; +// } +// +// note: +// at call to pkfn must have %rsp 128-byte aligned for compiler +// +// parameters: +// %rdi: pkfn +// %esi: gtid +// %edx: tid +// %ecx: argc +// %r8: p_argv +// %r9: &exit_frame +// +// locals: +// __gtid: gtid parm pushed on stack so can pass >id to pkfn +// __tid: tid parm pushed on stack so can pass &tid to pkfn +// +// reg temps: +// %rax: used all over the place +// %rdx: used in stack pointer alignment calculation +// %r11: used to traverse p_argv array +// %rsi: used as temporary for stack parameters +// used as temporary for number of pkfn parms to push +// %rbx: used to hold pkfn address, and zero constant, callee-save +// +// return: %eax (always 1/TRUE) +// + +__gtid = -16 +__tid = -24 + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + PROC __kmp_invoke_microtask + + pushq %rbp // save base pointer + KMP_CFI_DEF_OFFSET 16 + KMP_CFI_OFFSET rbp,-16 + movq %rsp,%rbp // establish the base pointer for this routine. 
+ KMP_CFI_REGISTER rbp + +#if OMPT_SUPPORT + movq %rbp, (%r9) // save exit_frame +#endif + + pushq %rbx // %rbx is callee-saved register + pushq %rsi // Put gtid on stack so can pass &tgid to pkfn + pushq %rdx // Put tid on stack so can pass &tid to pkfn + + movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax + movq $0, %rbx // constant for cmovs later + subq $4, %rax // subtract four args passed in registers to pkfn +#if KMP_MIC + js KMP_LABEL(kmp_0) // jump to movq + jmp KMP_LABEL(kmp_0_exit) // jump ahead +KMP_LABEL(kmp_0): + movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) +KMP_LABEL(kmp_0_exit): +#else + cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) +#endif // KMP_MIC + + movq %rax, %rsi // save max(0, argc-4) -> %rsi for later + shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 + + movq %rsp, %rdx // + subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- + // without align, stack ptr would be this + movq %rdx, %rax // Save to %rax + + andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) + subq %rax, %rdx // Amount to subtract from %rsp + subq %rdx, %rsp // Prepare the stack ptr -- + // now %rsp will align to 128-byte boundary at call site + + // setup pkfn parameter reg and stack + movq %rcx, %rax // argc -> %rax + cmpq $0, %rsi + je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push + shlq $3, %rcx // argc*8 -> %rcx + movq %r8, %rdx // p_argv -> %rdx + addq %rcx, %rdx // &p_argv[argc] -> %rdx + + movq %rsi, %rcx // max (0, argc-4) -> %rcx + +KMP_LABEL(kmp_invoke_push_parms): + // push nth - 7th parms to pkfn on stack + subq $8, %rdx // decrement p_argv pointer to previous parm + movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi + pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) + subl $1, %ecx + +// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e +// if the name of the label that is an operand of this jecxz starts with a dot 
("."); +// Apple's linker does not support 1-byte length relocation; +// Resolution: replace all .labelX entries with L_labelX. + + jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left + jmp KMP_LABEL(kmp_invoke_push_parms) + ALIGN 3 +KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. + // order here is important to avoid trashing + // registers used for both input and output parms! + movq %rdi, %rbx // pkfn -> %rbx + leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn) + leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) + + movq %r8, %r11 // p_argv -> %r11 + +#if KMP_MIC + cmpq $4, %rax // argc >= 4? + jns KMP_LABEL(kmp_4) // jump to movq + jmp KMP_LABEL(kmp_4_exit) // jump ahead +KMP_LABEL(kmp_4): + movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) +KMP_LABEL(kmp_4_exit): + + cmpq $3, %rax // argc >= 3? + jns KMP_LABEL(kmp_3) // jump to movq + jmp KMP_LABEL(kmp_3_exit) // jump ahead +KMP_LABEL(kmp_3): + movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) +KMP_LABEL(kmp_3_exit): + + cmpq $2, %rax // argc >= 2? + jns KMP_LABEL(kmp_2) // jump to movq + jmp KMP_LABEL(kmp_2_exit) // jump ahead +KMP_LABEL(kmp_2): + movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) +KMP_LABEL(kmp_2_exit): + + cmpq $1, %rax // argc >= 1? + jns KMP_LABEL(kmp_1) // jump to movq + jmp KMP_LABEL(kmp_1_exit) // jump ahead +KMP_LABEL(kmp_1): + movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) +KMP_LABEL(kmp_1_exit): +#else + cmpq $4, %rax // argc >= 4? + cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) + + cmpq $3, %rax // argc >= 3? + cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) + + cmpq $2, %rax // argc >= 2? + cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) + + cmpq $1, %rax // argc >= 1? 
+ cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) +#endif // KMP_MIC + + call *%rbx // call (*pkfn)(); + movq $1, %rax // move 1 into return register; + + movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified + movq %rbp, %rsp // restore stack pointer + popq %rbp // restore frame pointer + KMP_CFI_DEF rsp,8 + ret + + DEBUG_INFO __kmp_invoke_microtask +// -- End __kmp_invoke_microtask + +// kmp_uint64 +// __kmp_hardware_timestamp(void) + .text + PROC __kmp_hardware_timestamp + rdtsc + shlq $32, %rdx + orq %rdx, %rax + ret + + DEBUG_INFO __kmp_hardware_timestamp +// -- End __kmp_hardware_timestamp + +//------------------------------------------------------------------------ +// +// FUNCTION __kmp_bsr32 +// +// int +// __kmp_bsr32( int ); +// + + .text + PROC __kmp_bsr32 + + bsr %edi,%eax + ret + + DEBUG_INFO __kmp_bsr32 + + +// ----------------------------------------------------------------------- +#endif /* KMP_ARCH_X86_64 */ + +#if KMP_ARCH_ARM + .data + .comm .gomp_critical_user_,32,8 + .data + .align 4 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .4byte .gomp_critical_user_ + .size __kmp_unnamed_critical_addr,4 +#endif /* KMP_ARCH_ARM */ + +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 + .data + .comm .gomp_critical_user_,32,8 + .data + .align 8 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .8byte .gomp_critical_user_ + .size __kmp_unnamed_critical_addr,8 +#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */ + +#if KMP_OS_LINUX +# if KMP_ARCH_ARM +.section .note.GNU-stack,"",%progbits +# else +.section .note.GNU-stack,"",@progbits +# endif +#endif diff --git a/contrib/libs/cxxsupp/openmp/z_Linux_util.c b/contrib/libs/cxxsupp/openmp/z_Linux_util.c index 67129797c90..237677b24cf 100644 --- a/contrib/libs/cxxsupp/openmp/z_Linux_util.c +++ b/contrib/libs/cxxsupp/openmp/z_Linux_util.c @@ -1,2706 +1,2706 @@ -/* - * z_Linux_util.c -- platform specific routines. 
- */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_itt.h" -#include "kmp_str.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_stats.h" -#include "kmp_wait_release.h" - -#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD -# include -#endif -#include -#include // HUGE_VAL. -#include -#include -#include -#include - -#if KMP_OS_LINUX && !KMP_OS_CNK -# include -# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) -// We should really include , but that causes compatibility problems on different -// Linux* OS distributions that either require that you include (or break when you try to include) -// . -// Since all we need is the two macros below (which are part of the kernel ABI, so can't change) -// we just define the constants here and don't include -# ifndef FUTEX_WAIT -# define FUTEX_WAIT 0 -# endif -# ifndef FUTEX_WAKE -# define FUTEX_WAKE 1 -# endif -# endif -#elif KMP_OS_DARWIN -# include -# include -#elif KMP_OS_FREEBSD -# include -#endif - - -#include -#include -#include - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -struct kmp_sys_timer { - struct timespec start; -}; - -// Convert timespec to nanoseconds. 
-#define TS2NS(timespec) (((timespec).tv_sec * 1e9) + (timespec).tv_nsec) - -static struct kmp_sys_timer __kmp_sys_timer_data; - -#if KMP_HANDLE_SIGNALS - typedef void (* sig_func_t )( int ); - STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[ NSIG ]; - static sigset_t __kmp_sigset; -#endif - -static int __kmp_init_runtime = FALSE; - -static int __kmp_fork_count = 0; - -static pthread_condattr_t __kmp_suspend_cond_attr; -static pthread_mutexattr_t __kmp_suspend_mutex_attr; - -static kmp_cond_align_t __kmp_wait_cv; -static kmp_mutex_align_t __kmp_wait_mx; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#ifdef DEBUG_SUSPEND -static void -__kmp_print_cond( char *buffer, kmp_cond_align_t *cond ) -{ - KMP_SNPRINTF( buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))", - cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock, - cond->c_cond.__c_waiting ); -} -#endif - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED) - -/* - * Affinity support - */ - -/* - * On some of the older OS's that we build on, these constants aren't present - * in #included from . They must be the same on - * all systems of the same arch where they are defined, and they cannot change. - * stone forever. - */ - -# if KMP_ARCH_X86 || KMP_ARCH_ARM -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 241 -# elif __NR_sched_setaffinity != 241 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 242 -# elif __NR_sched_getaffinity != 242 -# error Wrong code for getaffinity system call. 
-# endif /* __NR_sched_getaffinity */ - -# elif KMP_ARCH_AARCH64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 122 -# elif __NR_sched_setaffinity != 122 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 123 -# elif __NR_sched_getaffinity != 123 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ - -# elif KMP_ARCH_X86_64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 203 -# elif __NR_sched_setaffinity != 203 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 204 -# elif __NR_sched_getaffinity != 204 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ - -# elif KMP_ARCH_PPC64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 222 -# elif __NR_sched_setaffinity != 222 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 223 -# elif __NR_sched_getaffinity != 223 -# error Wrong code for getaffinity system call. 
-# endif /* __NR_sched_getaffinity */ - - -# else -# error Unknown or unsupported architecture - -# endif /* KMP_ARCH_* */ - -int -__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) -{ - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); -#if KMP_USE_HWLOC - int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); -#else - int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask ); -#endif - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FatalSysError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; -} - -int -__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) -{ - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - -#if KMP_USE_HWLOC - int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); -#else - int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask ); -#endif - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FatalSysError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; -} - -void -__kmp_affinity_bind_thread( int which ) -{ - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(which, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); -} - -/* - * Determine if we can access affinity functionality on this version of - * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set - * __kmp_affin_mask_size to the appropriate value (0 means not capable). 
- */ -void -__kmp_affinity_determine_capable(const char *env_var) -{ - // - // Check and see if the OS supports thread affinity. - // - -# define KMP_CPU_SET_SIZE_LIMIT (1024*1024) - - int gCode; - int sCode; - kmp_affin_mask_t *buf; - buf = ( kmp_affin_mask_t * ) KMP_INTERNAL_MALLOC( KMP_CPU_SET_SIZE_LIMIT ); - - // If Linux* OS: - // If the syscall fails or returns a suggestion for the size, - // then we don't have to search for an appropriate size. - gCode = syscall( __NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "initial getaffinity call returned %d errno = %d\n", - gCode, errno)); - - //if ((gCode < 0) && (errno == ENOSYS)) - if (gCode < 0) { - // - // System call not supported - // - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( GetAffSysCallNotSupported, env_var ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - if (gCode > 0) { // Linux* OS only - // The optimal situation: the OS returns the size of the buffer - // it expects. - // - // A verification of correct behavior is that Isetaffinity on a NULL - // buffer with the same size fails with errno set to EFAULT. 
- sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "setaffinity for mask size %d returned %d errno = %d\n", - gCode, sCode, errno)); - if (sCode < 0) { - if (errno == ENOSYS) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( SetAffSysCallNotSupported, env_var ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - } - if (errno == EFAULT) { - KMP_AFFINITY_ENABLE(gCode); - KA_TRACE(10, ( "__kmp_affinity_determine_capable: " - "affinity supported (mask size %d)\n", - (int)__kmp_affin_mask_size)); - KMP_INTERNAL_FREE(buf); - return; - } - } - } - - // - // Call the getaffinity system call repeatedly with increasing set sizes - // until we succeed, or reach an upper bound on the search. 
- // - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "searching for proper set size\n")); - int size; - for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) { - gCode = syscall( __NR_sched_getaffinity, 0, size, buf ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "getaffinity for mask size %d returned %d errno = %d\n", size, - gCode, errno)); - - if (gCode < 0) { - if ( errno == ENOSYS ) - { - // - // We shouldn't get here - // - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n", - size)); - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( GetAffSysCallNotSupported, env_var ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - continue; - } - - sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "setaffinity for mask size %d returned %d errno = %d\n", - gCode, sCode, errno)); - if (sCode < 0) { - if (errno == ENOSYS) { // Linux* OS only - // - // We shouldn't get here - // - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n", - size)); - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( SetAffSysCallNotSupported, env_var ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - if (errno == EFAULT) { - KMP_AFFINITY_ENABLE(gCode); - KA_TRACE(10, ( "__kmp_affinity_determine_capable: " - "affinity supported 
(mask size %d)\n", - (int)__kmp_affin_mask_size)); - KMP_INTERNAL_FREE(buf); - return; - } - } - } - //int error = errno; // save uncaught error code - KMP_INTERNAL_FREE(buf); - // errno = error; // restore uncaught error code, will be printed at the next KMP_WARNING below - - // - // Affinity is not supported - // - KMP_AFFINITY_DISABLE(); - KA_TRACE(10, ( "__kmp_affinity_determine_capable: " - "cannot determine mask size - affinity not supported\n")); - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - KMP_WARNING( AffCantGetMaskSize, env_var ); - } -} - -#endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && !KMP_OS_CNK - -int -__kmp_futex_determine_capable() -{ - int loc = 0; - int rc = syscall( __NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0 ); - int retval = ( rc == 0 ) || ( errno != ENOSYS ); - - KA_TRACE(10, ( "__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, - errno ) ); - KA_TRACE(10, ( "__kmp_futex_determine_capable: futex syscall%s supported\n", - retval ? "" : " not" ) ); - - return retval; -} - -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! 
KMP_ASM_INTRINS) -/* - * Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to - * use compare_and_store for these routines - */ - -kmp_int8 -__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; - - old_value = TCR_1( *p ); - new_value = old_value | d; - - while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value | d; - } - return old_value; -} - -kmp_int8 -__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; - - old_value = TCR_1( *p ); - new_value = old_value & d; - - while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value & d; - } - return old_value; -} - -kmp_int32 -__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; - - old_value = TCR_4( *p ); - new_value = old_value | d; - - while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value | d; - } - return old_value; -} - -kmp_int32 -__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; - - old_value = TCR_4( *p ); - new_value = old_value & d; - - while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value & d; - } - return old_value; -} - -# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 -kmp_int8 -__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; - - old_value = TCR_1( *p ); - new_value = old_value + d; - - while ( ! 
KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value + d; - } - return old_value; -} - -kmp_int64 -__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_8( *p ); - new_value = old_value + d; - - while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value + d; - } - return old_value; -} -# endif /* KMP_ARCH_X86 */ - -kmp_int64 -__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_8( *p ); - new_value = old_value | d; - while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value | d; - } - return old_value; -} - -kmp_int64 -__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_8( *p ); - new_value = old_value & d; - while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value & d; - } - return old_value; -} - -#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! 
KMP_ASM_INTRINS) */ - -void -__kmp_terminate_thread( int gtid ) -{ - int status; - kmp_info_t *th = __kmp_threads[ gtid ]; - - if ( !th ) return; - - #ifdef KMP_CANCEL_THREADS - KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) ); - status = pthread_cancel( th->th.th_info.ds.ds_thread ); - if ( status != 0 && status != ESRCH ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantTerminateWorkerThread ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - #endif - __kmp_yield( TRUE ); -} // - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* - * Set thread stack info according to values returned by - * pthread_getattr_np(). - * If values are unreasonable, assume call failed and use - * incremental stack refinement method instead. - * Returns TRUE if the stack parameters could be determined exactly, - * FALSE if incremental refinement is necessary. - */ -static kmp_int32 -__kmp_set_stack_info( int gtid, kmp_info_t *th ) -{ - int stack_data; -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - /* Linux* OS only -- no pthread_getattr_np support on OS X* */ - pthread_attr_t attr; - int status; - size_t size = 0; - void * addr = 0; - - /* Always do incremental stack refinement for ubermaster threads since the initial - thread stack range can be reduced by sibling thread creation so pthread_attr_getstack - may cause thread gtid aliasing */ - if ( ! 
KMP_UBER_GTID(gtid) ) { - - /* Fetch the real thread attributes */ - status = pthread_attr_init( &attr ); - KMP_CHECK_SYSFAIL( "pthread_attr_init", status ); -#if KMP_OS_FREEBSD || KMP_OS_NETBSD - status = pthread_attr_get_np( pthread_self(), &attr ); - KMP_CHECK_SYSFAIL( "pthread_attr_get_np", status ); -#else - status = pthread_getattr_np( pthread_self(), &attr ); - KMP_CHECK_SYSFAIL( "pthread_getattr_np", status ); -#endif - status = pthread_attr_getstack( &attr, &addr, &size ); - KMP_CHECK_SYSFAIL( "pthread_attr_getstack", status ); - KA_TRACE( 60, ( "__kmp_set_stack_info: T#%d pthread_attr_getstack returned size: %lu, " - "low addr: %p\n", - gtid, size, addr )); - - status = pthread_attr_destroy( &attr ); - KMP_CHECK_SYSFAIL( "pthread_attr_destroy", status ); - } - - if ( size != 0 && addr != 0 ) { /* was stack parameter determination successful? */ - /* Store the correct base and size */ - TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size)); - TCW_PTR(th->th.th_info.ds.ds_stacksize, size); - TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); - return TRUE; - } -#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */ - /* Use incremental refinement starting from initial conservative estimate */ - TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); - TCW_PTR(th -> th.th_info.ds.ds_stackbase, &stack_data); - TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); - return FALSE; -} - -static void* -__kmp_launch_worker( void *thr ) -{ - int status, old_type, old_state; -#ifdef KMP_BLOCK_SIGNALS - sigset_t new_set, old_set; -#endif /* KMP_BLOCK_SIGNALS */ - void *exit_val; -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - void * volatile padding = 0; -#endif - int gtid; - - gtid = ((kmp_info_t*)thr) -> th.th_info.ds.ds_gtid; - __kmp_gtid_set_specific( gtid ); -#ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; -#endif -#if KMP_STATS_ENABLED - // set __thread local index to point to thread-specific stats - __kmp_stats_thread_ptr = ((kmp_info_t*)thr)->th.th_stats; -#endif - 
-#if USE_ITT_BUILD - __kmp_itt_thread_name( gtid ); -#endif /* USE_ITT_BUILD */ - -#if KMP_AFFINITY_SUPPORTED - __kmp_affinity_set_init_mask( gtid, FALSE ); -#endif - -#ifdef KMP_CANCEL_THREADS - status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type ); - KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status ); - /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */ - status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); -#endif - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // - // Set the FP control regs to be a copy of - // the parallel initialization thread's. - // - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); - __kmp_load_mxcsr( &__kmp_init_mxcsr ); -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -#ifdef KMP_BLOCK_SIGNALS - status = sigfillset( & new_set ); - KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status ); - status = pthread_sigmask( SIG_BLOCK, & new_set, & old_set ); - KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); -#endif /* KMP_BLOCK_SIGNALS */ - -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - if ( __kmp_stkoffset > 0 && gtid > 0 ) { - padding = KMP_ALLOCA( gtid * __kmp_stkoffset ); - } -#endif - - KMP_MB(); - __kmp_set_stack_info( gtid, (kmp_info_t*)thr ); - - __kmp_check_stack_overlap( (kmp_info_t*)thr ); - - exit_val = __kmp_launch_thread( (kmp_info_t *) thr ); - -#ifdef KMP_BLOCK_SIGNALS - status = pthread_sigmask( SIG_SETMASK, & old_set, NULL ); - KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); -#endif /* KMP_BLOCK_SIGNALS */ - - return exit_val; -} - - -/* The monitor thread controls all of the threads in the complex */ - -static void* -__kmp_launch_monitor( void *thr ) -{ - int status, old_type, old_state; -#ifdef KMP_BLOCK_SIGNALS - sigset_t new_set; -#endif /* KMP_BLOCK_SIGNALS */ - struct timespec interval; - int yield_count; - int yield_cycles = 0; - - KMP_MB(); /* Flush 
all pending memory write invalidates. */ - - KA_TRACE( 10, ("__kmp_launch_monitor: #1 launched\n" ) ); - - /* register us as the monitor thread */ - __kmp_gtid_set_specific( KMP_GTID_MONITOR ); -#ifdef KMP_TDATA_GTID - __kmp_gtid = KMP_GTID_MONITOR; -#endif - - KMP_MB(); - -#if USE_ITT_BUILD - __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread. -#endif /* USE_ITT_BUILD */ - - __kmp_set_stack_info( ((kmp_info_t*)thr)->th.th_info.ds.ds_gtid, (kmp_info_t*)thr ); - - __kmp_check_stack_overlap( (kmp_info_t*)thr ); - -#ifdef KMP_CANCEL_THREADS - status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type ); - KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status ); - /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */ - status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); -#endif - - #if KMP_REAL_TIME_FIX - // This is a potential fix which allows application with real-time scheduling policy work. - // However, decision about the fix is not made yet, so it is disabled by default. - { // Are program started with real-time scheduling policy? - int sched = sched_getscheduler( 0 ); - if ( sched == SCHED_FIFO || sched == SCHED_RR ) { - // Yes, we are a part of real-time application. Try to increase the priority of the - // monitor. 
- struct sched_param param; - int max_priority = sched_get_priority_max( sched ); - int rc; - KMP_WARNING( RealTimeSchedNotSupported ); - sched_getparam( 0, & param ); - if ( param.sched_priority < max_priority ) { - param.sched_priority += 1; - rc = sched_setscheduler( 0, sched, & param ); - if ( rc != 0 ) { - int error = errno; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantChangeMonitorPriority ), - KMP_ERR( error ), - KMP_MSG( MonitorWillStarve ), - __kmp_msg_null - ); - }; // if - } else { - // We cannot abort here, because number of CPUs may be enough for all the threads, - // including the monitor thread, so application could potentially work... - __kmp_msg( - kmp_ms_warning, - KMP_MSG( RunningAtMaxPriority ), - KMP_MSG( MonitorWillStarve ), - KMP_HNT( RunningAtMaxPriority ), - __kmp_msg_null - ); - }; // if - }; // if - TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); // AC: free thread that waits for monitor started - } - #endif // KMP_REAL_TIME_FIX - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - if ( __kmp_monitor_wakeups == 1 ) { - interval.tv_sec = 1; - interval.tv_nsec = 0; - } else { - interval.tv_sec = 0; - interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups); - } - - KA_TRACE( 10, ("__kmp_launch_monitor: #2 monitor\n" ) ); - - if (__kmp_yield_cycle) { - __kmp_yielding_on = 0; /* Start out with yielding shut off */ - yield_count = __kmp_yield_off_count; - } else { - __kmp_yielding_on = 1; /* Yielding is on permanently */ - } - - while( ! 
TCR_4( __kmp_global.g.g_done ) ) { - struct timespec now; - struct timeval tval; - - /* This thread monitors the state of the system */ - - KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) ); - - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &now ); - - now.tv_sec += interval.tv_sec; - now.tv_nsec += interval.tv_nsec; - - if (now.tv_nsec >= KMP_NSEC_PER_SEC) { - now.tv_sec += 1; - now.tv_nsec -= KMP_NSEC_PER_SEC; - } - - status = pthread_mutex_lock( & __kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); - // AC: the monitor should not fall asleep if g_done has been set - if ( !TCR_4(__kmp_global.g.g_done) ) { // check once more under mutex - status = pthread_cond_timedwait( &__kmp_wait_cv.c_cond, &__kmp_wait_mx.m_mutex, &now ); - if ( status != 0 ) { - if ( status != ETIMEDOUT && status != EINTR ) { - KMP_SYSFAIL( "pthread_cond_timedwait", status ); - }; - }; - }; - status = pthread_mutex_unlock( & __kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - - if (__kmp_yield_cycle) { - yield_cycles++; - if ( (yield_cycles % yield_count) == 0 ) { - if (__kmp_yielding_on) { - __kmp_yielding_on = 0; /* Turn it off now */ - yield_count = __kmp_yield_off_count; - } else { - __kmp_yielding_on = 1; /* Turn it on now */ - yield_count = __kmp_yield_on_count; - } - yield_cycles = 0; - } - } else { - __kmp_yielding_on = 1; - } - - TCW_4( __kmp_global.g.g_time.dt.t_value, - TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 ); - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - } - - KA_TRACE( 10, ("__kmp_launch_monitor: #3 cleanup\n" ) ); - -#ifdef KMP_BLOCK_SIGNALS - status = sigfillset( & new_set ); - KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status ); - status = pthread_sigmask( SIG_UNBLOCK, & new_set, NULL ); - KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); -#endif /* KMP_BLOCK_SIGNALS */ - - KA_TRACE( 10, ("__kmp_launch_monitor: #4 finished\n" ) ); - - if( __kmp_global.g.g_abort != 0 ) { - /* now we need to terminate the worker threads */ - /* the value of t_abort is the signal we caught */ - - int gtid; - - KA_TRACE( 10, ("__kmp_launch_monitor: #5 terminate sig=%d\n", __kmp_global.g.g_abort ) ); - - /* terminate the OpenMP worker threads */ - /* TODO this is not valid for sibling threads!! - * the uber master might not be 0 anymore.. */ - for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) - __kmp_terminate_thread( gtid ); - - __kmp_cleanup(); - - KA_TRACE( 10, ("__kmp_launch_monitor: #6 raise sig=%d\n", __kmp_global.g.g_abort ) ); - - if (__kmp_global.g.g_abort > 0) - raise( __kmp_global.g.g_abort ); - - } - - KA_TRACE( 10, ("__kmp_launch_monitor: #7 exit\n" ) ); - - return thr; -} - -void -__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ) -{ - pthread_t handle; - pthread_attr_t thread_attr; - int status; - - - th->th.th_info.ds.ds_gtid = gtid; - -#if KMP_STATS_ENABLED - // sets up worker thread stats - __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid); - - // th->th.th_stats is used to transfer thread specific stats-pointer to __kmp_launch_worker - // So when thread is created (goes into __kmp_launch_worker) it will - // set it's __thread local pointer to th->th.th_stats - th->th.th_stats = __kmp_stats_list.push_back(gtid); - if(KMP_UBER_GTID(gtid)) { - __kmp_stats_start_time = tsc_tick_count::now(); - __kmp_stats_thread_ptr = th->th.th_stats; - __kmp_stats_init(); - KMP_START_EXPLICIT_TIMER(OMP_serial); - KMP_START_EXPLICIT_TIMER(OMP_start_end); - } - __kmp_release_tas_lock(&__kmp_stats_lock, gtid); - -#endif 
// KMP_STATS_ENABLED - - if ( KMP_UBER_GTID(gtid) ) { - KA_TRACE( 10, ("__kmp_create_worker: uber thread (%d)\n", gtid ) ); - th -> th.th_info.ds.ds_thread = pthread_self(); - __kmp_set_stack_info( gtid, th ); - __kmp_check_stack_overlap( th ); - return; - }; // if - - KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - -#ifdef KMP_THREAD_ATTR - { - status = pthread_attr_init( &thread_attr ); - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantInitThreadAttrs ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE ); - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetWorkerState ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - - /* Set stack size for this thread now. - * The multiple of 2 is there because on some machines, requesting an unusual stacksize - * causes the thread to have an offset before the dummy alloca() takes place to create the - * offset. Since we want the user to have a sufficient stacksize AND support a stack offset, we - * alloca() twice the offset so that the upcoming alloca() does not eliminate any premade - * offset, and also gives the user the stack space they requested for all threads */ - stack_size += gtid * __kmp_stkoffset * 2; - - KA_TRACE( 10, ( "__kmp_create_worker: T#%d, default stacksize = %lu bytes, " - "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n", - gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) ); - -# ifdef _POSIX_THREAD_ATTR_STACKSIZE - status = pthread_attr_setstacksize( & thread_attr, stack_size ); -# ifdef KMP_BACKUP_STKSIZE - if ( status != 0 ) { - if ( ! 
__kmp_env_stksize ) { - stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset; - __kmp_stksize = KMP_BACKUP_STKSIZE; - KA_TRACE( 10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " - "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu " - "bytes\n", - gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) - ); - status = pthread_attr_setstacksize( &thread_attr, stack_size ); - }; // if - }; // if -# endif /* KMP_BACKUP_STKSIZE */ - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetWorkerStackSize, stack_size ), - KMP_ERR( status ), - KMP_HNT( ChangeWorkerStackSize ), - __kmp_msg_null - ); - }; // if -# endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - } -#endif /* KMP_THREAD_ATTR */ - - { - status = pthread_create( & handle, & thread_attr, __kmp_launch_worker, (void *) th ); - if ( status != 0 || ! handle ) { // ??? Why do we check handle?? -#ifdef _POSIX_THREAD_ATTR_STACKSIZE - if ( status == EINVAL ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetWorkerStackSize, stack_size ), - KMP_ERR( status ), - KMP_HNT( IncreaseWorkerStackSize ), - __kmp_msg_null - ); - }; - if ( status == ENOMEM ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetWorkerStackSize, stack_size ), - KMP_ERR( status ), - KMP_HNT( DecreaseWorkerStackSize ), - __kmp_msg_null - ); - }; -#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - if ( status == EAGAIN ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( NoResourcesForWorkerThread ), - KMP_ERR( status ), - KMP_HNT( Decrease_NUM_THREADS ), - __kmp_msg_null - ); - }; // if - KMP_SYSFAIL( "pthread_create", status ); - }; // if - - th->th.th_info.ds.ds_thread = handle; - } - -#ifdef KMP_THREAD_ATTR - { - status = pthread_attr_destroy( & thread_attr ); - if ( status ) { - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantDestroyThreadAttrs ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - } -#endif /* KMP_THREAD_ATTR */ - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) ); - -} // __kmp_create_worker - - -void -__kmp_create_monitor( kmp_info_t *th ) -{ - pthread_t handle; - pthread_attr_t thread_attr; - size_t size; - int status; - int auto_adj_size = FALSE; - - KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; - th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; - #if KMP_REAL_TIME_FIX - TCW_4( __kmp_global.g.g_time.dt.t_value, -1 ); // Will use it for synchronization a bit later. - #else - TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); - #endif // KMP_REAL_TIME_FIX - - #ifdef KMP_THREAD_ATTR - if ( __kmp_monitor_stksize == 0 ) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - auto_adj_size = TRUE; - } - status = pthread_attr_init( &thread_attr ); - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantInitThreadAttrs ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE ); - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetMonitorState ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - - #ifdef _POSIX_THREAD_ATTR_STACKSIZE - status = pthread_attr_getstacksize( & thread_attr, & size ); - KMP_CHECK_SYSFAIL( "pthread_attr_getstacksize", status ); - #else - size = __kmp_sys_min_stksize; - #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - #endif /* KMP_THREAD_ATTR */ - - if ( __kmp_monitor_stksize == 0 ) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - } - if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) { - __kmp_monitor_stksize = __kmp_sys_min_stksize; - } - - KA_TRACE( 10, ( "__kmp_create_monitor: default stacksize = %lu bytes," - "requested stacksize = %lu bytes\n", - size, __kmp_monitor_stksize ) ); - - retry: - - /* Set stack size for this thread now. 
*/ - - #ifdef _POSIX_THREAD_ATTR_STACKSIZE - KA_TRACE( 10, ( "__kmp_create_monitor: setting stacksize = %lu bytes,", - __kmp_monitor_stksize ) ); - status = pthread_attr_setstacksize( & thread_attr, __kmp_monitor_stksize ); - if ( status != 0 ) { - if ( auto_adj_size ) { - __kmp_monitor_stksize *= 2; - goto retry; - } - __kmp_msg( - kmp_ms_warning, // should this be fatal? BB - KMP_MSG( CantSetMonitorStackSize, (long int) __kmp_monitor_stksize ), - KMP_ERR( status ), - KMP_HNT( ChangeMonitorStackSize ), - __kmp_msg_null - ); - }; // if - #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - - status = pthread_create( &handle, & thread_attr, __kmp_launch_monitor, (void *) th ); - - if ( status != 0 ) { - #ifdef _POSIX_THREAD_ATTR_STACKSIZE - if ( status == EINVAL ) { - if ( auto_adj_size && ( __kmp_monitor_stksize < (size_t)0x40000000 ) ) { - __kmp_monitor_stksize *= 2; - goto retry; - } - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ), - KMP_ERR( status ), - KMP_HNT( IncreaseMonitorStackSize ), - __kmp_msg_null - ); - }; // if - if ( status == ENOMEM ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ), - KMP_ERR( status ), - KMP_HNT( DecreaseMonitorStackSize ), - __kmp_msg_null - ); - }; // if - #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - if ( status == EAGAIN ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( NoResourcesForMonitorThread ), - KMP_ERR( status ), - KMP_HNT( DecreaseNumberOfThreadsInUse ), - __kmp_msg_null - ); - }; // if - KMP_SYSFAIL( "pthread_create", status ); - }; // if - - th->th.th_info.ds.ds_thread = handle; - - #if KMP_REAL_TIME_FIX - // Wait for the monitor thread is really started and set its *priority*. 
- KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == sizeof( __kmp_global.g.g_time.dt.t_value ) ); - __kmp_wait_yield_4( - (kmp_uint32 volatile *) & __kmp_global.g.g_time.dt.t_value, -1, & __kmp_neq_4, NULL - ); - #endif // KMP_REAL_TIME_FIX - - #ifdef KMP_THREAD_ATTR - status = pthread_attr_destroy( & thread_attr ); - if ( status != 0 ) { - __kmp_msg( // - kmp_ms_warning, - KMP_MSG( CantDestroyThreadAttrs ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - #endif - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( 10, ( "__kmp_create_monitor: monitor created %#.8lx\n", th->th.th_info.ds.ds_thread ) ); - -} // __kmp_create_monitor - -void -__kmp_exit_thread( - int exit_status -) { - pthread_exit( (void *)(intptr_t) exit_status ); -} // __kmp_exit_thread - -void __kmp_resume_monitor(); - -void -__kmp_reap_monitor( kmp_info_t *th ) -{ - int status; - void *exit_val; - - KA_TRACE( 10, ("__kmp_reap_monitor: try to reap monitor thread with handle %#.8lx\n", - th->th.th_info.ds.ds_thread ) ); - - // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. - // If both tid and gtid are 0, it means the monitor did not ever start. - // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. - KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid ); - if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) { - return; - }; // if - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - - /* First, check to see whether the monitor thread exists. This could prevent a hang, - but if the monitor dies after the pthread_kill call and before the pthread_join - call, it will still hang. 
*/ - - status = pthread_kill( th->th.th_info.ds.ds_thread, 0 ); - if (status == ESRCH) { - - KA_TRACE( 10, ("__kmp_reap_monitor: monitor does not exist, returning\n") ); - - } else - { - __kmp_resume_monitor(); // Wake up the monitor thread - status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val); - if (exit_val != th) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( ReapMonitorError ), - KMP_ERR( status ), - __kmp_msg_null - ); - } - } - - th->th.th_info.ds.ds_tid = KMP_GTID_DNE; - th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; - - KA_TRACE( 10, ("__kmp_reap_monitor: done reaping monitor thread with handle %#.8lx\n", - th->th.th_info.ds.ds_thread ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - -} - -void -__kmp_reap_worker( kmp_info_t *th ) -{ - int status; - void *exit_val; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( 10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid ) ); - - /* First, check to see whether the worker thread exists. This could prevent a hang, - but if the worker dies after the pthread_kill call and before the pthread_join - call, it will still hang. 
*/ - - { - status = pthread_kill( th->th.th_info.ds.ds_thread, 0 ); - if (status == ESRCH) { - KA_TRACE( 10, ("__kmp_reap_worker: worker T#%d does not exist, returning\n", - th->th.th_info.ds.ds_gtid ) ); - } - else { - KA_TRACE( 10, ("__kmp_reap_worker: try to join with worker T#%d\n", - th->th.th_info.ds.ds_gtid ) ); - - status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val); -#ifdef KMP_DEBUG - /* Don't expose these to the user until we understand when they trigger */ - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( ReapWorkerError ), - KMP_ERR( status ), - __kmp_msg_null - ); - } - if ( exit_val != th ) { - KA_TRACE( 10, ( "__kmp_reap_worker: worker T#%d did not reap properly, " - "exit_val = %p\n", - th->th.th_info.ds.ds_gtid, exit_val ) ); - } -#endif /* KMP_DEBUG */ - } - } - - KA_TRACE( 10, ("__kmp_reap_worker: done reaping T#%d\n", th->th.th_info.ds.ds_gtid ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if KMP_HANDLE_SIGNALS - - -static void -__kmp_null_handler( int signo ) -{ - // Do nothing, for doing SIG_IGN-type actions. -} // __kmp_null_handler - - -static void -__kmp_team_handler( int signo ) -{ - if ( __kmp_global.g.g_abort == 0 ) { - /* Stage 1 signal handler, let's shut down all of the threads */ - #ifdef KMP_DEBUG - __kmp_debug_printf( "__kmp_team_handler: caught signal = %d\n", signo ); - #endif - switch ( signo ) { - case SIGHUP : - case SIGINT : - case SIGQUIT : - case SIGILL : - case SIGABRT : - case SIGFPE : - case SIGBUS : - case SIGSEGV : - #ifdef SIGSYS - case SIGSYS : - #endif - case SIGTERM : - if ( __kmp_debug_buf ) { - __kmp_dump_debug_buffer( ); - }; // if - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4( __kmp_global.g.g_abort, signo ); - KMP_MB(); // Flush all pending memory write invalidates. 
- TCW_4( __kmp_global.g.g_done, TRUE ); - KMP_MB(); // Flush all pending memory write invalidates. - break; - default: - #ifdef KMP_DEBUG - __kmp_debug_printf( "__kmp_team_handler: unknown signal type" ); - #endif - break; - }; // switch - }; // if -} // __kmp_team_handler - - -static -void __kmp_sigaction( int signum, const struct sigaction * act, struct sigaction * oldact ) { - int rc = sigaction( signum, act, oldact ); - KMP_CHECK_SYSFAIL_ERRNO( "sigaction", rc ); -} - - -static void -__kmp_install_one_handler( int sig, sig_func_t handler_func, int parallel_init ) -{ - KMP_MB(); // Flush all pending memory write invalidates. - KB_TRACE( 60, ( "__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init ) ); - if ( parallel_init ) { - struct sigaction new_action; - struct sigaction old_action; - new_action.sa_handler = handler_func; - new_action.sa_flags = 0; - sigfillset( & new_action.sa_mask ); - __kmp_sigaction( sig, & new_action, & old_action ); - if ( old_action.sa_handler == __kmp_sighldrs[ sig ].sa_handler ) { - sigaddset( & __kmp_sigset, sig ); - } else { - // Restore/keep user's handler if one previously installed. - __kmp_sigaction( sig, & old_action, NULL ); - }; // if - } else { - // Save initial/system signal handlers to see if user handlers installed. - __kmp_sigaction( sig, NULL, & __kmp_sighldrs[ sig ] ); - }; // if - KMP_MB(); // Flush all pending memory write invalidates. -} // __kmp_install_one_handler - - -static void -__kmp_remove_one_handler( int sig ) -{ - KB_TRACE( 60, ( "__kmp_remove_one_handler( %d )\n", sig ) ); - if ( sigismember( & __kmp_sigset, sig ) ) { - struct sigaction old; - KMP_MB(); // Flush all pending memory write invalidates. - __kmp_sigaction( sig, & __kmp_sighldrs[ sig ], & old ); - if ( ( old.sa_handler != __kmp_team_handler ) && ( old.sa_handler != __kmp_null_handler ) ) { - // Restore the users signal handler. 
- KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) ); - __kmp_sigaction( sig, & old, NULL ); - }; // if - sigdelset( & __kmp_sigset, sig ); - KMP_MB(); // Flush all pending memory write invalidates. - }; // if -} // __kmp_remove_one_handler - - -void -__kmp_install_signals( int parallel_init ) -{ - KB_TRACE( 10, ( "__kmp_install_signals( %d )\n", parallel_init ) ); - if ( __kmp_handle_signals || ! parallel_init ) { - // If ! parallel_init, we do not install handlers, just save original handlers. - // Let us do it even __handle_signals is 0. - sigemptyset( & __kmp_sigset ); - __kmp_install_one_handler( SIGHUP, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGQUIT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGBUS, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init ); - #ifdef SIGSYS - __kmp_install_one_handler( SIGSYS, __kmp_team_handler, parallel_init ); - #endif // SIGSYS - __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init ); - #ifdef SIGPIPE - __kmp_install_one_handler( SIGPIPE, __kmp_team_handler, parallel_init ); - #endif // SIGPIPE - }; // if -} // __kmp_install_signals - - -void -__kmp_remove_signals( void ) -{ - int sig; - KB_TRACE( 10, ( "__kmp_remove_signals()\n" ) ); - for ( sig = 1; sig < NSIG; ++ sig ) { - __kmp_remove_one_handler( sig ); - }; // for sig -} // __kmp_remove_signals - - -#endif // KMP_HANDLE_SIGNALS - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_enable( int 
new_state ) -{ - #ifdef KMP_CANCEL_THREADS - int status, old_state; - status = pthread_setcancelstate( new_state, & old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); - KMP_DEBUG_ASSERT( old_state == PTHREAD_CANCEL_DISABLE ); - #endif -} - -void -__kmp_disable( int * old_state ) -{ - #ifdef KMP_CANCEL_THREADS - int status; - status = pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); - #endif -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -static void -__kmp_atfork_prepare (void) -{ - /* nothing to do */ -} - -static void -__kmp_atfork_parent (void) -{ - /* nothing to do */ -} - -/* - Reset the library so execution in the child starts "all over again" with - clean data structures in initial states. Don't worry about freeing memory - allocated by parent, just abandon it to be safe. -*/ -static void -__kmp_atfork_child (void) -{ - /* TODO make sure this is done right for nested/sibling */ - // ATT: Memory leaks are here? TODO: Check it and fix. - /* KMP_ASSERT( 0 ); */ - - ++__kmp_fork_count; - - __kmp_init_runtime = FALSE; - __kmp_init_monitor = 0; - __kmp_init_parallel = FALSE; - __kmp_init_middle = FALSE; - __kmp_init_serial = FALSE; - TCW_4(__kmp_init_gtid, FALSE); - __kmp_init_common = FALSE; - - TCW_4(__kmp_init_user_locks, FALSE); -#if ! 
KMP_USE_DYNAMIC_LOCK - __kmp_user_lock_table.used = 1; - __kmp_user_lock_table.allocated = 0; - __kmp_user_lock_table.table = NULL; - __kmp_lock_blocks = NULL; -#endif - - __kmp_all_nth = 0; - TCW_4(__kmp_nth, 0); - - /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate here - so threadprivate doesn't use stale data */ - KA_TRACE( 10, ( "__kmp_atfork_child: checking cache address list %p\n", - __kmp_threadpriv_cache_list ) ); - - while ( __kmp_threadpriv_cache_list != NULL ) { - - if ( *__kmp_threadpriv_cache_list -> addr != NULL ) { - KC_TRACE( 50, ( "__kmp_atfork_child: zeroing cache at address %p\n", - &(*__kmp_threadpriv_cache_list -> addr) ) ); - - *__kmp_threadpriv_cache_list -> addr = NULL; - } - __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list -> next; - } - - __kmp_init_runtime = FALSE; - - /* reset statically initialized locks */ - __kmp_init_bootstrap_lock( &__kmp_initz_lock ); - __kmp_init_bootstrap_lock( &__kmp_stdio_lock ); - __kmp_init_bootstrap_lock( &__kmp_console_lock ); - - /* This is necessary to make sure no stale data is left around */ - /* AC: customers complain that we use unsafe routines in the atfork - handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen - in dynamic_link when check the presence of shared tbbmalloc library. - Suggestion is to make the library initialization lazier, similar - to what done for __kmpc_begin(). */ - // TODO: synchronize all static initializations with regular library - // startup; look at kmp_global.c and etc. 
- //__kmp_internal_begin (); - -} - -void -__kmp_register_atfork(void) { - if ( __kmp_need_register_atfork ) { - int status = pthread_atfork( __kmp_atfork_prepare, __kmp_atfork_parent, __kmp_atfork_child ); - KMP_CHECK_SYSFAIL( "pthread_atfork", status ); - __kmp_need_register_atfork = FALSE; - } -} - -void -__kmp_suspend_initialize( void ) -{ - int status; - status = pthread_mutexattr_init( &__kmp_suspend_mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status ); - status = pthread_condattr_init( &__kmp_suspend_cond_attr ); - KMP_CHECK_SYSFAIL( "pthread_condattr_init", status ); -} - -static void -__kmp_suspend_initialize_thread( kmp_info_t *th ) -{ - if ( th->th.th_suspend_init_count <= __kmp_fork_count ) { - /* this means we haven't initialized the suspension pthread objects for this thread - in this instance of the process */ - int status; - status = pthread_cond_init( &th->th.th_suspend_cv.c_cond, &__kmp_suspend_cond_attr ); - KMP_CHECK_SYSFAIL( "pthread_cond_init", status ); - status = pthread_mutex_init( &th->th.th_suspend_mx.m_mutex, & __kmp_suspend_mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutex_init", status ); - *(volatile int*)&th->th.th_suspend_init_count = __kmp_fork_count + 1; - }; -} - -void -__kmp_suspend_uninitialize_thread( kmp_info_t *th ) -{ - if(th->th.th_suspend_init_count > __kmp_fork_count) { - /* this means we have initialize the suspension pthread objects for this thread - in this instance of the process */ - int status; - - status = pthread_cond_destroy( &th->th.th_suspend_cv.c_cond ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_cond_destroy", status ); - }; - status = pthread_mutex_destroy( &th->th.th_suspend_mx.m_mutex ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_mutex_destroy", status ); - }; - --th->th.th_suspend_init_count; - KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == __kmp_fork_count); - } -} - -/* This routine puts the calling thread to sleep after setting the - * 
sleep bit for the indicated flag variable to true. - */ -template -static inline void __kmp_suspend_template( int th_gtid, C *flag ) -{ - KMP_TIME_DEVELOPER_BLOCK(USER_suspend); - kmp_info_t *th = __kmp_threads[th_gtid]; - int status; - typename C::flag_t old_spin; - - KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid, flag->get() ) ); - - __kmp_suspend_initialize_thread( th ); - - status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); - - KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n", - th_gtid, flag->get() ) ); - - /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread - gets called first? - */ - old_spin = flag->set_sleeping(); - - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x, was %x\n", - th_gtid, flag->get(), *(flag->get()), old_spin ) ); - - if ( flag->done_check_val(old_spin) ) { - old_spin = flag->unset_sleeping(); - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for spin(%p)\n", - th_gtid, flag->get()) ); - } else { - /* Encapsulate in a loop as the documentation states that this may - * "with low probability" return when the condition variable has - * not been signaled or broadcast - */ - int deactivated = FALSE; - TCW_PTR(th->th.th_sleep_loc, (void *)flag); - while ( flag->is_sleeping() ) { -#ifdef DEBUG_SUSPEND - char buffer[128]; - __kmp_suspend_count++; - __kmp_print_cond( buffer, &th->th.th_suspend_cv ); - __kmp_printf( "__kmp_suspend_template: suspending T#%d: %s\n", th_gtid, buffer ); -#endif - // Mark the thread as no longer active (only in the first iteration of the loop). - if ( ! 
deactivated ) { - th->th.th_active = FALSE; - if ( th->th.th_active_in_pool ) { - th->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); - } - deactivated = TRUE; - - - } - -#if USE_SUSPEND_TIMEOUT - struct timespec now; - struct timeval tval; - int msecs; - - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &now ); - - msecs = (4*__kmp_dflt_blocktime) + 200; - now.tv_sec += msecs / 1000; - now.tv_nsec += (msecs % 1000)*1000; - - KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_timedwait\n", - th_gtid ) ); - status = pthread_cond_timedwait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex, & now ); -#else - KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_wait\n", - th_gtid ) ); - status = pthread_cond_wait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex ); -#endif - - if ( (status != 0) && (status != EINTR) && (status != ETIMEDOUT) ) { - KMP_SYSFAIL( "pthread_cond_wait", status ); - } -#ifdef KMP_DEBUG - if (status == ETIMEDOUT) { - if ( flag->is_sleeping() ) { - KF_TRACE( 100, ( "__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid ) ); - } else { - KF_TRACE( 2, ( "__kmp_suspend_template: T#%d timeout wakeup, sleep bit not set!\n", - th_gtid ) ); - } - } else if ( flag->is_sleeping() ) { - KF_TRACE( 100, ( "__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ) ); - } -#endif - } // while - - // Mark the thread as active again (if it was previous marked as inactive) - if ( deactivated ) { - th->th.th_active = TRUE; - if ( TCR_4(th->th.th_in_pool) ) { - KMP_TEST_THEN_INC32( (kmp_int32 *) &__kmp_thread_pool_active_nth ); - th->th.th_active_in_pool = TRUE; - } - } - } - -#ifdef DEBUG_SUSPEND - { - char buffer[128]; - __kmp_print_cond( buffer, &th->th.th_suspend_cv); - __kmp_printf( 
"__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid, buffer ); - } -#endif - - - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - - KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) ); -} - -void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { - __kmp_suspend_template(th_gtid, flag); -} -void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { - __kmp_suspend_template(th_gtid, flag); -} -void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { - __kmp_suspend_template(th_gtid, flag); -} - - -/* This routine signals the thread specified by target_gtid to wake up - * after setting the sleep bit indicated by the flag argument to FALSE. - * The target thread must already have called __kmp_suspend_template() - */ -template -static inline void __kmp_resume_template( int target_gtid, C *flag ) -{ - KMP_TIME_DEVELOPER_BLOCK(USER_resume); - kmp_info_t *th = __kmp_threads[target_gtid]; - int status; - -#ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; -#endif - - KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) ); - KMP_DEBUG_ASSERT( gtid != target_gtid ); - - __kmp_suspend_initialize_thread( th ); - - status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); - - if (!flag) { // coming from __kmp_null_resume_wrapper - flag = (C *)th->th.th_sleep_loc; - } - - // First, check if the flag is null or its type has changed. If so, someone else woke it up. 
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p)\n", - gtid, target_gtid, NULL ) ); - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - return; - } - else { // if multiple threads are sleeping, flag should be internally referring to a specific thread here - typename C::flag_t old_spin = flag->unset_sleeping(); - if ( ! flag->is_sleeping_val(old_spin) ) { - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p): " - "%u => %u\n", - gtid, target_gtid, flag->get(), old_spin, *flag->get() ) ); - - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - return; - } - KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p): " - "%u => %u\n", - gtid, target_gtid, flag->get(), old_spin, *flag->get() ) ); - } - TCW_PTR(th->th.th_sleep_loc, NULL); - - -#ifdef DEBUG_SUSPEND - { - char buffer[128]; - __kmp_print_cond( buffer, &th->th.th_suspend_cv ); - __kmp_printf( "__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid, target_gtid, buffer ); - } -#endif - - - status = pthread_cond_signal( &th->th.th_suspend_cv.c_cond ); - KMP_CHECK_SYSFAIL( "pthread_cond_signal", status ); - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n", - gtid, target_gtid ) ); -} - -void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { - __kmp_resume_template(target_gtid, flag); -} -void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { - __kmp_resume_template(target_gtid, flag); -} -void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { - 
__kmp_resume_template(target_gtid, flag); -} - -void -__kmp_resume_monitor() -{ - int status; -#ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; - KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", - gtid, KMP_GTID_MONITOR ) ); - KMP_DEBUG_ASSERT( gtid != KMP_GTID_MONITOR ); -#endif - status = pthread_mutex_lock( &__kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); -#ifdef DEBUG_SUSPEND - { - char buffer[128]; - __kmp_print_cond( buffer, &__kmp_wait_cv.c_cond ); - __kmp_printf( "__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid, KMP_GTID_MONITOR, buffer ); - } -#endif - status = pthread_cond_signal( &__kmp_wait_cv.c_cond ); - KMP_CHECK_SYSFAIL( "pthread_cond_signal", status ); - status = pthread_mutex_unlock( &__kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d exiting after signaling wake up for T#%d\n", - gtid, KMP_GTID_MONITOR ) ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_yield( int cond ) -{ - if (cond && __kmp_yielding_on) { - sched_yield(); - } -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_gtid_set_specific( int gtid ) -{ - int status; - KMP_ASSERT( __kmp_init_runtime ); - status = pthread_setspecific( __kmp_gtid_threadprivate_key, (void*)(intptr_t)(gtid+1) ); - KMP_CHECK_SYSFAIL( "pthread_setspecific", status ); -} - -int -__kmp_gtid_get_specific() -{ - int gtid; - if ( !__kmp_init_runtime ) { - KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) ); - return KMP_GTID_SHUTDOWN; - } - gtid = (int)(size_t)pthread_getspecific( __kmp_gtid_threadprivate_key ); - if ( gtid == 0 ) { - gtid = KMP_GTID_DNE; - } - 
else { - gtid--; - } - KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", - __kmp_gtid_threadprivate_key, gtid )); - return gtid; -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -double -__kmp_read_cpu_time( void ) -{ - /*clock_t t;*/ - struct tms buffer; - - /*t =*/ times( & buffer ); - - return (buffer.tms_utime + buffer.tms_cutime) / (double) CLOCKS_PER_SEC; -} - -int -__kmp_read_system_info( struct kmp_sys_info *info ) -{ - int status; - struct rusage r_usage; - - memset( info, 0, sizeof( *info ) ); - - status = getrusage( RUSAGE_SELF, &r_usage); - KMP_CHECK_SYSFAIL_ERRNO( "getrusage", status ); - - info->maxrss = r_usage.ru_maxrss; /* the maximum resident set size utilized (in kilobytes) */ - info->minflt = r_usage.ru_minflt; /* the number of page faults serviced without any I/O */ - info->majflt = r_usage.ru_majflt; /* the number of page faults serviced that required I/O */ - info->nswap = r_usage.ru_nswap; /* the number of times a process was "swapped" out of memory */ - info->inblock = r_usage.ru_inblock; /* the number of times the file system had to perform input */ - info->oublock = r_usage.ru_oublock; /* the number of times the file system had to perform output */ - info->nvcsw = r_usage.ru_nvcsw; /* the number of times a context switch was voluntarily */ - info->nivcsw = r_usage.ru_nivcsw; /* the number of times a context switch was forced */ - - return (status != 0); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_read_system_time( double *delta ) -{ - double t_ns; - struct timeval tval; - struct timespec stop; - int status; - - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &stop ); - t_ns = TS2NS(stop) - 
TS2NS(__kmp_sys_timer_data.start); - *delta = (t_ns * 1e-9); -} - -void -__kmp_clear_system_time( void ) -{ - struct timeval tval; - int status; - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &__kmp_sys_timer_data.start ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#ifdef BUILD_TV - -void -__kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr ) -{ - struct tv_data *p; - - p = (struct tv_data *) __kmp_allocate( sizeof( *p ) ); - - p->u.tp.global_addr = global_addr; - p->u.tp.thread_addr = thread_addr; - - p->type = (void *) 1; - - p->next = th->th.th_local.tv_data; - th->th.th_local.tv_data = p; - - if ( p->next == 0 ) { - int rc = pthread_setspecific( __kmp_tv_key, p ); - KMP_CHECK_SYSFAIL( "pthread_setspecific", rc ); - } -} - -#endif /* BUILD_TV */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -static int -__kmp_get_xproc( void ) { - - int r = 0; - - #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - - r = sysconf( _SC_NPROCESSORS_ONLN ); - - #elif KMP_OS_DARWIN - - // Bug C77011 High "OpenMP Threads and number of active cores". - - // Find the number of available CPUs. - kern_return_t rc; - host_basic_info_data_t info; - mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT; - rc = host_info( mach_host_self(), HOST_BASIC_INFO, (host_info_t) & info, & num ); - if ( rc == 0 && num == HOST_BASIC_INFO_COUNT ) { - // Cannot use KA_TRACE() here because this code works before trace support is - // initialized. - r = info.avail_cpus; - } else { - KMP_WARNING( CantGetNumAvailCPU ); - KMP_INFORM( AssumedNumCPU ); - }; // if - - #else - - #error "Unknown or unsupported OS." - - #endif - - return r > 0 ? 
r : 2; /* guess value of 2 if OS told us 0 */ - -} // __kmp_get_xproc - -int -__kmp_read_from_file( char const *path, char const *format, ... ) -{ - int result; - va_list args; - - va_start(args, format); - FILE *f = fopen(path, "rb"); - if ( f == NULL ) - return 0; - result = vfscanf(f, format, args); - fclose(f); - - return result; -} - -void -__kmp_runtime_initialize( void ) -{ - int status; - pthread_mutexattr_t mutex_attr; - pthread_condattr_t cond_attr; - - if ( __kmp_init_runtime ) { - return; - }; // if - - #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) - if ( ! __kmp_cpuinfo.initialized ) { - __kmp_query_cpuid( &__kmp_cpuinfo ); - }; // if - #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - __kmp_xproc = __kmp_get_xproc(); - - if ( sysconf( _SC_THREADS ) ) { - - /* Query the maximum number of threads */ - __kmp_sys_max_nth = sysconf( _SC_THREAD_THREADS_MAX ); - if ( __kmp_sys_max_nth == -1 ) { - /* Unlimited threads for NPTL */ - __kmp_sys_max_nth = INT_MAX; - } - else if ( __kmp_sys_max_nth <= 1 ) { - /* Can't tell, just use PTHREAD_THREADS_MAX */ - __kmp_sys_max_nth = KMP_MAX_NTH; - } - - /* Query the minimum stack size */ - __kmp_sys_min_stksize = sysconf( _SC_THREAD_STACK_MIN ); - if ( __kmp_sys_min_stksize <= 1 ) { - __kmp_sys_min_stksize = KMP_MIN_STKSIZE; - } - } - - /* Set up minimum number of threads to switch to TLS gtid */ - __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; - - #ifdef BUILD_TV - { - int rc = pthread_key_create( & __kmp_tv_key, 0 ); - KMP_CHECK_SYSFAIL( "pthread_key_create", rc ); - } - #endif - - status = pthread_key_create( &__kmp_gtid_threadprivate_key, __kmp_internal_end_dest ); - KMP_CHECK_SYSFAIL( "pthread_key_create", status ); - status = pthread_mutexattr_init( & mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status ); - status = pthread_mutex_init( & __kmp_wait_mx.m_mutex, & mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutex_init", status ); - status = pthread_condattr_init( & cond_attr ); - KMP_CHECK_SYSFAIL( 
"pthread_condattr_init", status ); - status = pthread_cond_init( & __kmp_wait_cv.c_cond, & cond_attr ); - KMP_CHECK_SYSFAIL( "pthread_cond_init", status ); -#if USE_ITT_BUILD - __kmp_itt_initialize(); -#endif /* USE_ITT_BUILD */ - - __kmp_init_runtime = TRUE; -} - -void -__kmp_runtime_destroy( void ) -{ - int status; - - if ( ! __kmp_init_runtime ) { - return; // Nothing to do. - }; - -#if USE_ITT_BUILD - __kmp_itt_destroy(); -#endif /* USE_ITT_BUILD */ - - status = pthread_key_delete( __kmp_gtid_threadprivate_key ); - KMP_CHECK_SYSFAIL( "pthread_key_delete", status ); - #ifdef BUILD_TV - status = pthread_key_delete( __kmp_tv_key ); - KMP_CHECK_SYSFAIL( "pthread_key_delete", status ); - #endif - - status = pthread_mutex_destroy( & __kmp_wait_mx.m_mutex ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_mutex_destroy", status ); - } - status = pthread_cond_destroy( & __kmp_wait_cv.c_cond ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_cond_destroy", status ); - } - #if KMP_AFFINITY_SUPPORTED - __kmp_affinity_uninitialize(); - #endif - - __kmp_init_runtime = FALSE; -} - - -/* Put the thread to sleep for a time period */ -/* NOTE: not currently used anywhere */ -void -__kmp_thread_sleep( int millis ) -{ - sleep( ( millis + 500 ) / 1000 ); -} - -/* Calculate the elapsed wall clock time for the user */ -void -__kmp_elapsed( double *t ) -{ - int status; -# ifdef FIX_SGI_CLOCK - struct timespec ts; - - status = clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ); - KMP_CHECK_SYSFAIL_ERRNO( "clock_gettime", status ); - *t = (double) ts.tv_nsec * (1.0 / (double) KMP_NSEC_PER_SEC) + - (double) ts.tv_sec; -# else - struct timeval tv; - - status = gettimeofday( & tv, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - *t = (double) tv.tv_usec * (1.0 / (double) KMP_USEC_PER_SEC) + - (double) tv.tv_sec; -# endif -} - -/* Calculate the elapsed wall clock tick for the user */ -void -__kmp_elapsed_tick( double *t ) -{ - *t = 1 / (double) 
CLOCKS_PER_SEC; -} - -/* - Determine whether the given address is mapped into the current address space. -*/ - -int -__kmp_is_address_mapped( void * addr ) { - - int found = 0; - int rc; - - #if KMP_OS_LINUX || KMP_OS_FREEBSD - - /* - On Linux* OS, read the /proc//maps pseudo-file to get all the address ranges mapped - into the address space. - */ - - char * name = __kmp_str_format( "/proc/%d/maps", getpid() ); - FILE * file = NULL; - - file = fopen( name, "r" ); - KMP_ASSERT( file != NULL ); - - for ( ; ; ) { - - void * beginning = NULL; - void * ending = NULL; - char perms[ 5 ]; - - rc = fscanf( file, "%p-%p %4s %*[^\n]\n", & beginning, & ending, perms ); - if ( rc == EOF ) { - break; - }; // if - KMP_ASSERT( rc == 3 && KMP_STRLEN( perms ) == 4 ); // Make sure all fields are read. - - // Ending address is not included in the region, but beginning is. - if ( ( addr >= beginning ) && ( addr < ending ) ) { - perms[ 2 ] = 0; // 3th and 4th character does not matter. - if ( strcmp( perms, "rw" ) == 0 ) { - // Memory we are looking for should be readable and writable. - found = 1; - }; // if - break; - }; // if - - }; // forever - - // Free resources. - fclose( file ); - KMP_INTERNAL_FREE( name ); - - #elif KMP_OS_DARWIN - - /* - On OS X*, /proc pseudo filesystem is not available. Try to read memory using vm - interface. - */ - - int buffer; - vm_size_t count; - rc = - vm_read_overwrite( - mach_task_self(), // Task to read memory of. - (vm_address_t)( addr ), // Address to read from. - 1, // Number of bytes to be read. - (vm_address_t)( & buffer ), // Address of buffer to save read bytes in. - & count // Address of var to save number of read bytes in. - ); - if ( rc == 0 ) { - // Memory successfully read. 
- found = 1; - }; // if - - #elif KMP_OS_FREEBSD || KMP_OS_NETBSD - - // FIXME(FreeBSD, NetBSD): Implement this - found = 1; - - #else - - #error "Unknown or unsupported OS" - - #endif - - return found; - -} // __kmp_is_address_mapped - -#ifdef USE_LOAD_BALANCE - - -# if KMP_OS_DARWIN - -// The function returns the rounded value of the system load average -// during given time interval which depends on the value of -// __kmp_load_balance_interval variable (default is 60 sec, other values -// may be 300 sec or 900 sec). -// It returns -1 in case of error. -int -__kmp_get_load_balance( int max ) -{ - double averages[3]; - int ret_avg = 0; - - int res = getloadavg( averages, 3 ); - - //Check __kmp_load_balance_interval to determine which of averages to use. - // getloadavg() may return the number of samples less than requested that is - // less than 3. - if ( __kmp_load_balance_interval < 180 && ( res >= 1 ) ) { - ret_avg = averages[0];// 1 min - } else if ( ( __kmp_load_balance_interval >= 180 - && __kmp_load_balance_interval < 600 ) && ( res >= 2 ) ) { - ret_avg = averages[1];// 5 min - } else if ( ( __kmp_load_balance_interval >= 600 ) && ( res == 3 ) ) { - ret_avg = averages[2];// 15 min - } else {// Error occurred - return -1; - } - - return ret_avg; -} - -# else // Linux* OS - -// The fuction returns number of running (not sleeping) threads, or -1 in case of error. -// Error could be reported if Linux* OS kernel too old (without "/proc" support). -// Counting running threads stops if max running threads encountered. -int -__kmp_get_load_balance( int max ) -{ - static int permanent_error = 0; - - static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algortihm */ - static double glb_call_time = 0; /* Thread balance algorithm call time */ - - int running_threads = 0; // Number of running threads in the system. - - DIR * proc_dir = NULL; // Handle of "/proc/" directory. 
- struct dirent * proc_entry = NULL; - - kmp_str_buf_t task_path; // "/proc//task//" path. - DIR * task_dir = NULL; // Handle of "/proc//task//" directory. - struct dirent * task_entry = NULL; - int task_path_fixed_len; - - kmp_str_buf_t stat_path; // "/proc//task//stat" path. - int stat_file = -1; - int stat_path_fixed_len; - - int total_processes = 0; // Total number of processes in system. - int total_threads = 0; // Total number of threads in system. - - double call_time = 0.0; - - __kmp_str_buf_init( & task_path ); - __kmp_str_buf_init( & stat_path ); - - __kmp_elapsed( & call_time ); - - if ( glb_call_time && - ( call_time - glb_call_time < __kmp_load_balance_interval ) ) { - running_threads = glb_running_threads; - goto finish; - } - - glb_call_time = call_time; - - // Do not spend time on scanning "/proc/" if we have a permanent error. - if ( permanent_error ) { - running_threads = -1; - goto finish; - }; // if - - if ( max <= 0 ) { - max = INT_MAX; - }; // if - - // Open "/proc/" directory. - proc_dir = opendir( "/proc" ); - if ( proc_dir == NULL ) { - // Cannot open "/prroc/". Probably the kernel does not support it. Return an error now and - // in subsequent calls. - running_threads = -1; - permanent_error = 1; - goto finish; - }; // if - - // Initialize fixed part of task_path. This part will not change. - __kmp_str_buf_cat( & task_path, "/proc/", 6 ); - task_path_fixed_len = task_path.used; // Remember number of used characters. - - proc_entry = readdir( proc_dir ); - while ( proc_entry != NULL ) { - // Proc entry is a directory and name starts with a digit. Assume it is a process' - // directory. - if ( proc_entry->d_type == DT_DIR && isdigit( proc_entry->d_name[ 0 ] ) ) { - - ++ total_processes; - // Make sure init process is the very first in "/proc", so we can replace - // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes == 1. 
- // We are going to check that total_processes == 1 => d_name == "1" is true (where - // "=>" is implication). Since C++ does not have => operator, let us replace it with its - // equivalent: a => b == ! a || b. - KMP_DEBUG_ASSERT( total_processes != 1 || strcmp( proc_entry->d_name, "1" ) == 0 ); - - // Construct task_path. - task_path.used = task_path_fixed_len; // Reset task_path to "/proc/". - __kmp_str_buf_cat( & task_path, proc_entry->d_name, KMP_STRLEN( proc_entry->d_name ) ); - __kmp_str_buf_cat( & task_path, "/task", 5 ); - - task_dir = opendir( task_path.str ); - if ( task_dir == NULL ) { - // Process can finish between reading "/proc/" directory entry and opening process' - // "task/" directory. So, in general case we should not complain, but have to skip - // this process and read the next one. - // But on systems with no "task/" support we will spend lot of time to scan "/proc/" - // tree again and again without any benefit. "init" process (its pid is 1) should - // exist always, so, if we cannot open "/proc/1/task/" directory, it means "task/" - // is not supported by kernel. Report an error now and in the future. - if ( strcmp( proc_entry->d_name, "1" ) == 0 ) { - running_threads = -1; - permanent_error = 1; - goto finish; - }; // if - } else { - // Construct fixed part of stat file path. - __kmp_str_buf_clear( & stat_path ); - __kmp_str_buf_cat( & stat_path, task_path.str, task_path.used ); - __kmp_str_buf_cat( & stat_path, "/", 1 ); - stat_path_fixed_len = stat_path.used; - - task_entry = readdir( task_dir ); - while ( task_entry != NULL ) { - // It is a directory and name starts with a digit. - if ( proc_entry->d_type == DT_DIR && isdigit( task_entry->d_name[ 0 ] ) ) { - - ++ total_threads; - - // Consruct complete stat file path. Easiest way would be: - // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, task_entry->d_name ); - // but seriae of __kmp_str_buf_cat works a bit faster. 
- stat_path.used = stat_path_fixed_len; // Reset stat path to its fixed part. - __kmp_str_buf_cat( & stat_path, task_entry->d_name, KMP_STRLEN( task_entry->d_name ) ); - __kmp_str_buf_cat( & stat_path, "/stat", 5 ); - - // Note: Low-level API (open/read/close) is used. High-level API - // (fopen/fclose) works ~ 30 % slower. - stat_file = open( stat_path.str, O_RDONLY ); - if ( stat_file == -1 ) { - // We cannot report an error because task (thread) can terminate just - // before reading this file. - } else { - /* - Content of "stat" file looks like: - - 24285 (program) S ... - - It is a single line (if program name does not include fanny - symbols). First number is a thread id, then name of executable file - name in paretheses, then state of the thread. We need just thread - state. - - Good news: Length of program name is 15 characters max. Longer - names are truncated. - - Thus, we need rather short buffer: 15 chars for program name + - 2 parenthesis, + 3 spaces + ~7 digits of pid = 37. - - Bad news: Program name may contain special symbols like space, - closing parenthesis, or even new line. This makes parsing "stat" - file not 100 % reliable. In case of fanny program names parsing - may fail (report incorrect thread state). - - Parsing "status" file looks more promissing (due to different - file structure and escaping special symbols) but reading and - parsing of "status" file works slower. - - -- ln - */ - char buffer[ 65 ]; - int len; - len = read( stat_file, buffer, sizeof( buffer ) - 1 ); - if ( len >= 0 ) { - buffer[ len ] = 0; - // Using scanf: - // sscanf( buffer, "%*d (%*s) %c ", & state ); - // looks very nice, but searching for a closing parenthesis works a - // bit faster. 
- char * close_parent = strstr( buffer, ") " ); - if ( close_parent != NULL ) { - char state = * ( close_parent + 2 ); - if ( state == 'R' ) { - ++ running_threads; - if ( running_threads >= max ) { - goto finish; - }; // if - }; // if - }; // if - }; // if - close( stat_file ); - stat_file = -1; - }; // if - }; // if - task_entry = readdir( task_dir ); - }; // while - closedir( task_dir ); - task_dir = NULL; - }; // if - }; // if - proc_entry = readdir( proc_dir ); - }; // while - - // - // There _might_ be a timing hole where the thread executing this - // code get skipped in the load balance, and running_threads is 0. - // Assert in the debug builds only!!! - // - KMP_DEBUG_ASSERT( running_threads > 0 ); - if ( running_threads <= 0 ) { - running_threads = 1; - } - - finish: // Clean up and exit. - if ( proc_dir != NULL ) { - closedir( proc_dir ); - }; // if - __kmp_str_buf_free( & task_path ); - if ( task_dir != NULL ) { - closedir( task_dir ); - }; // if - __kmp_str_buf_free( & stat_path ); - if ( stat_file != -1 ) { - close( stat_file ); - }; // if - - glb_running_threads = running_threads; - - return running_threads; - -} // __kmp_get_load_balance - -# endif // KMP_OS_DARWIN - -#endif // USE_LOAD_BALANCE - -#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC) - -// we really only need the case with 1 argument, because CLANG always build -// a struct of pointers to shared variables referenced in the outlined function -int -__kmp_invoke_microtask( microtask_t pkfn, - int gtid, int tid, - int argc, void *p_argv[] -#if OMPT_SUPPORT - , void **exit_frame_ptr -#endif -) -{ -#if OMPT_SUPPORT - *exit_frame_ptr = __builtin_frame_address(0); -#endif - - switch (argc) { - default: - fprintf(stderr, "Too many args to microtask: %d!\n", argc); - fflush(stderr); - exit(-1); - case 0: - (*pkfn)(>id, &tid); - break; - case 1: - (*pkfn)(>id, &tid, p_argv[0]); - break; - case 2: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1]); - break; - case 3: - (*pkfn)(>id, &tid, p_argv[0], 
p_argv[1], p_argv[2]); - break; - case 4: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]); - break; - case 5: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]); - break; - case 6: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5]); - break; - case 7: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6]); - break; - case 8: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7]); - break; - case 9: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7], p_argv[8]); - break; - case 10: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]); - break; - case 11: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]); - break; - case 12: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], - p_argv[11]); - break; - case 13: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], - p_argv[11], p_argv[12]); - break; - case 14: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], - p_argv[11], p_argv[12], p_argv[13]); - break; - case 15: - (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], - p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], - p_argv[11], p_argv[12], p_argv[13], p_argv[14]); - break; - } - -#if OMPT_SUPPORT - *exit_frame_ptr = 0; -#endif - - return 1; -} - -#endif - -// end of file // - +/* + * z_Linux_util.c -- platform specific routines. 
+ */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_wrapper_getpid.h" +#include "kmp_itt.h" +#include "kmp_str.h" +#include "kmp_i18n.h" +#include "kmp_io.h" +#include "kmp_stats.h" +#include "kmp_wait_release.h" + +#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD +# include +#endif +#include +#include // HUGE_VAL. +#include +#include +#include +#include + +#if KMP_OS_LINUX && !KMP_OS_CNK +# include +# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) +// We should really include , but that causes compatibility problems on different +// Linux* OS distributions that either require that you include (or break when you try to include) +// . +// Since all we need is the two macros below (which are part of the kernel ABI, so can't change) +// we just define the constants here and don't include +# ifndef FUTEX_WAIT +# define FUTEX_WAIT 0 +# endif +# ifndef FUTEX_WAKE +# define FUTEX_WAKE 1 +# endif +# endif +#elif KMP_OS_DARWIN +# include +# include +#elif KMP_OS_FREEBSD +# include +#endif + + +#include +#include +#include + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +struct kmp_sys_timer { + struct timespec start; +}; + +// Convert timespec to nanoseconds. 
+#define TS2NS(timespec) (((timespec).tv_sec * 1e9) + (timespec).tv_nsec) + +static struct kmp_sys_timer __kmp_sys_timer_data; + +#if KMP_HANDLE_SIGNALS + typedef void (* sig_func_t )( int ); + STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[ NSIG ]; + static sigset_t __kmp_sigset; +#endif + +static int __kmp_init_runtime = FALSE; + +static int __kmp_fork_count = 0; + +static pthread_condattr_t __kmp_suspend_cond_attr; +static pthread_mutexattr_t __kmp_suspend_mutex_attr; + +static kmp_cond_align_t __kmp_wait_cv; +static kmp_mutex_align_t __kmp_wait_mx; + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#ifdef DEBUG_SUSPEND +static void +__kmp_print_cond( char *buffer, kmp_cond_align_t *cond ) +{ + KMP_SNPRINTF( buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))", + cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock, + cond->c_cond.__c_waiting ); +} +#endif + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED) + +/* + * Affinity support + */ + +/* + * On some of the older OS's that we build on, these constants aren't present + * in #included from . They must be the same on + * all systems of the same arch where they are defined, and they cannot change. + * stone forever. + */ + +# if KMP_ARCH_X86 || KMP_ARCH_ARM +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 241 +# elif __NR_sched_setaffinity != 241 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 242 +# elif __NR_sched_getaffinity != 242 +# error Wrong code for getaffinity system call. 
+# endif /* __NR_sched_getaffinity */ + +# elif KMP_ARCH_AARCH64 +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 122 +# elif __NR_sched_setaffinity != 122 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 123 +# elif __NR_sched_getaffinity != 123 +# error Wrong code for getaffinity system call. +# endif /* __NR_sched_getaffinity */ + +# elif KMP_ARCH_X86_64 +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 203 +# elif __NR_sched_setaffinity != 203 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 204 +# elif __NR_sched_getaffinity != 204 +# error Wrong code for getaffinity system call. +# endif /* __NR_sched_getaffinity */ + +# elif KMP_ARCH_PPC64 +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 222 +# elif __NR_sched_setaffinity != 222 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 223 +# elif __NR_sched_getaffinity != 223 +# error Wrong code for getaffinity system call. 
+# endif /* __NR_sched_getaffinity */ + + +# else +# error Unknown or unsupported architecture + +# endif /* KMP_ARCH_* */ + +int +__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) +{ + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); +#if KMP_USE_HWLOC + int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); +#else + int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask ); +#endif + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FatalSysError ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; +} + +int +__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) +{ + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + +#if KMP_USE_HWLOC + int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); +#else + int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask ); +#endif + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FatalSysError ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; +} + +void +__kmp_affinity_bind_thread( int which ) +{ + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); + + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(which, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); +} + +/* + * Determine if we can access affinity functionality on this version of + * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set + * __kmp_affin_mask_size to the appropriate value (0 means not capable). 
+ */ +void +__kmp_affinity_determine_capable(const char *env_var) +{ + // + // Check and see if the OS supports thread affinity. + // + +# define KMP_CPU_SET_SIZE_LIMIT (1024*1024) + + int gCode; + int sCode; + kmp_affin_mask_t *buf; + buf = ( kmp_affin_mask_t * ) KMP_INTERNAL_MALLOC( KMP_CPU_SET_SIZE_LIMIT ); + + // If Linux* OS: + // If the syscall fails or returns a suggestion for the size, + // then we don't have to search for an appropriate size. + gCode = syscall( __NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf ); + KA_TRACE(30, ( "__kmp_affinity_determine_capable: " + "initial getaffinity call returned %d errno = %d\n", + gCode, errno)); + + //if ((gCode < 0) && (errno == ENOSYS)) + if (gCode < 0) { + // + // System call not supported + // + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none) + && (__kmp_affinity_type != affinity_default) + && (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + __kmp_msg( + kmp_ms_warning, + KMP_MSG( GetAffSysCallNotSupported, env_var ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + KMP_AFFINITY_DISABLE(); + KMP_INTERNAL_FREE(buf); + return; + } + if (gCode > 0) { // Linux* OS only + // The optimal situation: the OS returns the size of the buffer + // it expects. + // + // A verification of correct behavior is that Isetaffinity on a NULL + // buffer with the same size fails with errno set to EFAULT. 
+ sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL ); + KA_TRACE(30, ( "__kmp_affinity_determine_capable: " + "setaffinity for mask size %d returned %d errno = %d\n", + gCode, sCode, errno)); + if (sCode < 0) { + if (errno == ENOSYS) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none) + && (__kmp_affinity_type != affinity_default) + && (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + __kmp_msg( + kmp_ms_warning, + KMP_MSG( SetAffSysCallNotSupported, env_var ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + KMP_AFFINITY_DISABLE(); + KMP_INTERNAL_FREE(buf); + } + if (errno == EFAULT) { + KMP_AFFINITY_ENABLE(gCode); + KA_TRACE(10, ( "__kmp_affinity_determine_capable: " + "affinity supported (mask size %d)\n", + (int)__kmp_affin_mask_size)); + KMP_INTERNAL_FREE(buf); + return; + } + } + } + + // + // Call the getaffinity system call repeatedly with increasing set sizes + // until we succeed, or reach an upper bound on the search. 
+ // + KA_TRACE(30, ( "__kmp_affinity_determine_capable: " + "searching for proper set size\n")); + int size; + for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) { + gCode = syscall( __NR_sched_getaffinity, 0, size, buf ); + KA_TRACE(30, ( "__kmp_affinity_determine_capable: " + "getaffinity for mask size %d returned %d errno = %d\n", size, + gCode, errno)); + + if (gCode < 0) { + if ( errno == ENOSYS ) + { + // + // We shouldn't get here + // + KA_TRACE(30, ( "__kmp_affinity_determine_capable: " + "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n", + size)); + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none) + && (__kmp_affinity_type != affinity_default) + && (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + __kmp_msg( + kmp_ms_warning, + KMP_MSG( GetAffSysCallNotSupported, env_var ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + KMP_AFFINITY_DISABLE(); + KMP_INTERNAL_FREE(buf); + return; + } + continue; + } + + sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL ); + KA_TRACE(30, ( "__kmp_affinity_determine_capable: " + "setaffinity for mask size %d returned %d errno = %d\n", + gCode, sCode, errno)); + if (sCode < 0) { + if (errno == ENOSYS) { // Linux* OS only + // + // We shouldn't get here + // + KA_TRACE(30, ( "__kmp_affinity_determine_capable: " + "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n", + size)); + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none) + && (__kmp_affinity_type != affinity_default) + && (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + __kmp_msg( + kmp_ms_warning, + KMP_MSG( SetAffSysCallNotSupported, env_var ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + KMP_AFFINITY_DISABLE(); + KMP_INTERNAL_FREE(buf); + return; + } + if (errno == EFAULT) { + KMP_AFFINITY_ENABLE(gCode); + KA_TRACE(10, ( "__kmp_affinity_determine_capable: " + "affinity supported 
(mask size %d)\n", + (int)__kmp_affin_mask_size)); + KMP_INTERNAL_FREE(buf); + return; + } + } + } + //int error = errno; // save uncaught error code + KMP_INTERNAL_FREE(buf); + // errno = error; // restore uncaught error code, will be printed at the next KMP_WARNING below + + // + // Affinity is not supported + // + KMP_AFFINITY_DISABLE(); + KA_TRACE(10, ( "__kmp_affinity_determine_capable: " + "cannot determine mask size - affinity not supported\n")); + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none) + && (__kmp_affinity_type != affinity_default) + && (__kmp_affinity_type != affinity_disabled))) { + KMP_WARNING( AffCantGetMaskSize, env_var ); + } +} + +#endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && !KMP_OS_CNK + +int +__kmp_futex_determine_capable() +{ + int loc = 0; + int rc = syscall( __NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0 ); + int retval = ( rc == 0 ) || ( errno != ENOSYS ); + + KA_TRACE(10, ( "__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, + errno ) ); + KA_TRACE(10, ( "__kmp_futex_determine_capable: futex syscall%s supported\n", + retval ? "" : " not" ) ); + + return retval; +} + +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! 
KMP_ASM_INTRINS) +/* + * Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to + * use compare_and_store for these routines + */ + +kmp_int8 +__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d ) +{ + kmp_int8 old_value, new_value; + + old_value = TCR_1( *p ); + new_value = old_value | d; + + while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_1( *p ); + new_value = old_value | d; + } + return old_value; +} + +kmp_int8 +__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d ) +{ + kmp_int8 old_value, new_value; + + old_value = TCR_1( *p ); + new_value = old_value & d; + + while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_1( *p ); + new_value = old_value & d; + } + return old_value; +} + +kmp_int32 +__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d ) +{ + kmp_int32 old_value, new_value; + + old_value = TCR_4( *p ); + new_value = old_value | d; + + while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_4( *p ); + new_value = old_value | d; + } + return old_value; +} + +kmp_int32 +__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) +{ + kmp_int32 old_value, new_value; + + old_value = TCR_4( *p ); + new_value = old_value & d; + + while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_4( *p ); + new_value = old_value & d; + } + return old_value; +} + +# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 +kmp_int8 +__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d ) +{ + kmp_int8 old_value, new_value; + + old_value = TCR_1( *p ); + new_value = old_value + d; + + while ( ! 
KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_1( *p ); + new_value = old_value + d; + } + return old_value; +} + +kmp_int64 +__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) +{ + kmp_int64 old_value, new_value; + + old_value = TCR_8( *p ); + new_value = old_value + d; + + while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_8( *p ); + new_value = old_value + d; + } + return old_value; +} +# endif /* KMP_ARCH_X86 */ + +kmp_int64 +__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d ) +{ + kmp_int64 old_value, new_value; + + old_value = TCR_8( *p ); + new_value = old_value | d; + while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_8( *p ); + new_value = old_value | d; + } + return old_value; +} + +kmp_int64 +__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d ) +{ + kmp_int64 old_value, new_value; + + old_value = TCR_8( *p ); + new_value = old_value & d; + while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_8( *p ); + new_value = old_value & d; + } + return old_value; +} + +#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! 
KMP_ASM_INTRINS) */ + +void +__kmp_terminate_thread( int gtid ) +{ + int status; + kmp_info_t *th = __kmp_threads[ gtid ]; + + if ( !th ) return; + + #ifdef KMP_CANCEL_THREADS + KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) ); + status = pthread_cancel( th->th.th_info.ds.ds_thread ); + if ( status != 0 && status != ESRCH ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantTerminateWorkerThread ), + KMP_ERR( status ), + __kmp_msg_null + ); + }; // if + #endif + __kmp_yield( TRUE ); +} // + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* + * Set thread stack info according to values returned by + * pthread_getattr_np(). + * If values are unreasonable, assume call failed and use + * incremental stack refinement method instead. + * Returns TRUE if the stack parameters could be determined exactly, + * FALSE if incremental refinement is necessary. + */ +static kmp_int32 +__kmp_set_stack_info( int gtid, kmp_info_t *th ) +{ + int stack_data; +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD + /* Linux* OS only -- no pthread_getattr_np support on OS X* */ + pthread_attr_t attr; + int status; + size_t size = 0; + void * addr = 0; + + /* Always do incremental stack refinement for ubermaster threads since the initial + thread stack range can be reduced by sibling thread creation so pthread_attr_getstack + may cause thread gtid aliasing */ + if ( ! 
KMP_UBER_GTID(gtid) ) { + + /* Fetch the real thread attributes */ + status = pthread_attr_init( &attr ); + KMP_CHECK_SYSFAIL( "pthread_attr_init", status ); +#if KMP_OS_FREEBSD || KMP_OS_NETBSD + status = pthread_attr_get_np( pthread_self(), &attr ); + KMP_CHECK_SYSFAIL( "pthread_attr_get_np", status ); +#else + status = pthread_getattr_np( pthread_self(), &attr ); + KMP_CHECK_SYSFAIL( "pthread_getattr_np", status ); +#endif + status = pthread_attr_getstack( &attr, &addr, &size ); + KMP_CHECK_SYSFAIL( "pthread_attr_getstack", status ); + KA_TRACE( 60, ( "__kmp_set_stack_info: T#%d pthread_attr_getstack returned size: %lu, " + "low addr: %p\n", + gtid, size, addr )); + + status = pthread_attr_destroy( &attr ); + KMP_CHECK_SYSFAIL( "pthread_attr_destroy", status ); + } + + if ( size != 0 && addr != 0 ) { /* was stack parameter determination successful? */ + /* Store the correct base and size */ + TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size)); + TCW_PTR(th->th.th_info.ds.ds_stacksize, size); + TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); + return TRUE; + } +#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */ + /* Use incremental refinement starting from initial conservative estimate */ + TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); + TCW_PTR(th -> th.th_info.ds.ds_stackbase, &stack_data); + TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); + return FALSE; +} + +static void* +__kmp_launch_worker( void *thr ) +{ + int status, old_type, old_state; +#ifdef KMP_BLOCK_SIGNALS + sigset_t new_set, old_set; +#endif /* KMP_BLOCK_SIGNALS */ + void *exit_val; +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD + void * volatile padding = 0; +#endif + int gtid; + + gtid = ((kmp_info_t*)thr) -> th.th_info.ds.ds_gtid; + __kmp_gtid_set_specific( gtid ); +#ifdef KMP_TDATA_GTID + __kmp_gtid = gtid; +#endif +#if KMP_STATS_ENABLED + // set __thread local index to point to thread-specific stats + __kmp_stats_thread_ptr = ((kmp_info_t*)thr)->th.th_stats; +#endif + 
+#if USE_ITT_BUILD + __kmp_itt_thread_name( gtid ); +#endif /* USE_ITT_BUILD */ + +#if KMP_AFFINITY_SUPPORTED + __kmp_affinity_set_init_mask( gtid, FALSE ); +#endif + +#ifdef KMP_CANCEL_THREADS + status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type ); + KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status ); + /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */ + status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state ); + KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); +#endif + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + // + // Set the FP control regs to be a copy of + // the parallel initialization thread's. + // + __kmp_clear_x87_fpu_status_word(); + __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); + __kmp_load_mxcsr( &__kmp_init_mxcsr ); +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +#ifdef KMP_BLOCK_SIGNALS + status = sigfillset( & new_set ); + KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status ); + status = pthread_sigmask( SIG_BLOCK, & new_set, & old_set ); + KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); +#endif /* KMP_BLOCK_SIGNALS */ + +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD + if ( __kmp_stkoffset > 0 && gtid > 0 ) { + padding = KMP_ALLOCA( gtid * __kmp_stkoffset ); + } +#endif + + KMP_MB(); + __kmp_set_stack_info( gtid, (kmp_info_t*)thr ); + + __kmp_check_stack_overlap( (kmp_info_t*)thr ); + + exit_val = __kmp_launch_thread( (kmp_info_t *) thr ); + +#ifdef KMP_BLOCK_SIGNALS + status = pthread_sigmask( SIG_SETMASK, & old_set, NULL ); + KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); +#endif /* KMP_BLOCK_SIGNALS */ + + return exit_val; +} + + +/* The monitor thread controls all of the threads in the complex */ + +static void* +__kmp_launch_monitor( void *thr ) +{ + int status, old_type, old_state; +#ifdef KMP_BLOCK_SIGNALS + sigset_t new_set; +#endif /* KMP_BLOCK_SIGNALS */ + struct timespec interval; + int yield_count; + int yield_cycles = 0; + + KMP_MB(); /* Flush 
all pending memory write invalidates. */ + + KA_TRACE( 10, ("__kmp_launch_monitor: #1 launched\n" ) ); + + /* register us as the monitor thread */ + __kmp_gtid_set_specific( KMP_GTID_MONITOR ); +#ifdef KMP_TDATA_GTID + __kmp_gtid = KMP_GTID_MONITOR; +#endif + + KMP_MB(); + +#if USE_ITT_BUILD + __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread. +#endif /* USE_ITT_BUILD */ + + __kmp_set_stack_info( ((kmp_info_t*)thr)->th.th_info.ds.ds_gtid, (kmp_info_t*)thr ); + + __kmp_check_stack_overlap( (kmp_info_t*)thr ); + +#ifdef KMP_CANCEL_THREADS + status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type ); + KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status ); + /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */ + status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state ); + KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); +#endif + + #if KMP_REAL_TIME_FIX + // This is a potential fix which allows application with real-time scheduling policy work. + // However, decision about the fix is not made yet, so it is disabled by default. + { // Are program started with real-time scheduling policy? + int sched = sched_getscheduler( 0 ); + if ( sched == SCHED_FIFO || sched == SCHED_RR ) { + // Yes, we are a part of real-time application. Try to increase the priority of the + // monitor. 
+ struct sched_param param; + int max_priority = sched_get_priority_max( sched ); + int rc; + KMP_WARNING( RealTimeSchedNotSupported ); + sched_getparam( 0, & param ); + if ( param.sched_priority < max_priority ) { + param.sched_priority += 1; + rc = sched_setscheduler( 0, sched, & param ); + if ( rc != 0 ) { + int error = errno; + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantChangeMonitorPriority ), + KMP_ERR( error ), + KMP_MSG( MonitorWillStarve ), + __kmp_msg_null + ); + }; // if + } else { + // We cannot abort here, because number of CPUs may be enough for all the threads, + // including the monitor thread, so application could potentially work... + __kmp_msg( + kmp_ms_warning, + KMP_MSG( RunningAtMaxPriority ), + KMP_MSG( MonitorWillStarve ), + KMP_HNT( RunningAtMaxPriority ), + __kmp_msg_null + ); + }; // if + }; // if + TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); // AC: free thread that waits for monitor started + } + #endif // KMP_REAL_TIME_FIX + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + if ( __kmp_monitor_wakeups == 1 ) { + interval.tv_sec = 1; + interval.tv_nsec = 0; + } else { + interval.tv_sec = 0; + interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups); + } + + KA_TRACE( 10, ("__kmp_launch_monitor: #2 monitor\n" ) ); + + if (__kmp_yield_cycle) { + __kmp_yielding_on = 0; /* Start out with yielding shut off */ + yield_count = __kmp_yield_off_count; + } else { + __kmp_yielding_on = 1; /* Yielding is on permanently */ + } + + while( ! 
TCR_4( __kmp_global.g.g_done ) ) { + struct timespec now; + struct timeval tval; + + /* This thread monitors the state of the system */ + + KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) ); + + status = gettimeofday( &tval, NULL ); + KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); + TIMEVAL_TO_TIMESPEC( &tval, &now ); + + now.tv_sec += interval.tv_sec; + now.tv_nsec += interval.tv_nsec; + + if (now.tv_nsec >= KMP_NSEC_PER_SEC) { + now.tv_sec += 1; + now.tv_nsec -= KMP_NSEC_PER_SEC; + } + + status = pthread_mutex_lock( & __kmp_wait_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); + // AC: the monitor should not fall asleep if g_done has been set + if ( !TCR_4(__kmp_global.g.g_done) ) { // check once more under mutex + status = pthread_cond_timedwait( &__kmp_wait_cv.c_cond, &__kmp_wait_mx.m_mutex, &now ); + if ( status != 0 ) { + if ( status != ETIMEDOUT && status != EINTR ) { + KMP_SYSFAIL( "pthread_cond_timedwait", status ); + }; + }; + }; + status = pthread_mutex_unlock( & __kmp_wait_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); + + if (__kmp_yield_cycle) { + yield_cycles++; + if ( (yield_cycles % yield_count) == 0 ) { + if (__kmp_yielding_on) { + __kmp_yielding_on = 0; /* Turn it off now */ + yield_count = __kmp_yield_off_count; + } else { + __kmp_yielding_on = 1; /* Turn it on now */ + yield_count = __kmp_yield_on_count; + } + yield_cycles = 0; + } + } else { + __kmp_yielding_on = 1; + } + + TCW_4( __kmp_global.g.g_time.dt.t_value, + TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 ); + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + } + + KA_TRACE( 10, ("__kmp_launch_monitor: #3 cleanup\n" ) ); + +#ifdef KMP_BLOCK_SIGNALS + status = sigfillset( & new_set ); + KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status ); + status = pthread_sigmask( SIG_UNBLOCK, & new_set, NULL ); + KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); +#endif /* KMP_BLOCK_SIGNALS */ + + KA_TRACE( 10, ("__kmp_launch_monitor: #4 finished\n" ) ); + + if( __kmp_global.g.g_abort != 0 ) { + /* now we need to terminate the worker threads */ + /* the value of t_abort is the signal we caught */ + + int gtid; + + KA_TRACE( 10, ("__kmp_launch_monitor: #5 terminate sig=%d\n", __kmp_global.g.g_abort ) ); + + /* terminate the OpenMP worker threads */ + /* TODO this is not valid for sibling threads!! + * the uber master might not be 0 anymore.. */ + for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) + __kmp_terminate_thread( gtid ); + + __kmp_cleanup(); + + KA_TRACE( 10, ("__kmp_launch_monitor: #6 raise sig=%d\n", __kmp_global.g.g_abort ) ); + + if (__kmp_global.g.g_abort > 0) + raise( __kmp_global.g.g_abort ); + + } + + KA_TRACE( 10, ("__kmp_launch_monitor: #7 exit\n" ) ); + + return thr; +} + +void +__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ) +{ + pthread_t handle; + pthread_attr_t thread_attr; + int status; + + + th->th.th_info.ds.ds_gtid = gtid; + +#if KMP_STATS_ENABLED + // sets up worker thread stats + __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid); + + // th->th.th_stats is used to transfer thread specific stats-pointer to __kmp_launch_worker + // So when thread is created (goes into __kmp_launch_worker) it will + // set it's __thread local pointer to th->th.th_stats + th->th.th_stats = __kmp_stats_list.push_back(gtid); + if(KMP_UBER_GTID(gtid)) { + __kmp_stats_start_time = tsc_tick_count::now(); + __kmp_stats_thread_ptr = th->th.th_stats; + __kmp_stats_init(); + KMP_START_EXPLICIT_TIMER(OMP_serial); + KMP_START_EXPLICIT_TIMER(OMP_start_end); + } + __kmp_release_tas_lock(&__kmp_stats_lock, gtid); + +#endif 
// KMP_STATS_ENABLED + + if ( KMP_UBER_GTID(gtid) ) { + KA_TRACE( 10, ("__kmp_create_worker: uber thread (%d)\n", gtid ) ); + th -> th.th_info.ds.ds_thread = pthread_self(); + __kmp_set_stack_info( gtid, th ); + __kmp_check_stack_overlap( th ); + return; + }; // if + + KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + +#ifdef KMP_THREAD_ATTR + { + status = pthread_attr_init( &thread_attr ); + if ( status != 0 ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantInitThreadAttrs ), + KMP_ERR( status ), + __kmp_msg_null + ); + }; // if + status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE ); + if ( status != 0 ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetWorkerState ), + KMP_ERR( status ), + __kmp_msg_null + ); + }; // if + + /* Set stack size for this thread now. + * The multiple of 2 is there because on some machines, requesting an unusual stacksize + * causes the thread to have an offset before the dummy alloca() takes place to create the + * offset. Since we want the user to have a sufficient stacksize AND support a stack offset, we + * alloca() twice the offset so that the upcoming alloca() does not eliminate any premade + * offset, and also gives the user the stack space they requested for all threads */ + stack_size += gtid * __kmp_stkoffset * 2; + + KA_TRACE( 10, ( "__kmp_create_worker: T#%d, default stacksize = %lu bytes, " + "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n", + gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) ); + +# ifdef _POSIX_THREAD_ATTR_STACKSIZE + status = pthread_attr_setstacksize( & thread_attr, stack_size ); +# ifdef KMP_BACKUP_STKSIZE + if ( status != 0 ) { + if ( ! 
__kmp_env_stksize ) { + stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset; + __kmp_stksize = KMP_BACKUP_STKSIZE; + KA_TRACE( 10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " + "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu " + "bytes\n", + gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) + ); + status = pthread_attr_setstacksize( &thread_attr, stack_size ); + }; // if + }; // if +# endif /* KMP_BACKUP_STKSIZE */ + if ( status != 0 ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetWorkerStackSize, stack_size ), + KMP_ERR( status ), + KMP_HNT( ChangeWorkerStackSize ), + __kmp_msg_null + ); + }; // if +# endif /* _POSIX_THREAD_ATTR_STACKSIZE */ + } +#endif /* KMP_THREAD_ATTR */ + + { + status = pthread_create( & handle, & thread_attr, __kmp_launch_worker, (void *) th ); + if ( status != 0 || ! handle ) { // ??? Why do we check handle?? +#ifdef _POSIX_THREAD_ATTR_STACKSIZE + if ( status == EINVAL ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetWorkerStackSize, stack_size ), + KMP_ERR( status ), + KMP_HNT( IncreaseWorkerStackSize ), + __kmp_msg_null + ); + }; + if ( status == ENOMEM ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetWorkerStackSize, stack_size ), + KMP_ERR( status ), + KMP_HNT( DecreaseWorkerStackSize ), + __kmp_msg_null + ); + }; +#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ + if ( status == EAGAIN ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( NoResourcesForWorkerThread ), + KMP_ERR( status ), + KMP_HNT( Decrease_NUM_THREADS ), + __kmp_msg_null + ); + }; // if + KMP_SYSFAIL( "pthread_create", status ); + }; // if + + th->th.th_info.ds.ds_thread = handle; + } + +#ifdef KMP_THREAD_ATTR + { + status = pthread_attr_destroy( & thread_attr ); + if ( status ) { + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantDestroyThreadAttrs ), + KMP_ERR( status ), + __kmp_msg_null + ); + }; // if + } +#endif /* KMP_THREAD_ATTR */ + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) ); + +} // __kmp_create_worker + + +void +__kmp_create_monitor( kmp_info_t *th ) +{ + pthread_t handle; + pthread_attr_t thread_attr; + size_t size; + int status; + int auto_adj_size = FALSE; + + KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; + th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; + #if KMP_REAL_TIME_FIX + TCW_4( __kmp_global.g.g_time.dt.t_value, -1 ); // Will use it for synchronization a bit later. + #else + TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); + #endif // KMP_REAL_TIME_FIX + + #ifdef KMP_THREAD_ATTR + if ( __kmp_monitor_stksize == 0 ) { + __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; + auto_adj_size = TRUE; + } + status = pthread_attr_init( &thread_attr ); + if ( status != 0 ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantInitThreadAttrs ), + KMP_ERR( status ), + __kmp_msg_null + ); + }; // if + status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE ); + if ( status != 0 ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetMonitorState ), + KMP_ERR( status ), + __kmp_msg_null + ); + }; // if + + #ifdef _POSIX_THREAD_ATTR_STACKSIZE + status = pthread_attr_getstacksize( & thread_attr, & size ); + KMP_CHECK_SYSFAIL( "pthread_attr_getstacksize", status ); + #else + size = __kmp_sys_min_stksize; + #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ + #endif /* KMP_THREAD_ATTR */ + + if ( __kmp_monitor_stksize == 0 ) { + __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; + } + if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) { + __kmp_monitor_stksize = __kmp_sys_min_stksize; + } + + KA_TRACE( 10, ( "__kmp_create_monitor: default stacksize = %lu bytes," + "requested stacksize = %lu bytes\n", + size, __kmp_monitor_stksize ) ); + + retry: + + /* Set stack size for this thread now. 
*/ + + #ifdef _POSIX_THREAD_ATTR_STACKSIZE + KA_TRACE( 10, ( "__kmp_create_monitor: setting stacksize = %lu bytes,", + __kmp_monitor_stksize ) ); + status = pthread_attr_setstacksize( & thread_attr, __kmp_monitor_stksize ); + if ( status != 0 ) { + if ( auto_adj_size ) { + __kmp_monitor_stksize *= 2; + goto retry; + } + __kmp_msg( + kmp_ms_warning, // should this be fatal? BB + KMP_MSG( CantSetMonitorStackSize, (long int) __kmp_monitor_stksize ), + KMP_ERR( status ), + KMP_HNT( ChangeMonitorStackSize ), + __kmp_msg_null + ); + }; // if + #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ + + status = pthread_create( &handle, & thread_attr, __kmp_launch_monitor, (void *) th ); + + if ( status != 0 ) { + #ifdef _POSIX_THREAD_ATTR_STACKSIZE + if ( status == EINVAL ) { + if ( auto_adj_size && ( __kmp_monitor_stksize < (size_t)0x40000000 ) ) { + __kmp_monitor_stksize *= 2; + goto retry; + } + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ), + KMP_ERR( status ), + KMP_HNT( IncreaseMonitorStackSize ), + __kmp_msg_null + ); + }; // if + if ( status == ENOMEM ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ), + KMP_ERR( status ), + KMP_HNT( DecreaseMonitorStackSize ), + __kmp_msg_null + ); + }; // if + #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ + if ( status == EAGAIN ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( NoResourcesForMonitorThread ), + KMP_ERR( status ), + KMP_HNT( DecreaseNumberOfThreadsInUse ), + __kmp_msg_null + ); + }; // if + KMP_SYSFAIL( "pthread_create", status ); + }; // if + + th->th.th_info.ds.ds_thread = handle; + + #if KMP_REAL_TIME_FIX + // Wait for the monitor thread is really started and set its *priority*. 
+ KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == sizeof( __kmp_global.g.g_time.dt.t_value ) ); + __kmp_wait_yield_4( + (kmp_uint32 volatile *) & __kmp_global.g.g_time.dt.t_value, -1, & __kmp_neq_4, NULL + ); + #endif // KMP_REAL_TIME_FIX + + #ifdef KMP_THREAD_ATTR + status = pthread_attr_destroy( & thread_attr ); + if ( status != 0 ) { + __kmp_msg( // + kmp_ms_warning, + KMP_MSG( CantDestroyThreadAttrs ), + KMP_ERR( status ), + __kmp_msg_null + ); + }; // if + #endif + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KA_TRACE( 10, ( "__kmp_create_monitor: monitor created %#.8lx\n", th->th.th_info.ds.ds_thread ) ); + +} // __kmp_create_monitor + +void +__kmp_exit_thread( + int exit_status +) { + pthread_exit( (void *)(intptr_t) exit_status ); +} // __kmp_exit_thread + +void __kmp_resume_monitor(); + +void +__kmp_reap_monitor( kmp_info_t *th ) +{ + int status; + void *exit_val; + + KA_TRACE( 10, ("__kmp_reap_monitor: try to reap monitor thread with handle %#.8lx\n", + th->th.th_info.ds.ds_thread ) ); + + // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. + // If both tid and gtid are 0, it means the monitor did not ever start. + // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. + KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid ); + if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) { + return; + }; // if + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + + /* First, check to see whether the monitor thread exists. This could prevent a hang, + but if the monitor dies after the pthread_kill call and before the pthread_join + call, it will still hang. 
*/ + + status = pthread_kill( th->th.th_info.ds.ds_thread, 0 ); + if (status == ESRCH) { + + KA_TRACE( 10, ("__kmp_reap_monitor: monitor does not exist, returning\n") ); + + } else + { + __kmp_resume_monitor(); // Wake up the monitor thread + status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val); + if (exit_val != th) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( ReapMonitorError ), + KMP_ERR( status ), + __kmp_msg_null + ); + } + } + + th->th.th_info.ds.ds_tid = KMP_GTID_DNE; + th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; + + KA_TRACE( 10, ("__kmp_reap_monitor: done reaping monitor thread with handle %#.8lx\n", + th->th.th_info.ds.ds_thread ) ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + +} + +void +__kmp_reap_worker( kmp_info_t *th ) +{ + int status; + void *exit_val; + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KA_TRACE( 10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid ) ); + + /* First, check to see whether the worker thread exists. This could prevent a hang, + but if the worker dies after the pthread_kill call and before the pthread_join + call, it will still hang. 
*/ + + { + status = pthread_kill( th->th.th_info.ds.ds_thread, 0 ); + if (status == ESRCH) { + KA_TRACE( 10, ("__kmp_reap_worker: worker T#%d does not exist, returning\n", + th->th.th_info.ds.ds_gtid ) ); + } + else { + KA_TRACE( 10, ("__kmp_reap_worker: try to join with worker T#%d\n", + th->th.th_info.ds.ds_gtid ) ); + + status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val); +#ifdef KMP_DEBUG + /* Don't expose these to the user until we understand when they trigger */ + if ( status != 0 ) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( ReapWorkerError ), + KMP_ERR( status ), + __kmp_msg_null + ); + } + if ( exit_val != th ) { + KA_TRACE( 10, ( "__kmp_reap_worker: worker T#%d did not reap properly, " + "exit_val = %p\n", + th->th.th_info.ds.ds_gtid, exit_val ) ); + } +#endif /* KMP_DEBUG */ + } + } + + KA_TRACE( 10, ("__kmp_reap_worker: done reaping T#%d\n", th->th.th_info.ds.ds_gtid ) ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ +} + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if KMP_HANDLE_SIGNALS + + +static void +__kmp_null_handler( int signo ) +{ + // Do nothing, for doing SIG_IGN-type actions. +} // __kmp_null_handler + + +static void +__kmp_team_handler( int signo ) +{ + if ( __kmp_global.g.g_abort == 0 ) { + /* Stage 1 signal handler, let's shut down all of the threads */ + #ifdef KMP_DEBUG + __kmp_debug_printf( "__kmp_team_handler: caught signal = %d\n", signo ); + #endif + switch ( signo ) { + case SIGHUP : + case SIGINT : + case SIGQUIT : + case SIGILL : + case SIGABRT : + case SIGFPE : + case SIGBUS : + case SIGSEGV : + #ifdef SIGSYS + case SIGSYS : + #endif + case SIGTERM : + if ( __kmp_debug_buf ) { + __kmp_dump_debug_buffer( ); + }; // if + KMP_MB(); // Flush all pending memory write invalidates. + TCW_4( __kmp_global.g.g_abort, signo ); + KMP_MB(); // Flush all pending memory write invalidates. 
+ TCW_4( __kmp_global.g.g_done, TRUE ); + KMP_MB(); // Flush all pending memory write invalidates. + break; + default: + #ifdef KMP_DEBUG + __kmp_debug_printf( "__kmp_team_handler: unknown signal type" ); + #endif + break; + }; // switch + }; // if +} // __kmp_team_handler + + +static +void __kmp_sigaction( int signum, const struct sigaction * act, struct sigaction * oldact ) { + int rc = sigaction( signum, act, oldact ); + KMP_CHECK_SYSFAIL_ERRNO( "sigaction", rc ); +} + + +static void +__kmp_install_one_handler( int sig, sig_func_t handler_func, int parallel_init ) +{ + KMP_MB(); // Flush all pending memory write invalidates. + KB_TRACE( 60, ( "__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init ) ); + if ( parallel_init ) { + struct sigaction new_action; + struct sigaction old_action; + new_action.sa_handler = handler_func; + new_action.sa_flags = 0; + sigfillset( & new_action.sa_mask ); + __kmp_sigaction( sig, & new_action, & old_action ); + if ( old_action.sa_handler == __kmp_sighldrs[ sig ].sa_handler ) { + sigaddset( & __kmp_sigset, sig ); + } else { + // Restore/keep user's handler if one previously installed. + __kmp_sigaction( sig, & old_action, NULL ); + }; // if + } else { + // Save initial/system signal handlers to see if user handlers installed. + __kmp_sigaction( sig, NULL, & __kmp_sighldrs[ sig ] ); + }; // if + KMP_MB(); // Flush all pending memory write invalidates. +} // __kmp_install_one_handler + + +static void +__kmp_remove_one_handler( int sig ) +{ + KB_TRACE( 60, ( "__kmp_remove_one_handler( %d )\n", sig ) ); + if ( sigismember( & __kmp_sigset, sig ) ) { + struct sigaction old; + KMP_MB(); // Flush all pending memory write invalidates. + __kmp_sigaction( sig, & __kmp_sighldrs[ sig ], & old ); + if ( ( old.sa_handler != __kmp_team_handler ) && ( old.sa_handler != __kmp_null_handler ) ) { + // Restore the users signal handler. 
+ KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) ); + __kmp_sigaction( sig, & old, NULL ); + }; // if + sigdelset( & __kmp_sigset, sig ); + KMP_MB(); // Flush all pending memory write invalidates. + }; // if +} // __kmp_remove_one_handler + + +void +__kmp_install_signals( int parallel_init ) +{ + KB_TRACE( 10, ( "__kmp_install_signals( %d )\n", parallel_init ) ); + if ( __kmp_handle_signals || ! parallel_init ) { + // If ! parallel_init, we do not install handlers, just save original handlers. + // Let us do it even __handle_signals is 0. + sigemptyset( & __kmp_sigset ); + __kmp_install_one_handler( SIGHUP, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGQUIT, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGBUS, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init ); + #ifdef SIGSYS + __kmp_install_one_handler( SIGSYS, __kmp_team_handler, parallel_init ); + #endif // SIGSYS + __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init ); + #ifdef SIGPIPE + __kmp_install_one_handler( SIGPIPE, __kmp_team_handler, parallel_init ); + #endif // SIGPIPE + }; // if +} // __kmp_install_signals + + +void +__kmp_remove_signals( void ) +{ + int sig; + KB_TRACE( 10, ( "__kmp_remove_signals()\n" ) ); + for ( sig = 1; sig < NSIG; ++ sig ) { + __kmp_remove_one_handler( sig ); + }; // for sig +} // __kmp_remove_signals + + +#endif // KMP_HANDLE_SIGNALS + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_enable( int 
new_state ) +{ + #ifdef KMP_CANCEL_THREADS + int status, old_state; + status = pthread_setcancelstate( new_state, & old_state ); + KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); + KMP_DEBUG_ASSERT( old_state == PTHREAD_CANCEL_DISABLE ); + #endif +} + +void +__kmp_disable( int * old_state ) +{ + #ifdef KMP_CANCEL_THREADS + int status; + status = pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, old_state ); + KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); + #endif +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +static void +__kmp_atfork_prepare (void) +{ + /* nothing to do */ +} + +static void +__kmp_atfork_parent (void) +{ + /* nothing to do */ +} + +/* + Reset the library so execution in the child starts "all over again" with + clean data structures in initial states. Don't worry about freeing memory + allocated by parent, just abandon it to be safe. +*/ +static void +__kmp_atfork_child (void) +{ + /* TODO make sure this is done right for nested/sibling */ + // ATT: Memory leaks are here? TODO: Check it and fix. + /* KMP_ASSERT( 0 ); */ + + ++__kmp_fork_count; + + __kmp_init_runtime = FALSE; + __kmp_init_monitor = 0; + __kmp_init_parallel = FALSE; + __kmp_init_middle = FALSE; + __kmp_init_serial = FALSE; + TCW_4(__kmp_init_gtid, FALSE); + __kmp_init_common = FALSE; + + TCW_4(__kmp_init_user_locks, FALSE); +#if ! 
KMP_USE_DYNAMIC_LOCK + __kmp_user_lock_table.used = 1; + __kmp_user_lock_table.allocated = 0; + __kmp_user_lock_table.table = NULL; + __kmp_lock_blocks = NULL; +#endif + + __kmp_all_nth = 0; + TCW_4(__kmp_nth, 0); + + /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate here + so threadprivate doesn't use stale data */ + KA_TRACE( 10, ( "__kmp_atfork_child: checking cache address list %p\n", + __kmp_threadpriv_cache_list ) ); + + while ( __kmp_threadpriv_cache_list != NULL ) { + + if ( *__kmp_threadpriv_cache_list -> addr != NULL ) { + KC_TRACE( 50, ( "__kmp_atfork_child: zeroing cache at address %p\n", + &(*__kmp_threadpriv_cache_list -> addr) ) ); + + *__kmp_threadpriv_cache_list -> addr = NULL; + } + __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list -> next; + } + + __kmp_init_runtime = FALSE; + + /* reset statically initialized locks */ + __kmp_init_bootstrap_lock( &__kmp_initz_lock ); + __kmp_init_bootstrap_lock( &__kmp_stdio_lock ); + __kmp_init_bootstrap_lock( &__kmp_console_lock ); + + /* This is necessary to make sure no stale data is left around */ + /* AC: customers complain that we use unsafe routines in the atfork + handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen + in dynamic_link when check the presence of shared tbbmalloc library. + Suggestion is to make the library initialization lazier, similar + to what done for __kmpc_begin(). */ + // TODO: synchronize all static initializations with regular library + // startup; look at kmp_global.c and etc. 
+ //__kmp_internal_begin (); + +} + +void +__kmp_register_atfork(void) { + if ( __kmp_need_register_atfork ) { + int status = pthread_atfork( __kmp_atfork_prepare, __kmp_atfork_parent, __kmp_atfork_child ); + KMP_CHECK_SYSFAIL( "pthread_atfork", status ); + __kmp_need_register_atfork = FALSE; + } +} + +void +__kmp_suspend_initialize( void ) +{ + int status; + status = pthread_mutexattr_init( &__kmp_suspend_mutex_attr ); + KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status ); + status = pthread_condattr_init( &__kmp_suspend_cond_attr ); + KMP_CHECK_SYSFAIL( "pthread_condattr_init", status ); +} + +static void +__kmp_suspend_initialize_thread( kmp_info_t *th ) +{ + if ( th->th.th_suspend_init_count <= __kmp_fork_count ) { + /* this means we haven't initialized the suspension pthread objects for this thread + in this instance of the process */ + int status; + status = pthread_cond_init( &th->th.th_suspend_cv.c_cond, &__kmp_suspend_cond_attr ); + KMP_CHECK_SYSFAIL( "pthread_cond_init", status ); + status = pthread_mutex_init( &th->th.th_suspend_mx.m_mutex, & __kmp_suspend_mutex_attr ); + KMP_CHECK_SYSFAIL( "pthread_mutex_init", status ); + *(volatile int*)&th->th.th_suspend_init_count = __kmp_fork_count + 1; + }; +} + +void +__kmp_suspend_uninitialize_thread( kmp_info_t *th ) +{ + if(th->th.th_suspend_init_count > __kmp_fork_count) { + /* this means we have initialize the suspension pthread objects for this thread + in this instance of the process */ + int status; + + status = pthread_cond_destroy( &th->th.th_suspend_cv.c_cond ); + if ( status != 0 && status != EBUSY ) { + KMP_SYSFAIL( "pthread_cond_destroy", status ); + }; + status = pthread_mutex_destroy( &th->th.th_suspend_mx.m_mutex ); + if ( status != 0 && status != EBUSY ) { + KMP_SYSFAIL( "pthread_mutex_destroy", status ); + }; + --th->th.th_suspend_init_count; + KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == __kmp_fork_count); + } +} + +/* This routine puts the calling thread to sleep after setting the + * 
sleep bit for the indicated flag variable to true. + */ +template +static inline void __kmp_suspend_template( int th_gtid, C *flag ) +{ + KMP_TIME_DEVELOPER_BLOCK(USER_suspend); + kmp_info_t *th = __kmp_threads[th_gtid]; + int status; + typename C::flag_t old_spin; + + KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid, flag->get() ) ); + + __kmp_suspend_initialize_thread( th ); + + status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); + + KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n", + th_gtid, flag->get() ) ); + + /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread + gets called first? + */ + old_spin = flag->set_sleeping(); + + KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x, was %x\n", + th_gtid, flag->get(), *(flag->get()), old_spin ) ); + + if ( flag->done_check_val(old_spin) ) { + old_spin = flag->unset_sleeping(); + KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for spin(%p)\n", + th_gtid, flag->get()) ); + } else { + /* Encapsulate in a loop as the documentation states that this may + * "with low probability" return when the condition variable has + * not been signaled or broadcast + */ + int deactivated = FALSE; + TCW_PTR(th->th.th_sleep_loc, (void *)flag); + while ( flag->is_sleeping() ) { +#ifdef DEBUG_SUSPEND + char buffer[128]; + __kmp_suspend_count++; + __kmp_print_cond( buffer, &th->th.th_suspend_cv ); + __kmp_printf( "__kmp_suspend_template: suspending T#%d: %s\n", th_gtid, buffer ); +#endif + // Mark the thread as no longer active (only in the first iteration of the loop). + if ( ! 
deactivated ) { + th->th.th_active = FALSE; + if ( th->th.th_active_in_pool ) { + th->th.th_active_in_pool = FALSE; + KMP_TEST_THEN_DEC32( + (kmp_int32 *) &__kmp_thread_pool_active_nth ); + KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); + } + deactivated = TRUE; + + + } + +#if USE_SUSPEND_TIMEOUT + struct timespec now; + struct timeval tval; + int msecs; + + status = gettimeofday( &tval, NULL ); + KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); + TIMEVAL_TO_TIMESPEC( &tval, &now ); + + msecs = (4*__kmp_dflt_blocktime) + 200; + now.tv_sec += msecs / 1000; + now.tv_nsec += (msecs % 1000)*1000; + + KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_timedwait\n", + th_gtid ) ); + status = pthread_cond_timedwait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex, & now ); +#else + KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_wait\n", + th_gtid ) ); + status = pthread_cond_wait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex ); +#endif + + if ( (status != 0) && (status != EINTR) && (status != ETIMEDOUT) ) { + KMP_SYSFAIL( "pthread_cond_wait", status ); + } +#ifdef KMP_DEBUG + if (status == ETIMEDOUT) { + if ( flag->is_sleeping() ) { + KF_TRACE( 100, ( "__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid ) ); + } else { + KF_TRACE( 2, ( "__kmp_suspend_template: T#%d timeout wakeup, sleep bit not set!\n", + th_gtid ) ); + } + } else if ( flag->is_sleeping() ) { + KF_TRACE( 100, ( "__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ) ); + } +#endif + } // while + + // Mark the thread as active again (if it was previous marked as inactive) + if ( deactivated ) { + th->th.th_active = TRUE; + if ( TCR_4(th->th.th_in_pool) ) { + KMP_TEST_THEN_INC32( (kmp_int32 *) &__kmp_thread_pool_active_nth ); + th->th.th_active_in_pool = TRUE; + } + } + } + +#ifdef DEBUG_SUSPEND + { + char buffer[128]; + __kmp_print_cond( buffer, &th->th.th_suspend_cv); + __kmp_printf( 
"__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid, buffer ); + } +#endif + + + status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); + + KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) ); +} + +void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { + __kmp_suspend_template(th_gtid, flag); +} +void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { + __kmp_suspend_template(th_gtid, flag); +} +void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { + __kmp_suspend_template(th_gtid, flag); +} + + +/* This routine signals the thread specified by target_gtid to wake up + * after setting the sleep bit indicated by the flag argument to FALSE. + * The target thread must already have called __kmp_suspend_template() + */ +template +static inline void __kmp_resume_template( int target_gtid, C *flag ) +{ + KMP_TIME_DEVELOPER_BLOCK(USER_resume); + kmp_info_t *th = __kmp_threads[target_gtid]; + int status; + +#ifdef KMP_DEBUG + int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; +#endif + + KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) ); + KMP_DEBUG_ASSERT( gtid != target_gtid ); + + __kmp_suspend_initialize_thread( th ); + + status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); + + if (!flag) { // coming from __kmp_null_resume_wrapper + flag = (C *)th->th.th_sleep_loc; + } + + // First, check if the flag is null or its type has changed. If so, someone else woke it up. 
+ if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to + KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p)\n", + gtid, target_gtid, NULL ) ); + status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); + return; + } + else { // if multiple threads are sleeping, flag should be internally referring to a specific thread here + typename C::flag_t old_spin = flag->unset_sleeping(); + if ( ! flag->is_sleeping_val(old_spin) ) { + KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p): " + "%u => %u\n", + gtid, target_gtid, flag->get(), old_spin, *flag->get() ) ); + + status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); + return; + } + KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p): " + "%u => %u\n", + gtid, target_gtid, flag->get(), old_spin, *flag->get() ) ); + } + TCW_PTR(th->th.th_sleep_loc, NULL); + + +#ifdef DEBUG_SUSPEND + { + char buffer[128]; + __kmp_print_cond( buffer, &th->th.th_suspend_cv ); + __kmp_printf( "__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid, target_gtid, buffer ); + } +#endif + + + status = pthread_cond_signal( &th->th.th_suspend_cv.c_cond ); + KMP_CHECK_SYSFAIL( "pthread_cond_signal", status ); + status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); + KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n", + gtid, target_gtid ) ); +} + +void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { + __kmp_resume_template(target_gtid, flag); +} +void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { + __kmp_resume_template(target_gtid, flag); +} +void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { + 
__kmp_resume_template(target_gtid, flag); +} + +void +__kmp_resume_monitor() +{ + int status; +#ifdef KMP_DEBUG + int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; + KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", + gtid, KMP_GTID_MONITOR ) ); + KMP_DEBUG_ASSERT( gtid != KMP_GTID_MONITOR ); +#endif + status = pthread_mutex_lock( &__kmp_wait_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); +#ifdef DEBUG_SUSPEND + { + char buffer[128]; + __kmp_print_cond( buffer, &__kmp_wait_cv.c_cond ); + __kmp_printf( "__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid, KMP_GTID_MONITOR, buffer ); + } +#endif + status = pthread_cond_signal( &__kmp_wait_cv.c_cond ); + KMP_CHECK_SYSFAIL( "pthread_cond_signal", status ); + status = pthread_mutex_unlock( &__kmp_wait_mx.m_mutex ); + KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); + KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d exiting after signaling wake up for T#%d\n", + gtid, KMP_GTID_MONITOR ) ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_yield( int cond ) +{ + if (cond && __kmp_yielding_on) { + sched_yield(); + } +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_gtid_set_specific( int gtid ) +{ + int status; + KMP_ASSERT( __kmp_init_runtime ); + status = pthread_setspecific( __kmp_gtid_threadprivate_key, (void*)(intptr_t)(gtid+1) ); + KMP_CHECK_SYSFAIL( "pthread_setspecific", status ); +} + +int +__kmp_gtid_get_specific() +{ + int gtid; + if ( !__kmp_init_runtime ) { + KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) ); + return KMP_GTID_SHUTDOWN; + } + gtid = (int)(size_t)pthread_getspecific( __kmp_gtid_threadprivate_key ); + if ( gtid == 0 ) { + gtid = KMP_GTID_DNE; + } + 
else { + gtid--; + } + KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", + __kmp_gtid_threadprivate_key, gtid )); + return gtid; +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +double +__kmp_read_cpu_time( void ) +{ + /*clock_t t;*/ + struct tms buffer; + + /*t =*/ times( & buffer ); + + return (buffer.tms_utime + buffer.tms_cutime) / (double) CLOCKS_PER_SEC; +} + +int +__kmp_read_system_info( struct kmp_sys_info *info ) +{ + int status; + struct rusage r_usage; + + memset( info, 0, sizeof( *info ) ); + + status = getrusage( RUSAGE_SELF, &r_usage); + KMP_CHECK_SYSFAIL_ERRNO( "getrusage", status ); + + info->maxrss = r_usage.ru_maxrss; /* the maximum resident set size utilized (in kilobytes) */ + info->minflt = r_usage.ru_minflt; /* the number of page faults serviced without any I/O */ + info->majflt = r_usage.ru_majflt; /* the number of page faults serviced that required I/O */ + info->nswap = r_usage.ru_nswap; /* the number of times a process was "swapped" out of memory */ + info->inblock = r_usage.ru_inblock; /* the number of times the file system had to perform input */ + info->oublock = r_usage.ru_oublock; /* the number of times the file system had to perform output */ + info->nvcsw = r_usage.ru_nvcsw; /* the number of times a context switch was voluntarily */ + info->nivcsw = r_usage.ru_nivcsw; /* the number of times a context switch was forced */ + + return (status != 0); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_read_system_time( double *delta ) +{ + double t_ns; + struct timeval tval; + struct timespec stop; + int status; + + status = gettimeofday( &tval, NULL ); + KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); + TIMEVAL_TO_TIMESPEC( &tval, &stop ); + t_ns = TS2NS(stop) - 
TS2NS(__kmp_sys_timer_data.start); + *delta = (t_ns * 1e-9); +} + +void +__kmp_clear_system_time( void ) +{ + struct timeval tval; + int status; + status = gettimeofday( &tval, NULL ); + KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); + TIMEVAL_TO_TIMESPEC( &tval, &__kmp_sys_timer_data.start ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#ifdef BUILD_TV + +void +__kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr ) +{ + struct tv_data *p; + + p = (struct tv_data *) __kmp_allocate( sizeof( *p ) ); + + p->u.tp.global_addr = global_addr; + p->u.tp.thread_addr = thread_addr; + + p->type = (void *) 1; + + p->next = th->th.th_local.tv_data; + th->th.th_local.tv_data = p; + + if ( p->next == 0 ) { + int rc = pthread_setspecific( __kmp_tv_key, p ); + KMP_CHECK_SYSFAIL( "pthread_setspecific", rc ); + } +} + +#endif /* BUILD_TV */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +static int +__kmp_get_xproc( void ) { + + int r = 0; + + #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD + + r = sysconf( _SC_NPROCESSORS_ONLN ); + + #elif KMP_OS_DARWIN + + // Bug C77011 High "OpenMP Threads and number of active cores". + + // Find the number of available CPUs. + kern_return_t rc; + host_basic_info_data_t info; + mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT; + rc = host_info( mach_host_self(), HOST_BASIC_INFO, (host_info_t) & info, & num ); + if ( rc == 0 && num == HOST_BASIC_INFO_COUNT ) { + // Cannot use KA_TRACE() here because this code works before trace support is + // initialized. + r = info.avail_cpus; + } else { + KMP_WARNING( CantGetNumAvailCPU ); + KMP_INFORM( AssumedNumCPU ); + }; // if + + #else + + #error "Unknown or unsupported OS." + + #endif + + return r > 0 ? 
r : 2; /* guess value of 2 if OS told us 0 */ + +} // __kmp_get_xproc + +int +__kmp_read_from_file( char const *path, char const *format, ... ) +{ + int result; + va_list args; + + va_start(args, format); + FILE *f = fopen(path, "rb"); + if ( f == NULL ) + return 0; + result = vfscanf(f, format, args); + fclose(f); + + return result; +} + +void +__kmp_runtime_initialize( void ) +{ + int status; + pthread_mutexattr_t mutex_attr; + pthread_condattr_t cond_attr; + + if ( __kmp_init_runtime ) { + return; + }; // if + + #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) + if ( ! __kmp_cpuinfo.initialized ) { + __kmp_query_cpuid( &__kmp_cpuinfo ); + }; // if + #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + __kmp_xproc = __kmp_get_xproc(); + + if ( sysconf( _SC_THREADS ) ) { + + /* Query the maximum number of threads */ + __kmp_sys_max_nth = sysconf( _SC_THREAD_THREADS_MAX ); + if ( __kmp_sys_max_nth == -1 ) { + /* Unlimited threads for NPTL */ + __kmp_sys_max_nth = INT_MAX; + } + else if ( __kmp_sys_max_nth <= 1 ) { + /* Can't tell, just use PTHREAD_THREADS_MAX */ + __kmp_sys_max_nth = KMP_MAX_NTH; + } + + /* Query the minimum stack size */ + __kmp_sys_min_stksize = sysconf( _SC_THREAD_STACK_MIN ); + if ( __kmp_sys_min_stksize <= 1 ) { + __kmp_sys_min_stksize = KMP_MIN_STKSIZE; + } + } + + /* Set up minimum number of threads to switch to TLS gtid */ + __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; + + #ifdef BUILD_TV + { + int rc = pthread_key_create( & __kmp_tv_key, 0 ); + KMP_CHECK_SYSFAIL( "pthread_key_create", rc ); + } + #endif + + status = pthread_key_create( &__kmp_gtid_threadprivate_key, __kmp_internal_end_dest ); + KMP_CHECK_SYSFAIL( "pthread_key_create", status ); + status = pthread_mutexattr_init( & mutex_attr ); + KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status ); + status = pthread_mutex_init( & __kmp_wait_mx.m_mutex, & mutex_attr ); + KMP_CHECK_SYSFAIL( "pthread_mutex_init", status ); + status = pthread_condattr_init( & cond_attr ); + KMP_CHECK_SYSFAIL( 
"pthread_condattr_init", status ); + status = pthread_cond_init( & __kmp_wait_cv.c_cond, & cond_attr ); + KMP_CHECK_SYSFAIL( "pthread_cond_init", status ); +#if USE_ITT_BUILD + __kmp_itt_initialize(); +#endif /* USE_ITT_BUILD */ + + __kmp_init_runtime = TRUE; +} + +void +__kmp_runtime_destroy( void ) +{ + int status; + + if ( ! __kmp_init_runtime ) { + return; // Nothing to do. + }; + +#if USE_ITT_BUILD + __kmp_itt_destroy(); +#endif /* USE_ITT_BUILD */ + + status = pthread_key_delete( __kmp_gtid_threadprivate_key ); + KMP_CHECK_SYSFAIL( "pthread_key_delete", status ); + #ifdef BUILD_TV + status = pthread_key_delete( __kmp_tv_key ); + KMP_CHECK_SYSFAIL( "pthread_key_delete", status ); + #endif + + status = pthread_mutex_destroy( & __kmp_wait_mx.m_mutex ); + if ( status != 0 && status != EBUSY ) { + KMP_SYSFAIL( "pthread_mutex_destroy", status ); + } + status = pthread_cond_destroy( & __kmp_wait_cv.c_cond ); + if ( status != 0 && status != EBUSY ) { + KMP_SYSFAIL( "pthread_cond_destroy", status ); + } + #if KMP_AFFINITY_SUPPORTED + __kmp_affinity_uninitialize(); + #endif + + __kmp_init_runtime = FALSE; +} + + +/* Put the thread to sleep for a time period */ +/* NOTE: not currently used anywhere */ +void +__kmp_thread_sleep( int millis ) +{ + sleep( ( millis + 500 ) / 1000 ); +} + +/* Calculate the elapsed wall clock time for the user */ +void +__kmp_elapsed( double *t ) +{ + int status; +# ifdef FIX_SGI_CLOCK + struct timespec ts; + + status = clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ); + KMP_CHECK_SYSFAIL_ERRNO( "clock_gettime", status ); + *t = (double) ts.tv_nsec * (1.0 / (double) KMP_NSEC_PER_SEC) + + (double) ts.tv_sec; +# else + struct timeval tv; + + status = gettimeofday( & tv, NULL ); + KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); + *t = (double) tv.tv_usec * (1.0 / (double) KMP_USEC_PER_SEC) + + (double) tv.tv_sec; +# endif +} + +/* Calculate the elapsed wall clock tick for the user */ +void +__kmp_elapsed_tick( double *t ) +{ + *t = 1 / (double) 
CLOCKS_PER_SEC; +} + +/* + Determine whether the given address is mapped into the current address space. +*/ + +int +__kmp_is_address_mapped( void * addr ) { + + int found = 0; + int rc; + + #if KMP_OS_LINUX || KMP_OS_FREEBSD + + /* + On Linux* OS, read the /proc//maps pseudo-file to get all the address ranges mapped + into the address space. + */ + + char * name = __kmp_str_format( "/proc/%d/maps", getpid() ); + FILE * file = NULL; + + file = fopen( name, "r" ); + KMP_ASSERT( file != NULL ); + + for ( ; ; ) { + + void * beginning = NULL; + void * ending = NULL; + char perms[ 5 ]; + + rc = fscanf( file, "%p-%p %4s %*[^\n]\n", & beginning, & ending, perms ); + if ( rc == EOF ) { + break; + }; // if + KMP_ASSERT( rc == 3 && KMP_STRLEN( perms ) == 4 ); // Make sure all fields are read. + + // Ending address is not included in the region, but beginning is. + if ( ( addr >= beginning ) && ( addr < ending ) ) { + perms[ 2 ] = 0; // 3th and 4th character does not matter. + if ( strcmp( perms, "rw" ) == 0 ) { + // Memory we are looking for should be readable and writable. + found = 1; + }; // if + break; + }; // if + + }; // forever + + // Free resources. + fclose( file ); + KMP_INTERNAL_FREE( name ); + + #elif KMP_OS_DARWIN + + /* + On OS X*, /proc pseudo filesystem is not available. Try to read memory using vm + interface. + */ + + int buffer; + vm_size_t count; + rc = + vm_read_overwrite( + mach_task_self(), // Task to read memory of. + (vm_address_t)( addr ), // Address to read from. + 1, // Number of bytes to be read. + (vm_address_t)( & buffer ), // Address of buffer to save read bytes in. + & count // Address of var to save number of read bytes in. + ); + if ( rc == 0 ) { + // Memory successfully read. 
+ found = 1; + }; // if + + #elif KMP_OS_FREEBSD || KMP_OS_NETBSD + + // FIXME(FreeBSD, NetBSD): Implement this + found = 1; + + #else + + #error "Unknown or unsupported OS" + + #endif + + return found; + +} // __kmp_is_address_mapped + +#ifdef USE_LOAD_BALANCE + + +# if KMP_OS_DARWIN + +// The function returns the rounded value of the system load average +// during given time interval which depends on the value of +// __kmp_load_balance_interval variable (default is 60 sec, other values +// may be 300 sec or 900 sec). +// It returns -1 in case of error. +int +__kmp_get_load_balance( int max ) +{ + double averages[3]; + int ret_avg = 0; + + int res = getloadavg( averages, 3 ); + + //Check __kmp_load_balance_interval to determine which of averages to use. + // getloadavg() may return the number of samples less than requested that is + // less than 3. + if ( __kmp_load_balance_interval < 180 && ( res >= 1 ) ) { + ret_avg = averages[0];// 1 min + } else if ( ( __kmp_load_balance_interval >= 180 + && __kmp_load_balance_interval < 600 ) && ( res >= 2 ) ) { + ret_avg = averages[1];// 5 min + } else if ( ( __kmp_load_balance_interval >= 600 ) && ( res == 3 ) ) { + ret_avg = averages[2];// 15 min + } else {// Error occurred + return -1; + } + + return ret_avg; +} + +# else // Linux* OS + +// The fuction returns number of running (not sleeping) threads, or -1 in case of error. +// Error could be reported if Linux* OS kernel too old (without "/proc" support). +// Counting running threads stops if max running threads encountered. +int +__kmp_get_load_balance( int max ) +{ + static int permanent_error = 0; + + static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algortihm */ + static double glb_call_time = 0; /* Thread balance algorithm call time */ + + int running_threads = 0; // Number of running threads in the system. + + DIR * proc_dir = NULL; // Handle of "/proc/" directory. 
+ struct dirent * proc_entry = NULL; + + kmp_str_buf_t task_path; // "/proc//task//" path. + DIR * task_dir = NULL; // Handle of "/proc//task//" directory. + struct dirent * task_entry = NULL; + int task_path_fixed_len; + + kmp_str_buf_t stat_path; // "/proc//task//stat" path. + int stat_file = -1; + int stat_path_fixed_len; + + int total_processes = 0; // Total number of processes in system. + int total_threads = 0; // Total number of threads in system. + + double call_time = 0.0; + + __kmp_str_buf_init( & task_path ); + __kmp_str_buf_init( & stat_path ); + + __kmp_elapsed( & call_time ); + + if ( glb_call_time && + ( call_time - glb_call_time < __kmp_load_balance_interval ) ) { + running_threads = glb_running_threads; + goto finish; + } + + glb_call_time = call_time; + + // Do not spend time on scanning "/proc/" if we have a permanent error. + if ( permanent_error ) { + running_threads = -1; + goto finish; + }; // if + + if ( max <= 0 ) { + max = INT_MAX; + }; // if + + // Open "/proc/" directory. + proc_dir = opendir( "/proc" ); + if ( proc_dir == NULL ) { + // Cannot open "/prroc/". Probably the kernel does not support it. Return an error now and + // in subsequent calls. + running_threads = -1; + permanent_error = 1; + goto finish; + }; // if + + // Initialize fixed part of task_path. This part will not change. + __kmp_str_buf_cat( & task_path, "/proc/", 6 ); + task_path_fixed_len = task_path.used; // Remember number of used characters. + + proc_entry = readdir( proc_dir ); + while ( proc_entry != NULL ) { + // Proc entry is a directory and name starts with a digit. Assume it is a process' + // directory. + if ( proc_entry->d_type == DT_DIR && isdigit( proc_entry->d_name[ 0 ] ) ) { + + ++ total_processes; + // Make sure init process is the very first in "/proc", so we can replace + // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes == 1. 
+ // We are going to check that total_processes == 1 => d_name == "1" is true (where + // "=>" is implication). Since C++ does not have => operator, let us replace it with its + // equivalent: a => b == ! a || b. + KMP_DEBUG_ASSERT( total_processes != 1 || strcmp( proc_entry->d_name, "1" ) == 0 ); + + // Construct task_path. + task_path.used = task_path_fixed_len; // Reset task_path to "/proc/". + __kmp_str_buf_cat( & task_path, proc_entry->d_name, KMP_STRLEN( proc_entry->d_name ) ); + __kmp_str_buf_cat( & task_path, "/task", 5 ); + + task_dir = opendir( task_path.str ); + if ( task_dir == NULL ) { + // Process can finish between reading "/proc/" directory entry and opening process' + // "task/" directory. So, in general case we should not complain, but have to skip + // this process and read the next one. + // But on systems with no "task/" support we will spend lot of time to scan "/proc/" + // tree again and again without any benefit. "init" process (its pid is 1) should + // exist always, so, if we cannot open "/proc/1/task/" directory, it means "task/" + // is not supported by kernel. Report an error now and in the future. + if ( strcmp( proc_entry->d_name, "1" ) == 0 ) { + running_threads = -1; + permanent_error = 1; + goto finish; + }; // if + } else { + // Construct fixed part of stat file path. + __kmp_str_buf_clear( & stat_path ); + __kmp_str_buf_cat( & stat_path, task_path.str, task_path.used ); + __kmp_str_buf_cat( & stat_path, "/", 1 ); + stat_path_fixed_len = stat_path.used; + + task_entry = readdir( task_dir ); + while ( task_entry != NULL ) { + // It is a directory and name starts with a digit. + if ( proc_entry->d_type == DT_DIR && isdigit( task_entry->d_name[ 0 ] ) ) { + + ++ total_threads; + + // Consruct complete stat file path. Easiest way would be: + // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, task_entry->d_name ); + // but seriae of __kmp_str_buf_cat works a bit faster. 
+ stat_path.used = stat_path_fixed_len; // Reset stat path to its fixed part. + __kmp_str_buf_cat( & stat_path, task_entry->d_name, KMP_STRLEN( task_entry->d_name ) ); + __kmp_str_buf_cat( & stat_path, "/stat", 5 ); + + // Note: Low-level API (open/read/close) is used. High-level API + // (fopen/fclose) works ~ 30 % slower. + stat_file = open( stat_path.str, O_RDONLY ); + if ( stat_file == -1 ) { + // We cannot report an error because task (thread) can terminate just + // before reading this file. + } else { + /* + Content of "stat" file looks like: + + 24285 (program) S ... + + It is a single line (if program name does not include fanny + symbols). First number is a thread id, then name of executable file + name in paretheses, then state of the thread. We need just thread + state. + + Good news: Length of program name is 15 characters max. Longer + names are truncated. + + Thus, we need rather short buffer: 15 chars for program name + + 2 parenthesis, + 3 spaces + ~7 digits of pid = 37. + + Bad news: Program name may contain special symbols like space, + closing parenthesis, or even new line. This makes parsing "stat" + file not 100 % reliable. In case of fanny program names parsing + may fail (report incorrect thread state). + + Parsing "status" file looks more promissing (due to different + file structure and escaping special symbols) but reading and + parsing of "status" file works slower. + + -- ln + */ + char buffer[ 65 ]; + int len; + len = read( stat_file, buffer, sizeof( buffer ) - 1 ); + if ( len >= 0 ) { + buffer[ len ] = 0; + // Using scanf: + // sscanf( buffer, "%*d (%*s) %c ", & state ); + // looks very nice, but searching for a closing parenthesis works a + // bit faster. 
+ char * close_parent = strstr( buffer, ") " ); + if ( close_parent != NULL ) { + char state = * ( close_parent + 2 ); + if ( state == 'R' ) { + ++ running_threads; + if ( running_threads >= max ) { + goto finish; + }; // if + }; // if + }; // if + }; // if + close( stat_file ); + stat_file = -1; + }; // if + }; // if + task_entry = readdir( task_dir ); + }; // while + closedir( task_dir ); + task_dir = NULL; + }; // if + }; // if + proc_entry = readdir( proc_dir ); + }; // while + + // + // There _might_ be a timing hole where the thread executing this + // code get skipped in the load balance, and running_threads is 0. + // Assert in the debug builds only!!! + // + KMP_DEBUG_ASSERT( running_threads > 0 ); + if ( running_threads <= 0 ) { + running_threads = 1; + } + + finish: // Clean up and exit. + if ( proc_dir != NULL ) { + closedir( proc_dir ); + }; // if + __kmp_str_buf_free( & task_path ); + if ( task_dir != NULL ) { + closedir( task_dir ); + }; // if + __kmp_str_buf_free( & stat_path ); + if ( stat_file != -1 ) { + close( stat_file ); + }; // if + + glb_running_threads = running_threads; + + return running_threads; + +} // __kmp_get_load_balance + +# endif // KMP_OS_DARWIN + +#endif // USE_LOAD_BALANCE + +#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC) + +// we really only need the case with 1 argument, because CLANG always build +// a struct of pointers to shared variables referenced in the outlined function +int +__kmp_invoke_microtask( microtask_t pkfn, + int gtid, int tid, + int argc, void *p_argv[] +#if OMPT_SUPPORT + , void **exit_frame_ptr +#endif +) +{ +#if OMPT_SUPPORT + *exit_frame_ptr = __builtin_frame_address(0); +#endif + + switch (argc) { + default: + fprintf(stderr, "Too many args to microtask: %d!\n", argc); + fflush(stderr); + exit(-1); + case 0: + (*pkfn)(>id, &tid); + break; + case 1: + (*pkfn)(>id, &tid, p_argv[0]); + break; + case 2: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1]); + break; + case 3: + (*pkfn)(>id, &tid, p_argv[0], 
p_argv[1], p_argv[2]); + break; + case 4: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]); + break; + case 5: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]); + break; + case 6: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5]); + break; + case 7: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6]); + break; + case 8: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7]); + break; + case 9: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8]); + break; + case 10: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]); + break; + case 11: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]); + break; + case 12: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11]); + break; + case 13: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12]); + break; + case 14: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13]); + break; + case 15: + (*pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14]); + break; + } + +#if OMPT_SUPPORT + *exit_frame_ptr = 0; +#endif + + return 1; +} + +#endif + +// end of file // + diff --git a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm 
b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm index 507d093778c..a4f9a38ae7a 100644 --- a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm +++ b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm @@ -1,1402 +1,1402 @@ -; z_Windows_NT-586_asm.asm: - microtasking routines specifically -; written for IA-32 architecture and Intel(R) 64 running Windows* OS - -; -;//===----------------------------------------------------------------------===// -;// -;// The LLVM Compiler Infrastructure -;// -;// This file is dual licensed under the MIT and the University of Illinois Open -;// Source Licenses. See LICENSE.txt for details. -;// -;//===----------------------------------------------------------------------===// -; - - TITLE z_Windows_NT-586_asm.asm - -; ============================= IA-32 architecture ========================== -ifdef _M_IA32 - - .586P - -if @Version gt 510 - .model HUGE -else -_TEXT SEGMENT PARA USE32 PUBLIC 'CODE' -_TEXT ENDS -_DATA SEGMENT DWORD USE32 PUBLIC 'DATA' -_DATA ENDS -CONST SEGMENT DWORD USE32 PUBLIC 'CONST' -CONST ENDS -_BSS SEGMENT DWORD USE32 PUBLIC 'BSS' -_BSS ENDS -$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM' -$$SYMBOLS ENDS -$$TYPES SEGMENT BYTE USE32 'DEBTYP' -$$TYPES ENDS -_TLS SEGMENT DWORD USE32 PUBLIC 'TLS' -_TLS ENDS -FLAT GROUP _DATA, CONST, _BSS - ASSUME CS: FLAT, DS: FLAT, SS: FLAT -endif - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_x86_pause -; -; void -; __kmp_x86_pause( void ) -; - -PUBLIC ___kmp_x86_pause -_p$ = 4 -_d$ = 8 -_TEXT SEGMENT - ALIGN 16 -___kmp_x86_pause PROC NEAR - - db 0f3H - db 090H ;; pause - ret - -___kmp_x86_pause ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_x86_cpuid -; -; void -; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); -; - -PUBLIC ___kmp_x86_cpuid -_TEXT SEGMENT - ALIGN 16 -_mode$ = 8 -_mode2$ = 12 -_p$ = 16 -_eax$ = 0 -_ebx$ = 4 -_ecx$ = 8 -_edx$ = 12 
- -___kmp_x86_cpuid PROC NEAR - - push ebp - mov ebp, esp - - push edi - push ebx - push ecx - push edx - - mov eax, DWORD PTR _mode$[ebp] - mov ecx, DWORD PTR _mode2$[ebp] - cpuid ; Query the CPUID for the current processor - - mov edi, DWORD PTR _p$[ebp] - mov DWORD PTR _eax$[ edi ], eax - mov DWORD PTR _ebx$[ edi ], ebx - mov DWORD PTR _ecx$[ edi ], ecx - mov DWORD PTR _edx$[ edi ], edx - - pop edx - pop ecx - pop ebx - pop edi - - mov esp, ebp - pop ebp - ret - -___kmp_x86_cpuid ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_test_then_add32 -; -; kmp_int32 -; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -; - -PUBLIC ___kmp_test_then_add32 -_p$ = 4 -_d$ = 8 -_TEXT SEGMENT - ALIGN 16 -___kmp_test_then_add32 PROC NEAR - - mov eax, DWORD PTR _d$[esp] - mov ecx, DWORD PTR _p$[esp] -lock xadd DWORD PTR [ecx], eax - ret - -___kmp_test_then_add32 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store8 -; -; kmp_int8 -; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; - -PUBLIC ___kmp_compare_and_store8 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store8 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov al, BYTE PTR _cv$[esp] - mov dl, BYTE PTR _sv$[esp] -lock cmpxchg BYTE PTR [ecx], dl - sete al ; if al == [ecx] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - ret - -___kmp_compare_and_store8 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store16 -; -; kmp_int16 -; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; - -PUBLIC ___kmp_compare_and_store16 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store16 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov ax, WORD PTR _cv$[esp] - 
mov dx, WORD PTR _sv$[esp] -lock cmpxchg WORD PTR [ecx], dx - sete al ; if ax == [ecx] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - ret - -___kmp_compare_and_store16 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store32 -; -; kmp_int32 -; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; - -PUBLIC ___kmp_compare_and_store32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store32 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov eax, DWORD PTR _cv$[esp] - mov edx, DWORD PTR _sv$[esp] -lock cmpxchg DWORD PTR [ecx], edx - sete al ; if eax == [ecx] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - ret - -___kmp_compare_and_store32 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store64 -; -; kmp_int32 -; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; - -PUBLIC ___kmp_compare_and_store64 -_TEXT SEGMENT - ALIGN 16 -_p$ = 8 -_cv_low$ = 12 -_cv_high$ = 16 -_sv_low$ = 20 -_sv_high$ = 24 - -___kmp_compare_and_store64 PROC NEAR - - push ebp - mov ebp, esp - push ebx - push edi - mov edi, DWORD PTR _p$[ebp] - mov eax, DWORD PTR _cv_low$[ebp] - mov edx, DWORD PTR _cv_high$[ebp] - mov ebx, DWORD PTR _sv_low$[ebp] - mov ecx, DWORD PTR _sv_high$[ebp] -lock cmpxchg8b QWORD PTR [edi] - sete al ; if edx:eax == [edi] set al = 1 else set al = 0 - and eax, 1 ; sign extend previous instruction - pop edi - pop ebx - mov esp, ebp - pop ebp - ret - -___kmp_compare_and_store64 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_fixed8 -; -; kmp_int8 -; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -; - -PUBLIC ___kmp_xchg_fixed8 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_d$ = 8 - -___kmp_xchg_fixed8 PROC NEAR 
- - mov ecx, DWORD PTR _p$[esp] - mov al, BYTE PTR _d$[esp] -lock xchg BYTE PTR [ecx], al - ret - -___kmp_xchg_fixed8 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_fixed16 -; -; kmp_int16 -; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -; - -PUBLIC ___kmp_xchg_fixed16 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_d$ = 8 - -___kmp_xchg_fixed16 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov ax, WORD PTR _d$[esp] -lock xchg WORD PTR [ecx], ax - ret - -___kmp_xchg_fixed16 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_fixed32 -; -; kmp_int32 -; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -; - -PUBLIC ___kmp_xchg_fixed32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_d$ = 8 - -___kmp_xchg_fixed32 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov eax, DWORD PTR _d$[esp] -lock xchg DWORD PTR [ecx], eax - ret - -___kmp_xchg_fixed32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_real32 -; -; kmp_real32 -; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d ); -; - -PUBLIC ___kmp_xchg_real32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 8 -_d$ = 12 -_old_value$ = -4 - -___kmp_xchg_real32 PROC NEAR - - push ebp - mov ebp, esp - sub esp, 4 - push esi - mov esi, DWORD PTR _p$[ebp] - - fld DWORD PTR [esi] - ;; load - fst DWORD PTR _old_value$[ebp] - ;; store into old_value - - mov eax, DWORD PTR _d$[ebp] - -lock xchg DWORD PTR [esi], eax - - fld DWORD PTR _old_value$[ebp] - ;; return old_value - pop esi - mov esp, ebp - pop ebp - ret - -___kmp_xchg_real32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store_ret8 -; -; kmp_int8 -; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; - -PUBLIC ___kmp_compare_and_store_ret8 -_TEXT SEGMENT - ALIGN 16 
-_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store_ret8 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov al, BYTE PTR _cv$[esp] - mov dl, BYTE PTR _sv$[esp] -lock cmpxchg BYTE PTR [ecx], dl - ret - -___kmp_compare_and_store_ret8 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store_ret16 -; -; kmp_int16 -; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; - -PUBLIC ___kmp_compare_and_store_ret16 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store_ret16 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov ax, WORD PTR _cv$[esp] - mov dx, WORD PTR _sv$[esp] -lock cmpxchg WORD PTR [ecx], dx - ret - -___kmp_compare_and_store_ret16 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store_ret32 -; -; kmp_int32 -; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; - -PUBLIC ___kmp_compare_and_store_ret32 -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 -_cv$ = 8 -_sv$ = 12 - -___kmp_compare_and_store_ret32 PROC NEAR - - mov ecx, DWORD PTR _p$[esp] - mov eax, DWORD PTR _cv$[esp] - mov edx, DWORD PTR _sv$[esp] -lock cmpxchg DWORD PTR [ecx], edx - ret - -___kmp_compare_and_store_ret32 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_compare_and_store_ret64 -; -; kmp_int64 -; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; - -PUBLIC ___kmp_compare_and_store_ret64 -_TEXT SEGMENT - ALIGN 16 -_p$ = 8 -_cv_low$ = 12 -_cv_high$ = 16 -_sv_low$ = 20 -_sv_high$ = 24 - -___kmp_compare_and_store_ret64 PROC NEAR - - push ebp - mov ebp, esp - push ebx - push edi - mov edi, DWORD PTR _p$[ebp] - mov eax, DWORD PTR _cv_low$[ebp] - mov edx, DWORD PTR _cv_high$[ebp] - mov ebx, DWORD PTR _sv_low$[ebp] - mov ecx, DWORD PTR _sv_high$[ebp] -lock 
cmpxchg8b QWORD PTR [edi] - pop edi - pop ebx - mov esp, ebp - pop ebp - ret - -___kmp_compare_and_store_ret64 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_load_x87_fpu_control_word -; -; void -; __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -; -; parameters: -; p: 4(%esp) - -PUBLIC ___kmp_load_x87_fpu_control_word -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 - -___kmp_load_x87_fpu_control_word PROC NEAR - - mov eax, DWORD PTR _p$[esp] - fldcw WORD PTR [eax] - ret - -___kmp_load_x87_fpu_control_word ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_store_x87_fpu_control_word -; -; void -; __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -; -; parameters: -; p: 4(%esp) - -PUBLIC ___kmp_store_x87_fpu_control_word -_TEXT SEGMENT - ALIGN 16 -_p$ = 4 - -___kmp_store_x87_fpu_control_word PROC NEAR - - mov eax, DWORD PTR _p$[esp] - fstcw WORD PTR [eax] - ret - -___kmp_store_x87_fpu_control_word ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_clear_x87_fpu_status_word -; -; void -; __kmp_clear_x87_fpu_status_word(); -; - -PUBLIC ___kmp_clear_x87_fpu_status_word -_TEXT SEGMENT - ALIGN 16 - -___kmp_clear_x87_fpu_status_word PROC NEAR - - fnclex - ret - -___kmp_clear_x87_fpu_status_word ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_invoke_microtask -; -; typedef void (*microtask_t)( int *gtid, int *tid, ... 
); -; -; int -; __kmp_invoke_microtask( microtask_t pkfn, -; int gtid, int tid, -; int argc, void *p_argv[] ) -; - -PUBLIC ___kmp_invoke_microtask -_TEXT SEGMENT - ALIGN 16 -_pkfn$ = 8 -_gtid$ = 12 -_tid$ = 16 -_argc$ = 20 -_argv$ = 24 -if OMPT_SUPPORT -_exit_frame$ = 28 -endif -_i$ = -8 -_stk_adj$ = -16 -_vptr$ = -12 -_qptr$ = -4 - -___kmp_invoke_microtask PROC NEAR -; Line 102 - push ebp - mov ebp, esp - sub esp, 16 ; 00000010H - push ebx - push esi - push edi -if OMPT_SUPPORT - mov eax, DWORD PTR _exit_frame$[ebp] - mov DWORD PTR [eax], ebp -endif -; Line 114 - mov eax, DWORD PTR _argc$[ebp] - mov DWORD PTR _i$[ebp], eax - -;; ------------------------------------------------------------ - lea edx, DWORD PTR [eax*4+8] - mov ecx, esp ; Save current SP into ECX - mov eax,edx ; Save the size of the args in eax - sub ecx,edx ; esp-((#args+2)*4) -> ecx -- without mods, stack ptr would be this - mov edx,ecx ; Save to edx - and ecx,-128 ; Mask off 7 bits - sub edx,ecx ; Amount to subtract from esp - sub esp,edx ; Prepare stack ptr-- Now it will be aligned on 128-byte boundary at the call - - add edx,eax ; Calculate total size of the stack decrement. 
- mov DWORD PTR _stk_adj$[ebp], edx -;; ------------------------------------------------------------ - - jmp SHORT $L22237 -$L22238: - mov ecx, DWORD PTR _i$[ebp] - sub ecx, 1 - mov DWORD PTR _i$[ebp], ecx -$L22237: - cmp DWORD PTR _i$[ebp], 0 - jle SHORT $L22239 -; Line 116 - mov edx, DWORD PTR _i$[ebp] - mov eax, DWORD PTR _argv$[ebp] - mov ecx, DWORD PTR [eax+edx*4-4] - mov DWORD PTR _vptr$[ebp], ecx -; Line 123 - mov eax, DWORD PTR _vptr$[ebp] -; Line 124 - push eax -; Line 127 - jmp SHORT $L22238 -$L22239: -; Line 129 - lea edx, DWORD PTR _tid$[ebp] - mov DWORD PTR _vptr$[ebp], edx -; Line 130 - lea eax, DWORD PTR _gtid$[ebp] - mov DWORD PTR _qptr$[ebp], eax -; Line 143 - mov eax, DWORD PTR _vptr$[ebp] -; Line 144 - push eax -; Line 145 - mov eax, DWORD PTR _qptr$[ebp] -; Line 146 - push eax -; Line 147 - call DWORD PTR _pkfn$[ebp] -; Line 148 - add esp, DWORD PTR _stk_adj$[ebp] -; Line 152 - mov eax, 1 -; Line 153 - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret 0 -___kmp_invoke_microtask ENDP -_TEXT ENDS - -endif - -; ==================================== Intel(R) 64 =================================== - -ifdef _M_AMD64 - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_x86_cpuid -; -; void -; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); -; -; parameters: -; mode: ecx -; mode2: edx -; cpuid_buffer: r8 - -PUBLIC __kmp_x86_cpuid -_TEXT SEGMENT - ALIGN 16 - -__kmp_x86_cpuid PROC FRAME ;NEAR - - push rbp - .pushreg rbp - mov rbp, rsp - .setframe rbp, 0 - push rbx ; callee-save register - .pushreg rbx - .ENDPROLOG - - mov r10, r8 ; p parameter - mov eax, ecx ; mode parameter - mov ecx, edx ; mode2 parameter - cpuid ; Query the CPUID for the current processor - - mov DWORD PTR 0[ r10 ], eax ; store results into buffer - mov DWORD PTR 4[ r10 ], ebx - mov DWORD PTR 8[ r10 ], ecx - mov DWORD PTR 12[ r10 ], edx - - pop rbx ; callee-save register - mov rsp, rbp - pop rbp - ret - -__kmp_x86_cpuid 
ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_test_then_add32 -; -; kmp_int32 -; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -; -; parameters: -; p: rcx -; d: edx -; -; return: eax - -PUBLIC __kmp_test_then_add32 -_TEXT SEGMENT - ALIGN 16 -__kmp_test_then_add32 PROC ;NEAR - - mov eax, edx -lock xadd DWORD PTR [rcx], eax - ret - -__kmp_test_then_add32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_test_then_add64 -; -; kmp_int32 -; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); -; -; parameters: -; p: rcx -; d: rdx -; -; return: rax - -PUBLIC __kmp_test_then_add64 -_TEXT SEGMENT - ALIGN 16 -__kmp_test_then_add64 PROC ;NEAR - - mov rax, rdx -lock xadd QWORD PTR [rcx], rax - ret - -__kmp_test_then_add64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store8 -; -; kmp_int8 -; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax - -PUBLIC __kmp_compare_and_store8 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store8 PROC ;NEAR - - mov al, dl ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg BYTE PTR [rcx], dl - sete al ; if al == [rcx] set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store8 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store16 -; -; kmp_int16 -; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax - -PUBLIC __kmp_compare_and_store16 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store16 PROC ;NEAR - - mov ax, dx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg WORD PTR [rcx], dx - sete al ; if ax == [rcx] 
set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store16 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store32 -; -; kmp_int32 -; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax - -PUBLIC __kmp_compare_and_store32 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store32 PROC ;NEAR - - mov eax, edx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg DWORD PTR [rcx], edx - sete al ; if eax == [rcx] set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store64 -; -; kmp_int32 -; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; parameters: -; p: rcx -; cv: rdx -; sv: r8 -; -; return: eax - -PUBLIC __kmp_compare_and_store64 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store64 PROC ;NEAR - - mov rax, rdx ; "cv" - mov rdx, r8 ; "sv" -lock cmpxchg QWORD PTR [rcx], rdx - sete al ; if rax == [rcx] set al = 1 else set al = 0 - and rax, 1 ; sign extend previous instruction - ret - -__kmp_compare_and_store64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_fixed8 -; -; kmp_int8 -; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -; -; parameters: -; p: rcx -; d: dl -; -; return: al - -PUBLIC __kmp_xchg_fixed8 -_TEXT SEGMENT - ALIGN 16 - -__kmp_xchg_fixed8 PROC ;NEAR - - mov al, dl -lock xchg BYTE PTR [rcx], al - ret - -__kmp_xchg_fixed8 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_fixed16 -; -; kmp_int16 -; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -; -; parameters: -; p: 
rcx -; d: dx -; -; return: ax - -PUBLIC __kmp_xchg_fixed16 -_TEXT SEGMENT - ALIGN 16 - -__kmp_xchg_fixed16 PROC ;NEAR - - mov ax, dx -lock xchg WORD PTR [rcx], ax - ret - -__kmp_xchg_fixed16 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_fixed32 -; -; kmp_int32 -; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -; -; parameters: -; p: rcx -; d: edx -; -; return: eax - -PUBLIC __kmp_xchg_fixed32 -_TEXT SEGMENT - ALIGN 16 -__kmp_xchg_fixed32 PROC ;NEAR - - mov eax, edx -lock xchg DWORD PTR [rcx], eax - ret - -__kmp_xchg_fixed32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION ___kmp_xchg_fixed64 -; -; kmp_int64 -; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); -; -; parameters: -; p: rcx -; d: rdx -; -; return: rax - -PUBLIC __kmp_xchg_fixed64 -_TEXT SEGMENT - ALIGN 16 -__kmp_xchg_fixed64 PROC ;NEAR - - mov rax, rdx -lock xchg QWORD PTR [rcx], rax - ret - -__kmp_xchg_fixed64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store_ret8 -; -; kmp_int8 -; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax - -PUBLIC __kmp_compare_and_store_ret8 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret8 PROC ;NEAR - mov al, dl ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg BYTE PTR [rcx], dl - ; Compare AL with [rcx]. If equal set - ; ZF and exchange DL with [rcx]. Else, clear - ; ZF and load [rcx] into AL. 
- ret - -__kmp_compare_and_store_ret8 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store_ret16 -; -; kmp_int16 -; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax - -PUBLIC __kmp_compare_and_store_ret16 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret16 PROC ;NEAR - - mov ax, dx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg WORD PTR [rcx], dx - ret - -__kmp_compare_and_store_ret16 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store_ret32 -; -; kmp_int32 -; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: eax - -PUBLIC __kmp_compare_and_store_ret32 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret32 PROC ;NEAR - - mov eax, edx ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg DWORD PTR [rcx], edx - ret - -__kmp_compare_and_store_ret32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store_ret64 -; -; kmp_int64 -; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; parameters: -; p: rcx -; cv: rdx -; sv: r8 -; -; return: rax - -PUBLIC __kmp_compare_and_store_ret64 -_TEXT SEGMENT - ALIGN 16 - -__kmp_compare_and_store_ret64 PROC ;NEAR - - mov rax, rdx ; "cv" - mov rdx, r8 ; "sv" -lock cmpxchg QWORD PTR [rcx], rdx - ret - -__kmp_compare_and_store_ret64 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_compare_and_store_loop8 -; -; kmp_int8 -; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; parameters: -; p: rcx -; cv: edx -; sv: r8d -; -; return: al - -PUBLIC __kmp_compare_and_store_loop8 -_TEXT 
SEGMENT - ALIGN 16 - -__kmp_compare_and_store_loop8 PROC ;NEAR -$__kmp_loop: - mov al, dl ; "cv" - mov edx, r8d ; "sv" -lock cmpxchg BYTE PTR [rcx], dl - ; Compare AL with [rcx]. If equal set - ; ZF and exchange DL with [rcx]. Else, clear - ; ZF and load [rcx] into AL. - jz SHORT $__kmp_success - - db 0f3H - db 090H ; pause - - jmp SHORT $__kmp_loop - -$__kmp_success: - ret - -__kmp_compare_and_store_loop8 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_xchg_real32 -; -; kmp_real32 -; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d ); -; -; parameters: -; p: rcx -; d: xmm1 (lower 4 bytes) -; -; return: xmm0 (lower 4 bytes) - -PUBLIC __kmp_xchg_real32 -_TEXT SEGMENT - ALIGN 16 -__kmp_xchg_real32 PROC ;NEAR - - movd eax, xmm1 ; load d - -lock xchg DWORD PTR [rcx], eax - - movd xmm0, eax ; load old value into return register - ret - -__kmp_xchg_real32 ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_xchg_real64 -; -; kmp_real64 -; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d ); -; -; parameters: -; p: rcx -; d: xmm1 (lower 8 bytes) -; -; return: xmm0 (lower 8 bytes) - -PUBLIC __kmp_xchg_real64 -_TEXT SEGMENT - ALIGN 16 -__kmp_xchg_real64 PROC ;NEAR - - movd rax, xmm1 ; load "d" - -lock xchg QWORD PTR [rcx], rax - - movd xmm0, rax ; load old value into return register - ret - -__kmp_xchg_real64 ENDP -_TEXT ENDS - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_load_x87_fpu_control_word -; -; void -; __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -; -; parameters: -; p: rcx -; - -PUBLIC __kmp_load_x87_fpu_control_word -_TEXT SEGMENT - ALIGN 16 -__kmp_load_x87_fpu_control_word PROC ;NEAR - - fldcw WORD PTR [rcx] - ret - -__kmp_load_x87_fpu_control_word ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION 
__kmp_store_x87_fpu_control_word -; -; void -; __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -; -; parameters: -; p: rcx -; - -PUBLIC __kmp_store_x87_fpu_control_word -_TEXT SEGMENT - ALIGN 16 -__kmp_store_x87_fpu_control_word PROC ;NEAR - - fstcw WORD PTR [rcx] - ret - -__kmp_store_x87_fpu_control_word ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_clear_x87_fpu_status_word -; -; void -; __kmp_clear_x87_fpu_status_word() -; - -PUBLIC __kmp_clear_x87_fpu_status_word -_TEXT SEGMENT - ALIGN 16 -__kmp_clear_x87_fpu_status_word PROC ;NEAR - - fnclex - ret - -__kmp_clear_x87_fpu_status_word ENDP -_TEXT ENDS - - -;------------------------------------------------------------------------ -; -; FUNCTION __kmp_invoke_microtask -; -; typedef void (*microtask_t)( int *gtid, int *tid, ... ); -; -; int -; __kmp_invoke_microtask( microtask_t pkfn, -; int gtid, int tid, -; int argc, void *p_argv[] ) { -; -; (*pkfn) ( &gtid, &tid, argv[0], ...
); -; return 1; -; } -; -; note: -; just before call to pkfn must have rsp 128-byte aligned for compiler -; -; parameters: -; rcx: pkfn 16[rbp] -; edx: gtid 24[rbp] -; r8d: tid 32[rbp] -; r9d: argc 40[rbp] -; [st]: p_argv 48[rbp] -; -; reg temps: -; rax: used all over the place -; rdx: used all over the place -; rcx: used as argument counter for push parms loop -; r10: used to hold pkfn function pointer argument -; -; return: eax (always 1/TRUE) -; - -$_pkfn = 16 -$_gtid = 24 -$_tid = 32 -$_argc = 40 -$_p_argv = 48 -if OMPT_SUPPORT -$_exit_frame = 56 -endif - -PUBLIC __kmp_invoke_microtask -_TEXT SEGMENT - ALIGN 16 - -__kmp_invoke_microtask PROC FRAME ;NEAR - mov QWORD PTR 16[rsp], rdx ; home gtid parameter - mov QWORD PTR 24[rsp], r8 ; home tid parameter - push rbp ; save base pointer - .pushreg rbp - sub rsp, 0 ; no fixed allocation necessary - end prolog - - lea rbp, QWORD PTR [rsp] ; establish the base pointer - .setframe rbp, 0 - .ENDPROLOG -if OMPT_SUPPORT - mov rax, QWORD PTR $_exit_frame[rbp] - mov QWORD PTR [rax], rbp -endif - mov r10, rcx ; save pkfn pointer for later - -;; ------------------------------------------------------------ - mov rax, r9 ; rax <= argc - cmp rax, 2 - jge SHORT $_kmp_invoke_stack_align - mov rax, 2 ; set 4 homes if less than 2 parms -$_kmp_invoke_stack_align: - lea rdx, QWORD PTR [rax*8+16] ; rax <= (argc + 2) * 8 - mov rax, rsp ; Save current SP into rax - sub rax, rdx ; rsp - ((argc+2)*8) -> rax - ; without align, rsp would be this - and rax, -128 ; Mask off 7 bits (128-byte align) - add rax, rdx ; add space for push's in a loop below - mov rsp, rax ; Prepare the stack ptr - ; Now it will align to 128-byte at the call -;; ------------------------------------------------------------ - ; setup pkfn parameter stack - mov rax, r9 ; rax <= argc - shl rax, 3 ; rax <= argc*8 - mov rdx, QWORD PTR $_p_argv[rbp] ; rdx <= p_argv - add rdx, rax ; rdx <= &p_argv[argc] - mov rcx, r9 ; rcx <= argc - jecxz SHORT $_kmp_invoke_pass_parms ; 
nothing to push if argc=0 - cmp ecx, 1 ; if argc=1 branch ahead - je SHORT $_kmp_invoke_one_parm - sub ecx, 2 ; if argc=2 branch ahead, subtract two from - je SHORT $_kmp_invoke_two_parms - -$_kmp_invoke_push_parms: ; push last - 5th parms to pkfn on stack - sub rdx, 8 ; decrement p_argv pointer to previous parm - mov r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1] - push r8 ; push p_argv[rcx-1] onto stack (reverse order) - sub ecx, 1 - jecxz SHORT $_kmp_invoke_two_parms - jmp SHORT $_kmp_invoke_push_parms - -$_kmp_invoke_two_parms: - sub rdx, 8 ; put 4th parm to pkfn in r9 - mov r9, QWORD PTR [rdx] ; r9 <= p_argv[1] - -$_kmp_invoke_one_parm: - sub rdx, 8 ; put 3rd parm to pkfn in r8 - mov r8, QWORD PTR [rdx] ; r8 <= p_argv[0] - -$_kmp_invoke_pass_parms: ; put 1st & 2nd parms to pkfn in registers - lea rdx, QWORD PTR $_tid[rbp] ; rdx <= &tid (2nd parm to pkfn) - lea rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn) - sub rsp, 32 ; add stack space for first four parms - mov rax, r10 ; rax <= pkfn - call rax ; call (*pkfn)() - mov rax, 1 ; move 1 into return register; - - lea rsp, QWORD PTR [rbp] ; restore stack pointer - -; add rsp, 0 ; no fixed allocation necessary - start epilog - pop rbp ; restore frame pointer - ret -__kmp_invoke_microtask ENDP -_TEXT ENDS - -endif - -END +; z_Windows_NT-586_asm.asm: - microtasking routines specifically +; written for IA-32 architecture and Intel(R) 64 running Windows* OS + +; +;//===----------------------------------------------------------------------===// +;// +;// The LLVM Compiler Infrastructure +;// +;// This file is dual licensed under the MIT and the University of Illinois Open +;// Source Licenses. See LICENSE.txt for details.
+;// +;//===----------------------------------------------------------------------===// +; + + TITLE z_Windows_NT-586_asm.asm + +; ============================= IA-32 architecture ========================== +ifdef _M_IA32 + + .586P + +if @Version gt 510 + .model HUGE +else +_TEXT SEGMENT PARA USE32 PUBLIC 'CODE' +_TEXT ENDS +_DATA SEGMENT DWORD USE32 PUBLIC 'DATA' +_DATA ENDS +CONST SEGMENT DWORD USE32 PUBLIC 'CONST' +CONST ENDS +_BSS SEGMENT DWORD USE32 PUBLIC 'BSS' +_BSS ENDS +$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM' +$$SYMBOLS ENDS +$$TYPES SEGMENT BYTE USE32 'DEBTYP' +$$TYPES ENDS +_TLS SEGMENT DWORD USE32 PUBLIC 'TLS' +_TLS ENDS +FLAT GROUP _DATA, CONST, _BSS + ASSUME CS: FLAT, DS: FLAT, SS: FLAT +endif + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_x86_pause +; +; void +; __kmp_x86_pause( void ) +; + +PUBLIC ___kmp_x86_pause +_p$ = 4 +_d$ = 8 +_TEXT SEGMENT + ALIGN 16 +___kmp_x86_pause PROC NEAR + + db 0f3H + db 090H ;; pause (emitted as the raw bytes F3 90) + ret + +___kmp_x86_pause ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_x86_cpuid +; +; void +; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); +; + +PUBLIC ___kmp_x86_cpuid +_TEXT SEGMENT + ALIGN 16 +_mode$ = 8 +_mode2$ = 12 +_p$ = 16 +_eax$ = 0 +_ebx$ = 4 +_ecx$ = 8 +_edx$ = 12 + +___kmp_x86_cpuid PROC NEAR + + push ebp + mov ebp, esp + + push edi + push ebx + push ecx + push edx + + mov eax, DWORD PTR _mode$[ebp] + mov ecx, DWORD PTR _mode2$[ebp] + cpuid ; Query the CPUID for the current processor + + mov edi, DWORD PTR _p$[ebp] + mov DWORD PTR _eax$[ edi ], eax + mov DWORD PTR _ebx$[ edi ], ebx + mov DWORD PTR _ecx$[ edi ], ecx + mov DWORD PTR _edx$[ edi ], edx + + pop edx + pop ecx + pop ebx + pop edi + + mov esp, ebp + pop ebp + ret + +___kmp_x86_cpuid ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_test_then_add32 +; +; kmp_int32 +;
__kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); +; + +PUBLIC ___kmp_test_then_add32 +_p$ = 4 +_d$ = 8 +_TEXT SEGMENT + ALIGN 16 +___kmp_test_then_add32 PROC NEAR + + mov eax, DWORD PTR _d$[esp] + mov ecx, DWORD PTR _p$[esp] +lock xadd DWORD PTR [ecx], eax + ret + +___kmp_test_then_add32 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store8 +; +; kmp_int8 +; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +; + +PUBLIC ___kmp_compare_and_store8 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_cv$ = 8 +_sv$ = 12 + +___kmp_compare_and_store8 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov al, BYTE PTR _cv$[esp] + mov dl, BYTE PTR _sv$[esp] +lock cmpxchg BYTE PTR [ecx], dl + sete al ; if al == [ecx] set al = 1 else set al = 0 + and eax, 1 ; clear the upper bits of eax so the result is exactly 0 or 1 + ret + +___kmp_compare_and_store8 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store16 +; +; kmp_int16 +; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +; + +PUBLIC ___kmp_compare_and_store16 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_cv$ = 8 +_sv$ = 12 + +___kmp_compare_and_store16 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov ax, WORD PTR _cv$[esp] + mov dx, WORD PTR _sv$[esp] +lock cmpxchg WORD PTR [ecx], dx + sete al ; if ax == [ecx] set al = 1 else set al = 0 + and eax, 1 ; clear the upper bits of eax so the result is exactly 0 or 1 + ret + +___kmp_compare_and_store16 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store32 +; +; kmp_int32 +; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +; + +PUBLIC ___kmp_compare_and_store32 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_cv$ = 8 +_sv$ = 12 + +___kmp_compare_and_store32 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov eax, DWORD PTR _cv$[esp] + mov
edx, DWORD PTR _sv$[esp] +lock cmpxchg DWORD PTR [ecx], edx + sete al ; if eax == [ecx] set al = 1 else set al = 0 + and eax, 1 ; clear the upper bits of eax so the result is exactly 0 or 1 + ret + +___kmp_compare_and_store32 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store64 +; +; kmp_int32 +; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +; + +PUBLIC ___kmp_compare_and_store64 +_TEXT SEGMENT + ALIGN 16 +_p$ = 8 +_cv_low$ = 12 +_cv_high$ = 16 +_sv_low$ = 20 +_sv_high$ = 24 + +___kmp_compare_and_store64 PROC NEAR + + push ebp + mov ebp, esp + push ebx + push edi + mov edi, DWORD PTR _p$[ebp] + mov eax, DWORD PTR _cv_low$[ebp] + mov edx, DWORD PTR _cv_high$[ebp] + mov ebx, DWORD PTR _sv_low$[ebp] + mov ecx, DWORD PTR _sv_high$[ebp] +lock cmpxchg8b QWORD PTR [edi] + sete al ; if edx:eax == [edi] set al = 1 else set al = 0 + and eax, 1 ; clear the upper bits of eax so the result is exactly 0 or 1 + pop edi + pop ebx + mov esp, ebp + pop ebp + ret + +___kmp_compare_and_store64 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_fixed8 +; +; kmp_int8 +; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); +; + +PUBLIC ___kmp_xchg_fixed8 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_d$ = 8 + +___kmp_xchg_fixed8 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov al, BYTE PTR _d$[esp] +lock xchg BYTE PTR [ecx], al + ret + +___kmp_xchg_fixed8 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_fixed16 +; +; kmp_int16 +; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); +; + +PUBLIC ___kmp_xchg_fixed16 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_d$ = 8 + +___kmp_xchg_fixed16 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov ax, WORD PTR _d$[esp] +lock xchg WORD PTR [ecx], ax + ret + +___kmp_xchg_fixed16 ENDP +_TEXT ENDS +
+;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_fixed32 +; +; kmp_int32 +; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); +; + +PUBLIC ___kmp_xchg_fixed32 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_d$ = 8 + +___kmp_xchg_fixed32 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov eax, DWORD PTR _d$[esp] +lock xchg DWORD PTR [ecx], eax + ret + +___kmp_xchg_fixed32 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_real32 +; +; kmp_real32 +; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d ); +; + +PUBLIC ___kmp_xchg_real32 +_TEXT SEGMENT + ALIGN 16 +_p$ = 8 +_d$ = 12 +_old_value$ = -4 + +___kmp_xchg_real32 PROC NEAR + + push ebp + mov ebp, esp + sub esp, 4 + push esi + mov esi, DWORD PTR _p$[ebp] + + fld DWORD PTR [esi] + ;; load + fst DWORD PTR _old_value$[ebp] + ;; store into old_value + + mov eax, DWORD PTR _d$[ebp] + +lock xchg DWORD PTR [esi], eax + + fld DWORD PTR _old_value$[ebp] + ;; return old_value + pop esi + mov esp, ebp + pop ebp + ret + +___kmp_xchg_real32 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store_ret8 +; +; kmp_int8 +; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +; + +PUBLIC ___kmp_compare_and_store_ret8 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_cv$ = 8 +_sv$ = 12 + +___kmp_compare_and_store_ret8 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov al, BYTE PTR _cv$[esp] + mov dl, BYTE PTR _sv$[esp] +lock cmpxchg BYTE PTR [ecx], dl + ret + +___kmp_compare_and_store_ret8 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store_ret16 +; +; kmp_int16 +; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +; + +PUBLIC ___kmp_compare_and_store_ret16 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_cv$ = 8 +_sv$ = 
12 + +___kmp_compare_and_store_ret16 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov ax, WORD PTR _cv$[esp] + mov dx, WORD PTR _sv$[esp] +lock cmpxchg WORD PTR [ecx], dx + ret + +___kmp_compare_and_store_ret16 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store_ret32 +; +; kmp_int32 +; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +; + +PUBLIC ___kmp_compare_and_store_ret32 +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 +_cv$ = 8 +_sv$ = 12 + +___kmp_compare_and_store_ret32 PROC NEAR + + mov ecx, DWORD PTR _p$[esp] + mov eax, DWORD PTR _cv$[esp] + mov edx, DWORD PTR _sv$[esp] +lock cmpxchg DWORD PTR [ecx], edx + ret + +___kmp_compare_and_store_ret32 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_compare_and_store_ret64 +; +; kmp_int64 +; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +; + +PUBLIC ___kmp_compare_and_store_ret64 +_TEXT SEGMENT + ALIGN 16 +_p$ = 8 +_cv_low$ = 12 +_cv_high$ = 16 +_sv_low$ = 20 +_sv_high$ = 24 + +___kmp_compare_and_store_ret64 PROC NEAR + + push ebp + mov ebp, esp + push ebx + push edi + mov edi, DWORD PTR _p$[ebp] + mov eax, DWORD PTR _cv_low$[ebp] + mov edx, DWORD PTR _cv_high$[ebp] + mov ebx, DWORD PTR _sv_low$[ebp] + mov ecx, DWORD PTR _sv_high$[ebp] +lock cmpxchg8b QWORD PTR [edi] + pop edi + pop ebx + mov esp, ebp + pop ebp + ret + +___kmp_compare_and_store_ret64 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_load_x87_fpu_control_word +; +; void +; __kmp_load_x87_fpu_control_word( kmp_int16 *p ); +; +; parameters: +; p: 4(%esp) + +PUBLIC ___kmp_load_x87_fpu_control_word +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 + +___kmp_load_x87_fpu_control_word PROC NEAR + + mov eax, DWORD PTR _p$[esp] + fldcw WORD PTR [eax] + ret + +___kmp_load_x87_fpu_control_word ENDP 
+_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_store_x87_fpu_control_word +; +; void +; __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +; +; parameters: +; p: 4(%esp) + +PUBLIC ___kmp_store_x87_fpu_control_word +_TEXT SEGMENT + ALIGN 16 +_p$ = 4 + +___kmp_store_x87_fpu_control_word PROC NEAR + + mov eax, DWORD PTR _p$[esp] + fstcw WORD PTR [eax] + ret + +___kmp_store_x87_fpu_control_word ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_clear_x87_fpu_status_word +; +; void +; __kmp_clear_x87_fpu_status_word(); +; + +PUBLIC ___kmp_clear_x87_fpu_status_word +_TEXT SEGMENT + ALIGN 16 + +___kmp_clear_x87_fpu_status_word PROC NEAR + + fnclex + ret + +___kmp_clear_x87_fpu_status_word ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_invoke_microtask +; +; typedef void (*microtask_t)( int *gtid, int *tid, ... 
); +; +; int +; __kmp_invoke_microtask( microtask_t pkfn, +; int gtid, int tid, +; int argc, void *p_argv[] ) +; + +PUBLIC ___kmp_invoke_microtask +_TEXT SEGMENT + ALIGN 16 +_pkfn$ = 8 +_gtid$ = 12 +_tid$ = 16 +_argc$ = 20 +_argv$ = 24 +if OMPT_SUPPORT +_exit_frame$ = 28 +endif +_i$ = -8 +_stk_adj$ = -16 +_vptr$ = -12 +_qptr$ = -4 + +___kmp_invoke_microtask PROC NEAR +; Line 102 + push ebp + mov ebp, esp + sub esp, 16 ; 00000010H + push ebx + push esi + push edi +if OMPT_SUPPORT + mov eax, DWORD PTR _exit_frame$[ebp] + mov DWORD PTR [eax], ebp +endif +; Line 114 + mov eax, DWORD PTR _argc$[ebp] + mov DWORD PTR _i$[ebp], eax + +;; ------------------------------------------------------------ + lea edx, DWORD PTR [eax*4+8] + mov ecx, esp ; Save current SP into ECX + mov eax,edx ; Save the size of the args in eax + sub ecx,edx ; esp-((#args+2)*4) -> ecx -- without mods, stack ptr would be this + mov edx,ecx ; Save to edx + and ecx,-128 ; Mask off 7 bits + sub edx,ecx ; Amount to subtract from esp + sub esp,edx ; Prepare stack ptr-- Now it will be aligned on 128-byte boundary at the call + + add edx,eax ; Calculate total size of the stack decrement. 
+ mov DWORD PTR _stk_adj$[ebp], edx +;; ------------------------------------------------------------ + + jmp SHORT $L22237 +$L22238: + mov ecx, DWORD PTR _i$[ebp] + sub ecx, 1 + mov DWORD PTR _i$[ebp], ecx +$L22237: + cmp DWORD PTR _i$[ebp], 0 + jle SHORT $L22239 +; Line 116 + mov edx, DWORD PTR _i$[ebp] + mov eax, DWORD PTR _argv$[ebp] + mov ecx, DWORD PTR [eax+edx*4-4] + mov DWORD PTR _vptr$[ebp], ecx +; Line 123 + mov eax, DWORD PTR _vptr$[ebp] +; Line 124 + push eax +; Line 127 + jmp SHORT $L22238 +$L22239: +; Line 129 + lea edx, DWORD PTR _tid$[ebp] + mov DWORD PTR _vptr$[ebp], edx +; Line 130 + lea eax, DWORD PTR _gtid$[ebp] + mov DWORD PTR _qptr$[ebp], eax +; Line 143 + mov eax, DWORD PTR _vptr$[ebp] +; Line 144 + push eax +; Line 145 + mov eax, DWORD PTR _qptr$[ebp] +; Line 146 + push eax +; Line 147 + call DWORD PTR _pkfn$[ebp] +; Line 148 + add esp, DWORD PTR _stk_adj$[ebp] +; Line 152 + mov eax, 1 +; Line 153 + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret 0 +___kmp_invoke_microtask ENDP +_TEXT ENDS + +endif + +; ==================================== Intel(R) 64 =================================== + +ifdef _M_AMD64 + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_x86_cpuid +; +; void +; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); +; +; parameters: +; mode: ecx +; mode2: edx +; cpuid_buffer: r8 + +PUBLIC __kmp_x86_cpuid +_TEXT SEGMENT + ALIGN 16 + +__kmp_x86_cpuid PROC FRAME ;NEAR + + push rbp + .pushreg rbp + mov rbp, rsp + .setframe rbp, 0 + push rbx ; callee-save register + .pushreg rbx + .ENDPROLOG + + mov r10, r8 ; p parameter + mov eax, ecx ; mode parameter + mov ecx, edx ; mode2 parameter + cpuid ; Query the CPUID for the current processor + + mov DWORD PTR 0[ r10 ], eax ; store results into buffer + mov DWORD PTR 4[ r10 ], ebx + mov DWORD PTR 8[ r10 ], ecx + mov DWORD PTR 12[ r10 ], edx + + pop rbx ; callee-save register + mov rsp, rbp + pop rbp + ret + +__kmp_x86_cpuid 
ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_test_then_add32 +; +; kmp_int32 +; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); +; +; parameters: +; p: rcx +; d: edx +; +; return: eax + +PUBLIC __kmp_test_then_add32 +_TEXT SEGMENT + ALIGN 16 +__kmp_test_then_add32 PROC ;NEAR + + mov eax, edx +lock xadd DWORD PTR [rcx], eax + ret + +__kmp_test_then_add32 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_test_then_add64 +; +; kmp_int32 +; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); +; +; parameters: +; p: rcx +; d: rdx +; +; return: rax + +PUBLIC __kmp_test_then_add64 +_TEXT SEGMENT + ALIGN 16 +__kmp_test_then_add64 PROC ;NEAR + + mov rax, rdx +lock xadd QWORD PTR [rcx], rax + ret + +__kmp_test_then_add64 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store8 +; +; kmp_int8 +; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +; parameters: +; p: rcx +; cv: edx +; sv: r8d +; +; return: eax + +PUBLIC __kmp_compare_and_store8 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store8 PROC ;NEAR + + mov al, dl ; "cv" + mov edx, r8d ; "sv" +lock cmpxchg BYTE PTR [rcx], dl + sete al ; if al == [rcx] set al = 1 else set al = 0 + and rax, 1 ; sign extend previous instruction + ret + +__kmp_compare_and_store8 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store16 +; +; kmp_int16 +; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +; parameters: +; p: rcx +; cv: edx +; sv: r8d +; +; return: eax + +PUBLIC __kmp_compare_and_store16 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store16 PROC ;NEAR + + mov ax, dx ; "cv" + mov edx, r8d ; "sv" +lock cmpxchg WORD PTR [rcx], dx + sete al ; if ax == [rcx] 
set al = 1 else set al = 0 + and rax, 1 ; sign extend previous instruction + ret + +__kmp_compare_and_store16 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store32 +; +; kmp_int32 +; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +; parameters: +; p: rcx +; cv: edx +; sv: r8d +; +; return: eax + +PUBLIC __kmp_compare_and_store32 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store32 PROC ;NEAR + + mov eax, edx ; "cv" + mov edx, r8d ; "sv" +lock cmpxchg DWORD PTR [rcx], edx + sete al ; if eax == [rcx] set al = 1 else set al = 0 + and rax, 1 ; sign extend previous instruction + ret + +__kmp_compare_and_store32 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store64 +; +; kmp_int32 +; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +; parameters: +; p: rcx +; cv: rdx +; sv: r8 +; +; return: eax + +PUBLIC __kmp_compare_and_store64 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store64 PROC ;NEAR + + mov rax, rdx ; "cv" + mov rdx, r8 ; "sv" +lock cmpxchg QWORD PTR [rcx], rdx + sete al ; if rax == [rcx] set al = 1 else set al = 0 + and rax, 1 ; sign extend previous instruction + ret + +__kmp_compare_and_store64 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_fixed8 +; +; kmp_int8 +; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); +; +; parameters: +; p: rcx +; d: dl +; +; return: al + +PUBLIC __kmp_xchg_fixed8 +_TEXT SEGMENT + ALIGN 16 + +__kmp_xchg_fixed8 PROC ;NEAR + + mov al, dl +lock xchg BYTE PTR [rcx], al + ret + +__kmp_xchg_fixed8 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_fixed16 +; +; kmp_int16 +; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); +; +; parameters: +; p: 
rcx +; d: dx +; +; return: ax + +PUBLIC __kmp_xchg_fixed16 +_TEXT SEGMENT + ALIGN 16 + +__kmp_xchg_fixed16 PROC ;NEAR + + mov ax, dx +lock xchg WORD PTR [rcx], ax + ret + +__kmp_xchg_fixed16 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_fixed32 +; +; kmp_int32 +; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); +; +; parameters: +; p: rcx +; d: edx +; +; return: eax + +PUBLIC __kmp_xchg_fixed32 +_TEXT SEGMENT + ALIGN 16 +__kmp_xchg_fixed32 PROC ;NEAR + + mov eax, edx +lock xchg DWORD PTR [rcx], eax + ret + +__kmp_xchg_fixed32 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION ___kmp_xchg_fixed64 +; +; kmp_int64 +; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); +; +; parameters: +; p: rcx +; d: rdx +; +; return: rax + +PUBLIC __kmp_xchg_fixed64 +_TEXT SEGMENT + ALIGN 16 +__kmp_xchg_fixed64 PROC ;NEAR + + mov rax, rdx +lock xchg QWORD PTR [rcx], rax + ret + +__kmp_xchg_fixed64 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store_ret8 +; +; kmp_int8 +; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +; parameters: +; p: rcx +; cv: edx +; sv: r8d +; +; return: eax + +PUBLIC __kmp_compare_and_store_ret8 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store_ret8 PROC ;NEAR + mov al, dl ; "cv" + mov edx, r8d ; "sv" +lock cmpxchg BYTE PTR [rcx], dl + ; Compare AL with [rcx]. If equal set + ; ZF and exchange DL with [rcx]. Else, clear + ; ZF and load [rcx] into AL. 
+ ret + +__kmp_compare_and_store_ret8 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store_ret16 +; +; kmp_int16 +; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +; parameters: +; p: rcx +; cv: edx +; sv: r8d +; +; return: eax + +PUBLIC __kmp_compare_and_store_ret16 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store_ret16 PROC ;NEAR + + mov ax, dx ; "cv" + mov edx, r8d ; "sv" +lock cmpxchg WORD PTR [rcx], dx + ret + +__kmp_compare_and_store_ret16 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store_ret32 +; +; kmp_int32 +; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +; parameters: +; p: rcx +; cv: edx +; sv: r8d +; +; return: eax + +PUBLIC __kmp_compare_and_store_ret32 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store_ret32 PROC ;NEAR + + mov eax, edx ; "cv" + mov edx, r8d ; "sv" +lock cmpxchg DWORD PTR [rcx], edx + ret + +__kmp_compare_and_store_ret32 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store_ret64 +; +; kmp_int64 +; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +; parameters: +; p: rcx +; cv: rdx +; sv: r8 +; +; return: rax + +PUBLIC __kmp_compare_and_store_ret64 +_TEXT SEGMENT + ALIGN 16 + +__kmp_compare_and_store_ret64 PROC ;NEAR + + mov rax, rdx ; "cv" + mov rdx, r8 ; "sv" +lock cmpxchg QWORD PTR [rcx], rdx + ret + +__kmp_compare_and_store_ret64 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_compare_and_store_loop8 +; +; kmp_int8 +; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +; parameters: +; p: rcx +; cv: edx +; sv: r8d +; +; return: al + +PUBLIC __kmp_compare_and_store_loop8 +_TEXT 
SEGMENT + ALIGN 16 + +__kmp_compare_and_store_loop8 PROC ;NEAR +$__kmp_loop: + mov al, dl ; "cv" + mov edx, r8d ; "sv" +lock cmpxchg BYTE PTR [rcx], dl + ; Compare AL with [rcx]. If equal set + ; ZF and exchange DL with [rcx]. Else, clear + ; ZF and load [rcx] into AL. + jz SHORT $__kmp_success + + db 0f3H + db 090H ; pause + + jmp SHORT $__kmp_loop + +$__kmp_success: + ret + +__kmp_compare_and_store_loop8 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_xchg_real32 +; +; kmp_real32 +; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d ); +; +; parameters: +; p: rcx +; d: xmm1 (lower 4 bytes) +; +; return: xmm0 (lower 4 bytes) + +PUBLIC __kmp_xchg_real32 +_TEXT SEGMENT + ALIGN 16 +__kmp_xchg_real32 PROC ;NEAR + + movd eax, xmm1 ; load d + +lock xchg DWORD PTR [rcx], eax + + movd xmm0, eax ; load old value into return register + ret + +__kmp_xchg_real32 ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_xchg_real64 +; +; kmp_real64 +; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d ); +; +; parameters: +; p: rcx +; d: xmm1 (lower 8 bytes) +; +; return: xmm0 (lower 8 bytes) + +PUBLIC __kmp_xchg_real64 +_TEXT SEGMENT + ALIGN 16 +__kmp_xchg_real64 PROC ;NEAR + + movd rax, xmm1 ; load "d" + +lock xchg QWORD PTR [rcx], rax + + movd xmm0, rax ; load old value into return register + ret + +__kmp_xchg_real64 ENDP +_TEXT ENDS + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_load_x87_fpu_control_word +; +; void +; __kmp_load_x87_fpu_control_word( kmp_int16 *p ); +; +; parameters: +; p: rcx +; + +PUBLIC __kmp_load_x87_fpu_control_word +_TEXT SEGMENT + ALIGN 16 +__kmp_load_x87_fpu_control_word PROC ;NEAR + + fldcw WORD PTR [rcx] + ret + +__kmp_load_x87_fpu_control_word ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION 
__kmp_store_x87_fpu_control_word +; +; void +; __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +; +; parameters: +; p: rcx +; + +PUBLIC __kmp_store_x87_fpu_control_word +_TEXT SEGMENT + ALIGN 16 +__kmp_store_x87_fpu_control_word PROC ;NEAR + + fstcw WORD PTR [rcx] + ret + +__kmp_store_x87_fpu_control_word ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_clear_x87_fpu_status_word +; +; void +; __kmp_clear_x87_fpu_status_word() +; + +PUBLIC __kmp_clear_x87_fpu_status_word +_TEXT SEGMENT + ALIGN 16 +__kmp_clear_x87_fpu_status_word PROC ;NEAR + + fnclex + ret + +__kmp_clear_x87_fpu_status_word ENDP +_TEXT ENDS + + +;------------------------------------------------------------------------ +; +; FUNCTION __kmp_invoke_microtask +; +; typedef void (*microtask_t)( int *gtid, int *tid, ... ); +; +; int +; __kmp_invoke_microtask( microtask_t pkfn, +; int gtid, int tid, +; int argc, void *p_argv[] ) { +; +; (*pkfn) ( >id, &tid, argv[0], ... 
); +; return 1; +; } +; +; note: +; just before call to pkfn must have rsp 128-byte aligned for compiler +; +; parameters: +; rcx: pkfn 16[rbp] +; edx: gtid 24[rbp] +; r8d: tid 32[rbp] +; r9d: argc 40[rbp] +; [st]: p_argv 48[rbp] +; +; reg temps: +; rax: used all over the place +; rdx: used all over the place +; rcx: used as argument counter for push parms loop +; r10: used to hold pkfn function pointer argument +; +; return: eax (always 1/TRUE) +; + +$_pkfn = 16 +$_gtid = 24 +$_tid = 32 +$_argc = 40 +$_p_argv = 48 +if OMPT_SUPPORT +$_exit_frame = 56 +endif + +PUBLIC __kmp_invoke_microtask +_TEXT SEGMENT + ALIGN 16 + +__kmp_invoke_microtask PROC FRAME ;NEAR + mov QWORD PTR 16[rsp], rdx ; home gtid parameter + mov QWORD PTR 24[rsp], r8 ; home tid parameter + push rbp ; save base pointer + .pushreg rbp + sub rsp, 0 ; no fixed allocation necessary - end prolog + + lea rbp, QWORD PTR [rsp] ; establish the base pointer + .setframe rbp, 0 + .ENDPROLOG +if OMPT_SUPPORT + mov rax, QWORD PTR $_exit_frame[rbp] + mov QWORD PTR [rax], rbp +endif + mov r10, rcx ; save pkfn pointer for later + +;; ------------------------------------------------------------ + mov rax, r9 ; rax <= argc + cmp rax, 2 + jge SHORT $_kmp_invoke_stack_align + mov rax, 2 ; set 4 homes if less than 2 parms +$_kmp_invoke_stack_align: + lea rdx, QWORD PTR [rax*8+16] ; rax <= (argc + 2) * 8 + mov rax, rsp ; Save current SP into rax + sub rax, rdx ; rsp - ((argc+2)*8) -> rax + ; without align, rsp would be this + and rax, -128 ; Mask off 7 bits (128-byte align) + add rax, rdx ; add space for push's in a loop below + mov rsp, rax ; Prepare the stack ptr + ; Now it will align to 128-byte at the call +;; ------------------------------------------------------------ + ; setup pkfn parameter stack + mov rax, r9 ; rax <= argc + shl rax, 3 ; rax <= argc*8 + mov rdx, QWORD PTR $_p_argv[rbp] ; rdx <= p_argv + add rdx, rax ; rdx <= &p_argv[argc] + mov rcx, r9 ; rcx <= argc + jecxz SHORT $_kmp_invoke_pass_parms ; 
nothing to push if argc=0 + cmp ecx, 1 ; if argc=1 branch ahead + je SHORT $_kmp_invoke_one_parm + sub ecx, 2 ; if argc=2 branch ahead, subtract two from + je SHORT $_kmp_invoke_two_parms + +$_kmp_invoke_push_parms: ; push last - 5th parms to pkfn on stack + sub rdx, 8 ; decrement p_argv pointer to previous parm + mov r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1] + push r8 ; push p_argv[rcx-1] onto stack (reverse order) + sub ecx, 1 + jecxz SHORT $_kmp_invoke_two_parms + jmp SHORT $_kmp_invoke_push_parms + +$_kmp_invoke_two_parms: + sub rdx, 8 ; put 4th parm to pkfn in r9 + mov r9, QWORD PTR [rdx] ; r9 <= p_argv[1] + +$_kmp_invoke_one_parm: + sub rdx, 8 ; put 3rd parm to pkfn in r8 + mov r8, QWORD PTR [rdx] ; r8 <= p_argv[0] + +$_kmp_invoke_pass_parms: ; put 1st & 2nd parms to pkfn in registers + lea rdx, QWORD PTR $_tid[rbp] ; rdx <= &tid (2nd parm to pkfn) + lea rcx, QWORD PTR $_gtid[rbp] ; rcx <= >id (1st parm to pkfn) + sub rsp, 32 ; add stack space for first four parms + mov rax, r10 ; rax <= pkfn + call rax ; call (*pkfn)() + mov rax, 1 ; move 1 into return register; + + lea rsp, QWORD PTR [rbp] ; restore stack pointer + +; add rsp, 0 ; no fixed allocation necessary - start epilog + pop rbp ; restore frame pointer + ret +__kmp_invoke_microtask ENDP +_TEXT ENDS + +endif + +END diff --git a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c index 8aa07f0b4c9..3aeafae9102 100644 --- a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c +++ b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c @@ -1,163 +1,163 @@ -/* - * z_Windows_NT-586_util.c -- platform specific routines. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. 
-// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" - -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) -/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to - * use compare_and_store for these routines - */ - -kmp_int8 -__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; - - old_value = TCR_1( *p ); - new_value = old_value | d; - - while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value | d; - } - return old_value; -} - -kmp_int8 -__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; - - old_value = TCR_1( *p ); - new_value = old_value & d; - - while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value & d; - } - return old_value; -} - -kmp_int32 -__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; - - old_value = TCR_4( *p ); - new_value = old_value | d; - - while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value | d; - } - return old_value; -} - -kmp_int32 -__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; - - old_value = TCR_4( *p ); - new_value = old_value & d; - - while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value & d; - } - return old_value; -} - -kmp_int8 -__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_1( *p ); - new_value = old_value + d; - while ( ! 
__kmp_compare_and_store8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value + d; - } - return old_value; -} - -#if KMP_ARCH_X86 -kmp_int64 -__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_8( *p ); - new_value = old_value + d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value + d; - } - return old_value; -} -#endif /* KMP_ARCH_X86 */ - -kmp_int64 -__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_8( *p ); - new_value = old_value | d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value | d; - } - - return old_value; -} - -kmp_int64 -__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_8( *p ); - new_value = old_value & d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value & d; - } - - return old_value; -} - -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - +/* + * z_Windows_NT-586_util.c -- platform specific routines. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" + +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) +/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to + * use compare_and_store for these routines + */ + +kmp_int8 +__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d ) +{ + kmp_int8 old_value, new_value; + + old_value = TCR_1( *p ); + new_value = old_value | d; + + while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_1( *p ); + new_value = old_value | d; + } + return old_value; +} + +kmp_int8 +__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d ) +{ + kmp_int8 old_value, new_value; + + old_value = TCR_1( *p ); + new_value = old_value & d; + + while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_1( *p ); + new_value = old_value & d; + } + return old_value; +} + +kmp_int32 +__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d ) +{ + kmp_int32 old_value, new_value; + + old_value = TCR_4( *p ); + new_value = old_value | d; + + while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_4( *p ); + new_value = old_value | d; + } + return old_value; +} + +kmp_int32 +__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) +{ + kmp_int32 old_value, new_value; + + old_value = TCR_4( *p ); + new_value = old_value & d; + + while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_4( *p ); + new_value = old_value & d; + } + return old_value; +} + +kmp_int8 +__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d ) +{ + kmp_int64 old_value, new_value; + + old_value = TCR_1( *p ); + new_value = old_value + d; + while ( ! 
__kmp_compare_and_store8 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_1( *p ); + new_value = old_value + d; + } + return old_value; +} + +#if KMP_ARCH_X86 +kmp_int64 +__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) +{ + kmp_int64 old_value, new_value; + + old_value = TCR_8( *p ); + new_value = old_value + d; + while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_8( *p ); + new_value = old_value + d; + } + return old_value; +} +#endif /* KMP_ARCH_X86 */ + +kmp_int64 +__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d ) +{ + kmp_int64 old_value, new_value; + + old_value = TCR_8( *p ); + new_value = old_value | d; + while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_8( *p ); + new_value = old_value | d; + } + + return old_value; +} + +kmp_int64 +__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d ) +{ + kmp_int64 old_value, new_value; + + old_value = TCR_8( *p ); + new_value = old_value & d; + while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) + { + KMP_CPU_PAUSE(); + old_value = TCR_8( *p ); + new_value = old_value & d; + } + + return old_value; +} + +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + diff --git a/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c b/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c index f678ba320e7..03a4afe5e14 100644 --- a/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c +++ b/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c @@ -1,1932 +1,1932 @@ -/* - * z_Windows_NT_util.c -- platform specific routines. 
- */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#include "kmp.h" -#include "kmp_itt.h" -#include "kmp_i18n.h" -#include "kmp_io.h" -#include "kmp_wait_release.h" - - - -/* ----------------------------------------------------------------------------------- */ -/* ----------------------------------------------------------------------------------- */ - -/* This code is related to NtQuerySystemInformation() function. This function - is used in the Load balance algorithm for OMP_DYNAMIC=true to find the - number of running threads in the system. */ - -#include -#include // UNICODE_STRING - -enum SYSTEM_INFORMATION_CLASS { - SystemProcessInformation = 5 -}; // SYSTEM_INFORMATION_CLASS - -struct CLIENT_ID { - HANDLE UniqueProcess; - HANDLE UniqueThread; -}; // struct CLIENT_ID - -enum THREAD_STATE { - StateInitialized, - StateReady, - StateRunning, - StateStandby, - StateTerminated, - StateWait, - StateTransition, - StateUnknown -}; // enum THREAD_STATE - -struct VM_COUNTERS { - SIZE_T PeakVirtualSize; - SIZE_T VirtualSize; - ULONG PageFaultCount; - SIZE_T PeakWorkingSetSize; - SIZE_T WorkingSetSize; - SIZE_T QuotaPeakPagedPoolUsage; - SIZE_T QuotaPagedPoolUsage; - SIZE_T QuotaPeakNonPagedPoolUsage; - SIZE_T QuotaNonPagedPoolUsage; - SIZE_T PagefileUsage; - SIZE_T PeakPagefileUsage; - SIZE_T PrivatePageCount; -}; // struct VM_COUNTERS - -struct SYSTEM_THREAD { - LARGE_INTEGER KernelTime; - LARGE_INTEGER UserTime; - LARGE_INTEGER CreateTime; - ULONG WaitTime; - LPVOID StartAddress; - CLIENT_ID ClientId; - DWORD Priority; - LONG BasePriority; - ULONG ContextSwitchCount; - THREAD_STATE State; - ULONG WaitReason; -}; // SYSTEM_THREAD - -KMP_BUILD_ASSERT( offsetof( 
SYSTEM_THREAD, KernelTime ) == 0 ); -#if KMP_ARCH_X86 - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 52 ); -#else - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 68 ); -#endif - -struct SYSTEM_PROCESS_INFORMATION { - ULONG NextEntryOffset; - ULONG NumberOfThreads; - LARGE_INTEGER Reserved[ 3 ]; - LARGE_INTEGER CreateTime; - LARGE_INTEGER UserTime; - LARGE_INTEGER KernelTime; - UNICODE_STRING ImageName; - DWORD BasePriority; - HANDLE ProcessId; - HANDLE ParentProcessId; - ULONG HandleCount; - ULONG Reserved2[ 2 ]; - VM_COUNTERS VMCounters; - IO_COUNTERS IOCounters; - SYSTEM_THREAD Threads[ 1 ]; -}; // SYSTEM_PROCESS_INFORMATION -typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION; - -KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) == 0 ); -KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime ) == 32 ); -KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName ) == 56 ); -#if KMP_ARCH_X86 - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 68 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 76 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 88 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 136 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 184 ); -#else - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 80 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 96 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 112 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 208 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 256 ); -#endif - -typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( 
SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG ); -NtQuerySystemInformation_t NtQuerySystemInformation = NULL; - -HMODULE ntdll = NULL; - -/* End of NtQuerySystemInformation()-related code */ - -#if KMP_GROUP_AFFINITY -static HMODULE kernel32 = NULL; -#endif /* KMP_GROUP_AFFINITY */ - -/* ----------------------------------------------------------------------------------- */ -/* ----------------------------------------------------------------------------------- */ - -#if KMP_HANDLE_SIGNALS - typedef void (* sig_func_t )( int ); - static sig_func_t __kmp_sighldrs[ NSIG ]; - static int __kmp_siginstalled[ NSIG ]; -#endif - -static HANDLE __kmp_monitor_ev; -static kmp_int64 __kmp_win32_time; -double __kmp_win32_tick; - -int __kmp_init_runtime = FALSE; -CRITICAL_SECTION __kmp_win32_section; - -void -__kmp_win32_mutex_init( kmp_win32_mutex_t *mx ) -{ - InitializeCriticalSection( & mx->cs ); -#if USE_ITT_BUILD - __kmp_itt_system_object_created( & mx->cs, "Critical Section" ); -#endif /* USE_ITT_BUILD */ -} - -void -__kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx ) -{ - DeleteCriticalSection( & mx->cs ); -} - -void -__kmp_win32_mutex_lock( kmp_win32_mutex_t *mx ) -{ - EnterCriticalSection( & mx->cs ); -} - -void -__kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx ) -{ - LeaveCriticalSection( & mx->cs ); -} - -void -__kmp_win32_cond_init( kmp_win32_cond_t *cv ) -{ - cv->waiters_count_ = 0; - cv->wait_generation_count_ = 0; - cv->release_count_ = 0; - - /* Initialize the critical section */ - __kmp_win32_mutex_init( & cv->waiters_count_lock_ ); - - /* Create a manual-reset event. 
*/ - cv->event_ = CreateEvent( NULL, // no security - TRUE, // manual-reset - FALSE, // non-signaled initially - NULL ); // unnamed -#if USE_ITT_BUILD - __kmp_itt_system_object_created( cv->event_, "Event" ); -#endif /* USE_ITT_BUILD */ -} - -void -__kmp_win32_cond_destroy( kmp_win32_cond_t *cv ) -{ - __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ ); - __kmp_free_handle( cv->event_ ); - memset( cv, '\0', sizeof( *cv ) ); -} - -/* TODO associate cv with a team instead of a thread so as to optimize - * the case where we wake up a whole team */ - -void -__kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load ) -{ - int my_generation; - int last_waiter; - - /* Avoid race conditions */ - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); - - /* Increment count of waiters */ - cv->waiters_count_++; - - /* Store current generation in our activation record. */ - my_generation = cv->wait_generation_count_; - - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); - __kmp_win32_mutex_unlock( mx ); - - - for (;;) { - int wait_done; - - /* Wait until the event is signaled */ - WaitForSingleObject( cv->event_, INFINITE ); - - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); - - /* Exit the loop when the event_> is signaled and - * there are still waiting threads from this - * that haven't been released from this wait yet. 
*/ - wait_done = ( cv->release_count_ > 0 ) && - ( cv->wait_generation_count_ != my_generation ); - - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_); - - /* there used to be a semicolon after the if statement, - * it looked like a bug, so i removed it */ - if( wait_done ) - break; - } - - __kmp_win32_mutex_lock( mx ); - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); - - cv->waiters_count_--; - cv->release_count_--; - - last_waiter = ( cv->release_count_ == 0 ); - - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); - - if( last_waiter ) { - /* We're the last waiter to be notified, so reset the manual event. */ - ResetEvent( cv->event_ ); - } -} - -void -__kmp_win32_cond_broadcast( kmp_win32_cond_t *cv ) -{ - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); - - if( cv->waiters_count_ > 0 ) { - SetEvent( cv->event_ ); - /* Release all the threads in this generation. */ - - cv->release_count_ = cv->waiters_count_; - - /* Start a new generation. */ - cv->wait_generation_count_++; - } - - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); -} - -void -__kmp_win32_cond_signal( kmp_win32_cond_t *cv ) -{ - __kmp_win32_cond_broadcast( cv ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_enable( int new_state ) -{ - if (__kmp_init_runtime) - LeaveCriticalSection( & __kmp_win32_section ); -} - -void -__kmp_disable( int *old_state ) -{ - *old_state = 0; - - if (__kmp_init_runtime) - EnterCriticalSection( & __kmp_win32_section ); -} - -void -__kmp_suspend_initialize( void ) -{ - /* do nothing */ -} - -static void -__kmp_suspend_initialize_thread( kmp_info_t *th ) -{ - if ( ! 
TCR_4( th->th.th_suspend_init ) ) { - /* this means we haven't initialized the suspension pthread objects for this thread - in this instance of the process */ - __kmp_win32_cond_init( &th->th.th_suspend_cv ); - __kmp_win32_mutex_init( &th->th.th_suspend_mx ); - TCW_4( th->th.th_suspend_init, TRUE ); - } -} - -void -__kmp_suspend_uninitialize_thread( kmp_info_t *th ) -{ - if ( TCR_4( th->th.th_suspend_init ) ) { - /* this means we have initialize the suspension pthread objects for this thread - in this instance of the process */ - __kmp_win32_cond_destroy( & th->th.th_suspend_cv ); - __kmp_win32_mutex_destroy( & th->th.th_suspend_mx ); - TCW_4( th->th.th_suspend_init, FALSE ); - } -} - -/* This routine puts the calling thread to sleep after setting the - * sleep bit for the indicated flag variable to true. - */ -template -static inline void __kmp_suspend_template( int th_gtid, C *flag ) -{ - kmp_info_t *th = __kmp_threads[th_gtid]; - int status; - typename C::flag_t old_spin; - - KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", th_gtid, flag->get() ) ); - - __kmp_suspend_initialize_thread( th ); - __kmp_win32_mutex_lock( &th->th.th_suspend_mx ); - - KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for flag's loc(%p)\n", - th_gtid, flag->get() ) ); - - /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread - gets called first? 
- */ - old_spin = flag->set_sleeping(); - - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for flag's loc(%p)==%d\n", - th_gtid, flag->get(), *(flag->get()) ) ); - - if ( flag->done_check_val(old_spin) ) { - old_spin = flag->unset_sleeping(); - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for flag's loc(%p)\n", - th_gtid, flag->get()) ); - } else { -#ifdef DEBUG_SUSPEND - __kmp_suspend_count++; -#endif - /* Encapsulate in a loop as the documentation states that this may - * "with low probability" return when the condition variable has - * not been signaled or broadcast - */ - int deactivated = FALSE; - TCW_PTR(th->th.th_sleep_loc, (void *)flag); - while ( flag->is_sleeping() ) { - KF_TRACE( 15, ("__kmp_suspend_template: T#%d about to perform kmp_win32_cond_wait()\n", - th_gtid ) ); - // Mark the thread as no longer active (only in the first iteration of the loop). - if ( ! deactivated ) { - th->th.th_active = FALSE; - if ( th->th.th_active_in_pool ) { - th->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); - } - deactivated = TRUE; - - - __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 ); - } - else { - __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 ); - } - -#ifdef KMP_DEBUG - if( flag->is_sleeping() ) { - KF_TRACE( 100, ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid )); - } -#endif /* KMP_DEBUG */ - - } // while - - // Mark the thread as active again (if it was previous marked as inactive) - if ( deactivated ) { - th->th.th_active = TRUE; - if ( TCR_4(th->th.th_in_pool) ) { - KMP_TEST_THEN_INC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - th->th.th_active_in_pool = TRUE; - } - } - } - - - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); - - KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) ); -} - -void 
__kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { - __kmp_suspend_template(th_gtid, flag); -} -void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { - __kmp_suspend_template(th_gtid, flag); -} -void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { - __kmp_suspend_template(th_gtid, flag); -} - - -/* This routine signals the thread specified by target_gtid to wake up - * after setting the sleep bit indicated by the flag argument to FALSE - */ -template -static inline void __kmp_resume_template( int target_gtid, C *flag ) -{ - kmp_info_t *th = __kmp_threads[target_gtid]; - int status; - -#ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; -#endif - - KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) ); - - __kmp_suspend_initialize_thread( th ); - __kmp_win32_mutex_lock( &th->th.th_suspend_mx ); - - if (!flag) { // coming from __kmp_null_resume_wrapper - flag = (C *)th->th.th_sleep_loc; - } - - // First, check if the flag is null or its type has changed. If so, someone else woke it up. 
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p)\n", - gtid, target_gtid, NULL ) ); - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); - return; - } - else { - typename C::flag_t old_spin = flag->unset_sleeping(); - if ( !flag->is_sleeping_val(old_spin) ) { - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p): " - "%u => %u\n", - gtid, target_gtid, flag->get(), old_spin, *(flag->get()) ) ); - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); - return; - } - } - TCW_PTR(th->th.th_sleep_loc, NULL); - - KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p)\n", - gtid, target_gtid, flag->get() ) ); - - - __kmp_win32_cond_signal( &th->th.th_suspend_cv ); - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); - - KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n", - gtid, target_gtid ) ); -} - -void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { - __kmp_resume_template(target_gtid, flag); -} -void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { - __kmp_resume_template(target_gtid, flag); -} -void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { - __kmp_resume_template(target_gtid, flag); -} - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_yield( int cond ) -{ - if (cond) - Sleep(0); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_gtid_set_specific( int gtid ) -{ - KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n", - gtid, __kmp_gtid_threadprivate_key )); - KMP_ASSERT( __kmp_init_runtime ); - if( ! 
TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) ) - KMP_FATAL( TLSSetValueFailed ); -} - -int -__kmp_gtid_get_specific() -{ - int gtid; - if( !__kmp_init_runtime ) { - KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) ); - return KMP_GTID_SHUTDOWN; - } - gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ); - if ( gtid == 0 ) { - gtid = KMP_GTID_DNE; - } - else { - gtid--; - } - KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", - __kmp_gtid_threadprivate_key, gtid )); - return gtid; -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if KMP_GROUP_AFFINITY - -// -// Only 1 DWORD in the mask should have any procs set. -// Return the appropriate index, or -1 for an invalid mask. -// -int -__kmp_get_proc_group( kmp_affin_mask_t const *mask ) -{ - int i; - int group = -1; - for (i = 0; i < __kmp_num_proc_groups; i++) { - if (mask[i] == 0) { - continue; - } - if (group >= 0) { - return -1; - } - group = i; - } - return group; -} - -#endif /* KMP_GROUP_AFFINITY */ - -int -__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) -{ - -#if KMP_GROUP_AFFINITY - - if (__kmp_num_proc_groups > 1) { - // - // Check for a valid mask. - // - GROUP_AFFINITY ga; - int group = __kmp_get_proc_group( mask ); - if (group < 0) { - if (abort_on_error) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - return -1; - } - - // - // Transform the bit vector into a GROUP_AFFINITY struct - // and make the system call to set affinity. 
- // - ga.Group = group; - ga.Mask = mask[group]; - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; - } - } - else - -#endif /* KMP_GROUP_AFFINITY */ - - { - if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; - } - } - return 0; -} - -int -__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) -{ - -#if KMP_GROUP_AFFINITY - - if (__kmp_num_proc_groups > 1) { - KMP_CPU_ZERO(mask); - GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); - - if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - return error; - } - - if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) - || (ga.Mask == 0)) { - return -1; - } - - mask[ga.Group] = ga.Mask; - } - else - -#endif /* KMP_GROUP_AFFINITY */ - - { - kmp_affin_mask_t newMask, sysMask, retval; - - if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "GetProcessAffinityMask()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - return error; - } - retval = SetThreadAffinityMask(GetCurrentThread(), newMask); - if (! 
retval) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - return error; - } - newMask = SetThreadAffinityMask(GetCurrentThread(), retval); - if (! newMask) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - } - *mask = retval; - } - return 0; -} - -void -__kmp_affinity_bind_thread( int proc ) -{ - -#if KMP_GROUP_AFFINITY - - if (__kmp_num_proc_groups > 1) { - // - // Form the GROUP_AFFINITY struct directly, rather than filling - // out a bit vector and calling __kmp_set_system_affinity(). - // - GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups - * CHAR_BIT * sizeof(DWORD_PTR)))); - ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR)); - ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR))); - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (__kmp_affinity_verbose) { // AC: continue silently if not verbose - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - } - } - else - -#endif /* KMP_GROUP_AFFINITY */ - - { - kmp_affin_mask_t mask; - KMP_CPU_ZERO(&mask); - KMP_CPU_SET(proc, &mask); - __kmp_set_system_affinity(&mask, TRUE); - } -} - -void -__kmp_affinity_determine_capable( const char *env_var ) -{ - // - // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask(). 
- // - -#if KMP_GROUP_AFFINITY - KMP_AFFINITY_ENABLE(__kmp_num_proc_groups*sizeof(kmp_affin_mask_t)); -#else - KMP_AFFINITY_ENABLE(sizeof(kmp_affin_mask_t)); -#endif - - KA_TRACE( 10, ( - "__kmp_affinity_determine_capable: " - "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n", - __kmp_affin_mask_size - ) ); -} - -double -__kmp_read_cpu_time( void ) -{ - FILETIME CreationTime, ExitTime, KernelTime, UserTime; - int status; - double cpu_time; - - cpu_time = 0; - - status = GetProcessTimes( GetCurrentProcess(), &CreationTime, - &ExitTime, &KernelTime, &UserTime ); - - if (status) { - double sec = 0; - - sec += KernelTime.dwHighDateTime; - sec += UserTime.dwHighDateTime; - - /* Shift left by 32 bits */ - sec *= (double) (1 << 16) * (double) (1 << 16); - - sec += KernelTime.dwLowDateTime; - sec += UserTime.dwLowDateTime; - - cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC; - } - - return cpu_time; -} - -int -__kmp_read_system_info( struct kmp_sys_info *info ) -{ - info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */ - info->minflt = 0; /* the number of page faults serviced without any I/O */ - info->majflt = 0; /* the number of page faults serviced that required I/O */ - info->nswap = 0; /* the number of times a process was "swapped" out of memory */ - info->inblock = 0; /* the number of times the file system had to perform input */ - info->oublock = 0; /* the number of times the file system had to perform output */ - info->nvcsw = 0; /* the number of times a context switch was voluntarily */ - info->nivcsw = 0; /* the number of times a context switch was forced */ - - return 1; -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - -void -__kmp_runtime_initialize( void ) -{ - SYSTEM_INFO info; - kmp_str_buf_t path; - UINT path_size; - - if ( __kmp_init_runtime ) { - return; - }; - -#if KMP_DYNAMIC_LIB - 
/* Pin dynamic library for the lifetime of application */ - { - // First, turn off error message boxes - UINT err_mode = SetErrorMode (SEM_FAILCRITICALERRORS); - HMODULE h; - BOOL ret = GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS - |GET_MODULE_HANDLE_EX_FLAG_PIN, - (LPCTSTR)&__kmp_serial_initialize, &h); - KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded"); - SetErrorMode (err_mode); // Restore error mode - KA_TRACE( 10, ("__kmp_runtime_initialize: dynamic library pinned\n") ); - } -#endif - - InitializeCriticalSection( & __kmp_win32_section ); -#if USE_ITT_BUILD - __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" ); -#endif /* USE_ITT_BUILD */ - __kmp_initialize_system_tick(); - - #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) - if ( ! __kmp_cpuinfo.initialized ) { - __kmp_query_cpuid( & __kmp_cpuinfo ); - }; // if - #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - /* Set up minimum number of threads to switch to TLS gtid */ - #if KMP_OS_WINDOWS && ! defined KMP_DYNAMIC_LIB - // Windows* OS, static library. - /* - New thread may use stack space previously used by another thread, currently terminated. - On Windows* OS, in case of static linking, we do not know the moment of thread termination, - and our structures (__kmp_threads and __kmp_root arrays) are still keep info about dead - threads. This leads to problem in __kmp_get_global_thread_id() function: it wrongly - finds gtid (by searching through stack addresses of all known threads) for unregistered - foreign tread. - - Setting __kmp_tls_gtid_min to 0 workarounds this problem: __kmp_get_global_thread_id() - does not search through stacks, but get gtid from TLS immediately. 
- - --ln - */ - __kmp_tls_gtid_min = 0; - #else - __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; - #endif - - /* for the static library */ - if ( !__kmp_gtid_threadprivate_key ) { - __kmp_gtid_threadprivate_key = TlsAlloc(); - if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) { - KMP_FATAL( TLSOutOfIndexes ); - } - } - - - // - // Load ntdll.dll. - // - /* - Simple - GetModuleHandle( "ntdll.dl" ) - is not suitable due to security issue (see - http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full - path to the library. - */ - __kmp_str_buf_init( & path ); - path_size = GetSystemDirectory( path.str, path.size ); - KMP_DEBUG_ASSERT( path_size > 0 ); - if ( path_size >= path.size ) { - // - // Buffer is too short. Expand the buffer and try again. - // - __kmp_str_buf_reserve( & path, path_size ); - path_size = GetSystemDirectory( path.str, path.size ); - KMP_DEBUG_ASSERT( path_size > 0 ); - }; // if - if ( path_size > 0 && path_size < path.size ) { - // - // Now we have system directory name in the buffer. - // Append backslash and name of dll to form full path, - // - path.used = path_size; - __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" ); - - // - // Now load ntdll using full path. - // - ntdll = GetModuleHandle( path.str ); - } - - KMP_DEBUG_ASSERT( ntdll != NULL ); - if ( ntdll != NULL ) { - NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" ); - } - KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL ); - -#if KMP_GROUP_AFFINITY - // - // Load kernel32.dll. - // Same caveat - must use full system path name. - // - if ( path_size > 0 && path_size < path.size ) { - // - // Truncate the buffer back to just the system path length, - // discarding "\\ntdll.dll", and replacing it with "kernel32.dll". - // - path.used = path_size; - __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" ); - - // - // Load kernel32.dll using full path. 
- // - kernel32 = GetModuleHandle( path.str ); - KA_TRACE( 10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str ) ); - - // - // Load the function pointers to kernel32.dll routines - // that may or may not exist on this system. - // - if ( kernel32 != NULL ) { - __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" ); - __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" ); - __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" ); - __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" ); - - KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount = %p\n", __kmp_GetActiveProcessorCount ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorGroupCount = %p\n", __kmp_GetActiveProcessorGroupCount ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity = %p\n", __kmp_GetThreadGroupAffinity ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity = %p\n", __kmp_SetThreadGroupAffinity ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", sizeof(kmp_affin_mask_t) ) ); - - // - // See if group affinity is supported on this system. - // If so, calculate the #groups and #procs. - // - // Group affinity was introduced with Windows* 7 OS and - // Windows* Server 2008 R2 OS. - // - if ( ( __kmp_GetActiveProcessorCount != NULL ) - && ( __kmp_GetActiveProcessorGroupCount != NULL ) - && ( __kmp_GetThreadGroupAffinity != NULL ) - && ( __kmp_SetThreadGroupAffinity != NULL ) - && ( ( __kmp_num_proc_groups - = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) { - // - // Calculate the total number of active OS procs. 
- // - int i; - - KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) ); - - __kmp_xproc = 0; - - for ( i = 0; i < __kmp_num_proc_groups; i++ ) { - DWORD size = __kmp_GetActiveProcessorCount( i ); - __kmp_xproc += size; - KA_TRACE( 10, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) ); - } - } - else { - KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) ); - } - } - } - if ( __kmp_num_proc_groups <= 1 ) { - GetSystemInfo( & info ); - __kmp_xproc = info.dwNumberOfProcessors; - } -#else - GetSystemInfo( & info ); - __kmp_xproc = info.dwNumberOfProcessors; -#endif /* KMP_GROUP_AFFINITY */ - - // - // If the OS said there were 0 procs, take a guess and use a value of 2. - // This is done for Linux* OS, also. Do we need error / warning? - // - if ( __kmp_xproc <= 0 ) { - __kmp_xproc = 2; - } - - KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) ); - - __kmp_str_buf_free( & path ); - -#if USE_ITT_BUILD - __kmp_itt_initialize(); -#endif /* USE_ITT_BUILD */ - - __kmp_init_runtime = TRUE; -} // __kmp_runtime_initialize - -void -__kmp_runtime_destroy( void ) -{ - if ( ! 
__kmp_init_runtime ) { - return; - } - -#if USE_ITT_BUILD - __kmp_itt_destroy(); -#endif /* USE_ITT_BUILD */ - - /* we can't DeleteCriticalsection( & __kmp_win32_section ); */ - /* due to the KX_TRACE() commands */ - KA_TRACE( 40, ("__kmp_runtime_destroy\n" )); - - if( __kmp_gtid_threadprivate_key ) { - TlsFree( __kmp_gtid_threadprivate_key ); - __kmp_gtid_threadprivate_key = 0; - } - - __kmp_affinity_uninitialize(); - DeleteCriticalSection( & __kmp_win32_section ); - - ntdll = NULL; - NtQuerySystemInformation = NULL; - -#if KMP_ARCH_X86_64 - kernel32 = NULL; - __kmp_GetActiveProcessorCount = NULL; - __kmp_GetActiveProcessorGroupCount = NULL; - __kmp_GetThreadGroupAffinity = NULL; - __kmp_SetThreadGroupAffinity = NULL; -#endif // KMP_ARCH_X86_64 - - __kmp_init_runtime = FALSE; -} - - -void -__kmp_terminate_thread( int gtid ) -{ - kmp_info_t *th = __kmp_threads[ gtid ]; - - if( !th ) return; - - KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) ); - - if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) { - /* It's OK, the thread may have exited already */ - } - __kmp_free_handle( th->th.th_info.ds.ds_thread ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_clear_system_time( void ) -{ - BOOL status; - LARGE_INTEGER time; - status = QueryPerformanceCounter( & time ); - __kmp_win32_time = (kmp_int64) time.QuadPart; -} - -void -__kmp_initialize_system_tick( void ) -{ - { - BOOL status; - LARGE_INTEGER freq; - - status = QueryPerformanceFrequency( & freq ); - if (! 
status) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ), - KMP_ERR( error ), - __kmp_msg_null - ); - - } - else { - __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart; - } - } -} - -/* Calculate the elapsed wall clock time for the user */ - -void -__kmp_elapsed( double *t ) -{ - BOOL status; - LARGE_INTEGER now; - status = QueryPerformanceCounter( & now ); - *t = ((double) now.QuadPart) * __kmp_win32_tick; -} - -/* Calculate the elapsed wall clock tick for the user */ - -void -__kmp_elapsed_tick( double *t ) -{ - *t = __kmp_win32_tick; -} - -void -__kmp_read_system_time( double *delta ) -{ - - if (delta != NULL) { - BOOL status; - LARGE_INTEGER now; - - status = QueryPerformanceCounter( & now ); - - *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time)) - * __kmp_win32_tick; - } -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void * __stdcall -__kmp_launch_worker( void *arg ) -{ - volatile void *stack_data; - void *exit_val; - void *padding = 0; - kmp_info_t *this_thr = (kmp_info_t *) arg; - int gtid; - - gtid = this_thr->th.th_info.ds.ds_gtid; - __kmp_gtid_set_specific( gtid ); -#ifdef KMP_TDATA_GTID - #error "This define causes problems with LoadLibrary() + declspec(thread) " \ - "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ - "reference: http://support.microsoft.com/kb/118816" - //__kmp_gtid = gtid; -#endif - -#if USE_ITT_BUILD - __kmp_itt_thread_name( gtid ); -#endif /* USE_ITT_BUILD */ - - __kmp_affinity_set_init_mask( gtid, FALSE ); - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // - // Set the FP control regs to be a copy of - // the parallel initialization thread's. 
- // - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); - __kmp_load_mxcsr( &__kmp_init_mxcsr ); -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - if ( __kmp_stkoffset > 0 && gtid > 0 ) { - padding = KMP_ALLOCA( gtid * __kmp_stkoffset ); - } - - KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive ); - this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE ); - - if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid - TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data); - KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE ); - __kmp_check_stack_overlap( this_thr ); - } - KMP_MB(); - exit_val = __kmp_launch_thread( this_thr ); - KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive ); - TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE ); - KMP_MB(); - return exit_val; -} - - -/* The monitor thread controls all of the threads in the complex */ - -void * __stdcall -__kmp_launch_monitor( void *arg ) -{ - DWORD wait_status; - kmp_thread_t monitor; - int status; - int interval; - kmp_info_t *this_thr = (kmp_info_t *) arg; - - KMP_DEBUG_ASSERT(__kmp_init_monitor); - TCW_4( __kmp_init_monitor, 2 ); // AC: Signal the library that monitor has started - // TODO: hide "2" in enum (like {true,false,started}) - this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) ); - - monitor = GetCurrentThread(); - - /* set thread priority */ - status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST ); - if (! 
status) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadPriority ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - - /* register us as monitor */ - __kmp_gtid_set_specific( KMP_GTID_MONITOR ); -#ifdef KMP_TDATA_GTID - #error "This define causes problems with LoadLibrary() + declspec(thread) " \ - "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ - "reference: http://support.microsoft.com/kb/118816" - //__kmp_gtid = KMP_GTID_MONITOR; -#endif - -#if USE_ITT_BUILD - __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread. -#endif /* USE_ITT_BUILD */ - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - interval = ( 1000 / __kmp_monitor_wakeups ); /* in milliseconds */ - - while (! TCR_4(__kmp_global.g.g_done)) { - /* This thread monitors the state of the system */ - - KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) ); - - wait_status = WaitForSingleObject( __kmp_monitor_ev, interval ); - - if (wait_status == WAIT_TIMEOUT) { - TCW_4( __kmp_global.g.g_time.dt.t_value, - TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 ); - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - } - - KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) ); - - status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL ); - if (! status) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadPriority ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - - if (__kmp_global.g.g_abort != 0) { - /* now we need to terminate the worker threads */ - /* the value of t_abort is the signal we caught */ - - int gtid; - - KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) ); - - /* terminate the OpenMP worker threads */ - /* TODO this is not valid for sibling threads!! - * the uber master might not be 0 anymore.. 
*/ - for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) - __kmp_terminate_thread( gtid ); - - __kmp_cleanup(); - - Sleep( 0 ); - - KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) ); - - if (__kmp_global.g.g_abort > 0) { - raise( __kmp_global.g.g_abort ); - } - } - - TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE ); - - KMP_MB(); - return arg; -} - -void -__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ) -{ - kmp_thread_t handle; - DWORD idThread; - - KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) ); - - th->th.th_info.ds.ds_gtid = gtid; - - if ( KMP_UBER_GTID(gtid) ) { - int stack_data; - - /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use. - Is it appropriate to just use GetCurrentThread? When should we close this handle? When - unregistering the root? - */ - { - BOOL rc; - rc = DuplicateHandle( - GetCurrentProcess(), - GetCurrentThread(), - GetCurrentProcess(), - &th->th.th_info.ds.ds_thread, - 0, - FALSE, - DUPLICATE_SAME_ACCESS - ); - KMP_ASSERT( rc ); - KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n", - (LPVOID)th, - th->th.th_info.ds.ds_thread ) ); - th->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - } - if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid - /* we will dynamically update the stack range if gtid_mode == 1 */ - TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); - TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); - TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); - __kmp_check_stack_overlap( th ); - } - } - else { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* Set stack size for this thread now. 
*/ - KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC - " bytes\n", stack_size ) ); - - stack_size += gtid * __kmp_stkoffset; - - TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size); - TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); - - KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %" - KMP_SIZE_T_SPEC - " bytes, &__kmp_launch_worker = %p, th = %p, " - "&idThread = %p\n", - (SIZE_T) stack_size, - (LPTHREAD_START_ROUTINE) & __kmp_launch_worker, - (LPVOID) th, &idThread ) ); - - { - handle = CreateThread( NULL, (SIZE_T) stack_size, - (LPTHREAD_START_ROUTINE) __kmp_launch_worker, - (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread ); - } - - KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %" - KMP_SIZE_T_SPEC - " bytes, &__kmp_launch_worker = %p, th = %p, " - "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n", - (SIZE_T) stack_size, - (LPTHREAD_START_ROUTINE) & __kmp_launch_worker, - (LPVOID) th, idThread, handle ) ); - - { - if ( handle == 0 ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantCreateThread ), - KMP_ERR( error ), - __kmp_msg_null - ); - } else { - th->th.th_info.ds.ds_thread = handle; - } - } - KMP_MB(); /* Flush all pending memory write invalidates. */ - } - - KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) ); -} - -int -__kmp_still_running(kmp_info_t *th) { - return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0)); -} - -void -__kmp_create_monitor( kmp_info_t *th ) -{ - kmp_thread_t handle; - DWORD idThread; - int ideal, new_ideal; - - KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL ); - if ( __kmp_monitor_ev == NULL ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantCreateEvent ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if -#if USE_ITT_BUILD - __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" ); -#endif /* USE_ITT_BUILD */ - - th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; - th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; - - // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how - // to automatically expand stacksize based on CreateThread error code. - if ( __kmp_monitor_stksize == 0 ) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - } - if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) { - __kmp_monitor_stksize = __kmp_sys_min_stksize; - } - - KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n", - (int) __kmp_monitor_stksize ) ); - - TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); - - handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize, - (LPTHREAD_START_ROUTINE) __kmp_launch_monitor, - (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread ); - if (handle == 0) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantCreateThread ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - else - th->th.th_info.ds.ds_thread = handle; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n", - (void *) th->th.th_info.ds.ds_thread ) ); -} - -/* - Check to see if thread is still alive. - - NOTE: The ExitProcess(code) system call causes all threads to Terminate - with a exit_val = code. Because of this we can not rely on - exit_val having any particular value. So this routine may - return STILL_ALIVE in exit_val even after the thread is dead. 
-*/ - -int -__kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val ) -{ - DWORD rc; - rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val ); - if ( rc == 0 ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FunctionError, "GetExitCodeThread()" ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - return ( *exit_val == STILL_ACTIVE ); -} - - -void -__kmp_exit_thread( - int exit_status -) { - ExitThread( exit_status ); -} // __kmp_exit_thread - -/* - This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor(). -*/ -static void -__kmp_reap_common( kmp_info_t * th ) -{ - DWORD exit_val; - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) ); - - /* - 2006-10-19: - - There are two opposite situations: - - 1. Windows* OS keep thread alive after it resets ds_alive flag and exits from thread - function. (For example, see C70770/Q394281 "unloading of dll based on OMP is very - slow".) - 2. Windows* OS may kill thread before it resets ds_alive flag. - - Right solution seems to be waiting for *either* thread termination *or* ds_alive resetting. - - */ - - { - // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize KMP_WAIT_YIELD to - // cover this usage also. - void * obj = NULL; +/* + * z_Windows_NT_util.c -- platform specific routines. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_itt.h" +#include "kmp_i18n.h" +#include "kmp_io.h" +#include "kmp_wait_release.h" + + + +/* ----------------------------------------------------------------------------------- */ +/* ----------------------------------------------------------------------------------- */ + +/* This code is related to NtQuerySystemInformation() function. This function + is used in the Load balance algorithm for OMP_DYNAMIC=true to find the + number of running threads in the system. */ + +#include +#include // UNICODE_STRING + +enum SYSTEM_INFORMATION_CLASS { + SystemProcessInformation = 5 +}; // SYSTEM_INFORMATION_CLASS + +struct CLIENT_ID { + HANDLE UniqueProcess; + HANDLE UniqueThread; +}; // struct CLIENT_ID + +enum THREAD_STATE { + StateInitialized, + StateReady, + StateRunning, + StateStandby, + StateTerminated, + StateWait, + StateTransition, + StateUnknown +}; // enum THREAD_STATE + +struct VM_COUNTERS { + SIZE_T PeakVirtualSize; + SIZE_T VirtualSize; + ULONG PageFaultCount; + SIZE_T PeakWorkingSetSize; + SIZE_T WorkingSetSize; + SIZE_T QuotaPeakPagedPoolUsage; + SIZE_T QuotaPagedPoolUsage; + SIZE_T QuotaPeakNonPagedPoolUsage; + SIZE_T QuotaNonPagedPoolUsage; + SIZE_T PagefileUsage; + SIZE_T PeakPagefileUsage; + SIZE_T PrivatePageCount; +}; // struct VM_COUNTERS + +struct SYSTEM_THREAD { + LARGE_INTEGER KernelTime; + LARGE_INTEGER UserTime; + LARGE_INTEGER CreateTime; + ULONG WaitTime; + LPVOID StartAddress; + CLIENT_ID ClientId; + DWORD Priority; + LONG BasePriority; + ULONG ContextSwitchCount; + THREAD_STATE State; + ULONG WaitReason; +}; // SYSTEM_THREAD + +KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, KernelTime ) == 0 ); +#if KMP_ARCH_X86 + KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 52 ); +#else + KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 ); + 
KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 68 ); +#endif + +struct SYSTEM_PROCESS_INFORMATION { + ULONG NextEntryOffset; + ULONG NumberOfThreads; + LARGE_INTEGER Reserved[ 3 ]; + LARGE_INTEGER CreateTime; + LARGE_INTEGER UserTime; + LARGE_INTEGER KernelTime; + UNICODE_STRING ImageName; + DWORD BasePriority; + HANDLE ProcessId; + HANDLE ParentProcessId; + ULONG HandleCount; + ULONG Reserved2[ 2 ]; + VM_COUNTERS VMCounters; + IO_COUNTERS IOCounters; + SYSTEM_THREAD Threads[ 1 ]; +}; // SYSTEM_PROCESS_INFORMATION +typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION; + +KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) == 0 ); +KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime ) == 32 ); +KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName ) == 56 ); +#if KMP_ARCH_X86 + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 68 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 76 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 88 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 136 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 184 ); +#else + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 80 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 96 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 112 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 208 ); + KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 256 ); +#endif + +typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG ); +NtQuerySystemInformation_t NtQuerySystemInformation = NULL; + +HMODULE ntdll = NULL; + +/* End of NtQuerySystemInformation()-related code */ + +#if KMP_GROUP_AFFINITY +static HMODULE kernel32 = NULL; +#endif /* 
KMP_GROUP_AFFINITY */ + +/* ----------------------------------------------------------------------------------- */ +/* ----------------------------------------------------------------------------------- */ + +#if KMP_HANDLE_SIGNALS + typedef void (* sig_func_t )( int ); + static sig_func_t __kmp_sighldrs[ NSIG ]; + static int __kmp_siginstalled[ NSIG ]; +#endif + +static HANDLE __kmp_monitor_ev; +static kmp_int64 __kmp_win32_time; +double __kmp_win32_tick; + +int __kmp_init_runtime = FALSE; +CRITICAL_SECTION __kmp_win32_section; + +void +__kmp_win32_mutex_init( kmp_win32_mutex_t *mx ) +{ + InitializeCriticalSection( & mx->cs ); +#if USE_ITT_BUILD + __kmp_itt_system_object_created( & mx->cs, "Critical Section" ); +#endif /* USE_ITT_BUILD */ +} + +void +__kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx ) +{ + DeleteCriticalSection( & mx->cs ); +} + +void +__kmp_win32_mutex_lock( kmp_win32_mutex_t *mx ) +{ + EnterCriticalSection( & mx->cs ); +} + +void +__kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx ) +{ + LeaveCriticalSection( & mx->cs ); +} + +void +__kmp_win32_cond_init( kmp_win32_cond_t *cv ) +{ + cv->waiters_count_ = 0; + cv->wait_generation_count_ = 0; + cv->release_count_ = 0; + + /* Initialize the critical section */ + __kmp_win32_mutex_init( & cv->waiters_count_lock_ ); + + /* Create a manual-reset event. 
*/ + cv->event_ = CreateEvent( NULL, // no security + TRUE, // manual-reset + FALSE, // non-signaled initially + NULL ); // unnamed +#if USE_ITT_BUILD + __kmp_itt_system_object_created( cv->event_, "Event" ); +#endif /* USE_ITT_BUILD */ +} + +void +__kmp_win32_cond_destroy( kmp_win32_cond_t *cv ) +{ + __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ ); + __kmp_free_handle( cv->event_ ); + memset( cv, '\0', sizeof( *cv ) ); +} + +/* TODO associate cv with a team instead of a thread so as to optimize + * the case where we wake up a whole team */ + +void +__kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load ) +{ + int my_generation; + int last_waiter; + + /* Avoid race conditions */ + __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); + + /* Increment count of waiters */ + cv->waiters_count_++; + + /* Store current generation in our activation record. */ + my_generation = cv->wait_generation_count_; + + __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); + __kmp_win32_mutex_unlock( mx ); + + + for (;;) { + int wait_done; + + /* Wait until the event is signaled */ + WaitForSingleObject( cv->event_, INFINITE ); + + __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); + + /* Exit the loop when the event_> is signaled and + * there are still waiting threads from this + * that haven't been released from this wait yet. 
*/ + wait_done = ( cv->release_count_ > 0 ) && + ( cv->wait_generation_count_ != my_generation ); + + __kmp_win32_mutex_unlock( &cv->waiters_count_lock_); + + /* there used to be a semicolon after the if statement, + * it looked like a bug, so i removed it */ + if( wait_done ) + break; + } + + __kmp_win32_mutex_lock( mx ); + __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); + + cv->waiters_count_--; + cv->release_count_--; + + last_waiter = ( cv->release_count_ == 0 ); + + __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); + + if( last_waiter ) { + /* We're the last waiter to be notified, so reset the manual event. */ + ResetEvent( cv->event_ ); + } +} + +void +__kmp_win32_cond_broadcast( kmp_win32_cond_t *cv ) +{ + __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); + + if( cv->waiters_count_ > 0 ) { + SetEvent( cv->event_ ); + /* Release all the threads in this generation. */ + + cv->release_count_ = cv->waiters_count_; + + /* Start a new generation. */ + cv->wait_generation_count_++; + } + + __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); +} + +void +__kmp_win32_cond_signal( kmp_win32_cond_t *cv ) +{ + __kmp_win32_cond_broadcast( cv ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_enable( int new_state ) +{ + if (__kmp_init_runtime) + LeaveCriticalSection( & __kmp_win32_section ); +} + +void +__kmp_disable( int *old_state ) +{ + *old_state = 0; + + if (__kmp_init_runtime) + EnterCriticalSection( & __kmp_win32_section ); +} + +void +__kmp_suspend_initialize( void ) +{ + /* do nothing */ +} + +static void +__kmp_suspend_initialize_thread( kmp_info_t *th ) +{ + if ( ! 
TCR_4( th->th.th_suspend_init ) ) { + /* this means we haven't initialized the suspension pthread objects for this thread + in this instance of the process */ + __kmp_win32_cond_init( &th->th.th_suspend_cv ); + __kmp_win32_mutex_init( &th->th.th_suspend_mx ); + TCW_4( th->th.th_suspend_init, TRUE ); + } +} + +void +__kmp_suspend_uninitialize_thread( kmp_info_t *th ) +{ + if ( TCR_4( th->th.th_suspend_init ) ) { + /* this means we have initialize the suspension pthread objects for this thread + in this instance of the process */ + __kmp_win32_cond_destroy( & th->th.th_suspend_cv ); + __kmp_win32_mutex_destroy( & th->th.th_suspend_mx ); + TCW_4( th->th.th_suspend_init, FALSE ); + } +} + +/* This routine puts the calling thread to sleep after setting the + * sleep bit for the indicated flag variable to true. + */ +template +static inline void __kmp_suspend_template( int th_gtid, C *flag ) +{ + kmp_info_t *th = __kmp_threads[th_gtid]; + int status; + typename C::flag_t old_spin; + + KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", th_gtid, flag->get() ) ); + + __kmp_suspend_initialize_thread( th ); + __kmp_win32_mutex_lock( &th->th.th_suspend_mx ); + + KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for flag's loc(%p)\n", + th_gtid, flag->get() ) ); + + /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread + gets called first? 
+ */ + old_spin = flag->set_sleeping(); + + KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for flag's loc(%p)==%d\n", + th_gtid, flag->get(), *(flag->get()) ) ); + + if ( flag->done_check_val(old_spin) ) { + old_spin = flag->unset_sleeping(); + KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for flag's loc(%p)\n", + th_gtid, flag->get()) ); + } else { +#ifdef DEBUG_SUSPEND + __kmp_suspend_count++; +#endif + /* Encapsulate in a loop as the documentation states that this may + * "with low probability" return when the condition variable has + * not been signaled or broadcast + */ + int deactivated = FALSE; + TCW_PTR(th->th.th_sleep_loc, (void *)flag); + while ( flag->is_sleeping() ) { + KF_TRACE( 15, ("__kmp_suspend_template: T#%d about to perform kmp_win32_cond_wait()\n", + th_gtid ) ); + // Mark the thread as no longer active (only in the first iteration of the loop). + if ( ! deactivated ) { + th->th.th_active = FALSE; + if ( th->th.th_active_in_pool ) { + th->th.th_active_in_pool = FALSE; + KMP_TEST_THEN_DEC32( + (kmp_int32 *) &__kmp_thread_pool_active_nth ); + KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); + } + deactivated = TRUE; + + + __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 ); + } + else { + __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 ); + } + +#ifdef KMP_DEBUG + if( flag->is_sleeping() ) { + KF_TRACE( 100, ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid )); + } +#endif /* KMP_DEBUG */ + + } // while + + // Mark the thread as active again (if it was previous marked as inactive) + if ( deactivated ) { + th->th.th_active = TRUE; + if ( TCR_4(th->th.th_in_pool) ) { + KMP_TEST_THEN_INC32( + (kmp_int32 *) &__kmp_thread_pool_active_nth ); + th->th.th_active_in_pool = TRUE; + } + } + } + + + __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); + + KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) ); +} + +void 
__kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { + __kmp_suspend_template(th_gtid, flag); +} +void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { + __kmp_suspend_template(th_gtid, flag); +} +void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { + __kmp_suspend_template(th_gtid, flag); +} + + +/* This routine signals the thread specified by target_gtid to wake up + * after setting the sleep bit indicated by the flag argument to FALSE + */ +template +static inline void __kmp_resume_template( int target_gtid, C *flag ) +{ + kmp_info_t *th = __kmp_threads[target_gtid]; + int status; + +#ifdef KMP_DEBUG + int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; +#endif + + KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) ); + + __kmp_suspend_initialize_thread( th ); + __kmp_win32_mutex_lock( &th->th.th_suspend_mx ); + + if (!flag) { // coming from __kmp_null_resume_wrapper + flag = (C *)th->th.th_sleep_loc; + } + + // First, check if the flag is null or its type has changed. If so, someone else woke it up. 
+ if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to + KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p)\n", + gtid, target_gtid, NULL ) ); + __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); + return; + } + else { + typename C::flag_t old_spin = flag->unset_sleeping(); + if ( !flag->is_sleeping_val(old_spin) ) { + KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p): " + "%u => %u\n", + gtid, target_gtid, flag->get(), old_spin, *(flag->get()) ) ); + __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); + return; + } + } + TCW_PTR(th->th.th_sleep_loc, NULL); + + KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p)\n", + gtid, target_gtid, flag->get() ) ); + + + __kmp_win32_cond_signal( &th->th.th_suspend_cv ); + __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); + + KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n", + gtid, target_gtid ) ); +} + +void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { + __kmp_resume_template(target_gtid, flag); +} +void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { + __kmp_resume_template(target_gtid, flag); +} +void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { + __kmp_resume_template(target_gtid, flag); +} + + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_yield( int cond ) +{ + if (cond) + Sleep(0); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_gtid_set_specific( int gtid ) +{ + KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n", + gtid, __kmp_gtid_threadprivate_key )); + KMP_ASSERT( __kmp_init_runtime ); + if( ! 
TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) ) + KMP_FATAL( TLSSetValueFailed ); +} + +int +__kmp_gtid_get_specific() +{ + int gtid; + if( !__kmp_init_runtime ) { + KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) ); + return KMP_GTID_SHUTDOWN; + } + gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ); + if ( gtid == 0 ) { + gtid = KMP_GTID_DNE; + } + else { + gtid--; + } + KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", + __kmp_gtid_threadprivate_key, gtid )); + return gtid; +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if KMP_GROUP_AFFINITY + +// +// Only 1 DWORD in the mask should have any procs set. +// Return the appropriate index, or -1 for an invalid mask. +// +int +__kmp_get_proc_group( kmp_affin_mask_t const *mask ) +{ + int i; + int group = -1; + for (i = 0; i < __kmp_num_proc_groups; i++) { + if (mask[i] == 0) { + continue; + } + if (group >= 0) { + return -1; + } + group = i; + } + return group; +} + +#endif /* KMP_GROUP_AFFINITY */ + +int +__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) +{ + +#if KMP_GROUP_AFFINITY + + if (__kmp_num_proc_groups > 1) { + // + // Check for a valid mask. + // + GROUP_AFFINITY ga; + int group = __kmp_get_proc_group( mask ); + if (group < 0) { + if (abort_on_error) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } + return -1; + } + + // + // Transform the bit vector into a GROUP_AFFINITY struct + // and make the system call to set affinity. 
+ // + ga.Group = group; + ga.Mask = mask[group]; + ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; + + KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); + if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetThreadAffMask ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; + } + } + else + +#endif /* KMP_GROUP_AFFINITY */ + + { + if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetThreadAffMask ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; + } + } + return 0; +} + +int +__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) +{ + +#if KMP_GROUP_AFFINITY + + if (__kmp_num_proc_groups > 1) { + KMP_CPU_ZERO(mask); + GROUP_AFFINITY ga; + KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); + + if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), + KMP_ERR(error), + __kmp_msg_null + ); + } + return error; + } + + if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) + || (ga.Mask == 0)) { + return -1; + } + + mask[ga.Group] = ga.Mask; + } + else + +#endif /* KMP_GROUP_AFFINITY */ + + { + kmp_affin_mask_t newMask, sysMask, retval; + + if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG(FunctionError, "GetProcessAffinityMask()"), + KMP_ERR(error), + __kmp_msg_null + ); + } + return error; + } + retval = SetThreadAffinityMask(GetCurrentThread(), newMask); + if (! 
retval) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), + __kmp_msg_null + ); + } + return error; + } + newMask = SetThreadAffinityMask(GetCurrentThread(), retval); + if (! newMask) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), + __kmp_msg_null + ); + } + } + *mask = retval; + } + return 0; +} + +void +__kmp_affinity_bind_thread( int proc ) +{ + +#if KMP_GROUP_AFFINITY + + if (__kmp_num_proc_groups > 1) { + // + // Form the GROUP_AFFINITY struct directly, rather than filling + // out a bit vector and calling __kmp_set_system_affinity(). + // + GROUP_AFFINITY ga; + KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups + * CHAR_BIT * sizeof(DWORD_PTR)))); + ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR)); + ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR))); + ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; + + KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); + if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { + DWORD error = GetLastError(); + if (__kmp_affinity_verbose) { // AC: continue silently if not verbose + __kmp_msg( + kmp_ms_warning, + KMP_MSG( CantSetThreadAffMask ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + } + } + else + +#endif /* KMP_GROUP_AFFINITY */ + + { + kmp_affin_mask_t mask; + KMP_CPU_ZERO(&mask); + KMP_CPU_SET(proc, &mask); + __kmp_set_system_affinity(&mask, TRUE); + } +} + +void +__kmp_affinity_determine_capable( const char *env_var ) +{ + // + // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask(). 
+ // + +#if KMP_GROUP_AFFINITY + KMP_AFFINITY_ENABLE(__kmp_num_proc_groups*sizeof(kmp_affin_mask_t)); +#else + KMP_AFFINITY_ENABLE(sizeof(kmp_affin_mask_t)); +#endif + + KA_TRACE( 10, ( + "__kmp_affinity_determine_capable: " + "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n", + __kmp_affin_mask_size + ) ); +} + +double +__kmp_read_cpu_time( void ) +{ + FILETIME CreationTime, ExitTime, KernelTime, UserTime; + int status; + double cpu_time; + + cpu_time = 0; + + status = GetProcessTimes( GetCurrentProcess(), &CreationTime, + &ExitTime, &KernelTime, &UserTime ); + + if (status) { + double sec = 0; + + sec += KernelTime.dwHighDateTime; + sec += UserTime.dwHighDateTime; + + /* Shift left by 32 bits */ + sec *= (double) (1 << 16) * (double) (1 << 16); + + sec += KernelTime.dwLowDateTime; + sec += UserTime.dwLowDateTime; + + cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC; + } + + return cpu_time; +} + +int +__kmp_read_system_info( struct kmp_sys_info *info ) +{ + info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */ + info->minflt = 0; /* the number of page faults serviced without any I/O */ + info->majflt = 0; /* the number of page faults serviced that required I/O */ + info->nswap = 0; /* the number of times a process was "swapped" out of memory */ + info->inblock = 0; /* the number of times the file system had to perform input */ + info->oublock = 0; /* the number of times the file system had to perform output */ + info->nvcsw = 0; /* the number of times a context switch was voluntarily */ + info->nivcsw = 0; /* the number of times a context switch was forced */ + + return 1; +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + + +void +__kmp_runtime_initialize( void ) +{ + SYSTEM_INFO info; + kmp_str_buf_t path; + UINT path_size; + + if ( __kmp_init_runtime ) { + return; + }; + +#if KMP_DYNAMIC_LIB + 
/* Pin dynamic library for the lifetime of application */ + { + // First, turn off error message boxes + UINT err_mode = SetErrorMode (SEM_FAILCRITICALERRORS); + HMODULE h; + BOOL ret = GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS + |GET_MODULE_HANDLE_EX_FLAG_PIN, + (LPCTSTR)&__kmp_serial_initialize, &h); + KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded"); + SetErrorMode (err_mode); // Restore error mode + KA_TRACE( 10, ("__kmp_runtime_initialize: dynamic library pinned\n") ); + } +#endif + + InitializeCriticalSection( & __kmp_win32_section ); +#if USE_ITT_BUILD + __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" ); +#endif /* USE_ITT_BUILD */ + __kmp_initialize_system_tick(); + + #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) + if ( ! __kmp_cpuinfo.initialized ) { + __kmp_query_cpuid( & __kmp_cpuinfo ); + }; // if + #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + /* Set up minimum number of threads to switch to TLS gtid */ + #if KMP_OS_WINDOWS && ! defined KMP_DYNAMIC_LIB + // Windows* OS, static library. + /* + New thread may use stack space previously used by another thread, currently terminated. + On Windows* OS, in case of static linking, we do not know the moment of thread termination, + and our structures (__kmp_threads and __kmp_root arrays) are still keep info about dead + threads. This leads to problem in __kmp_get_global_thread_id() function: it wrongly + finds gtid (by searching through stack addresses of all known threads) for unregistered + foreign tread. + + Setting __kmp_tls_gtid_min to 0 workarounds this problem: __kmp_get_global_thread_id() + does not search through stacks, but get gtid from TLS immediately. 
+ + --ln + */ + __kmp_tls_gtid_min = 0; + #else + __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; + #endif + + /* for the static library */ + if ( !__kmp_gtid_threadprivate_key ) { + __kmp_gtid_threadprivate_key = TlsAlloc(); + if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) { + KMP_FATAL( TLSOutOfIndexes ); + } + } + + + // + // Load ntdll.dll. + // + /* + Simple + GetModuleHandle( "ntdll.dl" ) + is not suitable due to security issue (see + http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full + path to the library. + */ + __kmp_str_buf_init( & path ); + path_size = GetSystemDirectory( path.str, path.size ); + KMP_DEBUG_ASSERT( path_size > 0 ); + if ( path_size >= path.size ) { + // + // Buffer is too short. Expand the buffer and try again. + // + __kmp_str_buf_reserve( & path, path_size ); + path_size = GetSystemDirectory( path.str, path.size ); + KMP_DEBUG_ASSERT( path_size > 0 ); + }; // if + if ( path_size > 0 && path_size < path.size ) { + // + // Now we have system directory name in the buffer. + // Append backslash and name of dll to form full path, + // + path.used = path_size; + __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" ); + + // + // Now load ntdll using full path. + // + ntdll = GetModuleHandle( path.str ); + } + + KMP_DEBUG_ASSERT( ntdll != NULL ); + if ( ntdll != NULL ) { + NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" ); + } + KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL ); + +#if KMP_GROUP_AFFINITY + // + // Load kernel32.dll. + // Same caveat - must use full system path name. + // + if ( path_size > 0 && path_size < path.size ) { + // + // Truncate the buffer back to just the system path length, + // discarding "\\ntdll.dll", and replacing it with "kernel32.dll". + // + path.used = path_size; + __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" ); + + // + // Load kernel32.dll using full path. 
+ // + kernel32 = GetModuleHandle( path.str ); + KA_TRACE( 10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str ) ); + + // + // Load the function pointers to kernel32.dll routines + // that may or may not exist on this system. + // + if ( kernel32 != NULL ) { + __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" ); + __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" ); + __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" ); + __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" ); + + KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount = %p\n", __kmp_GetActiveProcessorCount ) ); + KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorGroupCount = %p\n", __kmp_GetActiveProcessorGroupCount ) ); + KA_TRACE( 10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity = %p\n", __kmp_GetThreadGroupAffinity ) ); + KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity = %p\n", __kmp_SetThreadGroupAffinity ) ); + KA_TRACE( 10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", sizeof(kmp_affin_mask_t) ) ); + + // + // See if group affinity is supported on this system. + // If so, calculate the #groups and #procs. + // + // Group affinity was introduced with Windows* 7 OS and + // Windows* Server 2008 R2 OS. + // + if ( ( __kmp_GetActiveProcessorCount != NULL ) + && ( __kmp_GetActiveProcessorGroupCount != NULL ) + && ( __kmp_GetThreadGroupAffinity != NULL ) + && ( __kmp_SetThreadGroupAffinity != NULL ) + && ( ( __kmp_num_proc_groups + = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) { + // + // Calculate the total number of active OS procs. 
+ // + int i; + + KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) ); + + __kmp_xproc = 0; + + for ( i = 0; i < __kmp_num_proc_groups; i++ ) { + DWORD size = __kmp_GetActiveProcessorCount( i ); + __kmp_xproc += size; + KA_TRACE( 10, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) ); + } + } + else { + KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) ); + } + } + } + if ( __kmp_num_proc_groups <= 1 ) { + GetSystemInfo( & info ); + __kmp_xproc = info.dwNumberOfProcessors; + } +#else + GetSystemInfo( & info ); + __kmp_xproc = info.dwNumberOfProcessors; +#endif /* KMP_GROUP_AFFINITY */ + + // + // If the OS said there were 0 procs, take a guess and use a value of 2. + // This is done for Linux* OS, also. Do we need error / warning? + // + if ( __kmp_xproc <= 0 ) { + __kmp_xproc = 2; + } + + KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) ); + + __kmp_str_buf_free( & path ); + +#if USE_ITT_BUILD + __kmp_itt_initialize(); +#endif /* USE_ITT_BUILD */ + + __kmp_init_runtime = TRUE; +} // __kmp_runtime_initialize + +void +__kmp_runtime_destroy( void ) +{ + if ( ! 
__kmp_init_runtime ) { + return; + } + +#if USE_ITT_BUILD + __kmp_itt_destroy(); +#endif /* USE_ITT_BUILD */ + + /* we can't DeleteCriticalsection( & __kmp_win32_section ); */ + /* due to the KX_TRACE() commands */ + KA_TRACE( 40, ("__kmp_runtime_destroy\n" )); + + if( __kmp_gtid_threadprivate_key ) { + TlsFree( __kmp_gtid_threadprivate_key ); + __kmp_gtid_threadprivate_key = 0; + } + + __kmp_affinity_uninitialize(); + DeleteCriticalSection( & __kmp_win32_section ); + + ntdll = NULL; + NtQuerySystemInformation = NULL; + +#if KMP_ARCH_X86_64 + kernel32 = NULL; + __kmp_GetActiveProcessorCount = NULL; + __kmp_GetActiveProcessorGroupCount = NULL; + __kmp_GetThreadGroupAffinity = NULL; + __kmp_SetThreadGroupAffinity = NULL; +#endif // KMP_ARCH_X86_64 + + __kmp_init_runtime = FALSE; +} + + +void +__kmp_terminate_thread( int gtid ) +{ + kmp_info_t *th = __kmp_threads[ gtid ]; + + if( !th ) return; + + KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) ); + + if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) { + /* It's OK, the thread may have exited already */ + } + __kmp_free_handle( th->th.th_info.ds.ds_thread ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void +__kmp_clear_system_time( void ) +{ + BOOL status; + LARGE_INTEGER time; + status = QueryPerformanceCounter( & time ); + __kmp_win32_time = (kmp_int64) time.QuadPart; +} + +void +__kmp_initialize_system_tick( void ) +{ + { + BOOL status; + LARGE_INTEGER freq; + + status = QueryPerformanceFrequency( & freq ); + if (! 
status) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ), + KMP_ERR( error ), + __kmp_msg_null + ); + + } + else { + __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart; + } + } +} + +/* Calculate the elapsed wall clock time for the user */ + +void +__kmp_elapsed( double *t ) +{ + BOOL status; + LARGE_INTEGER now; + status = QueryPerformanceCounter( & now ); + *t = ((double) now.QuadPart) * __kmp_win32_tick; +} + +/* Calculate the elapsed wall clock tick for the user */ + +void +__kmp_elapsed_tick( double *t ) +{ + *t = __kmp_win32_tick; +} + +void +__kmp_read_system_time( double *delta ) +{ + + if (delta != NULL) { + BOOL status; + LARGE_INTEGER now; + + status = QueryPerformanceCounter( & now ); + + *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time)) + * __kmp_win32_tick; + } +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +void * __stdcall +__kmp_launch_worker( void *arg ) +{ + volatile void *stack_data; + void *exit_val; + void *padding = 0; + kmp_info_t *this_thr = (kmp_info_t *) arg; + int gtid; + + gtid = this_thr->th.th_info.ds.ds_gtid; + __kmp_gtid_set_specific( gtid ); +#ifdef KMP_TDATA_GTID + #error "This define causes problems with LoadLibrary() + declspec(thread) " \ + "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ + "reference: http://support.microsoft.com/kb/118816" + //__kmp_gtid = gtid; +#endif + +#if USE_ITT_BUILD + __kmp_itt_thread_name( gtid ); +#endif /* USE_ITT_BUILD */ + + __kmp_affinity_set_init_mask( gtid, FALSE ); + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + // + // Set the FP control regs to be a copy of + // the parallel initialization thread's. 
+ // + __kmp_clear_x87_fpu_status_word(); + __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); + __kmp_load_mxcsr( &__kmp_init_mxcsr ); +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + + if ( __kmp_stkoffset > 0 && gtid > 0 ) { + padding = KMP_ALLOCA( gtid * __kmp_stkoffset ); + } + + KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive ); + this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId(); + TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE ); + + if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid + TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data); + KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE ); + __kmp_check_stack_overlap( this_thr ); + } + KMP_MB(); + exit_val = __kmp_launch_thread( this_thr ); + KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive ); + TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE ); + KMP_MB(); + return exit_val; +} + + +/* The monitor thread controls all of the threads in the complex */ + +void * __stdcall +__kmp_launch_monitor( void *arg ) +{ + DWORD wait_status; + kmp_thread_t monitor; + int status; + int interval; + kmp_info_t *this_thr = (kmp_info_t *) arg; + + KMP_DEBUG_ASSERT(__kmp_init_monitor); + TCW_4( __kmp_init_monitor, 2 ); // AC: Signal the library that monitor has started + // TODO: hide "2" in enum (like {true,false,started}) + this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId(); + TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE ); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) ); + + monitor = GetCurrentThread(); + + /* set thread priority */ + status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST ); + if (! 
status) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetThreadPriority ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + + /* register us as monitor */ + __kmp_gtid_set_specific( KMP_GTID_MONITOR ); +#ifdef KMP_TDATA_GTID + #error "This define causes problems with LoadLibrary() + declspec(thread) " \ + "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ + "reference: http://support.microsoft.com/kb/118816" + //__kmp_gtid = KMP_GTID_MONITOR; +#endif + +#if USE_ITT_BUILD + __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread. +#endif /* USE_ITT_BUILD */ + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + interval = ( 1000 / __kmp_monitor_wakeups ); /* in milliseconds */ + + while (! TCR_4(__kmp_global.g.g_done)) { + /* This thread monitors the state of the system */ + + KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) ); + + wait_status = WaitForSingleObject( __kmp_monitor_ev, interval ); + + if (wait_status == WAIT_TIMEOUT) { + TCW_4( __kmp_global.g.g_time.dt.t_value, + TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 ); + } + + KMP_MB(); /* Flush all pending memory write invalidates. */ + } + + KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) ); + + status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL ); + if (! status) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetThreadPriority ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + + if (__kmp_global.g.g_abort != 0) { + /* now we need to terminate the worker threads */ + /* the value of t_abort is the signal we caught */ + + int gtid; + + KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) ); + + /* terminate the OpenMP worker threads */ + /* TODO this is not valid for sibling threads!! + * the uber master might not be 0 anymore.. 
*/ + for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) + __kmp_terminate_thread( gtid ); + + __kmp_cleanup(); + + Sleep( 0 ); + + KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) ); + + if (__kmp_global.g.g_abort > 0) { + raise( __kmp_global.g.g_abort ); + } + } + + TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE ); + + KMP_MB(); + return arg; +} + +void +__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ) +{ + kmp_thread_t handle; + DWORD idThread; + + KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) ); + + th->th.th_info.ds.ds_gtid = gtid; + + if ( KMP_UBER_GTID(gtid) ) { + int stack_data; + + /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use. + Is it appropriate to just use GetCurrentThread? When should we close this handle? When + unregistering the root? + */ + { + BOOL rc; + rc = DuplicateHandle( + GetCurrentProcess(), + GetCurrentThread(), + GetCurrentProcess(), + &th->th.th_info.ds.ds_thread, + 0, + FALSE, + DUPLICATE_SAME_ACCESS + ); + KMP_ASSERT( rc ); + KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n", + (LPVOID)th, + th->th.th_info.ds.ds_thread ) ); + th->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); + } + if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid + /* we will dynamically update the stack range if gtid_mode == 1 */ + TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); + TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); + TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); + __kmp_check_stack_overlap( th ); + } + } + else { + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* Set stack size for this thread now. 
*/ + KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC + " bytes\n", stack_size ) ); + + stack_size += gtid * __kmp_stkoffset; + + TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size); + TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); + + KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %" + KMP_SIZE_T_SPEC + " bytes, &__kmp_launch_worker = %p, th = %p, " + "&idThread = %p\n", + (SIZE_T) stack_size, + (LPTHREAD_START_ROUTINE) & __kmp_launch_worker, + (LPVOID) th, &idThread ) ); + + { + handle = CreateThread( NULL, (SIZE_T) stack_size, + (LPTHREAD_START_ROUTINE) __kmp_launch_worker, + (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread ); + } + + KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %" + KMP_SIZE_T_SPEC + " bytes, &__kmp_launch_worker = %p, th = %p, " + "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n", + (SIZE_T) stack_size, + (LPTHREAD_START_ROUTINE) & __kmp_launch_worker, + (LPVOID) th, idThread, handle ) ); + + { + if ( handle == 0 ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantCreateThread ), + KMP_ERR( error ), + __kmp_msg_null + ); + } else { + th->th.th_info.ds.ds_thread = handle; + } + } + KMP_MB(); /* Flush all pending memory write invalidates. */ + } + + KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) ); +} + +int +__kmp_still_running(kmp_info_t *th) { + return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0)); +} + +void +__kmp_create_monitor( kmp_info_t *th ) +{ + kmp_thread_t handle; + DWORD idThread; + int ideal, new_ideal; + + KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) ); + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL ); + if ( __kmp_monitor_ev == NULL ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantCreateEvent ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if +#if USE_ITT_BUILD + __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" ); +#endif /* USE_ITT_BUILD */ + + th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; + th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; + + // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how + // to automatically expand stacksize based on CreateThread error code. + if ( __kmp_monitor_stksize == 0 ) { + __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; + } + if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) { + __kmp_monitor_stksize = __kmp_sys_min_stksize; + } + + KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n", + (int) __kmp_monitor_stksize ) ); + + TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); + + handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize, + (LPTHREAD_START_ROUTINE) __kmp_launch_monitor, + (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread ); + if (handle == 0) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantCreateThread ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + else + th->th.th_info.ds.ds_thread = handle; + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n", + (void *) th->th.th_info.ds.ds_thread ) ); +} + +/* + Check to see if thread is still alive. + + NOTE: The ExitProcess(code) system call causes all threads to Terminate + with a exit_val = code. Because of this we can not rely on + exit_val having any particular value. So this routine may + return STILL_ALIVE in exit_val even after the thread is dead. 
+*/ + +int +__kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val ) +{ + DWORD rc; + rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val ); + if ( rc == 0 ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FunctionError, "GetExitCodeThread()" ), + KMP_ERR( error ), + __kmp_msg_null + ); + }; // if + return ( *exit_val == STILL_ACTIVE ); +} + + +void +__kmp_exit_thread( + int exit_status +) { + ExitThread( exit_status ); +} // __kmp_exit_thread + +/* + This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor(). +*/ +static void +__kmp_reap_common( kmp_info_t * th ) +{ + DWORD exit_val; + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) ); + + /* + 2006-10-19: + + There are two opposite situations: + + 1. Windows* OS keep thread alive after it resets ds_alive flag and exits from thread + function. (For example, see C70770/Q394281 "unloading of dll based on OMP is very + slow".) + 2. Windows* OS may kill thread before it resets ds_alive flag. + + Right solution seems to be waiting for *either* thread termination *or* ds_alive resetting. + + */ + + { + // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize KMP_WAIT_YIELD to + // cover this usage also. 
+ void * obj = NULL; kmp_uint32 spins; -#if USE_ITT_BUILD - KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive ); -#endif /* USE_ITT_BUILD */ - KMP_INIT_YIELD( spins ); - do { -#if USE_ITT_BUILD - KMP_FSYNC_SPIN_PREPARE( obj ); -#endif /* USE_ITT_BUILD */ - __kmp_is_thread_alive( th, &exit_val ); - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) ); -#if USE_ITT_BUILD - if ( exit_val == STILL_ACTIVE ) { - KMP_FSYNC_CANCEL( obj ); - } else { - KMP_FSYNC_SPIN_ACQUIRED( obj ); - }; // if -#endif /* USE_ITT_BUILD */ - } - - __kmp_free_handle( th->th.th_info.ds.ds_thread ); - - /* - * NOTE: The ExitProcess(code) system call causes all threads to Terminate - * with a exit_val = code. Because of this we can not rely on - * exit_val having any particular value. - */ - if ( exit_val == STILL_ACTIVE ) { - KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) ); - } else if ( (void *) exit_val != (void *) th) { - KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) ); - }; // if - - KA_TRACE( 10, - ( - "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n", - th->th.th_info.ds.ds_gtid, - th->th.th_info.ds.ds_thread - ) - ); - - th->th.th_info.ds.ds_thread = 0; - th->th.th_info.ds.ds_tid = KMP_GTID_DNE; - th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; - th->th.th_info.ds.ds_thread_id = 0; - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} - -void -__kmp_reap_monitor( kmp_info_t *th ) -{ - int status; - - KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n", - (void *) th->th.th_info.ds.ds_thread ) ); - - // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. - // If both tid and gtid are 0, it means the monitor did not ever start. - // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. 
- KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid ); - if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) { - return; - }; // if - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - status = SetEvent( __kmp_monitor_ev ); - if ( status == FALSE ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEvent ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) ); - __kmp_reap_common( th ); - - __kmp_free_handle( __kmp_monitor_ev ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ -} - -void -__kmp_reap_worker( kmp_info_t * th ) -{ - KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) ); - __kmp_reap_common( th ); -} - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -#if KMP_HANDLE_SIGNALS - - -static void -__kmp_team_handler( int signo ) -{ - if ( __kmp_global.g.g_abort == 0 ) { - // Stage 1 signal handler, let's shut down all of the threads. - if ( __kmp_debug_buf ) { - __kmp_dump_debug_buffer(); - }; // if - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4( __kmp_global.g.g_abort, signo ); - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4( __kmp_global.g.g_done, TRUE ); - KMP_MB(); // Flush all pending memory write invalidates. - } -} // __kmp_team_handler - - - -static -sig_func_t __kmp_signal( int signum, sig_func_t handler ) { - sig_func_t old = signal( signum, handler ); - if ( old == SIG_ERR ) { - int error = errno; - __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null ); - }; // if - return old; -} - -static void -__kmp_install_one_handler( - int sig, - sig_func_t handler, - int parallel_init -) { - sig_func_t old; - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) ); - if ( parallel_init ) { - old = __kmp_signal( sig, handler ); - // SIG_DFL on Windows* OS in NULL or 0. - if ( old == __kmp_sighldrs[ sig ] ) { - __kmp_siginstalled[ sig ] = 1; - } else { - // Restore/keep user's handler if one previously installed. - old = __kmp_signal( sig, old ); - }; // if - } else { - // Save initial/system signal handlers to see if user handlers installed. - // 2009-09-23: It is a dead code. On Windows* OS __kmp_install_signals called once with - // parallel_init == TRUE. - old = __kmp_signal( sig, SIG_DFL ); - __kmp_sighldrs[ sig ] = old; - __kmp_signal( sig, old ); - }; // if - KMP_MB(); /* Flush all pending memory write invalidates. */ -} // __kmp_install_one_handler - -static void -__kmp_remove_one_handler( int sig ) { - if ( __kmp_siginstalled[ sig ] ) { - sig_func_t old; - KMP_MB(); // Flush all pending memory write invalidates. - KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) ); - old = __kmp_signal( sig, __kmp_sighldrs[ sig ] ); - if ( old != __kmp_team_handler ) { - KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) ); - old = __kmp_signal( sig, old ); - }; // if - __kmp_sighldrs[ sig ] = NULL; - __kmp_siginstalled[ sig ] = 0; - KMP_MB(); // Flush all pending memory write invalidates. - }; // if -} // __kmp_remove_one_handler - - -void -__kmp_install_signals( int parallel_init ) -{ - KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) ); - if ( ! 
__kmp_handle_signals ) { - KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) ); - return; - }; // if - __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init ); -} // __kmp_install_signals - - -void -__kmp_remove_signals( void ) -{ - int sig; - KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) ); - for ( sig = 1; sig < NSIG; ++ sig ) { - __kmp_remove_one_handler( sig ); - }; // for sig -} // __kmp_remove_signals - - -#endif // KMP_HANDLE_SIGNALS - -/* Put the thread to sleep for a time period */ -void -__kmp_thread_sleep( int millis ) -{ - DWORD status; - - status = SleepEx( (DWORD) millis, FALSE ); - if ( status ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FunctionError, "SleepEx()" ), - KMP_ERR( error ), - __kmp_msg_null - ); - } -} - -/* Determine whether the given address is mapped into the current address space. 
*/ -int -__kmp_is_address_mapped( void * addr ) -{ - DWORD status; - MEMORY_BASIC_INFORMATION lpBuffer; - SIZE_T dwLength; - - dwLength = sizeof(MEMORY_BASIC_INFORMATION); - - status = VirtualQuery( addr, &lpBuffer, dwLength ); - - return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) || - (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE ))); -} - -kmp_uint64 -__kmp_hardware_timestamp(void) -{ - kmp_uint64 r = 0; - - QueryPerformanceCounter((LARGE_INTEGER*) &r); - return r; -} - -/* Free handle and check the error code */ -void -__kmp_free_handle( kmp_thread_t tHandle ) -{ -/* called with parameter type HANDLE also, thus suppose kmp_thread_t defined as HANDLE */ - BOOL rc; - rc = CloseHandle( tHandle ); - if ( !rc ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantCloseHandle ), - KMP_ERR( error ), - __kmp_msg_null - ); - } -} - -int -__kmp_get_load_balance( int max ) { - - static ULONG glb_buff_size = 100 * 1024; - - static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algortihm */ - static double glb_call_time = 0; /* Thread balance algorithm call time */ - - int running_threads = 0; // Number of running threads in the system. - NTSTATUS status = 0; - ULONG buff_size = 0; - ULONG info_size = 0; - void * buffer = NULL; - PSYSTEM_PROCESS_INFORMATION spi = NULL; - int first_time = 1; - - double call_time = 0.0; //start, finish; - - __kmp_elapsed( & call_time ); - - if ( glb_call_time && - ( call_time - glb_call_time < __kmp_load_balance_interval ) ) { - running_threads = glb_running_threads; - goto finish; - } - glb_call_time = call_time; - - // Do not spend time on running algorithm if we have a permanent error. 
- if ( NtQuerySystemInformation == NULL ) { - running_threads = -1; - goto finish; - }; // if - - if ( max <= 0 ) { - max = INT_MAX; - }; // if - - do { - - if ( first_time ) { - buff_size = glb_buff_size; - } else { - buff_size = 2 * buff_size; - } - - buffer = KMP_INTERNAL_REALLOC( buffer, buff_size ); - if ( buffer == NULL ) { - running_threads = -1; - goto finish; - }; // if - status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size ); - first_time = 0; - - } while ( status == STATUS_INFO_LENGTH_MISMATCH ); - glb_buff_size = buff_size; - - #define CHECK( cond ) \ - { \ - KMP_DEBUG_ASSERT( cond ); \ - if ( ! ( cond ) ) { \ - running_threads = -1; \ - goto finish; \ - } \ - } - - CHECK( buff_size >= info_size ); - spi = PSYSTEM_PROCESS_INFORMATION( buffer ); - for ( ; ; ) { - ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer ); - CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size ); - HANDLE pid = spi->ProcessId; - ULONG num = spi->NumberOfThreads; - CHECK( num >= 1 ); - size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 ); - CHECK( offset + spi_size < info_size ); // Make sure process info record fits the buffer. - if ( spi->NextEntryOffset != 0 ) { - CHECK( spi_size <= spi->NextEntryOffset ); // And do not overlap with the next record. - }; // if - // pid == 0 corresponds to the System Idle Process. It always has running threads - // on all cores. So, we don't consider the running threads of this process. - if ( pid != 0 ) { - for ( int i = 0; i < num; ++ i ) { - THREAD_STATE state = spi->Threads[ i ].State; - // Count threads that have Ready or Running state. - // !!! TODO: Why comment does not match the code??? 
- if ( state == StateRunning ) { - ++ running_threads; - // Stop counting running threads if the number is already greater than - // the number of available cores - if ( running_threads >= max ) { - goto finish; - } - } // if - }; // for i - } // if - if ( spi->NextEntryOffset == 0 ) { - break; - }; // if - spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset ); - }; // forever - - #undef CHECK - - finish: // Clean up and exit. - - if ( buffer != NULL ) { - KMP_INTERNAL_FREE( buffer ); - }; // if - - glb_running_threads = running_threads; - - return running_threads; - -} //__kmp_get_load_balance() - +#if USE_ITT_BUILD + KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive ); +#endif /* USE_ITT_BUILD */ + KMP_INIT_YIELD( spins ); + do { +#if USE_ITT_BUILD + KMP_FSYNC_SPIN_PREPARE( obj ); +#endif /* USE_ITT_BUILD */ + __kmp_is_thread_alive( th, &exit_val ); + KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); + KMP_YIELD_SPIN( spins ); + } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) ); +#if USE_ITT_BUILD + if ( exit_val == STILL_ACTIVE ) { + KMP_FSYNC_CANCEL( obj ); + } else { + KMP_FSYNC_SPIN_ACQUIRED( obj ); + }; // if +#endif /* USE_ITT_BUILD */ + } + + __kmp_free_handle( th->th.th_info.ds.ds_thread ); + + /* + * NOTE: The ExitProcess(code) system call causes all threads to Terminate + * with a exit_val = code. Because of this we can not rely on + * exit_val having any particular value. 
+ */ + if ( exit_val == STILL_ACTIVE ) { + KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) ); + } else if ( (void *) exit_val != (void *) th) { + KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) ); + }; // if + + KA_TRACE( 10, + ( + "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n", + th->th.th_info.ds.ds_gtid, + th->th.th_info.ds.ds_thread + ) + ); + + th->th.th_info.ds.ds_thread = 0; + th->th.th_info.ds.ds_tid = KMP_GTID_DNE; + th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; + th->th.th_info.ds.ds_thread_id = 0; + + KMP_MB(); /* Flush all pending memory write invalidates. */ +} + +void +__kmp_reap_monitor( kmp_info_t *th ) +{ + int status; + + KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n", + (void *) th->th.th_info.ds.ds_thread ) ); + + // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. + // If both tid and gtid are 0, it means the monitor did not ever start. + // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. + KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid ); + if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) { + return; + }; // if + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + status = SetEvent( __kmp_monitor_ev ); + if ( status == FALSE ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantSetEvent ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) ); + __kmp_reap_common( th ); + + __kmp_free_handle( __kmp_monitor_ev ); + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ +} + +void +__kmp_reap_worker( kmp_info_t * th ) +{ + KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) ); + __kmp_reap_common( th ); +} + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +#if KMP_HANDLE_SIGNALS + + +static void +__kmp_team_handler( int signo ) +{ + if ( __kmp_global.g.g_abort == 0 ) { + // Stage 1 signal handler, let's shut down all of the threads. + if ( __kmp_debug_buf ) { + __kmp_dump_debug_buffer(); + }; // if + KMP_MB(); // Flush all pending memory write invalidates. + TCW_4( __kmp_global.g.g_abort, signo ); + KMP_MB(); // Flush all pending memory write invalidates. + TCW_4( __kmp_global.g.g_done, TRUE ); + KMP_MB(); // Flush all pending memory write invalidates. + } +} // __kmp_team_handler + + + +static +sig_func_t __kmp_signal( int signum, sig_func_t handler ) { + sig_func_t old = signal( signum, handler ); + if ( old == SIG_ERR ) { + int error = errno; + __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null ); + }; // if + return old; +} + +static void +__kmp_install_one_handler( + int sig, + sig_func_t handler, + int parallel_init +) { + sig_func_t old; + KMP_MB(); /* Flush all pending memory write invalidates. */ + KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) ); + if ( parallel_init ) { + old = __kmp_signal( sig, handler ); + // SIG_DFL on Windows* OS in NULL or 0. + if ( old == __kmp_sighldrs[ sig ] ) { + __kmp_siginstalled[ sig ] = 1; + } else { + // Restore/keep user's handler if one previously installed. + old = __kmp_signal( sig, old ); + }; // if + } else { + // Save initial/system signal handlers to see if user handlers installed. + // 2009-09-23: It is a dead code. On Windows* OS __kmp_install_signals called once with + // parallel_init == TRUE. 
+ old = __kmp_signal( sig, SIG_DFL ); + __kmp_sighldrs[ sig ] = old; + __kmp_signal( sig, old ); + }; // if + KMP_MB(); /* Flush all pending memory write invalidates. */ +} // __kmp_install_one_handler + +static void +__kmp_remove_one_handler( int sig ) { + if ( __kmp_siginstalled[ sig ] ) { + sig_func_t old; + KMP_MB(); // Flush all pending memory write invalidates. + KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) ); + old = __kmp_signal( sig, __kmp_sighldrs[ sig ] ); + if ( old != __kmp_team_handler ) { + KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) ); + old = __kmp_signal( sig, old ); + }; // if + __kmp_sighldrs[ sig ] = NULL; + __kmp_siginstalled[ sig ] = 0; + KMP_MB(); // Flush all pending memory write invalidates. + }; // if +} // __kmp_remove_one_handler + + +void +__kmp_install_signals( int parallel_init ) +{ + KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) ); + if ( ! __kmp_handle_signals ) { + KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) ); + return; + }; // if + __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init ); + __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init ); +} // __kmp_install_signals + + +void +__kmp_remove_signals( void ) +{ + int sig; + KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) ); + for ( sig = 1; sig < NSIG; ++ sig ) { + __kmp_remove_one_handler( sig ); + }; // for sig +} // __kmp_remove_signals + + +#endif // KMP_HANDLE_SIGNALS + +/* Put the thread to sleep for a time period */ +void +__kmp_thread_sleep( int millis ) +{ + DWORD status; + + status = SleepEx( (DWORD) millis, 
FALSE ); + if ( status ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FunctionError, "SleepEx()" ), + KMP_ERR( error ), + __kmp_msg_null + ); + } +} + +/* Determine whether the given address is mapped into the current address space. */ +int +__kmp_is_address_mapped( void * addr ) +{ + DWORD status; + MEMORY_BASIC_INFORMATION lpBuffer; + SIZE_T dwLength; + + dwLength = sizeof(MEMORY_BASIC_INFORMATION); + + status = VirtualQuery( addr, &lpBuffer, dwLength ); + + return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) || + (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE ))); +} + +kmp_uint64 +__kmp_hardware_timestamp(void) +{ + kmp_uint64 r = 0; + + QueryPerformanceCounter((LARGE_INTEGER*) &r); + return r; +} + +/* Free handle and check the error code */ +void +__kmp_free_handle( kmp_thread_t tHandle ) +{ +/* called with parameter type HANDLE also, thus suppose kmp_thread_t defined as HANDLE */ + BOOL rc; + rc = CloseHandle( tHandle ); + if ( !rc ) { + DWORD error = GetLastError(); + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( CantCloseHandle ), + KMP_ERR( error ), + __kmp_msg_null + ); + } +} + +int +__kmp_get_load_balance( int max ) { + + static ULONG glb_buff_size = 100 * 1024; + + static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algortihm */ + static double glb_call_time = 0; /* Thread balance algorithm call time */ + + int running_threads = 0; // Number of running threads in the system. 
+ NTSTATUS status = 0; + ULONG buff_size = 0; + ULONG info_size = 0; + void * buffer = NULL; + PSYSTEM_PROCESS_INFORMATION spi = NULL; + int first_time = 1; + + double call_time = 0.0; //start, finish; + + __kmp_elapsed( & call_time ); + + if ( glb_call_time && + ( call_time - glb_call_time < __kmp_load_balance_interval ) ) { + running_threads = glb_running_threads; + goto finish; + } + glb_call_time = call_time; + + // Do not spend time on running algorithm if we have a permanent error. + if ( NtQuerySystemInformation == NULL ) { + running_threads = -1; + goto finish; + }; // if + + if ( max <= 0 ) { + max = INT_MAX; + }; // if + + do { + + if ( first_time ) { + buff_size = glb_buff_size; + } else { + buff_size = 2 * buff_size; + } + + buffer = KMP_INTERNAL_REALLOC( buffer, buff_size ); + if ( buffer == NULL ) { + running_threads = -1; + goto finish; + }; // if + status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size ); + first_time = 0; + + } while ( status == STATUS_INFO_LENGTH_MISMATCH ); + glb_buff_size = buff_size; + + #define CHECK( cond ) \ + { \ + KMP_DEBUG_ASSERT( cond ); \ + if ( ! ( cond ) ) { \ + running_threads = -1; \ + goto finish; \ + } \ + } + + CHECK( buff_size >= info_size ); + spi = PSYSTEM_PROCESS_INFORMATION( buffer ); + for ( ; ; ) { + ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer ); + CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size ); + HANDLE pid = spi->ProcessId; + ULONG num = spi->NumberOfThreads; + CHECK( num >= 1 ); + size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 ); + CHECK( offset + spi_size < info_size ); // Make sure process info record fits the buffer. + if ( spi->NextEntryOffset != 0 ) { + CHECK( spi_size <= spi->NextEntryOffset ); // And do not overlap with the next record. + }; // if + // pid == 0 corresponds to the System Idle Process. It always has running threads + // on all cores. 
So, we don't consider the running threads of this process. + if ( pid != 0 ) { + for ( int i = 0; i < num; ++ i ) { + THREAD_STATE state = spi->Threads[ i ].State; + // Count threads that have Ready or Running state. + // !!! TODO: Why comment does not match the code??? + if ( state == StateRunning ) { + ++ running_threads; + // Stop counting running threads if the number is already greater than + // the number of available cores + if ( running_threads >= max ) { + goto finish; + } + } // if + }; // for i + } // if + if ( spi->NextEntryOffset == 0 ) { + break; + }; // if + spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset ); + }; // forever + + #undef CHECK + + finish: // Clean up and exit. + + if ( buffer != NULL ) { + KMP_INTERNAL_FREE( buffer ); + }; // if + + glb_running_threads = running_threads; + + return running_threads; + +} //__kmp_get_load_balance() + diff --git a/contrib/libs/cxxsupp/system_stl/ya.make b/contrib/libs/cxxsupp/system_stl/ya.make index 3b4327f8777..75317898ac9 100644 --- a/contrib/libs/cxxsupp/system_stl/ya.make +++ b/contrib/libs/cxxsupp/system_stl/ya.make @@ -3,7 +3,7 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() LICENSE(YandexOpen) - + OWNER( g:contrib g:cpp-contrib diff --git a/contrib/libs/cxxsupp/ya.make b/contrib/libs/cxxsupp/ya.make index 5f77c46f139..6cd299ac8a1 100644 --- a/contrib/libs/cxxsupp/ya.make +++ b/contrib/libs/cxxsupp/ya.make @@ -1,4 +1,4 @@ -LIBRARY() +LIBRARY() WITHOUT_LICENSE_TEXTS() @@ -9,7 +9,7 @@ OWNER( g:contrib g:cpp-contrib ) - + NO_PLATFORM() DISABLE(OPENSOURCE_EXPORT) @@ -23,7 +23,7 @@ ELSE() ) ENDIF() -END() +END() RECURSE( libcxx diff --git a/contrib/libs/double-conversion/bignum-dtoa.cc b/contrib/libs/double-conversion/bignum-dtoa.cc index e6b43a5d6fa..d99ac2aaf9d 100644 --- a/contrib/libs/double-conversion/bignum-dtoa.cc +++ b/contrib/libs/double-conversion/bignum-dtoa.cc @@ -1,641 +1,641 @@ -// Copyright 2010 the V8 project authors. All rights reserved. 
-// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include - -#include "bignum-dtoa.h" - -#include "bignum.h" -#include "ieee.h" - -namespace double_conversion { - -static int NormalizedExponent(uint64_t significand, int exponent) { - ASSERT(significand != 0); - while ((significand & Double::kHiddenBit) == 0) { - significand = significand << 1; - exponent = exponent - 1; - } - return exponent; -} - - -// Forward declarations: -// Returns an estimation of k such that 10^(k-1) <= v < 10^k. -static int EstimatePower(int exponent); -// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator -// and denominator. 
-static void InitialScaledStartValues(uint64_t significand, - int exponent, - bool lower_boundary_is_closer, - int estimated_power, - bool need_boundary_deltas, - Bignum* numerator, - Bignum* denominator, - Bignum* delta_minus, - Bignum* delta_plus); -// Multiplies numerator/denominator so that its values lies in the range 1-10. -// Returns decimal_point s.t. -// v = numerator'/denominator' * 10^(decimal_point-1) -// where numerator' and denominator' are the values of numerator and -// denominator after the call to this function. -static void FixupMultiply10(int estimated_power, bool is_even, - int* decimal_point, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus); -// Generates digits from the left to the right and stops when the generated -// digits yield the shortest decimal representation of v. -static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus, - bool is_even, - Vector buffer, int* length); -// Generates 'requested_digits' after the decimal point. -static void BignumToFixed(int requested_digits, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector(buffer), int* length); -// Generates 'count' digits of numerator/denominator. -// Once 'count' digits have been produced rounds the result depending on the -// remainder (remainders of exactly .5 round upwards). Might update the -// decimal_point when rounding up (for example for 0.9999). 
-static void GenerateCountedDigits(int count, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector(buffer), int* length); - - -void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, - Vector buffer, int* length, int* decimal_point) { - ASSERT(v > 0); - ASSERT(!Double(v).IsSpecial()); - uint64_t significand; - int exponent; - bool lower_boundary_is_closer; - if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) { - float f = static_cast(v); - ASSERT(f == v); - significand = Single(f).Significand(); - exponent = Single(f).Exponent(); - lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser(); - } else { - significand = Double(v).Significand(); - exponent = Double(v).Exponent(); - lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser(); - } - bool need_boundary_deltas = - (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE); - - bool is_even = (significand & 1) == 0; - int normalized_exponent = NormalizedExponent(significand, exponent); - // estimated_power might be too low by 1. - int estimated_power = EstimatePower(normalized_exponent); - - // Shortcut for Fixed. - // The requested digits correspond to the digits after the point. If the - // number is much too small, then there is no need in trying to get any - // digits. - if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) { - buffer[0] = '\0'; - *length = 0; - // Set decimal-point to -requested_digits. This is what Gay does. - // Note that it should not have any effect anyways since the string is - // empty. - *decimal_point = -requested_digits; - return; - } - - Bignum numerator; - Bignum denominator; - Bignum delta_minus; - Bignum delta_plus; - // Make sure the bignum can grow large enough. The smallest double equals - // 4e-324. In this case the denominator needs fewer than 324*4 binary digits. - // The maximum double is 1.7976931348623157e308 which needs fewer than - // 308*4 binary digits. 
- ASSERT(Bignum::kMaxSignificantBits >= 324*4); - InitialScaledStartValues(significand, exponent, lower_boundary_is_closer, - estimated_power, need_boundary_deltas, - &numerator, &denominator, - &delta_minus, &delta_plus); - // We now have v = (numerator / denominator) * 10^estimated_power. - FixupMultiply10(estimated_power, is_even, decimal_point, - &numerator, &denominator, - &delta_minus, &delta_plus); - // We now have v = (numerator / denominator) * 10^(decimal_point-1), and - // 1 <= (numerator + delta_plus) / denominator < 10 - switch (mode) { - case BIGNUM_DTOA_SHORTEST: - case BIGNUM_DTOA_SHORTEST_SINGLE: - GenerateShortestDigits(&numerator, &denominator, - &delta_minus, &delta_plus, - is_even, buffer, length); - break; - case BIGNUM_DTOA_FIXED: - BignumToFixed(requested_digits, decimal_point, - &numerator, &denominator, - buffer, length); - break; - case BIGNUM_DTOA_PRECISION: - GenerateCountedDigits(requested_digits, decimal_point, - &numerator, &denominator, - buffer, length); - break; - default: - UNREACHABLE(); - } - buffer[*length] = '\0'; -} - - -// The procedure starts generating digits from the left to the right and stops -// when the generated digits yield the shortest decimal representation of v. A -// decimal representation of v is a number lying closer to v than to any other -// double, so it converts to v when read. -// -// This is true if d, the decimal representation, is between m- and m+, the -// upper and lower boundaries. d must be strictly between them if !is_even. -// m- := (numerator - delta_minus) / denominator -// m+ := (numerator + delta_plus) / denominator -// -// Precondition: 0 <= (numerator+delta_plus) / denominator < 10. -// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit -// will be produced. This should be the standard precondition. 
-static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus, - bool is_even, - Vector buffer, int* length) { - // Small optimization: if delta_minus and delta_plus are the same just reuse - // one of the two bignums. - if (Bignum::Equal(*delta_minus, *delta_plus)) { - delta_plus = delta_minus; - } - *length = 0; - for (;;) { - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. - // digit = numerator / denominator (integer division). - // numerator = numerator % denominator. - buffer[(*length)++] = static_cast(digit + '0'); - - // Can we stop already? - // If the remainder of the division is less than the distance to the lower - // boundary we can stop. In this case we simply round down (discarding the - // remainder). - // Similarly we test if we can round up (using the upper boundary). - bool in_delta_room_minus; - bool in_delta_room_plus; - if (is_even) { - in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus); - } else { - in_delta_room_minus = Bignum::Less(*numerator, *delta_minus); - } - if (is_even) { - in_delta_room_plus = - Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; - } else { - in_delta_room_plus = - Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; - } - if (!in_delta_room_minus && !in_delta_room_plus) { - // Prepare for next iteration. - numerator->Times10(); - delta_minus->Times10(); - // We optimized delta_plus to be equal to delta_minus (if they share the - // same value). So don't multiply delta_plus if they point to the same - // object. - if (delta_minus != delta_plus) { - delta_plus->Times10(); - } - } else if (in_delta_room_minus && in_delta_room_plus) { - // Let's see if 2*numerator < denominator. - // If yes, then the next digit would be < 5 and we can round down. 
- int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator); - if (compare < 0) { - // Remaining digits are less than .5. -> Round down (== do nothing). - } else if (compare > 0) { - // Remaining digits are more than .5 of denominator. -> Round up. - // Note that the last digit could not be a '9' as otherwise the whole - // loop would have stopped earlier. - // We still have an assert here in case the preconditions were not - // satisfied. - ASSERT(buffer[(*length) - 1] != '9'); - buffer[(*length) - 1]++; - } else { - // Halfway case. - // TODO(floitsch): need a way to solve half-way cases. - // For now let's round towards even (since this is what Gay seems to - // do). - - if ((buffer[(*length) - 1] - '0') % 2 == 0) { - // Round down => Do nothing. - } else { - ASSERT(buffer[(*length) - 1] != '9'); - buffer[(*length) - 1]++; - } - } - return; - } else if (in_delta_room_minus) { - // Round down (== do nothing). - return; - } else { // in_delta_room_plus - // Round up. - // Note again that the last digit could not be '9' since this would have - // stopped the loop earlier. - // We still have an ASSERT here, in case the preconditions were not - // satisfied. - ASSERT(buffer[(*length) -1] != '9'); - buffer[(*length) - 1]++; - return; - } - } -} - - -// Let v = numerator / denominator < 10. -// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point) -// from left to right. Once 'count' digits have been produced we decide wether -// to round up or down. Remainders of exactly .5 round upwards. Numbers such -// as 9.999999 propagate a carry all the way, and change the -// exponent (decimal_point), when rounding upwards. 
-static void GenerateCountedDigits(int count, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector buffer, int* length) { - ASSERT(count >= 0); - for (int i = 0; i < count - 1; ++i) { - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. - // digit = numerator / denominator (integer division). - // numerator = numerator % denominator. - buffer[i] = static_cast(digit + '0'); - // Prepare for next iteration. - numerator->Times10(); - } - // Generate the last digit. - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { - digit++; - } - ASSERT(digit <= 10); - buffer[count - 1] = static_cast(digit + '0'); - // Correct bad digits (in case we had a sequence of '9's). Propagate the - // carry until we hat a non-'9' or til we reach the first digit. - for (int i = count - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) break; - buffer[i] = '0'; - buffer[i - 1]++; - } - if (buffer[0] == '0' + 10) { - // Propagate a carry past the top place. - buffer[0] = '1'; - (*decimal_point)++; - } - *length = count; -} - - -// Generates 'requested_digits' after the decimal point. It might omit -// trailing '0's. If the input number is too small then no digits at all are -// generated (ex.: 2 fixed digits for 0.00001). -// -// Input verifies: 1 <= (numerator + delta) / denominator < 10. -static void BignumToFixed(int requested_digits, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector(buffer), int* length) { - // Note that we have to look at more than just the requested_digits, since - // a number could be rounded up. Example: v=0.5 with requested_digits=0. - // Even though the power of v equals 0 we can't just stop here. - if (-(*decimal_point) > requested_digits) { - // The number is definitively too small. - // Ex: 0.001 with requested_digits == 1. 
- // Set decimal-point to -requested_digits. This is what Gay does. - // Note that it should not have any effect anyways since the string is - // empty. - *decimal_point = -requested_digits; - *length = 0; - return; - } else if (-(*decimal_point) == requested_digits) { - // We only need to verify if the number rounds down or up. - // Ex: 0.04 and 0.06 with requested_digits == 1. - ASSERT(*decimal_point == -requested_digits); - // Initially the fraction lies in range (1, 10]. Multiply the denominator - // by 10 so that we can compare more easily. - denominator->Times10(); - if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { - // If the fraction is >= 0.5 then we have to include the rounded - // digit. - buffer[0] = '1'; - *length = 1; - (*decimal_point)++; - } else { - // Note that we caught most of similar cases earlier. - *length = 0; - } - return; - } else { - // The requested digits correspond to the digits after the point. - // The variable 'needed_digits' includes the digits before the point. - int needed_digits = (*decimal_point) + requested_digits; - GenerateCountedDigits(needed_digits, decimal_point, - numerator, denominator, - buffer, length); - } -} - - -// Returns an estimation of k such that 10^(k-1) <= v < 10^k where -// v = f * 2^exponent and 2^52 <= f < 2^53. -// v is hence a normalized double with the given exponent. The output is an -// approximation for the exponent of the decimal approimation .digits * 10^k. -// -// The result might undershoot by 1 in which case 10^k <= v < 10^k+1. -// Note: this property holds for v's upper boundary m+ too. -// 10^k <= m+ < 10^k+1. -// (see explanation below). -// -// Examples: -// EstimatePower(0) => 16 -// EstimatePower(-52) => 0 -// -// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0. -static int EstimatePower(int exponent) { - // This function estimates log10 of v where v = f*2^e (with e == exponent). 
- // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)). - // Note that f is bounded by its container size. Let p = 53 (the double's - // significand size). Then 2^(p-1) <= f < 2^p. - // - // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close - // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)). - // The computed number undershoots by less than 0.631 (when we compute log3 - // and not log10). - // - // Optimization: since we only need an approximated result this computation - // can be performed on 64 bit integers. On x86/x64 architecture the speedup is - // not really measurable, though. - // - // Since we want to avoid overshooting we decrement by 1e10 so that - // floating-point imprecisions don't affect us. - // - // Explanation for v's boundary m+: the computation takes advantage of - // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement - // (even for denormals where the delta can be much more important). - - const double k1Log10 = 0.30102999566398114; // 1/lg(10) - - // For doubles len(f) == 53 (don't forget the hidden bit). - const int kSignificandSize = Double::kSignificandSize; - double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10); - return static_cast(estimate); -} - - -// See comments for InitialScaledStartValues. -static void InitialScaledStartValuesPositiveExponent( - uint64_t significand, int exponent, - int estimated_power, bool need_boundary_deltas, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - // A positive exponent implies a positive power. - ASSERT(estimated_power >= 0); - // Since the estimated_power is positive we simply multiply the denominator - // by 10^estimated_power. - - // numerator = v. - numerator->AssignUInt64(significand); - numerator->ShiftLeft(exponent); - // denominator = 10^estimated_power. 
- denominator->AssignPowerUInt16(10, estimated_power); - - if (need_boundary_deltas) { - // Introduce a common denominator so that the deltas to the boundaries are - // integers. - denominator->ShiftLeft(1); - numerator->ShiftLeft(1); - // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common - // denominator (of 2) delta_plus equals 2^e. - delta_plus->AssignUInt16(1); - delta_plus->ShiftLeft(exponent); - // Same for delta_minus. The adjustments if f == 2^p-1 are done later. - delta_minus->AssignUInt16(1); - delta_minus->ShiftLeft(exponent); - } -} - - -// See comments for InitialScaledStartValues -static void InitialScaledStartValuesNegativeExponentPositivePower( - uint64_t significand, int exponent, - int estimated_power, bool need_boundary_deltas, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - // v = f * 2^e with e < 0, and with estimated_power >= 0. - // This means that e is close to 0 (have a look at how estimated_power is - // computed). - - // numerator = significand - // since v = significand * 2^exponent this is equivalent to - // numerator = v * / 2^-exponent - numerator->AssignUInt64(significand); - // denominator = 10^estimated_power * 2^-exponent (with exponent < 0) - denominator->AssignPowerUInt16(10, estimated_power); - denominator->ShiftLeft(-exponent); - - if (need_boundary_deltas) { - // Introduce a common denominator so that the deltas to the boundaries are - // integers. - denominator->ShiftLeft(1); - numerator->ShiftLeft(1); - // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common - // denominator (of 2) delta_plus equals 2^e. - // Given that the denominator already includes v's exponent the distance - // to the boundaries is simply 1. - delta_plus->AssignUInt16(1); - // Same for delta_minus. The adjustments if f == 2^p-1 are done later. 
- delta_minus->AssignUInt16(1); - } -} - - -// See comments for InitialScaledStartValues -static void InitialScaledStartValuesNegativeExponentNegativePower( - uint64_t significand, int exponent, - int estimated_power, bool need_boundary_deltas, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - // Instead of multiplying the denominator with 10^estimated_power we - // multiply all values (numerator and deltas) by 10^-estimated_power. - - // Use numerator as temporary container for power_ten. - Bignum* power_ten = numerator; - power_ten->AssignPowerUInt16(10, -estimated_power); - - if (need_boundary_deltas) { - // Since power_ten == numerator we must make a copy of 10^estimated_power - // before we complete the computation of the numerator. - // delta_plus = delta_minus = 10^estimated_power - delta_plus->AssignBignum(*power_ten); - delta_minus->AssignBignum(*power_ten); - } - - // numerator = significand * 2 * 10^-estimated_power - // since v = significand * 2^exponent this is equivalent to - // numerator = v * 10^-estimated_power * 2 * 2^-exponent. - // Remember: numerator has been abused as power_ten. So no need to assign it - // to itself. - ASSERT(numerator == power_ten); - numerator->MultiplyByUInt64(significand); - - // denominator = 2 * 2^-exponent with exponent < 0. - denominator->AssignUInt16(1); - denominator->ShiftLeft(-exponent); - - if (need_boundary_deltas) { - // Introduce a common denominator so that the deltas to the boundaries are - // integers. - numerator->ShiftLeft(1); - denominator->ShiftLeft(1); - // With this shift the boundaries have their correct value, since - // delta_plus = 10^-estimated_power, and - // delta_minus = 10^-estimated_power. - // These assignments have been done earlier. - // The adjustments if f == 2^p-1 (lower boundary is closer) are done later. - } -} - - -// Let v = significand * 2^exponent. 
-// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator -// and denominator. The functions GenerateShortestDigits and -// GenerateCountedDigits will then convert this ratio to its decimal -// representation d, with the required accuracy. -// Then d * 10^estimated_power is the representation of v. -// (Note: the fraction and the estimated_power might get adjusted before -// generating the decimal representation.) -// -// The initial start values consist of: -// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power. -// - a scaled (common) denominator. -// optionally (used by GenerateShortestDigits to decide if it has the shortest -// decimal converting back to v): -// - v - m-: the distance to the lower boundary. -// - m+ - v: the distance to the upper boundary. -// -// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator. -// -// Let ep == estimated_power, then the returned values will satisfy: -// v / 10^ep = numerator / denominator. -// v's boundarys m- and m+: -// m- / 10^ep == v / 10^ep - delta_minus / denominator -// m+ / 10^ep == v / 10^ep + delta_plus / denominator -// Or in other words: -// m- == v - delta_minus * 10^ep / denominator; -// m+ == v + delta_plus * 10^ep / denominator; -// -// Since 10^(k-1) <= v < 10^k (with k == estimated_power) -// or 10^k <= v < 10^(k+1) -// we then have 0.1 <= numerator/denominator < 1 -// or 1 <= numerator/denominator < 10 -// -// It is then easy to kickstart the digit-generation routine. -// -// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST -// or BIGNUM_DTOA_SHORTEST_SINGLE. 
- -static void InitialScaledStartValues(uint64_t significand, - int exponent, - bool lower_boundary_is_closer, - int estimated_power, - bool need_boundary_deltas, - Bignum* numerator, - Bignum* denominator, - Bignum* delta_minus, - Bignum* delta_plus) { - if (exponent >= 0) { - InitialScaledStartValuesPositiveExponent( - significand, exponent, estimated_power, need_boundary_deltas, - numerator, denominator, delta_minus, delta_plus); - } else if (estimated_power >= 0) { - InitialScaledStartValuesNegativeExponentPositivePower( - significand, exponent, estimated_power, need_boundary_deltas, - numerator, denominator, delta_minus, delta_plus); - } else { - InitialScaledStartValuesNegativeExponentNegativePower( - significand, exponent, estimated_power, need_boundary_deltas, - numerator, denominator, delta_minus, delta_plus); - } - - if (need_boundary_deltas && lower_boundary_is_closer) { - // The lower boundary is closer at half the distance of "normal" numbers. - // Increase the common denominator and adapt all but the delta_minus. - denominator->ShiftLeft(1); // *2 - numerator->ShiftLeft(1); // *2 - delta_plus->ShiftLeft(1); // *2 - } -} - - -// This routine multiplies numerator/denominator so that its values lies in the -// range 1-10. That is after a call to this function we have: -// 1 <= (numerator + delta_plus) /denominator < 10. -// Let numerator the input before modification and numerator' the argument -// after modification, then the output-parameter decimal_point is such that -// numerator / denominator * 10^estimated_power == -// numerator' / denominator' * 10^(decimal_point - 1) -// In some cases estimated_power was too low, and this is already the case. We -// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k == -// estimated_power) but do not touch the numerator or denominator. -// Otherwise the routine multiplies the numerator and the deltas by 10. 
-static void FixupMultiply10(int estimated_power, bool is_even, - int* decimal_point, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - bool in_range; - if (is_even) { - // For IEEE doubles half-way cases (in decimal system numbers ending with 5) - // are rounded to the closest floating-point number with even significand. - in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; - } else { - in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; - } - if (in_range) { - // Since numerator + delta_plus >= denominator we already have - // 1 <= numerator/denominator < 10. Simply update the estimated_power. - *decimal_point = estimated_power + 1; - } else { - *decimal_point = estimated_power; - numerator->Times10(); - if (Bignum::Equal(*delta_minus, *delta_plus)) { - delta_minus->Times10(); - delta_plus->AssignBignum(*delta_minus); - } else { - delta_minus->Times10(); - delta_plus->Times10(); - } - } -} - -} // namespace double_conversion + +#include "bignum-dtoa.h" + +#include "bignum.h" +#include "ieee.h" + +namespace double_conversion { + +static int NormalizedExponent(uint64_t significand, int exponent) { + ASSERT(significand != 0); + while ((significand & Double::kHiddenBit) == 0) { + significand = significand << 1; + exponent = exponent - 1; + } + return exponent; +} + + +// Forward declarations: +// Returns an estimation of k such that 10^(k-1) <= v < 10^k. +static int EstimatePower(int exponent); +// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator +// and denominator. +static void InitialScaledStartValues(uint64_t significand, + int exponent, + bool lower_boundary_is_closer, + int estimated_power, + bool need_boundary_deltas, + Bignum* numerator, + Bignum* denominator, + Bignum* delta_minus, + Bignum* delta_plus); +// Multiplies numerator/denominator so that its values lies in the range 1-10. +// Returns decimal_point s.t. 
+// v = numerator'/denominator' * 10^(decimal_point-1) +// where numerator' and denominator' are the values of numerator and +// denominator after the call to this function. +static void FixupMultiply10(int estimated_power, bool is_even, + int* decimal_point, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus); +// Generates digits from the left to the right and stops when the generated +// digits yield the shortest decimal representation of v. +static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus, + bool is_even, + Vector buffer, int* length); +// Generates 'requested_digits' after the decimal point. +static void BignumToFixed(int requested_digits, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector(buffer), int* length); +// Generates 'count' digits of numerator/denominator. +// Once 'count' digits have been produced rounds the result depending on the +// remainder (remainders of exactly .5 round upwards). Might update the +// decimal_point when rounding up (for example for 0.9999). 
+static void GenerateCountedDigits(int count, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector(buffer), int* length); + + +void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, + Vector buffer, int* length, int* decimal_point) { + ASSERT(v > 0); + ASSERT(!Double(v).IsSpecial()); + uint64_t significand; + int exponent; + bool lower_boundary_is_closer; + if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) { + float f = static_cast(v); + ASSERT(f == v); + significand = Single(f).Significand(); + exponent = Single(f).Exponent(); + lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser(); + } else { + significand = Double(v).Significand(); + exponent = Double(v).Exponent(); + lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser(); + } + bool need_boundary_deltas = + (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE); + + bool is_even = (significand & 1) == 0; + int normalized_exponent = NormalizedExponent(significand, exponent); + // estimated_power might be too low by 1. + int estimated_power = EstimatePower(normalized_exponent); + + // Shortcut for Fixed. + // The requested digits correspond to the digits after the point. If the + // number is much too small, then there is no need in trying to get any + // digits. + if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) { + buffer[0] = '\0'; + *length = 0; + // Set decimal-point to -requested_digits. This is what Gay does. + // Note that it should not have any effect anyways since the string is + // empty. + *decimal_point = -requested_digits; + return; + } + + Bignum numerator; + Bignum denominator; + Bignum delta_minus; + Bignum delta_plus; + // Make sure the bignum can grow large enough. The smallest double equals + // 4e-324. In this case the denominator needs fewer than 324*4 binary digits. + // The maximum double is 1.7976931348623157e308 which needs fewer than + // 308*4 binary digits. 
+ ASSERT(Bignum::kMaxSignificantBits >= 324*4); + InitialScaledStartValues(significand, exponent, lower_boundary_is_closer, + estimated_power, need_boundary_deltas, + &numerator, &denominator, + &delta_minus, &delta_plus); + // We now have v = (numerator / denominator) * 10^estimated_power. + FixupMultiply10(estimated_power, is_even, decimal_point, + &numerator, &denominator, + &delta_minus, &delta_plus); + // We now have v = (numerator / denominator) * 10^(decimal_point-1), and + // 1 <= (numerator + delta_plus) / denominator < 10 + switch (mode) { + case BIGNUM_DTOA_SHORTEST: + case BIGNUM_DTOA_SHORTEST_SINGLE: + GenerateShortestDigits(&numerator, &denominator, + &delta_minus, &delta_plus, + is_even, buffer, length); + break; + case BIGNUM_DTOA_FIXED: + BignumToFixed(requested_digits, decimal_point, + &numerator, &denominator, + buffer, length); + break; + case BIGNUM_DTOA_PRECISION: + GenerateCountedDigits(requested_digits, decimal_point, + &numerator, &denominator, + buffer, length); + break; + default: + UNREACHABLE(); + } + buffer[*length] = '\0'; +} + + +// The procedure starts generating digits from the left to the right and stops +// when the generated digits yield the shortest decimal representation of v. A +// decimal representation of v is a number lying closer to v than to any other +// double, so it converts to v when read. +// +// This is true if d, the decimal representation, is between m- and m+, the +// upper and lower boundaries. d must be strictly between them if !is_even. +// m- := (numerator - delta_minus) / denominator +// m+ := (numerator + delta_plus) / denominator +// +// Precondition: 0 <= (numerator+delta_plus) / denominator < 10. +// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit +// will be produced. This should be the standard precondition. 
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus, + bool is_even, + Vector buffer, int* length) { + // Small optimization: if delta_minus and delta_plus are the same just reuse + // one of the two bignums. + if (Bignum::Equal(*delta_minus, *delta_plus)) { + delta_plus = delta_minus; + } + *length = 0; + for (;;) { + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. + // digit = numerator / denominator (integer division). + // numerator = numerator % denominator. + buffer[(*length)++] = static_cast(digit + '0'); + + // Can we stop already? + // If the remainder of the division is less than the distance to the lower + // boundary we can stop. In this case we simply round down (discarding the + // remainder). + // Similarly we test if we can round up (using the upper boundary). + bool in_delta_room_minus; + bool in_delta_room_plus; + if (is_even) { + in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus); + } else { + in_delta_room_minus = Bignum::Less(*numerator, *delta_minus); + } + if (is_even) { + in_delta_room_plus = + Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; + } else { + in_delta_room_plus = + Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; + } + if (!in_delta_room_minus && !in_delta_room_plus) { + // Prepare for next iteration. + numerator->Times10(); + delta_minus->Times10(); + // We optimized delta_plus to be equal to delta_minus (if they share the + // same value). So don't multiply delta_plus if they point to the same + // object. + if (delta_minus != delta_plus) { + delta_plus->Times10(); + } + } else if (in_delta_room_minus && in_delta_room_plus) { + // Let's see if 2*numerator < denominator. + // If yes, then the next digit would be < 5 and we can round down. 
+ int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator); + if (compare < 0) { + // Remaining digits are less than .5. -> Round down (== do nothing). + } else if (compare > 0) { + // Remaining digits are more than .5 of denominator. -> Round up. + // Note that the last digit could not be a '9' as otherwise the whole + // loop would have stopped earlier. + // We still have an assert here in case the preconditions were not + // satisfied. + ASSERT(buffer[(*length) - 1] != '9'); + buffer[(*length) - 1]++; + } else { + // Halfway case. + // TODO(floitsch): need a way to solve half-way cases. + // For now let's round towards even (since this is what Gay seems to + // do). + + if ((buffer[(*length) - 1] - '0') % 2 == 0) { + // Round down => Do nothing. + } else { + ASSERT(buffer[(*length) - 1] != '9'); + buffer[(*length) - 1]++; + } + } + return; + } else if (in_delta_room_minus) { + // Round down (== do nothing). + return; + } else { // in_delta_room_plus + // Round up. + // Note again that the last digit could not be '9' since this would have + // stopped the loop earlier. + // We still have an ASSERT here, in case the preconditions were not + // satisfied. + ASSERT(buffer[(*length) -1] != '9'); + buffer[(*length) - 1]++; + return; + } + } +} + + +// Let v = numerator / denominator < 10. +// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point) +// from left to right. Once 'count' digits have been produced we decide wether +// to round up or down. Remainders of exactly .5 round upwards. Numbers such +// as 9.999999 propagate a carry all the way, and change the +// exponent (decimal_point), when rounding upwards. 
+static void GenerateCountedDigits(int count, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector buffer, int* length) { + ASSERT(count >= 0); + for (int i = 0; i < count - 1; ++i) { + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. + // digit = numerator / denominator (integer division). + // numerator = numerator % denominator. + buffer[i] = static_cast(digit + '0'); + // Prepare for next iteration. + numerator->Times10(); + } + // Generate the last digit. + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { + digit++; + } + ASSERT(digit <= 10); + buffer[count - 1] = static_cast(digit + '0'); + // Correct bad digits (in case we had a sequence of '9's). Propagate the + // carry until we hat a non-'9' or til we reach the first digit. + for (int i = count - 1; i > 0; --i) { + if (buffer[i] != '0' + 10) break; + buffer[i] = '0'; + buffer[i - 1]++; + } + if (buffer[0] == '0' + 10) { + // Propagate a carry past the top place. + buffer[0] = '1'; + (*decimal_point)++; + } + *length = count; +} + + +// Generates 'requested_digits' after the decimal point. It might omit +// trailing '0's. If the input number is too small then no digits at all are +// generated (ex.: 2 fixed digits for 0.00001). +// +// Input verifies: 1 <= (numerator + delta) / denominator < 10. +static void BignumToFixed(int requested_digits, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector(buffer), int* length) { + // Note that we have to look at more than just the requested_digits, since + // a number could be rounded up. Example: v=0.5 with requested_digits=0. + // Even though the power of v equals 0 we can't just stop here. + if (-(*decimal_point) > requested_digits) { + // The number is definitively too small. + // Ex: 0.001 with requested_digits == 1. 
+ // Set decimal-point to -requested_digits. This is what Gay does. + // Note that it should not have any effect anyways since the string is + // empty. + *decimal_point = -requested_digits; + *length = 0; + return; + } else if (-(*decimal_point) == requested_digits) { + // We only need to verify if the number rounds down or up. + // Ex: 0.04 and 0.06 with requested_digits == 1. + ASSERT(*decimal_point == -requested_digits); + // Initially the fraction lies in range (1, 10]. Multiply the denominator + // by 10 so that we can compare more easily. + denominator->Times10(); + if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { + // If the fraction is >= 0.5 then we have to include the rounded + // digit. + buffer[0] = '1'; + *length = 1; + (*decimal_point)++; + } else { + // Note that we caught most of similar cases earlier. + *length = 0; + } + return; + } else { + // The requested digits correspond to the digits after the point. + // The variable 'needed_digits' includes the digits before the point. + int needed_digits = (*decimal_point) + requested_digits; + GenerateCountedDigits(needed_digits, decimal_point, + numerator, denominator, + buffer, length); + } +} + + +// Returns an estimation of k such that 10^(k-1) <= v < 10^k where +// v = f * 2^exponent and 2^52 <= f < 2^53. +// v is hence a normalized double with the given exponent. The output is an +// approximation for the exponent of the decimal approimation .digits * 10^k. +// +// The result might undershoot by 1 in which case 10^k <= v < 10^k+1. +// Note: this property holds for v's upper boundary m+ too. +// 10^k <= m+ < 10^k+1. +// (see explanation below). +// +// Examples: +// EstimatePower(0) => 16 +// EstimatePower(-52) => 0 +// +// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0. +static int EstimatePower(int exponent) { + // This function estimates log10 of v where v = f*2^e (with e == exponent). 
+ // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)). + // Note that f is bounded by its container size. Let p = 53 (the double's + // significand size). Then 2^(p-1) <= f < 2^p. + // + // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close + // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)). + // The computed number undershoots by less than 0.631 (when we compute log3 + // and not log10). + // + // Optimization: since we only need an approximated result this computation + // can be performed on 64 bit integers. On x86/x64 architecture the speedup is + // not really measurable, though. + // + // Since we want to avoid overshooting we decrement by 1e10 so that + // floating-point imprecisions don't affect us. + // + // Explanation for v's boundary m+: the computation takes advantage of + // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement + // (even for denormals where the delta can be much more important). + + const double k1Log10 = 0.30102999566398114; // 1/lg(10) + + // For doubles len(f) == 53 (don't forget the hidden bit). + const int kSignificandSize = Double::kSignificandSize; + double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10); + return static_cast(estimate); +} + + +// See comments for InitialScaledStartValues. +static void InitialScaledStartValuesPositiveExponent( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // A positive exponent implies a positive power. + ASSERT(estimated_power >= 0); + // Since the estimated_power is positive we simply multiply the denominator + // by 10^estimated_power. + + // numerator = v. + numerator->AssignUInt64(significand); + numerator->ShiftLeft(exponent); + // denominator = 10^estimated_power. 
+ denominator->AssignPowerUInt16(10, estimated_power); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + denominator->ShiftLeft(1); + numerator->ShiftLeft(1); + // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common + // denominator (of 2) delta_plus equals 2^e. + delta_plus->AssignUInt16(1); + delta_plus->ShiftLeft(exponent); + // Same for delta_minus. The adjustments if f == 2^p-1 are done later. + delta_minus->AssignUInt16(1); + delta_minus->ShiftLeft(exponent); + } +} + + +// See comments for InitialScaledStartValues +static void InitialScaledStartValuesNegativeExponentPositivePower( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // v = f * 2^e with e < 0, and with estimated_power >= 0. + // This means that e is close to 0 (have a look at how estimated_power is + // computed). + + // numerator = significand + // since v = significand * 2^exponent this is equivalent to + // numerator = v * / 2^-exponent + numerator->AssignUInt64(significand); + // denominator = 10^estimated_power * 2^-exponent (with exponent < 0) + denominator->AssignPowerUInt16(10, estimated_power); + denominator->ShiftLeft(-exponent); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + denominator->ShiftLeft(1); + numerator->ShiftLeft(1); + // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common + // denominator (of 2) delta_plus equals 2^e. + // Given that the denominator already includes v's exponent the distance + // to the boundaries is simply 1. + delta_plus->AssignUInt16(1); + // Same for delta_minus. The adjustments if f == 2^p-1 are done later. 
+ delta_minus->AssignUInt16(1); + } +} + + +// See comments for InitialScaledStartValues +static void InitialScaledStartValuesNegativeExponentNegativePower( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // Instead of multiplying the denominator with 10^estimated_power we + // multiply all values (numerator and deltas) by 10^-estimated_power. + + // Use numerator as temporary container for power_ten. + Bignum* power_ten = numerator; + power_ten->AssignPowerUInt16(10, -estimated_power); + + if (need_boundary_deltas) { + // Since power_ten == numerator we must make a copy of 10^estimated_power + // before we complete the computation of the numerator. + // delta_plus = delta_minus = 10^estimated_power + delta_plus->AssignBignum(*power_ten); + delta_minus->AssignBignum(*power_ten); + } + + // numerator = significand * 2 * 10^-estimated_power + // since v = significand * 2^exponent this is equivalent to + // numerator = v * 10^-estimated_power * 2 * 2^-exponent. + // Remember: numerator has been abused as power_ten. So no need to assign it + // to itself. + ASSERT(numerator == power_ten); + numerator->MultiplyByUInt64(significand); + + // denominator = 2 * 2^-exponent with exponent < 0. + denominator->AssignUInt16(1); + denominator->ShiftLeft(-exponent); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + numerator->ShiftLeft(1); + denominator->ShiftLeft(1); + // With this shift the boundaries have their correct value, since + // delta_plus = 10^-estimated_power, and + // delta_minus = 10^-estimated_power. + // These assignments have been done earlier. + // The adjustments if f == 2^p-1 (lower boundary is closer) are done later. + } +} + + +// Let v = significand * 2^exponent. 
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator +// and denominator. The functions GenerateShortestDigits and +// GenerateCountedDigits will then convert this ratio to its decimal +// representation d, with the required accuracy. +// Then d * 10^estimated_power is the representation of v. +// (Note: the fraction and the estimated_power might get adjusted before +// generating the decimal representation.) +// +// The initial start values consist of: +// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power. +// - a scaled (common) denominator. +// optionally (used by GenerateShortestDigits to decide if it has the shortest +// decimal converting back to v): +// - v - m-: the distance to the lower boundary. +// - m+ - v: the distance to the upper boundary. +// +// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator. +// +// Let ep == estimated_power, then the returned values will satisfy: +// v / 10^ep = numerator / denominator. +// v's boundarys m- and m+: +// m- / 10^ep == v / 10^ep - delta_minus / denominator +// m+ / 10^ep == v / 10^ep + delta_plus / denominator +// Or in other words: +// m- == v - delta_minus * 10^ep / denominator; +// m+ == v + delta_plus * 10^ep / denominator; +// +// Since 10^(k-1) <= v < 10^k (with k == estimated_power) +// or 10^k <= v < 10^(k+1) +// we then have 0.1 <= numerator/denominator < 1 +// or 1 <= numerator/denominator < 10 +// +// It is then easy to kickstart the digit-generation routine. +// +// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST +// or BIGNUM_DTOA_SHORTEST_SINGLE. 
+ +static void InitialScaledStartValues(uint64_t significand, + int exponent, + bool lower_boundary_is_closer, + int estimated_power, + bool need_boundary_deltas, + Bignum* numerator, + Bignum* denominator, + Bignum* delta_minus, + Bignum* delta_plus) { + if (exponent >= 0) { + InitialScaledStartValuesPositiveExponent( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } else if (estimated_power >= 0) { + InitialScaledStartValuesNegativeExponentPositivePower( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } else { + InitialScaledStartValuesNegativeExponentNegativePower( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } + + if (need_boundary_deltas && lower_boundary_is_closer) { + // The lower boundary is closer at half the distance of "normal" numbers. + // Increase the common denominator and adapt all but the delta_minus. + denominator->ShiftLeft(1); // *2 + numerator->ShiftLeft(1); // *2 + delta_plus->ShiftLeft(1); // *2 + } +} + + +// This routine multiplies numerator/denominator so that its values lies in the +// range 1-10. That is after a call to this function we have: +// 1 <= (numerator + delta_plus) /denominator < 10. +// Let numerator the input before modification and numerator' the argument +// after modification, then the output-parameter decimal_point is such that +// numerator / denominator * 10^estimated_power == +// numerator' / denominator' * 10^(decimal_point - 1) +// In some cases estimated_power was too low, and this is already the case. We +// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k == +// estimated_power) but do not touch the numerator or denominator. +// Otherwise the routine multiplies the numerator and the deltas by 10. 
+static void FixupMultiply10(int estimated_power, bool is_even, + int* decimal_point, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + bool in_range; + if (is_even) { + // For IEEE doubles half-way cases (in decimal system numbers ending with 5) + // are rounded to the closest floating-point number with even significand. + in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; + } else { + in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; + } + if (in_range) { + // Since numerator + delta_plus >= denominator we already have + // 1 <= numerator/denominator < 10. Simply update the estimated_power. + *decimal_point = estimated_power + 1; + } else { + *decimal_point = estimated_power; + numerator->Times10(); + if (Bignum::Equal(*delta_minus, *delta_plus)) { + delta_minus->Times10(); + delta_plus->AssignBignum(*delta_minus); + } else { + delta_minus->Times10(); + delta_plus->Times10(); + } + } +} + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/bignum-dtoa.h b/contrib/libs/double-conversion/bignum-dtoa.h index 25904a5788c..34b961992d6 100644 --- a/contrib/libs/double-conversion/bignum-dtoa.h +++ b/contrib/libs/double-conversion/bignum-dtoa.h @@ -1,84 +1,84 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_ -#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_ - -#include "utils.h" - -namespace double_conversion { - -enum BignumDtoaMode { - // Return the shortest correct representation. - // For example the output of 0.299999999999999988897 is (the less accurate but - // correct) 0.3. - BIGNUM_DTOA_SHORTEST, - // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats. - BIGNUM_DTOA_SHORTEST_SINGLE, - // Return a fixed number of digits after the decimal point. - // For instance fixed(0.1, 4) becomes 0.1000 - // If the input number is big, the output will be big. - BIGNUM_DTOA_FIXED, - // Return a fixed number of digits, no matter what the exponent is. - BIGNUM_DTOA_PRECISION -}; - -// Converts the given double 'v' to ascii. -// The result should be interpreted as buffer * 10^(point-length). -// The buffer will be null-terminated. -// -// The input v must be > 0 and different from NaN, and Infinity. 
-// -// The output depends on the given mode: -// - SHORTEST: produce the least amount of digits for which the internal -// identity requirement is still satisfied. If the digits are printed -// (together with the correct exponent) then reading this number will give -// 'v' again. The buffer will choose the representation that is closest to -// 'v'. If there are two at the same distance, than the number is round up. -// In this mode the 'requested_digits' parameter is ignored. -// - FIXED: produces digits necessary to print a given number with -// 'requested_digits' digits after the decimal point. The produced digits -// might be too short in which case the caller has to fill the gaps with '0's. -// Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. -// Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns -// buffer="2", point=0. -// Note: the length of the returned buffer has no meaning wrt the significance -// of its digits. That is, just because it contains '0's does not mean that -// any other digit would not satisfy the internal identity requirement. -// - PRECISION: produces 'requested_digits' where the first digit is not '0'. -// Even though the length of produced digits usually equals -// 'requested_digits', the function is allowed to return fewer digits, in -// which case the caller has to fill the missing digits with '0's. -// Halfway cases are again rounded up. -// 'BignumDtoa' expects the given buffer to be big enough to hold all digits -// and a terminating null-character. -void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, - Vector buffer, int* length, int* point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_ +// Copyright 2010 the V8 project authors. All rights reserved. 
+// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_ +#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_ + +#include "utils.h" + +namespace double_conversion { + +enum BignumDtoaMode { + // Return the shortest correct representation. + // For example the output of 0.299999999999999988897 is (the less accurate but + // correct) 0.3. + BIGNUM_DTOA_SHORTEST, + // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats. 
+ BIGNUM_DTOA_SHORTEST_SINGLE, + // Return a fixed number of digits after the decimal point. + // For instance fixed(0.1, 4) becomes 0.1000 + // If the input number is big, the output will be big. + BIGNUM_DTOA_FIXED, + // Return a fixed number of digits, no matter what the exponent is. + BIGNUM_DTOA_PRECISION +}; + +// Converts the given double 'v' to ascii. +// The result should be interpreted as buffer * 10^(point-length). +// The buffer will be null-terminated. +// +// The input v must be > 0 and different from NaN, and Infinity. +// +// The output depends on the given mode: +// - SHORTEST: produce the least amount of digits for which the internal +// identity requirement is still satisfied. If the digits are printed +// (together with the correct exponent) then reading this number will give +// 'v' again. The buffer will choose the representation that is closest to +// 'v'. If there are two at the same distance, than the number is round up. +// In this mode the 'requested_digits' parameter is ignored. +// - FIXED: produces digits necessary to print a given number with +// 'requested_digits' digits after the decimal point. The produced digits +// might be too short in which case the caller has to fill the gaps with '0's. +// Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. +// Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns +// buffer="2", point=0. +// Note: the length of the returned buffer has no meaning wrt the significance +// of its digits. That is, just because it contains '0's does not mean that +// any other digit would not satisfy the internal identity requirement. +// - PRECISION: produces 'requested_digits' where the first digit is not '0'. +// Even though the length of produced digits usually equals +// 'requested_digits', the function is allowed to return fewer digits, in +// which case the caller has to fill the missing digits with '0's. +// Halfway cases are again rounded up. 
+// 'BignumDtoa' expects the given buffer to be big enough to hold all digits +// and a terminating null-character. +void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, + Vector buffer, int* length, int* point); + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_ diff --git a/contrib/libs/double-conversion/bignum.cc b/contrib/libs/double-conversion/bignum.cc index 490071facdf..d077eef3f55 100644 --- a/contrib/libs/double-conversion/bignum.cc +++ b/contrib/libs/double-conversion/bignum.cc @@ -1,767 +1,767 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "bignum.h" -#include "utils.h" - -namespace double_conversion { - -Bignum::Bignum() +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "bignum.h" +#include "utils.h" + +namespace double_conversion { + +Bignum::Bignum() : bigits_buffer_(), bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) { - for (int i = 0; i < kBigitCapacity; ++i) { - bigits_[i] = 0; - } -} - - -template -static int BitSize(S value) { - (void) value; // Mark variable as used. - return 8 * sizeof(value); -} - -// Guaranteed to lie in one Bigit. -void Bignum::AssignUInt16(uint16_t value) { - ASSERT(kBigitSize >= BitSize(value)); - Zero(); - if (value == 0) return; - - EnsureCapacity(1); - bigits_[0] = value; - used_digits_ = 1; -} - - -void Bignum::AssignUInt64(uint64_t value) { - const int kUInt64Size = 64; - - Zero(); - if (value == 0) return; - - int needed_bigits = kUInt64Size / kBigitSize + 1; - EnsureCapacity(needed_bigits); - for (int i = 0; i < needed_bigits; ++i) { - bigits_[i] = value & kBigitMask; - value = value >> kBigitSize; - } - used_digits_ = needed_bigits; - Clamp(); -} - - -void Bignum::AssignBignum(const Bignum& other) { - exponent_ = other.exponent_; - for (int i = 0; i < other.used_digits_; ++i) { - bigits_[i] = other.bigits_[i]; - } - // Clear the excess digits (if there were any). 
- for (int i = other.used_digits_; i < used_digits_; ++i) { - bigits_[i] = 0; - } - used_digits_ = other.used_digits_; -} - - -static uint64_t ReadUInt64(Vector buffer, - int from, - int digits_to_read) { - uint64_t result = 0; - for (int i = from; i < from + digits_to_read; ++i) { - int digit = buffer[i] - '0'; - ASSERT(0 <= digit && digit <= 9); - result = result * 10 + digit; - } - return result; -} - - -void Bignum::AssignDecimalString(Vector value) { - // 2^64 = 18446744073709551616 > 10^19 - const int kMaxUint64DecimalDigits = 19; - Zero(); - int length = value.length(); - unsigned int pos = 0; - // Let's just say that each digit needs 4 bits. - while (length >= kMaxUint64DecimalDigits) { - uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits); - pos += kMaxUint64DecimalDigits; - length -= kMaxUint64DecimalDigits; - MultiplyByPowerOfTen(kMaxUint64DecimalDigits); - AddUInt64(digits); - } - uint64_t digits = ReadUInt64(value, pos, length); - MultiplyByPowerOfTen(length); - AddUInt64(digits); - Clamp(); -} - - -static int HexCharValue(char c) { - if ('0' <= c && c <= '9') return c - '0'; - if ('a' <= c && c <= 'f') return 10 + c - 'a'; - ASSERT('A' <= c && c <= 'F'); - return 10 + c - 'A'; -} - - -void Bignum::AssignHexString(Vector value) { - Zero(); - int length = value.length(); - - int needed_bigits = length * 4 / kBigitSize + 1; - EnsureCapacity(needed_bigits); - int string_index = length - 1; - for (int i = 0; i < needed_bigits - 1; ++i) { - // These bigits are guaranteed to be "full". 
- Chunk current_bigit = 0; - for (int j = 0; j < kBigitSize / 4; j++) { - current_bigit += HexCharValue(value[string_index--]) << (j * 4); - } - bigits_[i] = current_bigit; - } - used_digits_ = needed_bigits - 1; - - Chunk most_significant_bigit = 0; // Could be = 0; - for (int j = 0; j <= string_index; ++j) { - most_significant_bigit <<= 4; - most_significant_bigit += HexCharValue(value[j]); - } - if (most_significant_bigit != 0) { - bigits_[used_digits_] = most_significant_bigit; - used_digits_++; - } - Clamp(); -} - - -void Bignum::AddUInt64(uint64_t operand) { - if (operand == 0) return; - Bignum other; - other.AssignUInt64(operand); - AddBignum(other); -} - - -void Bignum::AddBignum(const Bignum& other) { - ASSERT(IsClamped()); - ASSERT(other.IsClamped()); - - // If this has a greater exponent than other append zero-bigits to this. - // After this call exponent_ <= other.exponent_. - Align(other); - - // There are two possibilities: - // aaaaaaaaaaa 0000 (where the 0s represent a's exponent) - // bbbbb 00000000 - // ---------------- - // ccccccccccc 0000 - // or - // aaaaaaaaaa 0000 - // bbbbbbbbb 0000000 - // ----------------- - // cccccccccccc 0000 - // In both cases we might need a carry bigit. - - EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_); - Chunk carry = 0; - int bigit_pos = other.exponent_ - exponent_; - ASSERT(bigit_pos >= 0); - for (int i = 0; i < other.used_digits_; ++i) { - Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry; - bigits_[bigit_pos] = sum & kBigitMask; - carry = sum >> kBigitSize; - bigit_pos++; - } - - while (carry != 0) { - Chunk sum = bigits_[bigit_pos] + carry; - bigits_[bigit_pos] = sum & kBigitMask; - carry = sum >> kBigitSize; - bigit_pos++; - } - used_digits_ = Max(bigit_pos, used_digits_); - ASSERT(IsClamped()); -} - - -void Bignum::SubtractBignum(const Bignum& other) { - ASSERT(IsClamped()); - ASSERT(other.IsClamped()); - // We require this to be bigger than other. 
- ASSERT(LessEqual(other, *this)); - - Align(other); - - int offset = other.exponent_ - exponent_; - Chunk borrow = 0; - int i; - for (i = 0; i < other.used_digits_; ++i) { - ASSERT((borrow == 0) || (borrow == 1)); - Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow; - bigits_[i + offset] = difference & kBigitMask; - borrow = difference >> (kChunkSize - 1); - } - while (borrow != 0) { - Chunk difference = bigits_[i + offset] - borrow; - bigits_[i + offset] = difference & kBigitMask; - borrow = difference >> (kChunkSize - 1); - ++i; - } - Clamp(); -} - - -void Bignum::ShiftLeft(int shift_amount) { - if (used_digits_ == 0) return; - exponent_ += shift_amount / kBigitSize; - int local_shift = shift_amount % kBigitSize; - EnsureCapacity(used_digits_ + 1); - BigitsShiftLeft(local_shift); -} - - -void Bignum::MultiplyByUInt32(uint32_t factor) { - if (factor == 1) return; - if (factor == 0) { - Zero(); - return; - } - if (used_digits_ == 0) return; - - // The product of a bigit with the factor is of size kBigitSize + 32. - // Assert that this number + 1 (for the carry) fits into double chunk. 
- ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1); - DoubleChunk carry = 0; - for (int i = 0; i < used_digits_; ++i) { - DoubleChunk product = static_cast(factor) * bigits_[i] + carry; - bigits_[i] = static_cast(product & kBigitMask); - carry = (product >> kBigitSize); - } - while (carry != 0) { - EnsureCapacity(used_digits_ + 1); - bigits_[used_digits_] = carry & kBigitMask; - used_digits_++; - carry >>= kBigitSize; - } -} - - -void Bignum::MultiplyByUInt64(uint64_t factor) { - if (factor == 1) return; - if (factor == 0) { - Zero(); - return; - } - ASSERT(kBigitSize < 32); - uint64_t carry = 0; - uint64_t low = factor & 0xFFFFFFFF; - uint64_t high = factor >> 32; - for (int i = 0; i < used_digits_; ++i) { - uint64_t product_low = low * bigits_[i]; - uint64_t product_high = high * bigits_[i]; - uint64_t tmp = (carry & kBigitMask) + product_low; - bigits_[i] = tmp & kBigitMask; - carry = (carry >> kBigitSize) + (tmp >> kBigitSize) + - (product_high << (32 - kBigitSize)); - } - while (carry != 0) { - EnsureCapacity(used_digits_ + 1); - bigits_[used_digits_] = carry & kBigitMask; - used_digits_++; - carry >>= kBigitSize; - } -} - - -void Bignum::MultiplyByPowerOfTen(int exponent) { - const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d); - const uint16_t kFive1 = 5; - const uint16_t kFive2 = kFive1 * 5; - const uint16_t kFive3 = kFive2 * 5; - const uint16_t kFive4 = kFive3 * 5; - const uint16_t kFive5 = kFive4 * 5; - const uint16_t kFive6 = kFive5 * 5; - const uint32_t kFive7 = kFive6 * 5; - const uint32_t kFive8 = kFive7 * 5; - const uint32_t kFive9 = kFive8 * 5; - const uint32_t kFive10 = kFive9 * 5; - const uint32_t kFive11 = kFive10 * 5; - const uint32_t kFive12 = kFive11 * 5; - const uint32_t kFive13 = kFive12 * 5; - const uint32_t kFive1_to_12[] = - { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6, - kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 }; - - ASSERT(exponent >= 0); - if (exponent == 0) return; - if (used_digits_ == 0) return; - - // We 
shift by exponent at the end just before returning. - int remaining_exponent = exponent; - while (remaining_exponent >= 27) { - MultiplyByUInt64(kFive27); - remaining_exponent -= 27; - } - while (remaining_exponent >= 13) { - MultiplyByUInt32(kFive13); - remaining_exponent -= 13; - } - if (remaining_exponent > 0) { - MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]); - } - ShiftLeft(exponent); -} - - -void Bignum::Square() { - ASSERT(IsClamped()); - int product_length = 2 * used_digits_; - EnsureCapacity(product_length); - - // Comba multiplication: compute each column separately. - // Example: r = a2a1a0 * b2b1b0. - // r = 1 * a0b0 + - // 10 * (a1b0 + a0b1) + - // 100 * (a2b0 + a1b1 + a0b2) + - // 1000 * (a2b1 + a1b2) + - // 10000 * a2b2 - // - // In the worst case we have to accumulate nb-digits products of digit*digit. - // - // Assert that the additional number of bits in a DoubleChunk are enough to - // sum up used_digits of Bigit*Bigit. - if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) { - UNIMPLEMENTED(); - } - DoubleChunk accumulator = 0; - // First shift the digits so we don't overwrite them. - int copy_offset = used_digits_; - for (int i = 0; i < used_digits_; ++i) { - bigits_[copy_offset + i] = bigits_[i]; - } - // We have two loops to avoid some 'if's in the loop. - for (int i = 0; i < used_digits_; ++i) { - // Process temporary digit i with power i. - // The sum of the two indices must be equal to i. - int bigit_index1 = i; - int bigit_index2 = 0; - // Sum all of the sub-products. 
- while (bigit_index1 >= 0) { - Chunk chunk1 = bigits_[copy_offset + bigit_index1]; - Chunk chunk2 = bigits_[copy_offset + bigit_index2]; - accumulator += static_cast(chunk1) * chunk2; - bigit_index1--; - bigit_index2++; - } - bigits_[i] = static_cast(accumulator) & kBigitMask; - accumulator >>= kBigitSize; - } - for (int i = used_digits_; i < product_length; ++i) { - int bigit_index1 = used_digits_ - 1; - int bigit_index2 = i - bigit_index1; - // Invariant: sum of both indices is again equal to i. - // Inner loop runs 0 times on last iteration, emptying accumulator. - while (bigit_index2 < used_digits_) { - Chunk chunk1 = bigits_[copy_offset + bigit_index1]; - Chunk chunk2 = bigits_[copy_offset + bigit_index2]; - accumulator += static_cast(chunk1) * chunk2; - bigit_index1--; - bigit_index2++; - } - // The overwritten bigits_[i] will never be read in further loop iterations, - // because bigit_index1 and bigit_index2 are always greater - // than i - used_digits_. - bigits_[i] = static_cast(accumulator) & kBigitMask; - accumulator >>= kBigitSize; - } - // Since the result was guaranteed to lie inside the number the - // accumulator must be 0 now. - ASSERT(accumulator == 0); - - // Don't forget to update the used_digits and the exponent. - used_digits_ = product_length; - exponent_ *= 2; - Clamp(); -} - - -void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) { - ASSERT(base != 0); - ASSERT(power_exponent >= 0); - if (power_exponent == 0) { - AssignUInt16(1); - return; - } - Zero(); - int shifts = 0; - // We expect base to be in range 2-32, and most often to be 10. - // It does not make much sense to implement different algorithms for counting - // the bits. - while ((base & 1) == 0) { - base >>= 1; - shifts++; - } - int bit_size = 0; - int tmp_base = base; - while (tmp_base != 0) { - tmp_base >>= 1; - bit_size++; - } - int final_size = bit_size * power_exponent; - // 1 extra bigit for the shifting, and one for rounded final_size. 
- EnsureCapacity(final_size / kBigitSize + 2); - - // Left to Right exponentiation. - int mask = 1; - while (power_exponent >= mask) mask <<= 1; - - // The mask is now pointing to the bit above the most significant 1-bit of - // power_exponent. - // Get rid of first 1-bit; - mask >>= 2; - uint64_t this_value = base; - + for (int i = 0; i < kBigitCapacity; ++i) { + bigits_[i] = 0; + } +} + + +template +static int BitSize(S value) { + (void) value; // Mark variable as used. + return 8 * sizeof(value); +} + +// Guaranteed to lie in one Bigit. +void Bignum::AssignUInt16(uint16_t value) { + ASSERT(kBigitSize >= BitSize(value)); + Zero(); + if (value == 0) return; + + EnsureCapacity(1); + bigits_[0] = value; + used_digits_ = 1; +} + + +void Bignum::AssignUInt64(uint64_t value) { + const int kUInt64Size = 64; + + Zero(); + if (value == 0) return; + + int needed_bigits = kUInt64Size / kBigitSize + 1; + EnsureCapacity(needed_bigits); + for (int i = 0; i < needed_bigits; ++i) { + bigits_[i] = value & kBigitMask; + value = value >> kBigitSize; + } + used_digits_ = needed_bigits; + Clamp(); +} + + +void Bignum::AssignBignum(const Bignum& other) { + exponent_ = other.exponent_; + for (int i = 0; i < other.used_digits_; ++i) { + bigits_[i] = other.bigits_[i]; + } + // Clear the excess digits (if there were any). + for (int i = other.used_digits_; i < used_digits_; ++i) { + bigits_[i] = 0; + } + used_digits_ = other.used_digits_; +} + + +static uint64_t ReadUInt64(Vector buffer, + int from, + int digits_to_read) { + uint64_t result = 0; + for (int i = from; i < from + digits_to_read; ++i) { + int digit = buffer[i] - '0'; + ASSERT(0 <= digit && digit <= 9); + result = result * 10 + digit; + } + return result; +} + + +void Bignum::AssignDecimalString(Vector value) { + // 2^64 = 18446744073709551616 > 10^19 + const int kMaxUint64DecimalDigits = 19; + Zero(); + int length = value.length(); + unsigned int pos = 0; + // Let's just say that each digit needs 4 bits. 
+ while (length >= kMaxUint64DecimalDigits) { + uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits); + pos += kMaxUint64DecimalDigits; + length -= kMaxUint64DecimalDigits; + MultiplyByPowerOfTen(kMaxUint64DecimalDigits); + AddUInt64(digits); + } + uint64_t digits = ReadUInt64(value, pos, length); + MultiplyByPowerOfTen(length); + AddUInt64(digits); + Clamp(); +} + + +static int HexCharValue(char c) { + if ('0' <= c && c <= '9') return c - '0'; + if ('a' <= c && c <= 'f') return 10 + c - 'a'; + ASSERT('A' <= c && c <= 'F'); + return 10 + c - 'A'; +} + + +void Bignum::AssignHexString(Vector value) { + Zero(); + int length = value.length(); + + int needed_bigits = length * 4 / kBigitSize + 1; + EnsureCapacity(needed_bigits); + int string_index = length - 1; + for (int i = 0; i < needed_bigits - 1; ++i) { + // These bigits are guaranteed to be "full". + Chunk current_bigit = 0; + for (int j = 0; j < kBigitSize / 4; j++) { + current_bigit += HexCharValue(value[string_index--]) << (j * 4); + } + bigits_[i] = current_bigit; + } + used_digits_ = needed_bigits - 1; + + Chunk most_significant_bigit = 0; // Could be = 0; + for (int j = 0; j <= string_index; ++j) { + most_significant_bigit <<= 4; + most_significant_bigit += HexCharValue(value[j]); + } + if (most_significant_bigit != 0) { + bigits_[used_digits_] = most_significant_bigit; + used_digits_++; + } + Clamp(); +} + + +void Bignum::AddUInt64(uint64_t operand) { + if (operand == 0) return; + Bignum other; + other.AssignUInt64(operand); + AddBignum(other); +} + + +void Bignum::AddBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + + // If this has a greater exponent than other append zero-bigits to this. + // After this call exponent_ <= other.exponent_. 
+ Align(other); + + // There are two possibilities: + // aaaaaaaaaaa 0000 (where the 0s represent a's exponent) + // bbbbb 00000000 + // ---------------- + // ccccccccccc 0000 + // or + // aaaaaaaaaa 0000 + // bbbbbbbbb 0000000 + // ----------------- + // cccccccccccc 0000 + // In both cases we might need a carry bigit. + + EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_); + Chunk carry = 0; + int bigit_pos = other.exponent_ - exponent_; + ASSERT(bigit_pos >= 0); + for (int i = 0; i < other.used_digits_; ++i) { + Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry; + bigits_[bigit_pos] = sum & kBigitMask; + carry = sum >> kBigitSize; + bigit_pos++; + } + + while (carry != 0) { + Chunk sum = bigits_[bigit_pos] + carry; + bigits_[bigit_pos] = sum & kBigitMask; + carry = sum >> kBigitSize; + bigit_pos++; + } + used_digits_ = Max(bigit_pos, used_digits_); + ASSERT(IsClamped()); +} + + +void Bignum::SubtractBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + // We require this to be bigger than other. 
+ ASSERT(LessEqual(other, *this)); + + Align(other); + + int offset = other.exponent_ - exponent_; + Chunk borrow = 0; + int i; + for (i = 0; i < other.used_digits_; ++i) { + ASSERT((borrow == 0) || (borrow == 1)); + Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow; + bigits_[i + offset] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + } + while (borrow != 0) { + Chunk difference = bigits_[i + offset] - borrow; + bigits_[i + offset] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + ++i; + } + Clamp(); +} + + +void Bignum::ShiftLeft(int shift_amount) { + if (used_digits_ == 0) return; + exponent_ += shift_amount / kBigitSize; + int local_shift = shift_amount % kBigitSize; + EnsureCapacity(used_digits_ + 1); + BigitsShiftLeft(local_shift); +} + + +void Bignum::MultiplyByUInt32(uint32_t factor) { + if (factor == 1) return; + if (factor == 0) { + Zero(); + return; + } + if (used_digits_ == 0) return; + + // The product of a bigit with the factor is of size kBigitSize + 32. + // Assert that this number + 1 (for the carry) fits into double chunk. 
+ ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1); + DoubleChunk carry = 0; + for (int i = 0; i < used_digits_; ++i) { + DoubleChunk product = static_cast(factor) * bigits_[i] + carry; + bigits_[i] = static_cast(product & kBigitMask); + carry = (product >> kBigitSize); + } + while (carry != 0) { + EnsureCapacity(used_digits_ + 1); + bigits_[used_digits_] = carry & kBigitMask; + used_digits_++; + carry >>= kBigitSize; + } +} + + +void Bignum::MultiplyByUInt64(uint64_t factor) { + if (factor == 1) return; + if (factor == 0) { + Zero(); + return; + } + ASSERT(kBigitSize < 32); + uint64_t carry = 0; + uint64_t low = factor & 0xFFFFFFFF; + uint64_t high = factor >> 32; + for (int i = 0; i < used_digits_; ++i) { + uint64_t product_low = low * bigits_[i]; + uint64_t product_high = high * bigits_[i]; + uint64_t tmp = (carry & kBigitMask) + product_low; + bigits_[i] = tmp & kBigitMask; + carry = (carry >> kBigitSize) + (tmp >> kBigitSize) + + (product_high << (32 - kBigitSize)); + } + while (carry != 0) { + EnsureCapacity(used_digits_ + 1); + bigits_[used_digits_] = carry & kBigitMask; + used_digits_++; + carry >>= kBigitSize; + } +} + + +void Bignum::MultiplyByPowerOfTen(int exponent) { + const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d); + const uint16_t kFive1 = 5; + const uint16_t kFive2 = kFive1 * 5; + const uint16_t kFive3 = kFive2 * 5; + const uint16_t kFive4 = kFive3 * 5; + const uint16_t kFive5 = kFive4 * 5; + const uint16_t kFive6 = kFive5 * 5; + const uint32_t kFive7 = kFive6 * 5; + const uint32_t kFive8 = kFive7 * 5; + const uint32_t kFive9 = kFive8 * 5; + const uint32_t kFive10 = kFive9 * 5; + const uint32_t kFive11 = kFive10 * 5; + const uint32_t kFive12 = kFive11 * 5; + const uint32_t kFive13 = kFive12 * 5; + const uint32_t kFive1_to_12[] = + { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6, + kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 }; + + ASSERT(exponent >= 0); + if (exponent == 0) return; + if (used_digits_ == 0) return; + + // We 
shift by exponent at the end just before returning. + int remaining_exponent = exponent; + while (remaining_exponent >= 27) { + MultiplyByUInt64(kFive27); + remaining_exponent -= 27; + } + while (remaining_exponent >= 13) { + MultiplyByUInt32(kFive13); + remaining_exponent -= 13; + } + if (remaining_exponent > 0) { + MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]); + } + ShiftLeft(exponent); +} + + +void Bignum::Square() { + ASSERT(IsClamped()); + int product_length = 2 * used_digits_; + EnsureCapacity(product_length); + + // Comba multiplication: compute each column separately. + // Example: r = a2a1a0 * b2b1b0. + // r = 1 * a0b0 + + // 10 * (a1b0 + a0b1) + + // 100 * (a2b0 + a1b1 + a0b2) + + // 1000 * (a2b1 + a1b2) + + // 10000 * a2b2 + // + // In the worst case we have to accumulate nb-digits products of digit*digit. + // + // Assert that the additional number of bits in a DoubleChunk are enough to + // sum up used_digits of Bigit*Bigit. + if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) { + UNIMPLEMENTED(); + } + DoubleChunk accumulator = 0; + // First shift the digits so we don't overwrite them. + int copy_offset = used_digits_; + for (int i = 0; i < used_digits_; ++i) { + bigits_[copy_offset + i] = bigits_[i]; + } + // We have two loops to avoid some 'if's in the loop. + for (int i = 0; i < used_digits_; ++i) { + // Process temporary digit i with power i. + // The sum of the two indices must be equal to i. + int bigit_index1 = i; + int bigit_index2 = 0; + // Sum all of the sub-products. 
+ while (bigit_index1 >= 0) { + Chunk chunk1 = bigits_[copy_offset + bigit_index1]; + Chunk chunk2 = bigits_[copy_offset + bigit_index2]; + accumulator += static_cast(chunk1) * chunk2; + bigit_index1--; + bigit_index2++; + } + bigits_[i] = static_cast(accumulator) & kBigitMask; + accumulator >>= kBigitSize; + } + for (int i = used_digits_; i < product_length; ++i) { + int bigit_index1 = used_digits_ - 1; + int bigit_index2 = i - bigit_index1; + // Invariant: sum of both indices is again equal to i. + // Inner loop runs 0 times on last iteration, emptying accumulator. + while (bigit_index2 < used_digits_) { + Chunk chunk1 = bigits_[copy_offset + bigit_index1]; + Chunk chunk2 = bigits_[copy_offset + bigit_index2]; + accumulator += static_cast(chunk1) * chunk2; + bigit_index1--; + bigit_index2++; + } + // The overwritten bigits_[i] will never be read in further loop iterations, + // because bigit_index1 and bigit_index2 are always greater + // than i - used_digits_. + bigits_[i] = static_cast(accumulator) & kBigitMask; + accumulator >>= kBigitSize; + } + // Since the result was guaranteed to lie inside the number the + // accumulator must be 0 now. + ASSERT(accumulator == 0); + + // Don't forget to update the used_digits and the exponent. + used_digits_ = product_length; + exponent_ *= 2; + Clamp(); +} + + +void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) { + ASSERT(base != 0); + ASSERT(power_exponent >= 0); + if (power_exponent == 0) { + AssignUInt16(1); + return; + } + Zero(); + int shifts = 0; + // We expect base to be in range 2-32, and most often to be 10. + // It does not make much sense to implement different algorithms for counting + // the bits. + while ((base & 1) == 0) { + base >>= 1; + shifts++; + } + int bit_size = 0; + int tmp_base = base; + while (tmp_base != 0) { + tmp_base >>= 1; + bit_size++; + } + int final_size = bit_size * power_exponent; + // 1 extra bigit for the shifting, and one for rounded final_size. 
+ EnsureCapacity(final_size / kBigitSize + 2); + + // Left to Right exponentiation. + int mask = 1; + while (power_exponent >= mask) mask <<= 1; + + // The mask is now pointing to the bit above the most significant 1-bit of + // power_exponent. + // Get rid of first 1-bit; + mask >>= 2; + uint64_t this_value = base; + bool delayed_multiplication = false; - const uint64_t max_32bits = 0xFFFFFFFF; - while (mask != 0 && this_value <= max_32bits) { - this_value = this_value * this_value; - // Verify that there is enough space in this_value to perform the - // multiplication. The first bit_size bits must be 0. - if ((power_exponent & mask) != 0) { + const uint64_t max_32bits = 0xFFFFFFFF; + while (mask != 0 && this_value <= max_32bits) { + this_value = this_value * this_value; + // Verify that there is enough space in this_value to perform the + // multiplication. The first bit_size bits must be 0. + if ((power_exponent & mask) != 0) { ASSERT(bit_size > 0); - uint64_t base_bits_mask = - ~((static_cast(1) << (64 - bit_size)) - 1); - bool high_bits_zero = (this_value & base_bits_mask) == 0; - if (high_bits_zero) { - this_value *= base; - } else { + uint64_t base_bits_mask = + ~((static_cast(1) << (64 - bit_size)) - 1); + bool high_bits_zero = (this_value & base_bits_mask) == 0; + if (high_bits_zero) { + this_value *= base; + } else { delayed_multiplication = true; - } - } - mask >>= 1; - } - AssignUInt64(this_value); + } + } + mask >>= 1; + } + AssignUInt64(this_value); if (delayed_multiplication) { - MultiplyByUInt32(base); - } - - // Now do the same thing as a bignum. - while (mask != 0) { - Square(); - if ((power_exponent & mask) != 0) { - MultiplyByUInt32(base); - } - mask >>= 1; - } - - // And finally add the saved shifts. - ShiftLeft(shifts * power_exponent); -} - - -// Precondition: this/other < 16bit. 
-uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) { - ASSERT(IsClamped()); - ASSERT(other.IsClamped()); - ASSERT(other.used_digits_ > 0); - - // Easy case: if we have less digits than the divisor than the result is 0. - // Note: this handles the case where this == 0, too. - if (BigitLength() < other.BigitLength()) { - return 0; - } - - Align(other); - - uint16_t result = 0; - - // Start by removing multiples of 'other' until both numbers have the same - // number of digits. - while (BigitLength() > other.BigitLength()) { - // This naive approach is extremely inefficient if `this` divided by other - // is big. This function is implemented for doubleToString where - // the result should be small (less than 10). - ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16)); - ASSERT(bigits_[used_digits_ - 1] < 0x10000); - // Remove the multiples of the first digit. - // Example this = 23 and other equals 9. -> Remove 2 multiples. - result += static_cast(bigits_[used_digits_ - 1]); - SubtractTimes(other, bigits_[used_digits_ - 1]); - } - - ASSERT(BigitLength() == other.BigitLength()); - - // Both bignums are at the same length now. - // Since other has more than 0 digits we know that the access to - // bigits_[used_digits_ - 1] is safe. - Chunk this_bigit = bigits_[used_digits_ - 1]; - Chunk other_bigit = other.bigits_[other.used_digits_ - 1]; - - if (other.used_digits_ == 1) { - // Shortcut for easy (and common) case. - int quotient = this_bigit / other_bigit; - bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient; - ASSERT(quotient < 0x10000); - result += static_cast(quotient); - Clamp(); - return result; - } - - int division_estimate = this_bigit / (other_bigit + 1); - ASSERT(division_estimate < 0x10000); - result += static_cast(division_estimate); - SubtractTimes(other, division_estimate); - - if (other_bigit * (division_estimate + 1) > this_bigit) { - // No need to even try to subtract. 
Even if other's remaining digits were 0 - // another subtraction would be too much. - return result; - } - - while (LessEqual(other, *this)) { - SubtractBignum(other); - result++; - } - return result; -} - - -template -static int SizeInHexChars(S number) { - ASSERT(number > 0); - int result = 0; - while (number != 0) { - number >>= 4; - result++; - } - return result; -} - - -static char HexCharOfValue(int value) { - ASSERT(0 <= value && value <= 16); - if (value < 10) return static_cast(value + '0'); - return static_cast(value - 10 + 'A'); -} - - -bool Bignum::ToHexString(char* buffer, int buffer_size) const { - ASSERT(IsClamped()); - // Each bigit must be printable as separate hex-character. - ASSERT(kBigitSize % 4 == 0); - const int kHexCharsPerBigit = kBigitSize / 4; - - if (used_digits_ == 0) { - if (buffer_size < 2) return false; - buffer[0] = '0'; - buffer[1] = '\0'; - return true; - } - // We add 1 for the terminating '\0' character. - int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit + - SizeInHexChars(bigits_[used_digits_ - 1]) + 1; - if (needed_chars > buffer_size) return false; - int string_index = needed_chars - 1; - buffer[string_index--] = '\0'; - for (int i = 0; i < exponent_; ++i) { - for (int j = 0; j < kHexCharsPerBigit; ++j) { - buffer[string_index--] = '0'; - } - } - for (int i = 0; i < used_digits_ - 1; ++i) { - Chunk current_bigit = bigits_[i]; - for (int j = 0; j < kHexCharsPerBigit; ++j) { - buffer[string_index--] = HexCharOfValue(current_bigit & 0xF); - current_bigit >>= 4; - } - } - // And finally the last bigit. 
- Chunk most_significant_bigit = bigits_[used_digits_ - 1]; - while (most_significant_bigit != 0) { - buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF); - most_significant_bigit >>= 4; - } - return true; -} - - -Bignum::Chunk Bignum::BigitAt(int index) const { - if (index >= BigitLength()) return 0; - if (index < exponent_) return 0; - return bigits_[index - exponent_]; -} - - -int Bignum::Compare(const Bignum& a, const Bignum& b) { - ASSERT(a.IsClamped()); - ASSERT(b.IsClamped()); - int bigit_length_a = a.BigitLength(); - int bigit_length_b = b.BigitLength(); - if (bigit_length_a < bigit_length_b) return -1; - if (bigit_length_a > bigit_length_b) return +1; - for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) { - Chunk bigit_a = a.BigitAt(i); - Chunk bigit_b = b.BigitAt(i); - if (bigit_a < bigit_b) return -1; - if (bigit_a > bigit_b) return +1; - // Otherwise they are equal up to this digit. Try the next digit. - } - return 0; -} - - -int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) { - ASSERT(a.IsClamped()); - ASSERT(b.IsClamped()); - ASSERT(c.IsClamped()); - if (a.BigitLength() < b.BigitLength()) { - return PlusCompare(b, a, c); - } - if (a.BigitLength() + 1 < c.BigitLength()) return -1; - if (a.BigitLength() > c.BigitLength()) return +1; - // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than - // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one - // of 'a'. - if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) { - return -1; - } - - Chunk borrow = 0; - // Starting at min_exponent all digits are == 0. So no need to compare them. 
- int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_); - for (int i = c.BigitLength() - 1; i >= min_exponent; --i) { - Chunk chunk_a = a.BigitAt(i); - Chunk chunk_b = b.BigitAt(i); - Chunk chunk_c = c.BigitAt(i); - Chunk sum = chunk_a + chunk_b; - if (sum > chunk_c + borrow) { - return +1; - } else { - borrow = chunk_c + borrow - sum; - if (borrow > 1) return -1; - borrow <<= kBigitSize; - } - } - if (borrow == 0) return 0; - return -1; -} - - -void Bignum::Clamp() { - while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) { - used_digits_--; - } - if (used_digits_ == 0) { - // Zero. - exponent_ = 0; - } -} - - -bool Bignum::IsClamped() const { - return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0; -} - - -void Bignum::Zero() { - for (int i = 0; i < used_digits_; ++i) { - bigits_[i] = 0; - } - used_digits_ = 0; - exponent_ = 0; -} - - -void Bignum::Align(const Bignum& other) { - if (exponent_ > other.exponent_) { - // If "X" represents a "hidden" digit (by the exponent) then we are in the - // following case (a == this, b == other): - // a: aaaaaaXXXX or a: aaaaaXXX - // b: bbbbbbX b: bbbbbbbbXX - // We replace some of the hidden digits (X) of a with 0 digits. 
- // a: aaaaaa000X or a: aaaaa0XX - int zero_digits = exponent_ - other.exponent_; - EnsureCapacity(used_digits_ + zero_digits); - for (int i = used_digits_ - 1; i >= 0; --i) { - bigits_[i + zero_digits] = bigits_[i]; - } - for (int i = 0; i < zero_digits; ++i) { - bigits_[i] = 0; - } - used_digits_ += zero_digits; - exponent_ -= zero_digits; - ASSERT(used_digits_ >= 0); - ASSERT(exponent_ >= 0); - } -} - - -void Bignum::BigitsShiftLeft(int shift_amount) { - ASSERT(shift_amount < kBigitSize); - ASSERT(shift_amount >= 0); - Chunk carry = 0; - for (int i = 0; i < used_digits_; ++i) { - Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount); - bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask; - carry = new_carry; - } - if (carry != 0) { - bigits_[used_digits_] = carry; - used_digits_++; - } -} - - -void Bignum::SubtractTimes(const Bignum& other, int factor) { - ASSERT(exponent_ <= other.exponent_); - if (factor < 3) { - for (int i = 0; i < factor; ++i) { - SubtractBignum(other); - } - return; - } - Chunk borrow = 0; - int exponent_diff = other.exponent_ - exponent_; - for (int i = 0; i < other.used_digits_; ++i) { - DoubleChunk product = static_cast(factor) * other.bigits_[i]; - DoubleChunk remove = borrow + product; - Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask); - bigits_[i + exponent_diff] = difference & kBigitMask; - borrow = static_cast((difference >> (kChunkSize - 1)) + - (remove >> kBigitSize)); - } - for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) { - if (borrow == 0) return; - Chunk difference = bigits_[i] - borrow; - bigits_[i] = difference & kBigitMask; - borrow = difference >> (kChunkSize - 1); - } - Clamp(); -} - - -} // namespace double_conversion + MultiplyByUInt32(base); + } + + // Now do the same thing as a bignum. + while (mask != 0) { + Square(); + if ((power_exponent & mask) != 0) { + MultiplyByUInt32(base); + } + mask >>= 1; + } + + // And finally add the saved shifts. 
+ ShiftLeft(shifts * power_exponent); +} + + +// Precondition: this/other < 16bit. +uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + ASSERT(other.used_digits_ > 0); + + // Easy case: if we have less digits than the divisor than the result is 0. + // Note: this handles the case where this == 0, too. + if (BigitLength() < other.BigitLength()) { + return 0; + } + + Align(other); + + uint16_t result = 0; + + // Start by removing multiples of 'other' until both numbers have the same + // number of digits. + while (BigitLength() > other.BigitLength()) { + // This naive approach is extremely inefficient if `this` divided by other + // is big. This function is implemented for doubleToString where + // the result should be small (less than 10). + ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16)); + ASSERT(bigits_[used_digits_ - 1] < 0x10000); + // Remove the multiples of the first digit. + // Example this = 23 and other equals 9. -> Remove 2 multiples. + result += static_cast(bigits_[used_digits_ - 1]); + SubtractTimes(other, bigits_[used_digits_ - 1]); + } + + ASSERT(BigitLength() == other.BigitLength()); + + // Both bignums are at the same length now. + // Since other has more than 0 digits we know that the access to + // bigits_[used_digits_ - 1] is safe. + Chunk this_bigit = bigits_[used_digits_ - 1]; + Chunk other_bigit = other.bigits_[other.used_digits_ - 1]; + + if (other.used_digits_ == 1) { + // Shortcut for easy (and common) case. 
+ int quotient = this_bigit / other_bigit; + bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient; + ASSERT(quotient < 0x10000); + result += static_cast(quotient); + Clamp(); + return result; + } + + int division_estimate = this_bigit / (other_bigit + 1); + ASSERT(division_estimate < 0x10000); + result += static_cast(division_estimate); + SubtractTimes(other, division_estimate); + + if (other_bigit * (division_estimate + 1) > this_bigit) { + // No need to even try to subtract. Even if other's remaining digits were 0 + // another subtraction would be too much. + return result; + } + + while (LessEqual(other, *this)) { + SubtractBignum(other); + result++; + } + return result; +} + + +template +static int SizeInHexChars(S number) { + ASSERT(number > 0); + int result = 0; + while (number != 0) { + number >>= 4; + result++; + } + return result; +} + + +static char HexCharOfValue(int value) { + ASSERT(0 <= value && value <= 16); + if (value < 10) return static_cast(value + '0'); + return static_cast(value - 10 + 'A'); +} + + +bool Bignum::ToHexString(char* buffer, int buffer_size) const { + ASSERT(IsClamped()); + // Each bigit must be printable as separate hex-character. + ASSERT(kBigitSize % 4 == 0); + const int kHexCharsPerBigit = kBigitSize / 4; + + if (used_digits_ == 0) { + if (buffer_size < 2) return false; + buffer[0] = '0'; + buffer[1] = '\0'; + return true; + } + // We add 1 for the terminating '\0' character. 
+ int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit + + SizeInHexChars(bigits_[used_digits_ - 1]) + 1; + if (needed_chars > buffer_size) return false; + int string_index = needed_chars - 1; + buffer[string_index--] = '\0'; + for (int i = 0; i < exponent_; ++i) { + for (int j = 0; j < kHexCharsPerBigit; ++j) { + buffer[string_index--] = '0'; + } + } + for (int i = 0; i < used_digits_ - 1; ++i) { + Chunk current_bigit = bigits_[i]; + for (int j = 0; j < kHexCharsPerBigit; ++j) { + buffer[string_index--] = HexCharOfValue(current_bigit & 0xF); + current_bigit >>= 4; + } + } + // And finally the last bigit. + Chunk most_significant_bigit = bigits_[used_digits_ - 1]; + while (most_significant_bigit != 0) { + buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF); + most_significant_bigit >>= 4; + } + return true; +} + + +Bignum::Chunk Bignum::BigitAt(int index) const { + if (index >= BigitLength()) return 0; + if (index < exponent_) return 0; + return bigits_[index - exponent_]; +} + + +int Bignum::Compare(const Bignum& a, const Bignum& b) { + ASSERT(a.IsClamped()); + ASSERT(b.IsClamped()); + int bigit_length_a = a.BigitLength(); + int bigit_length_b = b.BigitLength(); + if (bigit_length_a < bigit_length_b) return -1; + if (bigit_length_a > bigit_length_b) return +1; + for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) { + Chunk bigit_a = a.BigitAt(i); + Chunk bigit_b = b.BigitAt(i); + if (bigit_a < bigit_b) return -1; + if (bigit_a > bigit_b) return +1; + // Otherwise they are equal up to this digit. Try the next digit. + } + return 0; +} + + +int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) { + ASSERT(a.IsClamped()); + ASSERT(b.IsClamped()); + ASSERT(c.IsClamped()); + if (a.BigitLength() < b.BigitLength()) { + return PlusCompare(b, a, c); + } + if (a.BigitLength() + 1 < c.BigitLength()) return -1; + if (a.BigitLength() > c.BigitLength()) return +1; + // The exponent encodes 0-bigits. 
So if there are more 0-digits in 'a' than + // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one + // of 'a'. + if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) { + return -1; + } + + Chunk borrow = 0; + // Starting at min_exponent all digits are == 0. So no need to compare them. + int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_); + for (int i = c.BigitLength() - 1; i >= min_exponent; --i) { + Chunk chunk_a = a.BigitAt(i); + Chunk chunk_b = b.BigitAt(i); + Chunk chunk_c = c.BigitAt(i); + Chunk sum = chunk_a + chunk_b; + if (sum > chunk_c + borrow) { + return +1; + } else { + borrow = chunk_c + borrow - sum; + if (borrow > 1) return -1; + borrow <<= kBigitSize; + } + } + if (borrow == 0) return 0; + return -1; +} + + +void Bignum::Clamp() { + while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) { + used_digits_--; + } + if (used_digits_ == 0) { + // Zero. + exponent_ = 0; + } +} + + +bool Bignum::IsClamped() const { + return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0; +} + + +void Bignum::Zero() { + for (int i = 0; i < used_digits_; ++i) { + bigits_[i] = 0; + } + used_digits_ = 0; + exponent_ = 0; +} + + +void Bignum::Align(const Bignum& other) { + if (exponent_ > other.exponent_) { + // If "X" represents a "hidden" digit (by the exponent) then we are in the + // following case (a == this, b == other): + // a: aaaaaaXXXX or a: aaaaaXXX + // b: bbbbbbX b: bbbbbbbbXX + // We replace some of the hidden digits (X) of a with 0 digits. 
+ // a: aaaaaa000X or a: aaaaa0XX + int zero_digits = exponent_ - other.exponent_; + EnsureCapacity(used_digits_ + zero_digits); + for (int i = used_digits_ - 1; i >= 0; --i) { + bigits_[i + zero_digits] = bigits_[i]; + } + for (int i = 0; i < zero_digits; ++i) { + bigits_[i] = 0; + } + used_digits_ += zero_digits; + exponent_ -= zero_digits; + ASSERT(used_digits_ >= 0); + ASSERT(exponent_ >= 0); + } +} + + +void Bignum::BigitsShiftLeft(int shift_amount) { + ASSERT(shift_amount < kBigitSize); + ASSERT(shift_amount >= 0); + Chunk carry = 0; + for (int i = 0; i < used_digits_; ++i) { + Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount); + bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask; + carry = new_carry; + } + if (carry != 0) { + bigits_[used_digits_] = carry; + used_digits_++; + } +} + + +void Bignum::SubtractTimes(const Bignum& other, int factor) { + ASSERT(exponent_ <= other.exponent_); + if (factor < 3) { + for (int i = 0; i < factor; ++i) { + SubtractBignum(other); + } + return; + } + Chunk borrow = 0; + int exponent_diff = other.exponent_ - exponent_; + for (int i = 0; i < other.used_digits_; ++i) { + DoubleChunk product = static_cast(factor) * other.bigits_[i]; + DoubleChunk remove = borrow + product; + Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask); + bigits_[i + exponent_diff] = difference & kBigitMask; + borrow = static_cast((difference >> (kChunkSize - 1)) + + (remove >> kBigitSize)); + } + for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) { + if (borrow == 0) return; + Chunk difference = bigits_[i] - borrow; + bigits_[i] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + } + Clamp(); +} + + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/bignum.h b/contrib/libs/double-conversion/bignum.h index eabfd1d7fd5..7c289fa2f6b 100644 --- a/contrib/libs/double-conversion/bignum.h +++ b/contrib/libs/double-conversion/bignum.h @@ -1,144 +1,144 
@@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_BIGNUM_H_ -#define DOUBLE_CONVERSION_BIGNUM_H_ - -#include "utils.h" - -namespace double_conversion { - -class Bignum { - public: - // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately. - // This bignum can encode much bigger numbers, since it contains an - // exponent. 
- static const int kMaxSignificantBits = 3584; - - Bignum(); - void AssignUInt16(uint16_t value); - void AssignUInt64(uint64_t value); - void AssignBignum(const Bignum& other); - - void AssignDecimalString(Vector value); - void AssignHexString(Vector value); - - void AssignPowerUInt16(uint16_t base, int exponent); - - void AddUInt64(uint64_t operand); - void AddBignum(const Bignum& other); - // Precondition: this >= other. - void SubtractBignum(const Bignum& other); - - void Square(); - void ShiftLeft(int shift_amount); - void MultiplyByUInt32(uint32_t factor); - void MultiplyByUInt64(uint64_t factor); - void MultiplyByPowerOfTen(int exponent); - void Times10() { return MultiplyByUInt32(10); } - // Pseudocode: - // int result = this / other; - // this = this % other; - // In the worst case this function is in O(this/other). - uint16_t DivideModuloIntBignum(const Bignum& other); - - bool ToHexString(char* buffer, int buffer_size) const; - - // Returns - // -1 if a < b, - // 0 if a == b, and - // +1 if a > b. 
- static int Compare(const Bignum& a, const Bignum& b); - static bool Equal(const Bignum& a, const Bignum& b) { - return Compare(a, b) == 0; - } - static bool LessEqual(const Bignum& a, const Bignum& b) { - return Compare(a, b) <= 0; - } - static bool Less(const Bignum& a, const Bignum& b) { - return Compare(a, b) < 0; - } - // Returns Compare(a + b, c); - static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c); - // Returns a + b == c - static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) { - return PlusCompare(a, b, c) == 0; - } - // Returns a + b <= c - static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) { - return PlusCompare(a, b, c) <= 0; - } - // Returns a + b < c - static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) { - return PlusCompare(a, b, c) < 0; - } - private: - typedef uint32_t Chunk; - typedef uint64_t DoubleChunk; - - static const int kChunkSize = sizeof(Chunk) * 8; - static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8; - // With bigit size of 28 we loose some bits, but a double still fits easily - // into two chunks, and more importantly we can use the Comba multiplication. - static const int kBigitSize = 28; - static const Chunk kBigitMask = (1 << kBigitSize) - 1; - // Every instance allocates kBigitLength chunks on the stack. Bignums cannot - // grow. There are no checks if the stack-allocated space is sufficient. - static const int kBigitCapacity = kMaxSignificantBits / kBigitSize; - - void EnsureCapacity(int size) { - if (size > kBigitCapacity) { - UNREACHABLE(); - } - } - void Align(const Bignum& other); - void Clamp(); - bool IsClamped() const; - void Zero(); - // Requires this to have enough capacity (no tests done). - // Updates used_digits_ if necessary. - // shift_amount must be < kBigitSize. - void BigitsShiftLeft(int shift_amount); - // BigitLength includes the "hidden" digits encoded in the exponent. 
- int BigitLength() const { return used_digits_ + exponent_; } - Chunk BigitAt(int index) const; - void SubtractTimes(const Bignum& other, int factor); - - Chunk bigits_buffer_[kBigitCapacity]; - // A vector backed by bigits_buffer_. This way accesses to the array are - // checked for out-of-bounds errors. - Vector bigits_; - int used_digits_; - // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize). - int exponent_; - +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_BIGNUM_H_ +#define DOUBLE_CONVERSION_BIGNUM_H_ + +#include "utils.h" + +namespace double_conversion { + +class Bignum { + public: + // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately. + // This bignum can encode much bigger numbers, since it contains an + // exponent. + static const int kMaxSignificantBits = 3584; + + Bignum(); + void AssignUInt16(uint16_t value); + void AssignUInt64(uint64_t value); + void AssignBignum(const Bignum& other); + + void AssignDecimalString(Vector value); + void AssignHexString(Vector value); + + void AssignPowerUInt16(uint16_t base, int exponent); + + void AddUInt64(uint64_t operand); + void AddBignum(const Bignum& other); + // Precondition: this >= other. + void SubtractBignum(const Bignum& other); + + void Square(); + void ShiftLeft(int shift_amount); + void MultiplyByUInt32(uint32_t factor); + void MultiplyByUInt64(uint64_t factor); + void MultiplyByPowerOfTen(int exponent); + void Times10() { return MultiplyByUInt32(10); } + // Pseudocode: + // int result = this / other; + // this = this % other; + // In the worst case this function is in O(this/other). + uint16_t DivideModuloIntBignum(const Bignum& other); + + bool ToHexString(char* buffer, int buffer_size) const; + + // Returns + // -1 if a < b, + // 0 if a == b, and + // +1 if a > b. 
+ static int Compare(const Bignum& a, const Bignum& b); + static bool Equal(const Bignum& a, const Bignum& b) { + return Compare(a, b) == 0; + } + static bool LessEqual(const Bignum& a, const Bignum& b) { + return Compare(a, b) <= 0; + } + static bool Less(const Bignum& a, const Bignum& b) { + return Compare(a, b) < 0; + } + // Returns Compare(a + b, c); + static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c); + // Returns a + b == c + static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) { + return PlusCompare(a, b, c) == 0; + } + // Returns a + b <= c + static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) { + return PlusCompare(a, b, c) <= 0; + } + // Returns a + b < c + static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) { + return PlusCompare(a, b, c) < 0; + } + private: + typedef uint32_t Chunk; + typedef uint64_t DoubleChunk; + + static const int kChunkSize = sizeof(Chunk) * 8; + static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8; + // With bigit size of 28 we loose some bits, but a double still fits easily + // into two chunks, and more importantly we can use the Comba multiplication. + static const int kBigitSize = 28; + static const Chunk kBigitMask = (1 << kBigitSize) - 1; + // Every instance allocates kBigitLength chunks on the stack. Bignums cannot + // grow. There are no checks if the stack-allocated space is sufficient. + static const int kBigitCapacity = kMaxSignificantBits / kBigitSize; + + void EnsureCapacity(int size) { + if (size > kBigitCapacity) { + UNREACHABLE(); + } + } + void Align(const Bignum& other); + void Clamp(); + bool IsClamped() const; + void Zero(); + // Requires this to have enough capacity (no tests done). + // Updates used_digits_ if necessary. + // shift_amount must be < kBigitSize. + void BigitsShiftLeft(int shift_amount); + // BigitLength includes the "hidden" digits encoded in the exponent. 
+ int BigitLength() const { return used_digits_ + exponent_; } + Chunk BigitAt(int index) const; + void SubtractTimes(const Bignum& other, int factor); + + Chunk bigits_buffer_[kBigitCapacity]; + // A vector backed by bigits_buffer_. This way accesses to the array are + // checked for out-of-bounds errors. + Vector bigits_; + int used_digits_; + // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize). + int exponent_; + DC_DISALLOW_COPY_AND_ASSIGN(Bignum); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_BIGNUM_H_ +}; + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_BIGNUM_H_ diff --git a/contrib/libs/double-conversion/cached-powers.cc b/contrib/libs/double-conversion/cached-powers.cc index bec831d8630..8ab281a1ba0 100644 --- a/contrib/libs/double-conversion/cached-powers.cc +++ b/contrib/libs/double-conversion/cached-powers.cc @@ -1,175 +1,175 @@ -// Copyright 2006-2008 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +// Copyright 2006-2008 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include #include #include - -#include "utils.h" - -#include "cached-powers.h" - -namespace double_conversion { - -struct CachedPower { - uint64_t significand; - int16_t binary_exponent; - int16_t decimal_exponent; -}; - -static const CachedPower kCachedPowers[] = { - {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348}, - {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340}, - {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332}, - {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324}, - {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316}, - {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308}, - {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300}, - {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292}, - {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284}, - {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276}, - {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268}, - {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260}, - {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252}, - {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244}, - {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236}, - {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228}, - {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220}, - {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212}, - {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204}, - {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196}, - {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188}, - {UINT64_2PART_C(0x84c8d4df, d2c63f3b), 
-661, -180}, - {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172}, - {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164}, - {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156}, - {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148}, - {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140}, - {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132}, - {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124}, - {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116}, - {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108}, - {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100}, - {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92}, - {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84}, - {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76}, - {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68}, - {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60}, - {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52}, - {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44}, - {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36}, - {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28}, - {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20}, - {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12}, - {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4}, - {UINT64_2PART_C(0x9c400000, 00000000), -50, 4}, - {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12}, - {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20}, - {UINT64_2PART_C(0x813f3978, f8940984), 30, 28}, - {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36}, - {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44}, - {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52}, - {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60}, - {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68}, - {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76}, - {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84}, - {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92}, - {UINT64_2PART_C(0x924d692c, a61be758), 269, 100}, - {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108}, - {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116}, - {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 
124}, - {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132}, - {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140}, - {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148}, - {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156}, - {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164}, - {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172}, - {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180}, - {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188}, - {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196}, - {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204}, - {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212}, - {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220}, - {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228}, - {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236}, - {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244}, - {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252}, - {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260}, - {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268}, - {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276}, - {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284}, - {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292}, - {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300}, - {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308}, - {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316}, - {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324}, - {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332}, - {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340}, -}; - -static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent. -static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10) -// Difference between the decimal exponents in the table above. 
-const int PowersOfTenCache::kDecimalExponentDistance = 8; -const int PowersOfTenCache::kMinDecimalExponent = -348; -const int PowersOfTenCache::kMaxDecimalExponent = 340; - -void PowersOfTenCache::GetCachedPowerForBinaryExponentRange( - int min_exponent, - int max_exponent, - DiyFp* power, - int* decimal_exponent) { - int kQ = DiyFp::kSignificandSize; - double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10); - int foo = kCachedPowersOffset; - int index = - (foo + static_cast(k) - 1) / kDecimalExponentDistance + 1; - ASSERT(0 <= index && index < static_cast(ARRAY_SIZE(kCachedPowers))); - CachedPower cached_power = kCachedPowers[index]; - ASSERT(min_exponent <= cached_power.binary_exponent); - (void) max_exponent; // Mark variable as used. - ASSERT(cached_power.binary_exponent <= max_exponent); - *decimal_exponent = cached_power.decimal_exponent; - *power = DiyFp(cached_power.significand, cached_power.binary_exponent); -} - - -void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent, - DiyFp* power, - int* found_exponent) { - ASSERT(kMinDecimalExponent <= requested_exponent); - ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance); - int index = - (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance; - CachedPower cached_power = kCachedPowers[index]; - *power = DiyFp(cached_power.significand, cached_power.binary_exponent); - *found_exponent = cached_power.decimal_exponent; - ASSERT(*found_exponent <= requested_exponent); - ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance); -} - -} // namespace double_conversion + +#include "utils.h" + +#include "cached-powers.h" + +namespace double_conversion { + +struct CachedPower { + uint64_t significand; + int16_t binary_exponent; + int16_t decimal_exponent; +}; + +static const CachedPower kCachedPowers[] = { + {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348}, + {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340}, + {UINT64_2PART_C(0x8b16fb20, 
3055ac76), -1166, -332}, + {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324}, + {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316}, + {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308}, + {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300}, + {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292}, + {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284}, + {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276}, + {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268}, + {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260}, + {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252}, + {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244}, + {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236}, + {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228}, + {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220}, + {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212}, + {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204}, + {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196}, + {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188}, + {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180}, + {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172}, + {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164}, + {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156}, + {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148}, + {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140}, + {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132}, + {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124}, + {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116}, + {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108}, + {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100}, + {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92}, + {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84}, + {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76}, + {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68}, + {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60}, + {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52}, + {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44}, + {UINT64_2PART_C(0xaa242499, 
697392d3), -183, -36}, + {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28}, + {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20}, + {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12}, + {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4}, + {UINT64_2PART_C(0x9c400000, 00000000), -50, 4}, + {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12}, + {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20}, + {UINT64_2PART_C(0x813f3978, f8940984), 30, 28}, + {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36}, + {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44}, + {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52}, + {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60}, + {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68}, + {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76}, + {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84}, + {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92}, + {UINT64_2PART_C(0x924d692c, a61be758), 269, 100}, + {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108}, + {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116}, + {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124}, + {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132}, + {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140}, + {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148}, + {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156}, + {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164}, + {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172}, + {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180}, + {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188}, + {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196}, + {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204}, + {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212}, + {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220}, + {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228}, + {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236}, + {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244}, + {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252}, + {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260}, + {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268}, + 
{UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276}, + {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284}, + {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292}, + {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300}, + {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308}, + {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316}, + {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324}, + {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332}, + {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340}, +}; + +static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent. +static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10) +// Difference between the decimal exponents in the table above. +const int PowersOfTenCache::kDecimalExponentDistance = 8; +const int PowersOfTenCache::kMinDecimalExponent = -348; +const int PowersOfTenCache::kMaxDecimalExponent = 340; + +void PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + int min_exponent, + int max_exponent, + DiyFp* power, + int* decimal_exponent) { + int kQ = DiyFp::kSignificandSize; + double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10); + int foo = kCachedPowersOffset; + int index = + (foo + static_cast(k) - 1) / kDecimalExponentDistance + 1; + ASSERT(0 <= index && index < static_cast(ARRAY_SIZE(kCachedPowers))); + CachedPower cached_power = kCachedPowers[index]; + ASSERT(min_exponent <= cached_power.binary_exponent); + (void) max_exponent; // Mark variable as used. 
+ ASSERT(cached_power.binary_exponent <= max_exponent); + *decimal_exponent = cached_power.decimal_exponent; + *power = DiyFp(cached_power.significand, cached_power.binary_exponent); +} + + +void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent, + DiyFp* power, + int* found_exponent) { + ASSERT(kMinDecimalExponent <= requested_exponent); + ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance); + int index = + (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance; + CachedPower cached_power = kCachedPowers[index]; + *power = DiyFp(cached_power.significand, cached_power.binary_exponent); + *found_exponent = cached_power.decimal_exponent; + ASSERT(*found_exponent <= requested_exponent); + ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance); +} + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/cached-powers.h b/contrib/libs/double-conversion/cached-powers.h index b37a93d0823..61a50614cf1 100644 --- a/contrib/libs/double-conversion/cached-powers.h +++ b/contrib/libs/double-conversion/cached-powers.h @@ -1,64 +1,64 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_ -#define DOUBLE_CONVERSION_CACHED_POWERS_H_ - -#include "diy-fp.h" - -namespace double_conversion { - -class PowersOfTenCache { - public: - - // Not all powers of ten are cached. The decimal exponent of two neighboring - // cached numbers will differ by kDecimalExponentDistance. - static const int kDecimalExponentDistance; - - static const int kMinDecimalExponent; - static const int kMaxDecimalExponent; - - // Returns a cached power-of-ten with a binary exponent in the range - // [min_exponent; max_exponent] (boundaries included). - static void GetCachedPowerForBinaryExponentRange(int min_exponent, - int max_exponent, - DiyFp* power, - int* decimal_exponent); - - // Returns a cached power of ten x ~= 10^k such that - // k <= decimal_exponent < k + kCachedPowersDecimalDistance. - // The given decimal_exponent must satisfy - // kMinDecimalExponent <= requested_exponent, and - // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance. 
- static void GetCachedPowerForDecimalExponent(int requested_exponent, - DiyFp* power, - int* found_exponent); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_ +#define DOUBLE_CONVERSION_CACHED_POWERS_H_ + +#include "diy-fp.h" + +namespace double_conversion { + +class PowersOfTenCache { + public: + + // Not all powers of ten are cached. 
The decimal exponent of two neighboring + // cached numbers will differ by kDecimalExponentDistance. + static const int kDecimalExponentDistance; + + static const int kMinDecimalExponent; + static const int kMaxDecimalExponent; + + // Returns a cached power-of-ten with a binary exponent in the range + // [min_exponent; max_exponent] (boundaries included). + static void GetCachedPowerForBinaryExponentRange(int min_exponent, + int max_exponent, + DiyFp* power, + int* decimal_exponent); + + // Returns a cached power of ten x ~= 10^k such that + // k <= decimal_exponent < k + kCachedPowersDecimalDistance. + // The given decimal_exponent must satisfy + // kMinDecimalExponent <= requested_exponent, and + // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance. + static void GetCachedPowerForDecimalExponent(int requested_exponent, + DiyFp* power, + int* found_exponent); +}; + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_ diff --git a/contrib/libs/double-conversion/diy-fp.cc b/contrib/libs/double-conversion/diy-fp.cc index ea0b55f716d..ddd1891b168 100644 --- a/contrib/libs/double-conversion/diy-fp.cc +++ b/contrib/libs/double-conversion/diy-fp.cc @@ -1,57 +1,57 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#include "diy-fp.h" -#include "utils.h" - -namespace double_conversion { - -void DiyFp::Multiply(const DiyFp& other) { - // Simply "emulates" a 128 bit multiplication. - // However: the resulting number only contains 64 bits. The least - // significant 64 bits are only used for rounding the most significant 64 - // bits. - const uint64_t kM32 = 0xFFFFFFFFU; - uint64_t a = f_ >> 32; - uint64_t b = f_ & kM32; - uint64_t c = other.f_ >> 32; - uint64_t d = other.f_ & kM32; - uint64_t ac = a * c; - uint64_t bc = b * c; - uint64_t ad = a * d; - uint64_t bd = b * d; - uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32); - // By adding 1U << 31 to tmp we round the final result. - // Halfway cases will be round up. - tmp += 1U << 31; - uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32); - e_ += other.e_ + 64; - f_ = result_f; -} - -} // namespace double_conversion +// Copyright 2010 the V8 project authors. All rights reserved. 
+// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#include "diy-fp.h" +#include "utils.h" + +namespace double_conversion { + +void DiyFp::Multiply(const DiyFp& other) { + // Simply "emulates" a 128 bit multiplication. + // However: the resulting number only contains 64 bits. The least + // significant 64 bits are only used for rounding the most significant 64 + // bits. 
+ const uint64_t kM32 = 0xFFFFFFFFU; + uint64_t a = f_ >> 32; + uint64_t b = f_ & kM32; + uint64_t c = other.f_ >> 32; + uint64_t d = other.f_ & kM32; + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32); + // By adding 1U << 31 to tmp we round the final result. + // Halfway cases will be round up. + tmp += 1U << 31; + uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32); + e_ += other.e_ + 64; + f_ = result_f; +} + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/diy-fp.h b/contrib/libs/double-conversion/diy-fp.h index b88ff7e1fb8..2edf34674ee 100644 --- a/contrib/libs/double-conversion/diy-fp.h +++ b/contrib/libs/double-conversion/diy-fp.h @@ -1,118 +1,118 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DIY_FP_H_ -#define DOUBLE_CONVERSION_DIY_FP_H_ - -#include "utils.h" - -namespace double_conversion { - -// This "Do It Yourself Floating Point" class implements a floating-point number -// with a uint64 significand and an int exponent. Normalized DiyFp numbers will -// have the most significant bit of the significand set. -// Multiplication and Subtraction do not normalize their results. -// DiyFp are not designed to contain special doubles (NaN and Infinity). -class DiyFp { - public: - static const int kSignificandSize = 64; - - DiyFp() : f_(0), e_(0) {} - DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {} - - // this = this - other. - // The exponents of both numbers must be the same and the significand of this - // must be bigger than the significand of other. - // The result will not be normalized. - void Subtract(const DiyFp& other) { - ASSERT(e_ == other.e_); - ASSERT(f_ >= other.f_); - f_ -= other.f_; - } - - // Returns a - b. - // The exponents of both numbers must be the same and this must be bigger - // than other. The result will not be normalized. - static DiyFp Minus(const DiyFp& a, const DiyFp& b) { - DiyFp result = a; - result.Subtract(b); - return result; - } - - - // this = this * other. 
- void Multiply(const DiyFp& other); - - // returns a * b; - static DiyFp Times(const DiyFp& a, const DiyFp& b) { - DiyFp result = a; - result.Multiply(b); - return result; - } - - void Normalize() { - ASSERT(f_ != 0); - uint64_t significand = f_; - int exponent = e_; - - // This method is mainly called for normalizing boundaries. In general - // boundaries need to be shifted by 10 bits. We thus optimize for this case. - const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000); - while ((significand & k10MSBits) == 0) { - significand <<= 10; - exponent -= 10; - } - while ((significand & kUint64MSB) == 0) { - significand <<= 1; - exponent--; - } - f_ = significand; - e_ = exponent; - } - - static DiyFp Normalize(const DiyFp& a) { - DiyFp result = a; - result.Normalize(); - return result; - } - - uint64_t f() const { return f_; } - int e() const { return e_; } - - void set_f(uint64_t new_value) { f_ = new_value; } - void set_e(int new_value) { e_ = new_value; } - - private: - static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000); - - uint64_t f_; - int e_; -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DIY_FP_H_ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_DIY_FP_H_ +#define DOUBLE_CONVERSION_DIY_FP_H_ + +#include "utils.h" + +namespace double_conversion { + +// This "Do It Yourself Floating Point" class implements a floating-point number +// with a uint64 significand and an int exponent. Normalized DiyFp numbers will +// have the most significant bit of the significand set. +// Multiplication and Subtraction do not normalize their results. +// DiyFp are not designed to contain special doubles (NaN and Infinity). +class DiyFp { + public: + static const int kSignificandSize = 64; + + DiyFp() : f_(0), e_(0) {} + DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {} + + // this = this - other. + // The exponents of both numbers must be the same and the significand of this + // must be bigger than the significand of other. + // The result will not be normalized. + void Subtract(const DiyFp& other) { + ASSERT(e_ == other.e_); + ASSERT(f_ >= other.f_); + f_ -= other.f_; + } + + // Returns a - b. + // The exponents of both numbers must be the same and this must be bigger + // than other. The result will not be normalized. 
+ static DiyFp Minus(const DiyFp& a, const DiyFp& b) { + DiyFp result = a; + result.Subtract(b); + return result; + } + + + // this = this * other. + void Multiply(const DiyFp& other); + + // returns a * b; + static DiyFp Times(const DiyFp& a, const DiyFp& b) { + DiyFp result = a; + result.Multiply(b); + return result; + } + + void Normalize() { + ASSERT(f_ != 0); + uint64_t significand = f_; + int exponent = e_; + + // This method is mainly called for normalizing boundaries. In general + // boundaries need to be shifted by 10 bits. We thus optimize for this case. + const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000); + while ((significand & k10MSBits) == 0) { + significand <<= 10; + exponent -= 10; + } + while ((significand & kUint64MSB) == 0) { + significand <<= 1; + exponent--; + } + f_ = significand; + e_ = exponent; + } + + static DiyFp Normalize(const DiyFp& a) { + DiyFp result = a; + result.Normalize(); + return result; + } + + uint64_t f() const { return f_; } + int e() const { return e_; } + + void set_f(uint64_t new_value) { f_ = new_value; } + void set_e(int new_value) { e_ = new_value; } + + private: + static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000); + + uint64_t f_; + int e_; +}; + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_DIY_FP_H_ diff --git a/contrib/libs/double-conversion/double-conversion.cc b/contrib/libs/double-conversion/double-conversion.cc index 2b6627e7eb7..6ee6feb09b0 100644 --- a/contrib/libs/double-conversion/double-conversion.cc +++ b/contrib/libs/double-conversion/double-conversion.cc @@ -1,420 +1,420 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include #include #include - -#include "double-conversion.h" - -#include "bignum-dtoa.h" -#include "fast-dtoa.h" -#include "fixed-dtoa.h" -#include "ieee.h" -#include "strtod.h" -#include "utils.h" - -namespace double_conversion { - -const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() { - int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN; - static DoubleToStringConverter converter(flags, - "Infinity", - "NaN", - 'e', - -6, 21, - 6, 0); - return converter; -} - - -bool DoubleToStringConverter::HandleSpecialValues( - double value, - StringBuilder* result_builder) const { - Double double_inspect(value); - if (double_inspect.IsInfinite()) { - if (infinity_symbol_ == NULL) return false; - if (value < 0) { - result_builder->AddCharacter('-'); - } - result_builder->AddString(infinity_symbol_); - return true; - } - if (double_inspect.IsNan()) { - if (nan_symbol_ == NULL) return false; - result_builder->AddString(nan_symbol_); - return true; - } - return false; -} - - -void DoubleToStringConverter::CreateExponentialRepresentation( - const char* decimal_digits, - int length, - int exponent, - StringBuilder* result_builder) const { - ASSERT(length != 0); - 
result_builder->AddCharacter(decimal_digits[0]); - if (length != 1) { - result_builder->AddCharacter('.'); - result_builder->AddSubstring(&decimal_digits[1], length-1); - } - result_builder->AddCharacter(exponent_character_); - if (exponent < 0) { - result_builder->AddCharacter('-'); - exponent = -exponent; - } else { - if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) { - result_builder->AddCharacter('+'); - } - } - if (exponent == 0) { - result_builder->AddCharacter('0'); - return; - } - ASSERT(exponent < 1e4); - const int kMaxExponentLength = 5; - char buffer[kMaxExponentLength + 1]; - buffer[kMaxExponentLength] = '\0'; - int first_char_pos = kMaxExponentLength; - while (exponent > 0) { - buffer[--first_char_pos] = '0' + (exponent % 10); - exponent /= 10; - } - result_builder->AddSubstring(&buffer[first_char_pos], - kMaxExponentLength - first_char_pos); -} - - -void DoubleToStringConverter::CreateDecimalRepresentation( - const char* decimal_digits, - int length, - int decimal_point, - int digits_after_point, - StringBuilder* result_builder) const { - // Create a representation that is padded with zeros if needed. - if (decimal_point <= 0) { - // "0.00000decimal_rep" or "0.000decimal_rep00". - result_builder->AddCharacter('0'); - if (digits_after_point > 0) { - result_builder->AddCharacter('.'); - result_builder->AddPadding('0', -decimal_point); - ASSERT(length <= digits_after_point - (-decimal_point)); - result_builder->AddSubstring(decimal_digits, length); - int remaining_digits = digits_after_point - (-decimal_point) - length; - result_builder->AddPadding('0', remaining_digits); - } - } else if (decimal_point >= length) { - // "decimal_rep0000.00000" or "decimal_rep.0000". - result_builder->AddSubstring(decimal_digits, length); - result_builder->AddPadding('0', decimal_point - length); - if (digits_after_point > 0) { - result_builder->AddCharacter('.'); - result_builder->AddPadding('0', digits_after_point); - } - } else { - // "decima.l_rep000". 
- ASSERT(digits_after_point > 0); - result_builder->AddSubstring(decimal_digits, decimal_point); - result_builder->AddCharacter('.'); - ASSERT(length - decimal_point <= digits_after_point); - result_builder->AddSubstring(&decimal_digits[decimal_point], - length - decimal_point); - int remaining_digits = digits_after_point - (length - decimal_point); - result_builder->AddPadding('0', remaining_digits); - } - if (digits_after_point == 0) { - if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) { - result_builder->AddCharacter('.'); - } - if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) { - result_builder->AddCharacter('0'); - } - } -} - - -bool DoubleToStringConverter::ToShortestIeeeNumber( - double value, - StringBuilder* result_builder, - DoubleToStringConverter::DtoaMode mode) const { - ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE); - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - int decimal_point; - bool sign; - const int kDecimalRepCapacity = kBase10MaximalLength + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - - bool unique_zero = (flags_ & UNIQUE_ZERO) != 0; - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - int exponent = decimal_point - 1; - if ((decimal_in_shortest_low_ <= exponent) && - (exponent < decimal_in_shortest_high_)) { - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, - decimal_point, - Max(0, decimal_rep_length - decimal_point), - result_builder); - } else { - CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent, - result_builder); - } - return true; -} - - -bool DoubleToStringConverter::ToFixed(double value, - int requested_digits, - StringBuilder* result_builder) const { - ASSERT(kMaxFixedDigitsBeforePoint == 60); - const double kFirstNonFixed = 1e60; - - if 
(Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (requested_digits > kMaxFixedDigitsAfterPoint) return false; - if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false; - - // Find a sufficiently precise decimal representation of n. - int decimal_point; - bool sign; - // Add space for the '\0' byte. - const int kDecimalRepCapacity = - kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - DoubleToAscii(value, FIXED, requested_digits, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, - requested_digits, result_builder); - return true; -} - - -bool DoubleToStringConverter::ToExponential( - double value, - int requested_digits, - StringBuilder* result_builder) const { - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (requested_digits < -1) return false; - if (requested_digits > kMaxExponentialDigits) return false; - - int decimal_point; - bool sign; - // Add space for digit before the decimal point and the '\0' character. 
- const int kDecimalRepCapacity = kMaxExponentialDigits + 2; - ASSERT(kDecimalRepCapacity > kBase10MaximalLength); - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - if (requested_digits == -1) { - DoubleToAscii(value, SHORTEST, 0, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - } else { - DoubleToAscii(value, PRECISION, requested_digits + 1, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - ASSERT(decimal_rep_length <= requested_digits + 1); - - for (int i = decimal_rep_length; i < requested_digits + 1; ++i) { - decimal_rep[i] = '0'; - } - decimal_rep_length = requested_digits + 1; - } - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - int exponent = decimal_point - 1; - CreateExponentialRepresentation(decimal_rep, - decimal_rep_length, - exponent, - result_builder); - return true; -} - - -bool DoubleToStringConverter::ToPrecision(double value, - int precision, - StringBuilder* result_builder) const { - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) { - return false; - } - - // Find a sufficiently precise decimal representation of n. - int decimal_point; - bool sign; - // Add one for the terminating null character. - const int kDecimalRepCapacity = kMaxPrecisionDigits + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - DoubleToAscii(value, PRECISION, precision, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - ASSERT(decimal_rep_length <= precision); - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - // The exponent if we print the number as x.xxeyyy. That is with the - // decimal point after the first digit. 
- int exponent = decimal_point - 1; - - int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0; - if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || - (decimal_point - precision + extra_zero > - max_trailing_padding_zeroes_in_precision_mode_)) { - // Fill buffer to contain 'precision' digits. - // Usually the buffer is already at the correct length, but 'DoubleToAscii' - // is allowed to return less characters. - for (int i = decimal_rep_length; i < precision; ++i) { - decimal_rep[i] = '0'; - } - - CreateExponentialRepresentation(decimal_rep, - precision, - exponent, - result_builder); - } else { - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, - Max(0, precision - decimal_point), - result_builder); - } - return true; -} - - -static BignumDtoaMode DtoaToBignumDtoaMode( - DoubleToStringConverter::DtoaMode dtoa_mode) { - switch (dtoa_mode) { - case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST; - case DoubleToStringConverter::SHORTEST_SINGLE: - return BIGNUM_DTOA_SHORTEST_SINGLE; - case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED; - case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION; - default: - UNREACHABLE(); - } -} - - -void DoubleToStringConverter::DoubleToAscii(double v, - DtoaMode mode, - int requested_digits, - char* buffer, - int buffer_length, - bool* sign, - int* length, - int* point) { - Vector vector(buffer, buffer_length); - ASSERT(!Double(v).IsSpecial()); - ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0); - - if (Double(v).Sign() < 0) { - *sign = true; - v = -v; - } else { - *sign = false; - } - - if (mode == PRECISION && requested_digits == 0) { - vector[0] = '\0'; - *length = 0; - return; - } - - if (v == 0) { - vector[0] = '0'; - vector[1] = '\0'; - *length = 1; - *point = 1; - return; - } - - bool fast_worked; - switch (mode) { - case SHORTEST: - fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, 
vector, length, point); - break; - case SHORTEST_SINGLE: - fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0, - vector, length, point); - break; - case FIXED: - fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point); - break; - case PRECISION: - fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits, - vector, length, point); - break; - default: - fast_worked = false; - UNREACHABLE(); - } - if (fast_worked) return; - - // If the fast dtoa didn't succeed use the slower bignum version. - BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode); - BignumDtoa(v, bignum_mode, requested_digits, vector, length, point); - vector[*length] = '\0'; -} - - + +#include "double-conversion.h" + +#include "bignum-dtoa.h" +#include "fast-dtoa.h" +#include "fixed-dtoa.h" +#include "ieee.h" +#include "strtod.h" +#include "utils.h" + +namespace double_conversion { + +const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() { + int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN; + static DoubleToStringConverter converter(flags, + "Infinity", + "NaN", + 'e', + -6, 21, + 6, 0); + return converter; +} + + +bool DoubleToStringConverter::HandleSpecialValues( + double value, + StringBuilder* result_builder) const { + Double double_inspect(value); + if (double_inspect.IsInfinite()) { + if (infinity_symbol_ == NULL) return false; + if (value < 0) { + result_builder->AddCharacter('-'); + } + result_builder->AddString(infinity_symbol_); + return true; + } + if (double_inspect.IsNan()) { + if (nan_symbol_ == NULL) return false; + result_builder->AddString(nan_symbol_); + return true; + } + return false; +} + + +void DoubleToStringConverter::CreateExponentialRepresentation( + const char* decimal_digits, + int length, + int exponent, + StringBuilder* result_builder) const { + ASSERT(length != 0); + result_builder->AddCharacter(decimal_digits[0]); + if (length != 1) { + result_builder->AddCharacter('.'); + 
result_builder->AddSubstring(&decimal_digits[1], length-1); + } + result_builder->AddCharacter(exponent_character_); + if (exponent < 0) { + result_builder->AddCharacter('-'); + exponent = -exponent; + } else { + if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) { + result_builder->AddCharacter('+'); + } + } + if (exponent == 0) { + result_builder->AddCharacter('0'); + return; + } + ASSERT(exponent < 1e4); + const int kMaxExponentLength = 5; + char buffer[kMaxExponentLength + 1]; + buffer[kMaxExponentLength] = '\0'; + int first_char_pos = kMaxExponentLength; + while (exponent > 0) { + buffer[--first_char_pos] = '0' + (exponent % 10); + exponent /= 10; + } + result_builder->AddSubstring(&buffer[first_char_pos], + kMaxExponentLength - first_char_pos); +} + + +void DoubleToStringConverter::CreateDecimalRepresentation( + const char* decimal_digits, + int length, + int decimal_point, + int digits_after_point, + StringBuilder* result_builder) const { + // Create a representation that is padded with zeros if needed. + if (decimal_point <= 0) { + // "0.00000decimal_rep" or "0.000decimal_rep00". + result_builder->AddCharacter('0'); + if (digits_after_point > 0) { + result_builder->AddCharacter('.'); + result_builder->AddPadding('0', -decimal_point); + ASSERT(length <= digits_after_point - (-decimal_point)); + result_builder->AddSubstring(decimal_digits, length); + int remaining_digits = digits_after_point - (-decimal_point) - length; + result_builder->AddPadding('0', remaining_digits); + } + } else if (decimal_point >= length) { + // "decimal_rep0000.00000" or "decimal_rep.0000". + result_builder->AddSubstring(decimal_digits, length); + result_builder->AddPadding('0', decimal_point - length); + if (digits_after_point > 0) { + result_builder->AddCharacter('.'); + result_builder->AddPadding('0', digits_after_point); + } + } else { + // "decima.l_rep000". 
+ ASSERT(digits_after_point > 0); + result_builder->AddSubstring(decimal_digits, decimal_point); + result_builder->AddCharacter('.'); + ASSERT(length - decimal_point <= digits_after_point); + result_builder->AddSubstring(&decimal_digits[decimal_point], + length - decimal_point); + int remaining_digits = digits_after_point - (length - decimal_point); + result_builder->AddPadding('0', remaining_digits); + } + if (digits_after_point == 0) { + if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) { + result_builder->AddCharacter('.'); + } + if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) { + result_builder->AddCharacter('0'); + } + } +} + + +bool DoubleToStringConverter::ToShortestIeeeNumber( + double value, + StringBuilder* result_builder, + DoubleToStringConverter::DtoaMode mode) const { + ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE); + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + int decimal_point; + bool sign; + const int kDecimalRepCapacity = kBase10MaximalLength + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + + bool unique_zero = (flags_ & UNIQUE_ZERO) != 0; + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + int exponent = decimal_point - 1; + if ((decimal_in_shortest_low_ <= exponent) && + (exponent < decimal_in_shortest_high_)) { + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, + decimal_point, + Max(0, decimal_rep_length - decimal_point), + result_builder); + } else { + CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent, + result_builder); + } + return true; +} + + +bool DoubleToStringConverter::ToFixed(double value, + int requested_digits, + StringBuilder* result_builder) const { + ASSERT(kMaxFixedDigitsBeforePoint == 60); + const double kFirstNonFixed = 1e60; + + if 
(Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (requested_digits > kMaxFixedDigitsAfterPoint) return false; + if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false; + + // Find a sufficiently precise decimal representation of n. + int decimal_point; + bool sign; + // Add space for the '\0' byte. + const int kDecimalRepCapacity = + kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + DoubleToAscii(value, FIXED, requested_digits, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, + requested_digits, result_builder); + return true; +} + + +bool DoubleToStringConverter::ToExponential( + double value, + int requested_digits, + StringBuilder* result_builder) const { + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (requested_digits < -1) return false; + if (requested_digits > kMaxExponentialDigits) return false; + + int decimal_point; + bool sign; + // Add space for digit before the decimal point and the '\0' character. 
+ const int kDecimalRepCapacity = kMaxExponentialDigits + 2; + ASSERT(kDecimalRepCapacity > kBase10MaximalLength); + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + if (requested_digits == -1) { + DoubleToAscii(value, SHORTEST, 0, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + } else { + DoubleToAscii(value, PRECISION, requested_digits + 1, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + ASSERT(decimal_rep_length <= requested_digits + 1); + + for (int i = decimal_rep_length; i < requested_digits + 1; ++i) { + decimal_rep[i] = '0'; + } + decimal_rep_length = requested_digits + 1; + } + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + int exponent = decimal_point - 1; + CreateExponentialRepresentation(decimal_rep, + decimal_rep_length, + exponent, + result_builder); + return true; +} + + +bool DoubleToStringConverter::ToPrecision(double value, + int precision, + StringBuilder* result_builder) const { + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) { + return false; + } + + // Find a sufficiently precise decimal representation of n. + int decimal_point; + bool sign; + // Add one for the terminating null character. + const int kDecimalRepCapacity = kMaxPrecisionDigits + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + DoubleToAscii(value, PRECISION, precision, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + ASSERT(decimal_rep_length <= precision); + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + // The exponent if we print the number as x.xxeyyy. That is with the + // decimal point after the first digit. 
+ int exponent = decimal_point - 1; + + int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0; + if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || + (decimal_point - precision + extra_zero > + max_trailing_padding_zeroes_in_precision_mode_)) { + // Fill buffer to contain 'precision' digits. + // Usually the buffer is already at the correct length, but 'DoubleToAscii' + // is allowed to return less characters. + for (int i = decimal_rep_length; i < precision; ++i) { + decimal_rep[i] = '0'; + } + + CreateExponentialRepresentation(decimal_rep, + precision, + exponent, + result_builder); + } else { + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, + Max(0, precision - decimal_point), + result_builder); + } + return true; +} + + +static BignumDtoaMode DtoaToBignumDtoaMode( + DoubleToStringConverter::DtoaMode dtoa_mode) { + switch (dtoa_mode) { + case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST; + case DoubleToStringConverter::SHORTEST_SINGLE: + return BIGNUM_DTOA_SHORTEST_SINGLE; + case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED; + case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION; + default: + UNREACHABLE(); + } +} + + +void DoubleToStringConverter::DoubleToAscii(double v, + DtoaMode mode, + int requested_digits, + char* buffer, + int buffer_length, + bool* sign, + int* length, + int* point) { + Vector vector(buffer, buffer_length); + ASSERT(!Double(v).IsSpecial()); + ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0); + + if (Double(v).Sign() < 0) { + *sign = true; + v = -v; + } else { + *sign = false; + } + + if (mode == PRECISION && requested_digits == 0) { + vector[0] = '\0'; + *length = 0; + return; + } + + if (v == 0) { + vector[0] = '0'; + vector[1] = '\0'; + *length = 1; + *point = 1; + return; + } + + bool fast_worked; + switch (mode) { + case SHORTEST: + fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, 
vector, length, point); + break; + case SHORTEST_SINGLE: + fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0, + vector, length, point); + break; + case FIXED: + fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point); + break; + case PRECISION: + fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits, + vector, length, point); + break; + default: + fast_worked = false; + UNREACHABLE(); + } + if (fast_worked) return; + + // If the fast dtoa didn't succeed use the slower bignum version. + BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode); + BignumDtoa(v, bignum_mode, requested_digits, vector, length, point); + vector[*length] = '\0'; +} + + namespace { inline char ToLower(char ch) { @@ -443,20 +443,20 @@ static inline bool ConsumeSubStringImpl(Iterator* current, return true; } -// Consumes the given substring from the iterator. -// Returns false, if the substring does not match. -template -static bool ConsumeSubString(Iterator* current, - Iterator end, +// Consumes the given substring from the iterator. +// Returns false, if the substring does not match. +template +static bool ConsumeSubString(Iterator* current, + Iterator end, const char* substring, bool allow_case_insensibility) { if (allow_case_insensibility) { return ConsumeSubStringImpl(current, end, substring, ToLower); } else { return ConsumeSubStringImpl(current, end, substring, Pass); - } -} - + } +} + // Consumes first character of the str is equal to ch inline bool ConsumeFirstCharacter(char ch, const char* str, @@ -464,93 +464,93 @@ inline bool ConsumeFirstCharacter(char ch, return case_insensibility ? ToLower(ch) == str[0] : ch == str[0]; } } // namespace - -// Maximum number of significant digits in decimal representation. -// The longest possible double in decimal representation is -// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074 -// (768 digits). 
If we parse a number whose first digits are equal to a -// mean of 2 adjacent doubles (that could have up to 769 digits) the result -// must be rounded to the bigger one unless the tail consists of zeros, so -// we don't need to preserve all the digits. -const int kMaxSignificantDigits = 772; - - -static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 }; -static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7); - - -static const uc16 kWhitespaceTable16[] = { - 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195, - 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279 -}; -static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16); - - -static bool isWhitespace(int x) { - if (x < 128) { - for (int i = 0; i < kWhitespaceTable7Length; i++) { - if (kWhitespaceTable7[i] == x) return true; - } - } else { - for (int i = 0; i < kWhitespaceTable16Length; i++) { - if (kWhitespaceTable16[i] == x) return true; - } - } - return false; -} - - -// Returns true if a nonspace found and false if the end has reached. -template -static inline bool AdvanceToNonspace(Iterator* current, Iterator end) { - while (*current != end) { - if (!isWhitespace(**current)) return true; - ++*current; - } - return false; -} - - -static bool isDigit(int x, int radix) { - return (x >= '0' && x <= '9' && x < '0' + radix) - || (radix > 10 && x >= 'a' && x < 'a' + radix - 10) - || (radix > 10 && x >= 'A' && x < 'A' + radix - 10); -} - - -static double SignedZero(bool sign) { - return sign ? -0.0 : 0.0; -} - - -// Returns true if 'c' is a decimal digit that is valid for the given radix. -// -// The function is small and could be inlined, but VS2012 emitted a warning -// because it constant-propagated the radix and concluded that the last -// condition was always true. By moving it into a separate function the -// compiler wouldn't warn anymore. 
-#if _MSC_VER -#pragma optimize("",off) -static bool IsDecimalDigitForRadix(int c, int radix) { - return '0' <= c && c <= '9' && (c - '0') < radix; -} -#pragma optimize("",on) -#else -static bool inline IsDecimalDigitForRadix(int c, int radix) { + +// Maximum number of significant digits in decimal representation. +// The longest possible double in decimal representation is +// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074 +// (768 digits). If we parse a number whose first digits are equal to a +// mean of 2 adjacent doubles (that could have up to 769 digits) the result +// must be rounded to the bigger one unless the tail consists of zeros, so +// we don't need to preserve all the digits. +const int kMaxSignificantDigits = 772; + + +static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 }; +static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7); + + +static const uc16 kWhitespaceTable16[] = { + 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195, + 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279 +}; +static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16); + + +static bool isWhitespace(int x) { + if (x < 128) { + for (int i = 0; i < kWhitespaceTable7Length; i++) { + if (kWhitespaceTable7[i] == x) return true; + } + } else { + for (int i = 0; i < kWhitespaceTable16Length; i++) { + if (kWhitespaceTable16[i] == x) return true; + } + } + return false; +} + + +// Returns true if a nonspace found and false if the end has reached. +template +static inline bool AdvanceToNonspace(Iterator* current, Iterator end) { + while (*current != end) { + if (!isWhitespace(**current)) return true; + ++*current; + } + return false; +} + + +static bool isDigit(int x, int radix) { + return (x >= '0' && x <= '9' && x < '0' + radix) + || (radix > 10 && x >= 'a' && x < 'a' + radix - 10) + || (radix > 10 && x >= 'A' && x < 'A' + radix - 10); +} + + +static double SignedZero(bool sign) { + return sign ? 
-0.0 : 0.0; +} + + +// Returns true if 'c' is a decimal digit that is valid for the given radix. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the last +// condition was always true. By moving it into a separate function the +// compiler wouldn't warn anymore. +#if _MSC_VER +#pragma optimize("",off) +static bool IsDecimalDigitForRadix(int c, int radix) { + return '0' <= c && c <= '9' && (c - '0') < radix; +} +#pragma optimize("",on) +#else +static bool inline IsDecimalDigitForRadix(int c, int radix) { return '0' <= c && c <= '9' && (c - '0') < radix; -} -#endif -// Returns true if 'c' is a character digit that is valid for the given radix. -// The 'a_character' should be 'a' or 'A'. -// -// The function is small and could be inlined, but VS2012 emitted a warning -// because it constant-propagated the radix and concluded that the first -// condition was always false. By moving it into a separate function the -// compiler wouldn't warn anymore. -static bool IsCharacterDigitForRadix(int c, int radix, char a_character) { - return radix > 10 && c >= a_character && c < a_character + radix - 10; -} - +} +#endif +// Returns true if 'c' is a character digit that is valid for the given radix. +// The 'a_character' should be 'a' or 'A'. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the first +// condition was always false. By moving it into a separate function the +// compiler wouldn't warn anymore. +static bool IsCharacterDigitForRadix(int c, int radix, char a_character) { + return radix > 10 && c >= a_character && c < a_character + radix - 10; +} + // Returns true, when the iterator is equal to end. 
template static bool Advance (Iterator* it, char separator, int base, Iterator& end) { @@ -570,7 +570,7 @@ static bool Advance (Iterator* it, char separator, int base, Iterator& end) { } return *it == end; } - + // Checks whether the string in the range start-end is a hex-float string. // This function assumes that the leading '0x'/'0X' is already consumed. // @@ -614,30 +614,30 @@ static bool IsHexFloatString(Iterator start, } -// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. +// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. // // If parse_as_hex_float is true, then the string must be a valid // hex-float. -template -static double RadixStringToIeee(Iterator* current, - Iterator end, - bool sign, +template +static double RadixStringToIeee(Iterator* current, + Iterator end, + bool sign, char separator, bool parse_as_hex_float, - bool allow_trailing_junk, - double junk_string_value, - bool read_as_double, - bool* result_is_junk) { - ASSERT(*current != end); + bool allow_trailing_junk, + double junk_string_value, + bool read_as_double, + bool* result_is_junk) { + ASSERT(*current != end); ASSERT(!parse_as_hex_float || IsHexFloatString(*current, end, separator, allow_trailing_junk)); - - const int kDoubleSize = Double::kSignificandSize; - const int kSingleSize = Single::kSignificandSize; - const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize; - - *result_is_junk = true; - + + const int kDoubleSize = Double::kSignificandSize; + const int kSingleSize = Single::kSignificandSize; + const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize; + + *result_is_junk = true; + int64_t number = 0; int exponent = 0; const int radix = (1 << radix_log_2); @@ -645,24 +645,24 @@ static double RadixStringToIeee(Iterator* current, // Only relevant if parse_as_hex_float is true. bool post_decimal = false; - // Skip leading 0s. - while (**current == '0') { + // Skip leading 0s. 
+ while (**current == '0') { if (Advance(current, separator, radix, end)) { - *result_is_junk = false; - return SignedZero(sign); - } - } - + *result_is_junk = false; + return SignedZero(sign); + } + } + while (true) { - int digit; - if (IsDecimalDigitForRadix(**current, radix)) { - digit = static_cast(**current) - '0'; + int digit; + if (IsDecimalDigitForRadix(**current, radix)) { + digit = static_cast(**current) - '0'; if (post_decimal) exponent -= radix_log_2; - } else if (IsCharacterDigitForRadix(**current, radix, 'a')) { - digit = static_cast(**current) - 'a' + 10; + } else if (IsCharacterDigitForRadix(**current, radix, 'a')) { + digit = static_cast(**current) - 'a' + 10; if (post_decimal) exponent -= radix_log_2; - } else if (IsCharacterDigitForRadix(**current, radix, 'A')) { - digit = static_cast(**current) - 'A' + 10; + } else if (IsCharacterDigitForRadix(**current, radix, 'A')) { + digit = static_cast(**current) - 'A' + 10; if (post_decimal) exponent -= radix_log_2; } else if (parse_as_hex_float && **current == '.') { post_decimal = true; @@ -671,32 +671,32 @@ static double RadixStringToIeee(Iterator* current, continue; } else if (parse_as_hex_float && (**current == 'p' || **current == 'P')) { break; - } else { - if (allow_trailing_junk || !AdvanceToNonspace(current, end)) { - break; - } else { - return junk_string_value; - } - } - - number = number * radix + digit; - int overflow = static_cast(number >> kSignificandSize); - if (overflow != 0) { - // Overflow occurred. Need to determine which direction to round the - // result. 
- int overflow_bits_count = 1; - while (overflow > 1) { - overflow_bits_count++; - overflow >>= 1; - } - - int dropped_bits_mask = ((1 << overflow_bits_count) - 1); - int dropped_bits = static_cast(number) & dropped_bits_mask; - number >>= overflow_bits_count; + } else { + if (allow_trailing_junk || !AdvanceToNonspace(current, end)) { + break; + } else { + return junk_string_value; + } + } + + number = number * radix + digit; + int overflow = static_cast(number >> kSignificandSize); + if (overflow != 0) { + // Overflow occurred. Need to determine which direction to round the + // result. + int overflow_bits_count = 1; + while (overflow > 1) { + overflow_bits_count++; + overflow >>= 1; + } + + int dropped_bits_mask = ((1 << overflow_bits_count) - 1); + int dropped_bits = static_cast(number) & dropped_bits_mask; + number >>= overflow_bits_count; exponent += overflow_bits_count; - - bool zero_tail = true; - for (;;) { + + bool zero_tail = true; + for (;;) { if (Advance(current, separator, radix, end)) break; if (parse_as_hex_float && **current == '.') { // Just run over the '.'. We are just trying to see whether there is @@ -706,42 +706,42 @@ static double RadixStringToIeee(Iterator* current, post_decimal = true; } if (!isDigit(**current, radix)) break; - zero_tail = zero_tail && **current == '0'; + zero_tail = zero_tail && **current == '0'; if (!post_decimal) exponent += radix_log_2; - } - + } + if (!parse_as_hex_float && !allow_trailing_junk && AdvanceToNonspace(current, end)) { - return junk_string_value; - } - - int middle_value = (1 << (overflow_bits_count - 1)); - if (dropped_bits > middle_value) { - number++; // Rounding up. - } else if (dropped_bits == middle_value) { - // Rounding to even to consistency with decimals: half-way case rounds - // up if significant part is odd and down otherwise. - if ((number & 1) != 0 || !zero_tail) { - number++; // Rounding up. - } - } - - // Rounding up may cause overflow. 
- if ((number & ((int64_t)1 << kSignificandSize)) != 0) { - exponent++; - number >>= 1; - } - break; - } + return junk_string_value; + } + + int middle_value = (1 << (overflow_bits_count - 1)); + if (dropped_bits > middle_value) { + number++; // Rounding up. + } else if (dropped_bits == middle_value) { + // Rounding to even to consistency with decimals: half-way case rounds + // up if significant part is odd and down otherwise. + if ((number & 1) != 0 || !zero_tail) { + number++; // Rounding up. + } + } + + // Rounding up may cause overflow. + if ((number & ((int64_t)1 << kSignificandSize)) != 0) { + exponent++; + number >>= 1; + } + break; + } if (Advance(current, separator, radix, end)) break; } - - ASSERT(number < ((int64_t)1 << kSignificandSize)); - ASSERT(static_cast(static_cast(number)) == number); - - *result_is_junk = false; - + + ASSERT(number < ((int64_t)1 << kSignificandSize)); + ASSERT(static_cast(static_cast(number)) == number); + + *result_is_junk = false; + if (parse_as_hex_float) { ASSERT(**current == 'p' || **current == 'P'); Advance(current, separator, radix, end); @@ -765,133 +765,133 @@ static double RadixStringToIeee(Iterator* current, } if (exponent == 0 || number == 0) { - if (sign) { - if (number == 0) return -0.0; - number = -number; - } - return static_cast(number); - } - - ASSERT(number != 0); + if (sign) { + if (number == 0) return -0.0; + number = -number; + } + return static_cast(number); + } + + ASSERT(number != 0); double result = Double(DiyFp(number, exponent)).value(); return sign ? 
-result : result; -} - -template -double StringToDoubleConverter::StringToIeee( - Iterator input, - int length, - bool read_as_double, - int* processed_characters_count) const { - Iterator current = input; - Iterator end = input + length; - - *processed_characters_count = 0; - - const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0; - const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0; - const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0; - const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0; +} + +template +double StringToDoubleConverter::StringToIeee( + Iterator input, + int length, + bool read_as_double, + int* processed_characters_count) const { + Iterator current = input; + Iterator end = input + length; + + *processed_characters_count = 0; + + const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0; + const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0; + const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0; + const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0; const bool allow_case_insensibility = (flags_ & ALLOW_CASE_INSENSIBILITY) != 0; - - // To make sure that iterator dereferencing is valid the following - // convention is used: - // 1. Each '++current' statement is followed by check for equality to 'end'. - // 2. If AdvanceToNonspace returned false then current == end. - // 3. If 'current' becomes equal to 'end' the function returns or goes to - // 'parsing_done'. - // 4. 'current' is not dereferenced after the 'parsing_done' label. - // 5. Code before 'parsing_done' may rely on 'current != end'. 
- if (current == end) return empty_string_value_; - - if (allow_leading_spaces || allow_trailing_spaces) { - if (!AdvanceToNonspace(¤t, end)) { - *processed_characters_count = static_cast(current - input); - return empty_string_value_; - } - if (!allow_leading_spaces && (input != current)) { - // No leading spaces allowed, but AdvanceToNonspace moved forward. - return junk_string_value_; - } - } - - // The longest form of simplified number is: "-.1eXXX\0". - const int kBufferSize = kMaxSignificantDigits + 10; - char buffer[kBufferSize]; // NOLINT: size is known at compile time. - int buffer_pos = 0; - - // Exponent will be adjusted if insignificant digits of the integer part - // or insignificant leading zeros of the fractional part are dropped. - int exponent = 0; - int significant_digits = 0; - int insignificant_digits = 0; - bool nonzero_digit_dropped = false; - - bool sign = false; - - if (*current == '+' || *current == '-') { - sign = (*current == '-'); - ++current; - Iterator next_non_space = current; - // Skip following spaces (if allowed). - if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_; - if (!allow_spaces_after_sign && (current != next_non_space)) { - return junk_string_value_; - } - current = next_non_space; - } - - if (infinity_symbol_ != NULL) { + + // To make sure that iterator dereferencing is valid the following + // convention is used: + // 1. Each '++current' statement is followed by check for equality to 'end'. + // 2. If AdvanceToNonspace returned false then current == end. + // 3. If 'current' becomes equal to 'end' the function returns or goes to + // 'parsing_done'. + // 4. 'current' is not dereferenced after the 'parsing_done' label. + // 5. Code before 'parsing_done' may rely on 'current != end'. 
+ if (current == end) return empty_string_value_; + + if (allow_leading_spaces || allow_trailing_spaces) { + if (!AdvanceToNonspace(¤t, end)) { + *processed_characters_count = static_cast(current - input); + return empty_string_value_; + } + if (!allow_leading_spaces && (input != current)) { + // No leading spaces allowed, but AdvanceToNonspace moved forward. + return junk_string_value_; + } + } + + // The longest form of simplified number is: "-.1eXXX\0". + const int kBufferSize = kMaxSignificantDigits + 10; + char buffer[kBufferSize]; // NOLINT: size is known at compile time. + int buffer_pos = 0; + + // Exponent will be adjusted if insignificant digits of the integer part + // or insignificant leading zeros of the fractional part are dropped. + int exponent = 0; + int significant_digits = 0; + int insignificant_digits = 0; + bool nonzero_digit_dropped = false; + + bool sign = false; + + if (*current == '+' || *current == '-') { + sign = (*current == '-'); + ++current; + Iterator next_non_space = current; + // Skip following spaces (if allowed). + if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_; + if (!allow_spaces_after_sign && (current != next_non_space)) { + return junk_string_value_; + } + current = next_non_space; + } + + if (infinity_symbol_ != NULL) { if (ConsumeFirstCharacter(*current, infinity_symbol_, allow_case_insensibility)) { if (!ConsumeSubString(¤t, end, infinity_symbol_, allow_case_insensibility)) { - return junk_string_value_; - } - - if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { - return junk_string_value_; - } - if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { - return junk_string_value_; - } - - ASSERT(buffer_pos == 0); - *processed_characters_count = static_cast(current - input); - return sign ? 
-Double::Infinity() : Double::Infinity(); - } - } - - if (nan_symbol_ != NULL) { + return junk_string_value_; + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return junk_string_value_; + } + + ASSERT(buffer_pos == 0); + *processed_characters_count = static_cast(current - input); + return sign ? -Double::Infinity() : Double::Infinity(); + } + } + + if (nan_symbol_ != NULL) { if (ConsumeFirstCharacter(*current, nan_symbol_, allow_case_insensibility)) { if (!ConsumeSubString(¤t, end, nan_symbol_, allow_case_insensibility)) { - return junk_string_value_; - } - - if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { - return junk_string_value_; - } - if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { - return junk_string_value_; - } - - ASSERT(buffer_pos == 0); - *processed_characters_count = static_cast(current - input); - return sign ? -Double::NaN() : Double::NaN(); - } - } - - bool leading_zero = false; - if (*current == '0') { + return junk_string_value_; + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return junk_string_value_; + } + + ASSERT(buffer_pos == 0); + *processed_characters_count = static_cast(current - input); + return sign ? -Double::NaN() : Double::NaN(); + } + } + + bool leading_zero = false; + if (*current == '0') { if (Advance(¤t, separator_, 10, end)) { - *processed_characters_count = static_cast(current - input); - return SignedZero(sign); - } - - leading_zero = true; - - // It could be hexadecimal value. + *processed_characters_count = static_cast(current - input); + return SignedZero(sign); + } + + leading_zero = true; + + // It could be hexadecimal value. 
if (((flags_ & ALLOW_HEX) || (flags_ & ALLOW_HEX_FLOATS)) && (*current == 'x' || *current == 'X')) { - ++current; + ++current; bool parse_as_hex_float = (flags_ & ALLOW_HEX_FLOATS) && IsHexFloatString(current, end, separator_, allow_trailing_junk); @@ -899,240 +899,240 @@ double StringToDoubleConverter::StringToIeee( if (current == end) return junk_string_value_; // "0x" if (!parse_as_hex_float && !isDigit(*current, 16)) { return junk_string_value_; - } - - bool result_is_junk; - double result = RadixStringToIeee<4>(¤t, - end, - sign, + } + + bool result_is_junk; + double result = RadixStringToIeee<4>(¤t, + end, + sign, separator_, parse_as_hex_float, - allow_trailing_junk, - junk_string_value_, - read_as_double, - &result_is_junk); - if (!result_is_junk) { - if (allow_trailing_spaces) AdvanceToNonspace(¤t, end); - *processed_characters_count = static_cast(current - input); - } - return result; - } - - // Ignore leading zeros in the integer part. - while (*current == '0') { + allow_trailing_junk, + junk_string_value_, + read_as_double, + &result_is_junk); + if (!result_is_junk) { + if (allow_trailing_spaces) AdvanceToNonspace(¤t, end); + *processed_characters_count = static_cast(current - input); + } + return result; + } + + // Ignore leading zeros in the integer part. + while (*current == '0') { if (Advance(¤t, separator_, 10, end)) { - *processed_characters_count = static_cast(current - input); - return SignedZero(sign); - } - } - } - - bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0; - - // Copy significant digits of the integer part (if any) to the buffer. - while (*current >= '0' && *current <= '9') { - if (significant_digits < kMaxSignificantDigits) { - ASSERT(buffer_pos < kBufferSize); - buffer[buffer_pos++] = static_cast(*current); - significant_digits++; - // Will later check if it's an octal in the buffer. - } else { - insignificant_digits++; // Move the digit into the exponential part. 
- nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; - } - octal = octal && *current < '8'; + *processed_characters_count = static_cast(current - input); + return SignedZero(sign); + } + } + } + + bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0; + + // Copy significant digits of the integer part (if any) to the buffer. + while (*current >= '0' && *current <= '9') { + if (significant_digits < kMaxSignificantDigits) { + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = static_cast(*current); + significant_digits++; + // Will later check if it's an octal in the buffer. + } else { + insignificant_digits++; // Move the digit into the exponential part. + nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; + } + octal = octal && *current < '8'; if (Advance(¤t, separator_, 10, end)) goto parsing_done; - } - - if (significant_digits == 0) { - octal = false; - } - - if (*current == '.') { - if (octal && !allow_trailing_junk) return junk_string_value_; - if (octal) goto parsing_done; - + } + + if (significant_digits == 0) { + octal = false; + } + + if (*current == '.') { + if (octal && !allow_trailing_junk) return junk_string_value_; + if (octal) goto parsing_done; + if (Advance(¤t, separator_, 10, end)) { - if (significant_digits == 0 && !leading_zero) { - return junk_string_value_; - } else { - goto parsing_done; - } - } - - if (significant_digits == 0) { - // octal = false; - // Integer part consists of 0 or is absent. Significant digits start after - // leading zeros (if any). - while (*current == '0') { + if (significant_digits == 0 && !leading_zero) { + return junk_string_value_; + } else { + goto parsing_done; + } + } + + if (significant_digits == 0) { + // octal = false; + // Integer part consists of 0 or is absent. Significant digits start after + // leading zeros (if any). 
+ while (*current == '0') { if (Advance(¤t, separator_, 10, end)) { - *processed_characters_count = static_cast(current - input); - return SignedZero(sign); - } - exponent--; // Move this 0 into the exponent. - } - } - - // There is a fractional part. - // We don't emit a '.', but adjust the exponent instead. - while (*current >= '0' && *current <= '9') { - if (significant_digits < kMaxSignificantDigits) { - ASSERT(buffer_pos < kBufferSize); - buffer[buffer_pos++] = static_cast(*current); - significant_digits++; - exponent--; - } else { - // Ignore insignificant digits in the fractional part. - nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; - } + *processed_characters_count = static_cast(current - input); + return SignedZero(sign); + } + exponent--; // Move this 0 into the exponent. + } + } + + // There is a fractional part. + // We don't emit a '.', but adjust the exponent instead. + while (*current >= '0' && *current <= '9') { + if (significant_digits < kMaxSignificantDigits) { + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = static_cast(*current); + significant_digits++; + exponent--; + } else { + // Ignore insignificant digits in the fractional part. + nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; + } if (Advance(¤t, separator_, 10, end)) goto parsing_done; - } - } - - if (!leading_zero && exponent == 0 && significant_digits == 0) { - // If leading_zeros is true then the string contains zeros. - // If exponent < 0 then string was [+-]\.0*... - // If significant_digits != 0 the string is not equal to 0. - // Otherwise there are no digits in the string. - return junk_string_value_; - } - - // Parse exponential part. - if (*current == 'e' || *current == 'E') { - if (octal && !allow_trailing_junk) return junk_string_value_; - if (octal) goto parsing_done; + } + } + + if (!leading_zero && exponent == 0 && significant_digits == 0) { + // If leading_zeros is true then the string contains zeros. 
+ // If exponent < 0 then string was [+-]\.0*... + // If significant_digits != 0 the string is not equal to 0. + // Otherwise there are no digits in the string. + return junk_string_value_; + } + + // Parse exponential part. + if (*current == 'e' || *current == 'E') { + if (octal && !allow_trailing_junk) return junk_string_value_; + if (octal) goto parsing_done; Iterator junk_begin = current; - ++current; - if (current == end) { - if (allow_trailing_junk) { + ++current; + if (current == end) { + if (allow_trailing_junk) { current = junk_begin; - goto parsing_done; - } else { - return junk_string_value_; - } - } - char exponen_sign = '+'; - if (*current == '+' || *current == '-') { - exponen_sign = static_cast(*current); - ++current; - if (current == end) { - if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + char exponen_sign = '+'; + if (*current == '+' || *current == '-') { + exponen_sign = static_cast(*current); + ++current; + if (current == end) { + if (allow_trailing_junk) { current = junk_begin; - goto parsing_done; - } else { - return junk_string_value_; - } - } - } - - if (current == end || *current < '0' || *current > '9') { - if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + } + + if (current == end || *current < '0' || *current > '9') { + if (allow_trailing_junk) { current = junk_begin; - goto parsing_done; - } else { - return junk_string_value_; - } - } - - const int max_exponent = INT_MAX / 2; - ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2); - int num = 0; - do { - // Check overflow. - int digit = *current - '0'; - if (num >= max_exponent / 10 - && !(num == max_exponent / 10 && digit <= max_exponent % 10)) { - num = max_exponent; - } else { - num = num * 10 + digit; - } - ++current; - } while (current != end && *current >= '0' && *current <= '9'); - - exponent += (exponen_sign == '-' ? 
-num : num); - } - - if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { - return junk_string_value_; - } - if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { - return junk_string_value_; - } - if (allow_trailing_spaces) { - AdvanceToNonspace(¤t, end); - } - - parsing_done: - exponent += insignificant_digits; - - if (octal) { - double result; - bool result_is_junk; - char* start = buffer; - result = RadixStringToIeee<3>(&start, - buffer + buffer_pos, - sign, + goto parsing_done; + } else { + return junk_string_value_; + } + } + + const int max_exponent = INT_MAX / 2; + ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2); + int num = 0; + do { + // Check overflow. + int digit = *current - '0'; + if (num >= max_exponent / 10 + && !(num == max_exponent / 10 && digit <= max_exponent % 10)) { + num = max_exponent; + } else { + num = num * 10 + digit; + } + ++current; + } while (current != end && *current >= '0' && *current <= '9'); + + exponent += (exponen_sign == '-' ? -num : num); + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return junk_string_value_; + } + if (allow_trailing_spaces) { + AdvanceToNonspace(¤t, end); + } + + parsing_done: + exponent += insignificant_digits; + + if (octal) { + double result; + bool result_is_junk; + char* start = buffer; + result = RadixStringToIeee<3>(&start, + buffer + buffer_pos, + sign, separator_, false, // Don't parse as hex_float. 
- allow_trailing_junk, - junk_string_value_, - read_as_double, - &result_is_junk); - ASSERT(!result_is_junk); - *processed_characters_count = static_cast(current - input); - return result; - } - - if (nonzero_digit_dropped) { - buffer[buffer_pos++] = '1'; - exponent--; - } - - ASSERT(buffer_pos < kBufferSize); - buffer[buffer_pos] = '\0'; - - double converted; - if (read_as_double) { - converted = Strtod(Vector(buffer, buffer_pos), exponent); - } else { - converted = Strtof(Vector(buffer, buffer_pos), exponent); - } - *processed_characters_count = static_cast(current - input); - return sign? -converted: converted; -} - - -double StringToDoubleConverter::StringToDouble( - const char* buffer, - int length, - int* processed_characters_count) const { - return StringToIeee(buffer, length, true, processed_characters_count); -} - - -double StringToDoubleConverter::StringToDouble( - const uc16* buffer, - int length, - int* processed_characters_count) const { - return StringToIeee(buffer, length, true, processed_characters_count); -} - - -float StringToDoubleConverter::StringToFloat( - const char* buffer, - int length, - int* processed_characters_count) const { - return static_cast(StringToIeee(buffer, length, false, - processed_characters_count)); -} - - -float StringToDoubleConverter::StringToFloat( - const uc16* buffer, - int length, - int* processed_characters_count) const { - return static_cast(StringToIeee(buffer, length, false, - processed_characters_count)); -} - -} // namespace double_conversion + allow_trailing_junk, + junk_string_value_, + read_as_double, + &result_is_junk); + ASSERT(!result_is_junk); + *processed_characters_count = static_cast(current - input); + return result; + } + + if (nonzero_digit_dropped) { + buffer[buffer_pos++] = '1'; + exponent--; + } + + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos] = '\0'; + + double converted; + if (read_as_double) { + converted = Strtod(Vector(buffer, buffer_pos), exponent); + } else { + converted = 
Strtof(Vector(buffer, buffer_pos), exponent); + } + *processed_characters_count = static_cast(current - input); + return sign? -converted: converted; +} + + +double StringToDoubleConverter::StringToDouble( + const char* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +double StringToDoubleConverter::StringToDouble( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +float StringToDoubleConverter::StringToFloat( + const char* buffer, + int length, + int* processed_characters_count) const { + return static_cast(StringToIeee(buffer, length, false, + processed_characters_count)); +} + + +float StringToDoubleConverter::StringToFloat( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return static_cast(StringToIeee(buffer, length, false, + processed_characters_count)); +} + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/double-conversion.h b/contrib/libs/double-conversion/double-conversion.h index 0b4cce51b25..6dbc0997c61 100644 --- a/contrib/libs/double-conversion/double-conversion.h +++ b/contrib/libs/double-conversion/double-conversion.h @@ -1,435 +1,435 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ -#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ - -#include "utils.h" - -namespace double_conversion { - -class DoubleToStringConverter { - public: - // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint - // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the - // function returns false. - static const int kMaxFixedDigitsBeforePoint = 60; - static const int kMaxFixedDigitsAfterPoint = 60; - - // When calling ToExponential with a requested_digits - // parameter > kMaxExponentialDigits then the function returns false. - static const int kMaxExponentialDigits = 120; - - // When calling ToPrecision with a requested_digits - // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits - // then the function returns false. 
- static const int kMinPrecisionDigits = 1; - static const int kMaxPrecisionDigits = 120; - - enum Flags { - NO_FLAGS = 0, - EMIT_POSITIVE_EXPONENT_SIGN = 1, - EMIT_TRAILING_DECIMAL_POINT = 2, - EMIT_TRAILING_ZERO_AFTER_POINT = 4, - UNIQUE_ZERO = 8 - }; - - // Flags should be a bit-or combination of the possible Flags-enum. - // - NO_FLAGS: no special flags. - // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent - // form, emits a '+' for positive exponents. Example: 1.2e+2. - // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is - // converted into decimal format then a trailing decimal point is appended. - // Example: 2345.0 is converted to "2345.". - // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point - // emits a trailing '0'-character. This flag requires the - // EXMIT_TRAILING_DECIMAL_POINT flag. - // Example: 2345.0 is converted to "2345.0". - // - UNIQUE_ZERO: "-0.0" is converted to "0.0". - // - // Infinity symbol and nan_symbol provide the string representation for these - // special values. If the string is NULL and the special value is encountered - // then the conversion functions return false. - // - // The exponent_character is used in exponential representations. It is - // usually 'e' or 'E'. - // - // When converting to the shortest representation the converter will - // represent input numbers in decimal format if they are in the interval - // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[ - // (lower boundary included, greater boundary excluded). 
- // Example: with decimal_in_shortest_low = -6 and - // decimal_in_shortest_high = 21: - // ToShortest(0.000001) -> "0.000001" - // ToShortest(0.0000001) -> "1e-7" - // ToShortest(111111111111111111111.0) -> "111111111111111110000" - // ToShortest(100000000000000000000.0) -> "100000000000000000000" - // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" - // - // When converting to precision mode the converter may add - // max_leading_padding_zeroes before returning the number in exponential - // format. - // Example with max_leading_padding_zeroes_in_precision_mode = 6. - // ToPrecision(0.0000012345, 2) -> "0.0000012" - // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarily the converter may add up to - // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid - // returning an exponential representation. A zero added by the - // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: - // ToPrecision(230.0, 2) -> "230" - // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. - // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. 
- DoubleToStringConverter(int flags, - const char* infinity_symbol, - const char* nan_symbol, - char exponent_character, - int decimal_in_shortest_low, - int decimal_in_shortest_high, - int max_leading_padding_zeroes_in_precision_mode, - int max_trailing_padding_zeroes_in_precision_mode) - : flags_(flags), - infinity_symbol_(infinity_symbol), - nan_symbol_(nan_symbol), - exponent_character_(exponent_character), - decimal_in_shortest_low_(decimal_in_shortest_low), - decimal_in_shortest_high_(decimal_in_shortest_high), - max_leading_padding_zeroes_in_precision_mode_( - max_leading_padding_zeroes_in_precision_mode), - max_trailing_padding_zeroes_in_precision_mode_( - max_trailing_padding_zeroes_in_precision_mode) { - // When 'trailing zero after the point' is set, then 'trailing point' - // must be set too. - ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) || - !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0)); - } - - // Returns a converter following the EcmaScript specification. - static const DoubleToStringConverter& EcmaScriptConverter(); - - // Computes the shortest string of digits that correctly represent the input - // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high - // (see constructor) it then either returns a decimal representation, or an - // exponential representation. - // Example with decimal_in_shortest_low = -6, - // decimal_in_shortest_high = 21, - // EMIT_POSITIVE_EXPONENT_SIGN activated, and - // EMIT_TRAILING_DECIMAL_POINT deactived: - // ToShortest(0.000001) -> "0.000001" - // ToShortest(0.0000001) -> "1e-7" - // ToShortest(111111111111111111111.0) -> "111111111111111110000" - // ToShortest(100000000000000000000.0) -> "100000000000000000000" - // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" - // - // Note: the conversion may round the output if the returned string - // is accurate enough to uniquely identify the input-number. 
- // For example the most precise representation of the double 9e59 equals - // "899999999999999918767229449717619953810131273674690656206848", but - // the converter will return the shorter (but still correct) "9e59". - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except when the input value is special and no infinity_symbol or - // nan_symbol has been given to the constructor. - bool ToShortest(double value, StringBuilder* result_builder) const { - return ToShortestIeeeNumber(value, result_builder, SHORTEST); - } - - // Same as ToShortest, but for single-precision floats. - bool ToShortestSingle(float value, StringBuilder* result_builder) const { - return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE); - } - - - // Computes a decimal representation with a fixed number of digits after the - // decimal point. The last emitted digit is rounded. - // - // Examples: - // ToFixed(3.12, 1) -> "3.1" - // ToFixed(3.1415, 3) -> "3.142" - // ToFixed(1234.56789, 4) -> "1234.5679" - // ToFixed(1.23, 5) -> "1.23000" - // ToFixed(0.1, 4) -> "0.1000" - // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00" - // ToFixed(0.1, 30) -> "0.100000000000000005551115123126" - // ToFixed(0.1, 17) -> "0.10000000000000001" - // - // If requested_digits equals 0, then the tail of the result depends on - // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT. - // Examples, for requested_digits == 0, - // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be - // - false and false: then 123.45 -> 123 - // 0.678 -> 1 - // - true and false: then 123.45 -> 123. - // 0.678 -> 1. - // - true and true: then 123.45 -> 123.0 - // 0.678 -> 1.0 - // - // Returns true if the conversion succeeds. 
The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - 'value' > 10^kMaxFixedDigitsBeforePoint, or - // - 'requested_digits' > kMaxFixedDigitsAfterPoint. - // The last two conditions imply that the result will never contain more than - // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters - // (one additional character for the sign, and one for the decimal point). - bool ToFixed(double value, - int requested_digits, - StringBuilder* result_builder) const; - - // Computes a representation in exponential format with requested_digits - // after the decimal point. The last emitted digit is rounded. - // If requested_digits equals -1, then the shortest exponential representation - // is computed. - // - // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and - // exponent_character set to 'e'. - // ToExponential(3.12, 1) -> "3.1e0" - // ToExponential(5.0, 3) -> "5.000e0" - // ToExponential(0.001, 2) -> "1.00e-3" - // ToExponential(3.1415, -1) -> "3.1415e0" - // ToExponential(3.1415, 4) -> "3.1415e0" - // ToExponential(3.1415, 3) -> "3.142e0" - // ToExponential(123456789000000, 3) -> "1.235e14" - // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30" - // ToExponential(1000000000000000019884624838656.0, 32) -> - // "1.00000000000000001988462483865600e30" - // ToExponential(1234, 0) -> "1e3" - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - 'requested_digits' > kMaxExponentialDigits. 
- // The last condition implies that the result will never contain more than - // kMaxExponentialDigits + 8 characters (the sign, the digit before the - // decimal point, the decimal point, the exponent character, the - // exponent's sign, and at most 3 exponent digits). - bool ToExponential(double value, - int requested_digits, - StringBuilder* result_builder) const; - - // Computes 'precision' leading digits of the given 'value' and returns them - // either in exponential or decimal format, depending on - // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the - // constructor). - // The last computed digit is rounded. - // - // Example with max_leading_padding_zeroes_in_precision_mode = 6. - // ToPrecision(0.0000012345, 2) -> "0.0000012" - // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarily the converter may add up to - // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid - // returning an exponential representation. A zero added by the - // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: - // ToPrecision(230.0, 2) -> "230" - // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. - // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no - // EMIT_TRAILING_ZERO_AFTER_POINT: - // ToPrecision(123450.0, 6) -> "123450" - // ToPrecision(123450.0, 5) -> "123450" - // ToPrecision(123450.0, 4) -> "123500" - // ToPrecision(123450.0, 3) -> "123000" - // ToPrecision(123450.0, 2) -> "1.2e5" - // - // Returns true if the conversion succeeds. 
The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - precision < kMinPericisionDigits - // - precision > kMaxPrecisionDigits - // The last condition implies that the result will never contain more than - // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the - // exponent character, the exponent's sign, and at most 3 exponent digits). - bool ToPrecision(double value, - int precision, - StringBuilder* result_builder) const; - - enum DtoaMode { - // Produce the shortest correct representation. - // For example the output of 0.299999999999999988897 is (the less accurate - // but correct) 0.3. - SHORTEST, - // Same as SHORTEST, but for single-precision floats. - SHORTEST_SINGLE, - // Produce a fixed number of digits after the decimal point. - // For instance fixed(0.1, 4) becomes 0.1000 - // If the input number is big, the output will be big. - FIXED, - // Fixed number of digits (independent of the decimal point). - PRECISION - }; - - // The maximal number of digits that are needed to emit a double in base 10. - // A higher precision can be achieved by using more digits, but the shortest - // accurate representation of any double will never use more digits than - // kBase10MaximalLength. - // Note that DoubleToAscii null-terminates its input. So the given buffer - // should be at least kBase10MaximalLength + 1 characters long. - static const int kBase10MaximalLength = 17; - +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ +#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ + +#include "utils.h" + +namespace double_conversion { + +class DoubleToStringConverter { + public: + // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint + // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the + // function returns false. + static const int kMaxFixedDigitsBeforePoint = 60; + static const int kMaxFixedDigitsAfterPoint = 60; + + // When calling ToExponential with a requested_digits + // parameter > kMaxExponentialDigits then the function returns false. 
+ static const int kMaxExponentialDigits = 120; + + // When calling ToPrecision with a requested_digits + // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits + // then the function returns false. + static const int kMinPrecisionDigits = 1; + static const int kMaxPrecisionDigits = 120; + + enum Flags { + NO_FLAGS = 0, + EMIT_POSITIVE_EXPONENT_SIGN = 1, + EMIT_TRAILING_DECIMAL_POINT = 2, + EMIT_TRAILING_ZERO_AFTER_POINT = 4, + UNIQUE_ZERO = 8 + }; + + // Flags should be a bit-or combination of the possible Flags-enum. + // - NO_FLAGS: no special flags. + // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent + // form, emits a '+' for positive exponents. Example: 1.2e+2. + // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is + // converted into decimal format then a trailing decimal point is appended. + // Example: 2345.0 is converted to "2345.". + // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point + // emits a trailing '0'-character. This flag requires the + // EXMIT_TRAILING_DECIMAL_POINT flag. + // Example: 2345.0 is converted to "2345.0". + // - UNIQUE_ZERO: "-0.0" is converted to "0.0". + // + // Infinity symbol and nan_symbol provide the string representation for these + // special values. If the string is NULL and the special value is encountered + // then the conversion functions return false. + // + // The exponent_character is used in exponential representations. It is + // usually 'e' or 'E'. + // + // When converting to the shortest representation the converter will + // represent input numbers in decimal format if they are in the interval + // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[ + // (lower boundary included, greater boundary excluded). 
+ // Example: with decimal_in_shortest_low = -6 and + // decimal_in_shortest_high = 21: + // ToShortest(0.000001) -> "0.000001" + // ToShortest(0.0000001) -> "1e-7" + // ToShortest(111111111111111111111.0) -> "111111111111111110000" + // ToShortest(100000000000000000000.0) -> "100000000000000000000" + // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" + // + // When converting to precision mode the converter may add + // max_leading_padding_zeroes before returning the number in exponential + // format. + // Example with max_leading_padding_zeroes_in_precision_mode = 6. + // ToPrecision(0.0000012345, 2) -> "0.0000012" + // ToPrecision(0.00000012345, 2) -> "1.2e-7" + // Similarily the converter may add up to + // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid + // returning an exponential representation. A zero added by the + // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: + // ToPrecision(230.0, 2) -> "230" + // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. + // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. 
+ DoubleToStringConverter(int flags, + const char* infinity_symbol, + const char* nan_symbol, + char exponent_character, + int decimal_in_shortest_low, + int decimal_in_shortest_high, + int max_leading_padding_zeroes_in_precision_mode, + int max_trailing_padding_zeroes_in_precision_mode) + : flags_(flags), + infinity_symbol_(infinity_symbol), + nan_symbol_(nan_symbol), + exponent_character_(exponent_character), + decimal_in_shortest_low_(decimal_in_shortest_low), + decimal_in_shortest_high_(decimal_in_shortest_high), + max_leading_padding_zeroes_in_precision_mode_( + max_leading_padding_zeroes_in_precision_mode), + max_trailing_padding_zeroes_in_precision_mode_( + max_trailing_padding_zeroes_in_precision_mode) { + // When 'trailing zero after the point' is set, then 'trailing point' + // must be set too. + ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) || + !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0)); + } + + // Returns a converter following the EcmaScript specification. + static const DoubleToStringConverter& EcmaScriptConverter(); + + // Computes the shortest string of digits that correctly represent the input + // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high + // (see constructor) it then either returns a decimal representation, or an + // exponential representation. + // Example with decimal_in_shortest_low = -6, + // decimal_in_shortest_high = 21, + // EMIT_POSITIVE_EXPONENT_SIGN activated, and + // EMIT_TRAILING_DECIMAL_POINT deactived: + // ToShortest(0.000001) -> "0.000001" + // ToShortest(0.0000001) -> "1e-7" + // ToShortest(111111111111111111111.0) -> "111111111111111110000" + // ToShortest(100000000000000000000.0) -> "100000000000000000000" + // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" + // + // Note: the conversion may round the output if the returned string + // is accurate enough to uniquely identify the input-number. 
+ // For example the most precise representation of the double 9e59 equals + // "899999999999999918767229449717619953810131273674690656206848", but + // the converter will return the shorter (but still correct) "9e59". + // + // Returns true if the conversion succeeds. The conversion always succeeds + // except when the input value is special and no infinity_symbol or + // nan_symbol has been given to the constructor. + bool ToShortest(double value, StringBuilder* result_builder) const { + return ToShortestIeeeNumber(value, result_builder, SHORTEST); + } + + // Same as ToShortest, but for single-precision floats. + bool ToShortestSingle(float value, StringBuilder* result_builder) const { + return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE); + } + + + // Computes a decimal representation with a fixed number of digits after the + // decimal point. The last emitted digit is rounded. + // + // Examples: + // ToFixed(3.12, 1) -> "3.1" + // ToFixed(3.1415, 3) -> "3.142" + // ToFixed(1234.56789, 4) -> "1234.5679" + // ToFixed(1.23, 5) -> "1.23000" + // ToFixed(0.1, 4) -> "0.1000" + // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00" + // ToFixed(0.1, 30) -> "0.100000000000000005551115123126" + // ToFixed(0.1, 17) -> "0.10000000000000001" + // + // If requested_digits equals 0, then the tail of the result depends on + // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT. + // Examples, for requested_digits == 0, + // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be + // - false and false: then 123.45 -> 123 + // 0.678 -> 1 + // - true and false: then 123.45 -> 123. + // 0.678 -> 1. + // - true and true: then 123.45 -> 123.0 + // 0.678 -> 1.0 + // + // Returns true if the conversion succeeds. 
The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - 'value' > 10^kMaxFixedDigitsBeforePoint, or + // - 'requested_digits' > kMaxFixedDigitsAfterPoint. + // The last two conditions imply that the result will never contain more than + // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters + // (one additional character for the sign, and one for the decimal point). + bool ToFixed(double value, + int requested_digits, + StringBuilder* result_builder) const; + + // Computes a representation in exponential format with requested_digits + // after the decimal point. The last emitted digit is rounded. + // If requested_digits equals -1, then the shortest exponential representation + // is computed. + // + // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and + // exponent_character set to 'e'. + // ToExponential(3.12, 1) -> "3.1e0" + // ToExponential(5.0, 3) -> "5.000e0" + // ToExponential(0.001, 2) -> "1.00e-3" + // ToExponential(3.1415, -1) -> "3.1415e0" + // ToExponential(3.1415, 4) -> "3.1415e0" + // ToExponential(3.1415, 3) -> "3.142e0" + // ToExponential(123456789000000, 3) -> "1.235e14" + // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30" + // ToExponential(1000000000000000019884624838656.0, 32) -> + // "1.00000000000000001988462483865600e30" + // ToExponential(1234, 0) -> "1e3" + // + // Returns true if the conversion succeeds. The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - 'requested_digits' > kMaxExponentialDigits. 
+ // The last condition implies that the result will never contain more than + // kMaxExponentialDigits + 8 characters (the sign, the digit before the + // decimal point, the decimal point, the exponent character, the + // exponent's sign, and at most 3 exponent digits). + bool ToExponential(double value, + int requested_digits, + StringBuilder* result_builder) const; + + // Computes 'precision' leading digits of the given 'value' and returns them + // either in exponential or decimal format, depending on + // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the + // constructor). + // The last computed digit is rounded. + // + // Example with max_leading_padding_zeroes_in_precision_mode = 6. + // ToPrecision(0.0000012345, 2) -> "0.0000012" + // ToPrecision(0.00000012345, 2) -> "1.2e-7" + // Similarily the converter may add up to + // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid + // returning an exponential representation. A zero added by the + // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: + // ToPrecision(230.0, 2) -> "230" + // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. + // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no + // EMIT_TRAILING_ZERO_AFTER_POINT: + // ToPrecision(123450.0, 6) -> "123450" + // ToPrecision(123450.0, 5) -> "123450" + // ToPrecision(123450.0, 4) -> "123500" + // ToPrecision(123450.0, 3) -> "123000" + // ToPrecision(123450.0, 2) -> "1.2e5" + // + // Returns true if the conversion succeeds. 
The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - precision < kMinPericisionDigits + // - precision > kMaxPrecisionDigits + // The last condition implies that the result will never contain more than + // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the + // exponent character, the exponent's sign, and at most 3 exponent digits). + bool ToPrecision(double value, + int precision, + StringBuilder* result_builder) const; + + enum DtoaMode { + // Produce the shortest correct representation. + // For example the output of 0.299999999999999988897 is (the less accurate + // but correct) 0.3. + SHORTEST, + // Same as SHORTEST, but for single-precision floats. + SHORTEST_SINGLE, + // Produce a fixed number of digits after the decimal point. + // For instance fixed(0.1, 4) becomes 0.1000 + // If the input number is big, the output will be big. + FIXED, + // Fixed number of digits (independent of the decimal point). + PRECISION + }; + + // The maximal number of digits that are needed to emit a double in base 10. + // A higher precision can be achieved by using more digits, but the shortest + // accurate representation of any double will never use more digits than + // kBase10MaximalLength. + // Note that DoubleToAscii null-terminates its input. So the given buffer + // should be at least kBase10MaximalLength + 1 characters long. + static const int kBase10MaximalLength = 17; + // Converts the given double 'v' to digit characters. 'v' must not be NaN, // +Infinity, or -Infinity. In SHORTEST_SINGLE-mode this restriction also // applies to 'v' after it has been casted to a single-precision float. That // is, in this mode static_cast(v) must not be NaN, +Infinity or // -Infinity. - // - // The result should be interpreted as buffer * 10^(point-length). 
- // + // + // The result should be interpreted as buffer * 10^(point-length). + // // The digits are written to the buffer in the platform's charset, which is // often UTF-8 (with ASCII-range digits) but may be another charset, such // as EBCDIC. // - // The output depends on the given mode: - // - SHORTEST: produce the least amount of digits for which the internal - // identity requirement is still satisfied. If the digits are printed - // (together with the correct exponent) then reading this number will give - // 'v' again. The buffer will choose the representation that is closest to - // 'v'. If there are two at the same distance, than the one farther away - // from 0 is chosen (halfway cases - ending with 5 - are rounded up). - // In this mode the 'requested_digits' parameter is ignored. - // - SHORTEST_SINGLE: same as SHORTEST but with single-precision. - // - FIXED: produces digits necessary to print a given number with - // 'requested_digits' digits after the decimal point. The produced digits - // might be too short in which case the caller has to fill the remainder - // with '0's. - // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. - // Halfway cases are rounded towards +/-Infinity (away from 0). The call - // toFixed(0.15, 2) thus returns buffer="2", point=0. - // The returned buffer may contain digits that would be truncated from the - // shortest representation of the input. - // - PRECISION: produces 'requested_digits' where the first digit is not '0'. - // Even though the length of produced digits usually equals - // 'requested_digits', the function is allowed to return fewer digits, in - // which case the caller has to fill the missing digits with '0's. - // Halfway cases are again rounded away from 0. - // DoubleToAscii expects the given buffer to be big enough to hold all - // digits and a terminating null-character. In SHORTEST-mode it expects a - // buffer of at least kBase10MaximalLength + 1. 
In all other modes the - // requested_digits parameter and the padding-zeroes limit the size of the - // output. Don't forget the decimal point, the exponent character and the - // terminating null-character when computing the maximal output size. - // The given length is only used in debug mode to ensure the buffer is big - // enough. - static void DoubleToAscii(double v, - DtoaMode mode, - int requested_digits, - char* buffer, - int buffer_length, - bool* sign, - int* length, - int* point); - - private: - // Implementation for ToShortest and ToShortestSingle. - bool ToShortestIeeeNumber(double value, - StringBuilder* result_builder, - DtoaMode mode) const; - - // If the value is a special value (NaN or Infinity) constructs the - // corresponding string using the configured infinity/nan-symbol. - // If either of them is NULL or the value is not special then the - // function returns false. - bool HandleSpecialValues(double value, StringBuilder* result_builder) const; - // Constructs an exponential representation (i.e. 1.234e56). - // The given exponent assumes a decimal point after the first decimal digit. - void CreateExponentialRepresentation(const char* decimal_digits, - int length, - int exponent, - StringBuilder* result_builder) const; - // Creates a decimal representation (i.e 1234.5678). - void CreateDecimalRepresentation(const char* decimal_digits, - int length, - int decimal_point, - int digits_after_point, - StringBuilder* result_builder) const; - - const int flags_; - const char* const infinity_symbol_; - const char* const nan_symbol_; - const char exponent_character_; - const int decimal_in_shortest_low_; - const int decimal_in_shortest_high_; - const int max_leading_padding_zeroes_in_precision_mode_; - const int max_trailing_padding_zeroes_in_precision_mode_; - + // The output depends on the given mode: + // - SHORTEST: produce the least amount of digits for which the internal + // identity requirement is still satisfied. 
If the digits are printed + // (together with the correct exponent) then reading this number will give + // 'v' again. The buffer will choose the representation that is closest to + // 'v'. If there are two at the same distance, than the one farther away + // from 0 is chosen (halfway cases - ending with 5 - are rounded up). + // In this mode the 'requested_digits' parameter is ignored. + // - SHORTEST_SINGLE: same as SHORTEST but with single-precision. + // - FIXED: produces digits necessary to print a given number with + // 'requested_digits' digits after the decimal point. The produced digits + // might be too short in which case the caller has to fill the remainder + // with '0's. + // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. + // Halfway cases are rounded towards +/-Infinity (away from 0). The call + // toFixed(0.15, 2) thus returns buffer="2", point=0. + // The returned buffer may contain digits that would be truncated from the + // shortest representation of the input. + // - PRECISION: produces 'requested_digits' where the first digit is not '0'. + // Even though the length of produced digits usually equals + // 'requested_digits', the function is allowed to return fewer digits, in + // which case the caller has to fill the missing digits with '0's. + // Halfway cases are again rounded away from 0. + // DoubleToAscii expects the given buffer to be big enough to hold all + // digits and a terminating null-character. In SHORTEST-mode it expects a + // buffer of at least kBase10MaximalLength + 1. In all other modes the + // requested_digits parameter and the padding-zeroes limit the size of the + // output. Don't forget the decimal point, the exponent character and the + // terminating null-character when computing the maximal output size. + // The given length is only used in debug mode to ensure the buffer is big + // enough. 
+ static void DoubleToAscii(double v, + DtoaMode mode, + int requested_digits, + char* buffer, + int buffer_length, + bool* sign, + int* length, + int* point); + + private: + // Implementation for ToShortest and ToShortestSingle. + bool ToShortestIeeeNumber(double value, + StringBuilder* result_builder, + DtoaMode mode) const; + + // If the value is a special value (NaN or Infinity) constructs the + // corresponding string using the configured infinity/nan-symbol. + // If either of them is NULL or the value is not special then the + // function returns false. + bool HandleSpecialValues(double value, StringBuilder* result_builder) const; + // Constructs an exponential representation (i.e. 1.234e56). + // The given exponent assumes a decimal point after the first decimal digit. + void CreateExponentialRepresentation(const char* decimal_digits, + int length, + int exponent, + StringBuilder* result_builder) const; + // Creates a decimal representation (i.e 1234.5678). + void CreateDecimalRepresentation(const char* decimal_digits, + int length, + int decimal_point, + int digits_after_point, + StringBuilder* result_builder) const; + + const int flags_; + const char* const infinity_symbol_; + const char* const nan_symbol_; + const char exponent_character_; + const int decimal_in_shortest_low_; + const int decimal_in_shortest_high_; + const int max_leading_padding_zeroes_in_precision_mode_; + const int max_trailing_padding_zeroes_in_precision_mode_; + DC_DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter); -}; - - -class StringToDoubleConverter { - public: - // Enumeration for allowing octals and ignoring junk when converting - // strings to numbers. - enum Flags { - NO_FLAGS = 0, - ALLOW_HEX = 1, - ALLOW_OCTALS = 2, - ALLOW_TRAILING_JUNK = 4, - ALLOW_LEADING_SPACES = 8, - ALLOW_TRAILING_SPACES = 16, +}; + + +class StringToDoubleConverter { + public: + // Enumeration for allowing octals and ignoring junk when converting + // strings to numbers. 
+ enum Flags { + NO_FLAGS = 0, + ALLOW_HEX = 1, + ALLOW_OCTALS = 2, + ALLOW_TRAILING_JUNK = 4, + ALLOW_LEADING_SPACES = 8, + ALLOW_TRAILING_SPACES = 16, ALLOW_SPACES_AFTER_SIGN = 32, ALLOW_CASE_INSENSIBILITY = 64, ALLOW_HEX_FLOATS = 128, - }; - + }; + static const uc16 kNoSeparator = '\0'; - // Flags should be a bit-or combination of the possible Flags-enum. - // - NO_FLAGS: no special flags. - // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers. - // Ex: StringToDouble("0x1234") -> 4660.0 - // In StringToDouble("0x1234.56") the characters ".56" are trailing - // junk. The result of the call is hence dependent on - // the ALLOW_TRAILING_JUNK flag and/or the junk value. - // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK, - // the string will not be parsed as "0" followed by junk. - // - // - ALLOW_OCTALS: recognizes the prefix "0" for octals: - // If a sequence of octal digits starts with '0', then the number is - // read as octal integer. Octal numbers may only be integers. - // Ex: StringToDouble("01234") -> 668.0 - // StringToDouble("012349") -> 12349.0 // Not a sequence of octal - // // digits. - // In StringToDouble("01234.56") the characters ".56" are trailing - // junk. The result of the call is hence dependent on - // the ALLOW_TRAILING_JUNK flag and/or the junk value. - // In StringToDouble("01234e56") the characters "e56" are trailing - // junk, too. - // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of - // a double literal. - // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces, - // new-lines, and tabs. - // - ALLOW_TRAILING_SPACES: ignore trailing whitespace. - // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign. - // Ex: StringToDouble("- 123.2") -> -123.2. - // StringToDouble("+ 123.2") -> 123.2 + // Flags should be a bit-or combination of the possible Flags-enum. + // - NO_FLAGS: no special flags. + // - ALLOW_HEX: recognizes the prefix "0x". 
Hex numbers may only be integers. + // Ex: StringToDouble("0x1234") -> 4660.0 + // In StringToDouble("0x1234.56") the characters ".56" are trailing + // junk. The result of the call is hence dependent on + // the ALLOW_TRAILING_JUNK flag and/or the junk value. + // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK, + // the string will not be parsed as "0" followed by junk. + // + // - ALLOW_OCTALS: recognizes the prefix "0" for octals: + // If a sequence of octal digits starts with '0', then the number is + // read as octal integer. Octal numbers may only be integers. + // Ex: StringToDouble("01234") -> 668.0 + // StringToDouble("012349") -> 12349.0 // Not a sequence of octal + // // digits. + // In StringToDouble("01234.56") the characters ".56" are trailing + // junk. The result of the call is hence dependent on + // the ALLOW_TRAILING_JUNK flag and/or the junk value. + // In StringToDouble("01234e56") the characters "e56" are trailing + // junk, too. + // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of + // a double literal. + // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces, + // new-lines, and tabs. + // - ALLOW_TRAILING_SPACES: ignore trailing whitespace. + // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign. + // Ex: StringToDouble("- 123.2") -> -123.2. + // StringToDouble("+ 123.2") -> 123.2 // - ALLOW_CASE_INSENSIBILITY: ignore case of characters for special values: // infinity and nan. // - ALLOW_HEX_FLOATS: allows hexadecimal float literals. @@ -437,75 +437,75 @@ class StringToDoubleConverter { // Examples: 0x1.2p3 == 9.0 // 0x10.1p0 == 16.0625 // ALLOW_HEX and ALLOW_HEX_FLOATS are indendent. - // - // empty_string_value is returned when an empty string is given as input. - // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string - // containing only spaces is converted to the 'empty_string_value', too. 
- // - // junk_string_value is returned when - // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not - // part of a double-literal) is found. - // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a - // double literal. - // - // infinity_symbol and nan_symbol are strings that are used to detect - // inputs that represent infinity and NaN. They can be null, in which case - // they are ignored. - // The conversion routine first reads any possible signs. Then it compares the - // following character of the input-string with the first character of - // the infinity, and nan-symbol. If either matches, the function assumes, that - // a match has been found, and expects the following input characters to match - // the remaining characters of the special-value symbol. - // This means that the following restrictions apply to special-value symbols: - // - they must not start with signs ('+', or '-'), - // - they must not have the same first character. - // - they must not start with digits. - // + // + // empty_string_value is returned when an empty string is given as input. + // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string + // containing only spaces is converted to the 'empty_string_value', too. + // + // junk_string_value is returned when + // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not + // part of a double-literal) is found. + // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a + // double literal. + // + // infinity_symbol and nan_symbol are strings that are used to detect + // inputs that represent infinity and NaN. They can be null, in which case + // they are ignored. + // The conversion routine first reads any possible signs. Then it compares the + // following character of the input-string with the first character of + // the infinity, and nan-symbol. 
If either matches, the function assumes, that + // a match has been found, and expects the following input characters to match + // the remaining characters of the special-value symbol. + // This means that the following restrictions apply to special-value symbols: + // - they must not start with signs ('+', or '-'), + // - they must not have the same first character. + // - they must not start with digits. + // // If the separator character is not kNoSeparator, then that specific // character is ignored when in between two valid digits of the significant. // It is not allowed to appear in the exponent. // It is not allowed to lead or trail the number. // It is not allowed to appear twice next to each other. // - // Examples: - // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK, - // empty_string_value = 0.0, - // junk_string_value = NaN, - // infinity_symbol = "infinity", - // nan_symbol = "nan": - // StringToDouble("0x1234") -> 4660.0. - // StringToDouble("0x1234K") -> 4660.0. - // StringToDouble("") -> 0.0 // empty_string_value. - // StringToDouble(" ") -> NaN // junk_string_value. - // StringToDouble(" 1") -> NaN // junk_string_value. - // StringToDouble("0x") -> NaN // junk_string_value. - // StringToDouble("-123.45") -> -123.45. - // StringToDouble("--123.45") -> NaN // junk_string_value. - // StringToDouble("123e45") -> 123e45. - // StringToDouble("123E45") -> 123e45. - // StringToDouble("123e+45") -> 123e45. - // StringToDouble("123E-45") -> 123e-45. - // StringToDouble("123e") -> 123.0 // trailing junk ignored. - // StringToDouble("123e-") -> 123.0 // trailing junk ignored. - // StringToDouble("+NaN") -> NaN // NaN string literal. - // StringToDouble("-infinity") -> -inf. // infinity literal. - // StringToDouble("Infinity") -> NaN // junk_string_value. 
- // - // flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES, - // empty_string_value = 0.0, - // junk_string_value = NaN, - // infinity_symbol = NULL, - // nan_symbol = NULL: - // StringToDouble("0x1234") -> NaN // junk_string_value. - // StringToDouble("01234") -> 668.0. - // StringToDouble("") -> 0.0 // empty_string_value. - // StringToDouble(" ") -> 0.0 // empty_string_value. - // StringToDouble(" 1") -> 1.0 - // StringToDouble("0x") -> NaN // junk_string_value. - // StringToDouble("0123e45") -> NaN // junk_string_value. - // StringToDouble("01239E45") -> 1239e45. - // StringToDouble("-infinity") -> NaN // junk_string_value. - // StringToDouble("NaN") -> NaN // junk_string_value. + // Examples: + // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK, + // empty_string_value = 0.0, + // junk_string_value = NaN, + // infinity_symbol = "infinity", + // nan_symbol = "nan": + // StringToDouble("0x1234") -> 4660.0. + // StringToDouble("0x1234K") -> 4660.0. + // StringToDouble("") -> 0.0 // empty_string_value. + // StringToDouble(" ") -> NaN // junk_string_value. + // StringToDouble(" 1") -> NaN // junk_string_value. + // StringToDouble("0x") -> NaN // junk_string_value. + // StringToDouble("-123.45") -> -123.45. + // StringToDouble("--123.45") -> NaN // junk_string_value. + // StringToDouble("123e45") -> 123e45. + // StringToDouble("123E45") -> 123e45. + // StringToDouble("123e+45") -> 123e45. + // StringToDouble("123E-45") -> 123e-45. + // StringToDouble("123e") -> 123.0 // trailing junk ignored. + // StringToDouble("123e-") -> 123.0 // trailing junk ignored. + // StringToDouble("+NaN") -> NaN // NaN string literal. + // StringToDouble("-infinity") -> -inf. // infinity literal. + // StringToDouble("Infinity") -> NaN // junk_string_value. + // + // flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES, + // empty_string_value = 0.0, + // junk_string_value = NaN, + // infinity_symbol = NULL, + // nan_symbol = NULL: + // StringToDouble("0x1234") -> NaN // junk_string_value. 
+ // StringToDouble("01234") -> 668.0. + // StringToDouble("") -> 0.0 // empty_string_value. + // StringToDouble(" ") -> 0.0 // empty_string_value. + // StringToDouble(" 1") -> 1.0 + // StringToDouble("0x") -> NaN // junk_string_value. + // StringToDouble("0123e45") -> NaN // junk_string_value. + // StringToDouble("01239E45") -> 1239e45. + // StringToDouble("-infinity") -> NaN // junk_string_value. + // StringToDouble("NaN") -> NaN // junk_string_value. // // flags = NO_FLAGS, // separator = ' ': @@ -514,63 +514,63 @@ class StringToDoubleConverter { // StringToDouble("1 000 000.0") -> 1000000.0 // StringToDouble("1.000 000") -> 1.0 // StringToDouble("1.0e1 000") -> NaN // junk_string_value - StringToDoubleConverter(int flags, - double empty_string_value, - double junk_string_value, - const char* infinity_symbol, + StringToDoubleConverter(int flags, + double empty_string_value, + double junk_string_value, + const char* infinity_symbol, const char* nan_symbol, uc16 separator = kNoSeparator) - : flags_(flags), - empty_string_value_(empty_string_value), - junk_string_value_(junk_string_value), - infinity_symbol_(infinity_symbol), + : flags_(flags), + empty_string_value_(empty_string_value), + junk_string_value_(junk_string_value), + infinity_symbol_(infinity_symbol), nan_symbol_(nan_symbol), separator_(separator) { - } - - // Performs the conversion. - // The output parameter 'processed_characters_count' is set to the number - // of characters that have been processed to read the number. - // Spaces than are processed with ALLOW_{LEADING|TRAILING}_SPACES are included - // in the 'processed_characters_count'. Trailing junk is never included. - double StringToDouble(const char* buffer, - int length, - int* processed_characters_count) const; - - // Same as StringToDouble above but for 16 bit characters. - double StringToDouble(const uc16* buffer, - int length, - int* processed_characters_count) const; - - // Same as StringToDouble but reads a float. 
- // Note that this is not equivalent to static_cast(StringToDouble(...)) - // due to potential double-rounding. - float StringToFloat(const char* buffer, - int length, - int* processed_characters_count) const; - - // Same as StringToFloat above but for 16 bit characters. - float StringToFloat(const uc16* buffer, - int length, - int* processed_characters_count) const; - - private: - const int flags_; - const double empty_string_value_; - const double junk_string_value_; - const char* const infinity_symbol_; - const char* const nan_symbol_; + } + + // Performs the conversion. + // The output parameter 'processed_characters_count' is set to the number + // of characters that have been processed to read the number. + // Spaces than are processed with ALLOW_{LEADING|TRAILING}_SPACES are included + // in the 'processed_characters_count'. Trailing junk is never included. + double StringToDouble(const char* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToDouble above but for 16 bit characters. + double StringToDouble(const uc16* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToDouble but reads a float. + // Note that this is not equivalent to static_cast(StringToDouble(...)) + // due to potential double-rounding. + float StringToFloat(const char* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToFloat above but for 16 bit characters. 
+ float StringToFloat(const uc16* buffer, + int length, + int* processed_characters_count) const; + + private: + const int flags_; + const double empty_string_value_; + const double junk_string_value_; + const char* const infinity_symbol_; + const char* const nan_symbol_; const uc16 separator_; - - template - double StringToIeee(Iterator start_pointer, - int length, - bool read_as_double, - int* processed_characters_count) const; - + + template + double StringToIeee(Iterator start_pointer, + int length, + bool read_as_double, + int* processed_characters_count) const; + DC_DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ +}; + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ diff --git a/contrib/libs/double-conversion/fast-dtoa.cc b/contrib/libs/double-conversion/fast-dtoa.cc index deb757d5eba..61350383a95 100644 --- a/contrib/libs/double-conversion/fast-dtoa.cc +++ b/contrib/libs/double-conversion/fast-dtoa.cc @@ -1,665 +1,665 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "fast-dtoa.h" - -#include "cached-powers.h" -#include "diy-fp.h" -#include "ieee.h" - -namespace double_conversion { - -// The minimal and maximal target exponent define the range of w's binary -// exponent, where 'w' is the result of multiplying the input by a cached power -// of ten. -// -// A different range might be chosen on a different platform, to optimize digit -// generation, but a smaller range requires more powers of ten to be cached. -static const int kMinimalTargetExponent = -60; -static const int kMaximalTargetExponent = -32; - - -// Adjusts the last digit of the generated number, and screens out generated -// solutions that may be inaccurate. A solution may be inaccurate if it is -// outside the safe interval, or if we cannot prove that it is closer to the -// input than a neighboring representation of the same length. 
-// -// Input: * buffer containing the digits of too_high / 10^kappa -// * the buffer's length -// * distance_too_high_w == (too_high - w).f() * unit -// * unsafe_interval == (too_high - too_low).f() * unit -// * rest = (too_high - buffer * 10^kappa).f() * unit -// * ten_kappa = 10^kappa * unit -// * unit = the common multiplier -// Output: returns true if the buffer is guaranteed to contain the closest -// representable number to the input. -// Modifies the generated digits in the buffer to approach (round towards) w. -static bool RoundWeed(Vector buffer, - int length, - uint64_t distance_too_high_w, - uint64_t unsafe_interval, - uint64_t rest, - uint64_t ten_kappa, - uint64_t unit) { - uint64_t small_distance = distance_too_high_w - unit; - uint64_t big_distance = distance_too_high_w + unit; - // Let w_low = too_high - big_distance, and - // w_high = too_high - small_distance. - // Note: w_low < w < w_high - // - // The real w (* unit) must lie somewhere inside the interval - // ]w_low; w_high[ (often written as "(w_low; w_high)") - - // Basically the buffer currently contains a number in the unsafe interval - // ]too_low; too_high[ with too_low < w < too_high - // - // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // ^v 1 unit ^ ^ ^ ^ - // boundary_high --------------------- . . . . - // ^v 1 unit . . . . - // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . . - // . . ^ . . - // . big_distance . . . - // . . . . rest - // small_distance . . . . - // v . . . . - // w_high - - - - - - - - - - - - - - - - - - . . . . - // ^v 1 unit . . . . - // w ---------------------------------------- . . . . - // ^v 1 unit v . . . - // w_low - - - - - - - - - - - - - - - - - - - - - . . . - // . . v - // buffer --------------------------------------------------+-------+-------- - // . . - // safe_interval . - // v . - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . - // ^v 1 unit . 
- // boundary_low ------------------------- unsafe_interval - // ^v 1 unit v - // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - // - // Note that the value of buffer could lie anywhere inside the range too_low - // to too_high. - // - // boundary_low, boundary_high and w are approximations of the real boundaries - // and v (the input number). They are guaranteed to be precise up to one unit. - // In fact the error is guaranteed to be strictly less than one unit. - // - // Anything that lies outside the unsafe interval is guaranteed not to round - // to v when read again. - // Anything that lies inside the safe interval is guaranteed to round to v - // when read again. - // If the number inside the buffer lies inside the unsafe interval but not - // inside the safe interval then we simply do not know and bail out (returning - // false). - // - // Similarly we have to take into account the imprecision of 'w' when finding - // the closest representation of 'w'. If we have two potential - // representations, and one is closer to both w_low and w_high, then we know - // it is closer to the actual value v. - // - // By generating the digits of too_high we got the largest (closest to - // too_high) buffer that is still in the unsafe interval. In the case where - // w_high < buffer < too_high we try to decrement the buffer. - // This way the buffer approaches (rounds towards) w. - // There are 3 conditions that stop the decrementation process: - // 1) the buffer is already below w_high - // 2) decrementing the buffer would make it leave the unsafe interval - // 3) decrementing the buffer would yield a number below w_high and farther - // away than the current number. In other words: - // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high - // Instead of using the buffer directly we use its distance to too_high. 
- // Conceptually rest ~= too_high - buffer - // We need to do the following tests in this order to avoid over- and - // underflows. - ASSERT(rest <= unsafe_interval); - while (rest < small_distance && // Negated condition 1 - unsafe_interval - rest >= ten_kappa && // Negated condition 2 - (rest + ten_kappa < small_distance || // buffer{-1} > w_high - small_distance - rest >= rest + ten_kappa - small_distance)) { - buffer[length - 1]--; - rest += ten_kappa; - } - - // We have approached w+ as much as possible. We now test if approaching w- - // would require changing the buffer. If yes, then we have two possible - // representations close to w, but we cannot decide which one is closer. - if (rest < big_distance && - unsafe_interval - rest >= ten_kappa && - (rest + ten_kappa < big_distance || - big_distance - rest > rest + ten_kappa - big_distance)) { - return false; - } - - // Weeding test. - // The safe interval is [too_low + 2 ulp; too_high - 2 ulp] - // Since too_low = too_high - unsafe_interval this is equivalent to - // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp] - // Conceptually we have: rest ~= too_high - buffer - return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit); -} - - -// Rounds the buffer upwards if the result is closer to v by possibly adding -// 1 to the buffer. If the precision of the calculation is not sufficient to -// round correctly, return false. -// The rounding might shift the whole buffer in which case the kappa is -// adjusted. For example "99", kappa = 3 might become "10", kappa = 4. -// -// If 2*rest > ten_kappa then the buffer needs to be round up. -// rest can have an error of +/- 1 unit. This function accounts for the -// imprecision and returns false, if the rounding direction cannot be -// unambiguously determined. -// -// Precondition: rest < ten_kappa. 
-static bool RoundWeedCounted(Vector buffer, - int length, - uint64_t rest, - uint64_t ten_kappa, - uint64_t unit, - int* kappa) { - ASSERT(rest < ten_kappa); - // The following tests are done in a specific order to avoid overflows. They - // will work correctly with any uint64 values of rest < ten_kappa and unit. - // - // If the unit is too big, then we don't know which way to round. For example - // a unit of 50 means that the real number lies within rest +/- 50. If - // 10^kappa == 40 then there is no way to tell which way to round. - if (unit >= ten_kappa) return false; - // Even if unit is just half the size of 10^kappa we are already completely - // lost. (And after the previous test we know that the expression will not - // over/underflow.) - if (ten_kappa - unit <= unit) return false; - // If 2 * (rest + unit) <= 10^kappa we can safely round down. - if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) { - return true; - } - // If 2 * (rest - unit) >= 10^kappa, then we can safely round up. - if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) { - // Increment the last digit recursively until we find a non '9' digit. - buffer[length - 1]++; - for (int i = length - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) break; - buffer[i] = '0'; - buffer[i - 1]++; - } - // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the - // exception of the first digit all digits are now '0'. Simply switch the - // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and - // the power (the kappa) is increased. - if (buffer[0] == '0' + 10) { - buffer[0] = '1'; - (*kappa) += 1; - } - return true; - } - return false; -} - -// Returns the biggest power of ten that is less than or equal to the given -// number. We furthermore receive the maximum number of bits 'number' has. -// -// Returns power == 10^(exponent_plus_one-1) such that -// power <= number < power * 10. -// If number_bits == 0 then 0^(0-1) is returned. 
-// The number of bits must be <= 32. -// Precondition: number < (1 << (number_bits + 1)). - -// Inspired by the method for finding an integer log base 10 from here: -// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 -static unsigned int const kSmallPowersOfTen[] = - {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, - 1000000000}; - -static void BiggestPowerTen(uint32_t number, - int number_bits, - uint32_t* power, - int* exponent_plus_one) { - ASSERT(number < (1u << (number_bits + 1))); - // 1233/4096 is approximately 1/lg(10). - int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12); - // We increment to skip over the first entry in the kPowersOf10 table. - // Note: kPowersOf10[i] == 10^(i-1). - exponent_plus_one_guess++; - // We don't have any guarantees that 2^number_bits <= number. - if (number < kSmallPowersOfTen[exponent_plus_one_guess]) { - exponent_plus_one_guess--; - } - *power = kSmallPowersOfTen[exponent_plus_one_guess]; - *exponent_plus_one = exponent_plus_one_guess; -} - -// Generates the digits of input number w. -// w is a floating-point number (DiyFp), consisting of a significand and an -// exponent. Its exponent is bounded by kMinimalTargetExponent and -// kMaximalTargetExponent. -// Hence -60 <= w.e() <= -32. -// -// Returns false if it fails, in which case the generated digits in the buffer -// should not be used. -// Preconditions: -// * low, w and high are correct up to 1 ulp (unit in the last place). That -// is, their error must be less than a unit of their last digits. -// * low.e() == w.e() == high.e() -// * low < w < high, and taking into account their error: low~ <= high~ -// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent -// Postconditions: returns false if procedure fails. -// otherwise: -// * buffer is not null-terminated, but len contains the number of digits. 
-// * buffer contains the shortest possible decimal digit-sequence -// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the -// correct values of low and high (without their error). -// * if more than one decimal representation gives the minimal number of -// decimal digits then the one closest to W (where W is the correct value -// of w) is chosen. -// Remark: this procedure takes into account the imprecision of its input -// numbers. If the precision is not enough to guarantee all the postconditions -// then false is returned. This usually happens rarely (~0.5%). -// -// Say, for the sake of example, that -// w.e() == -48, and w.f() == 0x1234567890abcdef -// w's value can be computed by w.f() * 2^w.e() -// We can obtain w's integral digits by simply shifting w.f() by -w.e(). -// -> w's integral part is 0x1234 -// w's fractional part is therefore 0x567890abcdef. -// Printing w's integral part is easy (simply print 0x1234 in decimal). -// In order to print its fraction we repeatedly multiply the fraction by 10 and -// get each digit. Example the first digit after the point would be computed by -// (0x567890abcdef * 10) >> 48. -> 3 -// The whole thing becomes slightly more complicated because we want to stop -// once we have enough digits. That is, once the digits inside the buffer -// represent 'w' we can stop. Everything inside the interval low - high -// represents w. However we have to pay attention to low, high and w's -// imprecision. -static bool DigitGen(DiyFp low, - DiyFp w, - DiyFp high, - Vector buffer, - int* length, - int* kappa) { - ASSERT(low.e() == w.e() && w.e() == high.e()); - ASSERT(low.f() + 1 <= high.f() - 1); - ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); - // low, w and high are imprecise, but by less than one ulp (unit in the last - // place). - // If we remove (resp. add) 1 ulp from low (resp. 
high) we are certain that - // the new numbers are outside of the interval we want the final - // representation to lie in. - // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield - // numbers that are certain to lie in the interval. We will use this fact - // later on. - // We will now start by generating the digits within the uncertain - // interval. Later we will weed out representations that lie outside the safe - // interval and thus _might_ lie outside the correct interval. - uint64_t unit = 1; - DiyFp too_low = DiyFp(low.f() - unit, low.e()); - DiyFp too_high = DiyFp(high.f() + unit, high.e()); - // too_low and too_high are guaranteed to lie outside the interval we want the - // generated number in. - DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low); - // We now cut the input number into two parts: the integral digits and the - // fractionals. We will not write any decimal separator though, but adapt - // kappa instead. - // Reminder: we are currently computing the digits (stored inside the buffer) - // such that: too_low < buffer * 10^kappa < too_high - // We use too_high for the digit_generation and stop as soon as possible. - // If we stop early we effectively round down. - DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); - // Division by one is a shift. - uint32_t integrals = static_cast(too_high.f() >> -one.e()); - // Modulo by one is an and. - uint64_t fractionals = too_high.f() & (one.f() - 1); - uint32_t divisor; - int divisor_exponent_plus_one; - BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), - &divisor, &divisor_exponent_plus_one); - *kappa = divisor_exponent_plus_one; - *length = 0; - // Loop invariant: buffer = too_high / 10^kappa (integer division) - // The invariant holds for the first iteration: kappa has been initialized - // with the divisor exponent + 1. And the divisor is the biggest power of ten - // that is smaller than integrals. 
- while (*kappa > 0) { - int digit = integrals / divisor; - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - integrals %= divisor; - (*kappa)--; - // Note that kappa now equals the exponent of the divisor and that the - // invariant thus holds again. - uint64_t rest = - (static_cast(integrals) << -one.e()) + fractionals; - // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e()) - // Reminder: unsafe_interval.e() == one.e() - if (rest < unsafe_interval.f()) { - // Rounding down (by not emitting the remaining digits) yields a number - // that lies within the unsafe interval. - return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(), - unsafe_interval.f(), rest, - static_cast(divisor) << -one.e(), unit); - } - divisor /= 10; - } - - // The integrals have been generated. We are at the point of the decimal - // separator. In the following loop we simply multiply the remaining digits by - // 10 and divide by one. We just need to pay attention to multiply associated - // data (like the interval or 'unit'), too. - // Note that the multiplication by 10 does not overflow, because w.e >= -60 - // and thus one.e >= -60. - ASSERT(one.e() >= -60); - ASSERT(fractionals < one.f()); - ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); - for (;;) { - fractionals *= 10; - unit *= 10; - unsafe_interval.set_f(unsafe_interval.f() * 10); - // Integer division by one. - int digit = static_cast(fractionals >> -one.e()); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - fractionals &= one.f() - 1; // Modulo by one. - (*kappa)--; - if (fractionals < unsafe_interval.f()) { - return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit, - unsafe_interval.f(), fractionals, one.f(), unit); - } - } -} - - - -// Generates (at most) requested_digits digits of input number w. -// w is a floating-point number (DiyFp), consisting of a significand and an -// exponent. 
Its exponent is bounded by kMinimalTargetExponent and -// kMaximalTargetExponent. -// Hence -60 <= w.e() <= -32. -// -// Returns false if it fails, in which case the generated digits in the buffer -// should not be used. -// Preconditions: -// * w is correct up to 1 ulp (unit in the last place). That -// is, its error must be strictly less than a unit of its last digit. -// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent -// -// Postconditions: returns false if procedure fails. -// otherwise: -// * buffer is not null-terminated, but length contains the number of -// digits. -// * the representation in buffer is the most precise representation of -// requested_digits digits. -// * buffer contains at most requested_digits digits of w. If there are less -// than requested_digits digits then some trailing '0's have been removed. -// * kappa is such that -// w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2. -// -// Remark: This procedure takes into account the imprecision of its input -// numbers. If the precision is not enough to guarantee all the postconditions -// then false is returned. This usually happens rarely, but the failure-rate -// increases with higher requested_digits. -static bool DigitGenCounted(DiyFp w, - int requested_digits, - Vector buffer, - int* length, - int* kappa) { - ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); - ASSERT(kMinimalTargetExponent >= -60); - ASSERT(kMaximalTargetExponent <= -32); - // w is assumed to have an error less than 1 unit. Whenever w is scaled we - // also scale its error. - uint64_t w_error = 1; - // We cut the input number into two parts: the integral digits and the - // fractional digits. We don't emit any decimal separator, but adapt kappa - // instead. Example: instead of writing "1.2" we put "12" into the buffer and - // increase kappa by 1. - DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); - // Division by one is a shift. 
- uint32_t integrals = static_cast(w.f() >> -one.e()); - // Modulo by one is an and. - uint64_t fractionals = w.f() & (one.f() - 1); - uint32_t divisor; - int divisor_exponent_plus_one; - BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), - &divisor, &divisor_exponent_plus_one); - *kappa = divisor_exponent_plus_one; - *length = 0; - - // Loop invariant: buffer = w / 10^kappa (integer division) - // The invariant holds for the first iteration: kappa has been initialized - // with the divisor exponent + 1. And the divisor is the biggest power of ten - // that is smaller than 'integrals'. - while (*kappa > 0) { - int digit = integrals / divisor; - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - requested_digits--; - integrals %= divisor; - (*kappa)--; - // Note that kappa now equals the exponent of the divisor and that the - // invariant thus holds again. - if (requested_digits == 0) break; - divisor /= 10; - } - - if (requested_digits == 0) { - uint64_t rest = - (static_cast(integrals) << -one.e()) + fractionals; - return RoundWeedCounted(buffer, *length, rest, - static_cast(divisor) << -one.e(), w_error, - kappa); - } - - // The integrals have been generated. We are at the point of the decimal - // separator. In the following loop we simply multiply the remaining digits by - // 10 and divide by one. We just need to pay attention to multiply associated - // data (the 'unit'), too. - // Note that the multiplication by 10 does not overflow, because w.e >= -60 - // and thus one.e >= -60. - ASSERT(one.e() >= -60); - ASSERT(fractionals < one.f()); - ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); - while (requested_digits > 0 && fractionals > w_error) { - fractionals *= 10; - w_error *= 10; - // Integer division by one. 
- int digit = static_cast(fractionals >> -one.e()); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - requested_digits--; - fractionals &= one.f() - 1; // Modulo by one. - (*kappa)--; - } - if (requested_digits != 0) return false; - return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error, - kappa); -} - - -// Provides a decimal representation of v. -// Returns true if it succeeds, otherwise the result cannot be trusted. -// There will be *length digits inside the buffer (not null-terminated). -// If the function returns true then -// v == (double) (buffer * 10^decimal_exponent). -// The digits in the buffer are the shortest representation possible: no -// 0.09999999999999999 instead of 0.1. The shorter representation will even be -// chosen even if the longer one would be closer to v. -// The last digit will be closest to the actual v. That is, even if several -// digits might correctly yield 'v' when read again, the closest will be -// computed. -static bool Grisu3(double v, - FastDtoaMode mode, - Vector buffer, - int* length, - int* decimal_exponent) { - DiyFp w = Double(v).AsNormalizedDiyFp(); - // boundary_minus and boundary_plus are the boundaries between v and its - // closest floating-point neighbors. Any number strictly between - // boundary_minus and boundary_plus will round to v when convert to a double. - // Grisu3 will never output representations that lie exactly on a boundary. 
- DiyFp boundary_minus, boundary_plus; - if (mode == FAST_DTOA_SHORTEST) { - Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus); - } else { - ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE); - float single_v = static_cast(v); - Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus); - } - ASSERT(boundary_plus.e() == w.e()); - DiyFp ten_mk; // Cached power of ten: 10^-k - int mk; // -k - int ten_mk_minimal_binary_exponent = - kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); - int ten_mk_maximal_binary_exponent = - kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); - PowersOfTenCache::GetCachedPowerForBinaryExponentRange( - ten_mk_minimal_binary_exponent, - ten_mk_maximal_binary_exponent, - &ten_mk, &mk); - ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + - DiyFp::kSignificandSize) && - (kMaximalTargetExponent >= w.e() + ten_mk.e() + - DiyFp::kSignificandSize)); - // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a - // 64 bit significand and ten_mk is thus only precise up to 64 bits. - - // The DiyFp::Times procedure rounds its result, and ten_mk is approximated - // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now - // off by a small amount. - // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. - // In other words: let f = scaled_w.f() and e = scaled_w.e(), then - // (f-1) * 2^e < w*10^k < (f+1) * 2^e - DiyFp scaled_w = DiyFp::Times(w, ten_mk); - ASSERT(scaled_w.e() == - boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize); - // In theory it would be possible to avoid some recomputations by computing - // the difference between w and boundary_minus/plus (a power of 2) and to - // compute scaled_boundary_minus/plus by subtracting/adding from - // scaled_w. However the code becomes much less readable and the speed - // enhancements are not terriffic. 
- DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk); - DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk); - - // DigitGen will generate the digits of scaled_w. Therefore we have - // v == (double) (scaled_w * 10^-mk). - // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an - // integer than it will be updated. For instance if scaled_w == 1.23 then - // the buffer will be filled with "123" und the decimal_exponent will be - // decreased by 2. - int kappa; - bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus, - buffer, length, &kappa); - *decimal_exponent = -mk + kappa; - return result; -} - - -// The "counted" version of grisu3 (see above) only generates requested_digits -// number of digits. This version does not generate the shortest representation, -// and with enough requested digits 0.1 will at some point print as 0.9999999... -// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and -// therefore the rounding strategy for halfway cases is irrelevant. -static bool Grisu3Counted(double v, - int requested_digits, - Vector buffer, - int* length, - int* decimal_exponent) { - DiyFp w = Double(v).AsNormalizedDiyFp(); - DiyFp ten_mk; // Cached power of ten: 10^-k - int mk; // -k - int ten_mk_minimal_binary_exponent = - kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); - int ten_mk_maximal_binary_exponent = - kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); - PowersOfTenCache::GetCachedPowerForBinaryExponentRange( - ten_mk_minimal_binary_exponent, - ten_mk_maximal_binary_exponent, - &ten_mk, &mk); - ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + - DiyFp::kSignificandSize) && - (kMaximalTargetExponent >= w.e() + ten_mk.e() + - DiyFp::kSignificandSize)); - // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a - // 64 bit significand and ten_mk is thus only precise up to 64 bits. 
- - // The DiyFp::Times procedure rounds its result, and ten_mk is approximated - // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now - // off by a small amount. - // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. - // In other words: let f = scaled_w.f() and e = scaled_w.e(), then - // (f-1) * 2^e < w*10^k < (f+1) * 2^e - DiyFp scaled_w = DiyFp::Times(w, ten_mk); - - // We now have (double) (scaled_w * 10^-mk). - // DigitGen will generate the first requested_digits digits of scaled_w and - // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It - // will not always be exactly the same since DigitGenCounted only produces a - // limited number of digits.) - int kappa; - bool result = DigitGenCounted(scaled_w, requested_digits, - buffer, length, &kappa); - *decimal_exponent = -mk + kappa; - return result; -} - - -bool FastDtoa(double v, - FastDtoaMode mode, - int requested_digits, - Vector buffer, - int* length, - int* decimal_point) { - ASSERT(v > 0); - ASSERT(!Double(v).IsSpecial()); - - bool result = false; - int decimal_exponent = 0; - switch (mode) { - case FAST_DTOA_SHORTEST: - case FAST_DTOA_SHORTEST_SINGLE: - result = Grisu3(v, mode, buffer, length, &decimal_exponent); - break; - case FAST_DTOA_PRECISION: - result = Grisu3Counted(v, requested_digits, - buffer, length, &decimal_exponent); - break; - default: - UNREACHABLE(); - } - if (result) { - *decimal_point = *length + decimal_exponent; - buffer[*length] = '\0'; - } - return result; -} - -} // namespace double_conversion +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "fast-dtoa.h" + +#include "cached-powers.h" +#include "diy-fp.h" +#include "ieee.h" + +namespace double_conversion { + +// The minimal and maximal target exponent define the range of w's binary +// exponent, where 'w' is the result of multiplying the input by a cached power +// of ten. +// +// A different range might be chosen on a different platform, to optimize digit +// generation, but a smaller range requires more powers of ten to be cached. +static const int kMinimalTargetExponent = -60; +static const int kMaximalTargetExponent = -32; + + +// Adjusts the last digit of the generated number, and screens out generated +// solutions that may be inaccurate. 
A solution may be inaccurate if it is +// outside the safe interval, or if we cannot prove that it is closer to the +// input than a neighboring representation of the same length. +// +// Input: * buffer containing the digits of too_high / 10^kappa +// * the buffer's length +// * distance_too_high_w == (too_high - w).f() * unit +// * unsafe_interval == (too_high - too_low).f() * unit +// * rest = (too_high - buffer * 10^kappa).f() * unit +// * ten_kappa = 10^kappa * unit +// * unit = the common multiplier +// Output: returns true if the buffer is guaranteed to contain the closest +// representable number to the input. +// Modifies the generated digits in the buffer to approach (round towards) w. +static bool RoundWeed(Vector buffer, + int length, + uint64_t distance_too_high_w, + uint64_t unsafe_interval, + uint64_t rest, + uint64_t ten_kappa, + uint64_t unit) { + uint64_t small_distance = distance_too_high_w - unit; + uint64_t big_distance = distance_too_high_w + unit; + // Let w_low = too_high - big_distance, and + // w_high = too_high - small_distance. + // Note: w_low < w < w_high + // + // The real w (* unit) must lie somewhere inside the interval + // ]w_low; w_high[ (often written as "(w_low; w_high)") + + // Basically the buffer currently contains a number in the unsafe interval + // ]too_low; too_high[ with too_low < w < too_high + // + // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // ^v 1 unit ^ ^ ^ ^ + // boundary_high --------------------- . . . . + // ^v 1 unit . . . . + // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . . + // . . ^ . . + // . big_distance . . . + // . . . . rest + // small_distance . . . . + // v . . . . + // w_high - - - - - - - - - - - - - - - - - - . . . . + // ^v 1 unit . . . . + // w ---------------------------------------- . . . . + // ^v 1 unit v . . . + // w_low - - - - - - - - - - - - - - - - - - - - - . . . + // . . 
v + // buffer --------------------------------------------------+-------+-------- + // . . + // safe_interval . + // v . + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . + // ^v 1 unit . + // boundary_low ------------------------- unsafe_interval + // ^v 1 unit v + // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // + // + // Note that the value of buffer could lie anywhere inside the range too_low + // to too_high. + // + // boundary_low, boundary_high and w are approximations of the real boundaries + // and v (the input number). They are guaranteed to be precise up to one unit. + // In fact the error is guaranteed to be strictly less than one unit. + // + // Anything that lies outside the unsafe interval is guaranteed not to round + // to v when read again. + // Anything that lies inside the safe interval is guaranteed to round to v + // when read again. + // If the number inside the buffer lies inside the unsafe interval but not + // inside the safe interval then we simply do not know and bail out (returning + // false). + // + // Similarly we have to take into account the imprecision of 'w' when finding + // the closest representation of 'w'. If we have two potential + // representations, and one is closer to both w_low and w_high, then we know + // it is closer to the actual value v. + // + // By generating the digits of too_high we got the largest (closest to + // too_high) buffer that is still in the unsafe interval. In the case where + // w_high < buffer < too_high we try to decrement the buffer. + // This way the buffer approaches (rounds towards) w. + // There are 3 conditions that stop the decrementation process: + // 1) the buffer is already below w_high + // 2) decrementing the buffer would make it leave the unsafe interval + // 3) decrementing the buffer would yield a number below w_high and farther + // away than the current number. 
In other words: + // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high + // Instead of using the buffer directly we use its distance to too_high. + // Conceptually rest ~= too_high - buffer + // We need to do the following tests in this order to avoid over- and + // underflows. + ASSERT(rest <= unsafe_interval); + while (rest < small_distance && // Negated condition 1 + unsafe_interval - rest >= ten_kappa && // Negated condition 2 + (rest + ten_kappa < small_distance || // buffer{-1} > w_high + small_distance - rest >= rest + ten_kappa - small_distance)) { + buffer[length - 1]--; + rest += ten_kappa; + } + + // We have approached w+ as much as possible. We now test if approaching w- + // would require changing the buffer. If yes, then we have two possible + // representations close to w, but we cannot decide which one is closer. + if (rest < big_distance && + unsafe_interval - rest >= ten_kappa && + (rest + ten_kappa < big_distance || + big_distance - rest > rest + ten_kappa - big_distance)) { + return false; + } + + // Weeding test. + // The safe interval is [too_low + 2 ulp; too_high - 2 ulp] + // Since too_low = too_high - unsafe_interval this is equivalent to + // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp] + // Conceptually we have: rest ~= too_high - buffer + return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit); +} + + +// Rounds the buffer upwards if the result is closer to v by possibly adding +// 1 to the buffer. If the precision of the calculation is not sufficient to +// round correctly, return false. +// The rounding might shift the whole buffer in which case the kappa is +// adjusted. For example "99", kappa = 3 might become "10", kappa = 4. +// +// If 2*rest > ten_kappa then the buffer needs to be round up. +// rest can have an error of +/- 1 unit. This function accounts for the +// imprecision and returns false, if the rounding direction cannot be +// unambiguously determined. 
+// +// Precondition: rest < ten_kappa. +static bool RoundWeedCounted(Vector buffer, + int length, + uint64_t rest, + uint64_t ten_kappa, + uint64_t unit, + int* kappa) { + ASSERT(rest < ten_kappa); + // The following tests are done in a specific order to avoid overflows. They + // will work correctly with any uint64 values of rest < ten_kappa and unit. + // + // If the unit is too big, then we don't know which way to round. For example + // a unit of 50 means that the real number lies within rest +/- 50. If + // 10^kappa == 40 then there is no way to tell which way to round. + if (unit >= ten_kappa) return false; + // Even if unit is just half the size of 10^kappa we are already completely + // lost. (And after the previous test we know that the expression will not + // over/underflow.) + if (ten_kappa - unit <= unit) return false; + // If 2 * (rest + unit) <= 10^kappa we can safely round down. + if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) { + return true; + } + // If 2 * (rest - unit) >= 10^kappa, then we can safely round up. + if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) { + // Increment the last digit recursively until we find a non '9' digit. + buffer[length - 1]++; + for (int i = length - 1; i > 0; --i) { + if (buffer[i] != '0' + 10) break; + buffer[i] = '0'; + buffer[i - 1]++; + } + // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the + // exception of the first digit all digits are now '0'. Simply switch the + // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and + // the power (the kappa) is increased. + if (buffer[0] == '0' + 10) { + buffer[0] = '1'; + (*kappa) += 1; + } + return true; + } + return false; +} + +// Returns the biggest power of ten that is less than or equal to the given +// number. We furthermore receive the maximum number of bits 'number' has. +// +// Returns power == 10^(exponent_plus_one-1) such that +// power <= number < power * 10. 
+// If number_bits == 0 then 0^(0-1) is returned. +// The number of bits must be <= 32. +// Precondition: number < (1 << (number_bits + 1)). + +// Inspired by the method for finding an integer log base 10 from here: +// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 +static unsigned int const kSmallPowersOfTen[] = + {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, + 1000000000}; + +static void BiggestPowerTen(uint32_t number, + int number_bits, + uint32_t* power, + int* exponent_plus_one) { + ASSERT(number < (1u << (number_bits + 1))); + // 1233/4096 is approximately 1/lg(10). + int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12); + // We increment to skip over the first entry in the kPowersOf10 table. + // Note: kPowersOf10[i] == 10^(i-1). + exponent_plus_one_guess++; + // We don't have any guarantees that 2^number_bits <= number. + if (number < kSmallPowersOfTen[exponent_plus_one_guess]) { + exponent_plus_one_guess--; + } + *power = kSmallPowersOfTen[exponent_plus_one_guess]; + *exponent_plus_one = exponent_plus_one_guess; +} + +// Generates the digits of input number w. +// w is a floating-point number (DiyFp), consisting of a significand and an +// exponent. Its exponent is bounded by kMinimalTargetExponent and +// kMaximalTargetExponent. +// Hence -60 <= w.e() <= -32. +// +// Returns false if it fails, in which case the generated digits in the buffer +// should not be used. +// Preconditions: +// * low, w and high are correct up to 1 ulp (unit in the last place). That +// is, their error must be less than a unit of their last digits. +// * low.e() == w.e() == high.e() +// * low < w < high, and taking into account their error: low~ <= high~ +// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent +// Postconditions: returns false if procedure fails. +// otherwise: +// * buffer is not null-terminated, but len contains the number of digits. 
+// * buffer contains the shortest possible decimal digit-sequence +// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the +// correct values of low and high (without their error). +// * if more than one decimal representation gives the minimal number of +// decimal digits then the one closest to W (where W is the correct value +// of w) is chosen. +// Remark: this procedure takes into account the imprecision of its input +// numbers. If the precision is not enough to guarantee all the postconditions +// then false is returned. This usually happens rarely (~0.5%). +// +// Say, for the sake of example, that +// w.e() == -48, and w.f() == 0x1234567890abcdef +// w's value can be computed by w.f() * 2^w.e() +// We can obtain w's integral digits by simply shifting w.f() by -w.e(). +// -> w's integral part is 0x1234 +// w's fractional part is therefore 0x567890abcdef. +// Printing w's integral part is easy (simply print 0x1234 in decimal). +// In order to print its fraction we repeatedly multiply the fraction by 10 and +// get each digit. Example the first digit after the point would be computed by +// (0x567890abcdef * 10) >> 48. -> 3 +// The whole thing becomes slightly more complicated because we want to stop +// once we have enough digits. That is, once the digits inside the buffer +// represent 'w' we can stop. Everything inside the interval low - high +// represents w. However we have to pay attention to low, high and w's +// imprecision. +static bool DigitGen(DiyFp low, + DiyFp w, + DiyFp high, + Vector buffer, + int* length, + int* kappa) { + ASSERT(low.e() == w.e() && w.e() == high.e()); + ASSERT(low.f() + 1 <= high.f() - 1); + ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); + // low, w and high are imprecise, but by less than one ulp (unit in the last + // place). + // If we remove (resp. add) 1 ulp from low (resp. 
high) we are certain that + // the new numbers are outside of the interval we want the final + // representation to lie in. + // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield + // numbers that are certain to lie in the interval. We will use this fact + // later on. + // We will now start by generating the digits within the uncertain + // interval. Later we will weed out representations that lie outside the safe + // interval and thus _might_ lie outside the correct interval. + uint64_t unit = 1; + DiyFp too_low = DiyFp(low.f() - unit, low.e()); + DiyFp too_high = DiyFp(high.f() + unit, high.e()); + // too_low and too_high are guaranteed to lie outside the interval we want the + // generated number in. + DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low); + // We now cut the input number into two parts: the integral digits and the + // fractionals. We will not write any decimal separator though, but adapt + // kappa instead. + // Reminder: we are currently computing the digits (stored inside the buffer) + // such that: too_low < buffer * 10^kappa < too_high + // We use too_high for the digit_generation and stop as soon as possible. + // If we stop early we effectively round down. + DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); + // Division by one is a shift. + uint32_t integrals = static_cast(too_high.f() >> -one.e()); + // Modulo by one is an and. + uint64_t fractionals = too_high.f() & (one.f() - 1); + uint32_t divisor; + int divisor_exponent_plus_one; + BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), + &divisor, &divisor_exponent_plus_one); + *kappa = divisor_exponent_plus_one; + *length = 0; + // Loop invariant: buffer = too_high / 10^kappa (integer division) + // The invariant holds for the first iteration: kappa has been initialized + // with the divisor exponent + 1. And the divisor is the biggest power of ten + // that is smaller than integrals. 
+ while (*kappa > 0) { + int digit = integrals / divisor; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + integrals %= divisor; + (*kappa)--; + // Note that kappa now equals the exponent of the divisor and that the + // invariant thus holds again. + uint64_t rest = + (static_cast(integrals) << -one.e()) + fractionals; + // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e()) + // Reminder: unsafe_interval.e() == one.e() + if (rest < unsafe_interval.f()) { + // Rounding down (by not emitting the remaining digits) yields a number + // that lies within the unsafe interval. + return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(), + unsafe_interval.f(), rest, + static_cast(divisor) << -one.e(), unit); + } + divisor /= 10; + } + + // The integrals have been generated. We are at the point of the decimal + // separator. In the following loop we simply multiply the remaining digits by + // 10 and divide by one. We just need to pay attention to multiply associated + // data (like the interval or 'unit'), too. + // Note that the multiplication by 10 does not overflow, because w.e >= -60 + // and thus one.e >= -60. + ASSERT(one.e() >= -60); + ASSERT(fractionals < one.f()); + ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); + for (;;) { + fractionals *= 10; + unit *= 10; + unsafe_interval.set_f(unsafe_interval.f() * 10); + // Integer division by one. + int digit = static_cast(fractionals >> -one.e()); + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + fractionals &= one.f() - 1; // Modulo by one. + (*kappa)--; + if (fractionals < unsafe_interval.f()) { + return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit, + unsafe_interval.f(), fractionals, one.f(), unit); + } + } +} + + + +// Generates (at most) requested_digits digits of input number w. +// w is a floating-point number (DiyFp), consisting of a significand and an +// exponent. 
Its exponent is bounded by kMinimalTargetExponent and +// kMaximalTargetExponent. +// Hence -60 <= w.e() <= -32. +// +// Returns false if it fails, in which case the generated digits in the buffer +// should not be used. +// Preconditions: +// * w is correct up to 1 ulp (unit in the last place). That +// is, its error must be strictly less than a unit of its last digit. +// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent +// +// Postconditions: returns false if procedure fails. +// otherwise: +// * buffer is not null-terminated, but length contains the number of +// digits. +// * the representation in buffer is the most precise representation of +// requested_digits digits. +// * buffer contains at most requested_digits digits of w. If there are less +// than requested_digits digits then some trailing '0's have been removed. +// * kappa is such that +// w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2. +// +// Remark: This procedure takes into account the imprecision of its input +// numbers. If the precision is not enough to guarantee all the postconditions +// then false is returned. This usually happens rarely, but the failure-rate +// increases with higher requested_digits. +static bool DigitGenCounted(DiyFp w, + int requested_digits, + Vector buffer, + int* length, + int* kappa) { + ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); + ASSERT(kMinimalTargetExponent >= -60); + ASSERT(kMaximalTargetExponent <= -32); + // w is assumed to have an error less than 1 unit. Whenever w is scaled we + // also scale its error. + uint64_t w_error = 1; + // We cut the input number into two parts: the integral digits and the + // fractional digits. We don't emit any decimal separator, but adapt kappa + // instead. Example: instead of writing "1.2" we put "12" into the buffer and + // increase kappa by 1. + DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); + // Division by one is a shift. 
+ uint32_t integrals = static_cast(w.f() >> -one.e()); + // Modulo by one is an and. + uint64_t fractionals = w.f() & (one.f() - 1); + uint32_t divisor; + int divisor_exponent_plus_one; + BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), + &divisor, &divisor_exponent_plus_one); + *kappa = divisor_exponent_plus_one; + *length = 0; + + // Loop invariant: buffer = w / 10^kappa (integer division) + // The invariant holds for the first iteration: kappa has been initialized + // with the divisor exponent + 1. And the divisor is the biggest power of ten + // that is smaller than 'integrals'. + while (*kappa > 0) { + int digit = integrals / divisor; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + requested_digits--; + integrals %= divisor; + (*kappa)--; + // Note that kappa now equals the exponent of the divisor and that the + // invariant thus holds again. + if (requested_digits == 0) break; + divisor /= 10; + } + + if (requested_digits == 0) { + uint64_t rest = + (static_cast(integrals) << -one.e()) + fractionals; + return RoundWeedCounted(buffer, *length, rest, + static_cast(divisor) << -one.e(), w_error, + kappa); + } + + // The integrals have been generated. We are at the point of the decimal + // separator. In the following loop we simply multiply the remaining digits by + // 10 and divide by one. We just need to pay attention to multiply associated + // data (the 'unit'), too. + // Note that the multiplication by 10 does not overflow, because w.e >= -60 + // and thus one.e >= -60. + ASSERT(one.e() >= -60); + ASSERT(fractionals < one.f()); + ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); + while (requested_digits > 0 && fractionals > w_error) { + fractionals *= 10; + w_error *= 10; + // Integer division by one. 
+ int digit = static_cast(fractionals >> -one.e()); + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + requested_digits--; + fractionals &= one.f() - 1; // Modulo by one. + (*kappa)--; + } + if (requested_digits != 0) return false; + return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error, + kappa); +} + + +// Provides a decimal representation of v. +// Returns true if it succeeds, otherwise the result cannot be trusted. +// There will be *length digits inside the buffer (not null-terminated). +// If the function returns true then +// v == (double) (buffer * 10^decimal_exponent). +// The digits in the buffer are the shortest representation possible: no +// 0.09999999999999999 instead of 0.1. The shorter representation will even be +// chosen even if the longer one would be closer to v. +// The last digit will be closest to the actual v. That is, even if several +// digits might correctly yield 'v' when read again, the closest will be +// computed. +static bool Grisu3(double v, + FastDtoaMode mode, + Vector buffer, + int* length, + int* decimal_exponent) { + DiyFp w = Double(v).AsNormalizedDiyFp(); + // boundary_minus and boundary_plus are the boundaries between v and its + // closest floating-point neighbors. Any number strictly between + // boundary_minus and boundary_plus will round to v when convert to a double. + // Grisu3 will never output representations that lie exactly on a boundary. 
+ DiyFp boundary_minus, boundary_plus; + if (mode == FAST_DTOA_SHORTEST) { + Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus); + } else { + ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE); + float single_v = static_cast(v); + Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus); + } + ASSERT(boundary_plus.e() == w.e()); + DiyFp ten_mk; // Cached power of ten: 10^-k + int mk; // -k + int ten_mk_minimal_binary_exponent = + kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); + int ten_mk_maximal_binary_exponent = + kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); + PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + ten_mk_minimal_binary_exponent, + ten_mk_maximal_binary_exponent, + &ten_mk, &mk); + ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + + DiyFp::kSignificandSize) && + (kMaximalTargetExponent >= w.e() + ten_mk.e() + + DiyFp::kSignificandSize)); + // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a + // 64 bit significand and ten_mk is thus only precise up to 64 bits. + + // The DiyFp::Times procedure rounds its result, and ten_mk is approximated + // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now + // off by a small amount. + // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. + // In other words: let f = scaled_w.f() and e = scaled_w.e(), then + // (f-1) * 2^e < w*10^k < (f+1) * 2^e + DiyFp scaled_w = DiyFp::Times(w, ten_mk); + ASSERT(scaled_w.e() == + boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize); + // In theory it would be possible to avoid some recomputations by computing + // the difference between w and boundary_minus/plus (a power of 2) and to + // compute scaled_boundary_minus/plus by subtracting/adding from + // scaled_w. However the code becomes much less readable and the speed + // enhancements are not terriffic. 
+ DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk); + DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk); + + // DigitGen will generate the digits of scaled_w. Therefore we have + // v == (double) (scaled_w * 10^-mk). + // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an + // integer than it will be updated. For instance if scaled_w == 1.23 then + // the buffer will be filled with "123" und the decimal_exponent will be + // decreased by 2. + int kappa; + bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus, + buffer, length, &kappa); + *decimal_exponent = -mk + kappa; + return result; +} + + +// The "counted" version of grisu3 (see above) only generates requested_digits +// number of digits. This version does not generate the shortest representation, +// and with enough requested digits 0.1 will at some point print as 0.9999999... +// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and +// therefore the rounding strategy for halfway cases is irrelevant. +static bool Grisu3Counted(double v, + int requested_digits, + Vector buffer, + int* length, + int* decimal_exponent) { + DiyFp w = Double(v).AsNormalizedDiyFp(); + DiyFp ten_mk; // Cached power of ten: 10^-k + int mk; // -k + int ten_mk_minimal_binary_exponent = + kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); + int ten_mk_maximal_binary_exponent = + kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); + PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + ten_mk_minimal_binary_exponent, + ten_mk_maximal_binary_exponent, + &ten_mk, &mk); + ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + + DiyFp::kSignificandSize) && + (kMaximalTargetExponent >= w.e() + ten_mk.e() + + DiyFp::kSignificandSize)); + // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a + // 64 bit significand and ten_mk is thus only precise up to 64 bits. 
+ + // The DiyFp::Times procedure rounds its result, and ten_mk is approximated + // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now + // off by a small amount. + // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. + // In other words: let f = scaled_w.f() and e = scaled_w.e(), then + // (f-1) * 2^e < w*10^k < (f+1) * 2^e + DiyFp scaled_w = DiyFp::Times(w, ten_mk); + + // We now have (double) (scaled_w * 10^-mk). + // DigitGen will generate the first requested_digits digits of scaled_w and + // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It + // will not always be exactly the same since DigitGenCounted only produces a + // limited number of digits.) + int kappa; + bool result = DigitGenCounted(scaled_w, requested_digits, + buffer, length, &kappa); + *decimal_exponent = -mk + kappa; + return result; +} + + +bool FastDtoa(double v, + FastDtoaMode mode, + int requested_digits, + Vector buffer, + int* length, + int* decimal_point) { + ASSERT(v > 0); + ASSERT(!Double(v).IsSpecial()); + + bool result = false; + int decimal_exponent = 0; + switch (mode) { + case FAST_DTOA_SHORTEST: + case FAST_DTOA_SHORTEST_SINGLE: + result = Grisu3(v, mode, buffer, length, &decimal_exponent); + break; + case FAST_DTOA_PRECISION: + result = Grisu3Counted(v, requested_digits, + buffer, length, &decimal_exponent); + break; + default: + UNREACHABLE(); + } + if (result) { + *decimal_point = *length + decimal_exponent; + buffer[*length] = '\0'; + } + return result; +} + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/fast-dtoa.h b/contrib/libs/double-conversion/fast-dtoa.h index b7011035005..5f1e8eee5e5 100644 --- a/contrib/libs/double-conversion/fast-dtoa.h +++ b/contrib/libs/double-conversion/fast-dtoa.h @@ -1,88 +1,88 @@ -// Copyright 2010 the V8 project authors. All rights reserved. 
-// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_ -#define DOUBLE_CONVERSION_FAST_DTOA_H_ - -#include "utils.h" - -namespace double_conversion { - -enum FastDtoaMode { - // Computes the shortest representation of the given input. The returned - // result will be the most accurate number of this length. Longer - // representations might be more accurate. - FAST_DTOA_SHORTEST, - // Same as FAST_DTOA_SHORTEST but for single-precision floats. 
- FAST_DTOA_SHORTEST_SINGLE, - // Computes a representation where the precision (number of digits) is - // given as input. The precision is independent of the decimal point. - FAST_DTOA_PRECISION -}; - -// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not -// include the terminating '\0' character. -static const int kFastDtoaMaximalLength = 17; -// Same for single-precision numbers. -static const int kFastDtoaMaximalSingleLength = 9; - -// Provides a decimal representation of v. -// The result should be interpreted as buffer * 10^(point - length). -// -// Precondition: -// * v must be a strictly positive finite double. -// -// Returns true if it succeeds, otherwise the result can not be trusted. -// There will be *length digits inside the buffer followed by a null terminator. -// If the function returns true and mode equals -// - FAST_DTOA_SHORTEST, then -// the parameter requested_digits is ignored. -// The result satisfies -// v == (double) (buffer * 10^(point - length)). -// The digits in the buffer are the shortest representation possible. E.g. -// if 0.099999999999 and 0.1 represent the same double then "1" is returned -// with point = 0. -// The last digit will be closest to the actual v. That is, even if several -// digits might correctly yield 'v' when read again, the buffer will contain -// the one closest to v. -// - FAST_DTOA_PRECISION, then -// the buffer contains requested_digits digits. -// the difference v - (buffer * 10^(point-length)) is closest to zero for -// all possible representations of requested_digits digits. -// If there are two values that are equally close, then FastDtoa returns -// false. -// For both modes the buffer must be large enough to hold the result. -bool FastDtoa(double d, - FastDtoaMode mode, - int requested_digits, - Vector buffer, - int* length, - int* decimal_point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_FAST_DTOA_H_ +// Copyright 2010 the V8 project authors. 
All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_ +#define DOUBLE_CONVERSION_FAST_DTOA_H_ + +#include "utils.h" + +namespace double_conversion { + +enum FastDtoaMode { + // Computes the shortest representation of the given input. The returned + // result will be the most accurate number of this length. Longer + // representations might be more accurate. + FAST_DTOA_SHORTEST, + // Same as FAST_DTOA_SHORTEST but for single-precision floats. 
+ FAST_DTOA_SHORTEST_SINGLE, + // Computes a representation where the precision (number of digits) is + // given as input. The precision is independent of the decimal point. + FAST_DTOA_PRECISION +}; + +// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not +// include the terminating '\0' character. +static const int kFastDtoaMaximalLength = 17; +// Same for single-precision numbers. +static const int kFastDtoaMaximalSingleLength = 9; + +// Provides a decimal representation of v. +// The result should be interpreted as buffer * 10^(point - length). +// +// Precondition: +// * v must be a strictly positive finite double. +// +// Returns true if it succeeds, otherwise the result can not be trusted. +// There will be *length digits inside the buffer followed by a null terminator. +// If the function returns true and mode equals +// - FAST_DTOA_SHORTEST, then +// the parameter requested_digits is ignored. +// The result satisfies +// v == (double) (buffer * 10^(point - length)). +// The digits in the buffer are the shortest representation possible. E.g. +// if 0.099999999999 and 0.1 represent the same double then "1" is returned +// with point = 0. +// The last digit will be closest to the actual v. That is, even if several +// digits might correctly yield 'v' when read again, the buffer will contain +// the one closest to v. +// - FAST_DTOA_PRECISION, then +// the buffer contains requested_digits digits. +// the difference v - (buffer * 10^(point-length)) is closest to zero for +// all possible representations of requested_digits digits. +// If there are two values that are equally close, then FastDtoa returns +// false. +// For both modes the buffer must be large enough to hold the result. 
+bool FastDtoa(double d, + FastDtoaMode mode, + int requested_digits, + Vector buffer, + int* length, + int* decimal_point); + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_FAST_DTOA_H_ diff --git a/contrib/libs/double-conversion/fixed-dtoa.cc b/contrib/libs/double-conversion/fixed-dtoa.cc index eb5b27d777a..0f989bceaf2 100644 --- a/contrib/libs/double-conversion/fixed-dtoa.cc +++ b/contrib/libs/double-conversion/fixed-dtoa.cc @@ -1,405 +1,405 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include - -#include "fixed-dtoa.h" -#include "ieee.h" - -namespace double_conversion { - -// Represents a 128bit type. This class should be replaced by a native type on -// platforms that support 128bit integers. -class UInt128 { - public: - UInt128() : high_bits_(0), low_bits_(0) { } - UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { } - - void Multiply(uint32_t multiplicand) { - uint64_t accumulator; - - accumulator = (low_bits_ & kMask32) * multiplicand; - uint32_t part = static_cast(accumulator & kMask32); - accumulator >>= 32; - accumulator = accumulator + (low_bits_ >> 32) * multiplicand; - low_bits_ = (accumulator << 32) + part; - accumulator >>= 32; - accumulator = accumulator + (high_bits_ & kMask32) * multiplicand; - part = static_cast(accumulator & kMask32); - accumulator >>= 32; - accumulator = accumulator + (high_bits_ >> 32) * multiplicand; - high_bits_ = (accumulator << 32) + part; - ASSERT((accumulator >> 32) == 0); - } - - void Shift(int shift_amount) { - ASSERT(-64 <= shift_amount && shift_amount <= 64); - if (shift_amount == 0) { - return; - } else if (shift_amount == -64) { - high_bits_ = low_bits_; - low_bits_ = 0; - } else if (shift_amount == 64) { - low_bits_ = high_bits_; - high_bits_ = 0; - } else if (shift_amount <= 0) { - high_bits_ <<= -shift_amount; - high_bits_ += low_bits_ >> (64 + shift_amount); - low_bits_ <<= -shift_amount; - } else { - low_bits_ >>= shift_amount; - 
low_bits_ += high_bits_ << (64 - shift_amount); - high_bits_ >>= shift_amount; - } - } - - // Modifies *this to *this MOD (2^power). - // Returns *this DIV (2^power). - int DivModPowerOf2(int power) { - if (power >= 64) { - int result = static_cast(high_bits_ >> (power - 64)); - high_bits_ -= static_cast(result) << (power - 64); - return result; - } else { - uint64_t part_low = low_bits_ >> power; - uint64_t part_high = high_bits_ << (64 - power); - int result = static_cast(part_low + part_high); - high_bits_ = 0; - low_bits_ -= part_low << power; - return result; - } - } - - bool IsZero() const { - return high_bits_ == 0 && low_bits_ == 0; - } - - int BitAt(int position) const { - if (position >= 64) { - return static_cast(high_bits_ >> (position - 64)) & 1; - } else { - return static_cast(low_bits_ >> position) & 1; - } - } - - private: - static const uint64_t kMask32 = 0xFFFFFFFF; - // Value == (high_bits_ << 64) + low_bits_ - uint64_t high_bits_; - uint64_t low_bits_; -}; - - -static const int kDoubleSignificandSize = 53; // Includes the hidden bit. - - -static void FillDigits32FixedLength(uint32_t number, int requested_length, - Vector buffer, int* length) { - for (int i = requested_length - 1; i >= 0; --i) { - buffer[(*length) + i] = '0' + number % 10; - number /= 10; - } - *length += requested_length; -} - - -static void FillDigits32(uint32_t number, Vector buffer, int* length) { - int number_length = 0; - // We fill the digits in reverse order and exchange them afterwards. - while (number != 0) { - int digit = number % 10; - number /= 10; - buffer[(*length) + number_length] = static_cast('0' + digit); - number_length++; - } - // Exchange the digits. 
- int i = *length; - int j = *length + number_length - 1; - while (i < j) { - char tmp = buffer[i]; - buffer[i] = buffer[j]; - buffer[j] = tmp; - i++; - j--; - } - *length += number_length; -} - - -static void FillDigits64FixedLength(uint64_t number, - Vector buffer, int* length) { - const uint32_t kTen7 = 10000000; - // For efficiency cut the number into 3 uint32_t parts, and print those. - uint32_t part2 = static_cast(number % kTen7); - number /= kTen7; - uint32_t part1 = static_cast(number % kTen7); - uint32_t part0 = static_cast(number / kTen7); - - FillDigits32FixedLength(part0, 3, buffer, length); - FillDigits32FixedLength(part1, 7, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); -} - - -static void FillDigits64(uint64_t number, Vector buffer, int* length) { - const uint32_t kTen7 = 10000000; - // For efficiency cut the number into 3 uint32_t parts, and print those. - uint32_t part2 = static_cast(number % kTen7); - number /= kTen7; - uint32_t part1 = static_cast(number % kTen7); - uint32_t part0 = static_cast(number / kTen7); - - if (part0 != 0) { - FillDigits32(part0, buffer, length); - FillDigits32FixedLength(part1, 7, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); - } else if (part1 != 0) { - FillDigits32(part1, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); - } else { - FillDigits32(part2, buffer, length); - } -} - - -static void RoundUp(Vector buffer, int* length, int* decimal_point) { - // An empty buffer represents 0. - if (*length == 0) { - buffer[0] = '1'; - *decimal_point = 1; - *length = 1; - return; - } - // Round the last digit until we either have a digit that was not '9' or until - // we reached the first digit. - buffer[(*length) - 1]++; - for (int i = (*length) - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) { - return; - } - buffer[i] = '0'; - buffer[i - 1]++; - } - // If the first digit is now '0' + 10, we would need to set it to '0' and add - // a '1' in front. 
However we reach the first digit only if all following - // digits had been '9' before rounding up. Now all trailing digits are '0' and - // we simply switch the first digit to '1' and update the decimal-point - // (indicating that the point is now one digit to the right). - if (buffer[0] == '0' + 10) { - buffer[0] = '1'; - (*decimal_point)++; - } -} - - -// The given fractionals number represents a fixed-point number with binary -// point at bit (-exponent). -// Preconditions: -// -128 <= exponent <= 0. -// 0 <= fractionals * 2^exponent < 1 -// The buffer holds the result. -// The function will round its result. During the rounding-process digits not -// generated by this function might be updated, and the decimal-point variable -// might be updated. If this function generates the digits 99 and the buffer -// already contained "199" (thus yielding a buffer of "19999") then a -// rounding-up will change the contents of the buffer to "20000". -static void FillFractionals(uint64_t fractionals, int exponent, - int fractional_count, Vector buffer, - int* length, int* decimal_point) { - ASSERT(-128 <= exponent && exponent <= 0); - // 'fractionals' is a fixed-point number, with binary point at bit - // (-exponent). Inside the function the non-converted remainder of fractionals - // is a fixed-point number, with binary point at bit 'point'. - if (-exponent <= 64) { - // One 64 bit number is sufficient. - ASSERT(fractionals >> 56 == 0); - int point = -exponent; - for (int i = 0; i < fractional_count; ++i) { - if (fractionals == 0) break; - // Instead of multiplying by 10 we multiply by 5 and adjust the point - // location. This way the fractionals variable will not overflow. - // Invariant at the beginning of the loop: fractionals < 2^point. - // Initially we have: point <= 64 and fractionals < 2^56 - // After each iteration the point is decremented by one. - // Note that 5^3 = 125 < 128 = 2^7. 
- // Therefore three iterations of this loop will not overflow fractionals - // (even without the subtraction at the end of the loop body). At this - // time point will satisfy point <= 61 and therefore fractionals < 2^point - // and any further multiplication of fractionals by 5 will not overflow. - fractionals *= 5; - point--; - int digit = static_cast(fractionals >> point); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - fractionals -= static_cast(digit) << point; - } - // If the first bit after the point is set we have to round up. - ASSERT(fractionals == 0 || point - 1 >= 0); - if ((fractionals != 0) && ((fractionals >> (point - 1)) & 1) == 1) { - RoundUp(buffer, length, decimal_point); - } - } else { // We need 128 bits. - ASSERT(64 < -exponent && -exponent <= 128); - UInt128 fractionals128 = UInt128(fractionals, 0); - fractionals128.Shift(-exponent - 64); - int point = 128; - for (int i = 0; i < fractional_count; ++i) { - if (fractionals128.IsZero()) break; - // As before: instead of multiplying by 10 we multiply by 5 and adjust the - // point location. - // This multiplication will not overflow for the same reasons as before. - fractionals128.Multiply(5); - point--; - int digit = fractionals128.DivModPowerOf2(point); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - } - if (fractionals128.BitAt(point - 1) == 1) { - RoundUp(buffer, length, decimal_point); - } - } -} - - -// Removes leading and trailing zeros. -// If leading zeros are removed then the decimal point position is adjusted. 
-static void TrimZeros(Vector buffer, int* length, int* decimal_point) { - while (*length > 0 && buffer[(*length) - 1] == '0') { - (*length)--; - } - int first_non_zero = 0; - while (first_non_zero < *length && buffer[first_non_zero] == '0') { - first_non_zero++; - } - if (first_non_zero != 0) { - for (int i = first_non_zero; i < *length; ++i) { - buffer[i - first_non_zero] = buffer[i]; - } - *length -= first_non_zero; - *decimal_point -= first_non_zero; - } -} - - -bool FastFixedDtoa(double v, - int fractional_count, - Vector buffer, - int* length, - int* decimal_point) { - const uint32_t kMaxUInt32 = 0xFFFFFFFF; - uint64_t significand = Double(v).Significand(); - int exponent = Double(v).Exponent(); - // v = significand * 2^exponent (with significand a 53bit integer). - // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we - // don't know how to compute the representation. 2^73 ~= 9.5*10^21. - // If necessary this limit could probably be increased, but we don't need - // more. - if (exponent > 20) return false; - if (fractional_count > 20) return false; - *length = 0; - // At most kDoubleSignificandSize bits of the significand are non-zero. - // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero - // bits: 0..11*..0xxx..53*..xx - if (exponent + kDoubleSignificandSize > 64) { - // The exponent must be > 11. - // - // We know that v = significand * 2^exponent. - // And the exponent > 11. - // We simplify the task by dividing v by 10^17. - // The quotient delivers the first digits, and the remainder fits into a 64 - // bit number. - // Dividing by 10^17 is equivalent to dividing by 5^17*2^17. - const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17 - uint64_t divisor = kFive17; - int divisor_power = 17; - uint64_t dividend = significand; - uint32_t quotient; - uint64_t remainder; - // Let v = f * 2^e with f == significand and e == exponent. 
- // Then need q (quotient) and r (remainder) as follows: - // v = q * 10^17 + r - // f * 2^e = q * 10^17 + r - // f * 2^e = q * 5^17 * 2^17 + r - // If e > 17 then - // f * 2^(e-17) = q * 5^17 + r/2^17 - // else - // f = q * 5^17 * 2^(17-e) + r/2^e - if (exponent > divisor_power) { - // We only allow exponents of up to 20 and therefore (17 - e) <= 3 - dividend <<= exponent - divisor_power; - quotient = static_cast(dividend / divisor); - remainder = (dividend % divisor) << divisor_power; - } else { - divisor <<= divisor_power - exponent; - quotient = static_cast(dividend / divisor); - remainder = (dividend % divisor) << exponent; - } - FillDigits32(quotient, buffer, length); - FillDigits64FixedLength(remainder, buffer, length); - *decimal_point = *length; - } else if (exponent >= 0) { - // 0 <= exponent <= 11 - significand <<= exponent; - FillDigits64(significand, buffer, length); - *decimal_point = *length; - } else if (exponent > -kDoubleSignificandSize) { - // We have to cut the number. - uint64_t integrals = significand >> -exponent; - uint64_t fractionals = significand - (integrals << -exponent); - if (integrals > kMaxUInt32) { - FillDigits64(integrals, buffer, length); - } else { - FillDigits32(static_cast(integrals), buffer, length); - } - *decimal_point = *length; - FillFractionals(fractionals, exponent, fractional_count, - buffer, length, decimal_point); - } else if (exponent < -128) { - // This configuration (with at most 20 digits) means that all digits must be - // 0. - ASSERT(fractional_count <= 20); - buffer[0] = '\0'; - *length = 0; - *decimal_point = -fractional_count; - } else { - *decimal_point = 0; - FillFractionals(significand, exponent, fractional_count, - buffer, length, decimal_point); - } - TrimZeros(buffer, length, decimal_point); - buffer[*length] = '\0'; - if ((*length) == 0) { - // The string is empty and the decimal_point thus has no importance. Mimick - // Gay's dtoa and and set it to -fractional_count. 
- *decimal_point = -fractional_count; - } - return true; -} - -} // namespace double_conversion + +#include "fixed-dtoa.h" +#include "ieee.h" + +namespace double_conversion { + +// Represents a 128bit type. This class should be replaced by a native type on +// platforms that support 128bit integers. +class UInt128 { + public: + UInt128() : high_bits_(0), low_bits_(0) { } + UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { } + + void Multiply(uint32_t multiplicand) { + uint64_t accumulator; + + accumulator = (low_bits_ & kMask32) * multiplicand; + uint32_t part = static_cast(accumulator & kMask32); + accumulator >>= 32; + accumulator = accumulator + (low_bits_ >> 32) * multiplicand; + low_bits_ = (accumulator << 32) + part; + accumulator >>= 32; + accumulator = accumulator + (high_bits_ & kMask32) * multiplicand; + part = static_cast(accumulator & kMask32); + accumulator >>= 32; + accumulator = accumulator + (high_bits_ >> 32) * multiplicand; + high_bits_ = (accumulator << 32) + part; + ASSERT((accumulator >> 32) == 0); + } + + void Shift(int shift_amount) { + ASSERT(-64 <= shift_amount && shift_amount <= 64); + if (shift_amount == 0) { + return; + } else if (shift_amount == -64) { + high_bits_ = low_bits_; + low_bits_ = 0; + } else if (shift_amount == 64) { + low_bits_ = high_bits_; + high_bits_ = 0; + } else if (shift_amount <= 0) { + high_bits_ <<= -shift_amount; + high_bits_ += low_bits_ >> (64 + shift_amount); + low_bits_ <<= -shift_amount; + } else { + low_bits_ >>= shift_amount; + low_bits_ += high_bits_ << (64 - shift_amount); + high_bits_ >>= shift_amount; + } + } + + // Modifies *this to *this MOD (2^power). + // Returns *this DIV (2^power). 
+ int DivModPowerOf2(int power) { + if (power >= 64) { + int result = static_cast(high_bits_ >> (power - 64)); + high_bits_ -= static_cast(result) << (power - 64); + return result; + } else { + uint64_t part_low = low_bits_ >> power; + uint64_t part_high = high_bits_ << (64 - power); + int result = static_cast(part_low + part_high); + high_bits_ = 0; + low_bits_ -= part_low << power; + return result; + } + } + + bool IsZero() const { + return high_bits_ == 0 && low_bits_ == 0; + } + + int BitAt(int position) const { + if (position >= 64) { + return static_cast(high_bits_ >> (position - 64)) & 1; + } else { + return static_cast(low_bits_ >> position) & 1; + } + } + + private: + static const uint64_t kMask32 = 0xFFFFFFFF; + // Value == (high_bits_ << 64) + low_bits_ + uint64_t high_bits_; + uint64_t low_bits_; +}; + + +static const int kDoubleSignificandSize = 53; // Includes the hidden bit. + + +static void FillDigits32FixedLength(uint32_t number, int requested_length, + Vector buffer, int* length) { + for (int i = requested_length - 1; i >= 0; --i) { + buffer[(*length) + i] = '0' + number % 10; + number /= 10; + } + *length += requested_length; +} + + +static void FillDigits32(uint32_t number, Vector buffer, int* length) { + int number_length = 0; + // We fill the digits in reverse order and exchange them afterwards. + while (number != 0) { + int digit = number % 10; + number /= 10; + buffer[(*length) + number_length] = static_cast('0' + digit); + number_length++; + } + // Exchange the digits. + int i = *length; + int j = *length + number_length - 1; + while (i < j) { + char tmp = buffer[i]; + buffer[i] = buffer[j]; + buffer[j] = tmp; + i++; + j--; + } + *length += number_length; +} + + +static void FillDigits64FixedLength(uint64_t number, + Vector buffer, int* length) { + const uint32_t kTen7 = 10000000; + // For efficiency cut the number into 3 uint32_t parts, and print those. 
+ uint32_t part2 = static_cast(number % kTen7); + number /= kTen7; + uint32_t part1 = static_cast(number % kTen7); + uint32_t part0 = static_cast(number / kTen7); + + FillDigits32FixedLength(part0, 3, buffer, length); + FillDigits32FixedLength(part1, 7, buffer, length); + FillDigits32FixedLength(part2, 7, buffer, length); +} + + +static void FillDigits64(uint64_t number, Vector buffer, int* length) { + const uint32_t kTen7 = 10000000; + // For efficiency cut the number into 3 uint32_t parts, and print those. + uint32_t part2 = static_cast(number % kTen7); + number /= kTen7; + uint32_t part1 = static_cast(number % kTen7); + uint32_t part0 = static_cast(number / kTen7); + + if (part0 != 0) { + FillDigits32(part0, buffer, length); + FillDigits32FixedLength(part1, 7, buffer, length); + FillDigits32FixedLength(part2, 7, buffer, length); + } else if (part1 != 0) { + FillDigits32(part1, buffer, length); + FillDigits32FixedLength(part2, 7, buffer, length); + } else { + FillDigits32(part2, buffer, length); + } +} + + +static void RoundUp(Vector buffer, int* length, int* decimal_point) { + // An empty buffer represents 0. + if (*length == 0) { + buffer[0] = '1'; + *decimal_point = 1; + *length = 1; + return; + } + // Round the last digit until we either have a digit that was not '9' or until + // we reached the first digit. + buffer[(*length) - 1]++; + for (int i = (*length) - 1; i > 0; --i) { + if (buffer[i] != '0' + 10) { + return; + } + buffer[i] = '0'; + buffer[i - 1]++; + } + // If the first digit is now '0' + 10, we would need to set it to '0' and add + // a '1' in front. However we reach the first digit only if all following + // digits had been '9' before rounding up. Now all trailing digits are '0' and + // we simply switch the first digit to '1' and update the decimal-point + // (indicating that the point is now one digit to the right). 
+ if (buffer[0] == '0' + 10) { + buffer[0] = '1'; + (*decimal_point)++; + } +} + + +// The given fractionals number represents a fixed-point number with binary +// point at bit (-exponent). +// Preconditions: +// -128 <= exponent <= 0. +// 0 <= fractionals * 2^exponent < 1 +// The buffer holds the result. +// The function will round its result. During the rounding-process digits not +// generated by this function might be updated, and the decimal-point variable +// might be updated. If this function generates the digits 99 and the buffer +// already contained "199" (thus yielding a buffer of "19999") then a +// rounding-up will change the contents of the buffer to "20000". +static void FillFractionals(uint64_t fractionals, int exponent, + int fractional_count, Vector buffer, + int* length, int* decimal_point) { + ASSERT(-128 <= exponent && exponent <= 0); + // 'fractionals' is a fixed-point number, with binary point at bit + // (-exponent). Inside the function the non-converted remainder of fractionals + // is a fixed-point number, with binary point at bit 'point'. + if (-exponent <= 64) { + // One 64 bit number is sufficient. + ASSERT(fractionals >> 56 == 0); + int point = -exponent; + for (int i = 0; i < fractional_count; ++i) { + if (fractionals == 0) break; + // Instead of multiplying by 10 we multiply by 5 and adjust the point + // location. This way the fractionals variable will not overflow. + // Invariant at the beginning of the loop: fractionals < 2^point. + // Initially we have: point <= 64 and fractionals < 2^56 + // After each iteration the point is decremented by one. + // Note that 5^3 = 125 < 128 = 2^7. + // Therefore three iterations of this loop will not overflow fractionals + // (even without the subtraction at the end of the loop body). At this + // time point will satisfy point <= 61 and therefore fractionals < 2^point + // and any further multiplication of fractionals by 5 will not overflow. 
+ fractionals *= 5; + point--; + int digit = static_cast(fractionals >> point); + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + fractionals -= static_cast(digit) << point; + } + // If the first bit after the point is set we have to round up. + ASSERT(fractionals == 0 || point - 1 >= 0); + if ((fractionals != 0) && ((fractionals >> (point - 1)) & 1) == 1) { + RoundUp(buffer, length, decimal_point); + } + } else { // We need 128 bits. + ASSERT(64 < -exponent && -exponent <= 128); + UInt128 fractionals128 = UInt128(fractionals, 0); + fractionals128.Shift(-exponent - 64); + int point = 128; + for (int i = 0; i < fractional_count; ++i) { + if (fractionals128.IsZero()) break; + // As before: instead of multiplying by 10 we multiply by 5 and adjust the + // point location. + // This multiplication will not overflow for the same reasons as before. + fractionals128.Multiply(5); + point--; + int digit = fractionals128.DivModPowerOf2(point); + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + } + if (fractionals128.BitAt(point - 1) == 1) { + RoundUp(buffer, length, decimal_point); + } + } +} + + +// Removes leading and trailing zeros. +// If leading zeros are removed then the decimal point position is adjusted. 
+static void TrimZeros(Vector buffer, int* length, int* decimal_point) { + while (*length > 0 && buffer[(*length) - 1] == '0') { + (*length)--; + } + int first_non_zero = 0; + while (first_non_zero < *length && buffer[first_non_zero] == '0') { + first_non_zero++; + } + if (first_non_zero != 0) { + for (int i = first_non_zero; i < *length; ++i) { + buffer[i - first_non_zero] = buffer[i]; + } + *length -= first_non_zero; + *decimal_point -= first_non_zero; + } +} + + +bool FastFixedDtoa(double v, + int fractional_count, + Vector buffer, + int* length, + int* decimal_point) { + const uint32_t kMaxUInt32 = 0xFFFFFFFF; + uint64_t significand = Double(v).Significand(); + int exponent = Double(v).Exponent(); + // v = significand * 2^exponent (with significand a 53bit integer). + // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we + // don't know how to compute the representation. 2^73 ~= 9.5*10^21. + // If necessary this limit could probably be increased, but we don't need + // more. + if (exponent > 20) return false; + if (fractional_count > 20) return false; + *length = 0; + // At most kDoubleSignificandSize bits of the significand are non-zero. + // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero + // bits: 0..11*..0xxx..53*..xx + if (exponent + kDoubleSignificandSize > 64) { + // The exponent must be > 11. + // + // We know that v = significand * 2^exponent. + // And the exponent > 11. + // We simplify the task by dividing v by 10^17. + // The quotient delivers the first digits, and the remainder fits into a 64 + // bit number. + // Dividing by 10^17 is equivalent to dividing by 5^17*2^17. + const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17 + uint64_t divisor = kFive17; + int divisor_power = 17; + uint64_t dividend = significand; + uint32_t quotient; + uint64_t remainder; + // Let v = f * 2^e with f == significand and e == exponent. 
+ // Then need q (quotient) and r (remainder) as follows: + // v = q * 10^17 + r + // f * 2^e = q * 10^17 + r + // f * 2^e = q * 5^17 * 2^17 + r + // If e > 17 then + // f * 2^(e-17) = q * 5^17 + r/2^17 + // else + // f = q * 5^17 * 2^(17-e) + r/2^e + if (exponent > divisor_power) { + // We only allow exponents of up to 20 and therefore (17 - e) <= 3 + dividend <<= exponent - divisor_power; + quotient = static_cast(dividend / divisor); + remainder = (dividend % divisor) << divisor_power; + } else { + divisor <<= divisor_power - exponent; + quotient = static_cast(dividend / divisor); + remainder = (dividend % divisor) << exponent; + } + FillDigits32(quotient, buffer, length); + FillDigits64FixedLength(remainder, buffer, length); + *decimal_point = *length; + } else if (exponent >= 0) { + // 0 <= exponent <= 11 + significand <<= exponent; + FillDigits64(significand, buffer, length); + *decimal_point = *length; + } else if (exponent > -kDoubleSignificandSize) { + // We have to cut the number. + uint64_t integrals = significand >> -exponent; + uint64_t fractionals = significand - (integrals << -exponent); + if (integrals > kMaxUInt32) { + FillDigits64(integrals, buffer, length); + } else { + FillDigits32(static_cast(integrals), buffer, length); + } + *decimal_point = *length; + FillFractionals(fractionals, exponent, fractional_count, + buffer, length, decimal_point); + } else if (exponent < -128) { + // This configuration (with at most 20 digits) means that all digits must be + // 0. + ASSERT(fractional_count <= 20); + buffer[0] = '\0'; + *length = 0; + *decimal_point = -fractional_count; + } else { + *decimal_point = 0; + FillFractionals(significand, exponent, fractional_count, + buffer, length, decimal_point); + } + TrimZeros(buffer, length, decimal_point); + buffer[*length] = '\0'; + if ((*length) == 0) { + // The string is empty and the decimal_point thus has no importance. Mimick + // Gay's dtoa and and set it to -fractional_count. 
+ *decimal_point = -fractional_count; + } + return true; +} + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/fixed-dtoa.h b/contrib/libs/double-conversion/fixed-dtoa.h index e4eccca3713..3bdd08e21f5 100644 --- a/contrib/libs/double-conversion/fixed-dtoa.h +++ b/contrib/libs/double-conversion/fixed-dtoa.h @@ -1,56 +1,56 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_ -#define DOUBLE_CONVERSION_FIXED_DTOA_H_ - -#include "utils.h" - -namespace double_conversion { - -// Produces digits necessary to print a given number with -// 'fractional_count' digits after the decimal point. -// The buffer must be big enough to hold the result plus one terminating null -// character. -// -// The produced digits might be too short in which case the caller has to fill -// the gaps with '0's. -// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and -// decimal_point = -2. -// Halfway cases are rounded towards +/-Infinity (away from 0). The call -// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0. -// The returned buffer may contain digits that would be truncated from the -// shortest representation of the input. -// -// This method only works for some parameters. If it can't handle the input it -// returns false. The output is null-terminated when the function succeeds. -bool FastFixedDtoa(double v, int fractional_count, - Vector buffer, int* length, int* decimal_point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_FIXED_DTOA_H_ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_ +#define DOUBLE_CONVERSION_FIXED_DTOA_H_ + +#include "utils.h" + +namespace double_conversion { + +// Produces digits necessary to print a given number with +// 'fractional_count' digits after the decimal point. +// The buffer must be big enough to hold the result plus one terminating null +// character. +// +// The produced digits might be too short in which case the caller has to fill +// the gaps with '0's. +// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and +// decimal_point = -2. +// Halfway cases are rounded towards +/-Infinity (away from 0). The call +// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0. +// The returned buffer may contain digits that would be truncated from the +// shortest representation of the input. +// +// This method only works for some parameters. If it can't handle the input it +// returns false. The output is null-terminated when the function succeeds. 
+bool FastFixedDtoa(double v, int fractional_count, + Vector buffer, int* length, int* decimal_point); + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_FIXED_DTOA_H_ diff --git a/contrib/libs/double-conversion/ieee.h b/contrib/libs/double-conversion/ieee.h index 05da3fcc35f..4a5fe8f9c0a 100644 --- a/contrib/libs/double-conversion/ieee.h +++ b/contrib/libs/double-conversion/ieee.h @@ -1,402 +1,402 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#ifndef DOUBLE_CONVERSION_DOUBLE_H_ -#define DOUBLE_CONVERSION_DOUBLE_H_ - -#include "diy-fp.h" - -namespace double_conversion { - -// We assume that doubles and uint64_t have the same endianness. -static uint64_t double_to_uint64(double d) { return BitCast(d); } -static double uint64_to_double(uint64_t d64) { return BitCast(d64); } -static uint32_t float_to_uint32(float f) { return BitCast(f); } -static float uint32_to_float(uint32_t d32) { return BitCast(d32); } - -// Helper functions for doubles. -class Double { - public: - static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000); - static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000); - static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF); - static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000); - static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit. - static const int kSignificandSize = 53; - - Double() : d64_(0) {} - explicit Double(double d) : d64_(double_to_uint64(d)) {} - explicit Double(uint64_t d64) : d64_(d64) {} - explicit Double(DiyFp diy_fp) - : d64_(DiyFpToUint64(diy_fp)) {} - - // The value encoded by this Double must be greater or equal to +0.0. - // It must not be special (infinity, or NaN). - DiyFp AsDiyFp() const { - ASSERT(Sign() > 0); - ASSERT(!IsSpecial()); - return DiyFp(Significand(), Exponent()); - } - - // The value encoded by this Double must be strictly greater than 0. - DiyFp AsNormalizedDiyFp() const { - ASSERT(value() > 0.0); - uint64_t f = Significand(); - int e = Exponent(); - - // The current double could be a denormal. - while ((f & kHiddenBit) == 0) { - f <<= 1; - e--; - } - // Do the final shifts in one go. - f <<= DiyFp::kSignificandSize - kSignificandSize; - e -= DiyFp::kSignificandSize - kSignificandSize; - return DiyFp(f, e); - } - - // Returns the double's bit as uint64. - uint64_t AsUint64() const { - return d64_; - } - - // Returns the next greater double. 
Returns +infinity on input +infinity. - double NextDouble() const { - if (d64_ == kInfinity) return Double(kInfinity).value(); - if (Sign() < 0 && Significand() == 0) { - // -0.0 - return 0.0; - } - if (Sign() < 0) { - return Double(d64_ - 1).value(); - } else { - return Double(d64_ + 1).value(); - } - } - - double PreviousDouble() const { - if (d64_ == (kInfinity | kSignMask)) return -Infinity(); - if (Sign() < 0) { - return Double(d64_ + 1).value(); - } else { - if (Significand() == 0) return -0.0; - return Double(d64_ - 1).value(); - } - } - - int Exponent() const { - if (IsDenormal()) return kDenormalExponent; - - uint64_t d64 = AsUint64(); - int biased_e = - static_cast((d64 & kExponentMask) >> kPhysicalSignificandSize); - return biased_e - kExponentBias; - } - - uint64_t Significand() const { - uint64_t d64 = AsUint64(); - uint64_t significand = d64 & kSignificandMask; - if (!IsDenormal()) { - return significand + kHiddenBit; - } else { - return significand; - } - } - - // Returns true if the double is a denormal. - bool IsDenormal() const { - uint64_t d64 = AsUint64(); - return (d64 & kExponentMask) == 0; - } - - // We consider denormals not to be special. - // Hence only Infinity and NaN are special. - bool IsSpecial() const { - uint64_t d64 = AsUint64(); - return (d64 & kExponentMask) == kExponentMask; - } - - bool IsNan() const { - uint64_t d64 = AsUint64(); - return ((d64 & kExponentMask) == kExponentMask) && - ((d64 & kSignificandMask) != 0); - } - - bool IsInfinite() const { - uint64_t d64 = AsUint64(); - return ((d64 & kExponentMask) == kExponentMask) && - ((d64 & kSignificandMask) == 0); - } - - int Sign() const { - uint64_t d64 = AsUint64(); - return (d64 & kSignMask) == 0? 1: -1; - } - - // Precondition: the value encoded by this Double must be greater or equal - // than +0.0. - DiyFp UpperBoundary() const { - ASSERT(Sign() > 0); - return DiyFp(Significand() * 2 + 1, Exponent() - 1); - } - - // Computes the two boundaries of this. 
- // The bigger boundary (m_plus) is normalized. The lower boundary has the same - // exponent as m_plus. - // Precondition: the value encoded by this Double must be greater than 0. - void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const { - ASSERT(value() > 0.0); - DiyFp v = this->AsDiyFp(); - DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); - DiyFp m_minus; - if (LowerBoundaryIsCloser()) { - m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); - } else { - m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); - } - m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); - m_minus.set_e(m_plus.e()); - *out_m_plus = m_plus; - *out_m_minus = m_minus; - } - - bool LowerBoundaryIsCloser() const { - // The boundary is closer if the significand is of the form f == 2^p-1 then - // the lower boundary is closer. - // Think of v = 1000e10 and v- = 9999e9. - // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but - // at a distance of 1e8. - // The only exception is for the smallest normal: the largest denormal is - // at the same distance as its successor. - // Note: denormals have the same exponent as the smallest normals. - bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0); - return physical_significand_is_zero && (Exponent() != kDenormalExponent); - } - - double value() const { return uint64_to_double(d64_); } - - // Returns the significand size for a given order of magnitude. - // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude. - // This function returns the number of significant binary digits v will have - // once it's encoded into a double. In almost all cases this is equal to - // kSignificandSize. The only exceptions are denormals. They start with - // leading zeroes and their effective significand-size is hence smaller. 
- static int SignificandSizeForOrderOfMagnitude(int order) { - if (order >= (kDenormalExponent + kSignificandSize)) { - return kSignificandSize; - } - if (order <= kDenormalExponent) return 0; - return order - kDenormalExponent; - } - - static double Infinity() { - return Double(kInfinity).value(); - } - - static double NaN() { - return Double(kNaN).value(); - } - - private: - static const int kExponentBias = 0x3FF + kPhysicalSignificandSize; - static const int kDenormalExponent = -kExponentBias + 1; - static const int kMaxExponent = 0x7FF - kExponentBias; - static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000); - static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000); - - const uint64_t d64_; - - static uint64_t DiyFpToUint64(DiyFp diy_fp) { - uint64_t significand = diy_fp.f(); - int exponent = diy_fp.e(); - while (significand > kHiddenBit + kSignificandMask) { - significand >>= 1; - exponent++; - } - if (exponent >= kMaxExponent) { - return kInfinity; - } - if (exponent < kDenormalExponent) { - return 0; - } - while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) { - significand <<= 1; - exponent--; - } - uint64_t biased_exponent; - if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) { - biased_exponent = 0; - } else { - biased_exponent = static_cast(exponent + kExponentBias); - } - return (significand & kSignificandMask) | - (biased_exponent << kPhysicalSignificandSize); - } - +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_DOUBLE_H_ +#define DOUBLE_CONVERSION_DOUBLE_H_ + +#include "diy-fp.h" + +namespace double_conversion { + +// We assume that doubles and uint64_t have the same endianness. +static uint64_t double_to_uint64(double d) { return BitCast(d); } +static double uint64_to_double(uint64_t d64) { return BitCast(d64); } +static uint32_t float_to_uint32(float f) { return BitCast(f); } +static float uint32_to_float(uint32_t d32) { return BitCast(d32); } + +// Helper functions for doubles. 
+class Double { + public: + static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000); + static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000); + static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF); + static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000); + static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit. + static const int kSignificandSize = 53; + + Double() : d64_(0) {} + explicit Double(double d) : d64_(double_to_uint64(d)) {} + explicit Double(uint64_t d64) : d64_(d64) {} + explicit Double(DiyFp diy_fp) + : d64_(DiyFpToUint64(diy_fp)) {} + + // The value encoded by this Double must be greater or equal to +0.0. + // It must not be special (infinity, or NaN). + DiyFp AsDiyFp() const { + ASSERT(Sign() > 0); + ASSERT(!IsSpecial()); + return DiyFp(Significand(), Exponent()); + } + + // The value encoded by this Double must be strictly greater than 0. + DiyFp AsNormalizedDiyFp() const { + ASSERT(value() > 0.0); + uint64_t f = Significand(); + int e = Exponent(); + + // The current double could be a denormal. + while ((f & kHiddenBit) == 0) { + f <<= 1; + e--; + } + // Do the final shifts in one go. + f <<= DiyFp::kSignificandSize - kSignificandSize; + e -= DiyFp::kSignificandSize - kSignificandSize; + return DiyFp(f, e); + } + + // Returns the double's bit as uint64. + uint64_t AsUint64() const { + return d64_; + } + + // Returns the next greater double. Returns +infinity on input +infinity. 
+ double NextDouble() const { + if (d64_ == kInfinity) return Double(kInfinity).value(); + if (Sign() < 0 && Significand() == 0) { + // -0.0 + return 0.0; + } + if (Sign() < 0) { + return Double(d64_ - 1).value(); + } else { + return Double(d64_ + 1).value(); + } + } + + double PreviousDouble() const { + if (d64_ == (kInfinity | kSignMask)) return -Infinity(); + if (Sign() < 0) { + return Double(d64_ + 1).value(); + } else { + if (Significand() == 0) return -0.0; + return Double(d64_ - 1).value(); + } + } + + int Exponent() const { + if (IsDenormal()) return kDenormalExponent; + + uint64_t d64 = AsUint64(); + int biased_e = + static_cast((d64 & kExponentMask) >> kPhysicalSignificandSize); + return biased_e - kExponentBias; + } + + uint64_t Significand() const { + uint64_t d64 = AsUint64(); + uint64_t significand = d64 & kSignificandMask; + if (!IsDenormal()) { + return significand + kHiddenBit; + } else { + return significand; + } + } + + // Returns true if the double is a denormal. + bool IsDenormal() const { + uint64_t d64 = AsUint64(); + return (d64 & kExponentMask) == 0; + } + + // We consider denormals not to be special. + // Hence only Infinity and NaN are special. + bool IsSpecial() const { + uint64_t d64 = AsUint64(); + return (d64 & kExponentMask) == kExponentMask; + } + + bool IsNan() const { + uint64_t d64 = AsUint64(); + return ((d64 & kExponentMask) == kExponentMask) && + ((d64 & kSignificandMask) != 0); + } + + bool IsInfinite() const { + uint64_t d64 = AsUint64(); + return ((d64 & kExponentMask) == kExponentMask) && + ((d64 & kSignificandMask) == 0); + } + + int Sign() const { + uint64_t d64 = AsUint64(); + return (d64 & kSignMask) == 0? 1: -1; + } + + // Precondition: the value encoded by this Double must be greater or equal + // than +0.0. + DiyFp UpperBoundary() const { + ASSERT(Sign() > 0); + return DiyFp(Significand() * 2 + 1, Exponent() - 1); + } + + // Computes the two boundaries of this. + // The bigger boundary (m_plus) is normalized. 
The lower boundary has the same + // exponent as m_plus. + // Precondition: the value encoded by this Double must be greater than 0. + void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const { + ASSERT(value() > 0.0); + DiyFp v = this->AsDiyFp(); + DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); + DiyFp m_minus; + if (LowerBoundaryIsCloser()) { + m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); + } else { + m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); + } + m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); + m_minus.set_e(m_plus.e()); + *out_m_plus = m_plus; + *out_m_minus = m_minus; + } + + bool LowerBoundaryIsCloser() const { + // The boundary is closer if the significand is of the form f == 2^p-1 then + // the lower boundary is closer. + // Think of v = 1000e10 and v- = 9999e9. + // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but + // at a distance of 1e8. + // The only exception is for the smallest normal: the largest denormal is + // at the same distance as its successor. + // Note: denormals have the same exponent as the smallest normals. + bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0); + return physical_significand_is_zero && (Exponent() != kDenormalExponent); + } + + double value() const { return uint64_to_double(d64_); } + + // Returns the significand size for a given order of magnitude. + // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude. + // This function returns the number of significant binary digits v will have + // once it's encoded into a double. In almost all cases this is equal to + // kSignificandSize. The only exceptions are denormals. They start with + // leading zeroes and their effective significand-size is hence smaller. 
+ static int SignificandSizeForOrderOfMagnitude(int order) { + if (order >= (kDenormalExponent + kSignificandSize)) { + return kSignificandSize; + } + if (order <= kDenormalExponent) return 0; + return order - kDenormalExponent; + } + + static double Infinity() { + return Double(kInfinity).value(); + } + + static double NaN() { + return Double(kNaN).value(); + } + + private: + static const int kExponentBias = 0x3FF + kPhysicalSignificandSize; + static const int kDenormalExponent = -kExponentBias + 1; + static const int kMaxExponent = 0x7FF - kExponentBias; + static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000); + static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000); + + const uint64_t d64_; + + static uint64_t DiyFpToUint64(DiyFp diy_fp) { + uint64_t significand = diy_fp.f(); + int exponent = diy_fp.e(); + while (significand > kHiddenBit + kSignificandMask) { + significand >>= 1; + exponent++; + } + if (exponent >= kMaxExponent) { + return kInfinity; + } + if (exponent < kDenormalExponent) { + return 0; + } + while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) { + significand <<= 1; + exponent--; + } + uint64_t biased_exponent; + if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) { + biased_exponent = 0; + } else { + biased_exponent = static_cast(exponent + kExponentBias); + } + return (significand & kSignificandMask) | + (biased_exponent << kPhysicalSignificandSize); + } + DC_DISALLOW_COPY_AND_ASSIGN(Double); -}; - -class Single { - public: - static const uint32_t kSignMask = 0x80000000; - static const uint32_t kExponentMask = 0x7F800000; - static const uint32_t kSignificandMask = 0x007FFFFF; - static const uint32_t kHiddenBit = 0x00800000; - static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit. 
- static const int kSignificandSize = 24; - - Single() : d32_(0) {} - explicit Single(float f) : d32_(float_to_uint32(f)) {} - explicit Single(uint32_t d32) : d32_(d32) {} - - // The value encoded by this Single must be greater or equal to +0.0. - // It must not be special (infinity, or NaN). - DiyFp AsDiyFp() const { - ASSERT(Sign() > 0); - ASSERT(!IsSpecial()); - return DiyFp(Significand(), Exponent()); - } - - // Returns the single's bit as uint64. - uint32_t AsUint32() const { - return d32_; - } - - int Exponent() const { - if (IsDenormal()) return kDenormalExponent; - - uint32_t d32 = AsUint32(); - int biased_e = - static_cast((d32 & kExponentMask) >> kPhysicalSignificandSize); - return biased_e - kExponentBias; - } - - uint32_t Significand() const { - uint32_t d32 = AsUint32(); - uint32_t significand = d32 & kSignificandMask; - if (!IsDenormal()) { - return significand + kHiddenBit; - } else { - return significand; - } - } - - // Returns true if the single is a denormal. - bool IsDenormal() const { - uint32_t d32 = AsUint32(); - return (d32 & kExponentMask) == 0; - } - - // We consider denormals not to be special. - // Hence only Infinity and NaN are special. - bool IsSpecial() const { - uint32_t d32 = AsUint32(); - return (d32 & kExponentMask) == kExponentMask; - } - - bool IsNan() const { - uint32_t d32 = AsUint32(); - return ((d32 & kExponentMask) == kExponentMask) && - ((d32 & kSignificandMask) != 0); - } - - bool IsInfinite() const { - uint32_t d32 = AsUint32(); - return ((d32 & kExponentMask) == kExponentMask) && - ((d32 & kSignificandMask) == 0); - } - - int Sign() const { - uint32_t d32 = AsUint32(); - return (d32 & kSignMask) == 0? 1: -1; - } - - // Computes the two boundaries of this. - // The bigger boundary (m_plus) is normalized. The lower boundary has the same - // exponent as m_plus. - // Precondition: the value encoded by this Single must be greater than 0. 
- void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const { - ASSERT(value() > 0.0); - DiyFp v = this->AsDiyFp(); - DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); - DiyFp m_minus; - if (LowerBoundaryIsCloser()) { - m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); - } else { - m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); - } - m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); - m_minus.set_e(m_plus.e()); - *out_m_plus = m_plus; - *out_m_minus = m_minus; - } - - // Precondition: the value encoded by this Single must be greater or equal - // than +0.0. - DiyFp UpperBoundary() const { - ASSERT(Sign() > 0); - return DiyFp(Significand() * 2 + 1, Exponent() - 1); - } - - bool LowerBoundaryIsCloser() const { - // The boundary is closer if the significand is of the form f == 2^p-1 then - // the lower boundary is closer. - // Think of v = 1000e10 and v- = 9999e9. - // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but - // at a distance of 1e8. - // The only exception is for the smallest normal: the largest denormal is - // at the same distance as its successor. - // Note: denormals have the same exponent as the smallest normals. 
- bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0); - return physical_significand_is_zero && (Exponent() != kDenormalExponent); - } - - float value() const { return uint32_to_float(d32_); } - - static float Infinity() { - return Single(kInfinity).value(); - } - - static float NaN() { - return Single(kNaN).value(); - } - - private: - static const int kExponentBias = 0x7F + kPhysicalSignificandSize; - static const int kDenormalExponent = -kExponentBias + 1; - static const int kMaxExponent = 0xFF - kExponentBias; - static const uint32_t kInfinity = 0x7F800000; - static const uint32_t kNaN = 0x7FC00000; - - const uint32_t d32_; - +}; + +class Single { + public: + static const uint32_t kSignMask = 0x80000000; + static const uint32_t kExponentMask = 0x7F800000; + static const uint32_t kSignificandMask = 0x007FFFFF; + static const uint32_t kHiddenBit = 0x00800000; + static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit. + static const int kSignificandSize = 24; + + Single() : d32_(0) {} + explicit Single(float f) : d32_(float_to_uint32(f)) {} + explicit Single(uint32_t d32) : d32_(d32) {} + + // The value encoded by this Single must be greater or equal to +0.0. + // It must not be special (infinity, or NaN). + DiyFp AsDiyFp() const { + ASSERT(Sign() > 0); + ASSERT(!IsSpecial()); + return DiyFp(Significand(), Exponent()); + } + + // Returns the single's bit as uint64. + uint32_t AsUint32() const { + return d32_; + } + + int Exponent() const { + if (IsDenormal()) return kDenormalExponent; + + uint32_t d32 = AsUint32(); + int biased_e = + static_cast((d32 & kExponentMask) >> kPhysicalSignificandSize); + return biased_e - kExponentBias; + } + + uint32_t Significand() const { + uint32_t d32 = AsUint32(); + uint32_t significand = d32 & kSignificandMask; + if (!IsDenormal()) { + return significand + kHiddenBit; + } else { + return significand; + } + } + + // Returns true if the single is a denormal. 
+ bool IsDenormal() const { + uint32_t d32 = AsUint32(); + return (d32 & kExponentMask) == 0; + } + + // We consider denormals not to be special. + // Hence only Infinity and NaN are special. + bool IsSpecial() const { + uint32_t d32 = AsUint32(); + return (d32 & kExponentMask) == kExponentMask; + } + + bool IsNan() const { + uint32_t d32 = AsUint32(); + return ((d32 & kExponentMask) == kExponentMask) && + ((d32 & kSignificandMask) != 0); + } + + bool IsInfinite() const { + uint32_t d32 = AsUint32(); + return ((d32 & kExponentMask) == kExponentMask) && + ((d32 & kSignificandMask) == 0); + } + + int Sign() const { + uint32_t d32 = AsUint32(); + return (d32 & kSignMask) == 0? 1: -1; + } + + // Computes the two boundaries of this. + // The bigger boundary (m_plus) is normalized. The lower boundary has the same + // exponent as m_plus. + // Precondition: the value encoded by this Single must be greater than 0. + void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const { + ASSERT(value() > 0.0); + DiyFp v = this->AsDiyFp(); + DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); + DiyFp m_minus; + if (LowerBoundaryIsCloser()) { + m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); + } else { + m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); + } + m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); + m_minus.set_e(m_plus.e()); + *out_m_plus = m_plus; + *out_m_minus = m_minus; + } + + // Precondition: the value encoded by this Single must be greater or equal + // than +0.0. + DiyFp UpperBoundary() const { + ASSERT(Sign() > 0); + return DiyFp(Significand() * 2 + 1, Exponent() - 1); + } + + bool LowerBoundaryIsCloser() const { + // The boundary is closer if the significand is of the form f == 2^p-1 then + // the lower boundary is closer. + // Think of v = 1000e10 and v- = 9999e9. + // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but + // at a distance of 1e8. 
+ // The only exception is for the smallest normal: the largest denormal is + // at the same distance as its successor. + // Note: denormals have the same exponent as the smallest normals. + bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0); + return physical_significand_is_zero && (Exponent() != kDenormalExponent); + } + + float value() const { return uint32_to_float(d32_); } + + static float Infinity() { + return Single(kInfinity).value(); + } + + static float NaN() { + return Single(kNaN).value(); + } + + private: + static const int kExponentBias = 0x7F + kPhysicalSignificandSize; + static const int kDenormalExponent = -kExponentBias + 1; + static const int kMaxExponent = 0xFF - kExponentBias; + static const uint32_t kInfinity = 0x7F800000; + static const uint32_t kNaN = 0x7FC00000; + + const uint32_t d32_; + DC_DISALLOW_COPY_AND_ASSIGN(Single); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DOUBLE_H_ +}; + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_DOUBLE_H_ diff --git a/contrib/libs/double-conversion/strtod.cc b/contrib/libs/double-conversion/strtod.cc index 8dd07c19abd..a75cf5d9f1b 100644 --- a/contrib/libs/double-conversion/strtod.cc +++ b/contrib/libs/double-conversion/strtod.cc @@ -1,477 +1,477 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include #include - -#include "bignum.h" -#include "cached-powers.h" -#include "ieee.h" + +#include "bignum.h" +#include "cached-powers.h" +#include "ieee.h" #include "strtod.h" - -namespace double_conversion { - -// 2^53 = 9007199254740992. -// Any integer with at most 15 decimal digits will hence fit into a double -// (which has a 53bit significand) without loss of precision. -static const int kMaxExactDoubleIntegerDecimalDigits = 15; -// 2^64 = 18446744073709551616 > 10^19 -static const int kMaxUint64DecimalDigits = 19; - -// Max double: 1.7976931348623157 x 10^308 -// Min non-zero double: 4.9406564584124654 x 10^-324 -// Any x >= 10^309 is interpreted as +infinity. -// Any x <= 10^-324 is interpreted as 0. -// Note that 2.5e-324 (despite being smaller than the min double) will be read -// as non-zero (equal to the min non-zero double). 
-static const int kMaxDecimalPower = 309; -static const int kMinDecimalPower = -324; - -// 2^64 = 18446744073709551616 -static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF); - - -static const double exact_powers_of_ten[] = { - 1.0, // 10^0 - 10.0, - 100.0, - 1000.0, - 10000.0, - 100000.0, - 1000000.0, - 10000000.0, - 100000000.0, - 1000000000.0, - 10000000000.0, // 10^10 - 100000000000.0, - 1000000000000.0, - 10000000000000.0, - 100000000000000.0, - 1000000000000000.0, - 10000000000000000.0, - 100000000000000000.0, - 1000000000000000000.0, - 10000000000000000000.0, - 100000000000000000000.0, // 10^20 - 1000000000000000000000.0, - // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22 - 10000000000000000000000.0 -}; -static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten); - -// Maximum number of significant digits in the decimal representation. -// In fact the value is 772 (see conversions.cc), but to give us some margin -// we round up to 780. -static const int kMaxSignificantDecimalDigits = 780; - -static Vector TrimLeadingZeros(Vector buffer) { - for (int i = 0; i < buffer.length(); i++) { - if (buffer[i] != '0') { - return buffer.SubVector(i, buffer.length()); - } - } - return Vector(buffer.start(), 0); -} - - -static Vector TrimTrailingZeros(Vector buffer) { - for (int i = buffer.length() - 1; i >= 0; --i) { - if (buffer[i] != '0') { - return buffer.SubVector(0, i + 1); - } - } - return Vector(buffer.start(), 0); -} - - -static void CutToMaxSignificantDigits(Vector buffer, - int exponent, - char* significant_buffer, - int* significant_exponent) { - for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) { - significant_buffer[i] = buffer[i]; - } - // The input buffer has been trimmed. Therefore the last digit must be - // different from '0'. - ASSERT(buffer[buffer.length() - 1] != '0'); - // Set the last digit to be non-zero. This is sufficient to guarantee - // correct rounding. 
- significant_buffer[kMaxSignificantDecimalDigits - 1] = '1'; - *significant_exponent = - exponent + (buffer.length() - kMaxSignificantDecimalDigits); -} - - -// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits. -// If possible the input-buffer is reused, but if the buffer needs to be -// modified (due to cutting), then the input needs to be copied into the -// buffer_copy_space. -static void TrimAndCut(Vector buffer, int exponent, - char* buffer_copy_space, int space_size, - Vector* trimmed, int* updated_exponent) { - Vector left_trimmed = TrimLeadingZeros(buffer); - Vector right_trimmed = TrimTrailingZeros(left_trimmed); - exponent += left_trimmed.length() - right_trimmed.length(); - if (right_trimmed.length() > kMaxSignificantDecimalDigits) { - (void) space_size; // Mark variable as used. - ASSERT(space_size >= kMaxSignificantDecimalDigits); - CutToMaxSignificantDigits(right_trimmed, exponent, - buffer_copy_space, updated_exponent); - *trimmed = Vector(buffer_copy_space, - kMaxSignificantDecimalDigits); - } else { - *trimmed = right_trimmed; - *updated_exponent = exponent; - } -} - - -// Reads digits from the buffer and converts them to a uint64. -// Reads in as many digits as fit into a uint64. -// When the string starts with "1844674407370955161" no further digit is read. -// Since 2^64 = 18446744073709551616 it would still be possible read another -// digit if it was less or equal than 6, but this would complicate the code. -static uint64_t ReadUint64(Vector buffer, - int* number_of_read_digits) { - uint64_t result = 0; - int i = 0; - while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) { - int digit = buffer[i++] - '0'; - ASSERT(0 <= digit && digit <= 9); - result = 10 * result + digit; - } - *number_of_read_digits = i; - return result; -} - - -// Reads a DiyFp from the buffer. -// The returned DiyFp is not necessarily normalized. -// If remaining_decimals is zero then the returned DiyFp is accurate. 
-// Otherwise it has been rounded and has error of at most 1/2 ulp. -static void ReadDiyFp(Vector buffer, - DiyFp* result, - int* remaining_decimals) { - int read_digits; - uint64_t significand = ReadUint64(buffer, &read_digits); - if (buffer.length() == read_digits) { - *result = DiyFp(significand, 0); - *remaining_decimals = 0; - } else { - // Round the significand. - if (buffer[read_digits] >= '5') { - significand++; - } - // Compute the binary exponent. - int exponent = 0; - *result = DiyFp(significand, exponent); - *remaining_decimals = buffer.length() - read_digits; - } -} - - -static bool DoubleStrtod(Vector trimmed, - int exponent, - double* result) { -#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS) - // On x86 the floating-point stack can be 64 or 80 bits wide. If it is - // 80 bits wide (as is the case on Linux) then double-rounding occurs and the - // result is not accurate. - // We know that Windows32 uses 64 bits and is therefore accurate. - // Note that the ARM simulator is compiled for 32bits. It therefore exhibits - // the same problem. - return false; + +namespace double_conversion { + +// 2^53 = 9007199254740992. +// Any integer with at most 15 decimal digits will hence fit into a double +// (which has a 53bit significand) without loss of precision. +static const int kMaxExactDoubleIntegerDecimalDigits = 15; +// 2^64 = 18446744073709551616 > 10^19 +static const int kMaxUint64DecimalDigits = 19; + +// Max double: 1.7976931348623157 x 10^308 +// Min non-zero double: 4.9406564584124654 x 10^-324 +// Any x >= 10^309 is interpreted as +infinity. +// Any x <= 10^-324 is interpreted as 0. +// Note that 2.5e-324 (despite being smaller than the min double) will be read +// as non-zero (equal to the min non-zero double). 
+static const int kMaxDecimalPower = 309; +static const int kMinDecimalPower = -324; + +// 2^64 = 18446744073709551616 +static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF); + + +static const double exact_powers_of_ten[] = { + 1.0, // 10^0 + 10.0, + 100.0, + 1000.0, + 10000.0, + 100000.0, + 1000000.0, + 10000000.0, + 100000000.0, + 1000000000.0, + 10000000000.0, // 10^10 + 100000000000.0, + 1000000000000.0, + 10000000000000.0, + 100000000000000.0, + 1000000000000000.0, + 10000000000000000.0, + 100000000000000000.0, + 1000000000000000000.0, + 10000000000000000000.0, + 100000000000000000000.0, // 10^20 + 1000000000000000000000.0, + // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22 + 10000000000000000000000.0 +}; +static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten); + +// Maximum number of significant digits in the decimal representation. +// In fact the value is 772 (see conversions.cc), but to give us some margin +// we round up to 780. +static const int kMaxSignificantDecimalDigits = 780; + +static Vector TrimLeadingZeros(Vector buffer) { + for (int i = 0; i < buffer.length(); i++) { + if (buffer[i] != '0') { + return buffer.SubVector(i, buffer.length()); + } + } + return Vector(buffer.start(), 0); +} + + +static Vector TrimTrailingZeros(Vector buffer) { + for (int i = buffer.length() - 1; i >= 0; --i) { + if (buffer[i] != '0') { + return buffer.SubVector(0, i + 1); + } + } + return Vector(buffer.start(), 0); +} + + +static void CutToMaxSignificantDigits(Vector buffer, + int exponent, + char* significant_buffer, + int* significant_exponent) { + for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) { + significant_buffer[i] = buffer[i]; + } + // The input buffer has been trimmed. Therefore the last digit must be + // different from '0'. + ASSERT(buffer[buffer.length() - 1] != '0'); + // Set the last digit to be non-zero. This is sufficient to guarantee + // correct rounding. 
+ significant_buffer[kMaxSignificantDecimalDigits - 1] = '1'; + *significant_exponent = + exponent + (buffer.length() - kMaxSignificantDecimalDigits); +} + + +// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits. +// If possible the input-buffer is reused, but if the buffer needs to be +// modified (due to cutting), then the input needs to be copied into the +// buffer_copy_space. +static void TrimAndCut(Vector buffer, int exponent, + char* buffer_copy_space, int space_size, + Vector* trimmed, int* updated_exponent) { + Vector left_trimmed = TrimLeadingZeros(buffer); + Vector right_trimmed = TrimTrailingZeros(left_trimmed); + exponent += left_trimmed.length() - right_trimmed.length(); + if (right_trimmed.length() > kMaxSignificantDecimalDigits) { + (void) space_size; // Mark variable as used. + ASSERT(space_size >= kMaxSignificantDecimalDigits); + CutToMaxSignificantDigits(right_trimmed, exponent, + buffer_copy_space, updated_exponent); + *trimmed = Vector(buffer_copy_space, + kMaxSignificantDecimalDigits); + } else { + *trimmed = right_trimmed; + *updated_exponent = exponent; + } +} + + +// Reads digits from the buffer and converts them to a uint64. +// Reads in as many digits as fit into a uint64. +// When the string starts with "1844674407370955161" no further digit is read. +// Since 2^64 = 18446744073709551616 it would still be possible read another +// digit if it was less or equal than 6, but this would complicate the code. +static uint64_t ReadUint64(Vector buffer, + int* number_of_read_digits) { + uint64_t result = 0; + int i = 0; + while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) { + int digit = buffer[i++] - '0'; + ASSERT(0 <= digit && digit <= 9); + result = 10 * result + digit; + } + *number_of_read_digits = i; + return result; +} + + +// Reads a DiyFp from the buffer. +// The returned DiyFp is not necessarily normalized. +// If remaining_decimals is zero then the returned DiyFp is accurate. 
+// Otherwise it has been rounded and has error of at most 1/2 ulp. +static void ReadDiyFp(Vector buffer, + DiyFp* result, + int* remaining_decimals) { + int read_digits; + uint64_t significand = ReadUint64(buffer, &read_digits); + if (buffer.length() == read_digits) { + *result = DiyFp(significand, 0); + *remaining_decimals = 0; + } else { + // Round the significand. + if (buffer[read_digits] >= '5') { + significand++; + } + // Compute the binary exponent. + int exponent = 0; + *result = DiyFp(significand, exponent); + *remaining_decimals = buffer.length() - read_digits; + } +} + + +static bool DoubleStrtod(Vector trimmed, + int exponent, + double* result) { +#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS) + // On x86 the floating-point stack can be 64 or 80 bits wide. If it is + // 80 bits wide (as is the case on Linux) then double-rounding occurs and the + // result is not accurate. + // We know that Windows32 uses 64 bits and is therefore accurate. + // Note that the ARM simulator is compiled for 32bits. It therefore exhibits + // the same problem. + return false; #else - if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) { - int read_digits; - // The trimmed input fits into a double. - // If the 10^exponent (resp. 10^-exponent) fits into a double too then we - // can compute the result-double simply by multiplying (resp. dividing) the - // two numbers. - // This is possible because IEEE guarantees that floating-point operations - // return the best possible approximation. - if (exponent < 0 && -exponent < kExactPowersOfTenSize) { - // 10^-exponent fits into a double. - *result = static_cast(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result /= exact_powers_of_ten[-exponent]; - return true; - } - if (0 <= exponent && exponent < kExactPowersOfTenSize) { - // 10^exponent fits into a double. 
- *result = static_cast(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result *= exact_powers_of_ten[exponent]; - return true; - } - int remaining_digits = - kMaxExactDoubleIntegerDecimalDigits - trimmed.length(); - if ((0 <= exponent) && - (exponent - remaining_digits < kExactPowersOfTenSize)) { - // The trimmed string was short and we can multiply it with - // 10^remaining_digits. As a result the remaining exponent now fits - // into a double too. - *result = static_cast(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result *= exact_powers_of_ten[remaining_digits]; - *result *= exact_powers_of_ten[exponent - remaining_digits]; - return true; - } - } - return false; + if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) { + int read_digits; + // The trimmed input fits into a double. + // If the 10^exponent (resp. 10^-exponent) fits into a double too then we + // can compute the result-double simply by multiplying (resp. dividing) the + // two numbers. + // This is possible because IEEE guarantees that floating-point operations + // return the best possible approximation. + if (exponent < 0 && -exponent < kExactPowersOfTenSize) { + // 10^-exponent fits into a double. + *result = static_cast(ReadUint64(trimmed, &read_digits)); + ASSERT(read_digits == trimmed.length()); + *result /= exact_powers_of_ten[-exponent]; + return true; + } + if (0 <= exponent && exponent < kExactPowersOfTenSize) { + // 10^exponent fits into a double. + *result = static_cast(ReadUint64(trimmed, &read_digits)); + ASSERT(read_digits == trimmed.length()); + *result *= exact_powers_of_ten[exponent]; + return true; + } + int remaining_digits = + kMaxExactDoubleIntegerDecimalDigits - trimmed.length(); + if ((0 <= exponent) && + (exponent - remaining_digits < kExactPowersOfTenSize)) { + // The trimmed string was short and we can multiply it with + // 10^remaining_digits. 
As a result the remaining exponent now fits + // into a double too. + *result = static_cast(ReadUint64(trimmed, &read_digits)); + ASSERT(read_digits == trimmed.length()); + *result *= exact_powers_of_ten[remaining_digits]; + *result *= exact_powers_of_ten[exponent - remaining_digits]; + return true; + } + } + return false; #endif -} - - -// Returns 10^exponent as an exact DiyFp. -// The given exponent must be in the range [1; kDecimalExponentDistance[. -static DiyFp AdjustmentPowerOfTen(int exponent) { - ASSERT(0 < exponent); - ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance); - // Simply hardcode the remaining powers for the given decimal exponent - // distance. - ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8); - switch (exponent) { - case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60); - case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57); - case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54); - case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50); - case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47); - case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44); - case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40); - default: - UNREACHABLE(); - } -} - - -// If the function returns true then the result is the correct double. -// Otherwise it is either the correct double or the double that is just below -// the correct double. -static bool DiyFpStrtod(Vector buffer, - int exponent, - double* result) { - DiyFp input; - int remaining_decimals; - ReadDiyFp(buffer, &input, &remaining_decimals); - // Since we may have dropped some digits the input is not accurate. - // If remaining_decimals is different than 0 than the error is at most - // .5 ulp (unit in the last place). - // We don't want to deal with fractions and therefore keep a common - // denominator. 
- const int kDenominatorLog = 3; - const int kDenominator = 1 << kDenominatorLog; - // Move the remaining decimals into the exponent. - exponent += remaining_decimals; - uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2); - - int old_e = input.e(); - input.Normalize(); - error <<= old_e - input.e(); - - ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent); - if (exponent < PowersOfTenCache::kMinDecimalExponent) { - *result = 0.0; - return true; - } - DiyFp cached_power; - int cached_decimal_exponent; - PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent, - &cached_power, - &cached_decimal_exponent); - - if (cached_decimal_exponent != exponent) { - int adjustment_exponent = exponent - cached_decimal_exponent; - DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent); - input.Multiply(adjustment_power); - if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) { - // The product of input with the adjustment power fits into a 64 bit - // integer. - ASSERT(DiyFp::kSignificandSize == 64); - } else { - // The adjustment power is exact. There is hence only an error of 0.5. - error += kDenominator / 2; - } - } - - input.Multiply(cached_power); - // The error introduced by a multiplication of a*b equals - // error_a + error_b + error_a*error_b/2^64 + 0.5 - // Substituting a with 'input' and b with 'cached_power' we have - // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp), - // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64 - int error_b = kDenominator / 2; - int error_ab = (error == 0 ? 0 : 1); // We round up to 1. - int fixed_error = kDenominator / 2; - error += error_b + error_ab + fixed_error; - - old_e = input.e(); - input.Normalize(); - error <<= old_e - input.e(); - - // See if the double's significand changes if we add/subtract the error. 
- int order_of_magnitude = DiyFp::kSignificandSize + input.e(); - int effective_significand_size = - Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude); - int precision_digits_count = - DiyFp::kSignificandSize - effective_significand_size; - if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) { - // This can only happen for very small denormals. In this case the - // half-way multiplied by the denominator exceeds the range of an uint64. - // Simply shift everything to the right. - int shift_amount = (precision_digits_count + kDenominatorLog) - - DiyFp::kSignificandSize + 1; - input.set_f(input.f() >> shift_amount); - input.set_e(input.e() + shift_amount); - // We add 1 for the lost precision of error, and kDenominator for - // the lost precision of input.f(). - error = (error >> shift_amount) + 1 + kDenominator; - precision_digits_count -= shift_amount; - } - // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too. - ASSERT(DiyFp::kSignificandSize == 64); - ASSERT(precision_digits_count < 64); - uint64_t one64 = 1; - uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1; - uint64_t precision_bits = input.f() & precision_bits_mask; - uint64_t half_way = one64 << (precision_digits_count - 1); - precision_bits *= kDenominator; - half_way *= kDenominator; - DiyFp rounded_input(input.f() >> precision_digits_count, - input.e() + precision_digits_count); - if (precision_bits >= half_way + error) { - rounded_input.set_f(rounded_input.f() + 1); - } - // If the last_bits are too close to the half-way case than we are too - // inaccurate and round down. In this case we return false so that we can - // fall back to a more precise algorithm. - - *result = Double(rounded_input).value(); - if (half_way - error < precision_bits && precision_bits < half_way + error) { - // Too imprecise. The caller will have to fall back to a slower version. 
- // However the returned number is guaranteed to be either the correct - // double, or the next-lower double. - return false; - } else { - return true; - } -} - - -// Returns -// - -1 if buffer*10^exponent < diy_fp. -// - 0 if buffer*10^exponent == diy_fp. -// - +1 if buffer*10^exponent > diy_fp. -// Preconditions: -// buffer.length() + exponent <= kMaxDecimalPower + 1 -// buffer.length() + exponent > kMinDecimalPower -// buffer.length() <= kMaxDecimalSignificantDigits -static int CompareBufferWithDiyFp(Vector buffer, - int exponent, - DiyFp diy_fp) { - ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1); - ASSERT(buffer.length() + exponent > kMinDecimalPower); - ASSERT(buffer.length() <= kMaxSignificantDecimalDigits); - // Make sure that the Bignum will be able to hold all our numbers. - // Our Bignum implementation has a separate field for exponents. Shifts will - // consume at most one bigit (< 64 bits). - // ln(10) == 3.3219... - ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits); - Bignum buffer_bignum; - Bignum diy_fp_bignum; - buffer_bignum.AssignDecimalString(buffer); - diy_fp_bignum.AssignUInt64(diy_fp.f()); - if (exponent >= 0) { - buffer_bignum.MultiplyByPowerOfTen(exponent); - } else { - diy_fp_bignum.MultiplyByPowerOfTen(-exponent); - } - if (diy_fp.e() > 0) { - diy_fp_bignum.ShiftLeft(diy_fp.e()); - } else { - buffer_bignum.ShiftLeft(-diy_fp.e()); - } - return Bignum::Compare(buffer_bignum, diy_fp_bignum); -} - - -// Returns true if the guess is the correct double. -// Returns false, when guess is either correct or the next-lower double. 
-static bool ComputeGuess(Vector trimmed, int exponent, - double* guess) { - if (trimmed.length() == 0) { - *guess = 0.0; - return true; - } - if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) { - *guess = Double::Infinity(); - return true; - } - if (exponent + trimmed.length() <= kMinDecimalPower) { - *guess = 0.0; - return true; - } - - if (DoubleStrtod(trimmed, exponent, guess) || - DiyFpStrtod(trimmed, exponent, guess)) { - return true; - } - if (*guess == Double::Infinity()) { - return true; - } - return false; -} - -double Strtod(Vector buffer, int exponent) { - char copy_buffer[kMaxSignificantDecimalDigits]; - Vector trimmed; - int updated_exponent; - TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, - &trimmed, &updated_exponent); - exponent = updated_exponent; - - double guess; - bool is_correct = ComputeGuess(trimmed, exponent, &guess); - if (is_correct) return guess; - - DiyFp upper_boundary = Double(guess).UpperBoundary(); - int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); - if (comparison < 0) { - return guess; - } else if (comparison > 0) { - return Double(guess).NextDouble(); - } else if ((Double(guess).Significand() & 1) == 0) { - // Round towards even. - return guess; - } else { - return Double(guess).NextDouble(); - } -} - +} + + +// Returns 10^exponent as an exact DiyFp. +// The given exponent must be in the range [1; kDecimalExponentDistance[. +static DiyFp AdjustmentPowerOfTen(int exponent) { + ASSERT(0 < exponent); + ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance); + // Simply hardcode the remaining powers for the given decimal exponent + // distance. 
+ ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8); + switch (exponent) { + case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60); + case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57); + case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54); + case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50); + case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47); + case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44); + case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40); + default: + UNREACHABLE(); + } +} + + +// If the function returns true then the result is the correct double. +// Otherwise it is either the correct double or the double that is just below +// the correct double. +static bool DiyFpStrtod(Vector buffer, + int exponent, + double* result) { + DiyFp input; + int remaining_decimals; + ReadDiyFp(buffer, &input, &remaining_decimals); + // Since we may have dropped some digits the input is not accurate. + // If remaining_decimals is different than 0 than the error is at most + // .5 ulp (unit in the last place). + // We don't want to deal with fractions and therefore keep a common + // denominator. + const int kDenominatorLog = 3; + const int kDenominator = 1 << kDenominatorLog; + // Move the remaining decimals into the exponent. + exponent += remaining_decimals; + uint64_t error = (remaining_decimals == 0 ? 
0 : kDenominator / 2); + + int old_e = input.e(); + input.Normalize(); + error <<= old_e - input.e(); + + ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent); + if (exponent < PowersOfTenCache::kMinDecimalExponent) { + *result = 0.0; + return true; + } + DiyFp cached_power; + int cached_decimal_exponent; + PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent, + &cached_power, + &cached_decimal_exponent); + + if (cached_decimal_exponent != exponent) { + int adjustment_exponent = exponent - cached_decimal_exponent; + DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent); + input.Multiply(adjustment_power); + if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) { + // The product of input with the adjustment power fits into a 64 bit + // integer. + ASSERT(DiyFp::kSignificandSize == 64); + } else { + // The adjustment power is exact. There is hence only an error of 0.5. + error += kDenominator / 2; + } + } + + input.Multiply(cached_power); + // The error introduced by a multiplication of a*b equals + // error_a + error_b + error_a*error_b/2^64 + 0.5 + // Substituting a with 'input' and b with 'cached_power' we have + // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp), + // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64 + int error_b = kDenominator / 2; + int error_ab = (error == 0 ? 0 : 1); // We round up to 1. + int fixed_error = kDenominator / 2; + error += error_b + error_ab + fixed_error; + + old_e = input.e(); + input.Normalize(); + error <<= old_e - input.e(); + + // See if the double's significand changes if we add/subtract the error. 
+ int order_of_magnitude = DiyFp::kSignificandSize + input.e(); + int effective_significand_size = + Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude); + int precision_digits_count = + DiyFp::kSignificandSize - effective_significand_size; + if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) { + // This can only happen for very small denormals. In this case the + // half-way multiplied by the denominator exceeds the range of an uint64. + // Simply shift everything to the right. + int shift_amount = (precision_digits_count + kDenominatorLog) - + DiyFp::kSignificandSize + 1; + input.set_f(input.f() >> shift_amount); + input.set_e(input.e() + shift_amount); + // We add 1 for the lost precision of error, and kDenominator for + // the lost precision of input.f(). + error = (error >> shift_amount) + 1 + kDenominator; + precision_digits_count -= shift_amount; + } + // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too. + ASSERT(DiyFp::kSignificandSize == 64); + ASSERT(precision_digits_count < 64); + uint64_t one64 = 1; + uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1; + uint64_t precision_bits = input.f() & precision_bits_mask; + uint64_t half_way = one64 << (precision_digits_count - 1); + precision_bits *= kDenominator; + half_way *= kDenominator; + DiyFp rounded_input(input.f() >> precision_digits_count, + input.e() + precision_digits_count); + if (precision_bits >= half_way + error) { + rounded_input.set_f(rounded_input.f() + 1); + } + // If the last_bits are too close to the half-way case than we are too + // inaccurate and round down. In this case we return false so that we can + // fall back to a more precise algorithm. + + *result = Double(rounded_input).value(); + if (half_way - error < precision_bits && precision_bits < half_way + error) { + // Too imprecise. The caller will have to fall back to a slower version. 
+ // However the returned number is guaranteed to be either the correct + // double, or the next-lower double. + return false; + } else { + return true; + } +} + + +// Returns +// - -1 if buffer*10^exponent < diy_fp. +// - 0 if buffer*10^exponent == diy_fp. +// - +1 if buffer*10^exponent > diy_fp. +// Preconditions: +// buffer.length() + exponent <= kMaxDecimalPower + 1 +// buffer.length() + exponent > kMinDecimalPower +// buffer.length() <= kMaxDecimalSignificantDigits +static int CompareBufferWithDiyFp(Vector buffer, + int exponent, + DiyFp diy_fp) { + ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1); + ASSERT(buffer.length() + exponent > kMinDecimalPower); + ASSERT(buffer.length() <= kMaxSignificantDecimalDigits); + // Make sure that the Bignum will be able to hold all our numbers. + // Our Bignum implementation has a separate field for exponents. Shifts will + // consume at most one bigit (< 64 bits). + // ln(10) == 3.3219... + ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits); + Bignum buffer_bignum; + Bignum diy_fp_bignum; + buffer_bignum.AssignDecimalString(buffer); + diy_fp_bignum.AssignUInt64(diy_fp.f()); + if (exponent >= 0) { + buffer_bignum.MultiplyByPowerOfTen(exponent); + } else { + diy_fp_bignum.MultiplyByPowerOfTen(-exponent); + } + if (diy_fp.e() > 0) { + diy_fp_bignum.ShiftLeft(diy_fp.e()); + } else { + buffer_bignum.ShiftLeft(-diy_fp.e()); + } + return Bignum::Compare(buffer_bignum, diy_fp_bignum); +} + + +// Returns true if the guess is the correct double. +// Returns false, when guess is either correct or the next-lower double. 
+static bool ComputeGuess(Vector trimmed, int exponent, + double* guess) { + if (trimmed.length() == 0) { + *guess = 0.0; + return true; + } + if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) { + *guess = Double::Infinity(); + return true; + } + if (exponent + trimmed.length() <= kMinDecimalPower) { + *guess = 0.0; + return true; + } + + if (DoubleStrtod(trimmed, exponent, guess) || + DiyFpStrtod(trimmed, exponent, guess)) { + return true; + } + if (*guess == Double::Infinity()) { + return true; + } + return false; +} + +double Strtod(Vector buffer, int exponent) { + char copy_buffer[kMaxSignificantDecimalDigits]; + Vector trimmed; + int updated_exponent; + TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, + &trimmed, &updated_exponent); + exponent = updated_exponent; + + double guess; + bool is_correct = ComputeGuess(trimmed, exponent, &guess); + if (is_correct) return guess; + + DiyFp upper_boundary = Double(guess).UpperBoundary(); + int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); + if (comparison < 0) { + return guess; + } else if (comparison > 0) { + return Double(guess).NextDouble(); + } else if ((Double(guess).Significand() & 1) == 0) { + // Round towards even. 
+ return guess; + } else { + return Double(guess).NextDouble(); + } +} + static float SanitizedDoubletof(double d) { ASSERT(d >= 0.0); // ASAN has a sanitize check that disallows casting doubles to floats if @@ -496,85 +496,85 @@ static float SanitizedDoubletof(double d) { } } -float Strtof(Vector buffer, int exponent) { - char copy_buffer[kMaxSignificantDecimalDigits]; - Vector trimmed; - int updated_exponent; - TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, - &trimmed, &updated_exponent); - exponent = updated_exponent; - - double double_guess; - bool is_correct = ComputeGuess(trimmed, exponent, &double_guess); - +float Strtof(Vector buffer, int exponent) { + char copy_buffer[kMaxSignificantDecimalDigits]; + Vector trimmed; + int updated_exponent; + TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, + &trimmed, &updated_exponent); + exponent = updated_exponent; + + double double_guess; + bool is_correct = ComputeGuess(trimmed, exponent, &double_guess); + float float_guess = SanitizedDoubletof(double_guess); - if (float_guess == double_guess) { - // This shortcut triggers for integer values. - return float_guess; - } - - // We must catch double-rounding. Say the double has been rounded up, and is - // now a boundary of a float, and rounds up again. This is why we have to - // look at previous too. - // Example (in decimal numbers): - // input: 12349 - // high-precision (4 digits): 1235 - // low-precision (3 digits): - // when read from input: 123 - // when rounded from high precision: 124. - // To do this we simply look at the neigbors of the correct result and see - // if they would round to the same float. If the guess is not correct we have - // to look at four values (since two different doubles could be the correct - // double). 
- - double double_next = Double(double_guess).NextDouble(); - double double_previous = Double(double_guess).PreviousDouble(); - + if (float_guess == double_guess) { + // This shortcut triggers for integer values. + return float_guess; + } + + // We must catch double-rounding. Say the double has been rounded up, and is + // now a boundary of a float, and rounds up again. This is why we have to + // look at previous too. + // Example (in decimal numbers): + // input: 12349 + // high-precision (4 digits): 1235 + // low-precision (3 digits): + // when read from input: 123 + // when rounded from high precision: 124. + // To do this we simply look at the neigbors of the correct result and see + // if they would round to the same float. If the guess is not correct we have + // to look at four values (since two different doubles could be the correct + // double). + + double double_next = Double(double_guess).NextDouble(); + double double_previous = Double(double_guess).PreviousDouble(); + float f1 = SanitizedDoubletof(double_previous); - float f2 = float_guess; + float f2 = float_guess; float f3 = SanitizedDoubletof(double_next); - float f4; - if (is_correct) { - f4 = f3; - } else { - double double_next2 = Double(double_next).NextDouble(); + float f4; + if (is_correct) { + f4 = f3; + } else { + double double_next2 = Double(double_next).NextDouble(); f4 = SanitizedDoubletof(double_next2); - } - (void) f2; // Mark variable as used. - ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4); - - // If the guess doesn't lie near a single-precision boundary we can simply - // return its float-value. - if (f1 == f4) { - return float_guess; - } - - ASSERT((f1 != f2 && f2 == f3 && f3 == f4) || - (f1 == f2 && f2 != f3 && f3 == f4) || - (f1 == f2 && f2 == f3 && f3 != f4)); - + } + (void) f2; // Mark variable as used. + ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4); + + // If the guess doesn't lie near a single-precision boundary we can simply + // return its float-value. 
+ if (f1 == f4) { + return float_guess; + } + + ASSERT((f1 != f2 && f2 == f3 && f3 == f4) || + (f1 == f2 && f2 != f3 && f3 == f4) || + (f1 == f2 && f2 == f3 && f3 != f4)); + // guess and next are the two possible candidates (in the same way that - // double_guess was the lower candidate for a double-precision guess). - float guess = f1; - float next = f4; - DiyFp upper_boundary; - if (guess == 0.0f) { - float min_float = 1e-45f; - upper_boundary = Double(static_cast(min_float) / 2).AsDiyFp(); - } else { - upper_boundary = Single(guess).UpperBoundary(); - } - int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); - if (comparison < 0) { - return guess; - } else if (comparison > 0) { - return next; - } else if ((Single(guess).Significand() & 1) == 0) { - // Round towards even. - return guess; - } else { - return next; - } -} - -} // namespace double_conversion + // double_guess was the lower candidate for a double-precision guess). + float guess = f1; + float next = f4; + DiyFp upper_boundary; + if (guess == 0.0f) { + float min_float = 1e-45f; + upper_boundary = Double(static_cast(min_float) / 2).AsDiyFp(); + } else { + upper_boundary = Single(guess).UpperBoundary(); + } + int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); + if (comparison < 0) { + return guess; + } else if (comparison > 0) { + return next; + } else if ((Single(guess).Significand() & 1) == 0) { + // Round towards even. + return guess; + } else { + return next; + } +} + +} // namespace double_conversion diff --git a/contrib/libs/double-conversion/strtod.h b/contrib/libs/double-conversion/strtod.h index 937e5a3c5b0..ed0293b8f54 100644 --- a/contrib/libs/double-conversion/strtod.h +++ b/contrib/libs/double-conversion/strtod.h @@ -1,45 +1,45 @@ -// Copyright 2010 the V8 project authors. All rights reserved. 
-// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_STRTOD_H_ -#define DOUBLE_CONVERSION_STRTOD_H_ - -#include "utils.h" - -namespace double_conversion { - -// The buffer must only contain digits in the range [0-9]. It must not -// contain a dot or a sign. It must not start with '0', and must not be empty. -double Strtod(Vector buffer, int exponent); - -// The buffer must only contain digits in the range [0-9]. It must not -// contain a dot or a sign. It must not start with '0', and must not be empty. 
-float Strtof(Vector buffer, int exponent); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_STRTOD_H_ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_STRTOD_H_ +#define DOUBLE_CONVERSION_STRTOD_H_ + +#include "utils.h" + +namespace double_conversion { + +// The buffer must only contain digits in the range [0-9]. It must not +// contain a dot or a sign. It must not start with '0', and must not be empty. 
+double Strtod(Vector buffer, int exponent); + +// The buffer must only contain digits in the range [0-9]. It must not +// contain a dot or a sign. It must not start with '0', and must not be empty. +float Strtof(Vector buffer, int exponent); + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_STRTOD_H_ diff --git a/contrib/libs/double-conversion/utils.h b/contrib/libs/double-conversion/utils.h index c99e28f0f43..41c5b02d2c1 100644 --- a/contrib/libs/double-conversion/utils.h +++ b/contrib/libs/double-conversion/utils.h @@ -1,72 +1,72 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_UTILS_H_ -#define DOUBLE_CONVERSION_UTILS_H_ - +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef DOUBLE_CONVERSION_UTILS_H_ +#define DOUBLE_CONVERSION_UTILS_H_ + #include #include - + #include -#ifndef ASSERT -#define ASSERT(condition) \ - assert(condition); -#endif -#ifndef UNIMPLEMENTED -#define UNIMPLEMENTED() (abort()) -#endif -#ifndef DOUBLE_CONVERSION_NO_RETURN -#ifdef _MSC_VER -#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn) -#else -#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn)) -#endif -#endif -#ifndef UNREACHABLE -#ifdef _MSC_VER -void DOUBLE_CONVERSION_NO_RETURN abort_noreturn(); -inline void abort_noreturn() { abort(); } -#define UNREACHABLE() (abort_noreturn()) -#else -#define UNREACHABLE() (abort()) -#endif -#endif - - -// Double operations detection based on target architecture. -// Linux uses a 80bit wide floating point stack on x86. This induces double -// rounding, which in turn leads to wrong results. -// An easy way to test if the floating-point operations are correct is to -// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then -// the result is equal to 89255e-22. -// The best way to test this, is to create a division-function and to compare -// the output of the division with the expected result. (Inlining must be -// disabled.) 
-// On Linux,x86 89255e-22 != Div_double(89255.0/1e22) +#ifndef ASSERT +#define ASSERT(condition) \ + assert(condition); +#endif +#ifndef UNIMPLEMENTED +#define UNIMPLEMENTED() (abort()) +#endif +#ifndef DOUBLE_CONVERSION_NO_RETURN +#ifdef _MSC_VER +#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn) +#else +#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn)) +#endif +#endif +#ifndef UNREACHABLE +#ifdef _MSC_VER +void DOUBLE_CONVERSION_NO_RETURN abort_noreturn(); +inline void abort_noreturn() { abort(); } +#define UNREACHABLE() (abort_noreturn()) +#else +#define UNREACHABLE() (abort()) +#endif +#endif + + +// Double operations detection based on target architecture. +// Linux uses a 80bit wide floating point stack on x86. This induces double +// rounding, which in turn leads to wrong results. +// An easy way to test if the floating-point operations are correct is to +// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then +// the result is equal to 89255e-22. +// The best way to test this, is to create a division-function and to compare +// the output of the division with the expected result. (Inlining must be +// disabled.) +// On Linux,x86 89255e-22 != Div_double(89255.0/1e22) // // For example: /* @@ -83,276 +83,276 @@ int main(int argc, char** argv) { // Run as follows ./main || echo "correct" // // If it prints "correct" then the architecture should be here, in the "correct" section. 
-#if defined(_M_X64) || defined(__x86_64__) || \ +#if defined(_M_X64) || defined(__x86_64__) || \ defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ - defined(__hppa__) || defined(__ia64__) || \ - defined(__mips__) || \ - defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ - defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ - defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ - defined(__SH4__) || defined(__alpha__) || \ - defined(_MIPS_ARCH_MIPS32R2) || \ + defined(__hppa__) || defined(__ia64__) || \ + defined(__mips__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ + defined(_MIPS_ARCH_MIPS32R2) || \ defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ defined(__riscv) || \ defined(__or1k__) -#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 +#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 #elif defined(__mc68000__) || \ defined(__pnacl__) || defined(__native_client__) -#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS -#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) -#if defined(_WIN32) -// Windows uses a 64bit wide floating point stack. -#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 -#else -#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS -#endif // _WIN32 -#else -#error Target architecture was not detected as supported by Double-Conversion. -#endif - -#if defined(_WIN32) && !defined(__MINGW32__) - -typedef signed char int8_t; -typedef unsigned char uint8_t; -typedef short int16_t; // NOLINT -typedef unsigned short uint16_t; // NOLINT -typedef int int32_t; -typedef unsigned int uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -// intptr_t and friends are defined in crtdefs.h through stdio.h. 
- -#else - -#include - -#endif - -typedef uint16_t uc16; - -// The following macro works on both 32 and 64-bit platforms. -// Usage: instead of writing 0x1234567890123456 -// write UINT64_2PART_C(0x12345678,90123456); -#define UINT64_2PART_C(a, b) (((static_cast(a) << 32) + 0x##b##u)) - - -// The expression ARRAY_SIZE(a) is a compile-time constant of type -// size_t which represents the number of elements of the given -// array. You should only use ARRAY_SIZE on statically allocated -// arrays. -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast(!(sizeof(a) % sizeof(*(a))))) -#endif - -// A macro to disallow the evil copy constructor and operator= functions -// This should be used in the private: declarations for a class +#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS +#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) +#if defined(_WIN32) +// Windows uses a 64bit wide floating point stack. +#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 +#else +#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS +#endif // _WIN32 +#else +#error Target architecture was not detected as supported by Double-Conversion. +#endif + +#if defined(_WIN32) && !defined(__MINGW32__) + +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef short int16_t; // NOLINT +typedef unsigned short uint16_t; // NOLINT +typedef int int32_t; +typedef unsigned int uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +// intptr_t and friends are defined in crtdefs.h through stdio.h. + +#else + +#include + +#endif + +typedef uint16_t uc16; + +// The following macro works on both 32 and 64-bit platforms. +// Usage: instead of writing 0x1234567890123456 +// write UINT64_2PART_C(0x12345678,90123456); +#define UINT64_2PART_C(a, b) (((static_cast(a) << 32) + 0x##b##u)) + + +// The expression ARRAY_SIZE(a) is a compile-time constant of type +// size_t which represents the number of elements of the given +// array. 
You should only use ARRAY_SIZE on statically allocated +// arrays. +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) \ + ((sizeof(a) / sizeof(*(a))) / \ + static_cast(!(sizeof(a) % sizeof(*(a))))) +#endif + +// A macro to disallow the evil copy constructor and operator= functions +// This should be used in the private: declarations for a class #ifndef DC_DISALLOW_COPY_AND_ASSIGN #define DC_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) -#endif - -// A macro to disallow all the implicit constructors, namely the -// default constructor, copy constructor and operator= functions. -// -// This should be used in the private: declarations for a class -// that wants to prevent anyone from instantiating it. This is -// especially useful for classes containing only static methods. + TypeName(const TypeName&); \ + void operator=(const TypeName&) +#endif + +// A macro to disallow all the implicit constructors, namely the +// default constructor, copy constructor and operator= functions. +// +// This should be used in the private: declarations for a class +// that wants to prevent anyone from instantiating it. This is +// especially useful for classes containing only static methods. #ifndef DC_DISALLOW_IMPLICIT_CONSTRUCTORS #define DC_DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ - TypeName(); \ + TypeName(); \ DC_DISALLOW_COPY_AND_ASSIGN(TypeName) -#endif - -namespace double_conversion { - -static const int kCharSize = sizeof(char); - -// Returns the maximum of the two parameters. -template -static T Max(T a, T b) { - return a < b ? b : a; -} - - -// Returns the minimum of the two parameters. -template -static T Min(T a, T b) { - return a < b ? a : b; -} - - -inline int StrLength(const char* string) { - size_t length = strlen(string); - ASSERT(length == static_cast(static_cast(length))); - return static_cast(length); -} - -// This is a simplified version of V8's Vector class. 
-template -class Vector { - public: - Vector() : start_(NULL), length_(0) {} - Vector(T* data, int len) : start_(data), length_(len) { - ASSERT(len == 0 || (len > 0 && data != NULL)); - } - - // Returns a vector using the same backing storage as this one, - // spanning from and including 'from', to but not including 'to'. - Vector SubVector(int from, int to) { - ASSERT(to <= length_); - ASSERT(from < to); - ASSERT(0 <= from); - return Vector(start() + from, to - from); - } - - // Returns the length of the vector. - int length() const { return length_; } - - // Returns whether or not the vector is empty. - bool is_empty() const { return length_ == 0; } - - // Returns the pointer to the start of the data in the vector. - T* start() const { return start_; } - - // Access individual vector elements - checks bounds in debug mode. - T& operator[](int index) const { - ASSERT(0 <= index && index < length_); - return start_[index]; - } - - T& first() { return start_[0]; } - - T& last() { return start_[length_ - 1]; } - - private: - T* start_; - int length_; -}; - - -// Helper class for building result strings in a character buffer. The -// purpose of the class is to use safe operations that checks the -// buffer bounds on all operations in debug mode. -class StringBuilder { - public: - StringBuilder(char* buffer, int buffer_size) - : buffer_(buffer, buffer_size), position_(0) { } - - ~StringBuilder() { if (!is_finalized()) Finalize(); } - - int size() const { return buffer_.length(); } - - // Get the current position in the builder. - int position() const { - ASSERT(!is_finalized()); - return position_; - } - - // Reset the position. - void Reset() { position_ = 0; } - - // Add a single character to the builder. It is not allowed to add - // 0-characters; use the Finalize() method to terminate the string - // instead. 
- void AddCharacter(char c) { - ASSERT(c != '\0'); - ASSERT(!is_finalized() && position_ < buffer_.length()); - buffer_[position_++] = c; - } - - // Add an entire string to the builder. Uses strlen() internally to - // compute the length of the input string. - void AddString(const char* s) { - AddSubstring(s, StrLength(s)); - } - - // Add the first 'n' characters of the given string 's' to the - // builder. The input string must have enough characters. - void AddSubstring(const char* s, int n) { - ASSERT(!is_finalized() && position_ + n < buffer_.length()); - ASSERT(static_cast(n) <= strlen(s)); - memmove(&buffer_[position_], s, n * kCharSize); - position_ += n; - } - - - // Add character padding to the builder. If count is non-positive, - // nothing is added to the builder. - void AddPadding(char c, int count) { - for (int i = 0; i < count; i++) { - AddCharacter(c); - } - } - - // Finalize the string by 0-terminating it and returning the buffer. - char* Finalize() { - ASSERT(!is_finalized() && position_ < buffer_.length()); - buffer_[position_] = '\0'; - // Make sure nobody managed to add a 0-character to the - // buffer while building the string. - ASSERT(strlen(buffer_.start()) == static_cast(position_)); - position_ = -1; - ASSERT(is_finalized()); - return buffer_.start(); - } - - private: - Vector buffer_; - int position_; - - bool is_finalized() const { return position_ < 0; } - +#endif + +namespace double_conversion { + +static const int kCharSize = sizeof(char); + +// Returns the maximum of the two parameters. +template +static T Max(T a, T b) { + return a < b ? b : a; +} + + +// Returns the minimum of the two parameters. +template +static T Min(T a, T b) { + return a < b ? a : b; +} + + +inline int StrLength(const char* string) { + size_t length = strlen(string); + ASSERT(length == static_cast(static_cast(length))); + return static_cast(length); +} + +// This is a simplified version of V8's Vector class. 
+template +class Vector { + public: + Vector() : start_(NULL), length_(0) {} + Vector(T* data, int len) : start_(data), length_(len) { + ASSERT(len == 0 || (len > 0 && data != NULL)); + } + + // Returns a vector using the same backing storage as this one, + // spanning from and including 'from', to but not including 'to'. + Vector SubVector(int from, int to) { + ASSERT(to <= length_); + ASSERT(from < to); + ASSERT(0 <= from); + return Vector(start() + from, to - from); + } + + // Returns the length of the vector. + int length() const { return length_; } + + // Returns whether or not the vector is empty. + bool is_empty() const { return length_ == 0; } + + // Returns the pointer to the start of the data in the vector. + T* start() const { return start_; } + + // Access individual vector elements - checks bounds in debug mode. + T& operator[](int index) const { + ASSERT(0 <= index && index < length_); + return start_[index]; + } + + T& first() { return start_[0]; } + + T& last() { return start_[length_ - 1]; } + + private: + T* start_; + int length_; +}; + + +// Helper class for building result strings in a character buffer. The +// purpose of the class is to use safe operations that checks the +// buffer bounds on all operations in debug mode. +class StringBuilder { + public: + StringBuilder(char* buffer, int buffer_size) + : buffer_(buffer, buffer_size), position_(0) { } + + ~StringBuilder() { if (!is_finalized()) Finalize(); } + + int size() const { return buffer_.length(); } + + // Get the current position in the builder. + int position() const { + ASSERT(!is_finalized()); + return position_; + } + + // Reset the position. + void Reset() { position_ = 0; } + + // Add a single character to the builder. It is not allowed to add + // 0-characters; use the Finalize() method to terminate the string + // instead. 
+ void AddCharacter(char c) { + ASSERT(c != '\0'); + ASSERT(!is_finalized() && position_ < buffer_.length()); + buffer_[position_++] = c; + } + + // Add an entire string to the builder. Uses strlen() internally to + // compute the length of the input string. + void AddString(const char* s) { + AddSubstring(s, StrLength(s)); + } + + // Add the first 'n' characters of the given string 's' to the + // builder. The input string must have enough characters. + void AddSubstring(const char* s, int n) { + ASSERT(!is_finalized() && position_ + n < buffer_.length()); + ASSERT(static_cast(n) <= strlen(s)); + memmove(&buffer_[position_], s, n * kCharSize); + position_ += n; + } + + + // Add character padding to the builder. If count is non-positive, + // nothing is added to the builder. + void AddPadding(char c, int count) { + for (int i = 0; i < count; i++) { + AddCharacter(c); + } + } + + // Finalize the string by 0-terminating it and returning the buffer. + char* Finalize() { + ASSERT(!is_finalized() && position_ < buffer_.length()); + buffer_[position_] = '\0'; + // Make sure nobody managed to add a 0-character to the + // buffer while building the string. + ASSERT(strlen(buffer_.start()) == static_cast(position_)); + position_ = -1; + ASSERT(is_finalized()); + return buffer_.start(); + } + + private: + Vector buffer_; + int position_; + + bool is_finalized() const { return position_ < 0; } + DC_DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder); -}; - -// The type-based aliasing rule allows the compiler to assume that pointers of -// different types (for some definition of different) never alias each other. -// Thus the following code does not work: -// -// float f = foo(); -// int fbits = *(int*)(&f); -// -// The compiler 'knows' that the int pointer can't refer to f since the types -// don't match, so the compiler may cache f in a register, leaving random data -// in fbits. 
Using C++ style casts makes no difference, however a pointer to -// char data is assumed to alias any other pointer. This is the 'memcpy -// exception'. -// -// Bit_cast uses the memcpy exception to move the bits from a variable of one -// type of a variable of another type. Of course the end result is likely to -// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005) -// will completely optimize BitCast away. -// -// There is an additional use for BitCast. -// Recent gccs will warn when they see casts that may result in breakage due to -// the type-based aliasing rule. If you have checked that there is no breakage -// you can use BitCast to cast one pointer type to another. This confuses gcc -// enough that it can no longer see that you have cast one pointer type to -// another thus avoiding the warning. -template -inline Dest BitCast(const Source& source) { - // Compile time assertion: sizeof(Dest) == sizeof(Source) - // A compile error here means your Dest and Source have different sizes. +}; + +// The type-based aliasing rule allows the compiler to assume that pointers of +// different types (for some definition of different) never alias each other. +// Thus the following code does not work: +// +// float f = foo(); +// int fbits = *(int*)(&f); +// +// The compiler 'knows' that the int pointer can't refer to f since the types +// don't match, so the compiler may cache f in a register, leaving random data +// in fbits. Using C++ style casts makes no difference, however a pointer to +// char data is assumed to alias any other pointer. This is the 'memcpy +// exception'. +// +// Bit_cast uses the memcpy exception to move the bits from a variable of one +// type of a variable of another type. Of course the end result is likely to +// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005) +// will completely optimize BitCast away. +// +// There is an additional use for BitCast. 
+// Recent gccs will warn when they see casts that may result in breakage due to +// the type-based aliasing rule. If you have checked that there is no breakage +// you can use BitCast to cast one pointer type to another. This confuses gcc +// enough that it can no longer see that you have cast one pointer type to +// another thus avoiding the warning. +template +inline Dest BitCast(const Source& source) { + // Compile time assertion: sizeof(Dest) == sizeof(Source) + // A compile error here means your Dest and Source have different sizes. #if __cplusplus >= 201103L static_assert(sizeof(Dest) == sizeof(Source), "source and destination size mismatch"); #else typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]; #endif - - Dest dest; - memmove(&dest, &source, sizeof(dest)); - return dest; -} - -template -inline Dest BitCast(Source* source) { - return BitCast(reinterpret_cast(source)); -} - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_UTILS_H_ + + Dest dest; + memmove(&dest, &source, sizeof(dest)); + return dest; +} + +template +inline Dest BitCast(Source* source) { + return BitCast(reinterpret_cast(source)); +} + +} // namespace double_conversion + +#endif // DOUBLE_CONVERSION_UTILS_H_ diff --git a/contrib/libs/double-conversion/ya.make b/contrib/libs/double-conversion/ya.make index b97a761c803..52e59b09891 100644 --- a/contrib/libs/double-conversion/ya.make +++ b/contrib/libs/double-conversion/ya.make @@ -1,8 +1,8 @@ -LIBRARY() - +LIBRARY() + VERSION(3.1.0) -LICENSE(BSD-3-Clause) +LICENSE(BSD-3-Clause) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) @@ -11,22 +11,22 @@ OWNER( g:contrib g:cpp-contrib ) - + NO_COMPILER_WARNINGS() -NO_UTIL() - +NO_UTIL() + ADDINCL(GLOBAL contrib/libs/double-conversion/include) -SRCS( - cached-powers.cc - bignum-dtoa.cc - double-conversion.cc - diy-fp.cc - fixed-dtoa.cc - strtod.cc - bignum.cc - fast-dtoa.cc -) - -END() +SRCS( + cached-powers.cc + bignum-dtoa.cc + double-conversion.cc + diy-fp.cc + 
fixed-dtoa.cc + strtod.cc + bignum.cc + fast-dtoa.cc +) + +END() diff --git a/contrib/libs/expat/ya.make b/contrib/libs/expat/ya.make index 09c5e26409b..8128621e60f 100644 --- a/contrib/libs/expat/ya.make +++ b/contrib/libs/expat/ya.make @@ -6,7 +6,7 @@ OWNER( orivej g:cpp-contrib ) - + VERSION(2.4.4) ORIGINAL_SOURCE(https://github.com/libexpat/libexpat/releases/download/R_2_4_4/expat-2.4.4.tar.xz) diff --git a/contrib/libs/farmhash/arch/sse41/ya.make b/contrib/libs/farmhash/arch/sse41/ya.make index 46e7d808c90..b3b2964c999 100644 --- a/contrib/libs/farmhash/arch/sse41/ya.make +++ b/contrib/libs/farmhash/arch/sse41/ya.make @@ -2,8 +2,8 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(MIT) - +LICENSE(MIT) + OWNER(somov) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/farmhash/arch/sse42/ya.make b/contrib/libs/farmhash/arch/sse42/ya.make index f48b958eed6..6df471feb7c 100644 --- a/contrib/libs/farmhash/arch/sse42/ya.make +++ b/contrib/libs/farmhash/arch/sse42/ya.make @@ -2,8 +2,8 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(MIT) - +LICENSE(MIT) + OWNER(somov) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/farmhash/arch/sse42_aesni/ya.make b/contrib/libs/farmhash/arch/sse42_aesni/ya.make index 05596dc2777..4d558bc2a2c 100644 --- a/contrib/libs/farmhash/arch/sse42_aesni/ya.make +++ b/contrib/libs/farmhash/arch/sse42_aesni/ya.make @@ -2,8 +2,8 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(MIT) - +LICENSE(MIT) + OWNER(somov) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/fastlz/fastlz.c b/contrib/libs/fastlz/fastlz.c index deb72667704..e671bda728e 100644 --- a/contrib/libs/fastlz/fastlz.c +++ b/contrib/libs/fastlz/fastlz.c @@ -1,553 +1,553 @@ -/* - FastLZ - lightning-fast lossless compression library - - Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) - Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) - Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated 
documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#include "fastlz.h" - -#if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) - -/* - * Always check for bound when decompressing. - * Generally it is best to leave it defined. - */ -#define FASTLZ_SAFE - -/* - * Give hints to the compiler for branch prediction optimization. 
- */ -#if defined(__GNUC__) && (__GNUC__ > 2) -#define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) -#define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) +/* + FastLZ - lightning-fast lossless compression library + + Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) + Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) + Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include "fastlz.h" + +#if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) + +/* + * Always check for bound when decompressing. + * Generally it is best to leave it defined. + */ +#define FASTLZ_SAFE + +/* + * Give hints to the compiler for branch prediction optimization. 
+ */ +#if defined(__GNUC__) && (__GNUC__ > 2) +#define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) +#define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) +#else +#define FASTLZ_EXPECT_CONDITIONAL(c) (c) +#define FASTLZ_UNEXPECT_CONDITIONAL(c) (c) +#endif + +/* + * Use inlined functions for supported systems. + */ +#if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C) +#define FASTLZ_INLINE inline +#elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__) +#define FASTLZ_INLINE __inline #else -#define FASTLZ_EXPECT_CONDITIONAL(c) (c) -#define FASTLZ_UNEXPECT_CONDITIONAL(c) (c) -#endif - -/* - * Use inlined functions for supported systems. - */ -#if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C) -#define FASTLZ_INLINE inline -#elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__) -#define FASTLZ_INLINE __inline -#else -#define FASTLZ_INLINE -#endif - -/* - * Prevent accessing more than 8-bit at once, except on x86 architectures. - */ -#if !defined(FASTLZ_STRICT_ALIGN) -#define FASTLZ_STRICT_ALIGN -#if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */ -#undef FASTLZ_STRICT_ALIGN -#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ -#undef FASTLZ_STRICT_ALIGN -#elif defined(_M_IX86) /* Intel, MSVC */ -#undef FASTLZ_STRICT_ALIGN -#elif defined(__386) -#undef FASTLZ_STRICT_ALIGN -#elif defined(_X86_) /* MinGW */ -#undef FASTLZ_STRICT_ALIGN -#elif defined(__I86__) /* Digital Mars */ -#undef FASTLZ_STRICT_ALIGN -#endif -#endif - -/* - * FIXME: use preprocessor magic to set this on different platforms! 
- */ -typedef unsigned char flzuint8; -typedef unsigned short flzuint16; -typedef unsigned int flzuint32; - -/* prototypes */ -int fastlz_compress(const void* input, int length, void* output); -int fastlz_compress_level(int level, const void* input, int length, void* output); -int fastlz_decompress(const void* input, int length, void* output, int maxout); - -#define MAX_COPY 32 -#define MAX_LEN 264 /* 256 + 8 */ -#define MAX_DISTANCE 8192 - -#if !defined(FASTLZ_STRICT_ALIGN) -#define FASTLZ_READU16(p) *((const flzuint16*)(p)) -#else -#define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8) -#endif - -#define HASH_LOG 13 -#define HASH_SIZE (1<< HASH_LOG) -#define HASH_MASK (HASH_SIZE-1) -#define HASH_FUNCTION(v,p) { v = FASTLZ_READU16(p); v ^= FASTLZ_READU16(p+1)^(v>>(16-HASH_LOG));v &= HASH_MASK; } - -#undef FASTLZ_LEVEL -#define FASTLZ_LEVEL 1 - -#undef FASTLZ_COMPRESSOR -#undef FASTLZ_DECOMPRESSOR -#define FASTLZ_COMPRESSOR fastlz1_compress -#define FASTLZ_DECOMPRESSOR fastlz1_decompress -static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output); -static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout); -#include "fastlz.c" - -#undef FASTLZ_LEVEL -#define FASTLZ_LEVEL 2 - -#undef MAX_DISTANCE -#define MAX_DISTANCE 8191 -#define MAX_FARDISTANCE (65535+MAX_DISTANCE-1) - -#undef FASTLZ_COMPRESSOR -#undef FASTLZ_DECOMPRESSOR -#define FASTLZ_COMPRESSOR fastlz2_compress -#define FASTLZ_DECOMPRESSOR fastlz2_decompress -static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output); -static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout); -#include "fastlz.c" - -int fastlz_compress(const void* input, int length, void* output) -{ - /* for short block, choose fastlz1 */ - if(length < 65536) - return fastlz1_compress(input, length, output); - - /* else... 
*/ - return fastlz2_compress(input, length, output); -} - -int fastlz_decompress(const void* input, int length, void* output, int maxout) -{ - /* magic identifier for compression level */ - int level = ((*(const flzuint8*)input) >> 5) + 1; - - if(level == 1) - return fastlz1_decompress(input, length, output, maxout); - if(level == 2) - return fastlz2_decompress(input, length, output, maxout); - - /* unknown level, trigger error */ - return 0; -} - -int fastlz_compress_level(int level, const void* input, int length, void* output) -{ - if(level == 1) - return fastlz1_compress(input, length, output); - if(level == 2) - return fastlz2_compress(input, length, output); - - return 0; -} - -#else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */ - -static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output) -{ - const flzuint8* ip = (const flzuint8*) input; - const flzuint8* ip_bound = ip + length - 2; - const flzuint8* ip_limit = ip + length - 12; - flzuint8* op = (flzuint8*) output; - - const flzuint8* htab[HASH_SIZE]; - const flzuint8** hslot; - flzuint32 hval; - - flzuint32 copy; - - /* sanity check */ - if(FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) - { - if(length) - { - /* create literal copy only */ - *op++ = length-1; - ip_bound++; - while(ip <= ip_bound) - *op++ = *ip++; - return length+1; - } - else - return 0; - } - - /* initializes hash table */ - for (hslot = htab; hslot < htab + HASH_SIZE; hslot++) - *hslot = ip; - - /* we start with literal copy */ - copy = 2; - *op++ = MAX_COPY-1; - *op++ = *ip++; - *op++ = *ip++; - - /* main loop */ - while(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) - { - const flzuint8* ref; - flzuint32 distance; - - /* minimum match length */ - flzuint32 len = 3; - - /* comparison starting-point */ - const flzuint8* anchor = ip; - - /* check for a run */ -#if FASTLZ_LEVEL==2 - if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1)) - { - distance = 1; - ip += 3; - ref = anchor - 1 + 3; 
- goto match; - } -#endif - - /* find potential match */ - HASH_FUNCTION(hval,ip); - hslot = htab + hval; - ref = htab[hval]; - - /* calculate distance to the match */ - distance = anchor - ref; - - /* update hash table */ - *hslot = anchor; - - /* is this a match? check the first 3 bytes */ - if(distance==0 || -#if FASTLZ_LEVEL==1 - (distance >= MAX_DISTANCE) || -#else - (distance >= MAX_FARDISTANCE) || -#endif - *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++) - goto literal; - -#if FASTLZ_LEVEL==2 - /* far, needs at least 5-byte match */ - if(distance >= MAX_DISTANCE) - { - if(*ip++ != *ref++ || *ip++!= *ref++) - goto literal; - len += 2; - } - - match: -#endif - - /* last matched byte */ - ip = anchor + len; - - /* distance is biased */ - distance--; - - if(!distance) - { - /* zero distance means a run */ - flzuint8 x = ip[-1]; - while(ip < ip_bound) - if(*ref++ != x) break; else ip++; - } - else - for(;;) - { - /* safe because the outer check against ip limit */ - if(*ref++ != *ip++) break; - if(*ref++ != *ip++) break; - if(*ref++ != *ip++) break; - if(*ref++ != *ip++) break; - if(*ref++ != *ip++) break; - if(*ref++ != *ip++) break; - if(*ref++ != *ip++) break; - if(*ref++ != *ip++) break; - while(ip < ip_bound) - if(*ref++ != *ip++) break; - break; - } - - /* if we have copied something, adjust the copy count */ - if(copy) - /* copy is biased, '0' means 1 byte copy */ - *(op-copy-1) = copy-1; - else - /* back, to overwrite the copy count */ - op--; - - /* reset literal counter */ - copy = 0; - - /* length is biased, '1' means a match of 3 bytes */ - ip -= 3; - len = ip - anchor; - - /* encode the match */ -#if FASTLZ_LEVEL==2 - if(distance < MAX_DISTANCE) - { - if(len < 7) - { - *op++ = (len << 5) + (distance >> 8); - *op++ = (distance & 255); - } - else - { - *op++ = (7 << 5) + (distance >> 8); - for(len-=7; len >= 255; len-= 255) - *op++ = 255; - *op++ = len; - *op++ = (distance & 255); - } - } - else - { - /* far away, but not yet in the another 
galaxy... */ - if(len < 7) - { - distance -= MAX_DISTANCE; - *op++ = (len << 5) + 31; - *op++ = 255; - *op++ = distance >> 8; - *op++ = distance & 255; - } - else - { - distance -= MAX_DISTANCE; - *op++ = (7 << 5) + 31; - for(len-=7; len >= 255; len-= 255) - *op++ = 255; - *op++ = len; - *op++ = 255; - *op++ = distance >> 8; - *op++ = distance & 255; - } - } -#else - - if(FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN-2)) - while(len > MAX_LEN-2) - { - *op++ = (7 << 5) + (distance >> 8); - *op++ = MAX_LEN - 2 - 7 -2; - *op++ = (distance & 255); - len -= MAX_LEN-2; - } - - if(len < 7) - { - *op++ = (len << 5) + (distance >> 8); - *op++ = (distance & 255); - } - else - { - *op++ = (7 << 5) + (distance >> 8); - *op++ = len - 7; - *op++ = (distance & 255); - } -#endif - - /* update the hash at match boundary */ - HASH_FUNCTION(hval,ip); - htab[hval] = ip++; - HASH_FUNCTION(hval,ip); - htab[hval] = ip++; - - /* assuming literal copy */ - *op++ = MAX_COPY-1; - - continue; - - literal: - *op++ = *anchor++; - ip = anchor; - copy++; - if(FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) - { - copy = 0; - *op++ = MAX_COPY-1; - } - } - - /* left-over as literal copy */ - ip_bound++; - while(ip <= ip_bound) - { - *op++ = *ip++; - copy++; - if(copy == MAX_COPY) - { - copy = 0; - *op++ = MAX_COPY-1; - } - } - - /* if we have copied something, adjust the copy length */ - if(copy) - *(op-copy-1) = copy-1; - else - op--; - -#if FASTLZ_LEVEL==2 - /* marker for fastlz2 */ - *(flzuint8*)output |= (1 << 5); -#endif - - return op - (flzuint8*)output; -} - -static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout) -{ - const flzuint8* ip = (const flzuint8*) input; - const flzuint8* ip_limit = ip + length; - flzuint8* op = (flzuint8*) output; - flzuint8* op_limit = op + maxout; - flzuint32 ctrl = (*ip++) & 31; - int loop = 1; - - do - { - const flzuint8* ref = op; - flzuint32 len = ctrl >> 5; - flzuint32 ofs = (ctrl & 31) << 8; - - if(ctrl >= 32) - { 
-#if FASTLZ_LEVEL==2 - flzuint8 code; -#endif - len--; - ref -= ofs; - if (len == 7-1) -#if FASTLZ_LEVEL==1 - len += *ip++; - ref -= *ip++; -#else - do - { - code = *ip++; - len += code; - } while (code==255); - code = *ip++; - ref -= code; - - /* match from 16-bit distance */ - if(FASTLZ_UNEXPECT_CONDITIONAL(code==255)) - if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8))) - { - ofs = (*ip++) << 8; - ofs += *ip++; - ref = op - ofs - MAX_DISTANCE; - } -#endif - -#ifdef FASTLZ_SAFE - if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) - return 0; - - if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output)) - return 0; -#endif - - if(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) - ctrl = *ip++; - else - loop = 0; - - if(ref == op) - { - /* optimize copy for a run */ - flzuint8 b = ref[-1]; - *op++ = b; - *op++ = b; - *op++ = b; - for(; len; --len) - *op++ = b; - } - else - { -#if !defined(FASTLZ_STRICT_ALIGN) - const flzuint16* p; - flzuint16* q; -#endif - /* copy from reference */ - ref--; - *op++ = *ref++; - *op++ = *ref++; - *op++ = *ref++; - -#if !defined(FASTLZ_STRICT_ALIGN) - /* copy a byte, so that now it's word aligned */ - if(len & 1) - { - *op++ = *ref++; - len--; - } - - /* copy 16-bit at once */ - q = (flzuint16*) op; - op += len; - p = (const flzuint16*) ref; - for(len>>=1; len > 4; len-=4) - { - *q++ = *p++; - *q++ = *p++; - *q++ = *p++; - *q++ = *p++; - } - for(; len; --len) - *q++ = *p++; -#else - for(; len; --len) - *op++ = *ref++; -#endif - } - } - else - { - ctrl++; -#ifdef FASTLZ_SAFE - if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) - return 0; - if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) - return 0; -#endif - - *op++ = *ip++; - for(--ctrl; ctrl; ctrl--) - *op++ = *ip++; - - loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit); - if(loop) - ctrl = *ip++; - } - } - while(FASTLZ_EXPECT_CONDITIONAL(loop)); - - return op - (flzuint8*)output; -} - -#endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */ +#define 
FASTLZ_INLINE +#endif + +/* + * Prevent accessing more than 8-bit at once, except on x86 architectures. + */ +#if !defined(FASTLZ_STRICT_ALIGN) +#define FASTLZ_STRICT_ALIGN +#if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */ +#undef FASTLZ_STRICT_ALIGN +#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ +#undef FASTLZ_STRICT_ALIGN +#elif defined(_M_IX86) /* Intel, MSVC */ +#undef FASTLZ_STRICT_ALIGN +#elif defined(__386) +#undef FASTLZ_STRICT_ALIGN +#elif defined(_X86_) /* MinGW */ +#undef FASTLZ_STRICT_ALIGN +#elif defined(__I86__) /* Digital Mars */ +#undef FASTLZ_STRICT_ALIGN +#endif +#endif + +/* + * FIXME: use preprocessor magic to set this on different platforms! + */ +typedef unsigned char flzuint8; +typedef unsigned short flzuint16; +typedef unsigned int flzuint32; + +/* prototypes */ +int fastlz_compress(const void* input, int length, void* output); +int fastlz_compress_level(int level, const void* input, int length, void* output); +int fastlz_decompress(const void* input, int length, void* output, int maxout); + +#define MAX_COPY 32 +#define MAX_LEN 264 /* 256 + 8 */ +#define MAX_DISTANCE 8192 + +#if !defined(FASTLZ_STRICT_ALIGN) +#define FASTLZ_READU16(p) *((const flzuint16*)(p)) +#else +#define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8) +#endif + +#define HASH_LOG 13 +#define HASH_SIZE (1<< HASH_LOG) +#define HASH_MASK (HASH_SIZE-1) +#define HASH_FUNCTION(v,p) { v = FASTLZ_READU16(p); v ^= FASTLZ_READU16(p+1)^(v>>(16-HASH_LOG));v &= HASH_MASK; } + +#undef FASTLZ_LEVEL +#define FASTLZ_LEVEL 1 + +#undef FASTLZ_COMPRESSOR +#undef FASTLZ_DECOMPRESSOR +#define FASTLZ_COMPRESSOR fastlz1_compress +#define FASTLZ_DECOMPRESSOR fastlz1_decompress +static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output); +static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout); +#include "fastlz.c" + +#undef FASTLZ_LEVEL +#define FASTLZ_LEVEL 2 + +#undef MAX_DISTANCE 
+#define MAX_DISTANCE 8191 +#define MAX_FARDISTANCE (65535+MAX_DISTANCE-1) + +#undef FASTLZ_COMPRESSOR +#undef FASTLZ_DECOMPRESSOR +#define FASTLZ_COMPRESSOR fastlz2_compress +#define FASTLZ_DECOMPRESSOR fastlz2_decompress +static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output); +static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout); +#include "fastlz.c" + +int fastlz_compress(const void* input, int length, void* output) +{ + /* for short block, choose fastlz1 */ + if(length < 65536) + return fastlz1_compress(input, length, output); + + /* else... */ + return fastlz2_compress(input, length, output); +} + +int fastlz_decompress(const void* input, int length, void* output, int maxout) +{ + /* magic identifier for compression level */ + int level = ((*(const flzuint8*)input) >> 5) + 1; + + if(level == 1) + return fastlz1_decompress(input, length, output, maxout); + if(level == 2) + return fastlz2_decompress(input, length, output, maxout); + + /* unknown level, trigger error */ + return 0; +} + +int fastlz_compress_level(int level, const void* input, int length, void* output) +{ + if(level == 1) + return fastlz1_compress(input, length, output); + if(level == 2) + return fastlz2_compress(input, length, output); + + return 0; +} + +#else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */ + +static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output) +{ + const flzuint8* ip = (const flzuint8*) input; + const flzuint8* ip_bound = ip + length - 2; + const flzuint8* ip_limit = ip + length - 12; + flzuint8* op = (flzuint8*) output; + + const flzuint8* htab[HASH_SIZE]; + const flzuint8** hslot; + flzuint32 hval; + + flzuint32 copy; + + /* sanity check */ + if(FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) + { + if(length) + { + /* create literal copy only */ + *op++ = length-1; + ip_bound++; + while(ip <= ip_bound) + *op++ = *ip++; + return length+1; + } + 
else + return 0; + } + + /* initializes hash table */ + for (hslot = htab; hslot < htab + HASH_SIZE; hslot++) + *hslot = ip; + + /* we start with literal copy */ + copy = 2; + *op++ = MAX_COPY-1; + *op++ = *ip++; + *op++ = *ip++; + + /* main loop */ + while(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) + { + const flzuint8* ref; + flzuint32 distance; + + /* minimum match length */ + flzuint32 len = 3; + + /* comparison starting-point */ + const flzuint8* anchor = ip; + + /* check for a run */ +#if FASTLZ_LEVEL==2 + if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1)) + { + distance = 1; + ip += 3; + ref = anchor - 1 + 3; + goto match; + } +#endif + + /* find potential match */ + HASH_FUNCTION(hval,ip); + hslot = htab + hval; + ref = htab[hval]; + + /* calculate distance to the match */ + distance = anchor - ref; + + /* update hash table */ + *hslot = anchor; + + /* is this a match? check the first 3 bytes */ + if(distance==0 || +#if FASTLZ_LEVEL==1 + (distance >= MAX_DISTANCE) || +#else + (distance >= MAX_FARDISTANCE) || +#endif + *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++) + goto literal; + +#if FASTLZ_LEVEL==2 + /* far, needs at least 5-byte match */ + if(distance >= MAX_DISTANCE) + { + if(*ip++ != *ref++ || *ip++!= *ref++) + goto literal; + len += 2; + } + + match: +#endif + + /* last matched byte */ + ip = anchor + len; + + /* distance is biased */ + distance--; + + if(!distance) + { + /* zero distance means a run */ + flzuint8 x = ip[-1]; + while(ip < ip_bound) + if(*ref++ != x) break; else ip++; + } + else + for(;;) + { + /* safe because the outer check against ip limit */ + if(*ref++ != *ip++) break; + if(*ref++ != *ip++) break; + if(*ref++ != *ip++) break; + if(*ref++ != *ip++) break; + if(*ref++ != *ip++) break; + if(*ref++ != *ip++) break; + if(*ref++ != *ip++) break; + if(*ref++ != *ip++) break; + while(ip < ip_bound) + if(*ref++ != *ip++) break; + break; + } + + /* if we have copied something, adjust the copy count */ + if(copy) + /* 
copy is biased, '0' means 1 byte copy */ + *(op-copy-1) = copy-1; + else + /* back, to overwrite the copy count */ + op--; + + /* reset literal counter */ + copy = 0; + + /* length is biased, '1' means a match of 3 bytes */ + ip -= 3; + len = ip - anchor; + + /* encode the match */ +#if FASTLZ_LEVEL==2 + if(distance < MAX_DISTANCE) + { + if(len < 7) + { + *op++ = (len << 5) + (distance >> 8); + *op++ = (distance & 255); + } + else + { + *op++ = (7 << 5) + (distance >> 8); + for(len-=7; len >= 255; len-= 255) + *op++ = 255; + *op++ = len; + *op++ = (distance & 255); + } + } + else + { + /* far away, but not yet in the another galaxy... */ + if(len < 7) + { + distance -= MAX_DISTANCE; + *op++ = (len << 5) + 31; + *op++ = 255; + *op++ = distance >> 8; + *op++ = distance & 255; + } + else + { + distance -= MAX_DISTANCE; + *op++ = (7 << 5) + 31; + for(len-=7; len >= 255; len-= 255) + *op++ = 255; + *op++ = len; + *op++ = 255; + *op++ = distance >> 8; + *op++ = distance & 255; + } + } +#else + + if(FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN-2)) + while(len > MAX_LEN-2) + { + *op++ = (7 << 5) + (distance >> 8); + *op++ = MAX_LEN - 2 - 7 -2; + *op++ = (distance & 255); + len -= MAX_LEN-2; + } + + if(len < 7) + { + *op++ = (len << 5) + (distance >> 8); + *op++ = (distance & 255); + } + else + { + *op++ = (7 << 5) + (distance >> 8); + *op++ = len - 7; + *op++ = (distance & 255); + } +#endif + + /* update the hash at match boundary */ + HASH_FUNCTION(hval,ip); + htab[hval] = ip++; + HASH_FUNCTION(hval,ip); + htab[hval] = ip++; + + /* assuming literal copy */ + *op++ = MAX_COPY-1; + + continue; + + literal: + *op++ = *anchor++; + ip = anchor; + copy++; + if(FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) + { + copy = 0; + *op++ = MAX_COPY-1; + } + } + + /* left-over as literal copy */ + ip_bound++; + while(ip <= ip_bound) + { + *op++ = *ip++; + copy++; + if(copy == MAX_COPY) + { + copy = 0; + *op++ = MAX_COPY-1; + } + } + + /* if we have copied something, adjust the copy length 
*/ + if(copy) + *(op-copy-1) = copy-1; + else + op--; + +#if FASTLZ_LEVEL==2 + /* marker for fastlz2 */ + *(flzuint8*)output |= (1 << 5); +#endif + + return op - (flzuint8*)output; +} + +static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout) +{ + const flzuint8* ip = (const flzuint8*) input; + const flzuint8* ip_limit = ip + length; + flzuint8* op = (flzuint8*) output; + flzuint8* op_limit = op + maxout; + flzuint32 ctrl = (*ip++) & 31; + int loop = 1; + + do + { + const flzuint8* ref = op; + flzuint32 len = ctrl >> 5; + flzuint32 ofs = (ctrl & 31) << 8; + + if(ctrl >= 32) + { +#if FASTLZ_LEVEL==2 + flzuint8 code; +#endif + len--; + ref -= ofs; + if (len == 7-1) +#if FASTLZ_LEVEL==1 + len += *ip++; + ref -= *ip++; +#else + do + { + code = *ip++; + len += code; + } while (code==255); + code = *ip++; + ref -= code; + + /* match from 16-bit distance */ + if(FASTLZ_UNEXPECT_CONDITIONAL(code==255)) + if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8))) + { + ofs = (*ip++) << 8; + ofs += *ip++; + ref = op - ofs - MAX_DISTANCE; + } +#endif + +#ifdef FASTLZ_SAFE + if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) + return 0; + + if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output)) + return 0; +#endif + + if(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) + ctrl = *ip++; + else + loop = 0; + + if(ref == op) + { + /* optimize copy for a run */ + flzuint8 b = ref[-1]; + *op++ = b; + *op++ = b; + *op++ = b; + for(; len; --len) + *op++ = b; + } + else + { +#if !defined(FASTLZ_STRICT_ALIGN) + const flzuint16* p; + flzuint16* q; +#endif + /* copy from reference */ + ref--; + *op++ = *ref++; + *op++ = *ref++; + *op++ = *ref++; + +#if !defined(FASTLZ_STRICT_ALIGN) + /* copy a byte, so that now it's word aligned */ + if(len & 1) + { + *op++ = *ref++; + len--; + } + + /* copy 16-bit at once */ + q = (flzuint16*) op; + op += len; + p = (const flzuint16*) ref; + for(len>>=1; len > 4; len-=4) + { + *q++ = *p++; + *q++ = *p++; + *q++ = 
*p++; + *q++ = *p++; + } + for(; len; --len) + *q++ = *p++; +#else + for(; len; --len) + *op++ = *ref++; +#endif + } + } + else + { + ctrl++; +#ifdef FASTLZ_SAFE + if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) + return 0; + if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) + return 0; +#endif + + *op++ = *ip++; + for(--ctrl; ctrl; ctrl--) + *op++ = *ip++; + + loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit); + if(loop) + ctrl = *ip++; + } + } + while(FASTLZ_EXPECT_CONDITIONAL(loop)); + + return op - (flzuint8*)output; +} + +#endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */ diff --git a/contrib/libs/fastlz/fastlz.h b/contrib/libs/fastlz/fastlz.h index 821626a3efe..5bce060a18d 100644 --- a/contrib/libs/fastlz/fastlz.h +++ b/contrib/libs/fastlz/fastlz.h @@ -1,102 +1,102 @@ -/* - FastLZ - lightning-fast lossless compression library - - Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) - Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) - Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#ifndef FASTLZ_H -#define FASTLZ_H - -#include "rename.h" - -#define FASTLZ_VERSION 0x000100 - -#define FASTLZ_VERSION_MAJOR 0 -#define FASTLZ_VERSION_MINOR 0 -#define FASTLZ_VERSION_REVISION 0 - -#define FASTLZ_VERSION_STRING "0.1.0" - -#if defined (__cplusplus) -extern "C" { -#endif - -/** - Compress a block of data in the input buffer and returns the size of - compressed block. The size of input buffer is specified by length. The - minimum input buffer size is 16. - - The output buffer must be at least 5% larger than the input buffer - and can not be smaller than 66 bytes. - - If the input is not compressible, the return value might be larger than - length (input buffer size). - - The input buffer and the output buffer can not overlap. -*/ - -int fastlz_compress(const void* input, int length, void* output); - -/** - Decompress a block of compressed data and returns the size of the - decompressed block. If error occurs, e.g. the compressed data is - corrupted or the output buffer is not large enough, then 0 (zero) - will be returned instead. - - The input buffer and the output buffer can not overlap. - - Decompression is memory safe and guaranteed not to write the output buffer - more than what is specified in maxout. - */ - -int fastlz_decompress(const void* input, int length, void* output, int maxout); - -/** - Compress a block of data in the input buffer and returns the size of - compressed block. The size of input buffer is specified by length. The - minimum input buffer size is 16. - - The output buffer must be at least 5% larger than the input buffer - and can not be smaller than 66 bytes. 
- - If the input is not compressible, the return value might be larger than - length (input buffer size). - - The input buffer and the output buffer can not overlap. - - Compression level can be specified in parameter level. At the moment, - only level 1 and level 2 are supported. - Level 1 is the fastest compression and generally useful for short data. - Level 2 is slightly slower but it gives better compression ratio. - - Note that the compressed data, regardless of the level, can always be - decompressed using the function fastlz_decompress above. -*/ - -int fastlz_compress_level(int level, const void* input, int length, void* output); - -#if defined (__cplusplus) -} -#endif - -#endif /* FASTLZ_H */ +/* + FastLZ - lightning-fast lossless compression library + + Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) + Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) + Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+*/ + +#ifndef FASTLZ_H +#define FASTLZ_H + +#include "rename.h" + +#define FASTLZ_VERSION 0x000100 + +#define FASTLZ_VERSION_MAJOR 0 +#define FASTLZ_VERSION_MINOR 0 +#define FASTLZ_VERSION_REVISION 0 + +#define FASTLZ_VERSION_STRING "0.1.0" + +#if defined (__cplusplus) +extern "C" { +#endif + +/** + Compress a block of data in the input buffer and returns the size of + compressed block. The size of input buffer is specified by length. The + minimum input buffer size is 16. + + The output buffer must be at least 5% larger than the input buffer + and can not be smaller than 66 bytes. + + If the input is not compressible, the return value might be larger than + length (input buffer size). + + The input buffer and the output buffer can not overlap. +*/ + +int fastlz_compress(const void* input, int length, void* output); + +/** + Decompress a block of compressed data and returns the size of the + decompressed block. If error occurs, e.g. the compressed data is + corrupted or the output buffer is not large enough, then 0 (zero) + will be returned instead. + + The input buffer and the output buffer can not overlap. + + Decompression is memory safe and guaranteed not to write the output buffer + more than what is specified in maxout. + */ + +int fastlz_decompress(const void* input, int length, void* output, int maxout); + +/** + Compress a block of data in the input buffer and returns the size of + compressed block. The size of input buffer is specified by length. The + minimum input buffer size is 16. + + The output buffer must be at least 5% larger than the input buffer + and can not be smaller than 66 bytes. + + If the input is not compressible, the return value might be larger than + length (input buffer size). + + The input buffer and the output buffer can not overlap. + + Compression level can be specified in parameter level. At the moment, + only level 1 and level 2 are supported. + Level 1 is the fastest compression and generally useful for short data. 
+ Level 2 is slightly slower but it gives better compression ratio. + + Note that the compressed data, regardless of the level, can always be + decompressed using the function fastlz_decompress above. +*/ + +int fastlz_compress_level(int level, const void* input, int length, void* output); + +#if defined (__cplusplus) +} +#endif + +#endif /* FASTLZ_H */ diff --git a/contrib/libs/fastlz/rename.h b/contrib/libs/fastlz/rename.h index d99342a51c8..143a9f08f48 100644 --- a/contrib/libs/fastlz/rename.h +++ b/contrib/libs/fastlz/rename.h @@ -1,8 +1,8 @@ -#ifndef rename_h_7d8f6576asdf5 -#define rename_h_7d8f6576asdf5 - -#define fastlz_compress yfastlz_compress -#define fastlz_decompress yfastlz_decompress -#define fastlz_compress_level yfastlz_compress_level - -#endif +#ifndef rename_h_7d8f6576asdf5 +#define rename_h_7d8f6576asdf5 + +#define fastlz_compress yfastlz_compress +#define fastlz_decompress yfastlz_decompress +#define fastlz_compress_level yfastlz_compress_level + +#endif diff --git a/contrib/libs/fastlz/ya.make b/contrib/libs/fastlz/ya.make index 476ed9248ef..7f31ac0becf 100644 --- a/contrib/libs/fastlz/ya.make +++ b/contrib/libs/fastlz/ya.make @@ -1,6 +1,6 @@ -LIBRARY() +LIBRARY() -LICENSE(MIT) +LICENSE(MIT) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) @@ -10,14 +10,14 @@ OWNER( g:contrib g:cpp-contrib ) - + NO_UTIL() NO_COMPILER_WARNINGS() - -SRCS( - fastlz.c + +SRCS( + fastlz.c rename.h -) - -END() +) + +END() diff --git a/contrib/libs/fmt/test/ya.make b/contrib/libs/fmt/test/ya.make index 38c2e4ecae8..8db82d6c1e5 100644 --- a/contrib/libs/fmt/test/ya.make +++ b/contrib/libs/fmt/test/ya.make @@ -9,8 +9,8 @@ OWNER( g:cpp-contrib ) -LICENSE(MIT) - +LICENSE(MIT) + PEERDIR( contrib/libs/fmt contrib/restricted/googletest/googlemock diff --git a/contrib/libs/grpc/grpc++/ya.make b/contrib/libs/grpc/grpc++/ya.make index 63f85a6a8f9..788da4ce7eb 100644 --- a/contrib/libs/grpc/grpc++/ya.make +++ b/contrib/libs/grpc/grpc++/ya.make @@ -6,8 +6,8 @@ 
WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( contrib/libs/grpc/grpc contrib/libs/grpc/src/core/lib diff --git a/contrib/libs/grpc/grpc++_error_details/ya.make b/contrib/libs/grpc/grpc++_error_details/ya.make index efbae2eb9d5..c1b3fad26f2 100644 --- a/contrib/libs/grpc/grpc++_error_details/ya.make +++ b/contrib/libs/grpc/grpc++_error_details/ya.make @@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( contrib/libs/grpc/grpc contrib/libs/grpc/grpc++ diff --git a/contrib/libs/grpc/grpc++_reflection/ya.make b/contrib/libs/grpc/grpc++_reflection/ya.make index d9083c1fe92..3068b78f9ad 100644 --- a/contrib/libs/grpc/grpc++_reflection/ya.make +++ b/contrib/libs/grpc/grpc++_reflection/ya.make @@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( contrib/libs/grpc/grpc contrib/libs/grpc/grpc++ diff --git a/contrib/libs/grpc/grpc++_unsecure/ya.make b/contrib/libs/grpc/grpc++_unsecure/ya.make index a0d8820c12a..4810d9a0373 100644 --- a/contrib/libs/grpc/grpc++_unsecure/ya.make +++ b/contrib/libs/grpc/grpc++_unsecure/ya.make @@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( contrib/libs/grpc/grpc contrib/libs/grpc/grpc++ diff --git a/contrib/libs/grpc/grpc/ya.make b/contrib/libs/grpc/grpc/ya.make index 9381f9d3133..c29c11c0c4b 100644 --- a/contrib/libs/grpc/grpc/ya.make +++ b/contrib/libs/grpc/grpc/ya.make @@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( certs contrib/libs/c-ares diff --git a/contrib/libs/grpc/grpc_unsecure/ya.make b/contrib/libs/grpc/grpc_unsecure/ya.make index ceef301d914..38ef63eb18f 100644 --- a/contrib/libs/grpc/grpc_unsecure/ya.make +++ b/contrib/libs/grpc/grpc_unsecure/ya.make @@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) 
-LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( contrib/libs/c-ares contrib/libs/grpc/grpc diff --git a/contrib/libs/grpc/grpcpp_channelz/ya.make b/contrib/libs/grpc/grpcpp_channelz/ya.make index b99a9fc7f2b..31761e3d471 100644 --- a/contrib/libs/grpc/grpcpp_channelz/ya.make +++ b/contrib/libs/grpc/grpcpp_channelz/ya.make @@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( contrib/libs/grpc/grpc contrib/libs/grpc/grpc++ diff --git a/contrib/libs/grpc/python/ya.make b/contrib/libs/grpc/python/ya.make index 5668b8fcf35..f22d3393fc5 100644 --- a/contrib/libs/grpc/python/ya.make +++ b/contrib/libs/grpc/python/ya.make @@ -2,8 +2,8 @@ PY23_LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + OWNER( akastornov g:contrib diff --git a/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make b/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make index ddd91ed13f5..068ae1ead6d 100644 --- a/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make +++ b/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make @@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + PEERDIR( contrib/libs/protobuf contrib/libs/protoc diff --git a/contrib/libs/grpc/src/core/lib/ya.make b/contrib/libs/grpc/src/core/lib/ya.make index 82d64f2fd3a..93f59abe37c 100644 --- a/contrib/libs/grpc/src/core/lib/ya.make +++ b/contrib/libs/grpc/src/core/lib/ya.make @@ -4,8 +4,8 @@ LIBRARY() OWNER(g:cpp-contrib) -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/grpc/src/proto/grpc/channelz/ya.make b/contrib/libs/grpc/src/proto/grpc/channelz/ya.make index 7d62c80e6b8..9f1e3cedc9a 100644 --- a/contrib/libs/grpc/src/proto/grpc/channelz/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/channelz/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + 
LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/proto/grpc/core/ya.make b/contrib/libs/grpc/src/proto/grpc/core/ya.make index c0db9a79fa1..856c34ee53f 100644 --- a/contrib/libs/grpc/src/proto/grpc/core/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/core/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make b/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make index 469b2f1af4d..7bb8b0dff91 100644 --- a/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make b/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make index f213f753665..f8e301c9370 100644 --- a/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/proto/grpc/status/ya.make b/contrib/libs/grpc/src/proto/grpc/status/ya.make index 588164c6614..eec367765ff 100644 --- a/contrib/libs/grpc/src/proto/grpc/status/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/status/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make b/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make index b0bfcc81a19..b94aa415f80 100644 --- a/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() 
-LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make b/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make index 419c0ddfbee..994ec6fa82a 100644 --- a/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/proto/grpc/testing/ya.make b/contrib/libs/grpc/src/proto/grpc/testing/ya.make index 42db0476aaf..2704585a373 100644 --- a/contrib/libs/grpc/src/proto/grpc/testing/ya.make +++ b/contrib/libs/grpc/src/proto/grpc/testing/ya.make @@ -1,7 +1,7 @@ PROTO_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/python/grpcio_channelz/ya.make b/contrib/libs/grpc/src/python/grpcio_channelz/ya.make index 62dc3e7c3a5..2703d70ab04 100644 --- a/contrib/libs/grpc/src/python/grpcio_channelz/ya.make +++ b/contrib/libs/grpc/src/python/grpcio_channelz/ya.make @@ -1,7 +1,7 @@ PY23_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make b/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make index 0d1611f13d2..4bab1758a5e 100644 --- a/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make +++ b/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make @@ -1,7 +1,7 @@ PY23_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/python/grpcio_reflection/ya.make b/contrib/libs/grpc/src/python/grpcio_reflection/ya.make index ca9ac093f67..c0f7d61dcc3 100644 --- a/contrib/libs/grpc/src/python/grpcio_reflection/ya.make +++ 
b/contrib/libs/grpc/src/python/grpcio_reflection/ya.make @@ -1,7 +1,7 @@ PY23_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/src/python/grpcio_status/ya.make b/contrib/libs/grpc/src/python/grpcio_status/ya.make index fb7f71272a1..61a3ee4336b 100644 --- a/contrib/libs/grpc/src/python/grpcio_status/ya.make +++ b/contrib/libs/grpc/src/python/grpcio_status/ya.make @@ -1,7 +1,7 @@ PY23_LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/grpc/test/core/util/ya.make b/contrib/libs/grpc/test/core/util/ya.make index 0f15e395d79..fbaad80cadb 100644 --- a/contrib/libs/grpc/test/core/util/ya.make +++ b/contrib/libs/grpc/test/core/util/ya.make @@ -1,7 +1,7 @@ LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER(dvshkurko) diff --git a/contrib/libs/grpc/test/cpp/end2end/ya.make b/contrib/libs/grpc/test/cpp/end2end/ya.make index 0ffbd6783b3..b9c1dc7fe00 100644 --- a/contrib/libs/grpc/test/cpp/end2end/ya.make +++ b/contrib/libs/grpc/test/cpp/end2end/ya.make @@ -1,7 +1,7 @@ LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER(dvshkurko) diff --git a/contrib/libs/grpc/test/cpp/util/ya.make b/contrib/libs/grpc/test/cpp/util/ya.make index 05a9b0853c9..f043cc5b146 100644 --- a/contrib/libs/grpc/test/cpp/util/ya.make +++ b/contrib/libs/grpc/test/cpp/util/ya.make @@ -1,7 +1,7 @@ LIBRARY() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER(orivej) diff --git a/contrib/libs/grpc/ya.make b/contrib/libs/grpc/ya.make index 23dd8c6c6e3..29848d23ea5 100644 --- a/contrib/libs/grpc/ya.make +++ b/contrib/libs/grpc/ya.make @@ -2,7 +2,7 @@ LIBRARY() LICENSE(Apache-2.0) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) VERSION(1.33.2) diff --git 
a/contrib/libs/hdr_histogram/ya.make b/contrib/libs/hdr_histogram/ya.make index 968be53c356..b20d977b573 100644 --- a/contrib/libs/hdr_histogram/ya.make +++ b/contrib/libs/hdr_histogram/ya.make @@ -4,7 +4,7 @@ LICENSE( BSD-2-Clause AND CC0-1.0 ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) VERSION(0.9.5) diff --git a/contrib/libs/highwayhash/arch/avx2/ya.make b/contrib/libs/highwayhash/arch/avx2/ya.make index 25fafd7e97c..3084a352d88 100644 --- a/contrib/libs/highwayhash/arch/avx2/ya.make +++ b/contrib/libs/highwayhash/arch/avx2/ya.make @@ -2,8 +2,8 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + OWNER(somov) ADDINCL(contrib/libs/highwayhash) diff --git a/contrib/libs/highwayhash/arch/sse41/ya.make b/contrib/libs/highwayhash/arch/sse41/ya.make index 64924d12713..d94ad970385 100644 --- a/contrib/libs/highwayhash/arch/sse41/ya.make +++ b/contrib/libs/highwayhash/arch/sse41/ya.make @@ -2,8 +2,8 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) - +LICENSE(Apache-2.0) + OWNER(somov) ADDINCL(contrib/libs/highwayhash) diff --git a/contrib/libs/hyperscan/ya.make b/contrib/libs/hyperscan/ya.make index a8f671f1814..7783969e4a2 100644 --- a/contrib/libs/hyperscan/ya.make +++ b/contrib/libs/hyperscan/ya.make @@ -1,7 +1,7 @@ # Generated by devtools/yamaker from nixpkgs cc3b147ed182a6cae239348ef094158815da14ae. 
LIBRARY() - + OWNER( galtsev g:antiinfra diff --git a/contrib/libs/jemalloc/hack.cpp b/contrib/libs/jemalloc/hack.cpp index 39fd018eb07..900856d3534 100644 --- a/contrib/libs/jemalloc/hack.cpp +++ b/contrib/libs/jemalloc/hack.cpp @@ -1,14 +1,14 @@ -#include "hack.h" - -#include -#include - -#include "spinlock.h" - -void SPIN_L(spinlock_t* l) { - AcquireAdaptiveLock(l); -} - -void SPIN_U(spinlock_t* l) { - ReleaseAdaptiveLock(l); -} +#include "hack.h" + +#include +#include + +#include "spinlock.h" + +void SPIN_L(spinlock_t* l) { + AcquireAdaptiveLock(l); +} + +void SPIN_U(spinlock_t* l) { + ReleaseAdaptiveLock(l); +} diff --git a/contrib/libs/jemalloc/hack.h b/contrib/libs/jemalloc/hack.h index 160b79bebef..3b172a2da26 100644 --- a/contrib/libs/jemalloc/hack.h +++ b/contrib/libs/jemalloc/hack.h @@ -1,21 +1,21 @@ #pragma once - -#include - -#if defined(__cplusplus) -extern "C" { -#endif - -#define AcquireAdaptiveLockSlow AllocAcquireAdaptiveLockSlow -#define SchedYield AllocSchedYield -#define ThreadYield AllocThreadYield -#define NSystemInfo NAllocSystemInfo - + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +#define AcquireAdaptiveLockSlow AllocAcquireAdaptiveLockSlow +#define SchedYield AllocSchedYield +#define ThreadYield AllocThreadYield +#define NSystemInfo NAllocSystemInfo + #ifdef _MSC_VER # define __restrict__ __restrict # define JEMALLOC_EXPORT #endif -#if defined(__cplusplus) -}; -#endif +#if defined(__cplusplus) +}; +#endif diff --git a/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h b/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h index b7d31ebedf4..28cc151f07e 100644 --- a/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h +++ b/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h @@ -38,9 +38,9 @@ /* 1 if CPU_SPINWAIT is defined, 0 otherwise. 
*/ #define HAVE_CPU_SPINWAIT 1 #else -#define CPU_SPINWAIT +#define CPU_SPINWAIT #define HAVE_CPU_SPINWAIT 0 -#endif +#endif /* * Number of significant bits in virtual addresses. This may be less than the diff --git a/contrib/libs/jemalloc/reg_zone.cpp b/contrib/libs/jemalloc/reg_zone.cpp index 7946b87928c..6a7b9d69e1f 100644 --- a/contrib/libs/jemalloc/reg_zone.cpp +++ b/contrib/libs/jemalloc/reg_zone.cpp @@ -1,23 +1,23 @@ #include extern "C" void je_zone_register(); - + static volatile bool initialized = false; -namespace { +namespace { struct TInit { - inline TInit() { + inline TInit() { if (!initialized) { je_zone_register(); initialized = true; } - } + } }; void zone_register() { static TInit init; } -} +} extern "C" { void je_assure_zone_register() { diff --git a/contrib/libs/jemalloc/spinlock.h b/contrib/libs/jemalloc/spinlock.h index 1430b9bb8f5..93fcf10e12a 100644 --- a/contrib/libs/jemalloc/spinlock.h +++ b/contrib/libs/jemalloc/spinlock.h @@ -1,21 +1,21 @@ #pragma once - + #include - + typedef volatile intptr_t spinlock_t; -#define SPIN_L AllocAcquireAdaptiveLock -#define SPIN_U AllocReleaseAdaptiveLock - +#define SPIN_L AllocAcquireAdaptiveLock +#define SPIN_U AllocReleaseAdaptiveLock + #define _SPINLOCK_INITIALIZER 0 -#define _SPINUNLOCK(_lck) SPIN_U(_lck) +#define _SPINUNLOCK(_lck) SPIN_U(_lck) #define _SPINLOCK(_lck) SPIN_L(_lck) - -#if defined(__cplusplus) -extern "C" { -#endif - void SPIN_L(spinlock_t* lock); - void SPIN_U(spinlock_t* lock); -#if defined(__cplusplus) -}; -#endif + +#if defined(__cplusplus) +extern "C" { +#endif + void SPIN_L(spinlock_t* lock); + void SPIN_U(spinlock_t* lock); +#if defined(__cplusplus) +}; +#endif diff --git a/contrib/libs/jemalloc/ya.make b/contrib/libs/jemalloc/ya.make index ffb0e624003..586de30ab0f 100644 --- a/contrib/libs/jemalloc/ya.make +++ b/contrib/libs/jemalloc/ya.make @@ -1,12 +1,12 @@ # Generated by devtools/yamaker from nixpkgs 21.11. 
LIBRARY() - + OWNER( g:contrib g:cpp-contrib ) - + VERSION(5.2.1) ORIGINAL_SOURCE(https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2) diff --git a/contrib/libs/jwt-cpp/ya.make b/contrib/libs/jwt-cpp/ya.make index 6c7006a0c0b..c7ddd331627 100644 --- a/contrib/libs/jwt-cpp/ya.make +++ b/contrib/libs/jwt-cpp/ya.make @@ -6,7 +6,7 @@ OWNER( pbludov g:cpp-contrib ) - + VERSION(0.2.0) ORIGINAL_SOURCE(https://github.com/Thalhammer/jwt-cpp/archive/2b3ddae668f5b0dac92f57207312dc50b5bdb2f8.tar.gz) diff --git a/contrib/libs/libaio/static/ya.make b/contrib/libs/libaio/static/ya.make index b92e014caa8..c4a7e200bbe 100644 --- a/contrib/libs/libaio/static/ya.make +++ b/contrib/libs/libaio/static/ya.make @@ -15,7 +15,7 @@ ELSE() ENDIF() LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - + OWNER( vskipin g:contrib diff --git a/contrib/libs/libaio/ya.make b/contrib/libs/libaio/ya.make index b16741c7cde..3eb09b549bb 100644 --- a/contrib/libs/libaio/ya.make +++ b/contrib/libs/libaio/ya.make @@ -4,7 +4,7 @@ LIBRARY() # revision: 5a546a834c36070648158d19dd564762d59f8eb8 LICENSE(Service-Dll-Harness) - + WITHOUT_LICENSE_TEXTS() VERSION(2015-07-01-5a546a834c36070648158d19dd564762d59f8eb8) diff --git a/contrib/libs/libbz2/blocksort.c b/contrib/libs/libbz2/blocksort.c index f788e88c808..92d81fe287e 100644 --- a/contrib/libs/libbz2/blocksort.c +++ b/contrib/libs/libbz2/blocksort.c @@ -1,1094 +1,1094 @@ - -/*-------------------------------------------------------------*/ -/*--- Block sorting machinery ---*/ -/*--- blocksort.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. 
- + +/*-------------------------------------------------------------*/ +/*--- Block sorting machinery ---*/ +/*--- blocksort.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - - Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - - -#include "bzlib_private.h" - -/*---------------------------------------------*/ -/*--- Fallback O(N log(N)^2) sorting ---*/ -/*--- algorithm, for repetitive blocks ---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -__inline__ -void fallbackSimpleSort ( UInt32* fmap, - UInt32* eclass, - Int32 lo, - Int32 hi ) -{ - Int32 i, j, tmp; - UInt32 ec_tmp; - - if (lo == hi) return; - - if (hi - lo > 3) { - for ( i = hi-4; i >= lo; i-- ) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) - fmap[j-4] = fmap[j]; - fmap[j-4] = tmp; - } - } - - for ( i = hi-1; i >= lo; i-- ) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) - fmap[j-1] = fmap[j]; - fmap[j-1] = tmp; - } -} - - -/*---------------------------------------------*/ -#define fswap(zz1, zz2) \ - { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } - -#define fvswap(zzp1, zzp2, zzn) \ -{ \ - Int32 yyp1 = (zzp1); \ - Int32 yyp2 = (zzp2); \ - Int32 yyn = (zzn); \ - while (yyn > 0) { \ - fswap(fmap[yyp1], fmap[yyp2]); \ - yyp1++; yyp2++; yyn--; \ - } \ -} - - -#define fmin(a,b) ((a) < (b)) ? 
(a) : (b) - -#define fpush(lz,hz) { stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - sp++; } - -#define fpop(lz,hz) { sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; } - -#define FALLBACK_QSORT_SMALL_THRESH 10 -#define FALLBACK_QSORT_STACK_SIZE 100 - - + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + +/*---------------------------------------------*/ +/*--- Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ static -void fallbackQSort3 ( UInt32* fmap, - UInt32* eclass, - Int32 loSt, - Int32 hiSt ) -{ - Int32 unLo, unHi, ltLo, gtHi, n, m; - Int32 sp, lo, hi; - UInt32 med, r, r3; - Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; - Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; - - r = 0; - - sp = 0; - fpush ( loSt, hiSt ); - - while (sp > 0) { - - AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); - - fpop ( lo, hi ); - if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { - fallbackSimpleSort ( fmap, eclass, lo, hi ); - continue; - } - - /* Random partitioning. Median of 3 sometimes fails to - avoid bad cases. Median of 9 seems to help but - looks rather expensive. This too seems to work but - is cheaper. Guidance for the magic constants - 7621 and 32768 is taken from Sedgewick's algorithms - book, chapter 35. 
- */ - r = ((r * 7621) + 1) % 32768; - r3 = r % 3; - if (r3 == 0) med = eclass[fmap[lo]]; else - if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else - med = eclass[fmap[hi]]; - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (1) { - while (1) { - if (unLo > unHi) break; - n = (Int32)eclass[fmap[unLo]] - (Int32)med; - if (n == 0) { - fswap(fmap[unLo], fmap[ltLo]); - ltLo++; unLo++; - continue; - }; - if (n > 0) break; - unLo++; - } - while (1) { - if (unLo > unHi) break; - n = (Int32)eclass[fmap[unHi]] - (Int32)med; - if (n == 0) { - fswap(fmap[unHi], fmap[gtHi]); - gtHi--; unHi--; - continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) break; - fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; - } - - AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); - - if (gtHi < ltLo) continue; - - n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); - m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - if (n - lo > hi - m) { - fpush ( lo, n ); - fpush ( m, hi ); - } else { - fpush ( m, hi ); - fpush ( lo, n ); - } - } -} - -#undef fmin -#undef fpush -#undef fpop -#undef fswap -#undef fvswap -#undef FALLBACK_QSORT_SMALL_THRESH -#undef FALLBACK_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - nblock > 0 - eclass exists for [0 .. nblock-1] - ((UChar*)eclass) [0 .. nblock-1] holds block - ptr exists for [0 .. nblock-1] - - Post: - ((UChar*)eclass) [0 .. nblock-1] holds block - All other areas of eclass destroyed - fmap [0 .. nblock-1] holds sorted order - bhtab [ 0 .. 
2+(nblock/32) ] destroyed -*/ - +__inline__ +void fallbackSimpleSort ( UInt32* fmap, + UInt32* eclass, + Int32 lo, + Int32 hi ) +{ + Int32 i, j, tmp; + UInt32 ec_tmp; + + if (lo == hi) return; + + if (hi - lo > 3) { + for ( i = hi-4; i >= lo; i-- ) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) + fmap[j-4] = fmap[j]; + fmap[j-4] = tmp; + } + } + + for ( i = hi-1; i >= lo; i-- ) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) + fmap[j-1] = fmap[j]; + fmap[j-1] = tmp; + } +} + + +/*---------------------------------------------*/ +#define fswap(zz1, zz2) \ + { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } + +#define fvswap(zzp1, zzp2, zzn) \ +{ \ + Int32 yyp1 = (zzp1); \ + Int32 yyp2 = (zzp2); \ + Int32 yyn = (zzn); \ + while (yyn > 0) { \ + fswap(fmap[yyp1], fmap[yyp2]); \ + yyp1++; yyp2++; yyn--; \ + } \ +} + + +#define fmin(a,b) ((a) < (b)) ? (a) : (b) + +#define fpush(lz,hz) { stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + sp++; } + +#define fpop(lz,hz) { sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; } + +#define FALLBACK_QSORT_SMALL_THRESH 10 +#define FALLBACK_QSORT_STACK_SIZE 100 + + +static +void fallbackQSort3 ( UInt32* fmap, + UInt32* eclass, + Int32 loSt, + Int32 hiSt ) +{ + Int32 unLo, unHi, ltLo, gtHi, n, m; + Int32 sp, lo, hi; + UInt32 med, r, r3; + Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; + Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; + + r = 0; + + sp = 0; + fpush ( loSt, hiSt ); + + while (sp > 0) { + + AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); + + fpop ( lo, hi ); + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort ( fmap, eclass, lo, hi ); + continue; + } + + /* Random partitioning. Median of 3 sometimes fails to + avoid bad cases. Median of 9 seems to help but + looks rather expensive. This too seems to work but + is cheaper. 
Guidance for the magic constants + 7621 and 32768 is taken from Sedgewick's algorithms + book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) med = eclass[fmap[lo]]; else + if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else + med = eclass[fmap[hi]]; + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) break; + n = (Int32)eclass[fmap[unLo]] - (Int32)med; + if (n == 0) { + fswap(fmap[unLo], fmap[ltLo]); + ltLo++; unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) break; + n = (Int32)eclass[fmap[unHi]] - (Int32)med; + if (n == 0) { + fswap(fmap[unHi], fmap[gtHi]); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; + } + + AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); + + if (gtHi < ltLo) continue; + + n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); + m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush ( lo, n ); + fpush ( m, hi ); + } else { + fpush ( m, hi ); + fpush ( lo, n ); + } + } +} + +#undef fmin +#undef fpush +#undef fpop +#undef fswap +#undef fvswap +#undef FALLBACK_QSORT_SMALL_THRESH +#undef FALLBACK_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + nblock > 0 + eclass exists for [0 .. nblock-1] + ((UChar*)eclass) [0 .. nblock-1] holds block + ptr exists for [0 .. nblock-1] + + Post: + ((UChar*)eclass) [0 .. nblock-1] holds block + All other areas of eclass destroyed + fmap [0 .. nblock-1] holds sorted order + bhtab [ 0 .. 
2+(nblock/32) ] destroyed +*/ + #define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31)) #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31)) #define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31))) -#define WORD_BH(zz) bhtab[(zz) >> 5] -#define UNALIGNED_BH(zz) ((zz) & 0x01f) - -static -void fallbackSort ( UInt32* fmap, - UInt32* eclass, - UInt32* bhtab, - Int32 nblock, - Int32 verb ) -{ - Int32 ftab[257]; - Int32 ftabCopy[256]; - Int32 H, i, j, k, l, r, cc, cc1; - Int32 nNotDone; - Int32 nBhtab; - UChar* eclass8 = (UChar*)eclass; - - /*-- - Initial 1-char radix sort to generate - initial fmap and initial BH bits. - --*/ - if (verb >= 4) - VPrintf0 ( " bucket sorting ...\n" ); - for (i = 0; i < 257; i++) ftab[i] = 0; - for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; - for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; - for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; - - for (i = 0; i < nblock; i++) { - j = eclass8[i]; - k = ftab[j] - 1; - ftab[j] = k; - fmap[k] = i; - } - - nBhtab = 2 + (nblock / 32); - for (i = 0; i < nBhtab; i++) bhtab[i] = 0; - for (i = 0; i < 256; i++) SET_BH(ftab[i]); - +#define WORD_BH(zz) bhtab[(zz) >> 5] +#define UNALIGNED_BH(zz) ((zz) & 0x01f) + +static +void fallbackSort ( UInt32* fmap, + UInt32* eclass, + UInt32* bhtab, + Int32 nblock, + Int32 verb ) +{ + Int32 ftab[257]; + Int32 ftabCopy[256]; + Int32 H, i, j, k, l, r, cc, cc1; + Int32 nNotDone; + Int32 nBhtab; + UChar* eclass8 = (UChar*)eclass; + + /*-- + Initial 1-char radix sort to generate + initial fmap and initial BH bits. 
+ --*/ + if (verb >= 4) + VPrintf0 ( " bucket sorting ...\n" ); + for (i = 0; i < 257; i++) ftab[i] = 0; + for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; + for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; + for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; + + for (i = 0; i < nblock; i++) { + j = eclass8[i]; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 2 + (nblock / 32); + for (i = 0; i < nBhtab; i++) bhtab[i] = 0; + for (i = 0; i < 256; i++) SET_BH(ftab[i]); + + /*-- + Inductively refine the buckets. Kind-of an + "exponential radix sort" (!), inspired by the + Manber-Myers suffix array construction algorithm. + --*/ + + /*-- set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + SET_BH(nblock + 2*i); + CLEAR_BH(nblock + 2*i + 1); + } + + /*-- the log(N) loop --*/ + H = 1; + while (1) { + + if (verb >= 4) + VPrintf1 ( " depth %6d has ", H ); + + j = 0; + for (i = 0; i < nblock; i++) { + if (ISSET_BH(i)) j = i; + k = fmap[i] - H; if (k < 0) k += nblock; + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (1) { + + /*-- find the next non-singleton bucket --*/ + k = r + 1; + while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; + if (ISSET_BH(k)) { + while (WORD_BH(k) == 0xffffffff) k += 32; + while (ISSET_BH(k)) k++; + } + l = k - 1; + if (l >= nblock) break; + while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; + if (!ISSET_BH(k)) { + while (WORD_BH(k) == 0x00000000) k += 32; + while (!ISSET_BH(k)) k++; + } + r = k - 1; + if (r >= nblock) break; + + /*-- now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3 ( fmap, eclass, l, r ); + + /*-- scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { SET_BH(i); cc = cc1; }; + } + } + } + + if (verb >= 4) + VPrintf1 ( "%6d unresolved strings\n", nNotDone ); + + H *= 2; + if (H > nblock || nNotDone == 0) break; + } + /*-- - Inductively refine the buckets. 
Kind-of an - "exponential radix sort" (!), inspired by the - Manber-Myers suffix array construction algorithm. - --*/ - - /*-- set sentinel bits for block-end detection --*/ - for (i = 0; i < 32; i++) { - SET_BH(nblock + 2*i); - CLEAR_BH(nblock + 2*i + 1); - } - - /*-- the log(N) loop --*/ - H = 1; - while (1) { - - if (verb >= 4) - VPrintf1 ( " depth %6d has ", H ); - - j = 0; - for (i = 0; i < nblock; i++) { - if (ISSET_BH(i)) j = i; - k = fmap[i] - H; if (k < 0) k += nblock; - eclass[k] = j; - } - - nNotDone = 0; - r = -1; - while (1) { - - /*-- find the next non-singleton bucket --*/ - k = r + 1; - while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; - if (ISSET_BH(k)) { - while (WORD_BH(k) == 0xffffffff) k += 32; - while (ISSET_BH(k)) k++; - } - l = k - 1; - if (l >= nblock) break; - while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; - if (!ISSET_BH(k)) { - while (WORD_BH(k) == 0x00000000) k += 32; - while (!ISSET_BH(k)) k++; - } - r = k - 1; - if (r >= nblock) break; - - /*-- now [l, r] bracket current bucket --*/ - if (r > l) { - nNotDone += (r - l + 1); - fallbackQSort3 ( fmap, eclass, l, r ); - - /*-- scan bucket and generate header bits-- */ - cc = -1; - for (i = l; i <= r; i++) { - cc1 = eclass[fmap[i]]; - if (cc != cc1) { SET_BH(i); cc = cc1; }; - } - } - } - - if (verb >= 4) - VPrintf1 ( "%6d unresolved strings\n", nNotDone ); - - H *= 2; - if (H > nblock || nNotDone == 0) break; - } - - /*-- - Reconstruct the original block in - eclass8 [0 .. nblock-1], since the - previous phase destroyed it. - --*/ - if (verb >= 4) - VPrintf0 ( " reconstructing block ...\n" ); - j = 0; - for (i = 0; i < nblock; i++) { - while (ftabCopy[j] == 0) j++; - ftabCopy[j]--; - eclass8[fmap[i]] = (UChar)j; - } - AssertH ( j < 256, 1005 ); -} - -#undef SET_BH -#undef CLEAR_BH -#undef ISSET_BH -#undef WORD_BH -#undef UNALIGNED_BH - - -/*---------------------------------------------*/ -/*--- The main, O(N^2 log(N)) sorting ---*/ -/*--- algorithm. 
Faster for "normal" ---*/ -/*--- non-repetitive blocks. ---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -__inline__ -Bool mainGtU ( UInt32 i1, - UInt32 i2, - UChar* block, - UInt16* quadrant, - UInt32 nblock, - Int32* budget ) -{ - Int32 k; - UChar c1, c2; - UInt16 s1, s2; - - AssertD ( i1 != i2, "mainGtU" ); - /* 1 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 2 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 3 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 4 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 5 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 6 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 7 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 8 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 9 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 10 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 11 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - /* 12 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - k = nblock + 8; - - do { - /* 1 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 2 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 3 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > 
s2); - i1++; i2++; - /* 4 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 5 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 6 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 7 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - /* 8 */ - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - - if (i1 >= nblock) i1 -= nblock; - if (i2 >= nblock) i2 -= nblock; - - k -= 8; - (*budget)--; - } - while (k >= 0); - - return False; -} - - -/*---------------------------------------------*/ -/*-- - Knuth's increments seem to work better - than Incerpi-Sedgewick here. Possibly - because the number of elems to sort is - usually small, typically <= 20. ---*/ -static -Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - + Reconstruct the original block in + eclass8 [0 .. nblock-1], since the + previous phase destroyed it. + --*/ + if (verb >= 4) + VPrintf0 ( " reconstructing block ...\n" ); + j = 0; + for (i = 0; i < nblock; i++) { + while (ftabCopy[j] == 0) j++; + ftabCopy[j]--; + eclass8[fmap[i]] = (UChar)j; + } + AssertH ( j < 256, 1005 ); +} + +#undef SET_BH +#undef CLEAR_BH +#undef ISSET_BH +#undef WORD_BH +#undef UNALIGNED_BH + + +/*---------------------------------------------*/ +/*--- The main, O(N^2 log(N)) sorting ---*/ +/*--- algorithm. Faster for "normal" ---*/ +/*--- non-repetitive blocks. 
---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +__inline__ +Bool mainGtU ( UInt32 i1, + UInt32 i2, + UChar* block, + UInt16* quadrant, + UInt32 nblock, + Int32* budget ) +{ + Int32 k; + UChar c1, c2; + UInt16 s1, s2; + + AssertD ( i1 != i2, "mainGtU" ); + /* 1 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 9 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 10 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 11 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 12 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + + k = nblock + 8; + + do { + /* 1 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = 
block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + + if (i1 >= nblock) i1 -= nblock; + if (i2 >= nblock) i2 -= nblock; + + k -= 8; + (*budget)--; + } + while (k >= 0); + + return False; +} + + +/*---------------------------------------------*/ +/*-- + Knuth's increments seem to work better + than Incerpi-Sedgewick here. Possibly + because the number of elems to sort is + usually small, typically <= 20. 
+--*/ +static +Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 }; + +static +void mainSimpleSort ( UInt32* ptr, + UChar* block, + UInt16* quadrant, + Int32 nblock, + Int32 lo, + Int32 hi, + Int32 d, + Int32* budget ) +{ + Int32 i, j, h, bigN, hp; + UInt32 v; + + bigN = hi - lo + 1; + if (bigN < 2) return; + + hp = 0; + while (incs[hp] < bigN) hp++; + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (True) { + + /*-- copy 1 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 2 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 3 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while ( mainGtU ( + ptr[j-h]+d, v+d, block, quadrant, nblock, budget + ) ) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + if (*budget < 0) return; + } + } +} + + +/*---------------------------------------------*/ +/*-- + The following is an implementation of + an elegant 3-way quicksort for strings, + described in a paper "Fast Algorithms for + Sorting and Searching Strings", by Robert + Sedgewick and Jon L. Bentley. 
+--*/ + +#define mswap(zz1, zz2) \ + { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } + +#define mvswap(zzp1, zzp2, zzn) \ +{ \ + Int32 yyp1 = (zzp1); \ + Int32 yyp2 = (zzp2); \ + Int32 yyn = (zzn); \ + while (yyn > 0) { \ + mswap(ptr[yyp1], ptr[yyp2]); \ + yyp1++; yyp2++; yyn--; \ + } \ +} + static -void mainSimpleSort ( UInt32* ptr, - UChar* block, - UInt16* quadrant, - Int32 nblock, - Int32 lo, - Int32 hi, - Int32 d, - Int32* budget ) -{ - Int32 i, j, h, bigN, hp; - UInt32 v; - - bigN = hi - lo + 1; - if (bigN < 2) return; - - hp = 0; - while (incs[hp] < bigN) hp++; - hp--; - - for (; hp >= 0; hp--) { - h = incs[hp]; - - i = lo + h; - while (True) { - - /*-- copy 1 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while ( mainGtU ( - ptr[j-h]+d, v+d, block, quadrant, nblock, budget - ) ) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - /*-- copy 2 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while ( mainGtU ( - ptr[j-h]+d, v+d, block, quadrant, nblock, budget - ) ) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - /*-- copy 3 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while ( mainGtU ( - ptr[j-h]+d, v+d, block, quadrant, nblock, budget - ) ) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - if (*budget < 0) return; - } - } -} - - -/*---------------------------------------------*/ -/*-- - The following is an implementation of - an elegant 3-way quicksort for strings, - described in a paper "Fast Algorithms for - Sorting and Searching Strings", by Robert - Sedgewick and Jon L. Bentley. 
---*/ - -#define mswap(zz1, zz2) \ - { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } - -#define mvswap(zzp1, zzp2, zzn) \ -{ \ - Int32 yyp1 = (zzp1); \ - Int32 yyp2 = (zzp2); \ - Int32 yyn = (zzn); \ - while (yyn > 0) { \ - mswap(ptr[yyp1], ptr[yyp2]); \ - yyp1++; yyp2++; yyn--; \ - } \ -} - -static -__inline__ -UChar mmed3 ( UChar a, UChar b, UChar c ) -{ - UChar t; - if (a > b) { t = a; a = b; b = t; }; - if (b > c) { - b = c; - if (a > b) b = a; - } - return b; -} - -#define mmin(a,b) ((a) < (b)) ? (a) : (b) - -#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - stackD [sp] = dz; \ - sp++; } - -#define mpop(lz,hz,dz) { sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; \ - dz = stackD [sp]; } - - -#define mnextsize(az) (nextHi[az]-nextLo[az]) - -#define mnextswap(az,bz) \ - { Int32 tz; \ - tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ - tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ - tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } - - -#define MAIN_QSORT_SMALL_THRESH 20 -#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) -#define MAIN_QSORT_STACK_SIZE 100 - -static -void mainQSort3 ( UInt32* ptr, - UChar* block, - UInt16* quadrant, - Int32 nblock, - Int32 loSt, - Int32 hiSt, - Int32 dSt, - Int32* budget ) -{ - Int32 unLo, unHi, ltLo, gtHi, n, m, med; - Int32 sp, lo, hi, d; - - Int32 stackLo[MAIN_QSORT_STACK_SIZE]; - Int32 stackHi[MAIN_QSORT_STACK_SIZE]; - Int32 stackD [MAIN_QSORT_STACK_SIZE]; - - Int32 nextLo[3]; - Int32 nextHi[3]; - Int32 nextD [3]; - - sp = 0; - mpush ( loSt, hiSt, dSt ); - - while (sp > 0) { - - AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); - - mpop ( lo, hi, d ); - if (hi - lo < MAIN_QSORT_SMALL_THRESH || - d > MAIN_QSORT_DEPTH_THRESH) { - mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); - if (*budget < 0) return; - continue; - } - - med = (Int32) - mmed3 ( block[ptr[ lo ]+d], - block[ptr[ hi ]+d], - block[ptr[ (lo+hi)>>1 ]+d] ); - - unLo = ltLo = lo; - 
unHi = gtHi = hi; - - while (True) { - while (True) { - if (unLo > unHi) break; - n = ((Int32)block[ptr[unLo]+d]) - med; - if (n == 0) { - mswap(ptr[unLo], ptr[ltLo]); - ltLo++; unLo++; continue; - }; - if (n > 0) break; - unLo++; - } - while (True) { - if (unLo > unHi) break; - n = ((Int32)block[ptr[unHi]+d]) - med; - if (n == 0) { - mswap(ptr[unHi], ptr[gtHi]); - gtHi--; unHi--; continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) break; - mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; - } - - AssertD ( unHi == unLo-1, "mainQSort3(2)" ); - - if (gtHi < ltLo) { - mpush(lo, hi, d+1 ); - continue; - } - - n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); - m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; - nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; - nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; - - if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); - if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); - if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); - - AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); - AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); - - mpush (nextLo[0], nextHi[0], nextD[0]); - mpush (nextLo[1], nextHi[1], nextD[1]); - mpush (nextLo[2], nextHi[2], nextD[2]); - } -} - -#undef mswap -#undef mvswap -#undef mpush -#undef mpop -#undef mmin -#undef mnextsize -#undef mnextswap -#undef MAIN_QSORT_SMALL_THRESH -#undef MAIN_QSORT_DEPTH_THRESH -#undef MAIN_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - nblock > N_OVERSHOOT - block32 exists for [0 .. nblock-1 +N_OVERSHOOT] - ((UChar*)block32) [0 .. nblock-1] holds block - ptr exists for [0 .. nblock-1] - - Post: - ((UChar*)block32) [0 .. nblock-1] holds block - All other areas of block32 destroyed - ftab [0 .. 65536 ] destroyed - ptr [0 .. 
nblock-1] holds sorted order - if (*budget < 0), sorting was abandoned -*/ - -#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) -#define SETMASK (1 << 21) -#define CLEARMASK (~(SETMASK)) - -static -void mainSort ( UInt32* ptr, - UChar* block, - UInt16* quadrant, - UInt32* ftab, - Int32 nblock, - Int32 verb, - Int32* budget ) -{ - Int32 i, j, k, ss, sb; - Int32 runningOrder[256]; - Bool bigDone[256]; - Int32 copyStart[256]; - Int32 copyEnd [256]; - UChar c1; - Int32 numQSorted; - UInt16 s; - if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); - - /*-- set up the 2-byte frequency table --*/ - for (i = 65536; i >= 0; i--) ftab[i] = 0; - - j = block[0] << 8; - i = nblock-1; - for (; i >= 3; i -= 4) { - quadrant[i] = 0; - j = (j >> 8) | ( ((UInt16)block[i]) << 8); - ftab[j]++; - quadrant[i-1] = 0; - j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); - ftab[j]++; - quadrant[i-2] = 0; - j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); - ftab[j]++; - quadrant[i-3] = 0; - j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); - ftab[j]++; - } - for (; i >= 0; i--) { - quadrant[i] = 0; - j = (j >> 8) | ( ((UInt16)block[i]) << 8); - ftab[j]++; - } - - /*-- (emphasises close relationship of block & quadrant) --*/ - for (i = 0; i < BZ_N_OVERSHOOT; i++) { - block [nblock+i] = block[i]; - quadrant[nblock+i] = 0; - } - - if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); - - /*-- Complete the initial radix sort --*/ - for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; - - s = block[0] << 8; - i = nblock-1; - for (; i >= 3; i -= 4) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i; - s = (s >> 8) | (block[i-1] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i-1; - s = (s >> 8) | (block[i-2] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i-2; - s = (s >> 8) | (block[i-3] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i-3; - } - for (; i >= 0; i--) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; - ftab[s] = j; - ptr[j] = i; - } - - /*-- - 
Now ftab contains the first loc of every small bucket. - Calculate the running order, from smallest to largest - big bucket. - --*/ - for (i = 0; i <= 255; i++) { - bigDone [i] = False; - runningOrder[i] = i; - } - - { - Int32 vv; - Int32 h = 1; - do h = 3 * h + 1; while (h <= 256); - do { - h = h / 3; - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { - runningOrder[j] = runningOrder[j-h]; - j = j - h; - if (j <= (h - 1)) goto zero; - } - zero: - runningOrder[j] = vv; - } - } while (h != 1); - } - - /*-- - The main sorting loop. - --*/ - - numQSorted = 0; - - for (i = 0; i <= 255; i++) { - - /*-- - Process big buckets, starting with the least full. - Basically this is a 3-step process in which we call - mainQSort3 to sort the small buckets [ss, j], but - also make a big effort to avoid the calls if we can. - --*/ - ss = runningOrder[i]; - - /*-- - Step 1: - Complete the big bucket [ss] by quicksorting - any unsorted small buckets [ss, j], for j != ss. - Hopefully previous pointer-scanning phases have already - completed many of the small buckets [ss, j], so - we don't have to sort them at all. - --*/ - for (j = 0; j <= 255; j++) { - if (j != ss) { - sb = (ss << 8) + j; - if ( ! (ftab[sb] & SETMASK) ) { - Int32 lo = ftab[sb] & CLEARMASK; - Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; - if (hi > lo) { - if (verb >= 4) - VPrintf4 ( " qsort [0x%x, 0x%x] " - "done %d this %d\n", - ss, j, numQSorted, hi - lo + 1 ); - mainQSort3 ( - ptr, block, quadrant, nblock, - lo, hi, BZ_N_RADIX, budget - ); - numQSorted += (hi - lo + 1); - if (*budget < 0) return; - } - } - ftab[sb] |= SETMASK; - } - } - - AssertH ( !bigDone[ss], 1006 ); - - /*-- - Step 2: - Now scan this big bucket [ss] so as to synthesise the - sorted order for small buckets [t, ss] for all t, - including, magically, the bucket [ss,ss] too. - This will avoid doing Real Work in subsequent Step 1's. 
- --*/ - { - for (j = 0; j <= 255; j++) { - copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; - copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; - } - for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { - k = ptr[j]-1; if (k < 0) k += nblock; - c1 = block[k]; +__inline__ +UChar mmed3 ( UChar a, UChar b, UChar c ) +{ + UChar t; + if (a > b) { t = a; a = b; b = t; }; + if (b > c) { + b = c; + if (a > b) b = a; + } + return b; +} + +#define mmin(a,b) ((a) < (b)) ? (a) : (b) + +#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + stackD [sp] = dz; \ + sp++; } + +#define mpop(lz,hz,dz) { sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ + dz = stackD [sp]; } + + +#define mnextsize(az) (nextHi[az]-nextLo[az]) + +#define mnextswap(az,bz) \ + { Int32 tz; \ + tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ + tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ + tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } + + +#define MAIN_QSORT_SMALL_THRESH 20 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) +#define MAIN_QSORT_STACK_SIZE 100 + +static +void mainQSort3 ( UInt32* ptr, + UChar* block, + UInt16* quadrant, + Int32 nblock, + Int32 loSt, + Int32 hiSt, + Int32 dSt, + Int32* budget ) +{ + Int32 unLo, unHi, ltLo, gtHi, n, m, med; + Int32 sp, lo, hi, d; + + Int32 stackLo[MAIN_QSORT_STACK_SIZE]; + Int32 stackHi[MAIN_QSORT_STACK_SIZE]; + Int32 stackD [MAIN_QSORT_STACK_SIZE]; + + Int32 nextLo[3]; + Int32 nextHi[3]; + Int32 nextD [3]; + + sp = 0; + mpush ( loSt, hiSt, dSt ); + + while (sp > 0) { + + AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); + + mpop ( lo, hi, d ); + if (hi - lo < MAIN_QSORT_SMALL_THRESH || + d > MAIN_QSORT_DEPTH_THRESH) { + mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); + if (*budget < 0) return; + continue; + } + + med = (Int32) + mmed3 ( block[ptr[ lo ]+d], + block[ptr[ hi ]+d], + block[ptr[ (lo+hi)>>1 ]+d] ); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + 
while (True) { + while (True) { + if (unLo > unHi) break; + n = ((Int32)block[ptr[unLo]+d]) - med; + if (n == 0) { + mswap(ptr[unLo], ptr[ltLo]); + ltLo++; unLo++; continue; + }; + if (n > 0) break; + unLo++; + } + while (True) { + if (unLo > unHi) break; + n = ((Int32)block[ptr[unHi]+d]) - med; + if (n == 0) { + mswap(ptr[unHi], ptr[gtHi]); + gtHi--; unHi--; continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; + } + + AssertD ( unHi == unLo-1, "mainQSort3(2)" ); + + if (gtHi < ltLo) { + mpush(lo, hi, d+1 ); + continue; + } + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; + nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; + + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + + AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); + AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); + + mpush (nextLo[0], nextHi[0], nextD[0]); + mpush (nextLo[1], nextHi[1], nextD[1]); + mpush (nextLo[2], nextHi[2], nextD[2]); + } +} + +#undef mswap +#undef mvswap +#undef mpush +#undef mpop +#undef mmin +#undef mnextsize +#undef mnextswap +#undef MAIN_QSORT_SMALL_THRESH +#undef MAIN_QSORT_DEPTH_THRESH +#undef MAIN_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + nblock > N_OVERSHOOT + block32 exists for [0 .. nblock-1 +N_OVERSHOOT] + ((UChar*)block32) [0 .. nblock-1] holds block + ptr exists for [0 .. nblock-1] + + Post: + ((UChar*)block32) [0 .. nblock-1] holds block + All other areas of block32 destroyed + ftab [0 .. 65536 ] destroyed + ptr [0 .. 
nblock-1] holds sorted order + if (*budget < 0), sorting was abandoned +*/ + +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) +#define SETMASK (1 << 21) +#define CLEARMASK (~(SETMASK)) + +static +void mainSort ( UInt32* ptr, + UChar* block, + UInt16* quadrant, + UInt32* ftab, + Int32 nblock, + Int32 verb, + Int32* budget ) +{ + Int32 i, j, k, ss, sb; + Int32 runningOrder[256]; + Bool bigDone[256]; + Int32 copyStart[256]; + Int32 copyEnd [256]; + UChar c1; + Int32 numQSorted; + UInt16 s; + if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); + + /*-- set up the 2-byte frequency table --*/ + for (i = 65536; i >= 0; i--) ftab[i] = 0; + + j = block[0] << 8; + i = nblock-1; + for (; i >= 3; i -= 4) { + quadrant[i] = 0; + j = (j >> 8) | ( ((UInt16)block[i]) << 8); + ftab[j]++; + quadrant[i-1] = 0; + j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); + ftab[j]++; + quadrant[i-2] = 0; + j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); + ftab[j]++; + quadrant[i-3] = 0; + j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); + ftab[j]++; + } + for (; i >= 0; i--) { + quadrant[i] = 0; + j = (j >> 8) | ( ((UInt16)block[i]) << 8); + ftab[j]++; + } + + /*-- (emphasises close relationship of block & quadrant) --*/ + for (i = 0; i < BZ_N_OVERSHOOT; i++) { + block [nblock+i] = block[i]; + quadrant[nblock+i] = 0; + } + + if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); + + /*-- Complete the initial radix sort --*/ + for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; + + s = block[0] << 8; + i = nblock-1; + for (; i >= 3; i -= 4) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i; + s = (s >> 8) | (block[i-1] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-1; + s = (s >> 8) | (block[i-2] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-2; + s = (s >> 8) | (block[i-3] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-3; + } + for (; i >= 0; i--) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i; + } + + /*-- + 
Now ftab contains the first loc of every small bucket. + Calculate the running order, from smallest to largest + big bucket. + --*/ + for (i = 0; i <= 255; i++) { + bigDone [i] = False; + runningOrder[i] = i; + } + + { + Int32 vv; + Int32 h = 1; + do h = 3 * h + 1; while (h <= 256); + do { + h = h / 3; + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { + runningOrder[j] = runningOrder[j-h]; + j = j - h; + if (j <= (h - 1)) goto zero; + } + zero: + runningOrder[j] = vv; + } + } while (h != 1); + } + + /*-- + The main sorting loop. + --*/ + + numQSorted = 0; + + for (i = 0; i <= 255; i++) { + + /*-- + Process big buckets, starting with the least full. + Basically this is a 3-step process in which we call + mainQSort3 to sort the small buckets [ss, j], but + also make a big effort to avoid the calls if we can. + --*/ + ss = runningOrder[i]; + + /*-- + Step 1: + Complete the big bucket [ss] by quicksorting + any unsorted small buckets [ss, j], for j != ss. + Hopefully previous pointer-scanning phases have already + completed many of the small buckets [ss, j], so + we don't have to sort them at all. + --*/ + for (j = 0; j <= 255; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if ( ! (ftab[sb] & SETMASK) ) { + Int32 lo = ftab[sb] & CLEARMASK; + Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; + if (hi > lo) { + if (verb >= 4) + VPrintf4 ( " qsort [0x%x, 0x%x] " + "done %d this %d\n", + ss, j, numQSorted, hi - lo + 1 ); + mainQSort3 ( + ptr, block, quadrant, nblock, + lo, hi, BZ_N_RADIX, budget + ); + numQSorted += (hi - lo + 1); + if (*budget < 0) return; + } + } + ftab[sb] |= SETMASK; + } + } + + AssertH ( !bigDone[ss], 1006 ); + + /*-- + Step 2: + Now scan this big bucket [ss] so as to synthesise the + sorted order for small buckets [t, ss] for all t, + including, magically, the bucket [ss,ss] too. + This will avoid doing Real Work in subsequent Step 1's. 
+ --*/ + { + for (j = 0; j <= 255; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = ptr[j]-1; if (k < 0) k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[ copyStart[c1]++ ] = k; + } + for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = ptr[j]-1; if (k < 0) k += nblock; + c1 = block[k]; if (!bigDone[c1]) - ptr[ copyStart[c1]++ ] = k; - } - for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { - k = ptr[j]-1; if (k < 0) k += nblock; - c1 = block[k]; - if (!bigDone[c1]) - ptr[ copyEnd[c1]-- ] = k; - } - } - - AssertH ( (copyStart[ss]-1 == copyEnd[ss]) - || - /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. - Necessity for this case is demonstrated by compressing - a sequence of approximately 48.5 million of character - 251; 1.0.0/1.0.1 will then die here. */ - (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), - 1007 ) - - for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; - - /*-- - Step 3: - The [ss] big bucket is now done. Record this fact, - and update the quadrant descriptors. Remember to - update quadrants in the overshoot area too, if - necessary. The "if (i < 255)" test merely skips - this updating for the last bucket processed, since - updating for the last bucket is pointless. - - The quadrant array provides a way to incrementally - cache sort orderings, as they appear, so as to - make subsequent comparisons in fullGtU() complete - faster. For repetitive blocks this makes a big - difference (but not big enough to be able to avoid - the fallback sorting mechanism, exponential radix sort). - - The precise meaning is: at all times: - - for 0 <= i < nblock and 0 <= j <= nblock - - if block[i] != block[j], - - then the relative values of quadrant[i] and - quadrant[j] are meaningless. 
- - else { - if quadrant[i] < quadrant[j] - then the string starting at i lexicographically - precedes the string starting at j - - else if quadrant[i] > quadrant[j] - then the string starting at j lexicographically - precedes the string starting at i - - else - the relative ordering of the strings starting - at i and j has not yet been determined. - } - --*/ - bigDone[ss] = True; - - if (i < 255) { - Int32 bbStart = ftab[ss << 8] & CLEARMASK; - Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; - Int32 shifts = 0; - - while ((bbSize >> shifts) > 65534) shifts++; - - for (j = bbSize-1; j >= 0; j--) { - Int32 a2update = ptr[bbStart + j]; - UInt16 qVal = (UInt16)(j >> shifts); - quadrant[a2update] = qVal; - if (a2update < BZ_N_OVERSHOOT) - quadrant[a2update + nblock] = qVal; - } - AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); - } - - } - - if (verb >= 4) - VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", - nblock, numQSorted, nblock - numQSorted ); -} - -#undef BIGFREQ -#undef SETMASK -#undef CLEARMASK - - -/*---------------------------------------------*/ -/* Pre: - nblock > 0 - arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] - ((UChar*)arr2) [0 .. nblock-1] holds block - arr1 exists for [0 .. nblock-1] - - Post: - ((UChar*)arr2) [0 .. nblock-1] holds block - All other areas of block destroyed - ftab [ 0 .. 65536 ] destroyed - arr1 [0 .. nblock-1] holds sorted order -*/ -void BZ2_blockSort ( EState* s ) -{ - UInt32* ptr = s->ptr; - UChar* block = s->block; - UInt32* ftab = s->ftab; - Int32 nblock = s->nblock; - Int32 verb = s->verbosity; - Int32 wfact = s->workFactor; - UInt16* quadrant; - Int32 budget; - Int32 budgetInit; - Int32 i; - - if (nblock < 10000) { - fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); - } else { - /* Calculate the location for quadrant, remembering to get - the alignment right. Assumes that &(block[0]) is at least - 2-byte aligned -- this should be ok since block is really - the first section of arr2. 
- */ - i = nblock+BZ_N_OVERSHOOT; - if (i & 1) i++; - quadrant = (UInt16*)(&(block[i])); - - /* (wfact-1) / 3 puts the default-factor-30 - transition point at very roughly the same place as - with v0.1 and v0.9.0. - Not that it particularly matters any more, since the - resulting compressed stream is now the same regardless - of whether or not we use the main sort or fallback sort. - */ - if (wfact < 1 ) wfact = 1; - if (wfact > 100) wfact = 100; - budgetInit = nblock * ((wfact-1) / 3); - budget = budgetInit; - - mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); - if (verb >= 3) - VPrintf3 ( " %d work, %d block, ratio %5.2f\n", - budgetInit - budget, - nblock, - (float)(budgetInit - budget) / - (float)(nblock==0 ? 1 : nblock) ); - if (budget < 0) { - if (verb >= 2) - VPrintf0 ( " too repetitive; using fallback" - " sorting algorithm\n" ); - fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); - } - } - - s->origPtr = -1; - for (i = 0; i < s->nblock; i++) - if (ptr[i] == 0) - { s->origPtr = i; break; }; - - AssertH( s->origPtr != -1, 1003 ); -} - - -/*-------------------------------------------------------------*/ -/*--- end blocksort.c ---*/ -/*-------------------------------------------------------------*/ + ptr[ copyEnd[c1]-- ] = k; + } + } + + AssertH ( (copyStart[ss]-1 == copyEnd[ss]) + || + /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. + Necessity for this case is demonstrated by compressing + a sequence of approximately 48.5 million of character + 251; 1.0.0/1.0.1 will then die here. */ + (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), + 1007 ) + + for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; + + /*-- + Step 3: + The [ss] big bucket is now done. Record this fact, + and update the quadrant descriptors. Remember to + update quadrants in the overshoot area too, if + necessary. The "if (i < 255)" test merely skips + this updating for the last bucket processed, since + updating for the last bucket is pointless. 
+ + The quadrant array provides a way to incrementally + cache sort orderings, as they appear, so as to + make subsequent comparisons in fullGtU() complete + faster. For repetitive blocks this makes a big + difference (but not big enough to be able to avoid + the fallback sorting mechanism, exponential radix sort). + + The precise meaning is: at all times: + + for 0 <= i < nblock and 0 <= j <= nblock + + if block[i] != block[j], + + then the relative values of quadrant[i] and + quadrant[j] are meaningless. + + else { + if quadrant[i] < quadrant[j] + then the string starting at i lexicographically + precedes the string starting at j + + else if quadrant[i] > quadrant[j] + then the string starting at j lexicographically + precedes the string starting at i + + else + the relative ordering of the strings starting + at i and j has not yet been determined. + } + --*/ + bigDone[ss] = True; + + if (i < 255) { + Int32 bbStart = ftab[ss << 8] & CLEARMASK; + Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; + Int32 shifts = 0; + + while ((bbSize >> shifts) > 65534) shifts++; + + for (j = bbSize-1; j >= 0; j--) { + Int32 a2update = ptr[bbStart + j]; + UInt16 qVal = (UInt16)(j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZ_N_OVERSHOOT) + quadrant[a2update + nblock] = qVal; + } + AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); + } + + } + + if (verb >= 4) + VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", + nblock, numQSorted, nblock - numQSorted ); +} + +#undef BIGFREQ +#undef SETMASK +#undef CLEARMASK + + +/*---------------------------------------------*/ +/* Pre: + nblock > 0 + arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] + ((UChar*)arr2) [0 .. nblock-1] holds block + arr1 exists for [0 .. nblock-1] + + Post: + ((UChar*)arr2) [0 .. nblock-1] holds block + All other areas of block destroyed + ftab [ 0 .. 65536 ] destroyed + arr1 [0 .. 
nblock-1] holds sorted order +*/ +void BZ2_blockSort ( EState* s ) +{ + UInt32* ptr = s->ptr; + UChar* block = s->block; + UInt32* ftab = s->ftab; + Int32 nblock = s->nblock; + Int32 verb = s->verbosity; + Int32 wfact = s->workFactor; + UInt16* quadrant; + Int32 budget; + Int32 budgetInit; + Int32 i; + + if (nblock < 10000) { + fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); + } else { + /* Calculate the location for quadrant, remembering to get + the alignment right. Assumes that &(block[0]) is at least + 2-byte aligned -- this should be ok since block is really + the first section of arr2. + */ + i = nblock+BZ_N_OVERSHOOT; + if (i & 1) i++; + quadrant = (UInt16*)(&(block[i])); + + /* (wfact-1) / 3 puts the default-factor-30 + transition point at very roughly the same place as + with v0.1 and v0.9.0. + Not that it particularly matters any more, since the + resulting compressed stream is now the same regardless + of whether or not we use the main sort or fallback sort. + */ + if (wfact < 1 ) wfact = 1; + if (wfact > 100) wfact = 100; + budgetInit = nblock * ((wfact-1) / 3); + budget = budgetInit; + + mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); + if (verb >= 3) + VPrintf3 ( " %d work, %d block, ratio %5.2f\n", + budgetInit - budget, + nblock, + (float)(budgetInit - budget) / + (float)(nblock==0 ? 
1 : nblock) ); + if (budget < 0) { + if (verb >= 2) + VPrintf0 ( " too repetitive; using fallback" + " sorting algorithm\n" ); + fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); + } + } + + s->origPtr = -1; + for (i = 0; i < s->nblock; i++) + if (ptr[i] == 0) + { s->origPtr = i; break; }; + + AssertH( s->origPtr != -1, 1003 ); +} + + +/*-------------------------------------------------------------*/ +/*--- end blocksort.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/bzlib.c b/contrib/libs/libbz2/bzlib.c index ee4ec6e8ef9..21786551b60 100644 --- a/contrib/libs/libbz2/bzlib.c +++ b/contrib/libs/libbz2/bzlib.c @@ -1,1572 +1,1572 @@ - -/*-------------------------------------------------------------*/ -/*--- Library top-level functions. ---*/ -/*--- bzlib.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. - + +/*-------------------------------------------------------------*/ +/*--- Library top-level functions. ---*/ +/*--- bzlib.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - - Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - -/* CHANGES - 0.9.0 -- original version. - 0.9.0a/b -- no changes in this file. - 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). 
- fixed bzWrite/bzRead to ignore zero-length requests. - fixed bzread to correctly handle read requests after EOF. - wrong parameter order in call to bzDecompressInit in - bzBuffToBuffDecompress. Fixed. -*/ - -#include "bzlib_private.h" - - -/*---------------------------------------------------*/ -/*--- Compression stuff ---*/ -/*---------------------------------------------------*/ - - -/*---------------------------------------------------*/ -#ifndef BZ_NO_STDIO -void BZ2_bz__AssertH__fail ( int errcode ) -{ - fprintf(stderr, - "\n\nbzip2/libbzip2: internal error number %d.\n" - "This is a bug in bzip2/libbzip2, %s.\n" + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + +/* CHANGES + 0.9.0 -- original version. + 0.9.0a/b -- no changes in this file. + 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). + fixed bzWrite/bzRead to ignore zero-length requests. + fixed bzread to correctly handle read requests after EOF. + wrong parameter order in call to bzDecompressInit in + bzBuffToBuffDecompress. Fixed. +*/ + +#include "bzlib_private.h" + + +/*---------------------------------------------------*/ +/*--- Compression stuff ---*/ +/*---------------------------------------------------*/ + + +/*---------------------------------------------------*/ +#ifndef BZ_NO_STDIO +void BZ2_bz__AssertH__fail ( int errcode ) +{ + fprintf(stderr, + "\n\nbzip2/libbzip2: internal error number %d.\n" + "This is a bug in bzip2/libbzip2, %s.\n" "Please report it to: bzip2-devel@sourceware.org. If this happened\n" - "when you were using some program which uses libbzip2 as a\n" - "component, you should also report this bug to the author(s)\n" - "of that program. 
Please make an effort to report this bug;\n" - "timely and accurate bug reports eventually lead to higher\n" + "when you were using some program which uses libbzip2 as a\n" + "component, you should also report this bug to the author(s)\n" + "of that program. Please make an effort to report this bug;\n" + "timely and accurate bug reports eventually lead to higher\n" "quality software. Thanks.\n\n", - errcode, - BZ2_bzlibVersion() - ); - - if (errcode == 1007) { - fprintf(stderr, - "\n*** A special note about internal error number 1007 ***\n" - "\n" - "Experience suggests that a common cause of i.e. 1007\n" - "is unreliable memory or other hardware. The 1007 assertion\n" - "just happens to cross-check the results of huge numbers of\n" - "memory reads/writes, and so acts (unintendedly) as a stress\n" - "test of your memory system.\n" - "\n" - "I suggest the following: try compressing the file again,\n" - "possibly monitoring progress in detail with the -vv flag.\n" - "\n" - "* If the error cannot be reproduced, and/or happens at different\n" - " points in compression, you may have a flaky memory system.\n" - " Try a memory-test program. I have used Memtest86\n" - " (www.memtest86.com). At the time of writing it is free (GPLd).\n" - " Memtest86 tests memory much more thorougly than your BIOSs\n" - " power-on test, and may find failures that the BIOS doesn't.\n" - "\n" - "* If the error can be repeatably reproduced, this is a bug in\n" - " bzip2, and I would very much like to hear about it. 
Please\n" - " let me know, and, ideally, save a copy of the file causing the\n" - " problem -- without which I will be unable to investigate it.\n" - "\n" - ); - } - - exit(3); -} -#endif - - -/*---------------------------------------------------*/ -static -int bz_config_ok ( void ) -{ - if (sizeof(int) != 4) return 0; - if (sizeof(short) != 2) return 0; - if (sizeof(char) != 1) return 0; - return 1; -} - - -/*---------------------------------------------------*/ -static -void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) -{ - void* v = malloc ( items * size ); - return v; -} - -static -void default_bzfree ( void* opaque, void* addr ) -{ - if (addr != NULL) free ( addr ); -} - - -/*---------------------------------------------------*/ -static -void prepare_new_block ( EState* s ) -{ - Int32 i; - s->nblock = 0; - s->numZ = 0; - s->state_out_pos = 0; - BZ_INITIALISE_CRC ( s->blockCRC ); - for (i = 0; i < 256; i++) s->inUse[i] = False; - s->blockNo++; -} - - -/*---------------------------------------------------*/ -static -void init_RL ( EState* s ) -{ - s->state_in_ch = 256; - s->state_in_len = 0; -} - - -static -Bool isempty_RL ( EState* s ) -{ - if (s->state_in_ch < 256 && s->state_in_len > 0) - return False; else - return True; -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzCompressInit) - ( bz_stream* strm, - int blockSize100k, - int verbosity, - int workFactor ) -{ - Int32 n; - EState* s; - - if (!bz_config_ok()) return BZ_CONFIG_ERROR; - - if (strm == NULL || - blockSize100k < 1 || blockSize100k > 9 || - workFactor < 0 || workFactor > 250) - return BZ_PARAM_ERROR; - - if (workFactor == 0) workFactor = 30; - if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; - if (strm->bzfree == NULL) strm->bzfree = default_bzfree; - - s = BZALLOC( sizeof(EState) ); - if (s == NULL) return BZ_MEM_ERROR; - s->strm = strm; - - s->arr1 = NULL; - s->arr2 = NULL; - s->ftab = NULL; - - n = 100000 * blockSize100k; - s->arr1 = 
BZALLOC( n * sizeof(UInt32) ); - s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) ); - s->ftab = BZALLOC( 65537 * sizeof(UInt32) ); - - if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) { - if (s->arr1 != NULL) BZFREE(s->arr1); - if (s->arr2 != NULL) BZFREE(s->arr2); - if (s->ftab != NULL) BZFREE(s->ftab); - if (s != NULL) BZFREE(s); - return BZ_MEM_ERROR; - } - - s->blockNo = 0; - s->state = BZ_S_INPUT; - s->mode = BZ_M_RUNNING; - s->combinedCRC = 0; - s->blockSize100k = blockSize100k; - s->nblockMAX = 100000 * blockSize100k - 19; - s->verbosity = verbosity; - s->workFactor = workFactor; - - s->block = (UChar*)s->arr2; - s->mtfv = (UInt16*)s->arr1; - s->zbits = NULL; - s->ptr = (UInt32*)s->arr1; - - strm->state = s; - strm->total_in_lo32 = 0; - strm->total_in_hi32 = 0; - strm->total_out_lo32 = 0; - strm->total_out_hi32 = 0; - init_RL ( s ); - prepare_new_block ( s ); - return BZ_OK; -} - - -/*---------------------------------------------------*/ -static -void add_pair_to_block ( EState* s ) -{ - Int32 i; - UChar ch = (UChar)(s->state_in_ch); - for (i = 0; i < s->state_in_len; i++) { - BZ_UPDATE_CRC( s->blockCRC, ch ); - } - s->inUse[s->state_in_ch] = True; - switch (s->state_in_len) { - case 1: - s->block[s->nblock] = (UChar)ch; s->nblock++; - break; - case 2: - s->block[s->nblock] = (UChar)ch; s->nblock++; - s->block[s->nblock] = (UChar)ch; s->nblock++; - break; - case 3: - s->block[s->nblock] = (UChar)ch; s->nblock++; - s->block[s->nblock] = (UChar)ch; s->nblock++; - s->block[s->nblock] = (UChar)ch; s->nblock++; - break; - default: - s->inUse[s->state_in_len-4] = True; - s->block[s->nblock] = (UChar)ch; s->nblock++; - s->block[s->nblock] = (UChar)ch; s->nblock++; - s->block[s->nblock] = (UChar)ch; s->nblock++; - s->block[s->nblock] = (UChar)ch; s->nblock++; - s->block[s->nblock] = ((UChar)(s->state_in_len-4)); - s->nblock++; - break; - } -} - - -/*---------------------------------------------------*/ -static -void flush_RL ( EState* s ) -{ - if 
(s->state_in_ch < 256) add_pair_to_block ( s ); - init_RL ( s ); -} - - -/*---------------------------------------------------*/ -#define ADD_CHAR_TO_BLOCK(zs,zchh0) \ -{ \ - UInt32 zchh = (UInt32)(zchh0); \ - /*-- fast track the common case --*/ \ - if (zchh != zs->state_in_ch && \ - zs->state_in_len == 1) { \ - UChar ch = (UChar)(zs->state_in_ch); \ - BZ_UPDATE_CRC( zs->blockCRC, ch ); \ - zs->inUse[zs->state_in_ch] = True; \ - zs->block[zs->nblock] = (UChar)ch; \ - zs->nblock++; \ - zs->state_in_ch = zchh; \ - } \ - else \ - /*-- general, uncommon cases --*/ \ - if (zchh != zs->state_in_ch || \ - zs->state_in_len == 255) { \ - if (zs->state_in_ch < 256) \ - add_pair_to_block ( zs ); \ - zs->state_in_ch = zchh; \ - zs->state_in_len = 1; \ - } else { \ - zs->state_in_len++; \ - } \ -} - - -/*---------------------------------------------------*/ -static -Bool copy_input_until_stop ( EState* s ) -{ - Bool progress_in = False; - - if (s->mode == BZ_M_RUNNING) { - - /*-- fast track the common case --*/ - while (True) { - /*-- block full? --*/ - if (s->nblock >= s->nblockMAX) break; - /*-- no input? --*/ - if (s->strm->avail_in == 0) break; - progress_in = True; - ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); - s->strm->next_in++; - s->strm->avail_in--; - s->strm->total_in_lo32++; - if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; - } - - } else { - - /*-- general, uncommon case --*/ - while (True) { - /*-- block full? --*/ - if (s->nblock >= s->nblockMAX) break; - /*-- no input? --*/ - if (s->strm->avail_in == 0) break; - /*-- flush/finish end? 
--*/ - if (s->avail_in_expect == 0) break; - progress_in = True; - ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); - s->strm->next_in++; - s->strm->avail_in--; - s->strm->total_in_lo32++; - if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; - s->avail_in_expect--; - } - } - return progress_in; -} - - -/*---------------------------------------------------*/ -static -Bool copy_output_until_stop ( EState* s ) -{ - Bool progress_out = False; - - while (True) { - - /*-- no output space? --*/ - if (s->strm->avail_out == 0) break; - - /*-- block done? --*/ - if (s->state_out_pos >= s->numZ) break; - - progress_out = True; - *(s->strm->next_out) = s->zbits[s->state_out_pos]; - s->state_out_pos++; - s->strm->avail_out--; - s->strm->next_out++; - s->strm->total_out_lo32++; - if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; - } - - return progress_out; -} - - -/*---------------------------------------------------*/ -static -Bool handle_compress ( bz_stream* strm ) -{ - Bool progress_in = False; - Bool progress_out = False; - EState* s = strm->state; - - while (True) { - - if (s->state == BZ_S_OUTPUT) { - progress_out |= copy_output_until_stop ( s ); - if (s->state_out_pos < s->numZ) break; - if (s->mode == BZ_M_FINISHING && - s->avail_in_expect == 0 && - isempty_RL(s)) break; - prepare_new_block ( s ); - s->state = BZ_S_INPUT; - if (s->mode == BZ_M_FLUSHING && - s->avail_in_expect == 0 && - isempty_RL(s)) break; - } - - if (s->state == BZ_S_INPUT) { - progress_in |= copy_input_until_stop ( s ); - if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { - flush_RL ( s ); - BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); - s->state = BZ_S_OUTPUT; + errcode, + BZ2_bzlibVersion() + ); + + if (errcode == 1007) { + fprintf(stderr, + "\n*** A special note about internal error number 1007 ***\n" + "\n" + "Experience suggests that a common cause of i.e. 1007\n" + "is unreliable memory or other hardware. 
The 1007 assertion\n" + "just happens to cross-check the results of huge numbers of\n" + "memory reads/writes, and so acts (unintendedly) as a stress\n" + "test of your memory system.\n" + "\n" + "I suggest the following: try compressing the file again,\n" + "possibly monitoring progress in detail with the -vv flag.\n" + "\n" + "* If the error cannot be reproduced, and/or happens at different\n" + " points in compression, you may have a flaky memory system.\n" + " Try a memory-test program. I have used Memtest86\n" + " (www.memtest86.com). At the time of writing it is free (GPLd).\n" + " Memtest86 tests memory much more thorougly than your BIOSs\n" + " power-on test, and may find failures that the BIOS doesn't.\n" + "\n" + "* If the error can be repeatably reproduced, this is a bug in\n" + " bzip2, and I would very much like to hear about it. Please\n" + " let me know, and, ideally, save a copy of the file causing the\n" + " problem -- without which I will be unable to investigate it.\n" + "\n" + ); + } + + exit(3); +} +#endif + + +/*---------------------------------------------------*/ +static +int bz_config_ok ( void ) +{ + if (sizeof(int) != 4) return 0; + if (sizeof(short) != 2) return 0; + if (sizeof(char) != 1) return 0; + return 1; +} + + +/*---------------------------------------------------*/ +static +void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) +{ + void* v = malloc ( items * size ); + return v; +} + +static +void default_bzfree ( void* opaque, void* addr ) +{ + if (addr != NULL) free ( addr ); +} + + +/*---------------------------------------------------*/ +static +void prepare_new_block ( EState* s ) +{ + Int32 i; + s->nblock = 0; + s->numZ = 0; + s->state_out_pos = 0; + BZ_INITIALISE_CRC ( s->blockCRC ); + for (i = 0; i < 256; i++) s->inUse[i] = False; + s->blockNo++; +} + + +/*---------------------------------------------------*/ +static +void init_RL ( EState* s ) +{ + s->state_in_ch = 256; + s->state_in_len = 0; +} + + +static 
+Bool isempty_RL ( EState* s ) +{ + if (s->state_in_ch < 256 && s->state_in_len > 0) + return False; else + return True; +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzCompressInit) + ( bz_stream* strm, + int blockSize100k, + int verbosity, + int workFactor ) +{ + Int32 n; + EState* s; + + if (!bz_config_ok()) return BZ_CONFIG_ERROR; + + if (strm == NULL || + blockSize100k < 1 || blockSize100k > 9 || + workFactor < 0 || workFactor > 250) + return BZ_PARAM_ERROR; + + if (workFactor == 0) workFactor = 30; + if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; + if (strm->bzfree == NULL) strm->bzfree = default_bzfree; + + s = BZALLOC( sizeof(EState) ); + if (s == NULL) return BZ_MEM_ERROR; + s->strm = strm; + + s->arr1 = NULL; + s->arr2 = NULL; + s->ftab = NULL; + + n = 100000 * blockSize100k; + s->arr1 = BZALLOC( n * sizeof(UInt32) ); + s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) ); + s->ftab = BZALLOC( 65537 * sizeof(UInt32) ); + + if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) { + if (s->arr1 != NULL) BZFREE(s->arr1); + if (s->arr2 != NULL) BZFREE(s->arr2); + if (s->ftab != NULL) BZFREE(s->ftab); + if (s != NULL) BZFREE(s); + return BZ_MEM_ERROR; + } + + s->blockNo = 0; + s->state = BZ_S_INPUT; + s->mode = BZ_M_RUNNING; + s->combinedCRC = 0; + s->blockSize100k = blockSize100k; + s->nblockMAX = 100000 * blockSize100k - 19; + s->verbosity = verbosity; + s->workFactor = workFactor; + + s->block = (UChar*)s->arr2; + s->mtfv = (UInt16*)s->arr1; + s->zbits = NULL; + s->ptr = (UInt32*)s->arr1; + + strm->state = s; + strm->total_in_lo32 = 0; + strm->total_in_hi32 = 0; + strm->total_out_lo32 = 0; + strm->total_out_hi32 = 0; + init_RL ( s ); + prepare_new_block ( s ); + return BZ_OK; +} + + +/*---------------------------------------------------*/ +static +void add_pair_to_block ( EState* s ) +{ + Int32 i; + UChar ch = (UChar)(s->state_in_ch); + for (i = 0; i < s->state_in_len; i++) { + BZ_UPDATE_CRC( 
s->blockCRC, ch ); + } + s->inUse[s->state_in_ch] = True; + switch (s->state_in_len) { + case 1: + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + case 2: + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + case 3: + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + default: + s->inUse[s->state_in_len-4] = True; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = ((UChar)(s->state_in_len-4)); + s->nblock++; + break; + } +} + + +/*---------------------------------------------------*/ +static +void flush_RL ( EState* s ) +{ + if (s->state_in_ch < 256) add_pair_to_block ( s ); + init_RL ( s ); +} + + +/*---------------------------------------------------*/ +#define ADD_CHAR_TO_BLOCK(zs,zchh0) \ +{ \ + UInt32 zchh = (UInt32)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && \ + zs->state_in_len == 1) { \ + UChar ch = (UChar)(zs->state_in_ch); \ + BZ_UPDATE_CRC( zs->blockCRC, ch ); \ + zs->inUse[zs->state_in_ch] = True; \ + zs->block[zs->nblock] = (UChar)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || \ + zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block ( zs ); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} + + +/*---------------------------------------------------*/ +static +Bool copy_input_until_stop ( EState* s ) +{ + Bool progress_in = False; + + if (s->mode == BZ_M_RUNNING) { + + /*-- fast track the common case --*/ + while (True) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? 
--*/ + if (s->strm->avail_in == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); + s->strm->next_in++; + s->strm->avail_in--; + s->strm->total_in_lo32++; + if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; + } + + } else { + + /*-- general, uncommon case --*/ + while (True) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- flush/finish end? --*/ + if (s->avail_in_expect == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); + s->strm->next_in++; + s->strm->avail_in--; + s->strm->total_in_lo32++; + if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; + s->avail_in_expect--; + } + } + return progress_in; +} + + +/*---------------------------------------------------*/ +static +Bool copy_output_until_stop ( EState* s ) +{ + Bool progress_out = False; + + while (True) { + + /*-- no output space? --*/ + if (s->strm->avail_out == 0) break; + + /*-- block done? 
--*/ + if (s->state_out_pos >= s->numZ) break; + + progress_out = True; + *(s->strm->next_out) = s->zbits[s->state_out_pos]; + s->state_out_pos++; + s->strm->avail_out--; + s->strm->next_out++; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; + } + + return progress_out; +} + + +/*---------------------------------------------------*/ +static +Bool handle_compress ( bz_stream* strm ) +{ + Bool progress_in = False; + Bool progress_out = False; + EState* s = strm->state; + + while (True) { + + if (s->state == BZ_S_OUTPUT) { + progress_out |= copy_output_until_stop ( s ); + if (s->state_out_pos < s->numZ) break; + if (s->mode == BZ_M_FINISHING && + s->avail_in_expect == 0 && + isempty_RL(s)) break; + prepare_new_block ( s ); + s->state = BZ_S_INPUT; + if (s->mode == BZ_M_FLUSHING && + s->avail_in_expect == 0 && + isempty_RL(s)) break; + } + + if (s->state == BZ_S_INPUT) { + progress_in |= copy_input_until_stop ( s ); + if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { + flush_RL ( s ); + BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); + s->state = BZ_S_OUTPUT; + } + else + if (s->nblock >= s->nblockMAX) { + BZ2_compressBlock ( s, False ); + s->state = BZ_S_OUTPUT; + } + else + if (s->strm->avail_in == 0) { + break; + } + } + + } + + return progress_in || progress_out; +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) +{ + Bool progress; + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + preswitch: + switch (s->mode) { + + case BZ_M_IDLE: + return BZ_SEQUENCE_ERROR; + + case BZ_M_RUNNING: + if (action == BZ_RUN) { + progress = handle_compress ( strm ); + return progress ? 
BZ_RUN_OK : BZ_PARAM_ERROR; } + else + if (action == BZ_FLUSH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FLUSHING; + goto preswitch; + } + else + if (action == BZ_FINISH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FINISHING; + goto preswitch; + } else - if (s->nblock >= s->nblockMAX) { - BZ2_compressBlock ( s, False ); - s->state = BZ_S_OUTPUT; - } - else - if (s->strm->avail_in == 0) { - break; - } - } - - } - - return progress_in || progress_out; -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) -{ - Bool progress; - EState* s; - if (strm == NULL) return BZ_PARAM_ERROR; - s = strm->state; - if (s == NULL) return BZ_PARAM_ERROR; - if (s->strm != strm) return BZ_PARAM_ERROR; - - preswitch: - switch (s->mode) { - - case BZ_M_IDLE: - return BZ_SEQUENCE_ERROR; - - case BZ_M_RUNNING: - if (action == BZ_RUN) { - progress = handle_compress ( strm ); - return progress ? BZ_RUN_OK : BZ_PARAM_ERROR; - } - else - if (action == BZ_FLUSH) { - s->avail_in_expect = strm->avail_in; - s->mode = BZ_M_FLUSHING; - goto preswitch; - } - else - if (action == BZ_FINISH) { - s->avail_in_expect = strm->avail_in; - s->mode = BZ_M_FINISHING; - goto preswitch; - } - else - return BZ_PARAM_ERROR; - - case BZ_M_FLUSHING: - if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; - if (s->avail_in_expect != s->strm->avail_in) - return BZ_SEQUENCE_ERROR; - progress = handle_compress ( strm ); - if (s->avail_in_expect > 0 || !isempty_RL(s) || - s->state_out_pos < s->numZ) return BZ_FLUSH_OK; - s->mode = BZ_M_RUNNING; - return BZ_RUN_OK; - - case BZ_M_FINISHING: - if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; - if (s->avail_in_expect != s->strm->avail_in) - return BZ_SEQUENCE_ERROR; - progress = handle_compress ( strm ); - if (!progress) return BZ_SEQUENCE_ERROR; - if (s->avail_in_expect > 0 || !isempty_RL(s) || - s->state_out_pos < s->numZ) return BZ_FINISH_OK; - s->mode = BZ_M_IDLE; - return 
BZ_STREAM_END; - } - return BZ_OK; /*--not reached--*/ -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) -{ - EState* s; - if (strm == NULL) return BZ_PARAM_ERROR; - s = strm->state; - if (s == NULL) return BZ_PARAM_ERROR; - if (s->strm != strm) return BZ_PARAM_ERROR; - - if (s->arr1 != NULL) BZFREE(s->arr1); - if (s->arr2 != NULL) BZFREE(s->arr2); - if (s->ftab != NULL) BZFREE(s->ftab); - BZFREE(strm->state); - - strm->state = NULL; - - return BZ_OK; -} - - -/*---------------------------------------------------*/ -/*--- Decompression stuff ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzDecompressInit) - ( bz_stream* strm, - int verbosity, - int small ) -{ - DState* s; - - if (!bz_config_ok()) return BZ_CONFIG_ERROR; - - if (strm == NULL) return BZ_PARAM_ERROR; - if (small != 0 && small != 1) return BZ_PARAM_ERROR; - if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR; - - if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; - if (strm->bzfree == NULL) strm->bzfree = default_bzfree; - - s = BZALLOC( sizeof(DState) ); - if (s == NULL) return BZ_MEM_ERROR; - s->strm = strm; - strm->state = s; - s->state = BZ_X_MAGIC_1; - s->bsLive = 0; - s->bsBuff = 0; - s->calculatedCombinedCRC = 0; - strm->total_in_lo32 = 0; - strm->total_in_hi32 = 0; - strm->total_out_lo32 = 0; - strm->total_out_hi32 = 0; - s->smallDecompress = (Bool)small; - s->ll4 = NULL; - s->ll16 = NULL; - s->tt = NULL; - s->currBlockNo = 0; - s->verbosity = verbosity; - - return BZ_OK; -} - - -/*---------------------------------------------------*/ -/* Return True iff data corruption is discovered. - Returns False if there is no problem. 
-*/ -static -Bool unRLE_obuf_to_output_FAST ( DState* s ) -{ - UChar k1; - - if (s->blockRandomised) { - - while (True) { - /* try to finish existing run */ - while (True) { - if (s->strm->avail_out == 0) return False; - if (s->state_out_len == 0) break; - *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; - BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); - s->state_out_len--; - s->strm->next_out++; - s->strm->avail_out--; - s->strm->total_out_lo32++; - if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; - } - - /* can a new run be started? */ - if (s->nblock_used == s->save_nblock+1) return False; - - /* Only caused by corrupt data stream? */ - if (s->nblock_used > s->save_nblock+1) - return True; - - s->state_out_len = 1; - s->state_out_ch = s->k0; - BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - s->state_out_len = 2; - BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - s->state_out_len = 3; - BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - s->state_out_len = ((Int32)k1) + 4; - BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; - s->k0 ^= BZ_RAND_MASK; s->nblock_used++; - } - - } else { - - /* restore */ - UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC; - UChar c_state_out_ch = s->state_out_ch; - Int32 c_state_out_len = s->state_out_len; - Int32 c_nblock_used = s->nblock_used; - Int32 c_k0 = s->k0; - UInt32* c_tt = s->tt; - UInt32 c_tPos = s->tPos; - char* cs_next_out = s->strm->next_out; - unsigned int cs_avail_out = s->strm->avail_out; - Int32 ro_blockSize100k = s->blockSize100k; - /* end restore 
*/ - - UInt32 avail_out_INIT = cs_avail_out; - Int32 s_save_nblockPP = s->save_nblock+1; - unsigned int total_out_lo32_old; - - while (True) { - - /* try to finish existing run */ - if (c_state_out_len > 0) { - while (True) { - if (cs_avail_out == 0) goto return_notr; - if (c_state_out_len == 1) break; - *( (UChar*)(cs_next_out) ) = c_state_out_ch; - BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); - c_state_out_len--; - cs_next_out++; - cs_avail_out--; - } - s_state_out_len_eq_one: - { - if (cs_avail_out == 0) { - c_state_out_len = 1; goto return_notr; - }; - *( (UChar*)(cs_next_out) ) = c_state_out_ch; - BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); - cs_next_out++; - cs_avail_out--; - } - } - /* Only caused by corrupt data stream? */ - if (c_nblock_used > s_save_nblockPP) - return True; - - /* can a new run be started? */ - if (c_nblock_used == s_save_nblockPP) { - c_state_out_len = 0; goto return_notr; - }; - c_state_out_ch = c_k0; - BZ_GET_FAST_C(k1); c_nblock_used++; - if (k1 != c_k0) { - c_k0 = k1; goto s_state_out_len_eq_one; - }; - if (c_nblock_used == s_save_nblockPP) - goto s_state_out_len_eq_one; - - c_state_out_len = 2; - BZ_GET_FAST_C(k1); c_nblock_used++; - if (c_nblock_used == s_save_nblockPP) continue; - if (k1 != c_k0) { c_k0 = k1; continue; }; - - c_state_out_len = 3; - BZ_GET_FAST_C(k1); c_nblock_used++; - if (c_nblock_used == s_save_nblockPP) continue; - if (k1 != c_k0) { c_k0 = k1; continue; }; - - BZ_GET_FAST_C(k1); c_nblock_used++; - c_state_out_len = ((Int32)k1) + 4; - BZ_GET_FAST_C(c_k0); c_nblock_used++; - } - - return_notr: - total_out_lo32_old = s->strm->total_out_lo32; - s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out); - if (s->strm->total_out_lo32 < total_out_lo32_old) - s->strm->total_out_hi32++; - - /* save */ - s->calculatedBlockCRC = c_calculatedBlockCRC; - s->state_out_ch = c_state_out_ch; - s->state_out_len = c_state_out_len; - s->nblock_used = c_nblock_used; - s->k0 = c_k0; - s->tt = c_tt; - s->tPos 
= c_tPos; - s->strm->next_out = cs_next_out; - s->strm->avail_out = cs_avail_out; - /* end save */ - } - return False; -} - - - -/*---------------------------------------------------*/ -__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab ) -{ - Int32 nb, na, mid; - nb = 0; - na = 256; - do { - mid = (nb + na) >> 1; - if (indx >= cftab[mid]) nb = mid; else na = mid; - } - while (na - nb != 1); - return nb; -} - - -/*---------------------------------------------------*/ -/* Return True iff data corruption is discovered. - Returns False if there is no problem. -*/ -static -Bool unRLE_obuf_to_output_SMALL ( DState* s ) -{ - UChar k1; - - if (s->blockRandomised) { - - while (True) { - /* try to finish existing run */ - while (True) { - if (s->strm->avail_out == 0) return False; - if (s->state_out_len == 0) break; - *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; - BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); - s->state_out_len--; - s->strm->next_out++; - s->strm->avail_out--; - s->strm->total_out_lo32++; - if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; - } - - /* can a new run be started? */ - if (s->nblock_used == s->save_nblock+1) return False; - - /* Only caused by corrupt data stream? 
*/ - if (s->nblock_used > s->save_nblock+1) - return True; - - s->state_out_len = 1; - s->state_out_ch = s->k0; - BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - s->state_out_len = 2; - BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - s->state_out_len = 3; - BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; - k1 ^= BZ_RAND_MASK; s->nblock_used++; - s->state_out_len = ((Int32)k1) + 4; - BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; - s->k0 ^= BZ_RAND_MASK; s->nblock_used++; - } - - } else { - - while (True) { - /* try to finish existing run */ - while (True) { - if (s->strm->avail_out == 0) return False; - if (s->state_out_len == 0) break; - *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; - BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); - s->state_out_len--; - s->strm->next_out++; - s->strm->avail_out--; - s->strm->total_out_lo32++; - if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; - } - - /* can a new run be started? */ - if (s->nblock_used == s->save_nblock+1) return False; - - /* Only caused by corrupt data stream? 
*/ - if (s->nblock_used > s->save_nblock+1) - return True; - - s->state_out_len = 1; - s->state_out_ch = s->k0; - BZ_GET_SMALL(k1); s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - s->state_out_len = 2; - BZ_GET_SMALL(k1); s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - s->state_out_len = 3; - BZ_GET_SMALL(k1); s->nblock_used++; - if (s->nblock_used == s->save_nblock+1) continue; - if (k1 != s->k0) { s->k0 = k1; continue; }; - - BZ_GET_SMALL(k1); s->nblock_used++; - s->state_out_len = ((Int32)k1) + 4; - BZ_GET_SMALL(s->k0); s->nblock_used++; - } - - } -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) -{ - Bool corrupt; - DState* s; - if (strm == NULL) return BZ_PARAM_ERROR; - s = strm->state; - if (s == NULL) return BZ_PARAM_ERROR; - if (s->strm != strm) return BZ_PARAM_ERROR; - - while (True) { - if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; - if (s->state == BZ_X_OUTPUT) { - if (s->smallDecompress) - corrupt = unRLE_obuf_to_output_SMALL ( s ); else - corrupt = unRLE_obuf_to_output_FAST ( s ); - if (corrupt) return BZ_DATA_ERROR; - if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { - BZ_FINALISE_CRC ( s->calculatedBlockCRC ); - if (s->verbosity >= 3) - VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, - s->calculatedBlockCRC ); - if (s->verbosity >= 2) VPrintf0 ( "]" ); - if (s->calculatedBlockCRC != s->storedBlockCRC) - return BZ_DATA_ERROR; - s->calculatedCombinedCRC - = (s->calculatedCombinedCRC << 1) | - (s->calculatedCombinedCRC >> 31); - s->calculatedCombinedCRC ^= s->calculatedBlockCRC; - s->state = BZ_X_BLKHDR_1; - } else { - return BZ_OK; - } - } - if (s->state >= BZ_X_MAGIC_1) { - Int32 r = BZ2_decompress ( s ); - if (r == BZ_STREAM_END) { + return BZ_PARAM_ERROR; + + case BZ_M_FLUSHING: + if (action != BZ_FLUSH) return 
BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; + progress = handle_compress ( strm ); + if (s->avail_in_expect > 0 || !isempty_RL(s) || + s->state_out_pos < s->numZ) return BZ_FLUSH_OK; + s->mode = BZ_M_RUNNING; + return BZ_RUN_OK; + + case BZ_M_FINISHING: + if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; + progress = handle_compress ( strm ); + if (!progress) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect > 0 || !isempty_RL(s) || + s->state_out_pos < s->numZ) return BZ_FINISH_OK; + s->mode = BZ_M_IDLE; + return BZ_STREAM_END; + } + return BZ_OK; /*--not reached--*/ +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) +{ + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + if (s->arr1 != NULL) BZFREE(s->arr1); + if (s->arr2 != NULL) BZFREE(s->arr2); + if (s->ftab != NULL) BZFREE(s->ftab); + BZFREE(strm->state); + + strm->state = NULL; + + return BZ_OK; +} + + +/*---------------------------------------------------*/ +/*--- Decompression stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzDecompressInit) + ( bz_stream* strm, + int verbosity, + int small ) +{ + DState* s; + + if (!bz_config_ok()) return BZ_CONFIG_ERROR; + + if (strm == NULL) return BZ_PARAM_ERROR; + if (small != 0 && small != 1) return BZ_PARAM_ERROR; + if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR; + + if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; + if (strm->bzfree == NULL) strm->bzfree = default_bzfree; + + s = BZALLOC( sizeof(DState) ); + if (s == NULL) return BZ_MEM_ERROR; + s->strm = strm; + strm->state = s; + s->state = BZ_X_MAGIC_1; + s->bsLive = 0; + s->bsBuff = 0; + 
s->calculatedCombinedCRC = 0; + strm->total_in_lo32 = 0; + strm->total_in_hi32 = 0; + strm->total_out_lo32 = 0; + strm->total_out_hi32 = 0; + s->smallDecompress = (Bool)small; + s->ll4 = NULL; + s->ll16 = NULL; + s->tt = NULL; + s->currBlockNo = 0; + s->verbosity = verbosity; + + return BZ_OK; +} + + +/*---------------------------------------------------*/ +/* Return True iff data corruption is discovered. + Returns False if there is no problem. +*/ +static +Bool unRLE_obuf_to_output_FAST ( DState* s ) +{ + UChar k1; + + if (s->blockRandomised) { + + while (True) { + /* try to finish existing run */ + while (True) { + if (s->strm->avail_out == 0) return False; + if (s->state_out_len == 0) break; + *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; + BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); + s->state_out_len--; + s->strm->next_out++; + s->strm->avail_out--; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; + } + + /* can a new run be started? */ + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? 
*/ + if (s->nblock_used > s->save_nblock+1) + return True; + + s->state_out_len = 1; + s->state_out_ch = s->k0; + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 2; + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 3; + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + s->state_out_len = ((Int32)k1) + 4; + BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; + s->k0 ^= BZ_RAND_MASK; s->nblock_used++; + } + + } else { + + /* restore */ + UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC; + UChar c_state_out_ch = s->state_out_ch; + Int32 c_state_out_len = s->state_out_len; + Int32 c_nblock_used = s->nblock_used; + Int32 c_k0 = s->k0; + UInt32* c_tt = s->tt; + UInt32 c_tPos = s->tPos; + char* cs_next_out = s->strm->next_out; + unsigned int cs_avail_out = s->strm->avail_out; + Int32 ro_blockSize100k = s->blockSize100k; + /* end restore */ + + UInt32 avail_out_INIT = cs_avail_out; + Int32 s_save_nblockPP = s->save_nblock+1; + unsigned int total_out_lo32_old; + + while (True) { + + /* try to finish existing run */ + if (c_state_out_len > 0) { + while (True) { + if (cs_avail_out == 0) goto return_notr; + if (c_state_out_len == 1) break; + *( (UChar*)(cs_next_out) ) = c_state_out_ch; + BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); + c_state_out_len--; + cs_next_out++; + cs_avail_out--; + } + s_state_out_len_eq_one: + { + if (cs_avail_out == 0) { + c_state_out_len = 1; goto return_notr; + }; + *( (UChar*)(cs_next_out) ) = c_state_out_ch; + BZ_UPDATE_CRC ( c_calculatedBlockCRC, 
c_state_out_ch ); + cs_next_out++; + cs_avail_out--; + } + } + /* Only caused by corrupt data stream? */ + if (c_nblock_used > s_save_nblockPP) + return True; + + /* can a new run be started? */ + if (c_nblock_used == s_save_nblockPP) { + c_state_out_len = 0; goto return_notr; + }; + c_state_out_ch = c_k0; + BZ_GET_FAST_C(k1); c_nblock_used++; + if (k1 != c_k0) { + c_k0 = k1; goto s_state_out_len_eq_one; + }; + if (c_nblock_used == s_save_nblockPP) + goto s_state_out_len_eq_one; + + c_state_out_len = 2; + BZ_GET_FAST_C(k1); c_nblock_used++; + if (c_nblock_used == s_save_nblockPP) continue; + if (k1 != c_k0) { c_k0 = k1; continue; }; + + c_state_out_len = 3; + BZ_GET_FAST_C(k1); c_nblock_used++; + if (c_nblock_used == s_save_nblockPP) continue; + if (k1 != c_k0) { c_k0 = k1; continue; }; + + BZ_GET_FAST_C(k1); c_nblock_used++; + c_state_out_len = ((Int32)k1) + 4; + BZ_GET_FAST_C(c_k0); c_nblock_used++; + } + + return_notr: + total_out_lo32_old = s->strm->total_out_lo32; + s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out); + if (s->strm->total_out_lo32 < total_out_lo32_old) + s->strm->total_out_hi32++; + + /* save */ + s->calculatedBlockCRC = c_calculatedBlockCRC; + s->state_out_ch = c_state_out_ch; + s->state_out_len = c_state_out_len; + s->nblock_used = c_nblock_used; + s->k0 = c_k0; + s->tt = c_tt; + s->tPos = c_tPos; + s->strm->next_out = cs_next_out; + s->strm->avail_out = cs_avail_out; + /* end save */ + } + return False; +} + + + +/*---------------------------------------------------*/ +__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab ) +{ + Int32 nb, na, mid; + nb = 0; + na = 256; + do { + mid = (nb + na) >> 1; + if (indx >= cftab[mid]) nb = mid; else na = mid; + } + while (na - nb != 1); + return nb; +} + + +/*---------------------------------------------------*/ +/* Return True iff data corruption is discovered. + Returns False if there is no problem. 
+*/ +static +Bool unRLE_obuf_to_output_SMALL ( DState* s ) +{ + UChar k1; + + if (s->blockRandomised) { + + while (True) { + /* try to finish existing run */ + while (True) { + if (s->strm->avail_out == 0) return False; + if (s->state_out_len == 0) break; + *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; + BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); + s->state_out_len--; + s->strm->next_out++; + s->strm->avail_out--; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; + } + + /* can a new run be started? */ + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; + + s->state_out_len = 1; + s->state_out_ch = s->k0; + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 2; + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 3; + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; + k1 ^= BZ_RAND_MASK; s->nblock_used++; + s->state_out_len = ((Int32)k1) + 4; + BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; + s->k0 ^= BZ_RAND_MASK; s->nblock_used++; + } + + } else { + + while (True) { + /* try to finish existing run */ + while (True) { + if (s->strm->avail_out == 0) return False; + if (s->state_out_len == 0) break; + *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; + BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); + s->state_out_len--; + s->strm->next_out++; + s->strm->avail_out--; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) 
s->strm->total_out_hi32++; + } + + /* can a new run be started? */ + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; + + s->state_out_len = 1; + s->state_out_ch = s->k0; + BZ_GET_SMALL(k1); s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 2; + BZ_GET_SMALL(k1); s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + s->state_out_len = 3; + BZ_GET_SMALL(k1); s->nblock_used++; + if (s->nblock_used == s->save_nblock+1) continue; + if (k1 != s->k0) { s->k0 = k1; continue; }; + + BZ_GET_SMALL(k1); s->nblock_used++; + s->state_out_len = ((Int32)k1) + 4; + BZ_GET_SMALL(s->k0); s->nblock_used++; + } + + } +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) +{ + Bool corrupt; + DState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + while (True) { + if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; + if (s->state == BZ_X_OUTPUT) { + if (s->smallDecompress) + corrupt = unRLE_obuf_to_output_SMALL ( s ); else + corrupt = unRLE_obuf_to_output_FAST ( s ); + if (corrupt) return BZ_DATA_ERROR; + if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { + BZ_FINALISE_CRC ( s->calculatedBlockCRC ); if (s->verbosity >= 3) - VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x", - s->storedCombinedCRC, s->calculatedCombinedCRC ); - if (s->calculatedCombinedCRC != s->storedCombinedCRC) - return BZ_DATA_ERROR; - return r; - } - if (s->state != BZ_X_OUTPUT) return r; - } - } - - AssertH ( 0, 6001 ); - - return 0; /*NOTREACHED*/ -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm 
) -{ - DState* s; - if (strm == NULL) return BZ_PARAM_ERROR; - s = strm->state; - if (s == NULL) return BZ_PARAM_ERROR; - if (s->strm != strm) return BZ_PARAM_ERROR; - - if (s->tt != NULL) BZFREE(s->tt); - if (s->ll16 != NULL) BZFREE(s->ll16); - if (s->ll4 != NULL) BZFREE(s->ll4); - - BZFREE(strm->state); - strm->state = NULL; - - return BZ_OK; -} - - -#ifndef BZ_NO_STDIO -/*---------------------------------------------------*/ -/*--- File I/O stuff ---*/ -/*---------------------------------------------------*/ - -#define BZ_SETERR(eee) \ -{ \ - if (bzerror != NULL) *bzerror = eee; \ - if (bzf != NULL) bzf->lastErr = eee; \ -} - -typedef - struct { - FILE* handle; - Char buf[BZ_MAX_UNUSED]; - Int32 bufN; - Bool writing; - bz_stream strm; - Int32 lastErr; - Bool initialisedOk; - } - bzFile; - - -/*---------------------------------------------*/ -static Bool myfeof ( FILE* f ) -{ - Int32 c = fgetc ( f ); - if (c == EOF) return True; - ungetc ( c, f ); - return False; -} - - -/*---------------------------------------------------*/ -BZFILE* BZ_API(BZ2_bzWriteOpen) - ( int* bzerror, - FILE* f, - int blockSize100k, - int verbosity, - int workFactor ) -{ - Int32 ret; - bzFile* bzf = NULL; - - BZ_SETERR(BZ_OK); - + VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, + s->calculatedBlockCRC ); + if (s->verbosity >= 2) VPrintf0 ( "]" ); + if (s->calculatedBlockCRC != s->storedBlockCRC) + return BZ_DATA_ERROR; + s->calculatedCombinedCRC + = (s->calculatedCombinedCRC << 1) | + (s->calculatedCombinedCRC >> 31); + s->calculatedCombinedCRC ^= s->calculatedBlockCRC; + s->state = BZ_X_BLKHDR_1; + } else { + return BZ_OK; + } + } + if (s->state >= BZ_X_MAGIC_1) { + Int32 r = BZ2_decompress ( s ); + if (r == BZ_STREAM_END) { + if (s->verbosity >= 3) + VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x", + s->storedCombinedCRC, s->calculatedCombinedCRC ); + if (s->calculatedCombinedCRC != s->storedCombinedCRC) + return BZ_DATA_ERROR; + return r; + } + if (s->state != 
BZ_X_OUTPUT) return r; + } + } + + AssertH ( 0, 6001 ); + + return 0; /*NOTREACHED*/ +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm ) +{ + DState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + if (s->tt != NULL) BZFREE(s->tt); + if (s->ll16 != NULL) BZFREE(s->ll16); + if (s->ll4 != NULL) BZFREE(s->ll4); + + BZFREE(strm->state); + strm->state = NULL; + + return BZ_OK; +} + + +#ifndef BZ_NO_STDIO +/*---------------------------------------------------*/ +/*--- File I/O stuff ---*/ +/*---------------------------------------------------*/ + +#define BZ_SETERR(eee) \ +{ \ + if (bzerror != NULL) *bzerror = eee; \ + if (bzf != NULL) bzf->lastErr = eee; \ +} + +typedef + struct { + FILE* handle; + Char buf[BZ_MAX_UNUSED]; + Int32 bufN; + Bool writing; + bz_stream strm; + Int32 lastErr; + Bool initialisedOk; + } + bzFile; + + +/*---------------------------------------------*/ +static Bool myfeof ( FILE* f ) +{ + Int32 c = fgetc ( f ); + if (c == EOF) return True; + ungetc ( c, f ); + return False; +} + + +/*---------------------------------------------------*/ +BZFILE* BZ_API(BZ2_bzWriteOpen) + ( int* bzerror, + FILE* f, + int blockSize100k, + int verbosity, + int workFactor ) +{ + Int32 ret; + bzFile* bzf = NULL; + + BZ_SETERR(BZ_OK); + + if (f == NULL || + (blockSize100k < 1 || blockSize100k > 9) || + (workFactor < 0 || workFactor > 250) || + (verbosity < 0 || verbosity > 4)) + { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; + + if (ferror(f)) + { BZ_SETERR(BZ_IO_ERROR); return NULL; }; + + bzf = malloc ( sizeof(bzFile) ); + if (bzf == NULL) + { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; + + BZ_SETERR(BZ_OK); + bzf->initialisedOk = False; + bzf->bufN = 0; + bzf->handle = f; + bzf->writing = True; + bzf->strm.bzalloc = NULL; + bzf->strm.bzfree = NULL; + bzf->strm.opaque = NULL; + + if (workFactor == 0) 
workFactor = 30; + ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, + verbosity, workFactor ); + if (ret != BZ_OK) + { BZ_SETERR(ret); free(bzf); return NULL; }; + + bzf->strm.avail_in = 0; + bzf->initialisedOk = True; + return bzf; +} + + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzWrite) + ( int* bzerror, + BZFILE* b, + void* buf, + int len ) +{ + Int32 n, n2, ret; + bzFile* bzf = (bzFile*)b; + + BZ_SETERR(BZ_OK); + if (bzf == NULL || buf == NULL || len < 0) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + if (!(bzf->writing)) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + + if (len == 0) + { BZ_SETERR(BZ_OK); return; }; + + bzf->strm.avail_in = len; + bzf->strm.next_in = buf; + + while (True) { + bzf->strm.avail_out = BZ_MAX_UNUSED; + bzf->strm.next_out = bzf->buf; + ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN ); + if (ret != BZ_RUN_OK) + { BZ_SETERR(ret); return; }; + + if (bzf->strm.avail_out < BZ_MAX_UNUSED) { + n = BZ_MAX_UNUSED - bzf->strm.avail_out; + n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), + n, bzf->handle ); + if (n != n2 || ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (bzf->strm.avail_in == 0) + { BZ_SETERR(BZ_OK); return; }; + } +} + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzWriteClose) + ( int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in, + unsigned int* nbytes_out ) +{ + BZ2_bzWriteClose64 ( bzerror, b, abandon, + nbytes_in, NULL, nbytes_out, NULL ); +} + + +void BZ_API(BZ2_bzWriteClose64) + ( int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in_lo32, + unsigned int* nbytes_in_hi32, + unsigned int* nbytes_out_lo32, + unsigned int* nbytes_out_hi32 ) +{ + Int32 n, n2, ret; + bzFile* bzf = (bzFile*)b; + + if (bzf == NULL) + { BZ_SETERR(BZ_OK); return; }; + if (!(bzf->writing)) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (ferror(bzf->handle)) + { 
BZ_SETERR(BZ_IO_ERROR); return; }; + + if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0; + if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0; + if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0; + if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0; + + if ((!abandon) && bzf->lastErr == BZ_OK) { + while (True) { + bzf->strm.avail_out = BZ_MAX_UNUSED; + bzf->strm.next_out = bzf->buf; + ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH ); + if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) + { BZ_SETERR(ret); return; }; + + if (bzf->strm.avail_out < BZ_MAX_UNUSED) { + n = BZ_MAX_UNUSED - bzf->strm.avail_out; + n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), + n, bzf->handle ); + if (n != n2 || ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (ret == BZ_STREAM_END) break; + } + } + + if ( !abandon && !ferror ( bzf->handle ) ) { + fflush ( bzf->handle ); + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return; }; + } + + if (nbytes_in_lo32 != NULL) + *nbytes_in_lo32 = bzf->strm.total_in_lo32; + if (nbytes_in_hi32 != NULL) + *nbytes_in_hi32 = bzf->strm.total_in_hi32; + if (nbytes_out_lo32 != NULL) + *nbytes_out_lo32 = bzf->strm.total_out_lo32; + if (nbytes_out_hi32 != NULL) + *nbytes_out_hi32 = bzf->strm.total_out_hi32; + + BZ_SETERR(BZ_OK); + BZ2_bzCompressEnd ( &(bzf->strm) ); + free ( bzf ); +} + + +/*---------------------------------------------------*/ +BZFILE* BZ_API(BZ2_bzReadOpen) + ( int* bzerror, + FILE* f, + int verbosity, + int small, + void* unused, + int nUnused ) +{ + bzFile* bzf = NULL; + int ret; + + BZ_SETERR(BZ_OK); + if (f == NULL || - (blockSize100k < 1 || blockSize100k > 9) || - (workFactor < 0 || workFactor > 250) || - (verbosity < 0 || verbosity > 4)) - { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; - - if (ferror(f)) - { BZ_SETERR(BZ_IO_ERROR); return NULL; }; - - bzf = malloc ( sizeof(bzFile) ); - if (bzf == NULL) - { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; - - BZ_SETERR(BZ_OK); - bzf->initialisedOk = False; - bzf->bufN = 0; - 
bzf->handle = f; - bzf->writing = True; - bzf->strm.bzalloc = NULL; - bzf->strm.bzfree = NULL; - bzf->strm.opaque = NULL; - - if (workFactor == 0) workFactor = 30; - ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, - verbosity, workFactor ); - if (ret != BZ_OK) - { BZ_SETERR(ret); free(bzf); return NULL; }; - - bzf->strm.avail_in = 0; - bzf->initialisedOk = True; - return bzf; -} - - - -/*---------------------------------------------------*/ -void BZ_API(BZ2_bzWrite) - ( int* bzerror, - BZFILE* b, - void* buf, - int len ) -{ - Int32 n, n2, ret; - bzFile* bzf = (bzFile*)b; - - BZ_SETERR(BZ_OK); - if (bzf == NULL || buf == NULL || len < 0) - { BZ_SETERR(BZ_PARAM_ERROR); return; }; - if (!(bzf->writing)) - { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; - if (ferror(bzf->handle)) - { BZ_SETERR(BZ_IO_ERROR); return; }; - - if (len == 0) - { BZ_SETERR(BZ_OK); return; }; - - bzf->strm.avail_in = len; - bzf->strm.next_in = buf; - - while (True) { - bzf->strm.avail_out = BZ_MAX_UNUSED; - bzf->strm.next_out = bzf->buf; - ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN ); - if (ret != BZ_RUN_OK) - { BZ_SETERR(ret); return; }; - - if (bzf->strm.avail_out < BZ_MAX_UNUSED) { - n = BZ_MAX_UNUSED - bzf->strm.avail_out; - n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), - n, bzf->handle ); - if (n != n2 || ferror(bzf->handle)) - { BZ_SETERR(BZ_IO_ERROR); return; }; - } - - if (bzf->strm.avail_in == 0) - { BZ_SETERR(BZ_OK); return; }; - } -} - - -/*---------------------------------------------------*/ -void BZ_API(BZ2_bzWriteClose) - ( int* bzerror, - BZFILE* b, - int abandon, - unsigned int* nbytes_in, - unsigned int* nbytes_out ) -{ - BZ2_bzWriteClose64 ( bzerror, b, abandon, - nbytes_in, NULL, nbytes_out, NULL ); -} - - -void BZ_API(BZ2_bzWriteClose64) - ( int* bzerror, - BZFILE* b, - int abandon, - unsigned int* nbytes_in_lo32, - unsigned int* nbytes_in_hi32, - unsigned int* nbytes_out_lo32, - unsigned int* nbytes_out_hi32 ) -{ - Int32 n, n2, ret; - bzFile* bzf = (bzFile*)b; - + 
(small != 0 && small != 1) || + (verbosity < 0 || verbosity > 4) || + (unused == NULL && nUnused != 0) || + (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED))) + { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; + + if (ferror(f)) + { BZ_SETERR(BZ_IO_ERROR); return NULL; }; + + bzf = malloc ( sizeof(bzFile) ); if (bzf == NULL) - { BZ_SETERR(BZ_OK); return; }; - if (!(bzf->writing)) - { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; - if (ferror(bzf->handle)) - { BZ_SETERR(BZ_IO_ERROR); return; }; - - if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0; - if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0; - if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0; - if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0; - - if ((!abandon) && bzf->lastErr == BZ_OK) { - while (True) { - bzf->strm.avail_out = BZ_MAX_UNUSED; - bzf->strm.next_out = bzf->buf; - ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH ); - if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) - { BZ_SETERR(ret); return; }; - - if (bzf->strm.avail_out < BZ_MAX_UNUSED) { - n = BZ_MAX_UNUSED - bzf->strm.avail_out; - n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), - n, bzf->handle ); - if (n != n2 || ferror(bzf->handle)) - { BZ_SETERR(BZ_IO_ERROR); return; }; - } - - if (ret == BZ_STREAM_END) break; - } - } - - if ( !abandon && !ferror ( bzf->handle ) ) { - fflush ( bzf->handle ); + { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; + + BZ_SETERR(BZ_OK); + + bzf->initialisedOk = False; + bzf->handle = f; + bzf->bufN = 0; + bzf->writing = False; + bzf->strm.bzalloc = NULL; + bzf->strm.bzfree = NULL; + bzf->strm.opaque = NULL; + + while (nUnused > 0) { + bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++; + unused = ((void*)( 1 + ((UChar*)(unused)) )); + nUnused--; + } + + ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small ); + if (ret != BZ_OK) + { BZ_SETERR(ret); free(bzf); return NULL; }; + + bzf->strm.avail_in = bzf->bufN; + bzf->strm.next_in = bzf->buf; + + bzf->initialisedOk = True; + return bzf; +} + + 
+/*---------------------------------------------------*/ +void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b ) +{ + bzFile* bzf = (bzFile*)b; + + BZ_SETERR(BZ_OK); + if (bzf == NULL) + { BZ_SETERR(BZ_OK); return; }; + + if (bzf->writing) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + + if (bzf->initialisedOk) + (void)BZ2_bzDecompressEnd ( &(bzf->strm) ); + free ( bzf ); +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzRead) + ( int* bzerror, + BZFILE* b, + void* buf, + int len ) +{ + Int32 n, ret; + bzFile* bzf = (bzFile*)b; + + BZ_SETERR(BZ_OK); + + if (bzf == NULL || buf == NULL || len < 0) + { BZ_SETERR(BZ_PARAM_ERROR); return 0; }; + + if (bzf->writing) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; }; + + if (len == 0) + { BZ_SETERR(BZ_OK); return 0; }; + + bzf->strm.avail_out = len; + bzf->strm.next_out = buf; + + while (True) { + if (ferror(bzf->handle)) - { BZ_SETERR(BZ_IO_ERROR); return; }; - } - - if (nbytes_in_lo32 != NULL) - *nbytes_in_lo32 = bzf->strm.total_in_lo32; - if (nbytes_in_hi32 != NULL) - *nbytes_in_hi32 = bzf->strm.total_in_hi32; - if (nbytes_out_lo32 != NULL) - *nbytes_out_lo32 = bzf->strm.total_out_lo32; - if (nbytes_out_hi32 != NULL) - *nbytes_out_hi32 = bzf->strm.total_out_hi32; - - BZ_SETERR(BZ_OK); - BZ2_bzCompressEnd ( &(bzf->strm) ); - free ( bzf ); -} - - -/*---------------------------------------------------*/ -BZFILE* BZ_API(BZ2_bzReadOpen) - ( int* bzerror, - FILE* f, - int verbosity, - int small, - void* unused, - int nUnused ) -{ - bzFile* bzf = NULL; - int ret; - - BZ_SETERR(BZ_OK); - - if (f == NULL || - (small != 0 && small != 1) || - (verbosity < 0 || verbosity > 4) || - (unused == NULL && nUnused != 0) || - (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED))) - { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; - - if (ferror(f)) - { BZ_SETERR(BZ_IO_ERROR); return NULL; }; - - bzf = malloc ( sizeof(bzFile) ); - if (bzf == NULL) - { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; - - 
BZ_SETERR(BZ_OK); - - bzf->initialisedOk = False; - bzf->handle = f; - bzf->bufN = 0; - bzf->writing = False; - bzf->strm.bzalloc = NULL; - bzf->strm.bzfree = NULL; - bzf->strm.opaque = NULL; - - while (nUnused > 0) { - bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++; - unused = ((void*)( 1 + ((UChar*)(unused)) )); - nUnused--; - } - - ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small ); - if (ret != BZ_OK) - { BZ_SETERR(ret); free(bzf); return NULL; }; - - bzf->strm.avail_in = bzf->bufN; - bzf->strm.next_in = bzf->buf; - - bzf->initialisedOk = True; - return bzf; -} - - -/*---------------------------------------------------*/ -void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b ) -{ - bzFile* bzf = (bzFile*)b; - - BZ_SETERR(BZ_OK); - if (bzf == NULL) - { BZ_SETERR(BZ_OK); return; }; - - if (bzf->writing) - { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; - - if (bzf->initialisedOk) - (void)BZ2_bzDecompressEnd ( &(bzf->strm) ); - free ( bzf ); -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzRead) - ( int* bzerror, - BZFILE* b, - void* buf, - int len ) -{ - Int32 n, ret; - bzFile* bzf = (bzFile*)b; - - BZ_SETERR(BZ_OK); - - if (bzf == NULL || buf == NULL || len < 0) - { BZ_SETERR(BZ_PARAM_ERROR); return 0; }; - - if (bzf->writing) - { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; }; - - if (len == 0) - { BZ_SETERR(BZ_OK); return 0; }; - - bzf->strm.avail_out = len; - bzf->strm.next_out = buf; - - while (True) { - - if (ferror(bzf->handle)) - { BZ_SETERR(BZ_IO_ERROR); return 0; }; - - if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) { - n = fread ( bzf->buf, sizeof(UChar), - BZ_MAX_UNUSED, bzf->handle ); - if (ferror(bzf->handle)) - { BZ_SETERR(BZ_IO_ERROR); return 0; }; - bzf->bufN = n; - bzf->strm.avail_in = bzf->bufN; - bzf->strm.next_in = bzf->buf; - } - - ret = BZ2_bzDecompress ( &(bzf->strm) ); - - if (ret != BZ_OK && ret != BZ_STREAM_END) - { BZ_SETERR(ret); return 0; }; - - if (ret == BZ_OK && myfeof(bzf->handle) 
&& - bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) - { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; }; - - if (ret == BZ_STREAM_END) - { BZ_SETERR(BZ_STREAM_END); - return len - bzf->strm.avail_out; }; - if (bzf->strm.avail_out == 0) - { BZ_SETERR(BZ_OK); return len; }; - - } - - return 0; /*not reached*/ -} - - -/*---------------------------------------------------*/ -void BZ_API(BZ2_bzReadGetUnused) - ( int* bzerror, - BZFILE* b, - void** unused, - int* nUnused ) -{ - bzFile* bzf = (bzFile*)b; - if (bzf == NULL) - { BZ_SETERR(BZ_PARAM_ERROR); return; }; - if (bzf->lastErr != BZ_STREAM_END) - { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; - if (unused == NULL || nUnused == NULL) - { BZ_SETERR(BZ_PARAM_ERROR); return; }; - - BZ_SETERR(BZ_OK); - *nUnused = bzf->strm.avail_in; - *unused = bzf->strm.next_in; -} -#endif - - -/*---------------------------------------------------*/ -/*--- Misc convenience stuff ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzBuffToBuffCompress) - ( char* dest, - unsigned int* destLen, - char* source, - unsigned int sourceLen, - int blockSize100k, - int verbosity, - int workFactor ) -{ - bz_stream strm; - int ret; - - if (dest == NULL || destLen == NULL || - source == NULL || - blockSize100k < 1 || blockSize100k > 9 || - verbosity < 0 || verbosity > 4 || - workFactor < 0 || workFactor > 250) - return BZ_PARAM_ERROR; - - if (workFactor == 0) workFactor = 30; - strm.bzalloc = NULL; - strm.bzfree = NULL; - strm.opaque = NULL; - ret = BZ2_bzCompressInit ( &strm, blockSize100k, - verbosity, workFactor ); - if (ret != BZ_OK) return ret; - - strm.next_in = source; - strm.next_out = dest; - strm.avail_in = sourceLen; - strm.avail_out = *destLen; - - ret = BZ2_bzCompress ( &strm, BZ_FINISH ); - if (ret == BZ_FINISH_OK) goto output_overflow; - if (ret != BZ_STREAM_END) goto errhandler; - - /* normal termination */ - *destLen -= strm.avail_out; - 
BZ2_bzCompressEnd ( &strm ); - return BZ_OK; - - output_overflow: - BZ2_bzCompressEnd ( &strm ); - return BZ_OUTBUFF_FULL; - - errhandler: - BZ2_bzCompressEnd ( &strm ); + { BZ_SETERR(BZ_IO_ERROR); return 0; }; + + if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) { + n = fread ( bzf->buf, sizeof(UChar), + BZ_MAX_UNUSED, bzf->handle ); + if (ferror(bzf->handle)) + { BZ_SETERR(BZ_IO_ERROR); return 0; }; + bzf->bufN = n; + bzf->strm.avail_in = bzf->bufN; + bzf->strm.next_in = bzf->buf; + } + + ret = BZ2_bzDecompress ( &(bzf->strm) ); + + if (ret != BZ_OK && ret != BZ_STREAM_END) + { BZ_SETERR(ret); return 0; }; + + if (ret == BZ_OK && myfeof(bzf->handle) && + bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) + { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; }; + + if (ret == BZ_STREAM_END) + { BZ_SETERR(BZ_STREAM_END); + return len - bzf->strm.avail_out; }; + if (bzf->strm.avail_out == 0) + { BZ_SETERR(BZ_OK); return len; }; + + } + + return 0; /*not reached*/ +} + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzReadGetUnused) + ( int* bzerror, + BZFILE* b, + void** unused, + int* nUnused ) +{ + bzFile* bzf = (bzFile*)b; + if (bzf == NULL) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + if (bzf->lastErr != BZ_STREAM_END) + { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; + if (unused == NULL || nUnused == NULL) + { BZ_SETERR(BZ_PARAM_ERROR); return; }; + + BZ_SETERR(BZ_OK); + *nUnused = bzf->strm.avail_in; + *unused = bzf->strm.next_in; +} +#endif + + +/*---------------------------------------------------*/ +/*--- Misc convenience stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzBuffToBuffCompress) + ( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k, + int verbosity, + int workFactor ) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL || + source == NULL || + blockSize100k < 1 || 
blockSize100k > 9 || + verbosity < 0 || verbosity > 4 || + workFactor < 0 || workFactor > 250) + return BZ_PARAM_ERROR; + + if (workFactor == 0) workFactor = 30; + strm.bzalloc = NULL; + strm.bzfree = NULL; + strm.opaque = NULL; + ret = BZ2_bzCompressInit ( &strm, blockSize100k, + verbosity, workFactor ); + if (ret != BZ_OK) return ret; + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzCompress ( &strm, BZ_FINISH ); + if (ret == BZ_FINISH_OK) goto output_overflow; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzCompressEnd ( &strm ); + return BZ_OK; + + output_overflow: + BZ2_bzCompressEnd ( &strm ); + return BZ_OUTBUFF_FULL; + + errhandler: + BZ2_bzCompressEnd ( &strm ); + return ret; +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzBuffToBuffDecompress) + ( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int small, + int verbosity ) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL || + source == NULL || + (small != 0 && small != 1) || + verbosity < 0 || verbosity > 4) + return BZ_PARAM_ERROR; + + strm.bzalloc = NULL; + strm.bzfree = NULL; + strm.opaque = NULL; + ret = BZ2_bzDecompressInit ( &strm, verbosity, small ); + if (ret != BZ_OK) return ret; + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzDecompress ( &strm ); + if (ret == BZ_OK) goto output_overflow_or_eof; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzDecompressEnd ( &strm ); + return BZ_OK; + + output_overflow_or_eof: + if (strm.avail_out > 0) { + BZ2_bzDecompressEnd ( &strm ); + return BZ_UNEXPECTED_EOF; + } else { + BZ2_bzDecompressEnd ( &strm ); + return BZ_OUTBUFF_FULL; + }; + + errhandler: + BZ2_bzDecompressEnd ( &strm ); return ret; 
-} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzBuffToBuffDecompress) - ( char* dest, - unsigned int* destLen, - char* source, - unsigned int sourceLen, - int small, - int verbosity ) -{ - bz_stream strm; - int ret; - - if (dest == NULL || destLen == NULL || - source == NULL || - (small != 0 && small != 1) || - verbosity < 0 || verbosity > 4) - return BZ_PARAM_ERROR; - - strm.bzalloc = NULL; - strm.bzfree = NULL; - strm.opaque = NULL; - ret = BZ2_bzDecompressInit ( &strm, verbosity, small ); - if (ret != BZ_OK) return ret; - - strm.next_in = source; - strm.next_out = dest; - strm.avail_in = sourceLen; - strm.avail_out = *destLen; - - ret = BZ2_bzDecompress ( &strm ); - if (ret == BZ_OK) goto output_overflow_or_eof; - if (ret != BZ_STREAM_END) goto errhandler; - - /* normal termination */ - *destLen -= strm.avail_out; - BZ2_bzDecompressEnd ( &strm ); - return BZ_OK; - - output_overflow_or_eof: - if (strm.avail_out > 0) { - BZ2_bzDecompressEnd ( &strm ); - return BZ_UNEXPECTED_EOF; - } else { - BZ2_bzDecompressEnd ( &strm ); - return BZ_OUTBUFF_FULL; - }; - - errhandler: - BZ2_bzDecompressEnd ( &strm ); - return ret; -} - - -/*---------------------------------------------------*/ -/*-- - Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp) - to support better zlib compatibility. - This code is not _officially_ part of libbzip2 (yet); - I haven't tested it, documented it, or considered the - threading-safeness of it. - If this code breaks, please contact both Yoshioka and me. ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -/*-- - return version like "0.9.5d, 4-Sept-1999". 
---*/ -const char * BZ_API(BZ2_bzlibVersion)(void) -{ - return BZ_VERSION; -} - - -#ifndef BZ_NO_STDIO -/*---------------------------------------------------*/ - -#if defined(_WIN32) || defined(OS2) || defined(MSDOS) -# include -# include -# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY) -#else -# define SET_BINARY_MODE(file) -#endif -static -BZFILE * bzopen_or_bzdopen - ( const char *path, /* no use when bzdopen */ - int fd, /* no use when bzdopen */ - const char *mode, - int open_mode) /* bzopen: 0, bzdopen:1 */ -{ - int bzerr; - char unused[BZ_MAX_UNUSED]; - int blockSize100k = 9; - int writing = 0; - char mode2[10] = ""; - FILE *fp = NULL; - BZFILE *bzfp = NULL; - int verbosity = 0; - int workFactor = 30; - int smallMode = 0; - int nUnused = 0; - - if (mode == NULL) return NULL; - while (*mode) { - switch (*mode) { - case 'r': - writing = 0; break; - case 'w': - writing = 1; break; - case 's': - smallMode = 1; break; - default: - if (isdigit((int)(*mode))) { - blockSize100k = *mode-BZ_HDR_0; - } - } - mode++; - } - strcat(mode2, writing ? "w" : "r" ); - strcat(mode2,"b"); /* binary mode */ - - if (open_mode==0) { - if (path==NULL || strcmp(path,"")==0) { - fp = (writing ? stdout : stdin); - SET_BINARY_MODE(fp); - } else { - fp = fopen(path,mode2); - } - } else { -#ifdef BZ_STRICT_ANSI - fp = NULL; -#else - fp = fdopen(fd,mode2); -#endif - } - if (fp == NULL) return NULL; - - if (writing) { - /* Guard against total chaos and anarchy -- JRS */ - if (blockSize100k < 1) blockSize100k = 1; - if (blockSize100k > 9) blockSize100k = 9; - bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k, - verbosity,workFactor); - } else { - bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode, - unused,nUnused); - } - if (bzfp == NULL) { - if (fp != stdin && fp != stdout) fclose(fp); - return NULL; - } - return bzfp; -} - - -/*---------------------------------------------------*/ -/*-- - open file for read or write. 
- ex) bzopen("file","w9") - case path="" or NULL => use stdin or stdout. ---*/ -BZFILE * BZ_API(BZ2_bzopen) - ( const char *path, - const char *mode ) -{ - return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0); -} - - -/*---------------------------------------------------*/ -BZFILE * BZ_API(BZ2_bzdopen) - ( int fd, - const char *mode ) -{ - return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1); -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len ) -{ - int bzerr, nread; - if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0; - nread = BZ2_bzRead(&bzerr,b,buf,len); - if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) { - return nread; - } else { - return -1; - } -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len ) -{ - int bzerr; - - BZ2_bzWrite(&bzerr,b,buf,len); - if(bzerr == BZ_OK){ - return len; - }else{ - return -1; - } -} - - -/*---------------------------------------------------*/ -int BZ_API(BZ2_bzflush) (BZFILE *b) -{ - /* do nothing now... */ - return 0; -} - - -/*---------------------------------------------------*/ -void BZ_API(BZ2_bzclose) (BZFILE* b) -{ - int bzerr; - FILE *fp; - - if (b==NULL) {return;} - fp = ((bzFile *)b)->handle; - if(((bzFile*)b)->writing){ - BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL); - if(bzerr != BZ_OK){ - BZ2_bzWriteClose(NULL,b,1,NULL,NULL); - } - }else{ - BZ2_bzReadClose(&bzerr,b); - } - if(fp!=stdin && fp!=stdout){ - fclose(fp); - } -} - - -/*---------------------------------------------------*/ -/*-- - return last error code ---*/ -static const char *bzerrorstrings[] = { - "OK" - ,"SEQUENCE_ERROR" - ,"PARAM_ERROR" - ,"MEM_ERROR" - ,"DATA_ERROR" - ,"DATA_ERROR_MAGIC" - ,"IO_ERROR" - ,"UNEXPECTED_EOF" - ,"OUTBUFF_FULL" - ,"CONFIG_ERROR" - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" /* for future */ - ,"???" 
/* for future */ -}; - - -const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum) -{ - int err = ((bzFile *)b)->lastErr; - - if(err>0) err = 0; - *errnum = err; - return bzerrorstrings[err*-1]; -} -#endif - - -/*-------------------------------------------------------------*/ -/*--- end bzlib.c ---*/ -/*-------------------------------------------------------------*/ +} + + +/*---------------------------------------------------*/ +/*-- + Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp) + to support better zlib compatibility. + This code is not _officially_ part of libbzip2 (yet); + I haven't tested it, documented it, or considered the + threading-safeness of it. + If this code breaks, please contact both Yoshioka and me. +--*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +/*-- + return version like "0.9.5d, 4-Sept-1999". +--*/ +const char * BZ_API(BZ2_bzlibVersion)(void) +{ + return BZ_VERSION; +} + + +#ifndef BZ_NO_STDIO +/*---------------------------------------------------*/ + +#if defined(_WIN32) || defined(OS2) || defined(MSDOS) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif +static +BZFILE * bzopen_or_bzdopen + ( const char *path, /* no use when bzdopen */ + int fd, /* no use when bzdopen */ + const char *mode, + int open_mode) /* bzopen: 0, bzdopen:1 */ +{ + int bzerr; + char unused[BZ_MAX_UNUSED]; + int blockSize100k = 9; + int writing = 0; + char mode2[10] = ""; + FILE *fp = NULL; + BZFILE *bzfp = NULL; + int verbosity = 0; + int workFactor = 30; + int smallMode = 0; + int nUnused = 0; + + if (mode == NULL) return NULL; + while (*mode) { + switch (*mode) { + case 'r': + writing = 0; break; + case 'w': + writing = 1; break; + case 's': + smallMode = 1; break; + default: + if (isdigit((int)(*mode))) { + blockSize100k = *mode-BZ_HDR_0; + } + } + mode++; + } + strcat(mode2, writing ? 
"w" : "r" ); + strcat(mode2,"b"); /* binary mode */ + + if (open_mode==0) { + if (path==NULL || strcmp(path,"")==0) { + fp = (writing ? stdout : stdin); + SET_BINARY_MODE(fp); + } else { + fp = fopen(path,mode2); + } + } else { +#ifdef BZ_STRICT_ANSI + fp = NULL; +#else + fp = fdopen(fd,mode2); +#endif + } + if (fp == NULL) return NULL; + + if (writing) { + /* Guard against total chaos and anarchy -- JRS */ + if (blockSize100k < 1) blockSize100k = 1; + if (blockSize100k > 9) blockSize100k = 9; + bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k, + verbosity,workFactor); + } else { + bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode, + unused,nUnused); + } + if (bzfp == NULL) { + if (fp != stdin && fp != stdout) fclose(fp); + return NULL; + } + return bzfp; +} + + +/*---------------------------------------------------*/ +/*-- + open file for read or write. + ex) bzopen("file","w9") + case path="" or NULL => use stdin or stdout. +--*/ +BZFILE * BZ_API(BZ2_bzopen) + ( const char *path, + const char *mode ) +{ + return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0); +} + + +/*---------------------------------------------------*/ +BZFILE * BZ_API(BZ2_bzdopen) + ( int fd, + const char *mode ) +{ + return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1); +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len ) +{ + int bzerr, nread; + if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0; + nread = BZ2_bzRead(&bzerr,b,buf,len); + if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) { + return nread; + } else { + return -1; + } +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len ) +{ + int bzerr; + + BZ2_bzWrite(&bzerr,b,buf,len); + if(bzerr == BZ_OK){ + return len; + }else{ + return -1; + } +} + + +/*---------------------------------------------------*/ +int BZ_API(BZ2_bzflush) (BZFILE *b) +{ + /* do nothing now... 
*/ + return 0; +} + + +/*---------------------------------------------------*/ +void BZ_API(BZ2_bzclose) (BZFILE* b) +{ + int bzerr; + FILE *fp; + + if (b==NULL) {return;} + fp = ((bzFile *)b)->handle; + if(((bzFile*)b)->writing){ + BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL); + if(bzerr != BZ_OK){ + BZ2_bzWriteClose(NULL,b,1,NULL,NULL); + } + }else{ + BZ2_bzReadClose(&bzerr,b); + } + if(fp!=stdin && fp!=stdout){ + fclose(fp); + } +} + + +/*---------------------------------------------------*/ +/*-- + return last error code +--*/ +static const char *bzerrorstrings[] = { + "OK" + ,"SEQUENCE_ERROR" + ,"PARAM_ERROR" + ,"MEM_ERROR" + ,"DATA_ERROR" + ,"DATA_ERROR_MAGIC" + ,"IO_ERROR" + ,"UNEXPECTED_EOF" + ,"OUTBUFF_FULL" + ,"CONFIG_ERROR" + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ +}; + + +const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum) +{ + int err = ((bzFile *)b)->lastErr; + + if(err>0) err = 0; + *errnum = err; + return bzerrorstrings[err*-1]; +} +#endif + + +/*-------------------------------------------------------------*/ +/*--- end bzlib.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/bzlib.h b/contrib/libs/libbz2/bzlib.h index 1fcc8a1d474..a7d2cd61881 100644 --- a/contrib/libs/libbz2/bzlib.h +++ b/contrib/libs/libbz2/bzlib.h @@ -1,183 +1,183 @@ -/*-------------------------------------------------------------*/ -/*--- Public header file for the library. ---*/ -/*--- bzlib.h ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. - +/*-------------------------------------------------------------*/ +/*--- Public header file for the library. 
---*/ +/*--- bzlib.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - + Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - - -#ifndef _BZLIB_H -#define _BZLIB_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define BZ_RUN 0 -#define BZ_FLUSH 1 -#define BZ_FINISH 2 - -#define BZ_OK 0 -#define BZ_RUN_OK 1 -#define BZ_FLUSH_OK 2 -#define BZ_FINISH_OK 3 -#define BZ_STREAM_END 4 -#define BZ_SEQUENCE_ERROR (-1) -#define BZ_PARAM_ERROR (-2) -#define BZ_MEM_ERROR (-3) -#define BZ_DATA_ERROR (-4) -#define BZ_DATA_ERROR_MAGIC (-5) -#define BZ_IO_ERROR (-6) -#define BZ_UNEXPECTED_EOF (-7) -#define BZ_OUTBUFF_FULL (-8) -#define BZ_CONFIG_ERROR (-9) - + README file. + + This program is released under the terms of the license contained + in the file LICENSE. 
+ ------------------------------------------------------------------ */ + + +#ifndef _BZLIB_H +#define _BZLIB_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define BZ_RUN 0 +#define BZ_FLUSH 1 +#define BZ_FINISH 2 + +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) +#define BZ_DATA_ERROR_MAGIC (-5) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) +#define BZ_CONFIG_ERROR (-9) + typedef - struct { - char *next_in; - unsigned int avail_in; - unsigned int total_in_lo32; - unsigned int total_in_hi32; - - char *next_out; - unsigned int avail_out; - unsigned int total_out_lo32; - unsigned int total_out_hi32; - - void *state; - - void *(*bzalloc)(void *,int,int); - void (*bzfree)(void *,void *); - void *opaque; + struct { + char *next_in; + unsigned int avail_in; + unsigned int total_in_lo32; + unsigned int total_in_hi32; + + char *next_out; + unsigned int avail_out; + unsigned int total_out_lo32; + unsigned int total_out_hi32; + + void *state; + + void *(*bzalloc)(void *,int,int); + void (*bzfree)(void *,void *); + void *opaque; } - bz_stream; - - -#ifndef BZ_IMPORT -#define BZ_EXPORT -#endif - -#ifndef BZ_NO_STDIO -/* Need a definitition for FILE */ -#include -#endif - -#ifdef _WIN32 -# include -# ifdef small - /* windows.h define small to char */ -# undef small -# endif -#endif - -#define BZ_API(func) func -#define BZ_EXTERN extern - -/*-- Core (low-level) library functions --*/ - + bz_stream; + + +#ifndef BZ_IMPORT +#define BZ_EXPORT +#endif + +#ifndef BZ_NO_STDIO +/* Need a definitition for FILE */ +#include +#endif + +#ifdef _WIN32 +# include +# ifdef small + /* windows.h define small to char */ +# undef small +# endif +#endif + +#define BZ_API(func) func +#define BZ_EXTERN extern + +/*-- Core (low-level) library functions --*/ + BZ_EXTERN int 
BZ_API(BZ2_bzCompressInit) ( bz_stream* strm, int blockSize100k, int verbosity, int workFactor - ); - + ); + BZ_EXTERN int BZ_API(BZ2_bzCompress) ( bz_stream* strm, int action - ); - + ); + BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( bz_stream* strm - ); - + ); + BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( bz_stream *strm, int verbosity, - int small - ); - + int small + ); + BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( bz_stream* strm - ); - + ); + BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm - ); - - - -/*-- High(er) level library functions --*/ - -#ifndef BZ_NO_STDIO -#define BZ_MAX_UNUSED 5000 - -typedef void BZFILE; - + ); + + + +/*-- High(er) level library functions --*/ + +#ifndef BZ_NO_STDIO +#define BZ_MAX_UNUSED 5000 + +typedef void BZFILE; + BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( int* bzerror, FILE* f, int verbosity, - int small, + int small, void* unused, int nUnused - ); - + ); + BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( int* bzerror, BZFILE* b - ); - + ); + BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( int* bzerror, BZFILE* b, void** unused, int* nUnused - ); - + ); + BZ_EXTERN int BZ_API(BZ2_bzRead) ( int* bzerror, BZFILE* b, void* buf, int len - ); - + ); + BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( int* bzerror, FILE* f, int blockSize100k, int verbosity, int workFactor - ); - + ); + BZ_EXTERN void BZ_API(BZ2_bzWrite) ( int* bzerror, BZFILE* b, void* buf, int len - ); - + ); + BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( int* bzerror, BZFILE* b, int abandon, unsigned int* nbytes_in, unsigned int* nbytes_out - ); - + ); + BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( int* bzerror, BZFILE* b, @@ -185,89 +185,89 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( unsigned int* nbytes_in_lo32, unsigned int* nbytes_in_hi32, unsigned int* nbytes_out_lo32, - unsigned int* nbytes_out_hi32 - ); -#endif - - -/*-- Utility functions --*/ - + unsigned int* nbytes_out_hi32 + ); +#endif + + +/*-- Utility functions --*/ + BZ_EXTERN int 
BZ_API(BZ2_bzBuffToBuffCompress) ( char* dest, - unsigned int* destLen, + unsigned int* destLen, char* source, - unsigned int sourceLen, + unsigned int sourceLen, int blockSize100k, int verbosity, int workFactor - ); - + ); + BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( char* dest, - unsigned int* destLen, + unsigned int* destLen, char* source, - unsigned int sourceLen, + unsigned int sourceLen, int small, int verbosity - ); - - -/*-- - Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp) - to support better zlib compatibility. - This code is not _officially_ part of libbzip2 (yet); - I haven't tested it, documented it, or considered the - threading-safeness of it. - If this code breaks, please contact both Yoshioka and me. ---*/ - -BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) ( - void - ); - -#ifndef BZ_NO_STDIO -BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) ( - const char *path, - const char *mode - ); - -BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) ( - int fd, - const char *mode - ); + ); + + +/*-- + Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp) + to support better zlib compatibility. + This code is not _officially_ part of libbzip2 (yet); + I haven't tested it, documented it, or considered the + threading-safeness of it. + If this code breaks, please contact both Yoshioka and me. 
+--*/ + +BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) ( + void + ); + +#ifndef BZ_NO_STDIO +BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) ( + const char *path, + const char *mode + ); + +BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) ( + int fd, + const char *mode + ); -BZ_EXTERN int BZ_API(BZ2_bzread) ( +BZ_EXTERN int BZ_API(BZ2_bzread) ( BZFILE* b, void* buf, int len - ); - -BZ_EXTERN int BZ_API(BZ2_bzwrite) ( + ); + +BZ_EXTERN int BZ_API(BZ2_bzwrite) ( BZFILE* b, void* buf, int len - ); - -BZ_EXTERN int BZ_API(BZ2_bzflush) ( - BZFILE* b - ); - -BZ_EXTERN void BZ_API(BZ2_bzclose) ( - BZFILE* b - ); - -BZ_EXTERN const char * BZ_API(BZ2_bzerror) ( + ); + +BZ_EXTERN int BZ_API(BZ2_bzflush) ( + BZFILE* b + ); + +BZ_EXTERN void BZ_API(BZ2_bzclose) ( + BZFILE* b + ); + +BZ_EXTERN const char * BZ_API(BZ2_bzerror) ( BZFILE *b, - int *errnum - ); -#endif - -#ifdef __cplusplus -} -#endif - -#endif - -/*-------------------------------------------------------------*/ -/*--- end bzlib.h ---*/ -/*-------------------------------------------------------------*/ + int *errnum + ); +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/bzlib_private.h b/contrib/libs/libbz2/bzlib_private.h index f20d7f06b1f..2578c2dcf2d 100644 --- a/contrib/libs/libbz2/bzlib_private.h +++ b/contrib/libs/libbz2/bzlib_private.h @@ -1,509 +1,509 @@ -/*-------------------------------------------------------------*/ -/*--- Private header file for the library. ---*/ -/*--- bzlib_private.h ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. 
- +/*-------------------------------------------------------------*/ +/*--- Private header file for the library. ---*/ +/*--- bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - + Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - - -#ifndef _BZLIB_PRIVATE_H -#define _BZLIB_PRIVATE_H - -#include - -#ifndef BZ_NO_STDIO -#include -#include -#include -#endif - -#include "bzlib.h" - - - -/*-- General stuff. --*/ - + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#ifndef _BZLIB_PRIVATE_H +#define _BZLIB_PRIVATE_H + +#include + +#ifndef BZ_NO_STDIO +#include +#include +#include +#endif + +#include "bzlib.h" + + + +/*-- General stuff. 
--*/ + #define BZ_VERSION "1.0.8, 13-Jul-2019" - -typedef char Char; -typedef unsigned char Bool; -typedef unsigned char UChar; -typedef int Int32; -typedef unsigned int UInt32; -typedef short Int16; -typedef unsigned short UInt16; - -#define True ((Bool)1) -#define False ((Bool)0) - -#ifndef __GNUC__ -#define __inline__ /* */ -#endif - -#ifndef BZ_NO_STDIO - -extern void BZ2_bz__AssertH__fail ( int errcode ); -#define AssertH(cond,errcode) \ - { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); } - -#if BZ_DEBUG -#define AssertD(cond,msg) \ - { if (!(cond)) { \ - fprintf ( stderr, \ - "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\ - exit(1); \ - }} -#else -#define AssertD(cond,msg) /* */ -#endif - -#define VPrintf0(zf) \ - fprintf(stderr,zf) -#define VPrintf1(zf,za1) \ - fprintf(stderr,zf,za1) -#define VPrintf2(zf,za1,za2) \ - fprintf(stderr,zf,za1,za2) -#define VPrintf3(zf,za1,za2,za3) \ - fprintf(stderr,zf,za1,za2,za3) -#define VPrintf4(zf,za1,za2,za3,za4) \ - fprintf(stderr,zf,za1,za2,za3,za4) -#define VPrintf5(zf,za1,za2,za3,za4,za5) \ - fprintf(stderr,zf,za1,za2,za3,za4,za5) - -#else - -extern void bz_internal_error ( int errcode ); -#define AssertH(cond,errcode) \ - { if (!(cond)) bz_internal_error ( errcode ); } -#define AssertD(cond,msg) do { } while (0) -#define VPrintf0(zf) do { } while (0) -#define VPrintf1(zf,za1) do { } while (0) -#define VPrintf2(zf,za1,za2) do { } while (0) -#define VPrintf3(zf,za1,za2,za3) do { } while (0) -#define VPrintf4(zf,za1,za2,za3,za4) do { } while (0) -#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0) - + +typedef char Char; +typedef unsigned char Bool; +typedef unsigned char UChar; +typedef int Int32; +typedef unsigned int UInt32; +typedef short Int16; +typedef unsigned short UInt16; + +#define True ((Bool)1) +#define False ((Bool)0) + +#ifndef __GNUC__ +#define __inline__ /* */ #endif - - -#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1) -#define BZFREE(ppp) 
(strm->bzfree)(strm->opaque,(ppp)) - - -/*-- Header bytes. --*/ - -#define BZ_HDR_B 0x42 /* 'B' */ -#define BZ_HDR_Z 0x5a /* 'Z' */ -#define BZ_HDR_h 0x68 /* 'h' */ -#define BZ_HDR_0 0x30 /* '0' */ + +#ifndef BZ_NO_STDIO + +extern void BZ2_bz__AssertH__fail ( int errcode ); +#define AssertH(cond,errcode) \ + { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); } + +#if BZ_DEBUG +#define AssertD(cond,msg) \ + { if (!(cond)) { \ + fprintf ( stderr, \ + "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\ + exit(1); \ + }} +#else +#define AssertD(cond,msg) /* */ +#endif + +#define VPrintf0(zf) \ + fprintf(stderr,zf) +#define VPrintf1(zf,za1) \ + fprintf(stderr,zf,za1) +#define VPrintf2(zf,za1,za2) \ + fprintf(stderr,zf,za1,za2) +#define VPrintf3(zf,za1,za2,za3) \ + fprintf(stderr,zf,za1,za2,za3) +#define VPrintf4(zf,za1,za2,za3,za4) \ + fprintf(stderr,zf,za1,za2,za3,za4) +#define VPrintf5(zf,za1,za2,za3,za4,za5) \ + fprintf(stderr,zf,za1,za2,za3,za4,za5) + +#else + +extern void bz_internal_error ( int errcode ); +#define AssertH(cond,errcode) \ + { if (!(cond)) bz_internal_error ( errcode ); } +#define AssertD(cond,msg) do { } while (0) +#define VPrintf0(zf) do { } while (0) +#define VPrintf1(zf,za1) do { } while (0) +#define VPrintf2(zf,za1,za2) do { } while (0) +#define VPrintf3(zf,za1,za2,za3) do { } while (0) +#define VPrintf4(zf,za1,za2,za3,za4) do { } while (0) +#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0) + +#endif + + +#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1) +#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp)) + + +/*-- Header bytes. --*/ + +#define BZ_HDR_B 0x42 /* 'B' */ +#define BZ_HDR_Z 0x5a /* 'Z' */ +#define BZ_HDR_h 0x68 /* 'h' */ +#define BZ_HDR_0 0x30 /* '0' */ -/*-- Constants for the back end. 
--*/ - -#define BZ_MAX_ALPHA_SIZE 258 -#define BZ_MAX_CODE_LEN 23 - -#define BZ_RUNA 0 -#define BZ_RUNB 1 - -#define BZ_N_GROUPS 6 -#define BZ_G_SIZE 50 -#define BZ_N_ITERS 4 - -#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) - - - -/*-- Stuff for randomising repetitive blocks. --*/ - -extern const Int32 BZ2_rNums[512]; - -#define BZ_RAND_DECLS \ - Int32 rNToGo; \ - Int32 rTPos \ - -#define BZ_RAND_INIT_MASK \ - s->rNToGo = 0; \ - s->rTPos = 0 \ - -#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0) - -#define BZ_RAND_UPD_MASK \ - if (s->rNToGo == 0) { \ - s->rNToGo = BZ2_rNums[s->rTPos]; \ - s->rTPos++; \ - if (s->rTPos == 512) s->rTPos = 0; \ - } \ - s->rNToGo--; - - - -/*-- Stuff for doing CRCs. --*/ - -extern const UInt32 BZ2_crc32Table[256]; - -#define BZ_INITIALISE_CRC(crcVar) \ -{ \ - crcVar = 0xffffffffL; \ -} - -#define BZ_FINALISE_CRC(crcVar) \ -{ \ - crcVar = ~(crcVar); \ -} - -#define BZ_UPDATE_CRC(crcVar,cha) \ -{ \ - crcVar = (crcVar << 8) ^ \ - BZ2_crc32Table[(crcVar >> 24) ^ \ - ((UChar)cha)]; \ -} - - - -/*-- States and modes for compression. --*/ - -#define BZ_M_IDLE 1 -#define BZ_M_RUNNING 2 -#define BZ_M_FLUSHING 3 -#define BZ_M_FINISHING 4 - -#define BZ_S_OUTPUT 1 -#define BZ_S_INPUT 2 - -#define BZ_N_RADIX 2 -#define BZ_N_QSORT 12 -#define BZ_N_SHELL 18 -#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) - - - - -/*-- Structure holding all the compression-side stuff. 
--*/ - -typedef - struct { - /* pointer back to the struct bz_stream */ - bz_stream* strm; - - /* mode this stream is in, and whether inputting */ - /* or outputting data */ - Int32 mode; - Int32 state; - - /* remembers avail_in when flush/finish requested */ - UInt32 avail_in_expect; - - /* for doing the block sorting */ - UInt32* arr1; - UInt32* arr2; - UInt32* ftab; - Int32 origPtr; - - /* aliases for arr1 and arr2 */ - UInt32* ptr; - UChar* block; - UInt16* mtfv; - UChar* zbits; - - /* for deciding when to use the fallback sorting algorithm */ - Int32 workFactor; - - /* run-length-encoding of the input */ - UInt32 state_in_ch; - Int32 state_in_len; - BZ_RAND_DECLS; - - /* input and output limits and current posns */ - Int32 nblock; - Int32 nblockMAX; - Int32 numZ; - Int32 state_out_pos; - - /* map of bytes used in block */ - Int32 nInUse; - Bool inUse[256]; - UChar unseqToSeq[256]; - - /* the buffer for bit stream creation */ - UInt32 bsBuff; - Int32 bsLive; - - /* block and combined CRCs */ - UInt32 blockCRC; - UInt32 combinedCRC; - - /* misc administratium */ - Int32 verbosity; - Int32 blockNo; - Int32 blockSize100k; - - /* stuff for coding the MTF values */ - Int32 nMTF; - Int32 mtfFreq [BZ_MAX_ALPHA_SIZE]; - UChar selector [BZ_MAX_SELECTORS]; - UChar selectorMtf[BZ_MAX_SELECTORS]; - - UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - /* second dimension: only 3 needed; 4 makes index calculations faster */ - UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4]; - - } - EState; - - - -/*-- externs for compression. --*/ - +/*-- Constants for the back end. --*/ + +#define BZ_MAX_ALPHA_SIZE 258 +#define BZ_MAX_CODE_LEN 23 + +#define BZ_RUNA 0 +#define BZ_RUNB 1 + +#define BZ_N_GROUPS 6 +#define BZ_G_SIZE 50 +#define BZ_N_ITERS 4 + +#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) + + + +/*-- Stuff for randomising repetitive blocks. 
--*/ + +extern const Int32 BZ2_rNums[512]; + +#define BZ_RAND_DECLS \ + Int32 rNToGo; \ + Int32 rTPos \ + +#define BZ_RAND_INIT_MASK \ + s->rNToGo = 0; \ + s->rTPos = 0 \ + +#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0) + +#define BZ_RAND_UPD_MASK \ + if (s->rNToGo == 0) { \ + s->rNToGo = BZ2_rNums[s->rTPos]; \ + s->rTPos++; \ + if (s->rTPos == 512) s->rTPos = 0; \ + } \ + s->rNToGo--; + + + +/*-- Stuff for doing CRCs. --*/ + +extern const UInt32 BZ2_crc32Table[256]; + +#define BZ_INITIALISE_CRC(crcVar) \ +{ \ + crcVar = 0xffffffffL; \ +} + +#define BZ_FINALISE_CRC(crcVar) \ +{ \ + crcVar = ~(crcVar); \ +} + +#define BZ_UPDATE_CRC(crcVar,cha) \ +{ \ + crcVar = (crcVar << 8) ^ \ + BZ2_crc32Table[(crcVar >> 24) ^ \ + ((UChar)cha)]; \ +} + + + +/*-- States and modes for compression. --*/ + +#define BZ_M_IDLE 1 +#define BZ_M_RUNNING 2 +#define BZ_M_FLUSHING 3 +#define BZ_M_FINISHING 4 + +#define BZ_S_OUTPUT 1 +#define BZ_S_INPUT 2 + +#define BZ_N_RADIX 2 +#define BZ_N_QSORT 12 +#define BZ_N_SHELL 18 +#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) + + + + +/*-- Structure holding all the compression-side stuff. 
--*/ + +typedef + struct { + /* pointer back to the struct bz_stream */ + bz_stream* strm; + + /* mode this stream is in, and whether inputting */ + /* or outputting data */ + Int32 mode; + Int32 state; + + /* remembers avail_in when flush/finish requested */ + UInt32 avail_in_expect; + + /* for doing the block sorting */ + UInt32* arr1; + UInt32* arr2; + UInt32* ftab; + Int32 origPtr; + + /* aliases for arr1 and arr2 */ + UInt32* ptr; + UChar* block; + UInt16* mtfv; + UChar* zbits; + + /* for deciding when to use the fallback sorting algorithm */ + Int32 workFactor; + + /* run-length-encoding of the input */ + UInt32 state_in_ch; + Int32 state_in_len; + BZ_RAND_DECLS; + + /* input and output limits and current posns */ + Int32 nblock; + Int32 nblockMAX; + Int32 numZ; + Int32 state_out_pos; + + /* map of bytes used in block */ + Int32 nInUse; + Bool inUse[256]; + UChar unseqToSeq[256]; + + /* the buffer for bit stream creation */ + UInt32 bsBuff; + Int32 bsLive; + + /* block and combined CRCs */ + UInt32 blockCRC; + UInt32 combinedCRC; + + /* misc administratium */ + Int32 verbosity; + Int32 blockNo; + Int32 blockSize100k; + + /* stuff for coding the MTF values */ + Int32 nMTF; + Int32 mtfFreq [BZ_MAX_ALPHA_SIZE]; + UChar selector [BZ_MAX_SELECTORS]; + UChar selectorMtf[BZ_MAX_SELECTORS]; + + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + /* second dimension: only 3 needed; 4 makes index calculations faster */ + UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4]; + + } + EState; + + + +/*-- externs for compression. 
--*/ + extern void -BZ2_blockSort ( EState* ); - +BZ2_blockSort ( EState* ); + extern void -BZ2_compressBlock ( EState*, Bool ); - +BZ2_compressBlock ( EState*, Bool ); + extern void -BZ2_bsInitWrite ( EState* ); - +BZ2_bsInitWrite ( EState* ); + extern void -BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); - +BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); + extern void -BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); - - - -/*-- states for decompression. --*/ - -#define BZ_X_IDLE 1 -#define BZ_X_OUTPUT 2 - -#define BZ_X_MAGIC_1 10 -#define BZ_X_MAGIC_2 11 -#define BZ_X_MAGIC_3 12 -#define BZ_X_MAGIC_4 13 -#define BZ_X_BLKHDR_1 14 -#define BZ_X_BLKHDR_2 15 -#define BZ_X_BLKHDR_3 16 -#define BZ_X_BLKHDR_4 17 -#define BZ_X_BLKHDR_5 18 -#define BZ_X_BLKHDR_6 19 -#define BZ_X_BCRC_1 20 -#define BZ_X_BCRC_2 21 -#define BZ_X_BCRC_3 22 -#define BZ_X_BCRC_4 23 -#define BZ_X_RANDBIT 24 -#define BZ_X_ORIGPTR_1 25 -#define BZ_X_ORIGPTR_2 26 -#define BZ_X_ORIGPTR_3 27 -#define BZ_X_MAPPING_1 28 -#define BZ_X_MAPPING_2 29 -#define BZ_X_SELECTOR_1 30 -#define BZ_X_SELECTOR_2 31 -#define BZ_X_SELECTOR_3 32 -#define BZ_X_CODING_1 33 -#define BZ_X_CODING_2 34 -#define BZ_X_CODING_3 35 -#define BZ_X_MTF_1 36 -#define BZ_X_MTF_2 37 -#define BZ_X_MTF_3 38 -#define BZ_X_MTF_4 39 -#define BZ_X_MTF_5 40 -#define BZ_X_MTF_6 41 -#define BZ_X_ENDHDR_2 42 -#define BZ_X_ENDHDR_3 43 -#define BZ_X_ENDHDR_4 44 -#define BZ_X_ENDHDR_5 45 -#define BZ_X_ENDHDR_6 46 -#define BZ_X_CCRC_1 47 -#define BZ_X_CCRC_2 48 -#define BZ_X_CCRC_3 49 -#define BZ_X_CCRC_4 50 - - - -/*-- Constants for the fast MTF decoder. --*/ - -#define MTFA_SIZE 4096 -#define MTFL_SIZE 16 - - - -/*-- Structure holding all the decompression-side stuff. 
--*/ - -typedef - struct { - /* pointer back to the struct bz_stream */ - bz_stream* strm; - - /* state indicator for this stream */ - Int32 state; - - /* for doing the final run-length decoding */ - UChar state_out_ch; - Int32 state_out_len; - Bool blockRandomised; - BZ_RAND_DECLS; - - /* the buffer for bit stream reading */ - UInt32 bsBuff; - Int32 bsLive; - - /* misc administratium */ - Int32 blockSize100k; - Bool smallDecompress; - Int32 currBlockNo; - Int32 verbosity; - - /* for undoing the Burrows-Wheeler transform */ - Int32 origPtr; - UInt32 tPos; - Int32 k0; - Int32 unzftab[256]; - Int32 nblock_used; - Int32 cftab[257]; - Int32 cftabCopy[257]; - - /* for undoing the Burrows-Wheeler transform (FAST) */ - UInt32 *tt; - - /* for undoing the Burrows-Wheeler transform (SMALL) */ - UInt16 *ll16; - UChar *ll4; - - /* stored and calculated CRCs */ - UInt32 storedBlockCRC; - UInt32 storedCombinedCRC; - UInt32 calculatedBlockCRC; - UInt32 calculatedCombinedCRC; - - /* map of bytes used in block */ - Int32 nInUse; - Bool inUse[256]; - Bool inUse16[16]; - UChar seqToUnseq[256]; - - /* for decoding the MTF values */ - UChar mtfa [MTFA_SIZE]; - Int32 mtfbase[256 / MTFL_SIZE]; - UChar selector [BZ_MAX_SELECTORS]; - UChar selectorMtf[BZ_MAX_SELECTORS]; - UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - - Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 minLens[BZ_N_GROUPS]; - - /* save area for scalars in the main decompress code */ - Int32 save_i; - Int32 save_j; - Int32 save_t; - Int32 save_alphaSize; - Int32 save_nGroups; - Int32 save_nSelectors; - Int32 save_EOB; - Int32 save_groupNo; - Int32 save_groupPos; - Int32 save_nextSym; - Int32 save_nblockMAX; - Int32 save_nblock; - Int32 save_es; - Int32 save_N; - Int32 save_curr; - Int32 save_zt; +BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); + + + +/*-- states for decompression. 
--*/ + +#define BZ_X_IDLE 1 +#define BZ_X_OUTPUT 2 + +#define BZ_X_MAGIC_1 10 +#define BZ_X_MAGIC_2 11 +#define BZ_X_MAGIC_3 12 +#define BZ_X_MAGIC_4 13 +#define BZ_X_BLKHDR_1 14 +#define BZ_X_BLKHDR_2 15 +#define BZ_X_BLKHDR_3 16 +#define BZ_X_BLKHDR_4 17 +#define BZ_X_BLKHDR_5 18 +#define BZ_X_BLKHDR_6 19 +#define BZ_X_BCRC_1 20 +#define BZ_X_BCRC_2 21 +#define BZ_X_BCRC_3 22 +#define BZ_X_BCRC_4 23 +#define BZ_X_RANDBIT 24 +#define BZ_X_ORIGPTR_1 25 +#define BZ_X_ORIGPTR_2 26 +#define BZ_X_ORIGPTR_3 27 +#define BZ_X_MAPPING_1 28 +#define BZ_X_MAPPING_2 29 +#define BZ_X_SELECTOR_1 30 +#define BZ_X_SELECTOR_2 31 +#define BZ_X_SELECTOR_3 32 +#define BZ_X_CODING_1 33 +#define BZ_X_CODING_2 34 +#define BZ_X_CODING_3 35 +#define BZ_X_MTF_1 36 +#define BZ_X_MTF_2 37 +#define BZ_X_MTF_3 38 +#define BZ_X_MTF_4 39 +#define BZ_X_MTF_5 40 +#define BZ_X_MTF_6 41 +#define BZ_X_ENDHDR_2 42 +#define BZ_X_ENDHDR_3 43 +#define BZ_X_ENDHDR_4 44 +#define BZ_X_ENDHDR_5 45 +#define BZ_X_ENDHDR_6 46 +#define BZ_X_CCRC_1 47 +#define BZ_X_CCRC_2 48 +#define BZ_X_CCRC_3 49 +#define BZ_X_CCRC_4 50 + + + +/*-- Constants for the fast MTF decoder. --*/ + +#define MTFA_SIZE 4096 +#define MTFL_SIZE 16 + + + +/*-- Structure holding all the decompression-side stuff. 
--*/ + +typedef + struct { + /* pointer back to the struct bz_stream */ + bz_stream* strm; + + /* state indicator for this stream */ + Int32 state; + + /* for doing the final run-length decoding */ + UChar state_out_ch; + Int32 state_out_len; + Bool blockRandomised; + BZ_RAND_DECLS; + + /* the buffer for bit stream reading */ + UInt32 bsBuff; + Int32 bsLive; + + /* misc administratium */ + Int32 blockSize100k; + Bool smallDecompress; + Int32 currBlockNo; + Int32 verbosity; + + /* for undoing the Burrows-Wheeler transform */ + Int32 origPtr; + UInt32 tPos; + Int32 k0; + Int32 unzftab[256]; + Int32 nblock_used; + Int32 cftab[257]; + Int32 cftabCopy[257]; + + /* for undoing the Burrows-Wheeler transform (FAST) */ + UInt32 *tt; + + /* for undoing the Burrows-Wheeler transform (SMALL) */ + UInt16 *ll16; + UChar *ll4; + + /* stored and calculated CRCs */ + UInt32 storedBlockCRC; + UInt32 storedCombinedCRC; + UInt32 calculatedBlockCRC; + UInt32 calculatedCombinedCRC; + + /* map of bytes used in block */ + Int32 nInUse; + Bool inUse[256]; + Bool inUse16[16]; + UChar seqToUnseq[256]; + + /* for decoding the MTF values */ + UChar mtfa [MTFA_SIZE]; + Int32 mtfbase[256 / MTFL_SIZE]; + UChar selector [BZ_MAX_SELECTORS]; + UChar selectorMtf[BZ_MAX_SELECTORS]; + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 minLens[BZ_N_GROUPS]; + + /* save area for scalars in the main decompress code */ + Int32 save_i; + Int32 save_j; + Int32 save_t; + Int32 save_alphaSize; + Int32 save_nGroups; + Int32 save_nSelectors; + Int32 save_EOB; + Int32 save_groupNo; + Int32 save_groupPos; + Int32 save_nextSym; + Int32 save_nblockMAX; + Int32 save_nblock; + Int32 save_es; + Int32 save_N; + Int32 save_curr; + Int32 save_zt; Int32 save_zn; - Int32 save_zvec; - Int32 save_zj; - Int32 save_gSel; - Int32 save_gMinlen; - Int32* save_gLimit; - Int32* save_gBase; 
- Int32* save_gPerm; - - } - DState; - - - -/*-- Macros for decompression. --*/ - -#define BZ_GET_FAST(cccc) \ - /* c_tPos is unsigned, hence test < 0 is pointless. */ \ - if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \ - s->tPos = s->tt[s->tPos]; \ - cccc = (UChar)(s->tPos & 0xff); \ - s->tPos >>= 8; - -#define BZ_GET_FAST_C(cccc) \ - /* c_tPos is unsigned, hence test < 0 is pointless. */ \ - if (c_tPos >= (UInt32)100000 * (UInt32)ro_blockSize100k) return True; \ - c_tPos = c_tt[c_tPos]; \ - cccc = (UChar)(c_tPos & 0xff); \ - c_tPos >>= 8; - -#define SET_LL4(i,n) \ - { if (((i) & 0x1) == 0) \ - s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \ - s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \ - } - -#define GET_LL4(i) \ - ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF) - -#define SET_LL(i,n) \ - { s->ll16[i] = (UInt16)(n & 0x0000ffff); \ - SET_LL4(i, n >> 16); \ - } - -#define GET_LL(i) \ - (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) - -#define BZ_GET_SMALL(cccc) \ - /* c_tPos is unsigned, hence test < 0 is pointless. */ \ - if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \ - cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \ - s->tPos = GET_LL(s->tPos); - - -/*-- externs for decompression. --*/ - + Int32 save_zvec; + Int32 save_zj; + Int32 save_gSel; + Int32 save_gMinlen; + Int32* save_gLimit; + Int32* save_gBase; + Int32* save_gPerm; + + } + DState; + + + +/*-- Macros for decompression. --*/ + +#define BZ_GET_FAST(cccc) \ + /* c_tPos is unsigned, hence test < 0 is pointless. */ \ + if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \ + s->tPos = s->tt[s->tPos]; \ + cccc = (UChar)(s->tPos & 0xff); \ + s->tPos >>= 8; + +#define BZ_GET_FAST_C(cccc) \ + /* c_tPos is unsigned, hence test < 0 is pointless. 
*/ \ + if (c_tPos >= (UInt32)100000 * (UInt32)ro_blockSize100k) return True; \ + c_tPos = c_tt[c_tPos]; \ + cccc = (UChar)(c_tPos & 0xff); \ + c_tPos >>= 8; + +#define SET_LL4(i,n) \ + { if (((i) & 0x1) == 0) \ + s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \ + s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \ + } + +#define GET_LL4(i) \ + ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF) + +#define SET_LL(i,n) \ + { s->ll16[i] = (UInt16)(n & 0x0000ffff); \ + SET_LL4(i, n >> 16); \ + } + +#define GET_LL(i) \ + (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) + +#define BZ_GET_SMALL(cccc) \ + /* c_tPos is unsigned, hence test < 0 is pointless. */ \ + if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \ + cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \ + s->tPos = GET_LL(s->tPos); + + +/*-- externs for decompression. --*/ + extern Int32 -BZ2_indexIntoF ( Int32, Int32* ); - +BZ2_indexIntoF ( Int32, Int32* ); + extern Int32 -BZ2_decompress ( DState* ); - +BZ2_decompress ( DState* ); + extern void -BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, - Int32, Int32, Int32 ); - - -#endif - - -/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/ - -#ifdef BZ_NO_STDIO -#ifndef NULL -#define NULL 0 -#endif -#endif - - -/*-------------------------------------------------------------*/ -/*--- end bzlib_private.h ---*/ -/*-------------------------------------------------------------*/ +BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, + Int32, Int32, Int32 ); + + +#endif + + +/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. 
--*/ + +#ifdef BZ_NO_STDIO +#ifndef NULL +#define NULL 0 +#endif +#endif + + +/*-------------------------------------------------------------*/ +/*--- end bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/compress.c b/contrib/libs/libbz2/compress.c index 2880fdf8ec5..5dfa00231b0 100644 --- a/contrib/libs/libbz2/compress.c +++ b/contrib/libs/libbz2/compress.c @@ -1,672 +1,672 @@ - -/*-------------------------------------------------------------*/ -/*--- Compression machinery (not incl block sorting) ---*/ -/*--- compress.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. - + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - - Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - - -/* CHANGES - 0.9.0 -- original version. - 0.9.0a/b -- no changes in this file. 
- 0.9.0c -- changed setting of nGroups in sendMTFValues() - so as to do a bit better on small files -*/ - -#include "bzlib_private.h" - - -/*---------------------------------------------------*/ -/*--- Bit stream I/O ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -void BZ2_bsInitWrite ( EState* s ) -{ - s->bsLive = 0; - s->bsBuff = 0; -} - - -/*---------------------------------------------------*/ -static -void bsFinishWrite ( EState* s ) -{ - while (s->bsLive > 0) { - s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); - s->numZ++; - s->bsBuff <<= 8; - s->bsLive -= 8; - } -} - - -/*---------------------------------------------------*/ -#define bsNEEDW(nz) \ -{ \ - while (s->bsLive >= 8) { \ - s->zbits[s->numZ] \ - = (UChar)(s->bsBuff >> 24); \ - s->numZ++; \ - s->bsBuff <<= 8; \ - s->bsLive -= 8; \ - } \ -} - - -/*---------------------------------------------------*/ -static -__inline__ -void bsW ( EState* s, Int32 n, UInt32 v ) -{ - bsNEEDW ( n ); - s->bsBuff |= (v << (32 - s->bsLive - n)); - s->bsLive += n; -} - - -/*---------------------------------------------------*/ -static -void bsPutUInt32 ( EState* s, UInt32 u ) -{ - bsW ( s, 8, (u >> 24) & 0xffL ); - bsW ( s, 8, (u >> 16) & 0xffL ); - bsW ( s, 8, (u >> 8) & 0xffL ); - bsW ( s, 8, u & 0xffL ); -} - - -/*---------------------------------------------------*/ -static -void bsPutUChar ( EState* s, UChar c ) -{ - bsW( s, 8, (UInt32)c ); -} - - -/*---------------------------------------------------*/ -/*--- The back end proper ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -static -void makeMaps_e ( EState* s ) -{ - Int32 i; - s->nInUse = 0; - for (i = 0; i < 256; i++) - if (s->inUse[i]) { - s->unseqToSeq[i] = s->nInUse; - s->nInUse++; - } -} - - -/*---------------------------------------------------*/ -static -void generateMTFValues ( EState* s ) -{ - UChar yy[256]; - 
Int32 i, j; - Int32 zPend; - Int32 wr; - Int32 EOB; - - /* - After sorting (eg, here), - s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, - and - ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] - holds the original block data. - - The first thing to do is generate the MTF values, - and put them in - ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. - Because there are strictly fewer or equal MTF values - than block values, ptr values in this area are overwritten - with MTF values only when they are no longer needed. - - The final compressed bitstream is generated into the - area starting at - (UChar*) (&((UChar*)s->arr2)[s->nblock]) - - These storage aliases are set up in bzCompressInit(), - except for the last one, which is arranged in - compressBlock(). - */ - UInt32* ptr = s->ptr; - UChar* block = s->block; - UInt16* mtfv = s->mtfv; - - makeMaps_e ( s ); - EOB = s->nInUse+1; - - for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; - - wr = 0; - zPend = 0; - for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; - - for (i = 0; i < s->nblock; i++) { - UChar ll_i; - AssertD ( wr <= i, "generateMTFValues(1)" ); - j = ptr[i]-1; if (j < 0) j += s->nblock; - ll_i = s->unseqToSeq[block[j]]; - AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); - - if (yy[0] == ll_i) { - zPend++; - } else { - - if (zPend > 0) { - zPend--; - while (True) { - if (zPend & 1) { - mtfv[wr] = BZ_RUNB; wr++; - s->mtfFreq[BZ_RUNB]++; - } else { - mtfv[wr] = BZ_RUNA; wr++; - s->mtfFreq[BZ_RUNA]++; - } - if (zPend < 2) break; - zPend = (zPend - 2) / 2; - }; - zPend = 0; - } - { + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +/* CHANGES + 0.9.0 -- original version. + 0.9.0a/b -- no changes in this file. 
+ 0.9.0c -- changed setting of nGroups in sendMTFValues() + so as to do a bit better on small files +*/ + +#include "bzlib_private.h" + + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +void BZ2_bsInitWrite ( EState* s ) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static +void bsFinishWrite ( EState* s ) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +#define bsNEEDW(nz) \ +{ \ + while (s->bsLive >= 8) { \ + s->zbits[s->numZ] \ + = (UChar)(s->bsBuff >> 24); \ + s->numZ++; \ + s->bsBuff <<= 8; \ + s->bsLive -= 8; \ + } \ +} + + +/*---------------------------------------------------*/ +static +__inline__ +void bsW ( EState* s, Int32 n, UInt32 v ) +{ + bsNEEDW ( n ); + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +static +void bsPutUInt32 ( EState* s, UInt32 u ) +{ + bsW ( s, 8, (u >> 24) & 0xffL ); + bsW ( s, 8, (u >> 16) & 0xffL ); + bsW ( s, 8, (u >> 8) & 0xffL ); + bsW ( s, 8, u & 0xffL ); +} + + +/*---------------------------------------------------*/ +static +void bsPutUChar ( EState* s, UChar c ) +{ + bsW( s, 8, (UInt32)c ); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e ( EState* s ) +{ + Int32 i; + s->nInUse = 0; + for (i = 0; i < 256; i++) + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } +} + + +/*---------------------------------------------------*/ +static +void generateMTFValues ( EState* s ) +{ + UChar yy[256]; + 
Int32 i, j; + Int32 zPend; + Int32 wr; + Int32 EOB; + + /* + After sorting (eg, here), + s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, + and + ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] + holds the original block data. + + The first thing to do is generate the MTF values, + and put them in + ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. + Because there are strictly fewer or equal MTF values + than block values, ptr values in this area are overwritten + with MTF values only when they are no longer needed. + + The final compressed bitstream is generated into the + area starting at + (UChar*) (&((UChar*)s->arr2)[s->nblock]) + + These storage aliases are set up in bzCompressInit(), + except for the last one, which is arranged in + compressBlock(). + */ + UInt32* ptr = s->ptr; + UChar* block = s->block; + UInt16* mtfv = s->mtfv; + + makeMaps_e ( s ); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; + + for (i = 0; i < s->nblock; i++) { + UChar ll_i; + AssertD ( wr <= i, "generateMTFValues(1)" ); + j = ptr[i]-1; if (j < 0) j += s->nblock; + ll_i = s->unseqToSeq[block[j]]; + AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); + + if (yy[0] == ll_i) { + zPend++; + } else { + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + { register UChar rtmp; register UChar* ryy_j; register UChar rll_i; - rtmp = yy[1]; - yy[1] = yy[0]; - ryy_j = &(yy[1]); - rll_i = ll_i; - while ( rll_i != rtmp ) { + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while ( rll_i != rtmp ) { register UChar rtmp2; - ryy_j++; - rtmp2 = rtmp; - rtmp = *ryy_j; - *ryy_j = rtmp2; - }; - yy[0] = rtmp; - j = ryy_j - &(yy[0]); - mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; - } - - } - } - - if 
(zPend > 0) { - zPend--; - while (True) { - if (zPend & 1) { - mtfv[wr] = BZ_RUNB; wr++; - s->mtfFreq[BZ_RUNB]++; - } else { - mtfv[wr] = BZ_RUNA; wr++; - s->mtfFreq[BZ_RUNA]++; - } - if (zPend < 2) break; - zPend = (zPend - 2) / 2; - }; - zPend = 0; - } - - mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; - - s->nMTF = wr; -} - - -/*---------------------------------------------------*/ -#define BZ_LESSER_ICOST 0 -#define BZ_GREATER_ICOST 15 - -static -void sendMTFValues ( EState* s ) -{ - Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; - Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; - Int32 nGroups, nBytes; - - /*-- - UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - is a global since the decoder also needs it. - - Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - are also globals only used in this proc. - Made global to keep stack frame size small. - --*/ - - - UInt16 cost[BZ_N_GROUPS]; - Int32 fave[BZ_N_GROUPS]; - - UInt16* mtfv = s->mtfv; - - if (s->verbosity >= 3) - VPrintf3( " %d in block, %d after MTF & 1-2 coding, " - "%d+2 syms in use\n", - s->nblock, s->nMTF, s->nInUse ); - - alphaSize = s->nInUse+2; - for (t = 0; t < BZ_N_GROUPS; t++) - for (v = 0; v < alphaSize; v++) - s->len[t][v] = BZ_GREATER_ICOST; - - /*--- Decide how many coding tables to use ---*/ - AssertH ( s->nMTF > 0, 3001 ); - if (s->nMTF < 200) nGroups = 2; else - if (s->nMTF < 600) nGroups = 3; else - if (s->nMTF < 1200) nGroups = 4; else - if (s->nMTF < 2400) nGroups = 5; else - nGroups = 6; - - /*--- Generate an initial set of coding tables ---*/ - { - Int32 nPart, remF, tFreq, aFreq; - - nPart = nGroups; - remF = s->nMTF; - gs = 0; - while (nPart > 0) { - tFreq = remF / nPart; - ge = gs-1; - aFreq = 0; - while (aFreq < tFreq && ge < alphaSize-1) { - ge++; - aFreq += s->mtfFreq[ge]; - } - - if (ge > gs - && nPart != nGroups && nPart != 1 - && ((nGroups-nPart) % 2 == 1)) { - aFreq -= s->mtfFreq[ge]; - ge--; - } - - if (s->verbosity >= 3) - VPrintf5( " 
initial group %d, [%d .. %d], " - "has %d syms (%4.1f%%)\n", - nPart, gs, ge, aFreq, - (100.0 * (float)aFreq) / (float)(s->nMTF) ); - - for (v = 0; v < alphaSize; v++) - if (v >= gs && v <= ge) - s->len[nPart-1][v] = BZ_LESSER_ICOST; else - s->len[nPart-1][v] = BZ_GREATER_ICOST; - - nPart--; - gs = ge+1; - remF -= aFreq; - } - } - - /*--- - Iterate up to BZ_N_ITERS times to improve the tables. - ---*/ - for (iter = 0; iter < BZ_N_ITERS; iter++) { - - for (t = 0; t < nGroups; t++) fave[t] = 0; - - for (t = 0; t < nGroups; t++) - for (v = 0; v < alphaSize; v++) - s->rfreq[t][v] = 0; - - /*--- - Set up an auxiliary length table which is used to fast-track - the common case (nGroups == 6). - ---*/ - if (nGroups == 6) { - for (v = 0; v < alphaSize; v++) { - s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; - s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; - s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; - } - } - - nSelectors = 0; - totc = 0; - gs = 0; - while (True) { - - /*--- Set group start & end marks. --*/ - if (gs >= s->nMTF) break; - ge = gs + BZ_G_SIZE - 1; - if (ge >= s->nMTF) ge = s->nMTF-1; - - /*-- - Calculate the cost of this group as coded - by each of the coding tables. 
- --*/ - for (t = 0; t < nGroups; t++) cost[t] = 0; - - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; + } + + } + } + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + + mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static +void sendMTFValues ( EState* s ) +{ + Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; + Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; + Int32 nGroups, nBytes; + + /*-- + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + is a global since the decoder also needs it. + + Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + are also globals only used in this proc. + Made global to keep stack frame size small. 
+ --*/ + + + UInt16 cost[BZ_N_GROUPS]; + Int32 fave[BZ_N_GROUPS]; + + UInt16* mtfv = s->mtfv; + + if (s->verbosity >= 3) + VPrintf3( " %d in block, %d after MTF & 1-2 coding, " + "%d+2 syms in use\n", + s->nblock, s->nMTF, s->nInUse ); + + alphaSize = s->nInUse+2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH ( s->nMTF > 0, 3001 ); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + Int32 nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs-1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups-nPart) % 2 == 1)) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + if (s->verbosity >= 3) + VPrintf5( " initial group %d, [%d .. %d], " + "has %d syms (%4.1f%%)\n", + nPart, gs, ge, aFreq, + (100.0 * (float)aFreq) / (float)(s->nMTF) ); + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge+1; + remF -= aFreq; + } + } + + /*--- + Iterate up to BZ_N_ITERS times to improve the tables. + ---*/ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + + for (t = 0; t < nGroups; t++) fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + + /*--- + Set up an auxiliary length table which is used to fast-track + the common case (nGroups == 6). 
+ ---*/ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } + + nSelectors = 0; + totc = 0; + gs = 0; + while (True) { + + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + + /*-- + Calculate the cost of this group as coded + by each of the coding tables. + --*/ + for (t = 0; t < nGroups; t++) cost[t] = 0; + + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ register UInt32 cost01, cost23, cost45; register UInt16 icv; - cost01 = cost23 = cost45 = 0; - -# define BZ_ITER(nn) \ - icv = mtfv[gs+(nn)]; \ - cost01 += s->len_pack[icv][0]; \ - cost23 += s->len_pack[icv][1]; \ - cost45 += s->len_pack[icv][2]; \ - - BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); - BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); - BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); - BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); - BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); - BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); - BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); - BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); - BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); - BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); - -# undef BZ_ITER - - cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; - cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; - cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; - - } else { - /*--- slow version which correctly handles all situations ---*/ - for (i = gs; i <= ge; i++) { - UInt16 icv = mtfv[i]; - for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; - } - } - - /*-- - Find the coding table which is 
best for this group, - and record its identity in the selector table. - --*/ - bc = 999999999; bt = -1; - for (t = 0; t < nGroups; t++) - if (cost[t] < bc) { bc = cost[t]; bt = t; }; - totc += bc; - fave[bt]++; - s->selector[nSelectors] = bt; - nSelectors++; - - /*-- - Increment the symbol frequencies for the selected table. - --*/ - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ - -# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ - - BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); - BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); - BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); - BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); - BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); - BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); - BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); - BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); - BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); - BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); - -# undef BZ_ITUR - - } else { - /*--- slow version which correctly handles all situations ---*/ - for (i = gs; i <= ge; i++) - s->rfreq[bt][ mtfv[i] ]++; - } - - gs = ge+1; - } - if (s->verbosity >= 3) { - VPrintf2 ( " pass %d: size is %d, grp uses are ", - iter+1, totc/8 ); - for (t = 0; t < nGroups; t++) - VPrintf1 ( "%d ", fave[t] ); - VPrintf0 ( "\n" ); - } - - /*-- - Recompute the tables based on the accumulated frequencies. - --*/ - /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See - comment in huffman.c for details. 
*/ - for (t = 0; t < nGroups; t++) - BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), - alphaSize, 17 /*20*/ ); - } - - - AssertH( nGroups < 8, 3002 ); - AssertH( nSelectors < 32768 && + cost01 = cost23 = cost45 = 0; + +# define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; \ + + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); + +# undef BZ_ITER + + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + UInt16 icv = mtfv[i]; + for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; + } + } + + /*-- + Find the coding table which is best for this group, + and record its identity in the selector table. + --*/ + bc = 999999999; bt = -1; + for (t = 0; t < nGroups; t++) + if (cost[t] < bc) { bc = cost[t]; bt = t; }; + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /*-- + Increment the symbol frequencies for the selected table. 
+ --*/ + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + +# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ + + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); + +# undef BZ_ITUR + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) + s->rfreq[bt][ mtfv[i] ]++; + } + + gs = ge+1; + } + if (s->verbosity >= 3) { + VPrintf2 ( " pass %d: size is %d, grp uses are ", + iter+1, totc/8 ); + for (t = 0; t < nGroups; t++) + VPrintf1 ( "%d ", fave[t] ); + VPrintf0 ( "\n" ); + } + + /*-- + Recompute the tables based on the accumulated frequencies. + --*/ + /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See + comment in huffman.c for details. */ + for (t = 0; t < nGroups; t++) + BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), + alphaSize, 17 /*20*/ ); + } + + + AssertH( nGroups < 8, 3002 ); + AssertH( nSelectors < 32768 && nSelectors <= BZ_MAX_SELECTORS, - 3003 ); - - - /*--- Compute MTF values for the selectors. ---*/ + 3003 ); + + + /*--- Compute MTF values for the selectors. 
---*/ + { + UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + for (i = 0; i < nGroups; i++) pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while ( ll_i != tmp ) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); + AssertH ( !(minLen < 1), 3005 ); + BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), + minLen, maxLen, alphaSize ); + } + + /*--- Transmit the mapping table. ---*/ { - UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; - for (i = 0; i < nGroups; i++) pos[i] = i; - for (i = 0; i < nSelectors; i++) { - ll_i = s->selector[i]; - j = 0; - tmp = pos[j]; - while ( ll_i != tmp ) { - j++; - tmp2 = tmp; - tmp = pos[j]; - pos[j] = tmp2; - }; - pos[0] = tmp; - s->selectorMtf[i] = j; - } - }; - - /*--- Assign actual codes for the tables. --*/ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; - if (s->len[t][i] < minLen) minLen = s->len[t][i]; - } - AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); - AssertH ( !(minLen < 1), 3005 ); - BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), - minLen, maxLen, alphaSize ); - } - - /*--- Transmit the mapping table. 
---*/ - { - Bool inUse16[16]; - for (i = 0; i < 16; i++) { - inUse16[i] = False; - for (j = 0; j < 16; j++) - if (s->inUse[i * 16 + j]) inUse16[i] = True; - } - - nBytes = s->numZ; - for (i = 0; i < 16; i++) - if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); - - for (i = 0; i < 16; i++) - if (inUse16[i]) - for (j = 0; j < 16; j++) { - if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); - } - - if (s->verbosity >= 3) - VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); - } - - /*--- Now the selectors. ---*/ - nBytes = s->numZ; - bsW ( s, 3, nGroups ); - bsW ( s, 15, nSelectors ); - for (i = 0; i < nSelectors; i++) { - for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); - bsW(s,1,0); - } - if (s->verbosity >= 3) - VPrintf1( "selectors %d, ", s->numZ-nBytes ); - - /*--- Now the coding tables. ---*/ - nBytes = s->numZ; - - for (t = 0; t < nGroups; t++) { - Int32 curr = s->len[t][0]; - bsW ( s, 5, curr ); - for (i = 0; i < alphaSize; i++) { - while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; - while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; - bsW ( s, 1, 0 ); - } - } - - if (s->verbosity >= 3) - VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); - - /*--- And finally, the block data proper ---*/ - nBytes = s->numZ; - selCtr = 0; - gs = 0; - while (True) { - if (gs >= s->nMTF) break; - ge = gs + BZ_G_SIZE - 1; - if (ge >= s->nMTF) ge = s->nMTF-1; - AssertH ( s->selector[selCtr] < nGroups, 3006 ); - - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ - UInt16 mtfv_i; - UChar* s_len_sel_selCtr - = &(s->len[s->selector[selCtr]][0]); - Int32* s_code_sel_selCtr - = &(s->code[s->selector[selCtr]][0]); - -# define BZ_ITAH(nn) \ - mtfv_i = mtfv[gs+(nn)]; \ - bsW ( s, \ - s_len_sel_selCtr[mtfv_i], \ - s_code_sel_selCtr[mtfv_i] ) - - BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); - BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); - BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); - 
BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); - BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); - BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); - BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); - BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); - BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); - BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); - -# undef BZ_ITAH - - } else { - /*--- slow version which correctly handles all situations ---*/ - for (i = gs; i <= ge; i++) { - bsW ( s, - s->len [s->selector[selCtr]] [mtfv[i]], - s->code [s->selector[selCtr]] [mtfv[i]] ); - } - } - - - gs = ge+1; - selCtr++; - } - AssertH( selCtr == nSelectors, 3007 ); - - if (s->verbosity >= 3) - VPrintf1( "codes %d\n", s->numZ-nBytes ); -} - - -/*---------------------------------------------------*/ -void BZ2_compressBlock ( EState* s, Bool is_last_block ) -{ - if (s->nblock > 0) { - - BZ_FINALISE_CRC ( s->blockCRC ); - s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); - s->combinedCRC ^= s->blockCRC; - if (s->blockNo > 1) s->numZ = 0; - - if (s->verbosity >= 2) - VPrintf4( " block %d: crc = 0x%08x, " - "combined CRC = 0x%08x, size = %d\n", - s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); - - BZ2_blockSort ( s ); - } - - s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); - - /*-- If this is the first block, create the stream header. --*/ - if (s->blockNo == 1) { - BZ2_bsInitWrite ( s ); - bsPutUChar ( s, BZ_HDR_B ); - bsPutUChar ( s, BZ_HDR_Z ); - bsPutUChar ( s, BZ_HDR_h ); - bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); - } - - if (s->nblock > 0) { - - bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); - bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); - bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); - - /*-- Now the block's CRC, so it is in a known place. 
--*/ - bsPutUInt32 ( s, s->blockCRC ); - - /*-- - Now a single bit indicating (non-)randomisation. - As of version 0.9.5, we use a better sorting algorithm - which makes randomisation unnecessary. So always set - the randomised bit to 'no'. Of course, the decoder - still needs to be able to handle randomised blocks - so as to maintain backwards compatibility with - older versions of bzip2. - --*/ - bsW(s,1,0); - - bsW ( s, 24, s->origPtr ); - generateMTFValues ( s ); - sendMTFValues ( s ); - } - - - /*-- If this is the last block, add the stream trailer. --*/ - if (is_last_block) { - - bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); - bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); - bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); - bsPutUInt32 ( s, s->combinedCRC ); - if (s->verbosity >= 2) - VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); - bsFinishWrite ( s ); - } -} - - -/*-------------------------------------------------------------*/ -/*--- end compress.c ---*/ -/*-------------------------------------------------------------*/ + Bool inUse16[16]; + for (i = 0; i < 16; i++) { + inUse16[i] = False; + for (j = 0; j < 16; j++) + if (s->inUse[i * 16 + j]) inUse16[i] = True; + } + + nBytes = s->numZ; + for (i = 0; i < 16; i++) + if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); + + for (i = 0; i < 16; i++) + if (inUse16[i]) + for (j = 0; j < 16; j++) { + if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); + } + + if (s->verbosity >= 3) + VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW ( s, 3, nGroups ); + bsW ( s, 15, nSelectors ); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); + bsW(s,1,0); + } + if (s->verbosity >= 3) + VPrintf1( "selectors %d, ", s->numZ-nBytes ); + + /*--- Now the coding tables. 
---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + Int32 curr = s->len[t][0]; + bsW ( s, 5, curr ); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; + bsW ( s, 1, 0 ); + } + } + + if (s->verbosity >= 3) + VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (True) { + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + AssertH ( s->selector[selCtr] < nGroups, 3006 ); + + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + UInt16 mtfv_i; + UChar* s_len_sel_selCtr + = &(s->len[s->selector[selCtr]][0]); + Int32* s_code_sel_selCtr + = &(s->code[s->selector[selCtr]][0]); + +# define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW ( s, \ + s_len_sel_selCtr[mtfv_i], \ + s_code_sel_selCtr[mtfv_i] ) + + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); + +# undef BZ_ITAH + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + bsW ( s, + s->len [s->selector[selCtr]] [mtfv[i]], + s->code [s->selector[selCtr]] [mtfv[i]] ); + } + } + + + gs = ge+1; + selCtr++; + } + AssertH( selCtr == nSelectors, 3007 ); + + if (s->verbosity >= 3) + 
VPrintf1( "codes %d\n", s->numZ-nBytes ); +} + + +/*---------------------------------------------------*/ +void BZ2_compressBlock ( EState* s, Bool is_last_block ) +{ + if (s->nblock > 0) { + + BZ_FINALISE_CRC ( s->blockCRC ); + s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); + s->combinedCRC ^= s->blockCRC; + if (s->blockNo > 1) s->numZ = 0; + + if (s->verbosity >= 2) + VPrintf4( " block %d: crc = 0x%08x, " + "combined CRC = 0x%08x, size = %d\n", + s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); + + BZ2_blockSort ( s ); + } + + s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + BZ2_bsInitWrite ( s ); + bsPutUChar ( s, BZ_HDR_B ); + bsPutUChar ( s, BZ_HDR_Z ); + bsPutUChar ( s, BZ_HDR_h ); + bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); + } + + if (s->nblock > 0) { + + bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); + bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); + bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); + + /*-- Now the block's CRC, so it is in a known place. --*/ + bsPutUInt32 ( s, s->blockCRC ); + + /*-- + Now a single bit indicating (non-)randomisation. + As of version 0.9.5, we use a better sorting algorithm + which makes randomisation unnecessary. So always set + the randomised bit to 'no'. Of course, the decoder + still needs to be able to handle randomised blocks + so as to maintain backwards compatibility with + older versions of bzip2. + --*/ + bsW(s,1,0); + + bsW ( s, 24, s->origPtr ); + generateMTFValues ( s ); + sendMTFValues ( s ); + } + + + /*-- If this is the last block, add the stream trailer. 
--*/ + if (is_last_block) { + + bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); + bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); + bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); + bsPutUInt32 ( s, s->combinedCRC ); + if (s->verbosity >= 2) + VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); + bsFinishWrite ( s ); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/crctable.c b/contrib/libs/libbz2/crctable.c index c961b190576..a9212dbf2c3 100644 --- a/contrib/libs/libbz2/crctable.c +++ b/contrib/libs/libbz2/crctable.c @@ -1,104 +1,104 @@ - -/*-------------------------------------------------------------*/ -/*--- Table for doing CRCs ---*/ -/*--- crctable.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. - + +/*-------------------------------------------------------------*/ +/*--- Table for doing CRCs ---*/ +/*--- crctable.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - + Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - - -#include "bzlib_private.h" - -/*-- - I think this is an implementation of the AUTODIN-II, - Ethernet & FDDI 32-bit CRC standard. 
Vaguely derived - from code by Rob Warnock, in Section 51 of the - comp.compression FAQ. ---*/ - -const UInt32 BZ2_crc32Table[256] = { - - /*-- Ugly, innit? --*/ - - 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L, - 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L, - 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L, - 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL, - 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L, - 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L, - 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L, - 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL, - 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L, - 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L, - 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L, - 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL, - 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L, - 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L, - 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L, - 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL, - 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL, - 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L, - 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L, - 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL, - 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL, - 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L, - 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L, - 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL, - 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL, - 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L, - 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L, - 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL, - 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL, - 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L, - 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L, - 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL, - 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L, - 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL, 
- 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL, - 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L, - 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L, - 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL, - 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL, - 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L, - 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L, - 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL, - 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL, - 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L, - 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L, - 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL, - 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL, - 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L, - 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L, - 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL, - 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L, - 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L, - 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L, - 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL, - 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L, - 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L, - 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L, - 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL, - 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L, - 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L, - 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L, - 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL, - 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L, - 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L -}; - - -/*-------------------------------------------------------------*/ -/*--- end crctable.c ---*/ -/*-------------------------------------------------------------*/ + README file. + + This program is released under the terms of the license contained + in the file LICENSE. 
+ ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + +/*-- + I think this is an implementation of the AUTODIN-II, + Ethernet & FDDI 32-bit CRC standard. Vaguely derived + from code by Rob Warnock, in Section 51 of the + comp.compression FAQ. +--*/ + +const UInt32 BZ2_crc32Table[256] = { + + /*-- Ugly, innit? --*/ + + 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L, + 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L, + 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L, + 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL, + 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L, + 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L, + 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L, + 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL, + 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L, + 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L, + 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L, + 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL, + 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L, + 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L, + 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L, + 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL, + 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL, + 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L, + 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L, + 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL, + 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL, + 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L, + 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L, + 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL, + 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL, + 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L, + 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L, + 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL, + 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL, + 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L, + 
0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L, + 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL, + 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L, + 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL, + 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL, + 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L, + 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L, + 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL, + 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL, + 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L, + 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L, + 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL, + 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL, + 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L, + 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L, + 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL, + 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL, + 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L, + 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L, + 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL, + 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L, + 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L, + 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L, + 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL, + 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L, + 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L, + 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L, + 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL, + 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L, + 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L, + 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L, + 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL, + 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L, + 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L +}; + + +/*-------------------------------------------------------------*/ +/*--- end crctable.c ---*/ 
+/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/decompress.c b/contrib/libs/libbz2/decompress.c index 5f37c27e2f7..a1a0bac8922 100644 --- a/contrib/libs/libbz2/decompress.c +++ b/contrib/libs/libbz2/decompress.c @@ -1,392 +1,392 @@ - -/*-------------------------------------------------------------*/ -/*--- Decompression machinery ---*/ -/*--- decompress.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. - + +/*-------------------------------------------------------------*/ +/*--- Decompression machinery ---*/ +/*--- decompress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - - Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. 
- ------------------------------------------------------------------ */ - - -#include "bzlib_private.h" - - -/*---------------------------------------------------*/ -static -void makeMaps_d ( DState* s ) -{ - Int32 i; - s->nInUse = 0; - for (i = 0; i < 256; i++) - if (s->inUse[i]) { - s->seqToUnseq[s->nInUse] = i; - s->nInUse++; - } -} - - -/*---------------------------------------------------*/ -#define RETURN(rrr) \ - { retVal = rrr; goto save_state_and_return; }; - -#define GET_BITS(lll,vvv,nnn) \ - case lll: s->state = lll; \ - while (True) { \ - if (s->bsLive >= nnn) { \ - UInt32 v; \ - v = (s->bsBuff >> \ - (s->bsLive-nnn)) & ((1 << nnn)-1); \ - s->bsLive -= nnn; \ - vvv = v; \ - break; \ - } \ - if (s->strm->avail_in == 0) RETURN(BZ_OK); \ - s->bsBuff \ - = (s->bsBuff << 8) | \ - ((UInt32) \ - (*((UChar*)(s->strm->next_in)))); \ - s->bsLive += 8; \ - s->strm->next_in++; \ - s->strm->avail_in--; \ - s->strm->total_in_lo32++; \ - if (s->strm->total_in_lo32 == 0) \ - s->strm->total_in_hi32++; \ - } - -#define GET_UCHAR(lll,uuu) \ - GET_BITS(lll,uuu,8) - -#define GET_BIT(lll,uuu) \ - GET_BITS(lll,uuu,1) - -/*---------------------------------------------------*/ -#define GET_MTF_VAL(label1,label2,lval) \ -{ \ - if (groupPos == 0) { \ - groupNo++; \ - if (groupNo >= nSelectors) \ - RETURN(BZ_DATA_ERROR); \ - groupPos = BZ_G_SIZE; \ - gSel = s->selector[groupNo]; \ - gMinlen = s->minLens[gSel]; \ - gLimit = &(s->limit[gSel][0]); \ - gPerm = &(s->perm[gSel][0]); \ - gBase = &(s->base[gSel][0]); \ - } \ - groupPos--; \ - zn = gMinlen; \ - GET_BITS(label1, zvec, zn); \ - while (1) { \ - if (zn > 20 /* the longest code */) \ - RETURN(BZ_DATA_ERROR); \ - if (zvec <= gLimit[zn]) break; \ - zn++; \ - GET_BIT(label2, zj); \ - zvec = (zvec << 1) | zj; \ - }; \ - if (zvec - gBase[zn] < 0 \ - || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ - RETURN(BZ_DATA_ERROR); \ - lval = gPerm[zvec - gBase[zn]]; \ -} - - -/*---------------------------------------------------*/ -Int32 
BZ2_decompress ( DState* s ) -{ - UChar uc; - Int32 retVal; - Int32 minLen, maxLen; - bz_stream* strm = s->strm; - - /* stuff that needs to be saved/restored */ - Int32 i; - Int32 j; - Int32 t; - Int32 alphaSize; - Int32 nGroups; - Int32 nSelectors; - Int32 EOB; - Int32 groupNo; - Int32 groupPos; - Int32 nextSym; - Int32 nblockMAX; - Int32 nblock; - Int32 es; - Int32 N; - Int32 curr; - Int32 zt; - Int32 zn; - Int32 zvec; - Int32 zj; - Int32 gSel; - Int32 gMinlen; - Int32* gLimit; - Int32* gBase; - Int32* gPerm; - - if (s->state == BZ_X_MAGIC_1) { - /*initialise the save area*/ - s->save_i = 0; - s->save_j = 0; - s->save_t = 0; - s->save_alphaSize = 0; - s->save_nGroups = 0; - s->save_nSelectors = 0; - s->save_EOB = 0; - s->save_groupNo = 0; - s->save_groupPos = 0; - s->save_nextSym = 0; - s->save_nblockMAX = 0; - s->save_nblock = 0; - s->save_es = 0; - s->save_N = 0; - s->save_curr = 0; - s->save_zt = 0; - s->save_zn = 0; - s->save_zvec = 0; - s->save_zj = 0; - s->save_gSel = 0; - s->save_gMinlen = 0; - s->save_gLimit = NULL; - s->save_gBase = NULL; - s->save_gPerm = NULL; - } - - /*restore from the save area*/ - i = s->save_i; - j = s->save_j; - t = s->save_t; - alphaSize = s->save_alphaSize; - nGroups = s->save_nGroups; - nSelectors = s->save_nSelectors; - EOB = s->save_EOB; - groupNo = s->save_groupNo; - groupPos = s->save_groupPos; - nextSym = s->save_nextSym; - nblockMAX = s->save_nblockMAX; - nblock = s->save_nblock; - es = s->save_es; - N = s->save_N; - curr = s->save_curr; - zt = s->save_zt; - zn = s->save_zn; - zvec = s->save_zvec; - zj = s->save_zj; - gSel = s->save_gSel; - gMinlen = s->save_gMinlen; - gLimit = s->save_gLimit; - gBase = s->save_gBase; - gPerm = s->save_gPerm; - - retVal = BZ_OK; - - switch (s->state) { - - GET_UCHAR(BZ_X_MAGIC_1, uc); - if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC); - - GET_UCHAR(BZ_X_MAGIC_2, uc); - if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC); - - GET_UCHAR(BZ_X_MAGIC_3, uc) - if (uc != BZ_HDR_h) 
RETURN(BZ_DATA_ERROR_MAGIC); - - GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) - if (s->blockSize100k < (BZ_HDR_0 + 1) || - s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC); - s->blockSize100k -= BZ_HDR_0; - - if (s->smallDecompress) { - s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); - s->ll4 = BZALLOC( - ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) - ); - if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR); - } else { - s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) ); - if (s->tt == NULL) RETURN(BZ_MEM_ERROR); - } - - GET_UCHAR(BZ_X_BLKHDR_1, uc); - - if (uc == 0x17) goto endhdr_2; - if (uc != 0x31) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_BLKHDR_2, uc); - if (uc != 0x41) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_BLKHDR_3, uc); - if (uc != 0x59) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_BLKHDR_4, uc); - if (uc != 0x26) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_BLKHDR_5, uc); - if (uc != 0x53) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_BLKHDR_6, uc); - if (uc != 0x59) RETURN(BZ_DATA_ERROR); - - s->currBlockNo++; - if (s->verbosity >= 2) - VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo ); - - s->storedBlockCRC = 0; - GET_UCHAR(BZ_X_BCRC_1, uc); - s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); - GET_UCHAR(BZ_X_BCRC_2, uc); - s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); - GET_UCHAR(BZ_X_BCRC_3, uc); - s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); - GET_UCHAR(BZ_X_BCRC_4, uc); - s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); - - GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1); - - s->origPtr = 0; - GET_UCHAR(BZ_X_ORIGPTR_1, uc); - s->origPtr = (s->origPtr << 8) | ((Int32)uc); - GET_UCHAR(BZ_X_ORIGPTR_2, uc); - s->origPtr = (s->origPtr << 8) | ((Int32)uc); - GET_UCHAR(BZ_X_ORIGPTR_3, uc); - s->origPtr = (s->origPtr << 8) | ((Int32)uc); - - if (s->origPtr < 0) - RETURN(BZ_DATA_ERROR); - if (s->origPtr > 10 + 100000*s->blockSize100k) - RETURN(BZ_DATA_ERROR); - - 
/*--- Receive the mapping table ---*/ - for (i = 0; i < 16; i++) { - GET_BIT(BZ_X_MAPPING_1, uc); - if (uc == 1) - s->inUse16[i] = True; else - s->inUse16[i] = False; - } - - for (i = 0; i < 256; i++) s->inUse[i] = False; - - for (i = 0; i < 16; i++) - if (s->inUse16[i]) - for (j = 0; j < 16; j++) { - GET_BIT(BZ_X_MAPPING_2, uc); - if (uc == 1) s->inUse[i * 16 + j] = True; - } - makeMaps_d ( s ); - if (s->nInUse == 0) RETURN(BZ_DATA_ERROR); - alphaSize = s->nInUse+2; - - /*--- Now the selectors ---*/ - GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + + +/*---------------------------------------------------*/ +static +void makeMaps_d ( DState* s ) +{ + Int32 i; + s->nInUse = 0; + for (i = 0; i < 256; i++) + if (s->inUse[i]) { + s->seqToUnseq[s->nInUse] = i; + s->nInUse++; + } +} + + +/*---------------------------------------------------*/ +#define RETURN(rrr) \ + { retVal = rrr; goto save_state_and_return; }; + +#define GET_BITS(lll,vvv,nnn) \ + case lll: s->state = lll; \ + while (True) { \ + if (s->bsLive >= nnn) { \ + UInt32 v; \ + v = (s->bsBuff >> \ + (s->bsLive-nnn)) & ((1 << nnn)-1); \ + s->bsLive -= nnn; \ + vvv = v; \ + break; \ + } \ + if (s->strm->avail_in == 0) RETURN(BZ_OK); \ + s->bsBuff \ + = (s->bsBuff << 8) | \ + ((UInt32) \ + (*((UChar*)(s->strm->next_in)))); \ + s->bsLive += 8; \ + s->strm->next_in++; \ + s->strm->avail_in--; \ + s->strm->total_in_lo32++; \ + if (s->strm->total_in_lo32 == 0) \ + s->strm->total_in_hi32++; \ + } + +#define GET_UCHAR(lll,uuu) \ + GET_BITS(lll,uuu,8) + +#define GET_BIT(lll,uuu) \ + GET_BITS(lll,uuu,1) + +/*---------------------------------------------------*/ +#define GET_MTF_VAL(label1,label2,lval) \ +{ \ + if (groupPos == 0) { \ + groupNo++; \ + if 
(groupNo >= nSelectors) \ + RETURN(BZ_DATA_ERROR); \ + groupPos = BZ_G_SIZE; \ + gSel = s->selector[groupNo]; \ + gMinlen = s->minLens[gSel]; \ + gLimit = &(s->limit[gSel][0]); \ + gPerm = &(s->perm[gSel][0]); \ + gBase = &(s->base[gSel][0]); \ + } \ + groupPos--; \ + zn = gMinlen; \ + GET_BITS(label1, zvec, zn); \ + while (1) { \ + if (zn > 20 /* the longest code */) \ + RETURN(BZ_DATA_ERROR); \ + if (zvec <= gLimit[zn]) break; \ + zn++; \ + GET_BIT(label2, zj); \ + zvec = (zvec << 1) | zj; \ + }; \ + if (zvec - gBase[zn] < 0 \ + || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ + RETURN(BZ_DATA_ERROR); \ + lval = gPerm[zvec - gBase[zn]]; \ +} + + +/*---------------------------------------------------*/ +Int32 BZ2_decompress ( DState* s ) +{ + UChar uc; + Int32 retVal; + Int32 minLen, maxLen; + bz_stream* strm = s->strm; + + /* stuff that needs to be saved/restored */ + Int32 i; + Int32 j; + Int32 t; + Int32 alphaSize; + Int32 nGroups; + Int32 nSelectors; + Int32 EOB; + Int32 groupNo; + Int32 groupPos; + Int32 nextSym; + Int32 nblockMAX; + Int32 nblock; + Int32 es; + Int32 N; + Int32 curr; + Int32 zt; + Int32 zn; + Int32 zvec; + Int32 zj; + Int32 gSel; + Int32 gMinlen; + Int32* gLimit; + Int32* gBase; + Int32* gPerm; + + if (s->state == BZ_X_MAGIC_1) { + /*initialise the save area*/ + s->save_i = 0; + s->save_j = 0; + s->save_t = 0; + s->save_alphaSize = 0; + s->save_nGroups = 0; + s->save_nSelectors = 0; + s->save_EOB = 0; + s->save_groupNo = 0; + s->save_groupPos = 0; + s->save_nextSym = 0; + s->save_nblockMAX = 0; + s->save_nblock = 0; + s->save_es = 0; + s->save_N = 0; + s->save_curr = 0; + s->save_zt = 0; + s->save_zn = 0; + s->save_zvec = 0; + s->save_zj = 0; + s->save_gSel = 0; + s->save_gMinlen = 0; + s->save_gLimit = NULL; + s->save_gBase = NULL; + s->save_gPerm = NULL; + } + + /*restore from the save area*/ + i = s->save_i; + j = s->save_j; + t = s->save_t; + alphaSize = s->save_alphaSize; + nGroups = s->save_nGroups; + nSelectors = s->save_nSelectors; + EOB = 
s->save_EOB; + groupNo = s->save_groupNo; + groupPos = s->save_groupPos; + nextSym = s->save_nextSym; + nblockMAX = s->save_nblockMAX; + nblock = s->save_nblock; + es = s->save_es; + N = s->save_N; + curr = s->save_curr; + zt = s->save_zt; + zn = s->save_zn; + zvec = s->save_zvec; + zj = s->save_zj; + gSel = s->save_gSel; + gMinlen = s->save_gMinlen; + gLimit = s->save_gLimit; + gBase = s->save_gBase; + gPerm = s->save_gPerm; + + retVal = BZ_OK; + + switch (s->state) { + + GET_UCHAR(BZ_X_MAGIC_1, uc); + if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC); + + GET_UCHAR(BZ_X_MAGIC_2, uc); + if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC); + + GET_UCHAR(BZ_X_MAGIC_3, uc) + if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC); + + GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) + if (s->blockSize100k < (BZ_HDR_0 + 1) || + s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC); + s->blockSize100k -= BZ_HDR_0; + + if (s->smallDecompress) { + s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); + s->ll4 = BZALLOC( + ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) + ); + if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR); + } else { + s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) ); + if (s->tt == NULL) RETURN(BZ_MEM_ERROR); + } + + GET_UCHAR(BZ_X_BLKHDR_1, uc); + + if (uc == 0x17) goto endhdr_2; + if (uc != 0x31) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_2, uc); + if (uc != 0x41) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_3, uc); + if (uc != 0x59) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_4, uc); + if (uc != 0x26) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_5, uc); + if (uc != 0x53) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_BLKHDR_6, uc); + if (uc != 0x59) RETURN(BZ_DATA_ERROR); + + s->currBlockNo++; + if (s->verbosity >= 2) + VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo ); + + s->storedBlockCRC = 0; + GET_UCHAR(BZ_X_BCRC_1, uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_BCRC_2, 
uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_BCRC_3, uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_BCRC_4, uc); + s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); + + GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1); + + s->origPtr = 0; + GET_UCHAR(BZ_X_ORIGPTR_1, uc); + s->origPtr = (s->origPtr << 8) | ((Int32)uc); + GET_UCHAR(BZ_X_ORIGPTR_2, uc); + s->origPtr = (s->origPtr << 8) | ((Int32)uc); + GET_UCHAR(BZ_X_ORIGPTR_3, uc); + s->origPtr = (s->origPtr << 8) | ((Int32)uc); + + if (s->origPtr < 0) + RETURN(BZ_DATA_ERROR); + if (s->origPtr > 10 + 100000*s->blockSize100k) + RETURN(BZ_DATA_ERROR); + + /*--- Receive the mapping table ---*/ + for (i = 0; i < 16; i++) { + GET_BIT(BZ_X_MAPPING_1, uc); + if (uc == 1) + s->inUse16[i] = True; else + s->inUse16[i] = False; + } + + for (i = 0; i < 256; i++) s->inUse[i] = False; + + for (i = 0; i < 16; i++) + if (s->inUse16[i]) + for (j = 0; j < 16; j++) { + GET_BIT(BZ_X_MAPPING_2, uc); + if (uc == 1) s->inUse[i * 16 + j] = True; + } + makeMaps_d ( s ); + if (s->nInUse == 0) RETURN(BZ_DATA_ERROR); + alphaSize = s->nInUse+2; + + /*--- Now the selectors ---*/ + GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); if (nGroups < 2 || nGroups > BZ_N_GROUPS) RETURN(BZ_DATA_ERROR); - GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); + GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); if (nSelectors < 1) RETURN(BZ_DATA_ERROR); - for (i = 0; i < nSelectors; i++) { - j = 0; - while (True) { - GET_BIT(BZ_X_SELECTOR_3, uc); - if (uc == 0) break; - j++; - if (j >= nGroups) RETURN(BZ_DATA_ERROR); - } + for (i = 0; i < nSelectors; i++) { + j = 0; + while (True) { + GET_BIT(BZ_X_SELECTOR_3, uc); + if (uc == 0) break; + j++; + if (j >= nGroups) RETURN(BZ_DATA_ERROR); + } /* Having more than BZ_MAX_SELECTORS doesn't make much sense since they will never be used, but some implementations might "round up" the number of selectors, so just ignore those. 
*/ if (i < BZ_MAX_SELECTORS) s->selectorMtf[i] = j; - } + } if (nSelectors > BZ_MAX_SELECTORS) nSelectors = BZ_MAX_SELECTORS; - - /*--- Undo the MTF values for the selectors. ---*/ - { - UChar pos[BZ_N_GROUPS], tmp, v; - for (v = 0; v < nGroups; v++) pos[v] = v; - - for (i = 0; i < nSelectors; i++) { - v = s->selectorMtf[i]; - tmp = pos[v]; - while (v > 0) { pos[v] = pos[v-1]; v--; } - pos[0] = tmp; - s->selector[i] = tmp; - } - } - - /*--- Now the coding tables ---*/ - for (t = 0; t < nGroups; t++) { - GET_BITS(BZ_X_CODING_1, curr, 5); - for (i = 0; i < alphaSize; i++) { - while (True) { - if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR); - GET_BIT(BZ_X_CODING_2, uc); - if (uc == 0) break; - GET_BIT(BZ_X_CODING_3, uc); - if (uc == 0) curr++; else curr--; - } - s->len[t][i] = curr; - } - } - - /*--- Create the Huffman decoding tables ---*/ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; - if (s->len[t][i] < minLen) minLen = s->len[t][i]; - } - BZ2_hbCreateDecodeTables ( - &(s->limit[t][0]), - &(s->base[t][0]), - &(s->perm[t][0]), - &(s->len[t][0]), - minLen, maxLen, alphaSize - ); - s->minLens[t] = minLen; - } - - /*--- Now the MTF values ---*/ - - EOB = s->nInUse+1; - nblockMAX = 100000 * s->blockSize100k; - groupNo = -1; - groupPos = 0; - - for (i = 0; i <= 255; i++) s->unzftab[i] = 0; - - /*-- MTF init --*/ - { - Int32 ii, jj, kk; - kk = MTFA_SIZE-1; - for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) { - for (jj = MTFL_SIZE-1; jj >= 0; jj--) { - s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj); - kk--; - } - s->mtfbase[ii] = kk + 1; - } - } - /*-- end MTF init --*/ - - nblock = 0; - GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); - - while (True) { - - if (nextSym == EOB) break; - - if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) { - - es = -1; - N = 1; - do { + + /*--- Undo the MTF values for the selectors. 
---*/ + { + UChar pos[BZ_N_GROUPS], tmp, v; + for (v = 0; v < nGroups; v++) pos[v] = v; + + for (i = 0; i < nSelectors; i++) { + v = s->selectorMtf[i]; + tmp = pos[v]; + while (v > 0) { pos[v] = pos[v-1]; v--; } + pos[0] = tmp; + s->selector[i] = tmp; + } + } + + /*--- Now the coding tables ---*/ + for (t = 0; t < nGroups; t++) { + GET_BITS(BZ_X_CODING_1, curr, 5); + for (i = 0; i < alphaSize; i++) { + while (True) { + if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR); + GET_BIT(BZ_X_CODING_2, uc); + if (uc == 0) break; + GET_BIT(BZ_X_CODING_3, uc); + if (uc == 0) curr++; else curr--; + } + s->len[t][i] = curr; + } + } + + /*--- Create the Huffman decoding tables ---*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + BZ2_hbCreateDecodeTables ( + &(s->limit[t][0]), + &(s->base[t][0]), + &(s->perm[t][0]), + &(s->len[t][0]), + minLen, maxLen, alphaSize + ); + s->minLens[t] = minLen; + } + + /*--- Now the MTF values ---*/ + + EOB = s->nInUse+1; + nblockMAX = 100000 * s->blockSize100k; + groupNo = -1; + groupPos = 0; + + for (i = 0; i <= 255; i++) s->unzftab[i] = 0; + + /*-- MTF init --*/ + { + Int32 ii, jj, kk; + kk = MTFA_SIZE-1; + for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) { + for (jj = MTFL_SIZE-1; jj >= 0; jj--) { + s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj); + kk--; + } + s->mtfbase[ii] = kk + 1; + } + } + /*-- end MTF init --*/ + + nblock = 0; + GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); + + while (True) { + + if (nextSym == EOB) break; + + if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) { + + es = -1; + N = 1; + do { /* Check that N doesn't get too big, so that es doesn't go negative. The maximum value that can be RUNA/RUNB encoded is equal to the block size (post @@ -394,259 +394,259 @@ Int32 BZ2_decompress ( DState* s ) million should guard against overflow without rejecting any legitimate inputs. 
*/ if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR); - if (nextSym == BZ_RUNA) es = es + (0+1) * N; else - if (nextSym == BZ_RUNB) es = es + (1+1) * N; - N = N * 2; - GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym); - } - while (nextSym == BZ_RUNA || nextSym == BZ_RUNB); - - es++; - uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ]; - s->unzftab[uc] += es; - - if (s->smallDecompress) - while (es > 0) { - if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); - s->ll16[nblock] = (UInt16)uc; - nblock++; - es--; - } - else - while (es > 0) { - if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); - s->tt[nblock] = (UInt32)uc; - nblock++; - es--; - }; - - continue; - - } else { - - if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); - - /*-- uc = MTF ( nextSym-1 ) --*/ - { - Int32 ii, jj, kk, pp, lno, off; - UInt32 nn; - nn = (UInt32)(nextSym - 1); - - if (nn < MTFL_SIZE) { - /* avoid general-case expense */ - pp = s->mtfbase[0]; - uc = s->mtfa[pp+nn]; - while (nn > 3) { - Int32 z = pp+nn; - s->mtfa[(z) ] = s->mtfa[(z)-1]; - s->mtfa[(z)-1] = s->mtfa[(z)-2]; - s->mtfa[(z)-2] = s->mtfa[(z)-3]; - s->mtfa[(z)-3] = s->mtfa[(z)-4]; - nn -= 4; - } - while (nn > 0) { - s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; - }; - s->mtfa[pp] = uc; - } else { - /* general case */ - lno = nn / MTFL_SIZE; - off = nn % MTFL_SIZE; - pp = s->mtfbase[lno] + off; - uc = s->mtfa[pp]; - while (pp > s->mtfbase[lno]) { - s->mtfa[pp] = s->mtfa[pp-1]; pp--; - }; - s->mtfbase[lno]++; - while (lno > 0) { - s->mtfbase[lno]--; - s->mtfa[s->mtfbase[lno]] - = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1]; - lno--; - } - s->mtfbase[0]--; - s->mtfa[s->mtfbase[0]] = uc; - if (s->mtfbase[0] == 0) { - kk = MTFA_SIZE-1; - for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) { - for (jj = MTFL_SIZE-1; jj >= 0; jj--) { - s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj]; - kk--; - } - s->mtfbase[ii] = kk + 1; - } - } - } - } - /*-- end uc = MTF ( nextSym-1 ) --*/ - - s->unzftab[s->seqToUnseq[uc]]++; - if (s->smallDecompress) - s->ll16[nblock] = 
(UInt16)(s->seqToUnseq[uc]); else - s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]); - nblock++; - - GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym); - continue; - } - } - - /* Now we know what nblock is, we can do a better sanity - check on s->origPtr. - */ - if (s->origPtr < 0 || s->origPtr >= nblock) - RETURN(BZ_DATA_ERROR); - - /*-- Set up cftab to facilitate generation of T^(-1) --*/ + if (nextSym == BZ_RUNA) es = es + (0+1) * N; else + if (nextSym == BZ_RUNB) es = es + (1+1) * N; + N = N * 2; + GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym); + } + while (nextSym == BZ_RUNA || nextSym == BZ_RUNB); + + es++; + uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ]; + s->unzftab[uc] += es; + + if (s->smallDecompress) + while (es > 0) { + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); + s->ll16[nblock] = (UInt16)uc; + nblock++; + es--; + } + else + while (es > 0) { + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); + s->tt[nblock] = (UInt32)uc; + nblock++; + es--; + }; + + continue; + + } else { + + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); + + /*-- uc = MTF ( nextSym-1 ) --*/ + { + Int32 ii, jj, kk, pp, lno, off; + UInt32 nn; + nn = (UInt32)(nextSym - 1); + + if (nn < MTFL_SIZE) { + /* avoid general-case expense */ + pp = s->mtfbase[0]; + uc = s->mtfa[pp+nn]; + while (nn > 3) { + Int32 z = pp+nn; + s->mtfa[(z) ] = s->mtfa[(z)-1]; + s->mtfa[(z)-1] = s->mtfa[(z)-2]; + s->mtfa[(z)-2] = s->mtfa[(z)-3]; + s->mtfa[(z)-3] = s->mtfa[(z)-4]; + nn -= 4; + } + while (nn > 0) { + s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; + }; + s->mtfa[pp] = uc; + } else { + /* general case */ + lno = nn / MTFL_SIZE; + off = nn % MTFL_SIZE; + pp = s->mtfbase[lno] + off; + uc = s->mtfa[pp]; + while (pp > s->mtfbase[lno]) { + s->mtfa[pp] = s->mtfa[pp-1]; pp--; + }; + s->mtfbase[lno]++; + while (lno > 0) { + s->mtfbase[lno]--; + s->mtfa[s->mtfbase[lno]] + = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1]; + lno--; + } + s->mtfbase[0]--; + s->mtfa[s->mtfbase[0]] = uc; + if (s->mtfbase[0] == 0) { + kk = 
MTFA_SIZE-1; + for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) { + for (jj = MTFL_SIZE-1; jj >= 0; jj--) { + s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj]; + kk--; + } + s->mtfbase[ii] = kk + 1; + } + } + } + } + /*-- end uc = MTF ( nextSym-1 ) --*/ + + s->unzftab[s->seqToUnseq[uc]]++; + if (s->smallDecompress) + s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else + s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]); + nblock++; + + GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym); + continue; + } + } + + /* Now we know what nblock is, we can do a better sanity + check on s->origPtr. + */ + if (s->origPtr < 0 || s->origPtr >= nblock) + RETURN(BZ_DATA_ERROR); + + /*-- Set up cftab to facilitate generation of T^(-1) --*/ /* Check: unzftab entries in range. */ for (i = 0; i <= 255; i++) { if (s->unzftab[i] < 0 || s->unzftab[i] > nblock) RETURN(BZ_DATA_ERROR); } /* Actually generate cftab. */ - s->cftab[0] = 0; - for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; - for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; + s->cftab[0] = 0; + for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; + for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; /* Check: cftab entries in range. */ - for (i = 0; i <= 256; i++) { - if (s->cftab[i] < 0 || s->cftab[i] > nblock) { - /* s->cftab[i] can legitimately be == nblock */ - RETURN(BZ_DATA_ERROR); - } - } + for (i = 0; i <= 256; i++) { + if (s->cftab[i] < 0 || s->cftab[i] > nblock) { + /* s->cftab[i] can legitimately be == nblock */ + RETURN(BZ_DATA_ERROR); + } + } /* Check: cftab entries non-descending. 
*/ for (i = 1; i <= 256; i++) { if (s->cftab[i-1] > s->cftab[i]) { RETURN(BZ_DATA_ERROR); } } - - s->state_out_len = 0; - s->state_out_ch = 0; - BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); - s->state = BZ_X_OUTPUT; - if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); - - if (s->smallDecompress) { - - /*-- Make a copy of cftab, used in generation of T --*/ - for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i]; - - /*-- compute the T vector --*/ - for (i = 0; i < nblock; i++) { - uc = (UChar)(s->ll16[i]); - SET_LL(i, s->cftabCopy[uc]); - s->cftabCopy[uc]++; - } - - /*-- Compute T^(-1) by pointer reversal on T --*/ - i = s->origPtr; - j = GET_LL(i); - do { - Int32 tmp = GET_LL(j); - SET_LL(j, i); - i = j; - j = tmp; - } - while (i != s->origPtr); - - s->tPos = s->origPtr; - s->nblock_used = 0; - if (s->blockRandomised) { - BZ_RAND_INIT_MASK; - BZ_GET_SMALL(s->k0); s->nblock_used++; - BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; - } else { - BZ_GET_SMALL(s->k0); s->nblock_used++; - } - - } else { - - /*-- compute the T^(-1) vector --*/ - for (i = 0; i < nblock; i++) { - uc = (UChar)(s->tt[i] & 0xff); - s->tt[s->cftab[uc]] |= (i << 8); - s->cftab[uc]++; - } - - s->tPos = s->tt[s->origPtr] >> 8; - s->nblock_used = 0; - if (s->blockRandomised) { - BZ_RAND_INIT_MASK; - BZ_GET_FAST(s->k0); s->nblock_used++; - BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; - } else { - BZ_GET_FAST(s->k0); s->nblock_used++; - } - - } - - RETURN(BZ_OK); - - - - endhdr_2: - - GET_UCHAR(BZ_X_ENDHDR_2, uc); - if (uc != 0x72) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_ENDHDR_3, uc); - if (uc != 0x45) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_ENDHDR_4, uc); - if (uc != 0x38) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_ENDHDR_5, uc); - if (uc != 0x50) RETURN(BZ_DATA_ERROR); - GET_UCHAR(BZ_X_ENDHDR_6, uc); - if (uc != 0x90) RETURN(BZ_DATA_ERROR); - - s->storedCombinedCRC = 0; - GET_UCHAR(BZ_X_CCRC_1, uc); - s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); - GET_UCHAR(BZ_X_CCRC_2, uc); - 
s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); - GET_UCHAR(BZ_X_CCRC_3, uc); - s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); - GET_UCHAR(BZ_X_CCRC_4, uc); - s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); - - s->state = BZ_X_IDLE; - RETURN(BZ_STREAM_END); - - default: AssertH ( False, 4001 ); - } - - AssertH ( False, 4002 ); - - save_state_and_return: - - s->save_i = i; - s->save_j = j; - s->save_t = t; - s->save_alphaSize = alphaSize; - s->save_nGroups = nGroups; - s->save_nSelectors = nSelectors; - s->save_EOB = EOB; - s->save_groupNo = groupNo; - s->save_groupPos = groupPos; - s->save_nextSym = nextSym; - s->save_nblockMAX = nblockMAX; - s->save_nblock = nblock; - s->save_es = es; - s->save_N = N; - s->save_curr = curr; - s->save_zt = zt; - s->save_zn = zn; - s->save_zvec = zvec; - s->save_zj = zj; - s->save_gSel = gSel; - s->save_gMinlen = gMinlen; - s->save_gLimit = gLimit; - s->save_gBase = gBase; - s->save_gPerm = gPerm; - - return retVal; -} - - -/*-------------------------------------------------------------*/ -/*--- end decompress.c ---*/ -/*-------------------------------------------------------------*/ + + s->state_out_len = 0; + s->state_out_ch = 0; + BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); + s->state = BZ_X_OUTPUT; + if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); + + if (s->smallDecompress) { + + /*-- Make a copy of cftab, used in generation of T --*/ + for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i]; + + /*-- compute the T vector --*/ + for (i = 0; i < nblock; i++) { + uc = (UChar)(s->ll16[i]); + SET_LL(i, s->cftabCopy[uc]); + s->cftabCopy[uc]++; + } + + /*-- Compute T^(-1) by pointer reversal on T --*/ + i = s->origPtr; + j = GET_LL(i); + do { + Int32 tmp = GET_LL(j); + SET_LL(j, i); + i = j; + j = tmp; + } + while (i != s->origPtr); + + s->tPos = s->origPtr; + s->nblock_used = 0; + if (s->blockRandomised) { + BZ_RAND_INIT_MASK; + BZ_GET_SMALL(s->k0); s->nblock_used++; + 
BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; + } else { + BZ_GET_SMALL(s->k0); s->nblock_used++; + } + + } else { + + /*-- compute the T^(-1) vector --*/ + for (i = 0; i < nblock; i++) { + uc = (UChar)(s->tt[i] & 0xff); + s->tt[s->cftab[uc]] |= (i << 8); + s->cftab[uc]++; + } + + s->tPos = s->tt[s->origPtr] >> 8; + s->nblock_used = 0; + if (s->blockRandomised) { + BZ_RAND_INIT_MASK; + BZ_GET_FAST(s->k0); s->nblock_used++; + BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; + } else { + BZ_GET_FAST(s->k0); s->nblock_used++; + } + + } + + RETURN(BZ_OK); + + + + endhdr_2: + + GET_UCHAR(BZ_X_ENDHDR_2, uc); + if (uc != 0x72) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_3, uc); + if (uc != 0x45) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_4, uc); + if (uc != 0x38) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_5, uc); + if (uc != 0x50) RETURN(BZ_DATA_ERROR); + GET_UCHAR(BZ_X_ENDHDR_6, uc); + if (uc != 0x90) RETURN(BZ_DATA_ERROR); + + s->storedCombinedCRC = 0; + GET_UCHAR(BZ_X_CCRC_1, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_CCRC_2, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_CCRC_3, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + GET_UCHAR(BZ_X_CCRC_4, uc); + s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); + + s->state = BZ_X_IDLE; + RETURN(BZ_STREAM_END); + + default: AssertH ( False, 4001 ); + } + + AssertH ( False, 4002 ); + + save_state_and_return: + + s->save_i = i; + s->save_j = j; + s->save_t = t; + s->save_alphaSize = alphaSize; + s->save_nGroups = nGroups; + s->save_nSelectors = nSelectors; + s->save_EOB = EOB; + s->save_groupNo = groupNo; + s->save_groupPos = groupPos; + s->save_nextSym = nextSym; + s->save_nblockMAX = nblockMAX; + s->save_nblock = nblock; + s->save_es = es; + s->save_N = N; + s->save_curr = curr; + s->save_zt = zt; + s->save_zn = zn; + s->save_zvec = zvec; + s->save_zj = zj; + s->save_gSel = gSel; + 
s->save_gMinlen = gMinlen; + s->save_gLimit = gLimit; + s->save_gBase = gBase; + s->save_gPerm = gPerm; + + return retVal; +} + + +/*-------------------------------------------------------------*/ +/*--- end decompress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/huffman.c b/contrib/libs/libbz2/huffman.c index b4a0d0e4cbf..43a1899e468 100644 --- a/contrib/libs/libbz2/huffman.c +++ b/contrib/libs/libbz2/huffman.c @@ -1,205 +1,205 @@ - -/*-------------------------------------------------------------*/ -/*--- Huffman coding low-level stuff ---*/ -/*--- huffman.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - lossless, block-sorting data compression. - + +/*-------------------------------------------------------------*/ +/*--- Huffman coding low-level stuff ---*/ +/*--- huffman.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - - Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - - -#include "bzlib_private.h" - -/*---------------------------------------------------*/ -#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) -#define DEPTHOF(zz1) ((zz1) & 0x000000ff) -#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? 
(zz2) : (zz3)) - -#define ADDWEIGHTS(zw1,zw2) \ - (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ - (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) - -#define UPHEAP(z) \ -{ \ - Int32 zz, tmp; \ - zz = z; tmp = heap[zz]; \ - while (weight[tmp] < weight[heap[zz >> 1]]) { \ - heap[zz] = heap[zz >> 1]; \ - zz >>= 1; \ - } \ - heap[zz] = tmp; \ -} - -#define DOWNHEAP(z) \ -{ \ - Int32 zz, yy, tmp; \ - zz = z; tmp = heap[zz]; \ - while (True) { \ - yy = zz << 1; \ - if (yy > nHeap) break; \ - if (yy < nHeap && \ - weight[heap[yy+1]] < weight[heap[yy]]) \ - yy++; \ - if (weight[tmp] < weight[heap[yy]]) break; \ - heap[zz] = heap[yy]; \ - zz = yy; \ - } \ - heap[zz] = tmp; \ -} - - -/*---------------------------------------------------*/ -void BZ2_hbMakeCodeLengths ( UChar *len, - Int32 *freq, - Int32 alphaSize, - Int32 maxLen ) -{ - /*-- - Nodes and heap entries run from 1. Entry 0 - for both the heap and nodes is a sentinel. - --*/ - Int32 nNodes, nHeap, n1, n2, i, j, k; - Bool tooLong; - - Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; - Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; - Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; - - for (i = 0; i < alphaSize; i++) - weight[i+1] = (freq[i] == 0 ? 
1 : freq[i]) << 8; - - while (True) { - - nNodes = alphaSize; - nHeap = 0; - - heap[0] = 0; - weight[0] = 0; - parent[0] = -2; - - for (i = 1; i <= alphaSize; i++) { - parent[i] = -1; - nHeap++; - heap[nHeap] = i; - UPHEAP(nHeap); - } - - AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); - - while (nHeap > 1) { - n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); - n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); - nNodes++; - parent[n1] = parent[n2] = nNodes; - weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); - parent[nNodes] = -1; - nHeap++; - heap[nHeap] = nNodes; - UPHEAP(nHeap); - } - - AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); - - tooLong = False; - for (i = 1; i <= alphaSize; i++) { - j = 0; - k = i; - while (parent[k] >= 0) { k = parent[k]; j++; } - len[i-1] = j; - if (j > maxLen) tooLong = True; - } - - if (! tooLong) break; - - /* 17 Oct 04: keep-going condition for the following loop used - to be 'i < alphaSize', which missed the last element, - theoretically leading to the possibility of the compressor - looping. However, this count-scaling step is only needed if - one of the generated Huffman code words is longer than - maxLen, which up to and including version 1.0.2 was 20 bits, - which is extremely unlikely. In version 1.0.3 maxLen was - changed to 17 bits, which has minimal effect on compression - ratio, but does mean this scaling step is used from time to - time, enough to verify that it works. - - This means that bzip2-1.0.3 and later will only produce - Huffman codes with a maximum length of 17 bits. However, in - order to preserve backwards compatibility with bitstreams - produced by versions pre-1.0.3, the decompressor must still - handle lengths of up to 20. 
*/ - - for (i = 1; i <= alphaSize; i++) { - j = weight[i] >> 8; - j = 1 + (j / 2); - weight[i] = j << 8; - } - } -} - - -/*---------------------------------------------------*/ -void BZ2_hbAssignCodes ( Int32 *code, - UChar *length, - Int32 minLen, - Int32 maxLen, - Int32 alphaSize ) -{ - Int32 n, vec, i; - - vec = 0; - for (n = minLen; n <= maxLen; n++) { - for (i = 0; i < alphaSize; i++) - if (length[i] == n) { code[i] = vec; vec++; }; - vec <<= 1; - } -} - - -/*---------------------------------------------------*/ -void BZ2_hbCreateDecodeTables ( Int32 *limit, - Int32 *base, - Int32 *perm, - UChar *length, - Int32 minLen, - Int32 maxLen, - Int32 alphaSize ) -{ - Int32 pp, i, j, vec; - - pp = 0; - for (i = minLen; i <= maxLen; i++) - for (j = 0; j < alphaSize; j++) - if (length[j] == i) { perm[pp] = j; pp++; }; - - for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; - for (i = 0; i < alphaSize; i++) base[length[i]+1]++; - - for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; - - for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; - vec = 0; - - for (i = minLen; i <= maxLen; i++) { - vec += (base[i+1] - base[i]); - limit[i] = vec-1; - vec <<= 1; - } - for (i = minLen + 1; i <= maxLen; i++) - base[i] = ((limit[i-1] + 1) << 1) - base[i]; -} - - -/*-------------------------------------------------------------*/ -/*--- end huffman.c ---*/ -/*-------------------------------------------------------------*/ + + Please read the WARNING, DISCLAIMER and PATENTS sections in the + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + +/*---------------------------------------------------*/ +#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) +#define DEPTHOF(zz1) ((zz1) & 0x000000ff) +#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? 
(zz2) : (zz3)) + +#define ADDWEIGHTS(zw1,zw2) \ + (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ + (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) + +#define UPHEAP(z) \ +{ \ + Int32 zz, tmp; \ + zz = z; tmp = heap[zz]; \ + while (weight[tmp] < weight[heap[zz >> 1]]) { \ + heap[zz] = heap[zz >> 1]; \ + zz >>= 1; \ + } \ + heap[zz] = tmp; \ +} + +#define DOWNHEAP(z) \ +{ \ + Int32 zz, yy, tmp; \ + zz = z; tmp = heap[zz]; \ + while (True) { \ + yy = zz << 1; \ + if (yy > nHeap) break; \ + if (yy < nHeap && \ + weight[heap[yy+1]] < weight[heap[yy]]) \ + yy++; \ + if (weight[tmp] < weight[heap[yy]]) break; \ + heap[zz] = heap[yy]; \ + zz = yy; \ + } \ + heap[zz] = tmp; \ +} + + +/*---------------------------------------------------*/ +void BZ2_hbMakeCodeLengths ( UChar *len, + Int32 *freq, + Int32 alphaSize, + Int32 maxLen ) +{ + /*-- + Nodes and heap entries run from 1. Entry 0 + for both the heap and nodes is a sentinel. + --*/ + Int32 nNodes, nHeap, n1, n2, i, j, k; + Bool tooLong; + + Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; + Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; + Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; + + for (i = 0; i < alphaSize; i++) + weight[i+1] = (freq[i] == 0 ? 
1 : freq[i]) << 8; + + while (True) { + + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + UPHEAP(nHeap); + } + + AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); + + while (nHeap > 1) { + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + nNodes++; + parent[n1] = parent[n2] = nNodes; + weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + UPHEAP(nHeap); + } + + AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); + + tooLong = False; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { k = parent[k]; j++; } + len[i-1] = j; + if (j > maxLen) tooLong = True; + } + + if (! tooLong) break; + + /* 17 Oct 04: keep-going condition for the following loop used + to be 'i < alphaSize', which missed the last element, + theoretically leading to the possibility of the compressor + looping. However, this count-scaling step is only needed if + one of the generated Huffman code words is longer than + maxLen, which up to and including version 1.0.2 was 20 bits, + which is extremely unlikely. In version 1.0.3 maxLen was + changed to 17 bits, which has minimal effect on compression + ratio, but does mean this scaling step is used from time to + time, enough to verify that it works. + + This means that bzip2-1.0.3 and later will only produce + Huffman codes with a maximum length of 17 bits. However, in + order to preserve backwards compatibility with bitstreams + produced by versions pre-1.0.3, the decompressor must still + handle lengths of up to 20. 
*/ + + for (i = 1; i <= alphaSize; i++) { + j = weight[i] >> 8; + j = 1 + (j / 2); + weight[i] = j << 8; + } + } +} + + +/*---------------------------------------------------*/ +void BZ2_hbAssignCodes ( Int32 *code, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) +{ + Int32 n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) + if (length[i] == n) { code[i] = vec; vec++; }; + vec <<= 1; + } +} + + +/*---------------------------------------------------*/ +void BZ2_hbCreateDecodeTables ( Int32 *limit, + Int32 *base, + Int32 *perm, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) +{ + Int32 pp, i, j, vec; + + pp = 0; + for (i = minLen; i <= maxLen; i++) + for (j = 0; j < alphaSize; j++) + if (length[j] == i) { perm[pp] = j; pp++; }; + + for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; + for (i = 0; i < alphaSize; i++) base[length[i]+1]++; + + for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; + + for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; + vec = 0; + + for (i = minLen; i <= maxLen; i++) { + vec += (base[i+1] - base[i]); + limit[i] = vec-1; + vec <<= 1; + } + for (i = minLen + 1; i <= maxLen; i++) + base[i] = ((limit[i-1] + 1) << 1) - base[i]; +} + + +/*-------------------------------------------------------------*/ +/*--- end huffman.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/randtable.c b/contrib/libs/libbz2/randtable.c index 87146eb7baa..43de39ff686 100644 --- a/contrib/libs/libbz2/randtable.c +++ b/contrib/libs/libbz2/randtable.c @@ -1,29 +1,29 @@ - -/*-------------------------------------------------------------*/ -/*--- Table for randomising repetitive blocks ---*/ -/*--- randtable.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ - This file is part of bzip2/libbzip2, a program and library for - 
lossless, block-sorting data compression. - + +/*-------------------------------------------------------------*/ +/*--- Table for randomising repetitive blocks ---*/ +/*--- randtable.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ + This file is part of bzip2/libbzip2, a program and library for + lossless, block-sorting data compression. + bzip2/libbzip2 version 1.0.8 of 13 July 2019 Copyright (C) 1996-2019 Julian Seward - + Please read the WARNING, DISCLAIMER and PATENTS sections in the - README file. - - This program is released under the terms of the license contained - in the file LICENSE. - ------------------------------------------------------------------ */ - - -#include "bzlib_private.h" - - -/*---------------------------------------------*/ -const Int32 BZ2_rNums[512] = { + README file. + + This program is released under the terms of the license contained + in the file LICENSE. + ------------------------------------------------------------------ */ + + +#include "bzlib_private.h" + + +/*---------------------------------------------*/ +const Int32 BZ2_rNums[512] = { 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, @@ -75,10 +75,10 @@ const Int32 BZ2_rNums[512] = { 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, - 936, 638 -}; - - -/*-------------------------------------------------------------*/ -/*--- end randtable.c ---*/ -/*-------------------------------------------------------------*/ + 936, 638 +}; + + +/*-------------------------------------------------------------*/ +/*--- end randtable.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/contrib/libs/libbz2/ya.make b/contrib/libs/libbz2/ya.make index 
3c0c17b863f..3d5be083221 100644 --- a/contrib/libs/libbz2/ya.make +++ b/contrib/libs/libbz2/ya.make @@ -1,12 +1,12 @@ # Generated by devtools/yamaker from nixpkgs 5852a21819542e6809f68ba5a798600e69874e76. -LIBRARY() +LIBRARY() OWNER( orivej g:cpp-contrib ) - + VERSION(1.0.8) ORIGINAL_SOURCE(https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz) @@ -19,21 +19,21 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) NO_COMPILER_WARNINGS() - + NO_RUNTIME() - + IF (SANITIZER_TYPE == undefined) - NO_SANITIZE() + NO_SANITIZE() ENDIF() - -SRCS( + +SRCS( blocksort.c bzlib.c compress.c - crctable.c - decompress.c - huffman.c + crctable.c + decompress.c + huffman.c randtable.c -) - -END() +) + +END() diff --git a/contrib/libs/libc_compat/ifaddrs.c b/contrib/libs/libc_compat/ifaddrs.c index a09db789ba0..c59d8bc7455 100644 --- a/contrib/libs/libc_compat/ifaddrs.c +++ b/contrib/libs/libc_compat/ifaddrs.c @@ -1,663 +1,663 @@ -/* -Copyright (c) 2013, Kenneth MacKay -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "ifaddrs.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef struct NetlinkList -{ - struct NetlinkList *m_next; - struct nlmsghdr *m_data; - unsigned int m_size; -} NetlinkList; - -static int netlink_socket(void) -{ - int l_socket = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if(l_socket < 0) - { - return -1; - } - - struct sockaddr_nl l_addr; - memset(&l_addr, 0, sizeof(l_addr)); - l_addr.nl_family = AF_NETLINK; - if(bind(l_socket, (struct sockaddr *)&l_addr, sizeof(l_addr)) < 0) - { - close(l_socket); - return -1; - } - - return l_socket; -} - -static int netlink_send(int p_socket, int p_request) -{ - struct - { - struct nlmsghdr m_hdr; - struct rtgenmsg m_msg; - } l_data; - - memset(&l_data, 0, sizeof(l_data)); - - l_data.m_hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); - l_data.m_hdr.nlmsg_type = p_request; - l_data.m_hdr.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; - l_data.m_hdr.nlmsg_pid = 0; - l_data.m_hdr.nlmsg_seq = p_socket; - l_data.m_msg.rtgen_family = AF_UNSPEC; - - struct sockaddr_nl l_addr; - memset(&l_addr, 0, sizeof(l_addr)); - l_addr.nl_family = AF_NETLINK; - return (sendto(p_socket, &l_data.m_hdr, l_data.m_hdr.nlmsg_len, 0, (struct sockaddr *)&l_addr, sizeof(l_addr))); -} - -static int netlink_recv(int p_socket, void *p_buffer, size_t p_len) -{ - struct msghdr l_msg; - struct iovec l_iov = { p_buffer, p_len }; - struct 
sockaddr_nl l_addr; - - for(;;) - { - l_msg.msg_name = (void *)&l_addr; - l_msg.msg_namelen = sizeof(l_addr); - l_msg.msg_iov = &l_iov; - l_msg.msg_iovlen = 1; - l_msg.msg_control = NULL; - l_msg.msg_controllen = 0; - l_msg.msg_flags = 0; - int l_result = recvmsg(p_socket, &l_msg, 0); - - if(l_result < 0) - { - if(errno == EINTR) - { - continue; - } - return -2; - } - - if(l_msg.msg_flags & MSG_TRUNC) - { // buffer was too small - return -1; - } - return l_result; - } -} - -static struct nlmsghdr *getNetlinkResponse(int p_socket, int *p_size, int *p_done) -{ - size_t l_size = 4096; - void *l_buffer = NULL; - - for(;;) - { - free(l_buffer); - l_buffer = malloc(l_size); - if (l_buffer == NULL) - { - return NULL; - } - - int l_read = netlink_recv(p_socket, l_buffer, l_size); - *p_size = l_read; - if(l_read == -2) - { - free(l_buffer); - return NULL; - } - if(l_read >= 0) - { - pid_t l_pid = getpid(); - struct nlmsghdr *l_hdr; - for(l_hdr = (struct nlmsghdr *)l_buffer; NLMSG_OK(l_hdr, (unsigned int)l_read); l_hdr = (struct nlmsghdr *)NLMSG_NEXT(l_hdr, l_read)) - { - if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) - { - continue; - } - - if(l_hdr->nlmsg_type == NLMSG_DONE) - { - *p_done = 1; - break; - } - - if(l_hdr->nlmsg_type == NLMSG_ERROR) - { - free(l_buffer); - return NULL; - } - } - return l_buffer; - } - - l_size *= 2; - } -} - -static NetlinkList *newListItem(struct nlmsghdr *p_data, unsigned int p_size) -{ - NetlinkList *l_item = malloc(sizeof(NetlinkList)); - if (l_item == NULL) - { - return NULL; - } - - l_item->m_next = NULL; - l_item->m_data = p_data; - l_item->m_size = p_size; - return l_item; -} - -static void freeResultList(NetlinkList *p_list) -{ - NetlinkList *l_cur; - while(p_list) - { - l_cur = p_list; - p_list = p_list->m_next; - free(l_cur->m_data); - free(l_cur); - } -} - -static NetlinkList *getResultList(int p_socket, int p_request) -{ - if(netlink_send(p_socket, p_request) < 0) - { - return NULL; - } - - NetlinkList 
*l_list = NULL; - NetlinkList *l_end = NULL; - int l_size; - int l_done = 0; - while(!l_done) - { - struct nlmsghdr *l_hdr = getNetlinkResponse(p_socket, &l_size, &l_done); - if(!l_hdr) - { // error - freeResultList(l_list); - return NULL; - } - - NetlinkList *l_item = newListItem(l_hdr, l_size); - if (!l_item) - { - freeResultList(l_list); - return NULL; - } - if(!l_list) - { - l_list = l_item; - } - else - { - l_end->m_next = l_item; - } - l_end = l_item; - } - return l_list; -} - -static size_t maxSize(size_t a, size_t b) -{ - return (a > b ? a : b); -} - -static size_t calcAddrLen(sa_family_t p_family, int p_dataSize) -{ - switch(p_family) - { - case AF_INET: - return sizeof(struct sockaddr_in); - case AF_INET6: - return sizeof(struct sockaddr_in6); - case AF_PACKET: - return maxSize(sizeof(struct sockaddr_ll), offsetof(struct sockaddr_ll, sll_addr) + p_dataSize); - default: - return maxSize(sizeof(struct sockaddr), offsetof(struct sockaddr, sa_data) + p_dataSize); - } -} - -static void makeSockaddr(sa_family_t p_family, struct sockaddr *p_dest, void *p_data, size_t p_size) -{ - switch(p_family) - { - case AF_INET: - memcpy(&((struct sockaddr_in*)p_dest)->sin_addr, p_data, p_size); - break; - case AF_INET6: - memcpy(&((struct sockaddr_in6*)p_dest)->sin6_addr, p_data, p_size); - break; - case AF_PACKET: - memcpy(((struct sockaddr_ll*)p_dest)->sll_addr, p_data, p_size); - ((struct sockaddr_ll*)p_dest)->sll_halen = p_size; - break; - default: - memcpy(p_dest->sa_data, p_data, p_size); - break; - } - p_dest->sa_family = p_family; -} - -static void addToEnd(struct ifaddrs **p_resultList, struct ifaddrs *p_entry) -{ - if(!*p_resultList) - { - *p_resultList = p_entry; - } - else - { - struct ifaddrs *l_cur = *p_resultList; - while(l_cur->ifa_next) - { - l_cur = l_cur->ifa_next; - } - l_cur->ifa_next = p_entry; - } -} - -static int interpretLink(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList) -{ - struct ifinfomsg *l_info = (struct ifinfomsg *)NLMSG_DATA(p_hdr); 
- - size_t l_nameSize = 0; - size_t l_addrSize = 0; - size_t l_dataSize = 0; - - size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg)); - struct rtattr *l_rta; - for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) - { - void *l_rtaData = RTA_DATA(l_rta); - size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); - switch(l_rta->rta_type) - { - case IFLA_ADDRESS: - case IFLA_BROADCAST: - l_addrSize += NLMSG_ALIGN(calcAddrLen(AF_PACKET, l_rtaDataSize)); - break; - case IFLA_IFNAME: - l_nameSize += NLMSG_ALIGN(l_rtaSize + 1); - break; - case IFLA_STATS: - l_dataSize += NLMSG_ALIGN(l_rtaSize); - break; - default: - break; - } - } - - struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + sizeof(int) + l_nameSize + l_addrSize + l_dataSize); - if (l_entry == NULL) - { - return -1; - } - memset(l_entry, 0, sizeof(struct ifaddrs)); - l_entry->ifa_name = ""; - - char *l_index = ((char *)l_entry) + sizeof(struct ifaddrs); - char *l_name = l_index + sizeof(int); - char *l_addr = l_name + l_nameSize; - char *l_data = l_addr + l_addrSize; - - // save the interface index so we can look it up when handling the addresses. 
- memcpy(l_index, &l_info->ifi_index, sizeof(int)); - - l_entry->ifa_flags = l_info->ifi_flags; - - l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg)); - for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) - { - void *l_rtaData = RTA_DATA(l_rta); - size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); - switch(l_rta->rta_type) - { - case IFLA_ADDRESS: - case IFLA_BROADCAST: - { - size_t l_addrLen = calcAddrLen(AF_PACKET, l_rtaDataSize); - makeSockaddr(AF_PACKET, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize); - ((struct sockaddr_ll *)l_addr)->sll_ifindex = l_info->ifi_index; - ((struct sockaddr_ll *)l_addr)->sll_hatype = l_info->ifi_type; - if(l_rta->rta_type == IFLA_ADDRESS) - { - l_entry->ifa_addr = (struct sockaddr *)l_addr; - } - else - { - l_entry->ifa_broadaddr = (struct sockaddr *)l_addr; - } - l_addr += NLMSG_ALIGN(l_addrLen); - break; - } - case IFLA_IFNAME: - strncpy(l_name, l_rtaData, l_rtaDataSize); - l_name[l_rtaDataSize] = '\0'; - l_entry->ifa_name = l_name; - break; - case IFLA_STATS: - memcpy(l_data, l_rtaData, l_rtaDataSize); - l_entry->ifa_data = l_data; - break; - default: - break; - } - } - - addToEnd(p_resultList, l_entry); - return 0; -} - -static struct ifaddrs *findInterface(int p_index, struct ifaddrs **p_links, int p_numLinks) -{ - int l_num = 0; - struct ifaddrs *l_cur = *p_links; - while(l_cur && l_num < p_numLinks) - { - char *l_indexPtr = ((char *)l_cur) + sizeof(struct ifaddrs); - int l_index; - memcpy(&l_index, l_indexPtr, sizeof(int)); - if(l_index == p_index) - { - return l_cur; - } - - l_cur = l_cur->ifa_next; - ++l_num; - } - return NULL; -} - -static int interpretAddr(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList, int p_numLinks) -{ - struct ifaddrmsg *l_info = (struct ifaddrmsg *)NLMSG_DATA(p_hdr); - struct ifaddrs *l_interface = findInterface(l_info->ifa_index, p_resultList, p_numLinks); - - if(l_info->ifa_family == AF_PACKET) - { - return 0; - } - - size_t l_nameSize = 0; - 
size_t l_addrSize = 0; - - int l_addedNetmask = 0; - - size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg)); - struct rtattr *l_rta; - for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) - { - void *l_rtaData = RTA_DATA(l_rta); - size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); - - switch(l_rta->rta_type) - { - case IFA_ADDRESS: - case IFA_LOCAL: - if((l_info->ifa_family == AF_INET || l_info->ifa_family == AF_INET6) && !l_addedNetmask) - { // make room for netmask - l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize)); - l_addedNetmask = 1; - } - case IFA_BROADCAST: - l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize)); - break; - case IFA_LABEL: - l_nameSize += NLMSG_ALIGN(l_rtaSize + 1); - break; - default: - break; - } - } - - struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + l_nameSize + l_addrSize); - if (l_entry == NULL) - { - return -1; - } - memset(l_entry, 0, sizeof(struct ifaddrs)); - l_entry->ifa_name = (l_interface ? 
l_interface->ifa_name : ""); - - char *l_name = ((char *)l_entry) + sizeof(struct ifaddrs); - char *l_addr = l_name + l_nameSize; - - l_entry->ifa_flags = l_info->ifa_flags; - if(l_interface) - { - l_entry->ifa_flags |= l_interface->ifa_flags; - } - - l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg)); - for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) - { - void *l_rtaData = RTA_DATA(l_rta); - size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); - switch(l_rta->rta_type) - { - case IFA_ADDRESS: - case IFA_BROADCAST: - case IFA_LOCAL: - { - size_t l_addrLen = calcAddrLen(l_info->ifa_family, l_rtaDataSize); - makeSockaddr(l_info->ifa_family, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize); - if(l_info->ifa_family == AF_INET6) - { - if(IN6_IS_ADDR_LINKLOCAL((struct in6_addr *)l_rtaData) || IN6_IS_ADDR_MC_LINKLOCAL((struct in6_addr *)l_rtaData)) - { - ((struct sockaddr_in6 *)l_addr)->sin6_scope_id = l_info->ifa_index; - } - } - - if(l_rta->rta_type == IFA_ADDRESS) - { // apparently in a point-to-point network IFA_ADDRESS contains the dest address and IFA_LOCAL contains the local address - if(l_entry->ifa_addr) - { - l_entry->ifa_dstaddr = (struct sockaddr *)l_addr; - } - else - { - l_entry->ifa_addr = (struct sockaddr *)l_addr; - } - } - else if(l_rta->rta_type == IFA_LOCAL) - { - if(l_entry->ifa_addr) - { - l_entry->ifa_dstaddr = l_entry->ifa_addr; - } - l_entry->ifa_addr = (struct sockaddr *)l_addr; - } - else - { - l_entry->ifa_broadaddr = (struct sockaddr *)l_addr; - } - l_addr += NLMSG_ALIGN(l_addrLen); - break; - } - case IFA_LABEL: - strncpy(l_name, l_rtaData, l_rtaDataSize); - l_name[l_rtaDataSize] = '\0'; - l_entry->ifa_name = l_name; - break; - default: - break; - } - } - - if(l_entry->ifa_addr && (l_entry->ifa_addr->sa_family == AF_INET || l_entry->ifa_addr->sa_family == AF_INET6)) - { - unsigned l_maxPrefix = (l_entry->ifa_addr->sa_family == AF_INET ? 
32 : 128); - unsigned l_prefix = (l_info->ifa_prefixlen > l_maxPrefix ? l_maxPrefix : l_info->ifa_prefixlen); - char l_mask[16] = {0}; - unsigned i; - for(i=0; i<(l_prefix/8); ++i) - { - l_mask[i] = 0xff; - } - if(l_prefix % 8) - { - l_mask[i] = 0xff << (8 - (l_prefix % 8)); - } - - makeSockaddr(l_entry->ifa_addr->sa_family, (struct sockaddr *)l_addr, l_mask, l_maxPrefix / 8); - l_entry->ifa_netmask = (struct sockaddr *)l_addr; - } - - addToEnd(p_resultList, l_entry); - return 0; -} - -static int interpretLinks(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList) -{ - int l_numLinks = 0; - pid_t l_pid = getpid(); - for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next) - { - unsigned int l_nlsize = p_netlinkList->m_size; - struct nlmsghdr *l_hdr; - for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize)) - { - if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) - { - continue; - } - - if(l_hdr->nlmsg_type == NLMSG_DONE) - { - break; - } - - if(l_hdr->nlmsg_type == RTM_NEWLINK) - { - if(interpretLink(l_hdr, p_resultList) == -1) - { - return -1; - } - ++l_numLinks; - } - } - } - return l_numLinks; -} - -static int interpretAddrs(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList, int p_numLinks) -{ - pid_t l_pid = getpid(); - for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next) - { - unsigned int l_nlsize = p_netlinkList->m_size; - struct nlmsghdr *l_hdr; - for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize)) - { - if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) - { - continue; - } - - if(l_hdr->nlmsg_type == NLMSG_DONE) - { - break; - } - - if(l_hdr->nlmsg_type == RTM_NEWADDR) - { - if (interpretAddr(l_hdr, p_resultList, p_numLinks) == -1) - { - return -1; - } - } - } - } - return 0; -} - -int getifaddrs(struct ifaddrs **ifap) -{ - if(!ifap) - { - return -1; - } - *ifap = NULL; - 
- int l_socket = netlink_socket(); - if(l_socket < 0) - { - return -1; - } - - NetlinkList *l_linkResults = getResultList(l_socket, RTM_GETLINK); - if(!l_linkResults) - { - close(l_socket); - return -1; - } - - NetlinkList *l_addrResults = getResultList(l_socket, RTM_GETADDR); - if(!l_addrResults) - { - close(l_socket); - freeResultList(l_linkResults); - return -1; - } - - int l_result = 0; - int l_numLinks = interpretLinks(l_socket, l_linkResults, ifap); - if(l_numLinks == -1 || interpretAddrs(l_socket, l_addrResults, ifap, l_numLinks) == -1) - { - l_result = -1; - } - - freeResultList(l_linkResults); - freeResultList(l_addrResults); - close(l_socket); - return l_result; -} - -void freeifaddrs(struct ifaddrs *ifa) -{ - struct ifaddrs *l_cur; - while(ifa) - { - l_cur = ifa; - ifa = ifa->ifa_next; - free(l_cur); - } -} +/* +Copyright (c) 2013, Kenneth MacKay +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "ifaddrs.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct NetlinkList +{ + struct NetlinkList *m_next; + struct nlmsghdr *m_data; + unsigned int m_size; +} NetlinkList; + +static int netlink_socket(void) +{ + int l_socket = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if(l_socket < 0) + { + return -1; + } + + struct sockaddr_nl l_addr; + memset(&l_addr, 0, sizeof(l_addr)); + l_addr.nl_family = AF_NETLINK; + if(bind(l_socket, (struct sockaddr *)&l_addr, sizeof(l_addr)) < 0) + { + close(l_socket); + return -1; + } + + return l_socket; +} + +static int netlink_send(int p_socket, int p_request) +{ + struct + { + struct nlmsghdr m_hdr; + struct rtgenmsg m_msg; + } l_data; + + memset(&l_data, 0, sizeof(l_data)); + + l_data.m_hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); + l_data.m_hdr.nlmsg_type = p_request; + l_data.m_hdr.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + l_data.m_hdr.nlmsg_pid = 0; + l_data.m_hdr.nlmsg_seq = p_socket; + l_data.m_msg.rtgen_family = AF_UNSPEC; + + struct sockaddr_nl l_addr; + memset(&l_addr, 0, sizeof(l_addr)); + l_addr.nl_family = AF_NETLINK; + return (sendto(p_socket, &l_data.m_hdr, l_data.m_hdr.nlmsg_len, 0, (struct sockaddr *)&l_addr, sizeof(l_addr))); +} + +static int netlink_recv(int p_socket, void *p_buffer, size_t p_len) +{ + struct msghdr l_msg; + struct iovec l_iov = { p_buffer, p_len }; + struct 
sockaddr_nl l_addr; + + for(;;) + { + l_msg.msg_name = (void *)&l_addr; + l_msg.msg_namelen = sizeof(l_addr); + l_msg.msg_iov = &l_iov; + l_msg.msg_iovlen = 1; + l_msg.msg_control = NULL; + l_msg.msg_controllen = 0; + l_msg.msg_flags = 0; + int l_result = recvmsg(p_socket, &l_msg, 0); + + if(l_result < 0) + { + if(errno == EINTR) + { + continue; + } + return -2; + } + + if(l_msg.msg_flags & MSG_TRUNC) + { // buffer was too small + return -1; + } + return l_result; + } +} + +static struct nlmsghdr *getNetlinkResponse(int p_socket, int *p_size, int *p_done) +{ + size_t l_size = 4096; + void *l_buffer = NULL; + + for(;;) + { + free(l_buffer); + l_buffer = malloc(l_size); + if (l_buffer == NULL) + { + return NULL; + } + + int l_read = netlink_recv(p_socket, l_buffer, l_size); + *p_size = l_read; + if(l_read == -2) + { + free(l_buffer); + return NULL; + } + if(l_read >= 0) + { + pid_t l_pid = getpid(); + struct nlmsghdr *l_hdr; + for(l_hdr = (struct nlmsghdr *)l_buffer; NLMSG_OK(l_hdr, (unsigned int)l_read); l_hdr = (struct nlmsghdr *)NLMSG_NEXT(l_hdr, l_read)) + { + if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) + { + continue; + } + + if(l_hdr->nlmsg_type == NLMSG_DONE) + { + *p_done = 1; + break; + } + + if(l_hdr->nlmsg_type == NLMSG_ERROR) + { + free(l_buffer); + return NULL; + } + } + return l_buffer; + } + + l_size *= 2; + } +} + +static NetlinkList *newListItem(struct nlmsghdr *p_data, unsigned int p_size) +{ + NetlinkList *l_item = malloc(sizeof(NetlinkList)); + if (l_item == NULL) + { + return NULL; + } + + l_item->m_next = NULL; + l_item->m_data = p_data; + l_item->m_size = p_size; + return l_item; +} + +static void freeResultList(NetlinkList *p_list) +{ + NetlinkList *l_cur; + while(p_list) + { + l_cur = p_list; + p_list = p_list->m_next; + free(l_cur->m_data); + free(l_cur); + } +} + +static NetlinkList *getResultList(int p_socket, int p_request) +{ + if(netlink_send(p_socket, p_request) < 0) + { + return NULL; + } + + NetlinkList 
*l_list = NULL; + NetlinkList *l_end = NULL; + int l_size; + int l_done = 0; + while(!l_done) + { + struct nlmsghdr *l_hdr = getNetlinkResponse(p_socket, &l_size, &l_done); + if(!l_hdr) + { // error + freeResultList(l_list); + return NULL; + } + + NetlinkList *l_item = newListItem(l_hdr, l_size); + if (!l_item) + { + freeResultList(l_list); + return NULL; + } + if(!l_list) + { + l_list = l_item; + } + else + { + l_end->m_next = l_item; + } + l_end = l_item; + } + return l_list; +} + +static size_t maxSize(size_t a, size_t b) +{ + return (a > b ? a : b); +} + +static size_t calcAddrLen(sa_family_t p_family, int p_dataSize) +{ + switch(p_family) + { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_PACKET: + return maxSize(sizeof(struct sockaddr_ll), offsetof(struct sockaddr_ll, sll_addr) + p_dataSize); + default: + return maxSize(sizeof(struct sockaddr), offsetof(struct sockaddr, sa_data) + p_dataSize); + } +} + +static void makeSockaddr(sa_family_t p_family, struct sockaddr *p_dest, void *p_data, size_t p_size) +{ + switch(p_family) + { + case AF_INET: + memcpy(&((struct sockaddr_in*)p_dest)->sin_addr, p_data, p_size); + break; + case AF_INET6: + memcpy(&((struct sockaddr_in6*)p_dest)->sin6_addr, p_data, p_size); + break; + case AF_PACKET: + memcpy(((struct sockaddr_ll*)p_dest)->sll_addr, p_data, p_size); + ((struct sockaddr_ll*)p_dest)->sll_halen = p_size; + break; + default: + memcpy(p_dest->sa_data, p_data, p_size); + break; + } + p_dest->sa_family = p_family; +} + +static void addToEnd(struct ifaddrs **p_resultList, struct ifaddrs *p_entry) +{ + if(!*p_resultList) + { + *p_resultList = p_entry; + } + else + { + struct ifaddrs *l_cur = *p_resultList; + while(l_cur->ifa_next) + { + l_cur = l_cur->ifa_next; + } + l_cur->ifa_next = p_entry; + } +} + +static int interpretLink(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList) +{ + struct ifinfomsg *l_info = (struct ifinfomsg *)NLMSG_DATA(p_hdr); 
+ + size_t l_nameSize = 0; + size_t l_addrSize = 0; + size_t l_dataSize = 0; + + size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg)); + struct rtattr *l_rta; + for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + void *l_rtaData = RTA_DATA(l_rta); + size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); + switch(l_rta->rta_type) + { + case IFLA_ADDRESS: + case IFLA_BROADCAST: + l_addrSize += NLMSG_ALIGN(calcAddrLen(AF_PACKET, l_rtaDataSize)); + break; + case IFLA_IFNAME: + l_nameSize += NLMSG_ALIGN(l_rtaSize + 1); + break; + case IFLA_STATS: + l_dataSize += NLMSG_ALIGN(l_rtaSize); + break; + default: + break; + } + } + + struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + sizeof(int) + l_nameSize + l_addrSize + l_dataSize); + if (l_entry == NULL) + { + return -1; + } + memset(l_entry, 0, sizeof(struct ifaddrs)); + l_entry->ifa_name = ""; + + char *l_index = ((char *)l_entry) + sizeof(struct ifaddrs); + char *l_name = l_index + sizeof(int); + char *l_addr = l_name + l_nameSize; + char *l_data = l_addr + l_addrSize; + + // save the interface index so we can look it up when handling the addresses. 
+ memcpy(l_index, &l_info->ifi_index, sizeof(int)); + + l_entry->ifa_flags = l_info->ifi_flags; + + l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg)); + for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + void *l_rtaData = RTA_DATA(l_rta); + size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); + switch(l_rta->rta_type) + { + case IFLA_ADDRESS: + case IFLA_BROADCAST: + { + size_t l_addrLen = calcAddrLen(AF_PACKET, l_rtaDataSize); + makeSockaddr(AF_PACKET, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize); + ((struct sockaddr_ll *)l_addr)->sll_ifindex = l_info->ifi_index; + ((struct sockaddr_ll *)l_addr)->sll_hatype = l_info->ifi_type; + if(l_rta->rta_type == IFLA_ADDRESS) + { + l_entry->ifa_addr = (struct sockaddr *)l_addr; + } + else + { + l_entry->ifa_broadaddr = (struct sockaddr *)l_addr; + } + l_addr += NLMSG_ALIGN(l_addrLen); + break; + } + case IFLA_IFNAME: + strncpy(l_name, l_rtaData, l_rtaDataSize); + l_name[l_rtaDataSize] = '\0'; + l_entry->ifa_name = l_name; + break; + case IFLA_STATS: + memcpy(l_data, l_rtaData, l_rtaDataSize); + l_entry->ifa_data = l_data; + break; + default: + break; + } + } + + addToEnd(p_resultList, l_entry); + return 0; +} + +static struct ifaddrs *findInterface(int p_index, struct ifaddrs **p_links, int p_numLinks) +{ + int l_num = 0; + struct ifaddrs *l_cur = *p_links; + while(l_cur && l_num < p_numLinks) + { + char *l_indexPtr = ((char *)l_cur) + sizeof(struct ifaddrs); + int l_index; + memcpy(&l_index, l_indexPtr, sizeof(int)); + if(l_index == p_index) + { + return l_cur; + } + + l_cur = l_cur->ifa_next; + ++l_num; + } + return NULL; +} + +static int interpretAddr(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList, int p_numLinks) +{ + struct ifaddrmsg *l_info = (struct ifaddrmsg *)NLMSG_DATA(p_hdr); + struct ifaddrs *l_interface = findInterface(l_info->ifa_index, p_resultList, p_numLinks); + + if(l_info->ifa_family == AF_PACKET) + { + return 0; + } + + size_t l_nameSize = 0; + 
size_t l_addrSize = 0; + + int l_addedNetmask = 0; + + size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg)); + struct rtattr *l_rta; + for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + void *l_rtaData = RTA_DATA(l_rta); + size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); + + switch(l_rta->rta_type) + { + case IFA_ADDRESS: + case IFA_LOCAL: + if((l_info->ifa_family == AF_INET || l_info->ifa_family == AF_INET6) && !l_addedNetmask) + { // make room for netmask + l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize)); + l_addedNetmask = 1; + } + case IFA_BROADCAST: + l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize)); + break; + case IFA_LABEL: + l_nameSize += NLMSG_ALIGN(l_rtaSize + 1); + break; + default: + break; + } + } + + struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + l_nameSize + l_addrSize); + if (l_entry == NULL) + { + return -1; + } + memset(l_entry, 0, sizeof(struct ifaddrs)); + l_entry->ifa_name = (l_interface ? 
l_interface->ifa_name : ""); + + char *l_name = ((char *)l_entry) + sizeof(struct ifaddrs); + char *l_addr = l_name + l_nameSize; + + l_entry->ifa_flags = l_info->ifa_flags; + if(l_interface) + { + l_entry->ifa_flags |= l_interface->ifa_flags; + } + + l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg)); + for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + void *l_rtaData = RTA_DATA(l_rta); + size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); + switch(l_rta->rta_type) + { + case IFA_ADDRESS: + case IFA_BROADCAST: + case IFA_LOCAL: + { + size_t l_addrLen = calcAddrLen(l_info->ifa_family, l_rtaDataSize); + makeSockaddr(l_info->ifa_family, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize); + if(l_info->ifa_family == AF_INET6) + { + if(IN6_IS_ADDR_LINKLOCAL((struct in6_addr *)l_rtaData) || IN6_IS_ADDR_MC_LINKLOCAL((struct in6_addr *)l_rtaData)) + { + ((struct sockaddr_in6 *)l_addr)->sin6_scope_id = l_info->ifa_index; + } + } + + if(l_rta->rta_type == IFA_ADDRESS) + { // apparently in a point-to-point network IFA_ADDRESS contains the dest address and IFA_LOCAL contains the local address + if(l_entry->ifa_addr) + { + l_entry->ifa_dstaddr = (struct sockaddr *)l_addr; + } + else + { + l_entry->ifa_addr = (struct sockaddr *)l_addr; + } + } + else if(l_rta->rta_type == IFA_LOCAL) + { + if(l_entry->ifa_addr) + { + l_entry->ifa_dstaddr = l_entry->ifa_addr; + } + l_entry->ifa_addr = (struct sockaddr *)l_addr; + } + else + { + l_entry->ifa_broadaddr = (struct sockaddr *)l_addr; + } + l_addr += NLMSG_ALIGN(l_addrLen); + break; + } + case IFA_LABEL: + strncpy(l_name, l_rtaData, l_rtaDataSize); + l_name[l_rtaDataSize] = '\0'; + l_entry->ifa_name = l_name; + break; + default: + break; + } + } + + if(l_entry->ifa_addr && (l_entry->ifa_addr->sa_family == AF_INET || l_entry->ifa_addr->sa_family == AF_INET6)) + { + unsigned l_maxPrefix = (l_entry->ifa_addr->sa_family == AF_INET ? 
32 : 128); + unsigned l_prefix = (l_info->ifa_prefixlen > l_maxPrefix ? l_maxPrefix : l_info->ifa_prefixlen); + char l_mask[16] = {0}; + unsigned i; + for(i=0; i<(l_prefix/8); ++i) + { + l_mask[i] = 0xff; + } + if(l_prefix % 8) + { + l_mask[i] = 0xff << (8 - (l_prefix % 8)); + } + + makeSockaddr(l_entry->ifa_addr->sa_family, (struct sockaddr *)l_addr, l_mask, l_maxPrefix / 8); + l_entry->ifa_netmask = (struct sockaddr *)l_addr; + } + + addToEnd(p_resultList, l_entry); + return 0; +} + +static int interpretLinks(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList) +{ + int l_numLinks = 0; + pid_t l_pid = getpid(); + for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next) + { + unsigned int l_nlsize = p_netlinkList->m_size; + struct nlmsghdr *l_hdr; + for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize)) + { + if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) + { + continue; + } + + if(l_hdr->nlmsg_type == NLMSG_DONE) + { + break; + } + + if(l_hdr->nlmsg_type == RTM_NEWLINK) + { + if(interpretLink(l_hdr, p_resultList) == -1) + { + return -1; + } + ++l_numLinks; + } + } + } + return l_numLinks; +} + +static int interpretAddrs(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList, int p_numLinks) +{ + pid_t l_pid = getpid(); + for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next) + { + unsigned int l_nlsize = p_netlinkList->m_size; + struct nlmsghdr *l_hdr; + for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize)) + { + if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) + { + continue; + } + + if(l_hdr->nlmsg_type == NLMSG_DONE) + { + break; + } + + if(l_hdr->nlmsg_type == RTM_NEWADDR) + { + if (interpretAddr(l_hdr, p_resultList, p_numLinks) == -1) + { + return -1; + } + } + } + } + return 0; +} + +int getifaddrs(struct ifaddrs **ifap) +{ + if(!ifap) + { + return -1; + } + *ifap = NULL; + 
+ int l_socket = netlink_socket(); + if(l_socket < 0) + { + return -1; + } + + NetlinkList *l_linkResults = getResultList(l_socket, RTM_GETLINK); + if(!l_linkResults) + { + close(l_socket); + return -1; + } + + NetlinkList *l_addrResults = getResultList(l_socket, RTM_GETADDR); + if(!l_addrResults) + { + close(l_socket); + freeResultList(l_linkResults); + return -1; + } + + int l_result = 0; + int l_numLinks = interpretLinks(l_socket, l_linkResults, ifap); + if(l_numLinks == -1 || interpretAddrs(l_socket, l_addrResults, ifap, l_numLinks) == -1) + { + l_result = -1; + } + + freeResultList(l_linkResults); + freeResultList(l_addrResults); + close(l_socket); + return l_result; +} + +void freeifaddrs(struct ifaddrs *ifa) +{ + struct ifaddrs *l_cur; + while(ifa) + { + l_cur = ifa; + ifa = ifa->ifa_next; + free(l_cur); + } +} diff --git a/contrib/libs/libc_compat/ubuntu_14/ya.make b/contrib/libs/libc_compat/ubuntu_14/ya.make index 69f50fedd46..7355c4ad9d8 100644 --- a/contrib/libs/libc_compat/ubuntu_14/ya.make +++ b/contrib/libs/libc_compat/ubuntu_14/ya.make @@ -2,8 +2,8 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(BSD-3-Clause) - +LICENSE(BSD-3-Clause) + OWNER( somov g:contrib diff --git a/contrib/libs/libevent/event_core/ya.make b/contrib/libs/libevent/event_core/ya.make index bff1de464f6..3988de817a3 100644 --- a/contrib/libs/libevent/event_core/ya.make +++ b/contrib/libs/libevent/event_core/ya.make @@ -11,8 +11,8 @@ OWNER( g:cpp-contrib ) -LICENSE(BSD-3-Clause) - +LICENSE(BSD-3-Clause) + PEERDIR( contrib/libs/libc_compat ) diff --git a/contrib/libs/libevent/event_extra/ya.make b/contrib/libs/libevent/event_extra/ya.make index f73e9b2bd90..9f4c289e622 100644 --- a/contrib/libs/libevent/event_extra/ya.make +++ b/contrib/libs/libevent/event_extra/ya.make @@ -11,8 +11,8 @@ OWNER( g:cpp-contrib ) -LICENSE(BSD-3-Clause) - +LICENSE(BSD-3-Clause) + PEERDIR( contrib/libs/libc_compat ) diff --git a/contrib/libs/libevent/event_openssl/ya.make 
b/contrib/libs/libevent/event_openssl/ya.make index c6d86475fe0..fe042893c05 100644 --- a/contrib/libs/libevent/event_openssl/ya.make +++ b/contrib/libs/libevent/event_openssl/ya.make @@ -11,8 +11,8 @@ OWNER( g:cpp-contrib ) -LICENSE(BSD-3-Clause) - +LICENSE(BSD-3-Clause) + PEERDIR( contrib/libs/libc_compat contrib/libs/openssl diff --git a/contrib/libs/libevent/event_thread/ya.make b/contrib/libs/libevent/event_thread/ya.make index 548b7f53128..63579b456a6 100644 --- a/contrib/libs/libevent/event_thread/ya.make +++ b/contrib/libs/libevent/event_thread/ya.make @@ -11,8 +11,8 @@ OWNER( g:cpp-contrib ) -LICENSE(BSD-3-Clause) - +LICENSE(BSD-3-Clause) + PEERDIR( contrib/libs/libc_compat ) diff --git a/contrib/libs/libidn/static/ya.make b/contrib/libs/libidn/static/ya.make index c4fd089976b..a927d7918ed 100644 --- a/contrib/libs/libidn/static/ya.make +++ b/contrib/libs/libidn/static/ya.make @@ -3,8 +3,8 @@ OWNER( g:cpp-contrib ) -LIBRARY() - +LIBRARY() + LICENSE( Custom-Punycode AND Ietf AND @@ -12,15 +12,15 @@ LICENSE( LGPL-2.1-only AND LGPL-2.1-or-later ) - + LICENSE_TEXTS(../.yandex_meta/licenses.list.txt) -VERSION(1.9) - +VERSION(1.9) + PROVIDES(libidn) -NO_RUNTIME() - +NO_RUNTIME() + NO_COMPILER_WARNINGS() ADDINCL( @@ -34,14 +34,14 @@ CFLAGS( IF (OS_WINDOWS) CFLAGS( -DLIBIDN_EXPORTS - ) + ) ENDIF() IF (OS_ANDROID) CFLAGS( -DHAVE_LOCALE_H=1 ) -ENDIF() +ENDIF() SRCDIR(contrib/libs/libidn) @@ -68,5 +68,5 @@ SRCS( c-strcasecmp.c c-ctype.c ) - -END() + +END() diff --git a/contrib/libs/libidn/unix/config.h b/contrib/libs/libidn/unix/config.h index af4b48f607d..0b4dad46a26 100644 --- a/contrib/libs/libidn/unix/config.h +++ b/contrib/libs/libidn/unix/config.h @@ -9,7 +9,7 @@ /* Define to 1 if translation of program messages to the user's native language is requested. */ -//#define ENABLE_NLS 1 +//#define ENABLE_NLS 1 /* Define to 1 if you have the MacOS X function CFLocaleCopyCurrent in the CoreFoundation framework. 
*/ @@ -21,7 +21,7 @@ /* Define if the GNU dcgettext() function is already present or preinstalled. */ -//#define HAVE_DCGETTEXT 1 +//#define HAVE_DCGETTEXT 1 /* Define to 1 if you have the declaration of `getenv', and to 0 if you don't. */ @@ -192,11 +192,11 @@ /* Define to l, ll, u, ul, ull, etc., as suitable for constants of type 'wchar_t'. */ -#define WCHAR_T_SUFFIX +#define WCHAR_T_SUFFIX /* Define to l, ll, u, ul, ull, etc., as suitable for constants of type 'wint_t'. */ -#define WINT_T_SUFFIX +#define WINT_T_SUFFIX /* Define to 1 if you want TLD code. */ #define WITH_TLD 1 diff --git a/contrib/libs/libidn/win/ac-stdint.h b/contrib/libs/libidn/win/ac-stdint.h index 67b3518bf0c..a3560eb49a6 100644 --- a/contrib/libs/libidn/win/ac-stdint.h +++ b/contrib/libs/libidn/win/ac-stdint.h @@ -21,7 +21,7 @@ #ifndef _GENERATED_STDINT_H #define _GENERATED_STDINT_H -#include +#include #endif #endif diff --git a/contrib/libs/libidn/ya.make b/contrib/libs/libidn/ya.make index 5be09da161e..678a813caa7 100644 --- a/contrib/libs/libidn/ya.make +++ b/contrib/libs/libidn/ya.make @@ -3,32 +3,32 @@ OWNER( g:cpp-contrib ) -LIBRARY() - +LIBRARY() + LICENSE(Service-Dll-Harness) WITHOUT_LICENSE_TEXTS() - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(1.9) - -NO_RUNTIME() - +VERSION(1.9) + +NO_RUNTIME() + NO_COMPILER_WARNINGS() IF (USE_DYNAMIC_IDN) - PEERDIR( - contrib/libs/libidn/dynamic - ) -ELSE() + PEERDIR( + contrib/libs/libidn/dynamic + ) +ELSE() PEERDIR( contrib/libs/libidn/static ) -ENDIF() - -END() - +ENDIF() + +END() + RECURSE( dynamic static diff --git a/contrib/libs/libunwind/include/__libunwind_config.h b/contrib/libs/libunwind/include/__libunwind_config.h index 8cb3981f607..e87bcf40034 100644 --- a/contrib/libs/libunwind/include/__libunwind_config.h +++ b/contrib/libs/libunwind/include/__libunwind_config.h @@ -1,19 +1,19 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef ____LIBUNWIND_CONFIG_H__ -#define ____LIBUNWIND_CONFIG_H__ - -#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ - !defined(__ARM_DWARF_EH__) +// +//===----------------------------------------------------------------------===// + +#ifndef ____LIBUNWIND_CONFIG_H__ +#define ____LIBUNWIND_CONFIG_H__ + +#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ + !defined(__ARM_DWARF_EH__) #define _LIBUNWIND_ARM_EHABI -#endif - +#endif + #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86 8 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64 32 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC 112 @@ -181,4 +181,4 @@ # define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287 #endif // _LIBUNWIND_IS_NATIVE_ONLY -#endif // ____LIBUNWIND_CONFIG_H__ +#endif // ____LIBUNWIND_CONFIG_H__ diff --git a/contrib/libs/libunwind/include/libunwind.h b/contrib/libs/libunwind/include/libunwind.h index 8e1a4266cfc..8303c1a04c9 100644 --- a/contrib/libs/libunwind/include/libunwind.h +++ b/contrib/libs/libunwind/include/libunwind.h @@ -1,24 +1,24 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// +// +// // Compatible with libunwind API documented at: -// http://www.nongnu.org/libunwind/man/libunwind(3).html -// -//===----------------------------------------------------------------------===// - -#ifndef __LIBUNWIND__ -#define __LIBUNWIND__ - -#include "__libunwind_config.h" - -#include -#include - -#ifdef __APPLE__ +// http://www.nongnu.org/libunwind/man/libunwind(3).html +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBUNWIND__ +#define __LIBUNWIND__ + +#include "__libunwind_config.h" + +#include +#include + +#ifdef __APPLE__ #if __clang__ #if __has_include() #include @@ -35,140 +35,140 @@ #include #ifdef AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define LIBUNWIND_AVAIL AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER - #else + #else #define LIBUNWIND_AVAIL __attribute__((unavailable)) - #endif + #endif #endif -#else - #define LIBUNWIND_AVAIL -#endif - +#else + #define LIBUNWIND_AVAIL +#endif + #if defined(_WIN32) && defined(__SEH__) #define LIBUNWIND_CURSOR_ALIGNMENT_ATTR __attribute__((__aligned__(16))) #else #define LIBUNWIND_CURSOR_ALIGNMENT_ATTR #endif -/* error codes */ -enum { - UNW_ESUCCESS = 0, /* no error */ - UNW_EUNSPEC = -6540, /* unspecified (general) error */ - UNW_ENOMEM = -6541, /* out of memory */ - UNW_EBADREG = -6542, /* bad register number */ - UNW_EREADONLYREG = -6543, /* attempt to write read-only register */ - UNW_ESTOPUNWIND = -6544, /* stop unwinding */ - UNW_EINVALIDIP = -6545, /* invalid IP */ - UNW_EBADFRAME = -6546, /* bad frame */ - UNW_EINVAL = -6547, /* unsupported operation or bad value */ - UNW_EBADVERSION = -6548, /* unwind info has unsupported version */ - UNW_ENOINFO = -6549 /* no unwind info found */ +/* error codes */ +enum { + UNW_ESUCCESS = 0, /* no error */ + UNW_EUNSPEC = -6540, /* unspecified (general) error */ + UNW_ENOMEM = -6541, /* out of memory */ + UNW_EBADREG = -6542, /* bad register number */ 
+ UNW_EREADONLYREG = -6543, /* attempt to write read-only register */ + UNW_ESTOPUNWIND = -6544, /* stop unwinding */ + UNW_EINVALIDIP = -6545, /* invalid IP */ + UNW_EBADFRAME = -6546, /* bad frame */ + UNW_EINVAL = -6547, /* unsupported operation or bad value */ + UNW_EBADVERSION = -6548, /* unwind info has unsupported version */ + UNW_ENOINFO = -6549 /* no unwind info found */ #if defined(_LIBUNWIND_TARGET_AARCH64) && !defined(_LIBUNWIND_IS_NATIVE_ONLY) , UNW_ECROSSRASIGNING = -6550 /* cross unwind with return address signing */ #endif -}; - -struct unw_context_t { +}; + +struct unw_context_t { uint64_t data[_LIBUNWIND_CONTEXT_SIZE]; -}; -typedef struct unw_context_t unw_context_t; - -struct unw_cursor_t { +}; +typedef struct unw_context_t unw_context_t; + +struct unw_cursor_t { uint64_t data[_LIBUNWIND_CURSOR_SIZE]; } LIBUNWIND_CURSOR_ALIGNMENT_ATTR; -typedef struct unw_cursor_t unw_cursor_t; - -typedef struct unw_addr_space *unw_addr_space_t; - -typedef int unw_regnum_t; +typedef struct unw_cursor_t unw_cursor_t; + +typedef struct unw_addr_space *unw_addr_space_t; + +typedef int unw_regnum_t; typedef uintptr_t unw_word_t; #if defined(__arm__) && !defined(__ARM_DWARF_EH__) -typedef uint64_t unw_fpreg_t; -#else -typedef double unw_fpreg_t; -#endif - -struct unw_proc_info_t { - unw_word_t start_ip; /* start address of function */ - unw_word_t end_ip; /* address after end of function */ - unw_word_t lsda; /* address of language specific data area, */ - /* or zero if not used */ - unw_word_t handler; /* personality routine, or zero if not used */ - unw_word_t gp; /* not used */ - unw_word_t flags; /* not used */ - uint32_t format; /* compact unwind encoding, or zero if none */ +typedef uint64_t unw_fpreg_t; +#else +typedef double unw_fpreg_t; +#endif + +struct unw_proc_info_t { + unw_word_t start_ip; /* start address of function */ + unw_word_t end_ip; /* address after end of function */ + unw_word_t lsda; /* address of language specific data area, */ + /* or zero 
if not used */ + unw_word_t handler; /* personality routine, or zero if not used */ + unw_word_t gp; /* not used */ + unw_word_t flags; /* not used */ + uint32_t format; /* compact unwind encoding, or zero if none */ uint32_t unwind_info_size; /* size of DWARF unwind info, or zero if none */ unw_word_t unwind_info; /* address of DWARF unwind info, or zero */ - unw_word_t extra; /* mach_header of mach-o image containing func */ -}; -typedef struct unw_proc_info_t unw_proc_info_t; - -#ifdef __cplusplus -extern "C" { -#endif - -extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL; -extern int unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL; -extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL; -extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL; -extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL; -extern int unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t) LIBUNWIND_AVAIL; -extern int unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t) LIBUNWIND_AVAIL; -extern int unw_resume(unw_cursor_t *) LIBUNWIND_AVAIL; - -#ifdef __arm__ -/* Save VFP registers in FSTMX format (instead of FSTMD). 
*/ -extern void unw_save_vfp_as_X(unw_cursor_t *) LIBUNWIND_AVAIL; -#endif - - -extern const char *unw_regname(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; -extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *) LIBUNWIND_AVAIL; -extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; -extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL; -extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL; -//extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*); - -extern unw_addr_space_t unw_local_addr_space; - -#ifdef __cplusplus -} -#endif - -// architecture independent register numbers -enum { - UNW_REG_IP = -1, // instruction pointer - UNW_REG_SP = -2, // stack pointer -}; - -// 32-bit x86 registers -enum { - UNW_X86_EAX = 0, - UNW_X86_ECX = 1, - UNW_X86_EDX = 2, - UNW_X86_EBX = 3, - UNW_X86_EBP = 4, - UNW_X86_ESP = 5, - UNW_X86_ESI = 6, - UNW_X86_EDI = 7 -}; - -// 64-bit x86_64 registers -enum { - UNW_X86_64_RAX = 0, - UNW_X86_64_RDX = 1, - UNW_X86_64_RCX = 2, - UNW_X86_64_RBX = 3, - UNW_X86_64_RSI = 4, - UNW_X86_64_RDI = 5, - UNW_X86_64_RBP = 6, - UNW_X86_64_RSP = 7, - UNW_X86_64_R8 = 8, - UNW_X86_64_R9 = 9, - UNW_X86_64_R10 = 10, - UNW_X86_64_R11 = 11, - UNW_X86_64_R12 = 12, - UNW_X86_64_R13 = 13, - UNW_X86_64_R14 = 14, + unw_word_t extra; /* mach_header of mach-o image containing func */ +}; +typedef struct unw_proc_info_t unw_proc_info_t; + +#ifdef __cplusplus +extern "C" { +#endif + +extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL; +extern int unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL; +extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL; +extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL; +extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL; +extern int unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t) LIBUNWIND_AVAIL; +extern int unw_set_fpreg(unw_cursor_t *, unw_regnum_t, 
unw_fpreg_t) LIBUNWIND_AVAIL; +extern int unw_resume(unw_cursor_t *) LIBUNWIND_AVAIL; + +#ifdef __arm__ +/* Save VFP registers in FSTMX format (instead of FSTMD). */ +extern void unw_save_vfp_as_X(unw_cursor_t *) LIBUNWIND_AVAIL; +#endif + + +extern const char *unw_regname(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; +extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *) LIBUNWIND_AVAIL; +extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; +extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL; +extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL; +//extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*); + +extern unw_addr_space_t unw_local_addr_space; + +#ifdef __cplusplus +} +#endif + +// architecture independent register numbers +enum { + UNW_REG_IP = -1, // instruction pointer + UNW_REG_SP = -2, // stack pointer +}; + +// 32-bit x86 registers +enum { + UNW_X86_EAX = 0, + UNW_X86_ECX = 1, + UNW_X86_EDX = 2, + UNW_X86_EBX = 3, + UNW_X86_EBP = 4, + UNW_X86_ESP = 5, + UNW_X86_ESI = 6, + UNW_X86_EDI = 7 +}; + +// 64-bit x86_64 registers +enum { + UNW_X86_64_RAX = 0, + UNW_X86_64_RDX = 1, + UNW_X86_64_RCX = 2, + UNW_X86_64_RBX = 3, + UNW_X86_64_RSI = 4, + UNW_X86_64_RDI = 5, + UNW_X86_64_RBP = 6, + UNW_X86_64_RSP = 7, + UNW_X86_64_R8 = 8, + UNW_X86_64_R9 = 9, + UNW_X86_64_R10 = 10, + UNW_X86_64_R11 = 11, + UNW_X86_64_R12 = 12, + UNW_X86_64_R13 = 13, + UNW_X86_64_R14 = 14, UNW_X86_64_R15 = 15, UNW_X86_64_RIP = 16, UNW_X86_64_XMM0 = 17, @@ -187,126 +187,126 @@ enum { UNW_X86_64_XMM13 = 30, UNW_X86_64_XMM14 = 31, UNW_X86_64_XMM15 = 32, -}; - - -// 32-bit ppc register numbers -enum { - UNW_PPC_R0 = 0, - UNW_PPC_R1 = 1, - UNW_PPC_R2 = 2, - UNW_PPC_R3 = 3, - UNW_PPC_R4 = 4, - UNW_PPC_R5 = 5, - UNW_PPC_R6 = 6, - UNW_PPC_R7 = 7, - UNW_PPC_R8 = 8, - UNW_PPC_R9 = 9, - UNW_PPC_R10 = 10, - UNW_PPC_R11 = 11, - UNW_PPC_R12 = 12, - UNW_PPC_R13 = 13, - UNW_PPC_R14 = 14, - UNW_PPC_R15 = 15, - 
UNW_PPC_R16 = 16, - UNW_PPC_R17 = 17, - UNW_PPC_R18 = 18, - UNW_PPC_R19 = 19, - UNW_PPC_R20 = 20, - UNW_PPC_R21 = 21, - UNW_PPC_R22 = 22, - UNW_PPC_R23 = 23, - UNW_PPC_R24 = 24, - UNW_PPC_R25 = 25, - UNW_PPC_R26 = 26, - UNW_PPC_R27 = 27, - UNW_PPC_R28 = 28, - UNW_PPC_R29 = 29, - UNW_PPC_R30 = 30, - UNW_PPC_R31 = 31, - UNW_PPC_F0 = 32, - UNW_PPC_F1 = 33, - UNW_PPC_F2 = 34, - UNW_PPC_F3 = 35, - UNW_PPC_F4 = 36, - UNW_PPC_F5 = 37, - UNW_PPC_F6 = 38, - UNW_PPC_F7 = 39, - UNW_PPC_F8 = 40, - UNW_PPC_F9 = 41, - UNW_PPC_F10 = 42, - UNW_PPC_F11 = 43, - UNW_PPC_F12 = 44, - UNW_PPC_F13 = 45, - UNW_PPC_F14 = 46, - UNW_PPC_F15 = 47, - UNW_PPC_F16 = 48, - UNW_PPC_F17 = 49, - UNW_PPC_F18 = 50, - UNW_PPC_F19 = 51, - UNW_PPC_F20 = 52, - UNW_PPC_F21 = 53, - UNW_PPC_F22 = 54, - UNW_PPC_F23 = 55, - UNW_PPC_F24 = 56, - UNW_PPC_F25 = 57, - UNW_PPC_F26 = 58, - UNW_PPC_F27 = 59, - UNW_PPC_F28 = 60, - UNW_PPC_F29 = 61, - UNW_PPC_F30 = 62, - UNW_PPC_F31 = 63, - UNW_PPC_MQ = 64, - UNW_PPC_LR = 65, - UNW_PPC_CTR = 66, - UNW_PPC_AP = 67, - UNW_PPC_CR0 = 68, - UNW_PPC_CR1 = 69, - UNW_PPC_CR2 = 70, - UNW_PPC_CR3 = 71, - UNW_PPC_CR4 = 72, - UNW_PPC_CR5 = 73, - UNW_PPC_CR6 = 74, - UNW_PPC_CR7 = 75, - UNW_PPC_XER = 76, - UNW_PPC_V0 = 77, - UNW_PPC_V1 = 78, - UNW_PPC_V2 = 79, - UNW_PPC_V3 = 80, - UNW_PPC_V4 = 81, - UNW_PPC_V5 = 82, - UNW_PPC_V6 = 83, - UNW_PPC_V7 = 84, - UNW_PPC_V8 = 85, - UNW_PPC_V9 = 86, - UNW_PPC_V10 = 87, - UNW_PPC_V11 = 88, - UNW_PPC_V12 = 89, - UNW_PPC_V13 = 90, - UNW_PPC_V14 = 91, - UNW_PPC_V15 = 92, - UNW_PPC_V16 = 93, - UNW_PPC_V17 = 94, - UNW_PPC_V18 = 95, - UNW_PPC_V19 = 96, - UNW_PPC_V20 = 97, - UNW_PPC_V21 = 98, - UNW_PPC_V22 = 99, - UNW_PPC_V23 = 100, - UNW_PPC_V24 = 101, - UNW_PPC_V25 = 102, - UNW_PPC_V26 = 103, - UNW_PPC_V27 = 104, - UNW_PPC_V28 = 105, - UNW_PPC_V29 = 106, - UNW_PPC_V30 = 107, - UNW_PPC_V31 = 108, - UNW_PPC_VRSAVE = 109, - UNW_PPC_VSCR = 110, - UNW_PPC_SPE_ACC = 111, - UNW_PPC_SPEFSCR = 112 -}; - +}; + + +// 32-bit ppc register numbers +enum { + 
UNW_PPC_R0 = 0, + UNW_PPC_R1 = 1, + UNW_PPC_R2 = 2, + UNW_PPC_R3 = 3, + UNW_PPC_R4 = 4, + UNW_PPC_R5 = 5, + UNW_PPC_R6 = 6, + UNW_PPC_R7 = 7, + UNW_PPC_R8 = 8, + UNW_PPC_R9 = 9, + UNW_PPC_R10 = 10, + UNW_PPC_R11 = 11, + UNW_PPC_R12 = 12, + UNW_PPC_R13 = 13, + UNW_PPC_R14 = 14, + UNW_PPC_R15 = 15, + UNW_PPC_R16 = 16, + UNW_PPC_R17 = 17, + UNW_PPC_R18 = 18, + UNW_PPC_R19 = 19, + UNW_PPC_R20 = 20, + UNW_PPC_R21 = 21, + UNW_PPC_R22 = 22, + UNW_PPC_R23 = 23, + UNW_PPC_R24 = 24, + UNW_PPC_R25 = 25, + UNW_PPC_R26 = 26, + UNW_PPC_R27 = 27, + UNW_PPC_R28 = 28, + UNW_PPC_R29 = 29, + UNW_PPC_R30 = 30, + UNW_PPC_R31 = 31, + UNW_PPC_F0 = 32, + UNW_PPC_F1 = 33, + UNW_PPC_F2 = 34, + UNW_PPC_F3 = 35, + UNW_PPC_F4 = 36, + UNW_PPC_F5 = 37, + UNW_PPC_F6 = 38, + UNW_PPC_F7 = 39, + UNW_PPC_F8 = 40, + UNW_PPC_F9 = 41, + UNW_PPC_F10 = 42, + UNW_PPC_F11 = 43, + UNW_PPC_F12 = 44, + UNW_PPC_F13 = 45, + UNW_PPC_F14 = 46, + UNW_PPC_F15 = 47, + UNW_PPC_F16 = 48, + UNW_PPC_F17 = 49, + UNW_PPC_F18 = 50, + UNW_PPC_F19 = 51, + UNW_PPC_F20 = 52, + UNW_PPC_F21 = 53, + UNW_PPC_F22 = 54, + UNW_PPC_F23 = 55, + UNW_PPC_F24 = 56, + UNW_PPC_F25 = 57, + UNW_PPC_F26 = 58, + UNW_PPC_F27 = 59, + UNW_PPC_F28 = 60, + UNW_PPC_F29 = 61, + UNW_PPC_F30 = 62, + UNW_PPC_F31 = 63, + UNW_PPC_MQ = 64, + UNW_PPC_LR = 65, + UNW_PPC_CTR = 66, + UNW_PPC_AP = 67, + UNW_PPC_CR0 = 68, + UNW_PPC_CR1 = 69, + UNW_PPC_CR2 = 70, + UNW_PPC_CR3 = 71, + UNW_PPC_CR4 = 72, + UNW_PPC_CR5 = 73, + UNW_PPC_CR6 = 74, + UNW_PPC_CR7 = 75, + UNW_PPC_XER = 76, + UNW_PPC_V0 = 77, + UNW_PPC_V1 = 78, + UNW_PPC_V2 = 79, + UNW_PPC_V3 = 80, + UNW_PPC_V4 = 81, + UNW_PPC_V5 = 82, + UNW_PPC_V6 = 83, + UNW_PPC_V7 = 84, + UNW_PPC_V8 = 85, + UNW_PPC_V9 = 86, + UNW_PPC_V10 = 87, + UNW_PPC_V11 = 88, + UNW_PPC_V12 = 89, + UNW_PPC_V13 = 90, + UNW_PPC_V14 = 91, + UNW_PPC_V15 = 92, + UNW_PPC_V16 = 93, + UNW_PPC_V17 = 94, + UNW_PPC_V18 = 95, + UNW_PPC_V19 = 96, + UNW_PPC_V20 = 97, + UNW_PPC_V21 = 98, + UNW_PPC_V22 = 99, + UNW_PPC_V23 = 100, + UNW_PPC_V24 = 101, + 
UNW_PPC_V25 = 102, + UNW_PPC_V26 = 103, + UNW_PPC_V27 = 104, + UNW_PPC_V28 = 105, + UNW_PPC_V29 = 106, + UNW_PPC_V30 = 107, + UNW_PPC_V31 = 108, + UNW_PPC_VRSAVE = 109, + UNW_PPC_VSCR = 110, + UNW_PPC_SPE_ACC = 111, + UNW_PPC_SPEFSCR = 112 +}; + // 64-bit ppc register numbers enum { UNW_PPC64_R0 = 0, @@ -491,8 +491,8 @@ enum { UNW_PPC64_VS63 = UNW_PPC64_V31 }; -// 64-bit ARM64 registers -enum { +// 64-bit ARM64 registers +enum { UNW_AARCH64_X0 = 0, UNW_AARCH64_X1 = 1, UNW_AARCH64_X2 = 2, @@ -530,7 +530,7 @@ enum { UNW_AARCH64_SP = 31, UNW_AARCH64_PC = 32, - // reserved block + // reserved block UNW_AARCH64_RA_SIGN_STATE = 34, // FP/vector registers @@ -637,176 +637,176 @@ enum { UNW_ARM64_D29 = UNW_AARCH64_V29, UNW_ARM64_D30 = UNW_AARCH64_V30, UNW_ARM64_D31 = UNW_AARCH64_V31, -}; - -// 32-bit ARM registers. Numbers match DWARF for ARM spec #3.1 Table 1. -// Naming scheme uses recommendations given in Note 4 for VFP-v2 and VFP-v3. -// In this scheme, even though the 64-bit floating point registers D0-D31 -// overlap physically with the 32-bit floating pointer registers S0-S31, -// they are given a non-overlapping range of register numbers. -// -// Commented out ranges are not preserved during unwinding. -enum { - UNW_ARM_R0 = 0, - UNW_ARM_R1 = 1, - UNW_ARM_R2 = 2, - UNW_ARM_R3 = 3, - UNW_ARM_R4 = 4, - UNW_ARM_R5 = 5, - UNW_ARM_R6 = 6, - UNW_ARM_R7 = 7, - UNW_ARM_R8 = 8, - UNW_ARM_R9 = 9, - UNW_ARM_R10 = 10, - UNW_ARM_R11 = 11, - UNW_ARM_R12 = 12, - UNW_ARM_SP = 13, // Logical alias for UNW_REG_SP - UNW_ARM_R13 = 13, - UNW_ARM_LR = 14, - UNW_ARM_R14 = 14, - UNW_ARM_IP = 15, // Logical alias for UNW_REG_IP - UNW_ARM_R15 = 15, - // 16-63 -- OBSOLETE. Used in VFP1 to represent both S0-S31 and D0-D31. 
- UNW_ARM_S0 = 64, - UNW_ARM_S1 = 65, - UNW_ARM_S2 = 66, - UNW_ARM_S3 = 67, - UNW_ARM_S4 = 68, - UNW_ARM_S5 = 69, - UNW_ARM_S6 = 70, - UNW_ARM_S7 = 71, - UNW_ARM_S8 = 72, - UNW_ARM_S9 = 73, - UNW_ARM_S10 = 74, - UNW_ARM_S11 = 75, - UNW_ARM_S12 = 76, - UNW_ARM_S13 = 77, - UNW_ARM_S14 = 78, - UNW_ARM_S15 = 79, - UNW_ARM_S16 = 80, - UNW_ARM_S17 = 81, - UNW_ARM_S18 = 82, - UNW_ARM_S19 = 83, - UNW_ARM_S20 = 84, - UNW_ARM_S21 = 85, - UNW_ARM_S22 = 86, - UNW_ARM_S23 = 87, - UNW_ARM_S24 = 88, - UNW_ARM_S25 = 89, - UNW_ARM_S26 = 90, - UNW_ARM_S27 = 91, - UNW_ARM_S28 = 92, - UNW_ARM_S29 = 93, - UNW_ARM_S30 = 94, - UNW_ARM_S31 = 95, - // 96-103 -- OBSOLETE. F0-F7. Used by the FPA system. Superseded by VFP. - // 104-111 -- wCGR0-wCGR7, ACC0-ACC7 (Intel wireless MMX) - UNW_ARM_WR0 = 112, - UNW_ARM_WR1 = 113, - UNW_ARM_WR2 = 114, - UNW_ARM_WR3 = 115, - UNW_ARM_WR4 = 116, - UNW_ARM_WR5 = 117, - UNW_ARM_WR6 = 118, - UNW_ARM_WR7 = 119, - UNW_ARM_WR8 = 120, - UNW_ARM_WR9 = 121, - UNW_ARM_WR10 = 122, - UNW_ARM_WR11 = 123, - UNW_ARM_WR12 = 124, - UNW_ARM_WR13 = 125, - UNW_ARM_WR14 = 126, - UNW_ARM_WR15 = 127, - // 128-133 -- SPSR, SPSR_{FIQ|IRQ|ABT|UND|SVC} +}; + +// 32-bit ARM registers. Numbers match DWARF for ARM spec #3.1 Table 1. +// Naming scheme uses recommendations given in Note 4 for VFP-v2 and VFP-v3. +// In this scheme, even though the 64-bit floating point registers D0-D31 +// overlap physically with the 32-bit floating pointer registers S0-S31, +// they are given a non-overlapping range of register numbers. +// +// Commented out ranges are not preserved during unwinding. 
+enum { + UNW_ARM_R0 = 0, + UNW_ARM_R1 = 1, + UNW_ARM_R2 = 2, + UNW_ARM_R3 = 3, + UNW_ARM_R4 = 4, + UNW_ARM_R5 = 5, + UNW_ARM_R6 = 6, + UNW_ARM_R7 = 7, + UNW_ARM_R8 = 8, + UNW_ARM_R9 = 9, + UNW_ARM_R10 = 10, + UNW_ARM_R11 = 11, + UNW_ARM_R12 = 12, + UNW_ARM_SP = 13, // Logical alias for UNW_REG_SP + UNW_ARM_R13 = 13, + UNW_ARM_LR = 14, + UNW_ARM_R14 = 14, + UNW_ARM_IP = 15, // Logical alias for UNW_REG_IP + UNW_ARM_R15 = 15, + // 16-63 -- OBSOLETE. Used in VFP1 to represent both S0-S31 and D0-D31. + UNW_ARM_S0 = 64, + UNW_ARM_S1 = 65, + UNW_ARM_S2 = 66, + UNW_ARM_S3 = 67, + UNW_ARM_S4 = 68, + UNW_ARM_S5 = 69, + UNW_ARM_S6 = 70, + UNW_ARM_S7 = 71, + UNW_ARM_S8 = 72, + UNW_ARM_S9 = 73, + UNW_ARM_S10 = 74, + UNW_ARM_S11 = 75, + UNW_ARM_S12 = 76, + UNW_ARM_S13 = 77, + UNW_ARM_S14 = 78, + UNW_ARM_S15 = 79, + UNW_ARM_S16 = 80, + UNW_ARM_S17 = 81, + UNW_ARM_S18 = 82, + UNW_ARM_S19 = 83, + UNW_ARM_S20 = 84, + UNW_ARM_S21 = 85, + UNW_ARM_S22 = 86, + UNW_ARM_S23 = 87, + UNW_ARM_S24 = 88, + UNW_ARM_S25 = 89, + UNW_ARM_S26 = 90, + UNW_ARM_S27 = 91, + UNW_ARM_S28 = 92, + UNW_ARM_S29 = 93, + UNW_ARM_S30 = 94, + UNW_ARM_S31 = 95, + // 96-103 -- OBSOLETE. F0-F7. Used by the FPA system. Superseded by VFP. 
+ // 104-111 -- wCGR0-wCGR7, ACC0-ACC7 (Intel wireless MMX) + UNW_ARM_WR0 = 112, + UNW_ARM_WR1 = 113, + UNW_ARM_WR2 = 114, + UNW_ARM_WR3 = 115, + UNW_ARM_WR4 = 116, + UNW_ARM_WR5 = 117, + UNW_ARM_WR6 = 118, + UNW_ARM_WR7 = 119, + UNW_ARM_WR8 = 120, + UNW_ARM_WR9 = 121, + UNW_ARM_WR10 = 122, + UNW_ARM_WR11 = 123, + UNW_ARM_WR12 = 124, + UNW_ARM_WR13 = 125, + UNW_ARM_WR14 = 126, + UNW_ARM_WR15 = 127, + // 128-133 -- SPSR, SPSR_{FIQ|IRQ|ABT|UND|SVC} // 134-142 -- Reserved UNW_ARM_RA_AUTH_CODE = 143, - // 144-150 -- R8_USR-R14_USR - // 151-157 -- R8_FIQ-R14_FIQ - // 158-159 -- R13_IRQ-R14_IRQ - // 160-161 -- R13_ABT-R14_ABT - // 162-163 -- R13_UND-R14_UND - // 164-165 -- R13_SVC-R14_SVC - // 166-191 -- Reserved - UNW_ARM_WC0 = 192, - UNW_ARM_WC1 = 193, - UNW_ARM_WC2 = 194, - UNW_ARM_WC3 = 195, - // 196-199 -- wC4-wC7 (Intel wireless MMX control) - // 200-255 -- Reserved - UNW_ARM_D0 = 256, - UNW_ARM_D1 = 257, - UNW_ARM_D2 = 258, - UNW_ARM_D3 = 259, - UNW_ARM_D4 = 260, - UNW_ARM_D5 = 261, - UNW_ARM_D6 = 262, - UNW_ARM_D7 = 263, - UNW_ARM_D8 = 264, - UNW_ARM_D9 = 265, - UNW_ARM_D10 = 266, - UNW_ARM_D11 = 267, - UNW_ARM_D12 = 268, - UNW_ARM_D13 = 269, - UNW_ARM_D14 = 270, - UNW_ARM_D15 = 271, - UNW_ARM_D16 = 272, - UNW_ARM_D17 = 273, - UNW_ARM_D18 = 274, - UNW_ARM_D19 = 275, - UNW_ARM_D20 = 276, - UNW_ARM_D21 = 277, - UNW_ARM_D22 = 278, - UNW_ARM_D23 = 279, - UNW_ARM_D24 = 280, - UNW_ARM_D25 = 281, - UNW_ARM_D26 = 282, - UNW_ARM_D27 = 283, - UNW_ARM_D28 = 284, - UNW_ARM_D29 = 285, - UNW_ARM_D30 = 286, - UNW_ARM_D31 = 287, - // 288-319 -- Reserved for VFP/Neon - // 320-8191 -- Reserved - // 8192-16383 -- Unspecified vendor co-processor register. 
-}; - -// OpenRISC1000 register numbers -enum { - UNW_OR1K_R0 = 0, - UNW_OR1K_R1 = 1, - UNW_OR1K_R2 = 2, - UNW_OR1K_R3 = 3, - UNW_OR1K_R4 = 4, - UNW_OR1K_R5 = 5, - UNW_OR1K_R6 = 6, - UNW_OR1K_R7 = 7, - UNW_OR1K_R8 = 8, - UNW_OR1K_R9 = 9, - UNW_OR1K_R10 = 10, - UNW_OR1K_R11 = 11, - UNW_OR1K_R12 = 12, - UNW_OR1K_R13 = 13, - UNW_OR1K_R14 = 14, - UNW_OR1K_R15 = 15, - UNW_OR1K_R16 = 16, - UNW_OR1K_R17 = 17, - UNW_OR1K_R18 = 18, - UNW_OR1K_R19 = 19, - UNW_OR1K_R20 = 20, - UNW_OR1K_R21 = 21, - UNW_OR1K_R22 = 22, - UNW_OR1K_R23 = 23, - UNW_OR1K_R24 = 24, - UNW_OR1K_R25 = 25, - UNW_OR1K_R26 = 26, - UNW_OR1K_R27 = 27, - UNW_OR1K_R28 = 28, - UNW_OR1K_R29 = 29, - UNW_OR1K_R30 = 30, - UNW_OR1K_R31 = 31, + // 144-150 -- R8_USR-R14_USR + // 151-157 -- R8_FIQ-R14_FIQ + // 158-159 -- R13_IRQ-R14_IRQ + // 160-161 -- R13_ABT-R14_ABT + // 162-163 -- R13_UND-R14_UND + // 164-165 -- R13_SVC-R14_SVC + // 166-191 -- Reserved + UNW_ARM_WC0 = 192, + UNW_ARM_WC1 = 193, + UNW_ARM_WC2 = 194, + UNW_ARM_WC3 = 195, + // 196-199 -- wC4-wC7 (Intel wireless MMX control) + // 200-255 -- Reserved + UNW_ARM_D0 = 256, + UNW_ARM_D1 = 257, + UNW_ARM_D2 = 258, + UNW_ARM_D3 = 259, + UNW_ARM_D4 = 260, + UNW_ARM_D5 = 261, + UNW_ARM_D6 = 262, + UNW_ARM_D7 = 263, + UNW_ARM_D8 = 264, + UNW_ARM_D9 = 265, + UNW_ARM_D10 = 266, + UNW_ARM_D11 = 267, + UNW_ARM_D12 = 268, + UNW_ARM_D13 = 269, + UNW_ARM_D14 = 270, + UNW_ARM_D15 = 271, + UNW_ARM_D16 = 272, + UNW_ARM_D17 = 273, + UNW_ARM_D18 = 274, + UNW_ARM_D19 = 275, + UNW_ARM_D20 = 276, + UNW_ARM_D21 = 277, + UNW_ARM_D22 = 278, + UNW_ARM_D23 = 279, + UNW_ARM_D24 = 280, + UNW_ARM_D25 = 281, + UNW_ARM_D26 = 282, + UNW_ARM_D27 = 283, + UNW_ARM_D28 = 284, + UNW_ARM_D29 = 285, + UNW_ARM_D30 = 286, + UNW_ARM_D31 = 287, + // 288-319 -- Reserved for VFP/Neon + // 320-8191 -- Reserved + // 8192-16383 -- Unspecified vendor co-processor register. 
+}; + +// OpenRISC1000 register numbers +enum { + UNW_OR1K_R0 = 0, + UNW_OR1K_R1 = 1, + UNW_OR1K_R2 = 2, + UNW_OR1K_R3 = 3, + UNW_OR1K_R4 = 4, + UNW_OR1K_R5 = 5, + UNW_OR1K_R6 = 6, + UNW_OR1K_R7 = 7, + UNW_OR1K_R8 = 8, + UNW_OR1K_R9 = 9, + UNW_OR1K_R10 = 10, + UNW_OR1K_R11 = 11, + UNW_OR1K_R12 = 12, + UNW_OR1K_R13 = 13, + UNW_OR1K_R14 = 14, + UNW_OR1K_R15 = 15, + UNW_OR1K_R16 = 16, + UNW_OR1K_R17 = 17, + UNW_OR1K_R18 = 18, + UNW_OR1K_R19 = 19, + UNW_OR1K_R20 = 20, + UNW_OR1K_R21 = 21, + UNW_OR1K_R22 = 22, + UNW_OR1K_R23 = 23, + UNW_OR1K_R24 = 24, + UNW_OR1K_R25 = 25, + UNW_OR1K_R26 = 26, + UNW_OR1K_R27 = 27, + UNW_OR1K_R28 = 28, + UNW_OR1K_R29 = 29, + UNW_OR1K_R30 = 30, + UNW_OR1K_R31 = 31, UNW_OR1K_EPCR = 32, -}; - +}; + // MIPS registers enum { UNW_MIPS_R0 = 0, @@ -1174,4 +1174,4 @@ enum { UNW_VE_VL = 145, }; -#endif +#endif diff --git a/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h b/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h index 45d873f75c3..68d562eec43 100644 --- a/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h +++ b/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h @@ -1,477 +1,477 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// +// +// // Darwin's alternative to DWARF based unwind encodings. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef __COMPACT_UNWIND_ENCODING__ -#define __COMPACT_UNWIND_ENCODING__ - -#include - -// +// +//===----------------------------------------------------------------------===// + + +#ifndef __COMPACT_UNWIND_ENCODING__ +#define __COMPACT_UNWIND_ENCODING__ + +#include + +// // Compilers can emit standard DWARF FDEs in the __TEXT,__eh_frame section -// of object files. Or compilers can emit compact unwind information in -// the __LD,__compact_unwind section. -// -// When the linker creates a final linked image, it will create a -// __TEXT,__unwind_info section. This section is a small and fast way for the -// runtime to access unwind info for any given function. If the compiler -// emitted compact unwind info for the function, that compact unwind info will -// be encoded in the __TEXT,__unwind_info section. If the compiler emitted +// of object files. Or compilers can emit compact unwind information in +// the __LD,__compact_unwind section. +// +// When the linker creates a final linked image, it will create a +// __TEXT,__unwind_info section. This section is a small and fast way for the +// runtime to access unwind info for any given function. If the compiler +// emitted compact unwind info for the function, that compact unwind info will +// be encoded in the __TEXT,__unwind_info section. If the compiler emitted // DWARF unwind info, the __TEXT,__unwind_info section will contain the offset -// of the FDE in the __TEXT,__eh_frame section in the final linked image. -// +// of the FDE in the __TEXT,__eh_frame section in the final linked image. +// // Note: Previously, the linker would transform some DWARF unwind infos into -// compact unwind info. But that is fragile and no longer done. - - -// -// The compact unwind endoding is a 32-bit value which encoded in an -// architecture specific way, which registers to restore from where, and how -// to unwind out of the function. 
-// -typedef uint32_t compact_unwind_encoding_t; - - -// architecture independent bits -enum { - UNWIND_IS_NOT_FUNCTION_START = 0x80000000, - UNWIND_HAS_LSDA = 0x40000000, - UNWIND_PERSONALITY_MASK = 0x30000000, -}; - - - - -// -// x86 -// -// 1-bit: start -// 1-bit: has lsda -// 2-bit: personality index -// +// compact unwind info. But that is fragile and no longer done. + + +// +// The compact unwind endoding is a 32-bit value which encoded in an +// architecture specific way, which registers to restore from where, and how +// to unwind out of the function. +// +typedef uint32_t compact_unwind_encoding_t; + + +// architecture independent bits +enum { + UNWIND_IS_NOT_FUNCTION_START = 0x80000000, + UNWIND_HAS_LSDA = 0x40000000, + UNWIND_PERSONALITY_MASK = 0x30000000, +}; + + + + +// +// x86 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// // 4-bits: 0=old, 1=ebp based, 2=stack-imm, 3=stack-ind, 4=DWARF -// ebp based: -// 15-bits (5*3-bits per reg) register permutation -// 8-bits for stack offset -// frameless: -// 8-bits stack size -// 3-bits stack adjust -// 3-bits register count -// 10-bits register permutation -// -enum { - UNWIND_X86_MODE_MASK = 0x0F000000, - UNWIND_X86_MODE_EBP_FRAME = 0x01000000, - UNWIND_X86_MODE_STACK_IMMD = 0x02000000, - UNWIND_X86_MODE_STACK_IND = 0x03000000, - UNWIND_X86_MODE_DWARF = 0x04000000, - - UNWIND_X86_EBP_FRAME_REGISTERS = 0x00007FFF, - UNWIND_X86_EBP_FRAME_OFFSET = 0x00FF0000, - - UNWIND_X86_FRAMELESS_STACK_SIZE = 0x00FF0000, - UNWIND_X86_FRAMELESS_STACK_ADJUST = 0x0000E000, - UNWIND_X86_FRAMELESS_STACK_REG_COUNT = 0x00001C00, - UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, - - UNWIND_X86_DWARF_SECTION_OFFSET = 0x00FFFFFF, -}; - -enum { - UNWIND_X86_REG_NONE = 0, - UNWIND_X86_REG_EBX = 1, - UNWIND_X86_REG_ECX = 2, - UNWIND_X86_REG_EDX = 3, - UNWIND_X86_REG_EDI = 4, - UNWIND_X86_REG_ESI = 5, - UNWIND_X86_REG_EBP = 6, -}; - -// -// For x86 there are four modes for the compact unwind encoding: 
-// UNWIND_X86_MODE_EBP_FRAME: -// EBP based frame where EBP is push on stack immediately after return address, -// then ESP is moved to EBP. Thus, to unwind ESP is restored with the current -// EPB value, then EBP is restored by popping off the stack, and the return -// is done by popping the stack once more into the pc. -// All non-volatile registers that need to be restored must have been saved -// in a small range in the stack that starts EBP-4 to EBP-1020. The offset/4 -// is encoded in the UNWIND_X86_EBP_FRAME_OFFSET bits. The registers saved -// are encoded in the UNWIND_X86_EBP_FRAME_REGISTERS bits as five 3-bit entries. -// Each entry contains which register to restore. -// UNWIND_X86_MODE_STACK_IMMD: -// A "frameless" (EBP not used as frame pointer) function with a small -// constant stack size. To return, a constant (encoded in the compact -// unwind encoding) is added to the ESP. Then the return is done by -// popping the stack into the pc. -// All non-volatile registers that need to be restored must have been saved -// on the stack immediately after the return address. The stack_size/4 is -// encoded in the UNWIND_X86_FRAMELESS_STACK_SIZE (max stack size is 1024). -// The number of registers saved is encoded in UNWIND_X86_FRAMELESS_STACK_REG_COUNT. -// UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION constains which registers were -// saved and their order. -// UNWIND_X86_MODE_STACK_IND: -// A "frameless" (EBP not used as frame pointer) function large constant -// stack size. This case is like the previous, except the stack size is too -// large to encode in the compact unwind encoding. Instead it requires that -// the function contains "subl $nnnnnnnn,ESP" in its prolog. The compact -// encoding contains the offset to the nnnnnnnn value in the function in -// UNWIND_X86_FRAMELESS_STACK_SIZE. -// UNWIND_X86_MODE_DWARF: -// No compact unwind encoding is available. 
Instead the low 24-bits of the +// ebp based: +// 15-bits (5*3-bits per reg) register permutation +// 8-bits for stack offset +// frameless: +// 8-bits stack size +// 3-bits stack adjust +// 3-bits register count +// 10-bits register permutation +// +enum { + UNWIND_X86_MODE_MASK = 0x0F000000, + UNWIND_X86_MODE_EBP_FRAME = 0x01000000, + UNWIND_X86_MODE_STACK_IMMD = 0x02000000, + UNWIND_X86_MODE_STACK_IND = 0x03000000, + UNWIND_X86_MODE_DWARF = 0x04000000, + + UNWIND_X86_EBP_FRAME_REGISTERS = 0x00007FFF, + UNWIND_X86_EBP_FRAME_OFFSET = 0x00FF0000, + + UNWIND_X86_FRAMELESS_STACK_SIZE = 0x00FF0000, + UNWIND_X86_FRAMELESS_STACK_ADJUST = 0x0000E000, + UNWIND_X86_FRAMELESS_STACK_REG_COUNT = 0x00001C00, + UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, + + UNWIND_X86_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; + +enum { + UNWIND_X86_REG_NONE = 0, + UNWIND_X86_REG_EBX = 1, + UNWIND_X86_REG_ECX = 2, + UNWIND_X86_REG_EDX = 3, + UNWIND_X86_REG_EDI = 4, + UNWIND_X86_REG_ESI = 5, + UNWIND_X86_REG_EBP = 6, +}; + +// +// For x86 there are four modes for the compact unwind encoding: +// UNWIND_X86_MODE_EBP_FRAME: +// EBP based frame where EBP is push on stack immediately after return address, +// then ESP is moved to EBP. Thus, to unwind ESP is restored with the current +// EPB value, then EBP is restored by popping off the stack, and the return +// is done by popping the stack once more into the pc. +// All non-volatile registers that need to be restored must have been saved +// in a small range in the stack that starts EBP-4 to EBP-1020. The offset/4 +// is encoded in the UNWIND_X86_EBP_FRAME_OFFSET bits. The registers saved +// are encoded in the UNWIND_X86_EBP_FRAME_REGISTERS bits as five 3-bit entries. +// Each entry contains which register to restore. +// UNWIND_X86_MODE_STACK_IMMD: +// A "frameless" (EBP not used as frame pointer) function with a small +// constant stack size. To return, a constant (encoded in the compact +// unwind encoding) is added to the ESP. 
Then the return is done by +// popping the stack into the pc. +// All non-volatile registers that need to be restored must have been saved +// on the stack immediately after the return address. The stack_size/4 is +// encoded in the UNWIND_X86_FRAMELESS_STACK_SIZE (max stack size is 1024). +// The number of registers saved is encoded in UNWIND_X86_FRAMELESS_STACK_REG_COUNT. +// UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION constains which registers were +// saved and their order. +// UNWIND_X86_MODE_STACK_IND: +// A "frameless" (EBP not used as frame pointer) function large constant +// stack size. This case is like the previous, except the stack size is too +// large to encode in the compact unwind encoding. Instead it requires that +// the function contains "subl $nnnnnnnn,ESP" in its prolog. The compact +// encoding contains the offset to the nnnnnnnn value in the function in +// UNWIND_X86_FRAMELESS_STACK_SIZE. +// UNWIND_X86_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the // compact encoding is the offset of the DWARF FDE in the __eh_frame section. -// This mode is never used in object files. It is only generated by the +// This mode is never used in object files. It is only generated by the // linker in final linked images which have only DWARF unwind info for a -// function. -// -// The permutation encoding is a Lehmer code sequence encoded into a -// single variable-base number so we can encode the ordering of up to -// six registers in a 10-bit space. -// -// The following is the algorithm used to create the permutation encoding used -// with frameless stacks. It is passed the number of registers to be saved and -// an array of the register numbers saved. 
-// -//uint32_t permute_encode(uint32_t registerCount, const uint32_t registers[6]) -//{ -// uint32_t renumregs[6]; -// for (int i=6-registerCount; i < 6; ++i) { -// int countless = 0; -// for (int j=6-registerCount; j < i; ++j) { -// if ( registers[j] < registers[i] ) -// ++countless; -// } -// renumregs[i] = registers[i] - countless -1; -// } -// uint32_t permutationEncoding = 0; -// switch ( registerCount ) { -// case 6: -// permutationEncoding |= (120*renumregs[0] + 24*renumregs[1] -// + 6*renumregs[2] + 2*renumregs[3] -// + renumregs[4]); -// break; -// case 5: -// permutationEncoding |= (120*renumregs[1] + 24*renumregs[2] -// + 6*renumregs[3] + 2*renumregs[4] -// + renumregs[5]); -// break; -// case 4: -// permutationEncoding |= (60*renumregs[2] + 12*renumregs[3] -// + 3*renumregs[4] + renumregs[5]); -// break; -// case 3: -// permutationEncoding |= (20*renumregs[3] + 4*renumregs[4] -// + renumregs[5]); -// break; -// case 2: -// permutationEncoding |= (5*renumregs[4] + renumregs[5]); -// break; -// case 1: -// permutationEncoding |= (renumregs[5]); -// break; -// } -// return permutationEncoding; -//} -// - - - - -// -// x86_64 -// -// 1-bit: start -// 1-bit: has lsda -// 2-bit: personality index -// +// function. +// +// The permutation encoding is a Lehmer code sequence encoded into a +// single variable-base number so we can encode the ordering of up to +// six registers in a 10-bit space. +// +// The following is the algorithm used to create the permutation encoding used +// with frameless stacks. It is passed the number of registers to be saved and +// an array of the register numbers saved. 
+// +//uint32_t permute_encode(uint32_t registerCount, const uint32_t registers[6]) +//{ +// uint32_t renumregs[6]; +// for (int i=6-registerCount; i < 6; ++i) { +// int countless = 0; +// for (int j=6-registerCount; j < i; ++j) { +// if ( registers[j] < registers[i] ) +// ++countless; +// } +// renumregs[i] = registers[i] - countless -1; +// } +// uint32_t permutationEncoding = 0; +// switch ( registerCount ) { +// case 6: +// permutationEncoding |= (120*renumregs[0] + 24*renumregs[1] +// + 6*renumregs[2] + 2*renumregs[3] +// + renumregs[4]); +// break; +// case 5: +// permutationEncoding |= (120*renumregs[1] + 24*renumregs[2] +// + 6*renumregs[3] + 2*renumregs[4] +// + renumregs[5]); +// break; +// case 4: +// permutationEncoding |= (60*renumregs[2] + 12*renumregs[3] +// + 3*renumregs[4] + renumregs[5]); +// break; +// case 3: +// permutationEncoding |= (20*renumregs[3] + 4*renumregs[4] +// + renumregs[5]); +// break; +// case 2: +// permutationEncoding |= (5*renumregs[4] + renumregs[5]); +// break; +// case 1: +// permutationEncoding |= (renumregs[5]); +// break; +// } +// return permutationEncoding; +//} +// + + + + +// +// x86_64 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// // 4-bits: 0=old, 1=rbp based, 2=stack-imm, 3=stack-ind, 4=DWARF -// rbp based: -// 15-bits (5*3-bits per reg) register permutation -// 8-bits for stack offset -// frameless: -// 8-bits stack size -// 3-bits stack adjust -// 3-bits register count -// 10-bits register permutation -// -enum { - UNWIND_X86_64_MODE_MASK = 0x0F000000, - UNWIND_X86_64_MODE_RBP_FRAME = 0x01000000, - UNWIND_X86_64_MODE_STACK_IMMD = 0x02000000, - UNWIND_X86_64_MODE_STACK_IND = 0x03000000, - UNWIND_X86_64_MODE_DWARF = 0x04000000, - - UNWIND_X86_64_RBP_FRAME_REGISTERS = 0x00007FFF, - UNWIND_X86_64_RBP_FRAME_OFFSET = 0x00FF0000, - - UNWIND_X86_64_FRAMELESS_STACK_SIZE = 0x00FF0000, - UNWIND_X86_64_FRAMELESS_STACK_ADJUST = 0x0000E000, - UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT = 0x00001C00, - 
UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, - - UNWIND_X86_64_DWARF_SECTION_OFFSET = 0x00FFFFFF, -}; - -enum { - UNWIND_X86_64_REG_NONE = 0, - UNWIND_X86_64_REG_RBX = 1, - UNWIND_X86_64_REG_R12 = 2, - UNWIND_X86_64_REG_R13 = 3, - UNWIND_X86_64_REG_R14 = 4, - UNWIND_X86_64_REG_R15 = 5, - UNWIND_X86_64_REG_RBP = 6, -}; -// -// For x86_64 there are four modes for the compact unwind encoding: -// UNWIND_X86_64_MODE_RBP_FRAME: -// RBP based frame where RBP is push on stack immediately after return address, -// then RSP is moved to RBP. Thus, to unwind RSP is restored with the current -// EPB value, then RBP is restored by popping off the stack, and the return -// is done by popping the stack once more into the pc. -// All non-volatile registers that need to be restored must have been saved -// in a small range in the stack that starts RBP-8 to RBP-2040. The offset/8 -// is encoded in the UNWIND_X86_64_RBP_FRAME_OFFSET bits. The registers saved -// are encoded in the UNWIND_X86_64_RBP_FRAME_REGISTERS bits as five 3-bit entries. -// Each entry contains which register to restore. -// UNWIND_X86_64_MODE_STACK_IMMD: -// A "frameless" (RBP not used as frame pointer) function with a small -// constant stack size. To return, a constant (encoded in the compact -// unwind encoding) is added to the RSP. Then the return is done by -// popping the stack into the pc. -// All non-volatile registers that need to be restored must have been saved -// on the stack immediately after the return address. The stack_size/8 is -// encoded in the UNWIND_X86_64_FRAMELESS_STACK_SIZE (max stack size is 2048). -// The number of registers saved is encoded in UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT. -// UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION constains which registers were -// saved and their order. -// UNWIND_X86_64_MODE_STACK_IND: -// A "frameless" (RBP not used as frame pointer) function large constant -// stack size. 
This case is like the previous, except the stack size is too -// large to encode in the compact unwind encoding. Instead it requires that -// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact -// encoding contains the offset to the nnnnnnnn value in the function in -// UNWIND_X86_64_FRAMELESS_STACK_SIZE. -// UNWIND_X86_64_MODE_DWARF: -// No compact unwind encoding is available. Instead the low 24-bits of the +// rbp based: +// 15-bits (5*3-bits per reg) register permutation +// 8-bits for stack offset +// frameless: +// 8-bits stack size +// 3-bits stack adjust +// 3-bits register count +// 10-bits register permutation +// +enum { + UNWIND_X86_64_MODE_MASK = 0x0F000000, + UNWIND_X86_64_MODE_RBP_FRAME = 0x01000000, + UNWIND_X86_64_MODE_STACK_IMMD = 0x02000000, + UNWIND_X86_64_MODE_STACK_IND = 0x03000000, + UNWIND_X86_64_MODE_DWARF = 0x04000000, + + UNWIND_X86_64_RBP_FRAME_REGISTERS = 0x00007FFF, + UNWIND_X86_64_RBP_FRAME_OFFSET = 0x00FF0000, + + UNWIND_X86_64_FRAMELESS_STACK_SIZE = 0x00FF0000, + UNWIND_X86_64_FRAMELESS_STACK_ADJUST = 0x0000E000, + UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT = 0x00001C00, + UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, + + UNWIND_X86_64_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; + +enum { + UNWIND_X86_64_REG_NONE = 0, + UNWIND_X86_64_REG_RBX = 1, + UNWIND_X86_64_REG_R12 = 2, + UNWIND_X86_64_REG_R13 = 3, + UNWIND_X86_64_REG_R14 = 4, + UNWIND_X86_64_REG_R15 = 5, + UNWIND_X86_64_REG_RBP = 6, +}; +// +// For x86_64 there are four modes for the compact unwind encoding: +// UNWIND_X86_64_MODE_RBP_FRAME: +// RBP based frame where RBP is push on stack immediately after return address, +// then RSP is moved to RBP. Thus, to unwind RSP is restored with the current +// EPB value, then RBP is restored by popping off the stack, and the return +// is done by popping the stack once more into the pc. 
+// All non-volatile registers that need to be restored must have been saved +// in a small range in the stack that starts RBP-8 to RBP-2040. The offset/8 +// is encoded in the UNWIND_X86_64_RBP_FRAME_OFFSET bits. The registers saved +// are encoded in the UNWIND_X86_64_RBP_FRAME_REGISTERS bits as five 3-bit entries. +// Each entry contains which register to restore. +// UNWIND_X86_64_MODE_STACK_IMMD: +// A "frameless" (RBP not used as frame pointer) function with a small +// constant stack size. To return, a constant (encoded in the compact +// unwind encoding) is added to the RSP. Then the return is done by +// popping the stack into the pc. +// All non-volatile registers that need to be restored must have been saved +// on the stack immediately after the return address. The stack_size/8 is +// encoded in the UNWIND_X86_64_FRAMELESS_STACK_SIZE (max stack size is 2048). +// The number of registers saved is encoded in UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT. +// UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION constains which registers were +// saved and their order. +// UNWIND_X86_64_MODE_STACK_IND: +// A "frameless" (RBP not used as frame pointer) function large constant +// stack size. This case is like the previous, except the stack size is too +// large to encode in the compact unwind encoding. Instead it requires that +// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact +// encoding contains the offset to the nnnnnnnn value in the function in +// UNWIND_X86_64_FRAMELESS_STACK_SIZE. +// UNWIND_X86_64_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the // compact encoding is the offset of the DWARF FDE in the __eh_frame section. -// This mode is never used in object files. It is only generated by the +// This mode is never used in object files. It is only generated by the // linker in final linked images which have only DWARF unwind info for a -// function. 
-// - - -// ARM64 -// -// 1-bit: start -// 1-bit: has lsda -// 2-bit: personality index -// +// function. +// + + +// ARM64 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// // 4-bits: 4=frame-based, 3=DWARF, 2=frameless -// frameless: -// 12-bits of stack size -// frame-based: -// 4-bits D reg pairs saved -// 5-bits X reg pairs saved +// frameless: +// 12-bits of stack size +// frame-based: +// 4-bits D reg pairs saved +// 5-bits X reg pairs saved // DWARF: // 24-bits offset of DWARF FDE in __eh_frame section -// -enum { - UNWIND_ARM64_MODE_MASK = 0x0F000000, - UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, - UNWIND_ARM64_MODE_DWARF = 0x03000000, - UNWIND_ARM64_MODE_FRAME = 0x04000000, - - UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, - UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, - UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, - UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, - UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, - UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, - UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, - UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, - UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800, - - UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000, - UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF, -}; -// For arm64 there are three modes for the compact unwind encoding: -// UNWIND_ARM64_MODE_FRAME: -// This is a standard arm64 prolog where FP/LR are immediately pushed on the -// stack, then SP is copied to FP. If there are any non-volatile registers -// saved, then are copied into the stack frame in pairs in a contiguous -// range right below the saved FP/LR pair. Any subset of the five X pairs -// and four D pairs can be saved, but the memory layout must be in register -// number order. -// UNWIND_ARM64_MODE_FRAMELESS: -// A "frameless" leaf function, where FP/LR are not saved. The return address -// remains in LR throughout the function. 
If any non-volatile registers -// are saved, they must be pushed onto the stack before any stack space is -// allocated for local variables. The stack sized (including any saved -// non-volatile registers) divided by 16 is encoded in the bits -// UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK. -// UNWIND_ARM64_MODE_DWARF: -// No compact unwind encoding is available. Instead the low 24-bits of the +// +enum { + UNWIND_ARM64_MODE_MASK = 0x0F000000, + UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, + UNWIND_ARM64_MODE_DWARF = 0x03000000, + UNWIND_ARM64_MODE_FRAME = 0x04000000, + + UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, + UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, + UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, + UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, + UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, + UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, + UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, + UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, + UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800, + + UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000, + UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; +// For arm64 there are three modes for the compact unwind encoding: +// UNWIND_ARM64_MODE_FRAME: +// This is a standard arm64 prolog where FP/LR are immediately pushed on the +// stack, then SP is copied to FP. If there are any non-volatile registers +// saved, then are copied into the stack frame in pairs in a contiguous +// range right below the saved FP/LR pair. Any subset of the five X pairs +// and four D pairs can be saved, but the memory layout must be in register +// number order. +// UNWIND_ARM64_MODE_FRAMELESS: +// A "frameless" leaf function, where FP/LR are not saved. The return address +// remains in LR throughout the function. If any non-volatile registers +// are saved, they must be pushed onto the stack before any stack space is +// allocated for local variables. 
The stack sized (including any saved +// non-volatile registers) divided by 16 is encoded in the bits +// UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK. +// UNWIND_ARM64_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the // compact encoding is the offset of the DWARF FDE in the __eh_frame section. -// This mode is never used in object files. It is only generated by the +// This mode is never used in object files. It is only generated by the // linker in final linked images which have only DWARF unwind info for a -// function. -// - - - - - -//////////////////////////////////////////////////////////////////////////////// -// -// Relocatable Object Files: __LD,__compact_unwind -// -//////////////////////////////////////////////////////////////////////////////// - -// -// A compiler can generated compact unwind information for a function by adding -// a "row" to the __LD,__compact_unwind section. This section has the -// S_ATTR_DEBUG bit set, so the section will be ignored by older linkers. -// It is removed by the new linker, so never ends up in final executables. -// This section is a table, initially with one row per function (that needs -// unwind info). The table columns and some conceptual entries are: -// -// range-start pointer to start of function/range -// range-length -// compact-unwind-encoding 32-bit encoding -// personality-function or zero if no personality function -// lsda or zero if no LSDA data -// -// The length and encoding fields are 32-bits. The other are all pointer sized. 
-// -// In x86_64 assembly, these entry would look like: -// -// .section __LD,__compact_unwind,regular,debug -// -// #compact unwind for _foo -// .quad _foo -// .set L1,LfooEnd-_foo -// .long L1 -// .long 0x01010001 -// .quad 0 -// .quad 0 -// -// #compact unwind for _bar -// .quad _bar -// .set L2,LbarEnd-_bar -// .long L2 -// .long 0x01020011 -// .quad __gxx_personality -// .quad except_tab1 -// -// -// Notes: There is no need for any labels in the the __compact_unwind section. -// The use of the .set directive is to force the evaluation of the -// range-length at assembly time, instead of generating relocations. -// -// To support future compiler optimizations where which non-volatile registers -// are saved changes within a function (e.g. delay saving non-volatiles until -// necessary), there can by multiple lines in the __compact_unwind table for one -// function, each with a different (non-overlapping) range and each with -// different compact unwind encodings that correspond to the non-volatiles -// saved at that range of the function. -// -// If a particular function is so wacky that there is no compact unwind way +// function. +// + + + + + +//////////////////////////////////////////////////////////////////////////////// +// +// Relocatable Object Files: __LD,__compact_unwind +// +//////////////////////////////////////////////////////////////////////////////// + +// +// A compiler can generated compact unwind information for a function by adding +// a "row" to the __LD,__compact_unwind section. This section has the +// S_ATTR_DEBUG bit set, so the section will be ignored by older linkers. +// It is removed by the new linker, so never ends up in final executables. +// This section is a table, initially with one row per function (that needs +// unwind info). 
The table columns and some conceptual entries are: +// +// range-start pointer to start of function/range +// range-length +// compact-unwind-encoding 32-bit encoding +// personality-function or zero if no personality function +// lsda or zero if no LSDA data +// +// The length and encoding fields are 32-bits. The other are all pointer sized. +// +// In x86_64 assembly, these entry would look like: +// +// .section __LD,__compact_unwind,regular,debug +// +// #compact unwind for _foo +// .quad _foo +// .set L1,LfooEnd-_foo +// .long L1 +// .long 0x01010001 +// .quad 0 +// .quad 0 +// +// #compact unwind for _bar +// .quad _bar +// .set L2,LbarEnd-_bar +// .long L2 +// .long 0x01020011 +// .quad __gxx_personality +// .quad except_tab1 +// +// +// Notes: There is no need for any labels in the the __compact_unwind section. +// The use of the .set directive is to force the evaluation of the +// range-length at assembly time, instead of generating relocations. +// +// To support future compiler optimizations where which non-volatile registers +// are saved changes within a function (e.g. delay saving non-volatiles until +// necessary), there can by multiple lines in the __compact_unwind table for one +// function, each with a different (non-overlapping) range and each with +// different compact unwind encodings that correspond to the non-volatiles +// saved at that range of the function. +// +// If a particular function is so wacky that there is no compact unwind way // to encode it, then the compiler can emit traditional DWARF unwind info. -// The runtime will use which ever is available. -// -// Runtime support for compact unwind encodings are only available on 10.6 -// and later. So, the compiler should not generate it when targeting pre-10.6. 
- - - - -//////////////////////////////////////////////////////////////////////////////// -// -// Final Linked Images: __TEXT,__unwind_info -// -//////////////////////////////////////////////////////////////////////////////// - -// -// The __TEXT,__unwind_info section is laid out for an efficient two level lookup. -// The header of the section contains a coarse index that maps function address -// to the page (4096 byte block) containing the unwind info for that function. -// - -#define UNWIND_SECTION_VERSION 1 -struct unwind_info_section_header -{ - uint32_t version; // UNWIND_SECTION_VERSION - uint32_t commonEncodingsArraySectionOffset; - uint32_t commonEncodingsArrayCount; - uint32_t personalityArraySectionOffset; - uint32_t personalityArrayCount; - uint32_t indexSectionOffset; - uint32_t indexCount; - // compact_unwind_encoding_t[] - // uint32_t personalities[] - // unwind_info_section_header_index_entry[] - // unwind_info_section_header_lsda_index_entry[] -}; - -struct unwind_info_section_header_index_entry -{ - uint32_t functionOffset; - uint32_t secondLevelPagesSectionOffset; // section offset to start of regular or compress page - uint32_t lsdaIndexArraySectionOffset; // section offset to start of lsda_index array for this range -}; - -struct unwind_info_section_header_lsda_index_entry -{ - uint32_t functionOffset; - uint32_t lsdaOffset; -}; - -// -// There are two kinds of second level index pages: regular and compressed. -// A compressed page can hold up to 1021 entries, but it cannot be used -// if too many different encoding types are used. The regular page holds -// 511 entries. 
-// - -struct unwind_info_regular_second_level_entry -{ - uint32_t functionOffset; - compact_unwind_encoding_t encoding; -}; - -#define UNWIND_SECOND_LEVEL_REGULAR 2 -struct unwind_info_regular_second_level_page_header -{ - uint32_t kind; // UNWIND_SECOND_LEVEL_REGULAR - uint16_t entryPageOffset; - uint16_t entryCount; - // entry array -}; - -#define UNWIND_SECOND_LEVEL_COMPRESSED 3 -struct unwind_info_compressed_second_level_page_header -{ - uint32_t kind; // UNWIND_SECOND_LEVEL_COMPRESSED - uint16_t entryPageOffset; - uint16_t entryCount; - uint16_t encodingsPageOffset; - uint16_t encodingsCount; - // 32-bit entry array - // encodings array -}; - -#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry) (entry & 0x00FFFFFF) -#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry) ((entry >> 24) & 0xFF) - - - -#endif - +// The runtime will use which ever is available. +// +// Runtime support for compact unwind encodings are only available on 10.6 +// and later. So, the compiler should not generate it when targeting pre-10.6. + + + + +//////////////////////////////////////////////////////////////////////////////// +// +// Final Linked Images: __TEXT,__unwind_info +// +//////////////////////////////////////////////////////////////////////////////// + +// +// The __TEXT,__unwind_info section is laid out for an efficient two level lookup. +// The header of the section contains a coarse index that maps function address +// to the page (4096 byte block) containing the unwind info for that function. 
+// + +#define UNWIND_SECTION_VERSION 1 +struct unwind_info_section_header +{ + uint32_t version; // UNWIND_SECTION_VERSION + uint32_t commonEncodingsArraySectionOffset; + uint32_t commonEncodingsArrayCount; + uint32_t personalityArraySectionOffset; + uint32_t personalityArrayCount; + uint32_t indexSectionOffset; + uint32_t indexCount; + // compact_unwind_encoding_t[] + // uint32_t personalities[] + // unwind_info_section_header_index_entry[] + // unwind_info_section_header_lsda_index_entry[] +}; + +struct unwind_info_section_header_index_entry +{ + uint32_t functionOffset; + uint32_t secondLevelPagesSectionOffset; // section offset to start of regular or compress page + uint32_t lsdaIndexArraySectionOffset; // section offset to start of lsda_index array for this range +}; + +struct unwind_info_section_header_lsda_index_entry +{ + uint32_t functionOffset; + uint32_t lsdaOffset; +}; + +// +// There are two kinds of second level index pages: regular and compressed. +// A compressed page can hold up to 1021 entries, but it cannot be used +// if too many different encoding types are used. The regular page holds +// 511 entries. 
+// + +struct unwind_info_regular_second_level_entry +{ + uint32_t functionOffset; + compact_unwind_encoding_t encoding; +}; + +#define UNWIND_SECOND_LEVEL_REGULAR 2 +struct unwind_info_regular_second_level_page_header +{ + uint32_t kind; // UNWIND_SECOND_LEVEL_REGULAR + uint16_t entryPageOffset; + uint16_t entryCount; + // entry array +}; + +#define UNWIND_SECOND_LEVEL_COMPRESSED 3 +struct unwind_info_compressed_second_level_page_header +{ + uint32_t kind; // UNWIND_SECOND_LEVEL_COMPRESSED + uint16_t entryPageOffset; + uint16_t entryCount; + uint16_t encodingsPageOffset; + uint16_t encodingsCount; + // 32-bit entry array + // encodings array +}; + +#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry) (entry & 0x00FFFFFF) +#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry) ((entry >> 24) & 0xFF) + + + +#endif + diff --git a/contrib/libs/libunwind/include/unwind.h b/contrib/libs/libunwind/include/unwind.h index 13ff2b6bf8f..6949e063dde 100644 --- a/contrib/libs/libunwind/include/unwind.h +++ b/contrib/libs/libunwind/include/unwind.h @@ -1,190 +1,190 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// C++ ABI Level 1 ABI documented at: +// +// +// C++ ABI Level 1 ABI documented at: // https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html -// -//===----------------------------------------------------------------------===// - -#ifndef __UNWIND_H__ -#define __UNWIND_H__ - -#include "__libunwind_config.h" - -#include -#include - +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_H__ +#define __UNWIND_H__ + +#include "__libunwind_config.h" + +#include +#include + #if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) && defined(_WIN32) #include #include #endif -#if defined(__APPLE__) -#define LIBUNWIND_UNAVAIL __attribute__ (( deprecated )) -#else -#define LIBUNWIND_UNAVAIL -#endif - -typedef enum { - _URC_NO_REASON = 0, - _URC_OK = 0, - _URC_FOREIGN_EXCEPTION_CAUGHT = 1, - _URC_FATAL_PHASE2_ERROR = 2, - _URC_FATAL_PHASE1_ERROR = 3, - _URC_NORMAL_STOP = 4, - _URC_END_OF_STACK = 5, - _URC_HANDLER_FOUND = 6, - _URC_INSTALL_CONTEXT = 7, - _URC_CONTINUE_UNWIND = 8, +#if defined(__APPLE__) +#define LIBUNWIND_UNAVAIL __attribute__ (( deprecated )) +#else +#define LIBUNWIND_UNAVAIL +#endif + +typedef enum { + _URC_NO_REASON = 0, + _URC_OK = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8, #if defined(_LIBUNWIND_ARM_EHABI) - _URC_FAILURE = 9 -#endif -} _Unwind_Reason_Code; - -typedef enum { - _UA_SEARCH_PHASE = 1, - _UA_CLEANUP_PHASE = 2, - _UA_HANDLER_FRAME = 4, - _UA_FORCE_UNWIND = 8, - _UA_END_OF_STACK = 16 // gcc extension to C++ ABI -} _Unwind_Action; - -typedef struct _Unwind_Context _Unwind_Context; // opaque - + _URC_FAILURE = 9 +#endif +} _Unwind_Reason_Code; + +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + _UA_HANDLER_FRAME = 4, + 
_UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 // gcc extension to C++ ABI +} _Unwind_Action; + +typedef struct _Unwind_Context _Unwind_Context; // opaque + #if defined(_LIBUNWIND_ARM_EHABI) #include "unwind_arm_ehabi.h" #else #include "unwind_itanium.h" #endif - -typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) - (int version, - _Unwind_Action actions, + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) + (int version, + _Unwind_Action actions, _Unwind_Exception_Class exceptionClass, - _Unwind_Exception* exceptionObject, - struct _Unwind_Context* context, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context, void* stop_parameter); - -#ifdef __cplusplus -extern "C" { -#endif - -extern uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context); -extern uintptr_t - _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context); -#ifdef __USING_SJLJ_EXCEPTIONS__ -extern _Unwind_Reason_Code - _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *exception_object, - _Unwind_Stop_Fn stop, void *stop_parameter); -#else -extern _Unwind_Reason_Code - _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, - _Unwind_Stop_Fn stop, void *stop_parameter); -#endif - -#ifdef __USING_SJLJ_EXCEPTIONS__ -typedef struct _Unwind_FunctionContext *_Unwind_FunctionContext_t; -extern void _Unwind_SjLj_Register(_Unwind_FunctionContext_t fc); -extern void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t fc); -#endif - -// -// The following are semi-suppoted extensions to the C++ ABI -// - -// -// called by __cxa_rethrow(). 
-// -#ifdef __USING_SJLJ_EXCEPTIONS__ -extern _Unwind_Reason_Code - _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *exception_object); -#else -extern _Unwind_Reason_Code - _Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object); -#endif - -// _Unwind_Backtrace() is a gcc extension that walks the stack and calls the -// _Unwind_Trace_Fn once per frame until it reaches the bottom of the stack -// or the _Unwind_Trace_Fn function returns something other than _URC_NO_REASON. -typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, - void *); -extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); - -// _Unwind_GetCFA is a gcc extension that can be called from within a -// personality handler to get the CFA (stack pointer before call) of -// current frame. -extern uintptr_t _Unwind_GetCFA(struct _Unwind_Context *); - - -// _Unwind_GetIPInfo is a gcc extension that can be called from within a -// personality handler. Similar to _Unwind_GetIP() but also returns in -// *ipBefore a non-zero value if the instruction pointer is at or before the -// instruction causing the unwind. Normally, in a function call, the IP returned -// is the return address which is after the call instruction and may be past the -// end of the function containing the call instruction. -extern uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, - int *ipBefore); - - -// __register_frame() is used with dynamically generated code to register the -// FDE for a generated (JIT) code. The FDE must use pc-rel addressing to point -// to its function and optional LSDA. -// __register_frame() has existed in all versions of Mac OS X, but in 10.4 and -// 10.5 it was buggy and did not actually register the FDE with the unwinder. -// In 10.6 and later it does register properly. 
-extern void __register_frame(const void *fde); -extern void __deregister_frame(const void *fde); - -// _Unwind_Find_FDE() will locate the FDE if the pc is in some function that has -// an associated FDE. Note, Mac OS X 10.6 and later, introduces "compact unwind + +#ifdef __cplusplus +extern "C" { +#endif + +extern uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context); +extern uintptr_t + _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context); +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter); +#else +extern _Unwind_Reason_Code + _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter); +#endif + +#ifdef __USING_SJLJ_EXCEPTIONS__ +typedef struct _Unwind_FunctionContext *_Unwind_FunctionContext_t; +extern void _Unwind_SjLj_Register(_Unwind_FunctionContext_t fc); +extern void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t fc); +#endif + +// +// The following are semi-suppoted extensions to the C++ ABI +// + +// +// called by __cxa_rethrow(). +// +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *exception_object); +#else +extern _Unwind_Reason_Code + _Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object); +#endif + +// _Unwind_Backtrace() is a gcc extension that walks the stack and calls the +// _Unwind_Trace_Fn once per frame until it reaches the bottom of the stack +// or the _Unwind_Trace_Fn function returns something other than _URC_NO_REASON. +typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); +extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +// _Unwind_GetCFA is a gcc extension that can be called from within a +// personality handler to get the CFA (stack pointer before call) of +// current frame. 
+extern uintptr_t _Unwind_GetCFA(struct _Unwind_Context *); + + +// _Unwind_GetIPInfo is a gcc extension that can be called from within a +// personality handler. Similar to _Unwind_GetIP() but also returns in +// *ipBefore a non-zero value if the instruction pointer is at or before the +// instruction causing the unwind. Normally, in a function call, the IP returned +// is the return address which is after the call instruction and may be past the +// end of the function containing the call instruction. +extern uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore); + + +// __register_frame() is used with dynamically generated code to register the +// FDE for a generated (JIT) code. The FDE must use pc-rel addressing to point +// to its function and optional LSDA. +// __register_frame() has existed in all versions of Mac OS X, but in 10.4 and +// 10.5 it was buggy and did not actually register the FDE with the unwinder. +// In 10.6 and later it does register properly. +extern void __register_frame(const void *fde); +extern void __deregister_frame(const void *fde); + +// _Unwind_Find_FDE() will locate the FDE if the pc is in some function that has +// an associated FDE. Note, Mac OS X 10.6 and later, introduces "compact unwind // info" which the runtime uses in preference to DWARF unwind info. This -// function will only work if the target function has an FDE but no compact -// unwind info. -struct dwarf_eh_bases { - uintptr_t tbase; - uintptr_t dbase; - uintptr_t func; -}; -extern const void *_Unwind_Find_FDE(const void *pc, struct dwarf_eh_bases *); - - -// This function attempts to find the start (address of first instruction) of -// a function given an address inside the function. It only works if the +// function will only work if the target function has an FDE but no compact +// unwind info. 
+struct dwarf_eh_bases { + uintptr_t tbase; + uintptr_t dbase; + uintptr_t func; +}; +extern const void *_Unwind_Find_FDE(const void *pc, struct dwarf_eh_bases *); + + +// This function attempts to find the start (address of first instruction) of +// a function given an address inside the function. It only works if the // function has an FDE (DWARF unwind info). -// This function is unimplemented on Mac OS X 10.6 and later. Instead, use -// _Unwind_Find_FDE() and look at the dwarf_eh_bases.func result. -extern void *_Unwind_FindEnclosingFunction(void *pc); - -// Mac OS X does not support text-rel and data-rel addressing so these functions -// are unimplemented -extern uintptr_t _Unwind_GetDataRelBase(struct _Unwind_Context *context) - LIBUNWIND_UNAVAIL; -extern uintptr_t _Unwind_GetTextRelBase(struct _Unwind_Context *context) - LIBUNWIND_UNAVAIL; - -// Mac OS X 10.4 and 10.5 had implementations of these functions in -// libgcc_s.dylib, but they never worked. -/// These functions are no longer available on Mac OS X. -extern void __register_frame_info_bases(const void *fde, void *ob, void *tb, - void *db) LIBUNWIND_UNAVAIL; -extern void __register_frame_info(const void *fde, void *ob) - LIBUNWIND_UNAVAIL; -extern void __register_frame_info_table_bases(const void *fde, void *ob, - void *tb, void *db) - LIBUNWIND_UNAVAIL; -extern void __register_frame_info_table(const void *fde, void *ob) - LIBUNWIND_UNAVAIL; -extern void __register_frame_table(const void *fde) - LIBUNWIND_UNAVAIL; -extern void *__deregister_frame_info(const void *fde) - LIBUNWIND_UNAVAIL; -extern void *__deregister_frame_info_bases(const void *fde) - LIBUNWIND_UNAVAIL; - +// This function is unimplemented on Mac OS X 10.6 and later. Instead, use +// _Unwind_Find_FDE() and look at the dwarf_eh_bases.func result. 
+extern void *_Unwind_FindEnclosingFunction(void *pc); + +// Mac OS X does not support text-rel and data-rel addressing so these functions +// are unimplemented +extern uintptr_t _Unwind_GetDataRelBase(struct _Unwind_Context *context) + LIBUNWIND_UNAVAIL; +extern uintptr_t _Unwind_GetTextRelBase(struct _Unwind_Context *context) + LIBUNWIND_UNAVAIL; + +// Mac OS X 10.4 and 10.5 had implementations of these functions in +// libgcc_s.dylib, but they never worked. +/// These functions are no longer available on Mac OS X. +extern void __register_frame_info_bases(const void *fde, void *ob, void *tb, + void *db) LIBUNWIND_UNAVAIL; +extern void __register_frame_info(const void *fde, void *ob) + LIBUNWIND_UNAVAIL; +extern void __register_frame_info_table_bases(const void *fde, void *ob, + void *tb, void *db) + LIBUNWIND_UNAVAIL; +extern void __register_frame_info_table(const void *fde, void *ob) + LIBUNWIND_UNAVAIL; +extern void __register_frame_table(const void *fde) + LIBUNWIND_UNAVAIL; +extern void *__deregister_frame_info(const void *fde) + LIBUNWIND_UNAVAIL; +extern void *__deregister_frame_info_bases(const void *fde) + LIBUNWIND_UNAVAIL; + #if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) #ifndef _WIN32 typedef struct _EXCEPTION_RECORD EXCEPTION_RECORD; @@ -215,8 +215,8 @@ typedef struct _Unwind_Backtrace_Buffer { void* backtrace[_YNDX_LIBUNWIND_EXCEPTION_BACKTRACE_SIZE]; } _Unwind_Backtrace_Buffer; #endif -#ifdef __cplusplus -} -#endif - -#endif // __UNWIND_H__ +#ifdef __cplusplus +} +#endif + +#endif // __UNWIND_H__ diff --git a/contrib/libs/libunwind/include/unwind_arm_ehabi.h b/contrib/libs/libunwind/include/unwind_arm_ehabi.h index 178834a2417..6277a1457f8 100644 --- a/contrib/libs/libunwind/include/unwind_arm_ehabi.h +++ b/contrib/libs/libunwind/include/unwind_arm_ehabi.h @@ -1,170 +1,170 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// C++ ABI Level 1 ABI documented at: +// +// +// C++ ABI Level 1 ABI documented at: // https://github.com/ARM-software/abi-aa/blob/main/ehabi32/ehabi32.rst -// -//===----------------------------------------------------------------------===// - +// +//===----------------------------------------------------------------------===// + #ifndef __ARM_EHABI_UNWIND_H__ #define __ARM_EHABI_UNWIND_H__ - -typedef uint32_t _Unwind_State; - -static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0; -static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1; -static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2; + +typedef uint32_t _Unwind_State; + +static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0; +static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1; +static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2; static const _Unwind_State _US_ACTION_MASK = 3; -/* Undocumented flag for force unwinding. */ -static const _Unwind_State _US_FORCE_UNWIND = 8; - -typedef uint32_t _Unwind_EHT_Header; - -struct _Unwind_Control_Block; -typedef struct _Unwind_Control_Block _Unwind_Control_Block; +/* Undocumented flag for force unwinding. 
*/ +static const _Unwind_State _US_FORCE_UNWIND = 8; + +typedef uint32_t _Unwind_EHT_Header; + +struct _Unwind_Control_Block; +typedef struct _Unwind_Control_Block _Unwind_Control_Block; #define _Unwind_Exception _Unwind_Control_Block /* Alias */ typedef uint8_t _Unwind_Exception_Class[8]; - -struct _Unwind_Control_Block { + +struct _Unwind_Control_Block { _Unwind_Exception_Class exception_class; - void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*); - - /* Unwinder cache, private fields for the unwinder's use */ - struct { - uint32_t reserved1; /* init reserved1 to 0, then don't touch */ - uint32_t reserved2; - uint32_t reserved3; - uint32_t reserved4; - uint32_t reserved5; - } unwinder_cache; - - /* Propagation barrier cache (valid after phase 1): */ - struct { - uint32_t sp; - uint32_t bitpattern[5]; - } barrier_cache; - - /* Cleanup cache (preserved over cleanup): */ - struct { - uint32_t bitpattern[4]; - } cleanup_cache; - - /* Pr cache (for pr's benefit): */ - struct { - uint32_t fnstart; /* function start address */ - _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */ - uint32_t additional; - uint32_t reserved1; - } pr_cache; - - long long int :0; /* Enforce the 8-byte alignment */ + void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*); + + /* Unwinder cache, private fields for the unwinder's use */ + struct { + uint32_t reserved1; /* init reserved1 to 0, then don't touch */ + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + } unwinder_cache; + + /* Propagation barrier cache (valid after phase 1): */ + struct { + uint32_t sp; + uint32_t bitpattern[5]; + } barrier_cache; + + /* Cleanup cache (preserved over cleanup): */ + struct { + uint32_t bitpattern[4]; + } cleanup_cache; + + /* Pr cache (for pr's benefit): */ + struct { + uint32_t fnstart; /* function start address */ + _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */ + uint32_t additional; + uint32_t 
reserved1; + } pr_cache; + + long long int :0; /* Enforce the 8-byte alignment */ } __attribute__((__aligned__(8))); - + typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)( _Unwind_State state, _Unwind_Exception *exceptionObject, struct _Unwind_Context *context); - -#ifdef __cplusplus -extern "C" { -#endif - -// -// The following are the base functions documented by the C++ ABI -// -#ifdef __USING_SJLJ_EXCEPTIONS__ -extern _Unwind_Reason_Code - _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object); -extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object); -#else -extern _Unwind_Reason_Code - _Unwind_RaiseException(_Unwind_Exception *exception_object); -extern void _Unwind_Resume(_Unwind_Exception *exception_object); -#endif -extern void _Unwind_DeleteException(_Unwind_Exception *exception_object); - -typedef enum { + +#ifdef __cplusplus +extern "C" { +#endif + +// +// The following are the base functions documented by the C++ ABI +// +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object); +#else +extern _Unwind_Reason_Code + _Unwind_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_Resume(_Unwind_Exception *exception_object); +#endif +extern void _Unwind_DeleteException(_Unwind_Exception *exception_object); + +typedef enum { _UVRSC_CORE = 0, /* integer register */ _UVRSC_VFP = 1, /* vfp */ - _UVRSC_WMMXD = 3, /* Intel WMMX data register */ + _UVRSC_WMMXD = 3, /* Intel WMMX data register */ _UVRSC_WMMXC = 4, /* Intel WMMX control register */ _UVRSC_PSEUDO = 5 /* Special purpose pseudo register */ -} _Unwind_VRS_RegClass; - -typedef enum { - _UVRSD_UINT32 = 0, - _UVRSD_VFPX = 1, - _UVRSD_UINT64 = 3, - _UVRSD_FLOAT = 4, - _UVRSD_DOUBLE = 5 -} _Unwind_VRS_DataRepresentation; - -typedef enum { - _UVRSR_OK = 0, - _UVRSR_NOT_IMPLEMENTED = 1, - _UVRSR_FAILED = 2 -} 
_Unwind_VRS_Result; - -extern void _Unwind_Complete(_Unwind_Exception* exception_object); - -extern _Unwind_VRS_Result -_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, - uint32_t regno, _Unwind_VRS_DataRepresentation representation, - void *valuep); - -extern _Unwind_VRS_Result -_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, - uint32_t regno, _Unwind_VRS_DataRepresentation representation, - void *valuep); - -extern _Unwind_VRS_Result -_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, - uint32_t discriminator, - _Unwind_VRS_DataRepresentation representation); - -#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE) -#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern -#else -#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__ -#endif - -// These are de facto helper functions for ARM, which delegate the function -// calls to _Unwind_VRS_Get/Set(). These are not a part of ARM EHABI -// specification, thus these function MUST be inlined. Please don't replace -// these with the "extern" function declaration; otherwise, the program -// including this header won't be ABI compatible and will result in -// link error when we are linking the program with libgcc. 
- -_LIBUNWIND_EXPORT_UNWIND_LEVEL1 -uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) { - uintptr_t value = 0; - _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value); - return value; -} - -_LIBUNWIND_EXPORT_UNWIND_LEVEL1 -void _Unwind_SetGR(struct _Unwind_Context *context, int index, - uintptr_t value) { - _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value); -} - -_LIBUNWIND_EXPORT_UNWIND_LEVEL1 -uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { - // remove the thumb-bit before returning - return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1); -} - -_LIBUNWIND_EXPORT_UNWIND_LEVEL1 -void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) { - uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1); - _Unwind_SetGR(context, 15, value | thumb_bit); -} - -#ifdef __cplusplus -} -#endif - +} _Unwind_VRS_RegClass; + +typedef enum { + _UVRSD_UINT32 = 0, + _UVRSD_VFPX = 1, + _UVRSD_UINT64 = 3, + _UVRSD_FLOAT = 4, + _UVRSD_DOUBLE = 5 +} _Unwind_VRS_DataRepresentation; + +typedef enum { + _UVRSR_OK = 0, + _UVRSR_NOT_IMPLEMENTED = 1, + _UVRSR_FAILED = 2 +} _Unwind_VRS_Result; + +extern void _Unwind_Complete(_Unwind_Exception* exception_object); + +extern _Unwind_VRS_Result +_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep); + +extern _Unwind_VRS_Result +_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep); + +extern _Unwind_VRS_Result +_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t discriminator, + _Unwind_VRS_DataRepresentation representation); + +#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE) +#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern +#else +#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__ +#endif + +// These are 
de facto helper functions for ARM, which delegate the function +// calls to _Unwind_VRS_Get/Set(). These are not a part of ARM EHABI +// specification, thus these function MUST be inlined. Please don't replace +// these with the "extern" function declaration; otherwise, the program +// including this header won't be ABI compatible and will result in +// link error when we are linking the program with libgcc. + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) { + uintptr_t value = 0; + _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value); + return value; +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t value) { + _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value); +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + // remove the thumb-bit before returning + return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1); +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) { + uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1); + _Unwind_SetGR(context, 15, value | thumb_bit); +} + +#ifdef __cplusplus +} +#endif + #endif // __ARM_EHABI_UNWIND_H__ diff --git a/contrib/libs/libunwind/include/unwind_itanium.h b/contrib/libs/libunwind/include/unwind_itanium.h index 9a415d243b6..d94a6183be2 100644 --- a/contrib/libs/libunwind/include/unwind_itanium.h +++ b/contrib/libs/libunwind/include/unwind_itanium.h @@ -1,76 +1,76 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// C++ ABI Level 1 ABI documented at: +// +// +// C++ ABI Level 1 ABI documented at: // https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html -// -//===----------------------------------------------------------------------===// - +// +//===----------------------------------------------------------------------===// + #ifndef __ITANIUM_UNWIND_H__ #define __ITANIUM_UNWIND_H__ - -struct _Unwind_Context; // opaque -struct _Unwind_Exception; // forward declaration -typedef struct _Unwind_Exception _Unwind_Exception; + +struct _Unwind_Context; // opaque +struct _Unwind_Exception; // forward declaration +typedef struct _Unwind_Exception _Unwind_Exception; typedef uint64_t _Unwind_Exception_Class; - -struct _Unwind_Exception { + +struct _Unwind_Exception { _Unwind_Exception_Class exception_class; - void (*exception_cleanup)(_Unwind_Reason_Code reason, - _Unwind_Exception *exc); + void (*exception_cleanup)(_Unwind_Reason_Code reason, + _Unwind_Exception *exc); #if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) uintptr_t private_[6]; #else - uintptr_t private_1; // non-zero means forced unwind - uintptr_t private_2; // holds sp that phase1 found for phase2 to use + uintptr_t private_1; // non-zero means forced unwind + uintptr_t private_2; // holds sp that phase1 found for phase2 to use #endif #if __SIZEOF_POINTER__ == 4 // The implementation of _Unwind_Exception uses an attribute mode on the // above fields which has the side effect of causing this whole struct to // round up to 32 bytes in size (48 with SEH). To be more explicit, we add // pad fields added for binary compatibility. - uint32_t reserved[3]; -#endif + uint32_t reserved[3]; +#endif // The Itanium ABI requires that _Unwind_Exception objects are "double-word // aligned". GCC has interpreted this to mean "use the maximum useful // alignment for the target"; so do we. 
} __attribute__((__aligned__)); - + typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)( int version, _Unwind_Action actions, uint64_t exceptionClass, _Unwind_Exception *exceptionObject, struct _Unwind_Context *context); - -#ifdef __cplusplus -extern "C" { -#endif - -// -// The following are the base functions documented by the C++ ABI -// -#ifdef __USING_SJLJ_EXCEPTIONS__ -extern _Unwind_Reason_Code - _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object); -extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object); -#else -extern _Unwind_Reason_Code - _Unwind_RaiseException(_Unwind_Exception *exception_object); -extern void _Unwind_Resume(_Unwind_Exception *exception_object); -#endif -extern void _Unwind_DeleteException(_Unwind_Exception *exception_object); - - -extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index); -extern void _Unwind_SetGR(struct _Unwind_Context *context, int index, - uintptr_t new_value); -extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context); -extern void _Unwind_SetIP(struct _Unwind_Context *, uintptr_t new_value); - -#ifdef __cplusplus -} -#endif - + +#ifdef __cplusplus +extern "C" { +#endif + +// +// The following are the base functions documented by the C++ ABI +// +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object); +#else +extern _Unwind_Reason_Code + _Unwind_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_Resume(_Unwind_Exception *exception_object); +#endif +extern void _Unwind_DeleteException(_Unwind_Exception *exception_object); + + +extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index); +extern void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t new_value); +extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context); +extern void _Unwind_SetIP(struct 
_Unwind_Context *, uintptr_t new_value); + +#ifdef __cplusplus +} +#endif + #endif // __ITANIUM_UNWIND_H__ diff --git a/contrib/libs/libunwind/src/AddressSpace.hpp b/contrib/libs/libunwind/src/AddressSpace.hpp index 6eff5225930..0c4dfeb4e68 100644 --- a/contrib/libs/libunwind/src/AddressSpace.hpp +++ b/contrib/libs/libunwind/src/AddressSpace.hpp @@ -1,22 +1,22 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Abstracts accessing local vs remote address spaces. -// -//===----------------------------------------------------------------------===// - -#ifndef __ADDRESSSPACE_HPP__ -#define __ADDRESSSPACE_HPP__ - -#include -#include -#include -#include - +// +// +// Abstracts accessing local vs remote address spaces. +// +//===----------------------------------------------------------------------===// + +#ifndef __ADDRESSSPACE_HPP__ +#define __ADDRESSSPACE_HPP__ + +#include +#include +#include +#include + #include "libunwind.h" #include "config.h" #include "dwarf2.h" @@ -32,12 +32,12 @@ #endif #if _LIBUNWIND_USE_DLADDR -#include +#include #if defined(__ELF__) && defined(_LIBUNWIND_LINK_DL_LIB) #pragma comment(lib, "dl") -#endif #endif - +#endif + #if defined(_LIBUNWIND_ARM_EHABI) struct EHABIIndexEntry { uint32_t functionOffset; @@ -45,8 +45,8 @@ struct EHABIIndexEntry { }; #endif -#ifdef __APPLE__ - +#ifdef __APPLE__ + struct dyld_unwind_sections { const struct mach_header* mh; @@ -55,15 +55,15 @@ struct EHABIIndexEntry { const void* compact_unwind_section; uintptr_t compact_unwind_section_length; }; - + // In 10.7.0 or later, libSystem.dylib implements this function. 
extern "C" bool _dyld_find_unwind_sections(void *, dyld_unwind_sections *); - + #elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) -// When statically linked on bare-metal, the symbols for the EH table are looked -// up without going through the dynamic loader. - +// When statically linked on bare-metal, the symbols for the EH table are looked +// up without going through the dynamic loader. + // The following linker script may be used to produce the necessary sections and symbols. // Unless the --eh-frame-hdr linker option is provided, the section is not generated // and does not take space in the output file. @@ -108,100 +108,100 @@ extern char __exidx_end; #include -#endif - -namespace libunwind { - -/// Used by findUnwindSections() to return info about needed sections. -struct UnwindInfoSections { +#endif + +namespace libunwind { + +/// Used by findUnwindSections() to return info about needed sections. +struct UnwindInfoSections { #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) || \ defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) || \ defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) // No dso_base for SEH. 
- uintptr_t dso_base; -#endif + uintptr_t dso_base; +#endif #if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) size_t text_segment_length; #endif #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - uintptr_t dwarf_section; + uintptr_t dwarf_section; size_t dwarf_section_length; -#endif +#endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) - uintptr_t dwarf_index_section; + uintptr_t dwarf_index_section; size_t dwarf_index_section_length; -#endif +#endif #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) - uintptr_t compact_unwind_section; + uintptr_t compact_unwind_section; size_t compact_unwind_section_length; -#endif +#endif #if defined(_LIBUNWIND_ARM_EHABI) - uintptr_t arm_section; + uintptr_t arm_section; size_t arm_section_length; -#endif -}; - - -/// LocalAddressSpace is used as a template parameter to UnwindCursor when -/// unwinding a thread in the same process. The wrappers compile away, -/// making local unwinds fast. +#endif +}; + + +/// LocalAddressSpace is used as a template parameter to UnwindCursor when +/// unwinding a thread in the same process. The wrappers compile away, +/// making local unwinds fast. 
class _LIBUNWIND_HIDDEN LocalAddressSpace { -public: +public: typedef uintptr_t pint_t; typedef intptr_t sint_t; - uint8_t get8(pint_t addr) { - uint8_t val; - memcpy(&val, (void *)addr, sizeof(val)); - return val; - } - uint16_t get16(pint_t addr) { - uint16_t val; - memcpy(&val, (void *)addr, sizeof(val)); - return val; - } - uint32_t get32(pint_t addr) { - uint32_t val; - memcpy(&val, (void *)addr, sizeof(val)); - return val; - } - uint64_t get64(pint_t addr) { - uint64_t val; - memcpy(&val, (void *)addr, sizeof(val)); - return val; - } - double getDouble(pint_t addr) { - double val; - memcpy(&val, (void *)addr, sizeof(val)); - return val; - } - v128 getVector(pint_t addr) { - v128 val; - memcpy(&val, (void *)addr, sizeof(val)); - return val; - } - uintptr_t getP(pint_t addr); + uint8_t get8(pint_t addr) { + uint8_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint16_t get16(pint_t addr) { + uint16_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint32_t get32(pint_t addr) { + uint32_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint64_t get64(pint_t addr) { + uint64_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + double getDouble(pint_t addr) { + double val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + v128 getVector(pint_t addr) { + v128 val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uintptr_t getP(pint_t addr); uint64_t getRegister(pint_t addr); - static uint64_t getULEB128(pint_t &addr, pint_t end); - static int64_t getSLEB128(pint_t &addr, pint_t end); - - pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, - pint_t datarelBase = 0); - bool findFunctionName(pint_t addr, char *buf, size_t bufLen, - unw_word_t *offset); - bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info); - bool findOtherFDE(pint_t targetAddr, pint_t &fde); - - static LocalAddressSpace sThisAddressSpace; -}; - -inline uintptr_t 
LocalAddressSpace::getP(pint_t addr) { + static uint64_t getULEB128(pint_t &addr, pint_t end); + static int64_t getSLEB128(pint_t &addr, pint_t end); + + pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, + pint_t datarelBase = 0); + bool findFunctionName(pint_t addr, char *buf, size_t bufLen, + unw_word_t *offset); + bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info); + bool findOtherFDE(pint_t targetAddr, pint_t &fde); + + static LocalAddressSpace sThisAddressSpace; +}; + +inline uintptr_t LocalAddressSpace::getP(pint_t addr) { #if __SIZEOF_POINTER__ == 8 - return get64(addr); -#else - return get32(addr); -#endif -} - + return get64(addr); +#else + return get32(addr); +#endif +} + inline uint64_t LocalAddressSpace::getRegister(pint_t addr) { #if __SIZEOF_POINTER__ == 8 || defined(__mips64) return get64(addr); @@ -210,144 +210,144 @@ inline uint64_t LocalAddressSpace::getRegister(pint_t addr) { #endif } -/// Read a ULEB128 into a 64-bit word. -inline uint64_t LocalAddressSpace::getULEB128(pint_t &addr, pint_t end) { - const uint8_t *p = (uint8_t *)addr; - const uint8_t *pend = (uint8_t *)end; - uint64_t result = 0; - int bit = 0; - do { - uint64_t b; - - if (p == pend) - _LIBUNWIND_ABORT("truncated uleb128 expression"); - - b = *p & 0x7f; - - if (bit >= 64 || b << bit >> bit != b) { - _LIBUNWIND_ABORT("malformed uleb128 expression"); - } else { - result |= b << bit; - bit += 7; - } - } while (*p++ >= 0x80); - addr = (pint_t) p; - return result; -} - -/// Read a SLEB128 into a 64-bit word. -inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) { - const uint8_t *p = (uint8_t *)addr; - const uint8_t *pend = (uint8_t *)end; - int64_t result = 0; - int bit = 0; - uint8_t byte; - do { - if (p == pend) - _LIBUNWIND_ABORT("truncated sleb128 expression"); - byte = *p++; +/// Read a ULEB128 into a 64-bit word. 
+inline uint64_t LocalAddressSpace::getULEB128(pint_t &addr, pint_t end) { + const uint8_t *p = (uint8_t *)addr; + const uint8_t *pend = (uint8_t *)end; + uint64_t result = 0; + int bit = 0; + do { + uint64_t b; + + if (p == pend) + _LIBUNWIND_ABORT("truncated uleb128 expression"); + + b = *p & 0x7f; + + if (bit >= 64 || b << bit >> bit != b) { + _LIBUNWIND_ABORT("malformed uleb128 expression"); + } else { + result |= b << bit; + bit += 7; + } + } while (*p++ >= 0x80); + addr = (pint_t) p; + return result; +} + +/// Read a SLEB128 into a 64-bit word. +inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) { + const uint8_t *p = (uint8_t *)addr; + const uint8_t *pend = (uint8_t *)end; + int64_t result = 0; + int bit = 0; + uint8_t byte; + do { + if (p == pend) + _LIBUNWIND_ABORT("truncated sleb128 expression"); + byte = *p++; result |= (uint64_t)(byte & 0x7f) << bit; - bit += 7; - } while (byte & 0x80); - // sign extend negative numbers + bit += 7; + } while (byte & 0x80); + // sign extend negative numbers if ((byte & 0x40) != 0 && bit < 64) - result |= (-1ULL) << bit; - addr = (pint_t) p; - return result; -} - -inline LocalAddressSpace::pint_t -LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, - pint_t datarelBase) { - pint_t startAddr = addr; - const uint8_t *p = (uint8_t *)addr; - pint_t result; - - // first get value - switch (encoding & 0x0F) { - case DW_EH_PE_ptr: - result = getP(addr); - p += sizeof(pint_t); - addr = (pint_t) p; - break; - case DW_EH_PE_uleb128: - result = (pint_t)getULEB128(addr, end); - break; - case DW_EH_PE_udata2: - result = get16(addr); - p += 2; - addr = (pint_t) p; - break; - case DW_EH_PE_udata4: - result = get32(addr); - p += 4; - addr = (pint_t) p; - break; - case DW_EH_PE_udata8: - result = (pint_t)get64(addr); - p += 8; - addr = (pint_t) p; - break; - case DW_EH_PE_sleb128: - result = (pint_t)getSLEB128(addr, end); - break; - case DW_EH_PE_sdata2: - // Sign extend from signed 16-bit value. 
- result = (pint_t)(int16_t)get16(addr); - p += 2; - addr = (pint_t) p; - break; - case DW_EH_PE_sdata4: - // Sign extend from signed 32-bit value. - result = (pint_t)(int32_t)get32(addr); - p += 4; - addr = (pint_t) p; - break; - case DW_EH_PE_sdata8: - result = (pint_t)get64(addr); - p += 8; - addr = (pint_t) p; - break; - default: - _LIBUNWIND_ABORT("unknown pointer encoding"); - } - - // then add relative offset - switch (encoding & 0x70) { - case DW_EH_PE_absptr: - // do nothing - break; - case DW_EH_PE_pcrel: - result += startAddr; - break; - case DW_EH_PE_textrel: - _LIBUNWIND_ABORT("DW_EH_PE_textrel pointer encoding not supported"); - break; - case DW_EH_PE_datarel: - // DW_EH_PE_datarel is only valid in a few places, so the parameter has a - // default value of 0, and we abort in the event that someone calls this - // function with a datarelBase of 0 and DW_EH_PE_datarel encoding. - if (datarelBase == 0) - _LIBUNWIND_ABORT("DW_EH_PE_datarel is invalid with a datarelBase of 0"); - result += datarelBase; - break; - case DW_EH_PE_funcrel: - _LIBUNWIND_ABORT("DW_EH_PE_funcrel pointer encoding not supported"); - break; - case DW_EH_PE_aligned: - _LIBUNWIND_ABORT("DW_EH_PE_aligned pointer encoding not supported"); - break; - default: - _LIBUNWIND_ABORT("unknown pointer encoding"); - break; - } - - if (encoding & DW_EH_PE_indirect) - result = getP(result); - - return result; -} - + result |= (-1ULL) << bit; + addr = (pint_t) p; + return result; +} + +inline LocalAddressSpace::pint_t +LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, + pint_t datarelBase) { + pint_t startAddr = addr; + const uint8_t *p = (uint8_t *)addr; + pint_t result; + + // first get value + switch (encoding & 0x0F) { + case DW_EH_PE_ptr: + result = getP(addr); + p += sizeof(pint_t); + addr = (pint_t) p; + break; + case DW_EH_PE_uleb128: + result = (pint_t)getULEB128(addr, end); + break; + case DW_EH_PE_udata2: + result = get16(addr); + p += 2; + addr = (pint_t) p; + 
break; + case DW_EH_PE_udata4: + result = get32(addr); + p += 4; + addr = (pint_t) p; + break; + case DW_EH_PE_udata8: + result = (pint_t)get64(addr); + p += 8; + addr = (pint_t) p; + break; + case DW_EH_PE_sleb128: + result = (pint_t)getSLEB128(addr, end); + break; + case DW_EH_PE_sdata2: + // Sign extend from signed 16-bit value. + result = (pint_t)(int16_t)get16(addr); + p += 2; + addr = (pint_t) p; + break; + case DW_EH_PE_sdata4: + // Sign extend from signed 32-bit value. + result = (pint_t)(int32_t)get32(addr); + p += 4; + addr = (pint_t) p; + break; + case DW_EH_PE_sdata8: + result = (pint_t)get64(addr); + p += 8; + addr = (pint_t) p; + break; + default: + _LIBUNWIND_ABORT("unknown pointer encoding"); + } + + // then add relative offset + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + // do nothing + break; + case DW_EH_PE_pcrel: + result += startAddr; + break; + case DW_EH_PE_textrel: + _LIBUNWIND_ABORT("DW_EH_PE_textrel pointer encoding not supported"); + break; + case DW_EH_PE_datarel: + // DW_EH_PE_datarel is only valid in a few places, so the parameter has a + // default value of 0, and we abort in the event that someone calls this + // function with a datarelBase of 0 and DW_EH_PE_datarel encoding. 
+ if (datarelBase == 0) + _LIBUNWIND_ABORT("DW_EH_PE_datarel is invalid with a datarelBase of 0"); + result += datarelBase; + break; + case DW_EH_PE_funcrel: + _LIBUNWIND_ABORT("DW_EH_PE_funcrel pointer encoding not supported"); + break; + case DW_EH_PE_aligned: + _LIBUNWIND_ABORT("DW_EH_PE_aligned pointer encoding not supported"); + break; + default: + _LIBUNWIND_ABORT("unknown pointer encoding"); + break; + } + + if (encoding & DW_EH_PE_indirect) + result = getP(result); + + return result; +} + #if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) // The ElfW() macro for pointer-size independent ELF header traversal is not @@ -498,23 +498,23 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, #endif // defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) -inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, - UnwindInfoSections &info) { -#ifdef __APPLE__ - dyld_unwind_sections dyldInfo; - if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) { - info.dso_base = (uintptr_t)dyldInfo.mh; +inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, + UnwindInfoSections &info) { +#ifdef __APPLE__ + dyld_unwind_sections dyldInfo; + if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) { + info.dso_base = (uintptr_t)dyldInfo.mh; #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - info.dwarf_section = (uintptr_t)dyldInfo.dwarf_section; + info.dwarf_section = (uintptr_t)dyldInfo.dwarf_section; info.dwarf_section_length = (size_t)dyldInfo.dwarf_section_length; - #endif - info.compact_unwind_section = (uintptr_t)dyldInfo.compact_unwind_section; + #endif + info.compact_unwind_section = (uintptr_t)dyldInfo.compact_unwind_section; info.compact_unwind_section_length = (size_t)dyldInfo.compact_unwind_section_length; - return true; - } + return true; + } #elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) info.dso_base = 0; - // Bare metal is statically linked, so no need to ask the dynamic loader + // Bare metal is 
statically linked, so no need to ask the dynamic loader info.dwarf_section_length = (size_t)(&__eh_frame_end - &__eh_frame_start); info.dwarf_section = (uintptr_t)(&__eh_frame_start); _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", @@ -529,7 +529,7 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, return true; #elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) // Bare metal is statically linked, so no need to ask the dynamic loader - info.arm_section = (uintptr_t)(&__exidx_start); + info.arm_section = (uintptr_t)(&__exidx_start); info.arm_section_length = (size_t)(&__exidx_end - &__exidx_start); _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", (void *)info.arm_section, (void *)info.arm_section_length); @@ -581,50 +581,50 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, (void)info; return true; #elif defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX) - int length = 0; + int length = 0; info.arm_section = (uintptr_t)dl_unwind_find_exidx((_Unwind_Ptr)targetAddr, &length); info.arm_section_length = (size_t)length * sizeof(EHABIIndexEntry); - if (info.arm_section && info.arm_section_length) - return true; + if (info.arm_section && info.arm_section_length) + return true; #elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) - dl_iterate_cb_data cb_data = {this, &info, targetAddr}; + dl_iterate_cb_data cb_data = {this, &info, targetAddr}; int found = dl_iterate_phdr(findUnwindSectionsByPhdr, &cb_data); - return static_cast(found); -#endif - - return false; -} - - -inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) { - // TO DO: if OS has way to dynamically register FDEs, check that. 
- (void)targetAddr; - (void)fde; - return false; -} - -inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf, - size_t bufLen, - unw_word_t *offset) { + return static_cast(found); +#endif + + return false; +} + + +inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) { + // TO DO: if OS has way to dynamically register FDEs, check that. + (void)targetAddr; + (void)fde; + return false; +} + +inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf, + size_t bufLen, + unw_word_t *offset) { #if _LIBUNWIND_USE_DLADDR - Dl_info dyldInfo; - if (dladdr((void *)addr, &dyldInfo)) { - if (dyldInfo.dli_sname != NULL) { - snprintf(buf, bufLen, "%s", dyldInfo.dli_sname); - *offset = (addr - (pint_t) dyldInfo.dli_saddr); - return true; - } - } + Dl_info dyldInfo; + if (dladdr((void *)addr, &dyldInfo)) { + if (dyldInfo.dli_sname != NULL) { + snprintf(buf, bufLen, "%s", dyldInfo.dli_sname); + *offset = (addr - (pint_t) dyldInfo.dli_saddr); + return true; + } + } #else (void)addr; (void)buf; (void)bufLen; (void)offset; -#endif - return false; -} - -} // namespace libunwind - -#endif // __ADDRESSSPACE_HPP__ +#endif + return false; +} + +} // namespace libunwind + +#endif // __ADDRESSSPACE_HPP__ diff --git a/contrib/libs/libunwind/src/CompactUnwinder.hpp b/contrib/libs/libunwind/src/CompactUnwinder.hpp index daa7b0cd15e..0b2b5e111bf 100644 --- a/contrib/libs/libunwind/src/CompactUnwinder.hpp +++ b/contrib/libs/libunwind/src/CompactUnwinder.hpp @@ -1,697 +1,697 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Does runtime stack unwinding using compact unwind encodings. 
-// -//===----------------------------------------------------------------------===// - -#ifndef __COMPACT_UNWINDER_HPP__ -#define __COMPACT_UNWINDER_HPP__ - -#include -#include - -#include -#include - -#include "Registers.hpp" - -#define EXTRACT_BITS(value, mask) \ - ((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1)) - -namespace libunwind { - +// +// +// Does runtime stack unwinding using compact unwind encodings. +// +//===----------------------------------------------------------------------===// + +#ifndef __COMPACT_UNWINDER_HPP__ +#define __COMPACT_UNWINDER_HPP__ + +#include +#include + +#include +#include + +#include "Registers.hpp" + +#define EXTRACT_BITS(value, mask) \ + ((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1)) + +namespace libunwind { + #if defined(_LIBUNWIND_TARGET_I386) -/// CompactUnwinder_x86 uses a compact unwind info to virtually "step" (aka -/// unwind) by modifying a Registers_x86 register set -template -class CompactUnwinder_x86 { -public: - - static int stepWithCompactEncoding(compact_unwind_encoding_t info, - uint32_t functionStart, A &addressSpace, - Registers_x86 ®isters); - -private: - typename A::pint_t pint_t; - - static void frameUnwind(A &addressSpace, Registers_x86 ®isters); - static void framelessUnwind(A &addressSpace, - typename A::pint_t returnAddressLocation, - Registers_x86 ®isters); - static int - stepWithCompactEncodingEBPFrame(compact_unwind_encoding_t compactEncoding, - uint32_t functionStart, A &addressSpace, - Registers_x86 ®isters); - static int stepWithCompactEncodingFrameless( - compact_unwind_encoding_t compactEncoding, uint32_t functionStart, - A &addressSpace, Registers_x86 ®isters, bool indirectStackSize); -}; - -template -int CompactUnwinder_x86
::stepWithCompactEncoding( - compact_unwind_encoding_t compactEncoding, uint32_t functionStart, - A &addressSpace, Registers_x86 ®isters) { - switch (compactEncoding & UNWIND_X86_MODE_MASK) { - case UNWIND_X86_MODE_EBP_FRAME: - return stepWithCompactEncodingEBPFrame(compactEncoding, functionStart, - addressSpace, registers); - case UNWIND_X86_MODE_STACK_IMMD: - return stepWithCompactEncodingFrameless(compactEncoding, functionStart, - addressSpace, registers, false); - case UNWIND_X86_MODE_STACK_IND: - return stepWithCompactEncodingFrameless(compactEncoding, functionStart, - addressSpace, registers, true); - } - _LIBUNWIND_ABORT("invalid compact unwind encoding"); -} - -template -int CompactUnwinder_x86::stepWithCompactEncodingEBPFrame( - compact_unwind_encoding_t compactEncoding, uint32_t functionStart, - A &addressSpace, Registers_x86 ®isters) { - uint32_t savedRegistersOffset = - EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_OFFSET); - uint32_t savedRegistersLocations = - EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_REGISTERS); - - uint32_t savedRegisters = registers.getEBP() - 4 * savedRegistersOffset; - for (int i = 0; i < 5; ++i) { - switch (savedRegistersLocations & 0x7) { - case UNWIND_X86_REG_NONE: - // no register saved in this slot - break; - case UNWIND_X86_REG_EBX: - registers.setEBX(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_ECX: - registers.setECX(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_EDX: - registers.setEDX(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_EDI: - registers.setEDI(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_ESI: - registers.setESI(addressSpace.get32(savedRegisters)); - break; - default: - (void)functionStart; - _LIBUNWIND_DEBUG_LOG("bad register for EBP frame, encoding=%08X for " +/// CompactUnwinder_x86 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_x86 register set +template +class 
CompactUnwinder_x86 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t info, + uint32_t functionStart, A &addressSpace, + Registers_x86 ®isters); + +private: + typename A::pint_t pint_t; + + static void frameUnwind(A &addressSpace, Registers_x86 ®isters); + static void framelessUnwind(A &addressSpace, + typename A::pint_t returnAddressLocation, + Registers_x86 ®isters); + static int + stepWithCompactEncodingEBPFrame(compact_unwind_encoding_t compactEncoding, + uint32_t functionStart, A &addressSpace, + Registers_x86 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters, bool indirectStackSize); +}; + +template +int CompactUnwinder_x86::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters) { + switch (compactEncoding & UNWIND_X86_MODE_MASK) { + case UNWIND_X86_MODE_EBP_FRAME: + return stepWithCompactEncodingEBPFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_X86_MODE_STACK_IMMD: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, false); + case UNWIND_X86_MODE_STACK_IND: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, true); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_x86::stepWithCompactEncodingEBPFrame( + compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters) { + uint32_t savedRegistersOffset = + EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_OFFSET); + uint32_t savedRegistersLocations = + EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_REGISTERS); + + uint32_t savedRegisters = registers.getEBP() - 4 * savedRegistersOffset; + for (int i = 0; i < 5; ++i) { + switch (savedRegistersLocations & 0x7) { + case 
UNWIND_X86_REG_NONE: + // no register saved in this slot + break; + case UNWIND_X86_REG_EBX: + registers.setEBX(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_ECX: + registers.setECX(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_EDX: + registers.setEDX(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_EDI: + registers.setEDI(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_ESI: + registers.setESI(addressSpace.get32(savedRegisters)); + break; + default: + (void)functionStart; + _LIBUNWIND_DEBUG_LOG("bad register for EBP frame, encoding=%08X for " "function starting at 0x%X", - compactEncoding, functionStart); - _LIBUNWIND_ABORT("invalid compact unwind encoding"); - } - savedRegisters += 4; - savedRegistersLocations = (savedRegistersLocations >> 3); - } - frameUnwind(addressSpace, registers); - return UNW_STEP_SUCCESS; -} - -template -int CompactUnwinder_x86::stepWithCompactEncodingFrameless( - compact_unwind_encoding_t encoding, uint32_t functionStart, - A &addressSpace, Registers_x86 ®isters, bool indirectStackSize) { - uint32_t stackSizeEncoded = - EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE); - uint32_t stackAdjust = - EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST); - uint32_t regCount = - EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT); - uint32_t permutation = - EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION); - uint32_t stackSize = stackSizeEncoded * 4; - if (indirectStackSize) { - // stack size is encoded in subl $xxx,%esp instruction - uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); - stackSize = subl + 4 * stackAdjust; - } - // decompress permutation - uint32_t permunreg[6]; - switch (regCount) { - case 6: - permunreg[0] = permutation / 120; - permutation -= (permunreg[0] * 120); - permunreg[1] = permutation / 24; - permutation -= (permunreg[1] * 24); - permunreg[2] = permutation / 6; - permutation -= 
(permunreg[2] * 6); - permunreg[3] = permutation / 2; - permutation -= (permunreg[3] * 2); - permunreg[4] = permutation; - permunreg[5] = 0; - break; - case 5: - permunreg[0] = permutation / 120; - permutation -= (permunreg[0] * 120); - permunreg[1] = permutation / 24; - permutation -= (permunreg[1] * 24); - permunreg[2] = permutation / 6; - permutation -= (permunreg[2] * 6); - permunreg[3] = permutation / 2; - permutation -= (permunreg[3] * 2); - permunreg[4] = permutation; - break; - case 4: - permunreg[0] = permutation / 60; - permutation -= (permunreg[0] * 60); - permunreg[1] = permutation / 12; - permutation -= (permunreg[1] * 12); - permunreg[2] = permutation / 3; - permutation -= (permunreg[2] * 3); - permunreg[3] = permutation; - break; - case 3: - permunreg[0] = permutation / 20; - permutation -= (permunreg[0] * 20); - permunreg[1] = permutation / 4; - permutation -= (permunreg[1] * 4); - permunreg[2] = permutation; - break; - case 2: - permunreg[0] = permutation / 5; - permutation -= (permunreg[0] * 5); - permunreg[1] = permutation; - break; - case 1: - permunreg[0] = permutation; - break; - } - // re-number registers back to standard numbers - int registersSaved[6]; - bool used[7] = { false, false, false, false, false, false, false }; - for (uint32_t i = 0; i < regCount; ++i) { - uint32_t renum = 0; - for (int u = 1; u < 7; ++u) { - if (!used[u]) { - if (renum == permunreg[i]) { - registersSaved[i] = u; - used[u] = true; - break; - } - ++renum; - } - } - } - uint32_t savedRegisters = registers.getSP() + stackSize - 4 - 4 * regCount; - for (uint32_t i = 0; i < regCount; ++i) { - switch (registersSaved[i]) { - case UNWIND_X86_REG_EBX: - registers.setEBX(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_ECX: - registers.setECX(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_EDX: - registers.setEDX(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_EDI: - 
registers.setEDI(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_ESI: - registers.setESI(addressSpace.get32(savedRegisters)); - break; - case UNWIND_X86_REG_EBP: - registers.setEBP(addressSpace.get32(savedRegisters)); - break; - default: - _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " + compactEncoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 4; + savedRegistersLocations = (savedRegistersLocations >> 3); + } + frameUnwind(addressSpace, registers); + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_x86::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters, bool indirectStackSize) { + uint32_t stackSizeEncoded = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE); + uint32_t stackAdjust = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST); + uint32_t regCount = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT); + uint32_t permutation = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION); + uint32_t stackSize = stackSizeEncoded * 4; + if (indirectStackSize) { + // stack size is encoded in subl $xxx,%esp instruction + uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); + stackSize = subl + 4 * stackAdjust; + } + // decompress permutation + uint32_t permunreg[6]; + switch (regCount) { + case 6: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + permunreg[5] = 0; + break; + case 5: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = 
permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + break; + case 4: + permunreg[0] = permutation / 60; + permutation -= (permunreg[0] * 60); + permunreg[1] = permutation / 12; + permutation -= (permunreg[1] * 12); + permunreg[2] = permutation / 3; + permutation -= (permunreg[2] * 3); + permunreg[3] = permutation; + break; + case 3: + permunreg[0] = permutation / 20; + permutation -= (permunreg[0] * 20); + permunreg[1] = permutation / 4; + permutation -= (permunreg[1] * 4); + permunreg[2] = permutation; + break; + case 2: + permunreg[0] = permutation / 5; + permutation -= (permunreg[0] * 5); + permunreg[1] = permutation; + break; + case 1: + permunreg[0] = permutation; + break; + } + // re-number registers back to standard numbers + int registersSaved[6]; + bool used[7] = { false, false, false, false, false, false, false }; + for (uint32_t i = 0; i < regCount; ++i) { + uint32_t renum = 0; + for (int u = 1; u < 7; ++u) { + if (!used[u]) { + if (renum == permunreg[i]) { + registersSaved[i] = u; + used[u] = true; + break; + } + ++renum; + } + } + } + uint32_t savedRegisters = registers.getSP() + stackSize - 4 - 4 * regCount; + for (uint32_t i = 0; i < regCount; ++i) { + switch (registersSaved[i]) { + case UNWIND_X86_REG_EBX: + registers.setEBX(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_ECX: + registers.setECX(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_EDX: + registers.setEDX(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_EDI: + registers.setEDI(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_ESI: + registers.setESI(addressSpace.get32(savedRegisters)); + break; + case UNWIND_X86_REG_EBP: + registers.setEBP(addressSpace.get32(savedRegisters)); + break; + default: + _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " "function starting at 0x%X", - encoding, 
functionStart); - _LIBUNWIND_ABORT("invalid compact unwind encoding"); - } - savedRegisters += 4; - } - framelessUnwind(addressSpace, savedRegisters, registers); - return UNW_STEP_SUCCESS; -} - - -template -void CompactUnwinder_x86::frameUnwind(A &addressSpace, - Registers_x86 ®isters) { - typename A::pint_t bp = registers.getEBP(); - // ebp points to old ebp - registers.setEBP(addressSpace.get32(bp)); - // old esp is ebp less saved ebp and return address - registers.setSP((uint32_t)bp + 8); - // pop return address into eip - registers.setIP(addressSpace.get32(bp + 4)); -} - -template -void CompactUnwinder_x86::framelessUnwind( - A &addressSpace, typename A::pint_t returnAddressLocation, - Registers_x86 ®isters) { - // return address is on stack after last saved register - registers.setIP(addressSpace.get32(returnAddressLocation)); - // old esp is before return address - registers.setSP((uint32_t)returnAddressLocation + 4); -} + encoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 4; + } + framelessUnwind(addressSpace, savedRegisters, registers); + return UNW_STEP_SUCCESS; +} + + +template +void CompactUnwinder_x86::frameUnwind(A &addressSpace, + Registers_x86 ®isters) { + typename A::pint_t bp = registers.getEBP(); + // ebp points to old ebp + registers.setEBP(addressSpace.get32(bp)); + // old esp is ebp less saved ebp and return address + registers.setSP((uint32_t)bp + 8); + // pop return address into eip + registers.setIP(addressSpace.get32(bp + 4)); +} + +template +void CompactUnwinder_x86::framelessUnwind( + A &addressSpace, typename A::pint_t returnAddressLocation, + Registers_x86 ®isters) { + // return address is on stack after last saved register + registers.setIP(addressSpace.get32(returnAddressLocation)); + // old esp is before return address + registers.setSP((uint32_t)returnAddressLocation + 4); +} #endif // _LIBUNWIND_TARGET_I386 - - + + #if defined(_LIBUNWIND_TARGET_X86_64) -/// 
CompactUnwinder_x86_64 uses a compact unwind info to virtually "step" (aka -/// unwind) by modifying a Registers_x86_64 register set -template -class CompactUnwinder_x86_64 { -public: - - static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, - uint64_t functionStart, A &addressSpace, - Registers_x86_64 ®isters); - -private: - typename A::pint_t pint_t; - - static void frameUnwind(A &addressSpace, Registers_x86_64 ®isters); - static void framelessUnwind(A &addressSpace, uint64_t returnAddressLocation, - Registers_x86_64 ®isters); - static int - stepWithCompactEncodingRBPFrame(compact_unwind_encoding_t compactEncoding, - uint64_t functionStart, A &addressSpace, - Registers_x86_64 ®isters); - static int stepWithCompactEncodingFrameless( - compact_unwind_encoding_t compactEncoding, uint64_t functionStart, - A &addressSpace, Registers_x86_64 ®isters, bool indirectStackSize); -}; - -template -int CompactUnwinder_x86_64::stepWithCompactEncoding( - compact_unwind_encoding_t compactEncoding, uint64_t functionStart, - A &addressSpace, Registers_x86_64 ®isters) { - switch (compactEncoding & UNWIND_X86_64_MODE_MASK) { - case UNWIND_X86_64_MODE_RBP_FRAME: - return stepWithCompactEncodingRBPFrame(compactEncoding, functionStart, - addressSpace, registers); - case UNWIND_X86_64_MODE_STACK_IMMD: - return stepWithCompactEncodingFrameless(compactEncoding, functionStart, - addressSpace, registers, false); - case UNWIND_X86_64_MODE_STACK_IND: - return stepWithCompactEncodingFrameless(compactEncoding, functionStart, - addressSpace, registers, true); - } - _LIBUNWIND_ABORT("invalid compact unwind encoding"); -} - -template -int CompactUnwinder_x86_64::stepWithCompactEncodingRBPFrame( - compact_unwind_encoding_t compactEncoding, uint64_t functionStart, - A &addressSpace, Registers_x86_64 ®isters) { - uint32_t savedRegistersOffset = - EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_OFFSET); - uint32_t savedRegistersLocations = - EXTRACT_BITS(compactEncoding, 
UNWIND_X86_64_RBP_FRAME_REGISTERS); - - uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset; - for (int i = 0; i < 5; ++i) { - switch (savedRegistersLocations & 0x7) { - case UNWIND_X86_64_REG_NONE: - // no register saved in this slot - break; - case UNWIND_X86_64_REG_RBX: - registers.setRBX(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R12: - registers.setR12(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R13: - registers.setR13(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R14: - registers.setR14(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R15: - registers.setR15(addressSpace.get64(savedRegisters)); - break; - default: - (void)functionStart; - _LIBUNWIND_DEBUG_LOG("bad register for RBP frame, encoding=%08X for " +/// CompactUnwinder_x86_64 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_x86_64 register set +template +class CompactUnwinder_x86_64 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters); + +private: + typename A::pint_t pint_t; + + static void frameUnwind(A &addressSpace, Registers_x86_64 ®isters); + static void framelessUnwind(A &addressSpace, uint64_t returnAddressLocation, + Registers_x86_64 ®isters); + static int + stepWithCompactEncodingRBPFrame(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_x86_64 ®isters, bool indirectStackSize); +}; + +template +int CompactUnwinder_x86_64::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_x86_64 ®isters) { + switch (compactEncoding & UNWIND_X86_64_MODE_MASK) { + 
case UNWIND_X86_64_MODE_RBP_FRAME: + return stepWithCompactEncodingRBPFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_X86_64_MODE_STACK_IMMD: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, false); + case UNWIND_X86_64_MODE_STACK_IND: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, true); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_x86_64::stepWithCompactEncodingRBPFrame( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_x86_64 ®isters) { + uint32_t savedRegistersOffset = + EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_OFFSET); + uint32_t savedRegistersLocations = + EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS); + + uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset; + for (int i = 0; i < 5; ++i) { + switch (savedRegistersLocations & 0x7) { + case UNWIND_X86_64_REG_NONE: + // no register saved in this slot + break; + case UNWIND_X86_64_REG_RBX: + registers.setRBX(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R12: + registers.setR12(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R13: + registers.setR13(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R14: + registers.setR14(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R15: + registers.setR15(addressSpace.get64(savedRegisters)); + break; + default: + (void)functionStart; + _LIBUNWIND_DEBUG_LOG("bad register for RBP frame, encoding=%08X for " "function starting at 0x%llX", - compactEncoding, functionStart); - _LIBUNWIND_ABORT("invalid compact unwind encoding"); - } - savedRegisters += 8; - savedRegistersLocations = (savedRegistersLocations >> 3); - } - frameUnwind(addressSpace, registers); - return UNW_STEP_SUCCESS; -} - -template -int 
CompactUnwinder_x86_64::stepWithCompactEncodingFrameless( - compact_unwind_encoding_t encoding, uint64_t functionStart, A &addressSpace, - Registers_x86_64 ®isters, bool indirectStackSize) { - uint32_t stackSizeEncoded = - EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE); - uint32_t stackAdjust = - EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST); - uint32_t regCount = - EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT); - uint32_t permutation = - EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION); - uint32_t stackSize = stackSizeEncoded * 8; - if (indirectStackSize) { - // stack size is encoded in subl $xxx,%esp instruction - uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); - stackSize = subl + 8 * stackAdjust; - } - // decompress permutation - uint32_t permunreg[6]; - switch (regCount) { - case 6: - permunreg[0] = permutation / 120; - permutation -= (permunreg[0] * 120); - permunreg[1] = permutation / 24; - permutation -= (permunreg[1] * 24); - permunreg[2] = permutation / 6; - permutation -= (permunreg[2] * 6); - permunreg[3] = permutation / 2; - permutation -= (permunreg[3] * 2); - permunreg[4] = permutation; - permunreg[5] = 0; - break; - case 5: - permunreg[0] = permutation / 120; - permutation -= (permunreg[0] * 120); - permunreg[1] = permutation / 24; - permutation -= (permunreg[1] * 24); - permunreg[2] = permutation / 6; - permutation -= (permunreg[2] * 6); - permunreg[3] = permutation / 2; - permutation -= (permunreg[3] * 2); - permunreg[4] = permutation; - break; - case 4: - permunreg[0] = permutation / 60; - permutation -= (permunreg[0] * 60); - permunreg[1] = permutation / 12; - permutation -= (permunreg[1] * 12); - permunreg[2] = permutation / 3; - permutation -= (permunreg[2] * 3); - permunreg[3] = permutation; - break; - case 3: - permunreg[0] = permutation / 20; - permutation -= (permunreg[0] * 20); - permunreg[1] = permutation / 4; - permutation -= (permunreg[1] * 4); - 
permunreg[2] = permutation; - break; - case 2: - permunreg[0] = permutation / 5; - permutation -= (permunreg[0] * 5); - permunreg[1] = permutation; - break; - case 1: - permunreg[0] = permutation; - break; - } - // re-number registers back to standard numbers - int registersSaved[6]; - bool used[7] = { false, false, false, false, false, false, false }; - for (uint32_t i = 0; i < regCount; ++i) { - uint32_t renum = 0; - for (int u = 1; u < 7; ++u) { - if (!used[u]) { - if (renum == permunreg[i]) { - registersSaved[i] = u; - used[u] = true; - break; - } - ++renum; - } - } - } - uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount; - for (uint32_t i = 0; i < regCount; ++i) { - switch (registersSaved[i]) { - case UNWIND_X86_64_REG_RBX: - registers.setRBX(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R12: - registers.setR12(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R13: - registers.setR13(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R14: - registers.setR14(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_R15: - registers.setR15(addressSpace.get64(savedRegisters)); - break; - case UNWIND_X86_64_REG_RBP: - registers.setRBP(addressSpace.get64(savedRegisters)); - break; - default: - _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " + compactEncoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 8; + savedRegistersLocations = (savedRegistersLocations >> 3); + } + frameUnwind(addressSpace, registers); + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_x86_64::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters, bool indirectStackSize) { + uint32_t stackSizeEncoded = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE); + uint32_t stackAdjust = + EXTRACT_BITS(encoding, 
UNWIND_X86_64_FRAMELESS_STACK_ADJUST); + uint32_t regCount = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT); + uint32_t permutation = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION); + uint32_t stackSize = stackSizeEncoded * 8; + if (indirectStackSize) { + // stack size is encoded in subl $xxx,%esp instruction + uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); + stackSize = subl + 8 * stackAdjust; + } + // decompress permutation + uint32_t permunreg[6]; + switch (regCount) { + case 6: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + permunreg[5] = 0; + break; + case 5: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + break; + case 4: + permunreg[0] = permutation / 60; + permutation -= (permunreg[0] * 60); + permunreg[1] = permutation / 12; + permutation -= (permunreg[1] * 12); + permunreg[2] = permutation / 3; + permutation -= (permunreg[2] * 3); + permunreg[3] = permutation; + break; + case 3: + permunreg[0] = permutation / 20; + permutation -= (permunreg[0] * 20); + permunreg[1] = permutation / 4; + permutation -= (permunreg[1] * 4); + permunreg[2] = permutation; + break; + case 2: + permunreg[0] = permutation / 5; + permutation -= (permunreg[0] * 5); + permunreg[1] = permutation; + break; + case 1: + permunreg[0] = permutation; + break; + } + // re-number registers back to standard numbers + int registersSaved[6]; + bool used[7] = { false, false, false, false, 
false, false, false }; + for (uint32_t i = 0; i < regCount; ++i) { + uint32_t renum = 0; + for (int u = 1; u < 7; ++u) { + if (!used[u]) { + if (renum == permunreg[i]) { + registersSaved[i] = u; + used[u] = true; + break; + } + ++renum; + } + } + } + uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount; + for (uint32_t i = 0; i < regCount; ++i) { + switch (registersSaved[i]) { + case UNWIND_X86_64_REG_RBX: + registers.setRBX(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R12: + registers.setR12(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R13: + registers.setR13(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R14: + registers.setR14(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_R15: + registers.setR15(addressSpace.get64(savedRegisters)); + break; + case UNWIND_X86_64_REG_RBP: + registers.setRBP(addressSpace.get64(savedRegisters)); + break; + default: + _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " "function starting at 0x%llX", - encoding, functionStart); - _LIBUNWIND_ABORT("invalid compact unwind encoding"); - } - savedRegisters += 8; - } - framelessUnwind(addressSpace, savedRegisters, registers); - return UNW_STEP_SUCCESS; -} - - -template -void CompactUnwinder_x86_64::frameUnwind(A &addressSpace, - Registers_x86_64 ®isters) { - uint64_t rbp = registers.getRBP(); - // ebp points to old ebp - registers.setRBP(addressSpace.get64(rbp)); - // old esp is ebp less saved ebp and return address - registers.setSP(rbp + 16); - // pop return address into eip - registers.setIP(addressSpace.get64(rbp + 8)); -} - -template -void CompactUnwinder_x86_64::framelessUnwind(A &addressSpace, - uint64_t returnAddressLocation, - Registers_x86_64 ®isters) { - // return address is on stack after last saved register - registers.setIP(addressSpace.get64(returnAddressLocation)); - // old esp is before return address - 
registers.setSP(returnAddressLocation + 8); -} + encoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 8; + } + framelessUnwind(addressSpace, savedRegisters, registers); + return UNW_STEP_SUCCESS; +} + + +template +void CompactUnwinder_x86_64::frameUnwind(A &addressSpace, + Registers_x86_64 ®isters) { + uint64_t rbp = registers.getRBP(); + // ebp points to old ebp + registers.setRBP(addressSpace.get64(rbp)); + // old esp is ebp less saved ebp and return address + registers.setSP(rbp + 16); + // pop return address into eip + registers.setIP(addressSpace.get64(rbp + 8)); +} + +template +void CompactUnwinder_x86_64::framelessUnwind(A &addressSpace, + uint64_t returnAddressLocation, + Registers_x86_64 ®isters) { + // return address is on stack after last saved register + registers.setIP(addressSpace.get64(returnAddressLocation)); + // old esp is before return address + registers.setSP(returnAddressLocation + 8); +} #endif // _LIBUNWIND_TARGET_X86_64 - - - + + + #if defined(_LIBUNWIND_TARGET_AARCH64) -/// CompactUnwinder_arm64 uses a compact unwind info to virtually "step" (aka -/// unwind) by modifying a Registers_arm64 register set -template -class CompactUnwinder_arm64 { -public: - - static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, - uint64_t functionStart, A &addressSpace, - Registers_arm64 ®isters); - -private: - typename A::pint_t pint_t; - - static int - stepWithCompactEncodingFrame(compact_unwind_encoding_t compactEncoding, - uint64_t functionStart, A &addressSpace, - Registers_arm64 ®isters); - static int stepWithCompactEncodingFrameless( - compact_unwind_encoding_t compactEncoding, uint64_t functionStart, - A &addressSpace, Registers_arm64 ®isters); -}; - -template -int CompactUnwinder_arm64::stepWithCompactEncoding( - compact_unwind_encoding_t compactEncoding, uint64_t functionStart, - A &addressSpace, Registers_arm64 ®isters) { - switch (compactEncoding & UNWIND_ARM64_MODE_MASK) 
{ - case UNWIND_ARM64_MODE_FRAME: - return stepWithCompactEncodingFrame(compactEncoding, functionStart, - addressSpace, registers); - case UNWIND_ARM64_MODE_FRAMELESS: - return stepWithCompactEncodingFrameless(compactEncoding, functionStart, - addressSpace, registers); - } - _LIBUNWIND_ABORT("invalid compact unwind encoding"); -} - -template -int CompactUnwinder_arm64::stepWithCompactEncodingFrameless( - compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, - Registers_arm64 ®isters) { - uint32_t stackSize = - 16 * EXTRACT_BITS(encoding, UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK); - - uint64_t savedRegisterLoc = registers.getSP() + stackSize; - - if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { +/// CompactUnwinder_arm64 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_arm64 register set +template +class CompactUnwinder_arm64 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_arm64 ®isters); + +private: + typename A::pint_t pint_t; + + static int + stepWithCompactEncodingFrame(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_arm64 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_arm64 ®isters); +}; + +template +int CompactUnwinder_arm64::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_arm64 ®isters) { + switch (compactEncoding & UNWIND_ARM64_MODE_MASK) { + case UNWIND_ARM64_MODE_FRAME: + return stepWithCompactEncodingFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_ARM64_MODE_FRAMELESS: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int 
CompactUnwinder_arm64::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, + Registers_arm64 ®isters) { + uint32_t stackSize = + 16 * EXTRACT_BITS(encoding, UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK); + + uint64_t savedRegisterLoc = registers.getSP() + stackSize; + + if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X20, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { registers.setRegister(UNW_AARCH64_X21, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X22, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { registers.setRegister(UNW_AARCH64_X23, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X24, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) { registers.setRegister(UNW_AARCH64_X25, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X26, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { registers.setRegister(UNW_AARCH64_X27, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; 
registers.setRegister(UNW_AARCH64_X28, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - - if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { + savedRegisterLoc -= 8; + } + + if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { registers.setFloatRegister(UNW_AARCH64_V8, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V9, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { registers.setFloatRegister(UNW_AARCH64_V10, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V11, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { registers.setFloatRegister(UNW_AARCH64_V12, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V13, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { registers.setFloatRegister(UNW_AARCH64_V14, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V15, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - - // subtract 
stack size off of sp - registers.setSP(savedRegisterLoc); - - // set pc to be value in lr + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + + // subtract stack size off of sp + registers.setSP(savedRegisterLoc); + + // set pc to be value in lr registers.setIP(registers.getRegister(UNW_AARCH64_LR)); - - return UNW_STEP_SUCCESS; -} - -template -int CompactUnwinder_arm64::stepWithCompactEncodingFrame( - compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, - Registers_arm64 ®isters) { - uint64_t savedRegisterLoc = registers.getFP() - 8; - - if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { + + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_arm64::stepWithCompactEncodingFrame( + compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, + Registers_arm64 ®isters) { + uint64_t savedRegisterLoc = registers.getFP() - 8; + + if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X20, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { registers.setRegister(UNW_AARCH64_X21, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X22, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { registers.setRegister(UNW_AARCH64_X23, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X24, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding 
& UNWIND_ARM64_FRAME_X25_X26_PAIR) { registers.setRegister(UNW_AARCH64_X25, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X26, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { registers.setRegister(UNW_AARCH64_X27, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; + savedRegisterLoc -= 8; registers.setRegister(UNW_AARCH64_X28, addressSpace.get64(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - - if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { + savedRegisterLoc -= 8; + } + + if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { registers.setFloatRegister(UNW_AARCH64_V8, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V9, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { registers.setFloatRegister(UNW_AARCH64_V10, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V11, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { registers.setFloatRegister(UNW_AARCH64_V12, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V13, - 
addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { registers.setFloatRegister(UNW_AARCH64_V14, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; registers.setFloatRegister(UNW_AARCH64_V15, - addressSpace.getDouble(savedRegisterLoc)); - savedRegisterLoc -= 8; - } - - uint64_t fp = registers.getFP(); - // fp points to old fp - registers.setFP(addressSpace.get64(fp)); - // old sp is fp less saved fp and lr - registers.setSP(fp + 16); - // pop return address into pc - registers.setIP(addressSpace.get64(fp + 8)); - - return UNW_STEP_SUCCESS; -} + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + + uint64_t fp = registers.getFP(); + // fp points to old fp + registers.setFP(addressSpace.get64(fp)); + // old sp is fp less saved fp and lr + registers.setSP(fp + 16); + // pop return address into pc + registers.setIP(addressSpace.get64(fp + 8)); + + return UNW_STEP_SUCCESS; +} #endif // _LIBUNWIND_TARGET_AARCH64 - - -} // namespace libunwind - -#endif // __COMPACT_UNWINDER_HPP__ + + +} // namespace libunwind + +#endif // __COMPACT_UNWINDER_HPP__ diff --git a/contrib/libs/libunwind/src/DwarfInstructions.hpp b/contrib/libs/libunwind/src/DwarfInstructions.hpp index 4f61bf739c9..c1a241c55ce 100644 --- a/contrib/libs/libunwind/src/DwarfInstructions.hpp +++ b/contrib/libs/libunwind/src/DwarfInstructions.hpp @@ -1,79 +1,79 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// +// +// // Processor specific interpretation of DWARF unwind info. -// -//===----------------------------------------------------------------------===// - -#ifndef __DWARF_INSTRUCTIONS_HPP__ -#define __DWARF_INSTRUCTIONS_HPP__ - -#include -#include -#include - -#include "dwarf2.h" -#include "Registers.hpp" -#include "DwarfParser.hpp" -#include "config.h" - - -namespace libunwind { - - +// +//===----------------------------------------------------------------------===// + +#ifndef __DWARF_INSTRUCTIONS_HPP__ +#define __DWARF_INSTRUCTIONS_HPP__ + +#include +#include +#include + +#include "dwarf2.h" +#include "Registers.hpp" +#include "DwarfParser.hpp" +#include "config.h" + + +namespace libunwind { + + /// DwarfInstructions maps abtract DWARF unwind instructions to a particular -/// architecture -template -class DwarfInstructions { -public: - typedef typename A::pint_t pint_t; - typedef typename A::sint_t sint_t; - - static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart, +/// architecture +template +class DwarfInstructions { +public: + typedef typename A::pint_t pint_t; + typedef typename A::sint_t sint_t; + + static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart, R ®isters, bool &isSignalFrame); - -private: - - enum { - DW_X86_64_RET_ADDR = 16 - }; - - enum { - DW_X86_RET_ADDR = 8 - }; - - typedef typename CFI_Parser::RegisterLocation RegisterLocation; - typedef typename CFI_Parser::PrologInfo PrologInfo; - typedef typename CFI_Parser::FDE_Info FDE_Info; - typedef typename CFI_Parser::CIE_Info CIE_Info; - - static pint_t evaluateExpression(pint_t expression, A &addressSpace, - const R ®isters, - pint_t initialStackValue); - static pint_t getSavedRegister(A &addressSpace, const R ®isters, - pint_t cfa, const RegisterLocation &savedReg); - static double getSavedFloatRegister(A &addressSpace, const R ®isters, - pint_t cfa, const RegisterLocation &savedReg); - static v128 
getSavedVectorRegister(A &addressSpace, const R ®isters, - pint_t cfa, const RegisterLocation &savedReg); - - static pint_t getCFA(A &addressSpace, const PrologInfo &prolog, - const R ®isters) { + +private: + + enum { + DW_X86_64_RET_ADDR = 16 + }; + + enum { + DW_X86_RET_ADDR = 8 + }; + + typedef typename CFI_Parser::RegisterLocation RegisterLocation; + typedef typename CFI_Parser::PrologInfo PrologInfo; + typedef typename CFI_Parser::FDE_Info FDE_Info; + typedef typename CFI_Parser::CIE_Info CIE_Info; + + static pint_t evaluateExpression(pint_t expression, A &addressSpace, + const R ®isters, + pint_t initialStackValue); + static pint_t getSavedRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg); + static double getSavedFloatRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg); + static v128 getSavedVectorRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg); + + static pint_t getCFA(A &addressSpace, const PrologInfo &prolog, + const R ®isters) { if (prolog.cfaRegister != 0) return (pint_t)((sint_t)registers.getRegister((int)prolog.cfaRegister) + - prolog.cfaRegisterOffset); - if (prolog.cfaExpression != 0) - return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace, - registers, 0); - assert(0 && "getCFA(): unknown location"); - __builtin_unreachable(); - } -}; - + prolog.cfaRegisterOffset); + if (prolog.cfaExpression != 0) + return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace, + registers, 0); + assert(0 && "getCFA(): unknown location"); + __builtin_unreachable(); + } +}; + template auto getSparcWCookie(const R &r, int) -> decltype(r.getWCookie()) { return r.getWCookie(); @@ -81,108 +81,108 @@ auto getSparcWCookie(const R &r, int) -> decltype(r.getWCookie()) { template uint64_t getSparcWCookie(const R &, long) { return 0; } - -template -typename A::pint_t DwarfInstructions::getSavedRegister( - A &addressSpace, const R 
®isters, pint_t cfa, - const RegisterLocation &savedReg) { - switch (savedReg.location) { - case CFI_Parser::kRegisterInCFA: + +template +typename A::pint_t DwarfInstructions::getSavedRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: return (pint_t)addressSpace.getRegister(cfa + (pint_t)savedReg.value); - + case CFI_Parser::kRegisterInCFADecrypt: // sparc64 specific return addressSpace.getP(cfa + (pint_t)savedReg.value) ^ getSparcWCookie(registers, 0); - case CFI_Parser::kRegisterAtExpression: + case CFI_Parser::kRegisterAtExpression: return (pint_t)addressSpace.getRegister(evaluateExpression( (pint_t)savedReg.value, addressSpace, registers, cfa)); - - case CFI_Parser::kRegisterIsExpression: - return evaluateExpression((pint_t)savedReg.value, addressSpace, - registers, cfa); - - case CFI_Parser::kRegisterInRegister: - return registers.getRegister((int)savedReg.value); + + case CFI_Parser::kRegisterIsExpression: + return evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa); + + case CFI_Parser::kRegisterInRegister: + return registers.getRegister((int)savedReg.value); case CFI_Parser::kRegisterUndefined: return 0; - case CFI_Parser::kRegisterUnused: - case CFI_Parser::kRegisterOffsetFromCFA: - // FIX ME - break; - } - _LIBUNWIND_ABORT("unsupported restore location for register"); -} - -template -double DwarfInstructions::getSavedFloatRegister( - A &addressSpace, const R ®isters, pint_t cfa, - const RegisterLocation &savedReg) { - switch (savedReg.location) { - case CFI_Parser::kRegisterInCFA: - return addressSpace.getDouble(cfa + (pint_t)savedReg.value); - - case CFI_Parser::kRegisterAtExpression: - return addressSpace.getDouble( - evaluateExpression((pint_t)savedReg.value, addressSpace, - registers, cfa)); + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: + // FIX ME + break; + } + 
_LIBUNWIND_ABORT("unsupported restore location for register"); +} + +template +double DwarfInstructions::getSavedFloatRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + return addressSpace.getDouble(cfa + (pint_t)savedReg.value); + + case CFI_Parser::kRegisterAtExpression: + return addressSpace.getDouble( + evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa)); case CFI_Parser::kRegisterUndefined: return 0.0; case CFI_Parser::kRegisterInRegister: #ifndef _LIBUNWIND_TARGET_ARM return registers.getFloatRegister((int)savedReg.value); #endif - case CFI_Parser::kRegisterIsExpression: - case CFI_Parser::kRegisterUnused: - case CFI_Parser::kRegisterOffsetFromCFA: + case CFI_Parser::kRegisterIsExpression: + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: case CFI_Parser::kRegisterInCFADecrypt: - // FIX ME - break; - } - _LIBUNWIND_ABORT("unsupported restore location for float register"); -} - -template -v128 DwarfInstructions::getSavedVectorRegister( - A &addressSpace, const R ®isters, pint_t cfa, - const RegisterLocation &savedReg) { - switch (savedReg.location) { - case CFI_Parser::kRegisterInCFA: - return addressSpace.getVector(cfa + (pint_t)savedReg.value); - - case CFI_Parser::kRegisterAtExpression: - return addressSpace.getVector( - evaluateExpression((pint_t)savedReg.value, addressSpace, - registers, cfa)); - - case CFI_Parser::kRegisterIsExpression: - case CFI_Parser::kRegisterUnused: + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for float register"); +} + +template +v128 DwarfInstructions::getSavedVectorRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + return addressSpace.getVector(cfa + (pint_t)savedReg.value); + + case CFI_Parser::kRegisterAtExpression: + return 
addressSpace.getVector( + evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa)); + + case CFI_Parser::kRegisterIsExpression: + case CFI_Parser::kRegisterUnused: case CFI_Parser::kRegisterUndefined: - case CFI_Parser::kRegisterOffsetFromCFA: - case CFI_Parser::kRegisterInRegister: + case CFI_Parser::kRegisterOffsetFromCFA: + case CFI_Parser::kRegisterInRegister: case CFI_Parser::kRegisterInCFADecrypt: - // FIX ME - break; - } - _LIBUNWIND_ABORT("unsupported restore location for vector register"); -} - -template -int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for vector register"); +} + +template +int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart, R ®isters, bool &isSignalFrame) { - FDE_Info fdeInfo; - CIE_Info cieInfo; - if (CFI_Parser::decodeFDE(addressSpace, fdeStart, &fdeInfo, - &cieInfo) == NULL) { - PrologInfo prolog; - if (CFI_Parser::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc, + FDE_Info fdeInfo; + CIE_Info cieInfo; + if (CFI_Parser::decodeFDE(addressSpace, fdeStart, &fdeInfo, + &cieInfo) == NULL) { + PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc, R::getArch(), &prolog)) { - // get pointer to cfa (architecture specific) - pint_t cfa = getCFA(addressSpace, prolog, registers); - + // get pointer to cfa (architecture specific) + pint_t cfa = getCFA(addressSpace, prolog, registers); + // restore registers that DWARF says were saved - R newRegisters = registers; + R newRegisters = registers; // Typically, the CFA is the stack pointer at the call site in // the previous frame. However, there are scenarios in which this is not @@ -193,39 +193,39 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // by a CFI directive later on. 
newRegisters.setSP(cfa); - pint_t returnAddress = 0; - const int lastReg = R::lastDwarfRegNum(); + pint_t returnAddress = 0; + const int lastReg = R::lastDwarfRegNum(); assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && - "register range too large"); - assert(lastReg >= (int)cieInfo.returnAddressRegister && - "register range does not contain return address register"); - for (int i = 0; i <= lastReg; ++i) { - if (prolog.savedRegisters[i].location != - CFI_Parser::kRegisterUnused) { - if (registers.validFloatRegister(i)) - newRegisters.setFloatRegister( - i, getSavedFloatRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i])); - else if (registers.validVectorRegister(i)) - newRegisters.setVectorRegister( - i, getSavedVectorRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i])); - else if (i == (int)cieInfo.returnAddressRegister) - returnAddress = getSavedRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i]); - else if (registers.validRegister(i)) - newRegisters.setRegister( - i, getSavedRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i])); - else - return UNW_EBADREG; + "register range too large"); + assert(lastReg >= (int)cieInfo.returnAddressRegister && + "register range does not contain return address register"); + for (int i = 0; i <= lastReg; ++i) { + if (prolog.savedRegisters[i].location != + CFI_Parser::kRegisterUnused) { + if (registers.validFloatRegister(i)) + newRegisters.setFloatRegister( + i, getSavedFloatRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i])); + else if (registers.validVectorRegister(i)) + newRegisters.setVectorRegister( + i, getSavedVectorRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i])); + else if (i == (int)cieInfo.returnAddressRegister) + returnAddress = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i]); + else if (registers.validRegister(i)) + newRegisters.setRegister( + i, getSavedRegister(addressSpace, 
registers, cfa, + prolog.savedRegisters[i])); + else + return UNW_EBADREG; } else if (i == (int)cieInfo.returnAddressRegister) { // Leaf function keeps the return address in register and there is no // explicit intructions how to restore it. returnAddress = registers.getRegister(cieInfo.returnAddressRegister); - } - } - + } + } + isSignalFrame = cieInfo.isSignalFrame; #if defined(_LIBUNWIND_TARGET_AARCH64) @@ -310,562 +310,562 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, } #endif - // Return address is address after call site instruction, so setting IP to - // that does simualates a return. - newRegisters.setIP(returnAddress); - - // Simulate the step by replacing the register set with the new ones. - registers = newRegisters; - - return UNW_STEP_SUCCESS; - } - } - return UNW_EBADFRAME; -} - -template -typename A::pint_t -DwarfInstructions::evaluateExpression(pint_t expression, A &addressSpace, - const R ®isters, - pint_t initialStackValue) { - const bool log = false; - pint_t p = expression; - pint_t expressionEnd = expression + 20; // temp, until len read - pint_t length = (pint_t)addressSpace.getULEB128(p, expressionEnd); - expressionEnd = p + length; - if (log) - fprintf(stderr, "evaluateExpression(): length=%" PRIu64 "\n", - (uint64_t)length); - pint_t stack[100]; - pint_t *sp = stack; - *(++sp) = initialStackValue; - - while (p < expressionEnd) { - if (log) { - for (pint_t *t = sp; t > stack; --t) { - fprintf(stderr, "sp[] = 0x%" PRIx64 "\n", (uint64_t)(*t)); - } - } - uint8_t opcode = addressSpace.get8(p++); - sint_t svalue, svalue2; - pint_t value; - uint32_t reg; - switch (opcode) { - case DW_OP_addr: - // push immediate address sized value - value = addressSpace.getP(p); - p += sizeof(pint_t); - *(++sp) = value; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_deref: - // pop stack, dereference, push result - value = *sp--; - *(++sp) = addressSpace.getP(value); - if (log) - 
fprintf(stderr, "dereference 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_const1u: - // push immediate 1 byte value - value = addressSpace.get8(p); - p += 1; - *(++sp) = value; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_const1s: - // push immediate 1 byte signed value - svalue = (int8_t) addressSpace.get8(p); - p += 1; - *(++sp) = (pint_t)svalue; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); - break; - - case DW_OP_const2u: - // push immediate 2 byte value - value = addressSpace.get16(p); - p += 2; - *(++sp) = value; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_const2s: - // push immediate 2 byte signed value - svalue = (int16_t) addressSpace.get16(p); - p += 2; - *(++sp) = (pint_t)svalue; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); - break; - - case DW_OP_const4u: - // push immediate 4 byte value - value = addressSpace.get32(p); - p += 4; - *(++sp) = value; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_const4s: - // push immediate 4 byte signed value - svalue = (int32_t)addressSpace.get32(p); - p += 4; - *(++sp) = (pint_t)svalue; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); - break; - - case DW_OP_const8u: - // push immediate 8 byte value - value = (pint_t)addressSpace.get64(p); - p += 8; - *(++sp) = value; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_const8s: - // push immediate 8 byte signed value - value = (pint_t)addressSpace.get64(p); - p += 8; - *(++sp) = value; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_constu: - // push immediate ULEB128 value - value = (pint_t)addressSpace.getULEB128(p, expressionEnd); - *(++sp) = value; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case 
DW_OP_consts: - // push immediate SLEB128 value - svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); - *(++sp) = (pint_t)svalue; - if (log) - fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); - break; - - case DW_OP_dup: - // push top of stack - value = *sp; - *(++sp) = value; - if (log) - fprintf(stderr, "duplicate top of stack\n"); - break; - - case DW_OP_drop: - // pop - --sp; - if (log) - fprintf(stderr, "pop top of stack\n"); - break; - - case DW_OP_over: - // dup second - value = sp[-1]; - *(++sp) = value; - if (log) - fprintf(stderr, "duplicate second in stack\n"); - break; - - case DW_OP_pick: - // pick from - reg = addressSpace.get8(p); - p += 1; + // Return address is address after call site instruction, so setting IP to + // that does simualates a return. + newRegisters.setIP(returnAddress); + + // Simulate the step by replacing the register set with the new ones. + registers = newRegisters; + + return UNW_STEP_SUCCESS; + } + } + return UNW_EBADFRAME; +} + +template +typename A::pint_t +DwarfInstructions::evaluateExpression(pint_t expression, A &addressSpace, + const R ®isters, + pint_t initialStackValue) { + const bool log = false; + pint_t p = expression; + pint_t expressionEnd = expression + 20; // temp, until len read + pint_t length = (pint_t)addressSpace.getULEB128(p, expressionEnd); + expressionEnd = p + length; + if (log) + fprintf(stderr, "evaluateExpression(): length=%" PRIu64 "\n", + (uint64_t)length); + pint_t stack[100]; + pint_t *sp = stack; + *(++sp) = initialStackValue; + + while (p < expressionEnd) { + if (log) { + for (pint_t *t = sp; t > stack; --t) { + fprintf(stderr, "sp[] = 0x%" PRIx64 "\n", (uint64_t)(*t)); + } + } + uint8_t opcode = addressSpace.get8(p++); + sint_t svalue, svalue2; + pint_t value; + uint32_t reg; + switch (opcode) { + case DW_OP_addr: + // push immediate address sized value + value = addressSpace.getP(p); + p += sizeof(pint_t); + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 
"\n", (uint64_t)value); + break; + + case DW_OP_deref: + // pop stack, dereference, push result + value = *sp--; + *(++sp) = addressSpace.getP(value); + if (log) + fprintf(stderr, "dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const1u: + // push immediate 1 byte value + value = addressSpace.get8(p); + p += 1; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const1s: + // push immediate 1 byte signed value + svalue = (int8_t) addressSpace.get8(p); + p += 1; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const2u: + // push immediate 2 byte value + value = addressSpace.get16(p); + p += 2; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const2s: + // push immediate 2 byte signed value + svalue = (int16_t) addressSpace.get16(p); + p += 2; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const4u: + // push immediate 4 byte value + value = addressSpace.get32(p); + p += 4; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const4s: + // push immediate 4 byte signed value + svalue = (int32_t)addressSpace.get32(p); + p += 4; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const8u: + // push immediate 8 byte value + value = (pint_t)addressSpace.get64(p); + p += 8; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const8s: + // push immediate 8 byte signed value + value = (pint_t)addressSpace.get64(p); + p += 8; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_constu: + // push immediate ULEB128 value + value = 
(pint_t)addressSpace.getULEB128(p, expressionEnd); + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_consts: + // push immediate SLEB128 value + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_dup: + // push top of stack + value = *sp; + *(++sp) = value; + if (log) + fprintf(stderr, "duplicate top of stack\n"); + break; + + case DW_OP_drop: + // pop + --sp; + if (log) + fprintf(stderr, "pop top of stack\n"); + break; + + case DW_OP_over: + // dup second + value = sp[-1]; + *(++sp) = value; + if (log) + fprintf(stderr, "duplicate second in stack\n"); + break; + + case DW_OP_pick: + // pick from + reg = addressSpace.get8(p); + p += 1; value = sp[-(int)reg]; - *(++sp) = value; - if (log) - fprintf(stderr, "duplicate %d in stack\n", reg); - break; - - case DW_OP_swap: - // swap top two - value = sp[0]; - sp[0] = sp[-1]; - sp[-1] = value; - if (log) - fprintf(stderr, "swap top of stack\n"); - break; - - case DW_OP_rot: - // rotate top three - value = sp[0]; - sp[0] = sp[-1]; - sp[-1] = sp[-2]; - sp[-2] = value; - if (log) - fprintf(stderr, "rotate top three of stack\n"); - break; - - case DW_OP_xderef: - // pop stack, dereference, push result - value = *sp--; - *sp = *((pint_t*)value); - if (log) - fprintf(stderr, "x-dereference 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_abs: - svalue = (sint_t)*sp; - if (svalue < 0) - *sp = (pint_t)(-svalue); - if (log) - fprintf(stderr, "abs\n"); - break; - - case DW_OP_and: - value = *sp--; - *sp &= value; - if (log) - fprintf(stderr, "and\n"); - break; - - case DW_OP_div: - svalue = (sint_t)(*sp--); - svalue2 = (sint_t)*sp; - *sp = (pint_t)(svalue2 / svalue); - if (log) - fprintf(stderr, "div\n"); - break; - - case DW_OP_minus: - value = *sp--; - *sp = *sp - value; - if (log) - fprintf(stderr, "minus\n"); - break; - - 
case DW_OP_mod: - svalue = (sint_t)(*sp--); - svalue2 = (sint_t)*sp; - *sp = (pint_t)(svalue2 % svalue); - if (log) - fprintf(stderr, "module\n"); - break; - - case DW_OP_mul: - svalue = (sint_t)(*sp--); - svalue2 = (sint_t)*sp; - *sp = (pint_t)(svalue2 * svalue); - if (log) - fprintf(stderr, "mul\n"); - break; - - case DW_OP_neg: - *sp = 0 - *sp; - if (log) - fprintf(stderr, "neg\n"); - break; - - case DW_OP_not: - svalue = (sint_t)(*sp); - *sp = (pint_t)(~svalue); - if (log) - fprintf(stderr, "not\n"); - break; - - case DW_OP_or: - value = *sp--; - *sp |= value; - if (log) - fprintf(stderr, "or\n"); - break; - - case DW_OP_plus: - value = *sp--; - *sp += value; - if (log) - fprintf(stderr, "plus\n"); - break; - - case DW_OP_plus_uconst: - // pop stack, add uelb128 constant, push result + *(++sp) = value; + if (log) + fprintf(stderr, "duplicate %d in stack\n", reg); + break; + + case DW_OP_swap: + // swap top two + value = sp[0]; + sp[0] = sp[-1]; + sp[-1] = value; + if (log) + fprintf(stderr, "swap top of stack\n"); + break; + + case DW_OP_rot: + // rotate top three + value = sp[0]; + sp[0] = sp[-1]; + sp[-1] = sp[-2]; + sp[-2] = value; + if (log) + fprintf(stderr, "rotate top three of stack\n"); + break; + + case DW_OP_xderef: + // pop stack, dereference, push result + value = *sp--; + *sp = *((pint_t*)value); + if (log) + fprintf(stderr, "x-dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_abs: + svalue = (sint_t)*sp; + if (svalue < 0) + *sp = (pint_t)(-svalue); + if (log) + fprintf(stderr, "abs\n"); + break; + + case DW_OP_and: + value = *sp--; + *sp &= value; + if (log) + fprintf(stderr, "and\n"); + break; + + case DW_OP_div: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 / svalue); + if (log) + fprintf(stderr, "div\n"); + break; + + case DW_OP_minus: + value = *sp--; + *sp = *sp - value; + if (log) + fprintf(stderr, "minus\n"); + break; + + case DW_OP_mod: + svalue = (sint_t)(*sp--); + svalue2 = 
(sint_t)*sp; + *sp = (pint_t)(svalue2 % svalue); + if (log) + fprintf(stderr, "module\n"); + break; + + case DW_OP_mul: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 * svalue); + if (log) + fprintf(stderr, "mul\n"); + break; + + case DW_OP_neg: + *sp = 0 - *sp; + if (log) + fprintf(stderr, "neg\n"); + break; + + case DW_OP_not: + svalue = (sint_t)(*sp); + *sp = (pint_t)(~svalue); + if (log) + fprintf(stderr, "not\n"); + break; + + case DW_OP_or: + value = *sp--; + *sp |= value; + if (log) + fprintf(stderr, "or\n"); + break; + + case DW_OP_plus: + value = *sp--; + *sp += value; + if (log) + fprintf(stderr, "plus\n"); + break; + + case DW_OP_plus_uconst: + // pop stack, add uelb128 constant, push result *sp += static_cast(addressSpace.getULEB128(p, expressionEnd)); - if (log) - fprintf(stderr, "add constant\n"); - break; - - case DW_OP_shl: - value = *sp--; - *sp = *sp << value; - if (log) - fprintf(stderr, "shift left\n"); - break; - - case DW_OP_shr: - value = *sp--; - *sp = *sp >> value; - if (log) - fprintf(stderr, "shift left\n"); - break; - - case DW_OP_shra: - value = *sp--; - svalue = (sint_t)*sp; - *sp = (pint_t)(svalue >> value); - if (log) - fprintf(stderr, "shift left arithmetric\n"); - break; - - case DW_OP_xor: - value = *sp--; - *sp ^= value; - if (log) - fprintf(stderr, "xor\n"); - break; - - case DW_OP_skip: - svalue = (int16_t) addressSpace.get16(p); - p += 2; - p = (pint_t)((sint_t)p + svalue); - if (log) - fprintf(stderr, "skip %" PRIu64 "\n", (uint64_t)svalue); - break; - - case DW_OP_bra: - svalue = (int16_t) addressSpace.get16(p); - p += 2; - if (*sp--) - p = (pint_t)((sint_t)p + svalue); - if (log) - fprintf(stderr, "bra %" PRIu64 "\n", (uint64_t)svalue); - break; - - case DW_OP_eq: - value = *sp--; - *sp = (*sp == value); - if (log) - fprintf(stderr, "eq\n"); - break; - - case DW_OP_ge: - value = *sp--; - *sp = (*sp >= value); - if (log) - fprintf(stderr, "ge\n"); - break; - - case DW_OP_gt: - value = *sp--; - 
*sp = (*sp > value); - if (log) - fprintf(stderr, "gt\n"); - break; - - case DW_OP_le: - value = *sp--; - *sp = (*sp <= value); - if (log) - fprintf(stderr, "le\n"); - break; - - case DW_OP_lt: - value = *sp--; - *sp = (*sp < value); - if (log) - fprintf(stderr, "lt\n"); - break; - - case DW_OP_ne: - value = *sp--; - *sp = (*sp != value); - if (log) - fprintf(stderr, "ne\n"); - break; - - case DW_OP_lit0: - case DW_OP_lit1: - case DW_OP_lit2: - case DW_OP_lit3: - case DW_OP_lit4: - case DW_OP_lit5: - case DW_OP_lit6: - case DW_OP_lit7: - case DW_OP_lit8: - case DW_OP_lit9: - case DW_OP_lit10: - case DW_OP_lit11: - case DW_OP_lit12: - case DW_OP_lit13: - case DW_OP_lit14: - case DW_OP_lit15: - case DW_OP_lit16: - case DW_OP_lit17: - case DW_OP_lit18: - case DW_OP_lit19: - case DW_OP_lit20: - case DW_OP_lit21: - case DW_OP_lit22: - case DW_OP_lit23: - case DW_OP_lit24: - case DW_OP_lit25: - case DW_OP_lit26: - case DW_OP_lit27: - case DW_OP_lit28: - case DW_OP_lit29: - case DW_OP_lit30: - case DW_OP_lit31: - value = static_cast(opcode - DW_OP_lit0); - *(++sp) = value; - if (log) - fprintf(stderr, "push literal 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_reg0: - case DW_OP_reg1: - case DW_OP_reg2: - case DW_OP_reg3: - case DW_OP_reg4: - case DW_OP_reg5: - case DW_OP_reg6: - case DW_OP_reg7: - case DW_OP_reg8: - case DW_OP_reg9: - case DW_OP_reg10: - case DW_OP_reg11: - case DW_OP_reg12: - case DW_OP_reg13: - case DW_OP_reg14: - case DW_OP_reg15: - case DW_OP_reg16: - case DW_OP_reg17: - case DW_OP_reg18: - case DW_OP_reg19: - case DW_OP_reg20: - case DW_OP_reg21: - case DW_OP_reg22: - case DW_OP_reg23: - case DW_OP_reg24: - case DW_OP_reg25: - case DW_OP_reg26: - case DW_OP_reg27: - case DW_OP_reg28: - case DW_OP_reg29: - case DW_OP_reg30: - case DW_OP_reg31: - reg = static_cast(opcode - DW_OP_reg0); - *(++sp) = registers.getRegister((int)reg); - if (log) - fprintf(stderr, "push reg %d\n", reg); - break; - - case DW_OP_regx: - reg = 
static_cast(addressSpace.getULEB128(p, expressionEnd)); - *(++sp) = registers.getRegister((int)reg); - if (log) - fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); - break; - - case DW_OP_breg0: - case DW_OP_breg1: - case DW_OP_breg2: - case DW_OP_breg3: - case DW_OP_breg4: - case DW_OP_breg5: - case DW_OP_breg6: - case DW_OP_breg7: - case DW_OP_breg8: - case DW_OP_breg9: - case DW_OP_breg10: - case DW_OP_breg11: - case DW_OP_breg12: - case DW_OP_breg13: - case DW_OP_breg14: - case DW_OP_breg15: - case DW_OP_breg16: - case DW_OP_breg17: - case DW_OP_breg18: - case DW_OP_breg19: - case DW_OP_breg20: - case DW_OP_breg21: - case DW_OP_breg22: - case DW_OP_breg23: - case DW_OP_breg24: - case DW_OP_breg25: - case DW_OP_breg26: - case DW_OP_breg27: - case DW_OP_breg28: - case DW_OP_breg29: - case DW_OP_breg30: - case DW_OP_breg31: - reg = static_cast(opcode - DW_OP_breg0); - svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); - svalue += static_cast(registers.getRegister((int)reg)); - *(++sp) = (pint_t)(svalue); - if (log) - fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); - break; - - case DW_OP_bregx: - reg = static_cast(addressSpace.getULEB128(p, expressionEnd)); - svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); - svalue += static_cast(registers.getRegister((int)reg)); - *(++sp) = (pint_t)(svalue); - if (log) - fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); - break; - - case DW_OP_fbreg: - _LIBUNWIND_ABORT("DW_OP_fbreg not implemented"); - break; - - case DW_OP_piece: - _LIBUNWIND_ABORT("DW_OP_piece not implemented"); - break; - - case DW_OP_deref_size: - // pop stack, dereference, push result - value = *sp--; - switch (addressSpace.get8(p++)) { - case 1: - value = addressSpace.get8(value); - break; - case 2: - value = addressSpace.get16(value); - break; - case 4: - value = addressSpace.get32(value); - break; - case 8: - value = (pint_t)addressSpace.get64(value); - break; - 
default: - _LIBUNWIND_ABORT("DW_OP_deref_size with bad size"); - } - *(++sp) = value; - if (log) - fprintf(stderr, "sized dereference 0x%" PRIx64 "\n", (uint64_t)value); - break; - - case DW_OP_xderef_size: - case DW_OP_nop: - case DW_OP_push_object_addres: - case DW_OP_call2: - case DW_OP_call4: - case DW_OP_call_ref: - default: + if (log) + fprintf(stderr, "add constant\n"); + break; + + case DW_OP_shl: + value = *sp--; + *sp = *sp << value; + if (log) + fprintf(stderr, "shift left\n"); + break; + + case DW_OP_shr: + value = *sp--; + *sp = *sp >> value; + if (log) + fprintf(stderr, "shift left\n"); + break; + + case DW_OP_shra: + value = *sp--; + svalue = (sint_t)*sp; + *sp = (pint_t)(svalue >> value); + if (log) + fprintf(stderr, "shift left arithmetric\n"); + break; + + case DW_OP_xor: + value = *sp--; + *sp ^= value; + if (log) + fprintf(stderr, "xor\n"); + break; + + case DW_OP_skip: + svalue = (int16_t) addressSpace.get16(p); + p += 2; + p = (pint_t)((sint_t)p + svalue); + if (log) + fprintf(stderr, "skip %" PRIu64 "\n", (uint64_t)svalue); + break; + + case DW_OP_bra: + svalue = (int16_t) addressSpace.get16(p); + p += 2; + if (*sp--) + p = (pint_t)((sint_t)p + svalue); + if (log) + fprintf(stderr, "bra %" PRIu64 "\n", (uint64_t)svalue); + break; + + case DW_OP_eq: + value = *sp--; + *sp = (*sp == value); + if (log) + fprintf(stderr, "eq\n"); + break; + + case DW_OP_ge: + value = *sp--; + *sp = (*sp >= value); + if (log) + fprintf(stderr, "ge\n"); + break; + + case DW_OP_gt: + value = *sp--; + *sp = (*sp > value); + if (log) + fprintf(stderr, "gt\n"); + break; + + case DW_OP_le: + value = *sp--; + *sp = (*sp <= value); + if (log) + fprintf(stderr, "le\n"); + break; + + case DW_OP_lt: + value = *sp--; + *sp = (*sp < value); + if (log) + fprintf(stderr, "lt\n"); + break; + + case DW_OP_ne: + value = *sp--; + *sp = (*sp != value); + if (log) + fprintf(stderr, "ne\n"); + break; + + case DW_OP_lit0: + case DW_OP_lit1: + case DW_OP_lit2: + case DW_OP_lit3: + case 
DW_OP_lit4: + case DW_OP_lit5: + case DW_OP_lit6: + case DW_OP_lit7: + case DW_OP_lit8: + case DW_OP_lit9: + case DW_OP_lit10: + case DW_OP_lit11: + case DW_OP_lit12: + case DW_OP_lit13: + case DW_OP_lit14: + case DW_OP_lit15: + case DW_OP_lit16: + case DW_OP_lit17: + case DW_OP_lit18: + case DW_OP_lit19: + case DW_OP_lit20: + case DW_OP_lit21: + case DW_OP_lit22: + case DW_OP_lit23: + case DW_OP_lit24: + case DW_OP_lit25: + case DW_OP_lit26: + case DW_OP_lit27: + case DW_OP_lit28: + case DW_OP_lit29: + case DW_OP_lit30: + case DW_OP_lit31: + value = static_cast(opcode - DW_OP_lit0); + *(++sp) = value; + if (log) + fprintf(stderr, "push literal 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_reg0: + case DW_OP_reg1: + case DW_OP_reg2: + case DW_OP_reg3: + case DW_OP_reg4: + case DW_OP_reg5: + case DW_OP_reg6: + case DW_OP_reg7: + case DW_OP_reg8: + case DW_OP_reg9: + case DW_OP_reg10: + case DW_OP_reg11: + case DW_OP_reg12: + case DW_OP_reg13: + case DW_OP_reg14: + case DW_OP_reg15: + case DW_OP_reg16: + case DW_OP_reg17: + case DW_OP_reg18: + case DW_OP_reg19: + case DW_OP_reg20: + case DW_OP_reg21: + case DW_OP_reg22: + case DW_OP_reg23: + case DW_OP_reg24: + case DW_OP_reg25: + case DW_OP_reg26: + case DW_OP_reg27: + case DW_OP_reg28: + case DW_OP_reg29: + case DW_OP_reg30: + case DW_OP_reg31: + reg = static_cast(opcode - DW_OP_reg0); + *(++sp) = registers.getRegister((int)reg); + if (log) + fprintf(stderr, "push reg %d\n", reg); + break; + + case DW_OP_regx: + reg = static_cast(addressSpace.getULEB128(p, expressionEnd)); + *(++sp) = registers.getRegister((int)reg); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_breg0: + case DW_OP_breg1: + case DW_OP_breg2: + case DW_OP_breg3: + case DW_OP_breg4: + case DW_OP_breg5: + case DW_OP_breg6: + case DW_OP_breg7: + case DW_OP_breg8: + case DW_OP_breg9: + case DW_OP_breg10: + case DW_OP_breg11: + case DW_OP_breg12: + case DW_OP_breg13: + case 
DW_OP_breg14: + case DW_OP_breg15: + case DW_OP_breg16: + case DW_OP_breg17: + case DW_OP_breg18: + case DW_OP_breg19: + case DW_OP_breg20: + case DW_OP_breg21: + case DW_OP_breg22: + case DW_OP_breg23: + case DW_OP_breg24: + case DW_OP_breg25: + case DW_OP_breg26: + case DW_OP_breg27: + case DW_OP_breg28: + case DW_OP_breg29: + case DW_OP_breg30: + case DW_OP_breg31: + reg = static_cast(opcode - DW_OP_breg0); + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + svalue += static_cast(registers.getRegister((int)reg)); + *(++sp) = (pint_t)(svalue); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_bregx: + reg = static_cast(addressSpace.getULEB128(p, expressionEnd)); + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + svalue += static_cast(registers.getRegister((int)reg)); + *(++sp) = (pint_t)(svalue); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_fbreg: + _LIBUNWIND_ABORT("DW_OP_fbreg not implemented"); + break; + + case DW_OP_piece: + _LIBUNWIND_ABORT("DW_OP_piece not implemented"); + break; + + case DW_OP_deref_size: + // pop stack, dereference, push result + value = *sp--; + switch (addressSpace.get8(p++)) { + case 1: + value = addressSpace.get8(value); + break; + case 2: + value = addressSpace.get16(value); + break; + case 4: + value = addressSpace.get32(value); + break; + case 8: + value = (pint_t)addressSpace.get64(value); + break; + default: + _LIBUNWIND_ABORT("DW_OP_deref_size with bad size"); + } + *(++sp) = value; + if (log) + fprintf(stderr, "sized dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_xderef_size: + case DW_OP_nop: + case DW_OP_push_object_addres: + case DW_OP_call2: + case DW_OP_call4: + case DW_OP_call_ref: + default: _LIBUNWIND_ABORT("DWARF opcode not implemented"); - } - - } - if (log) - fprintf(stderr, "expression evaluates to 0x%" PRIx64 "\n", (uint64_t)*sp); - return 
*sp; -} - - - -} // namespace libunwind - -#endif // __DWARF_INSTRUCTIONS_HPP__ + } + + } + if (log) + fprintf(stderr, "expression evaluates to 0x%" PRIx64 "\n", (uint64_t)*sp); + return *sp; +} + + + +} // namespace libunwind + +#endif // __DWARF_INSTRUCTIONS_HPP__ diff --git a/contrib/libs/libunwind/src/DwarfParser.hpp b/contrib/libs/libunwind/src/DwarfParser.hpp index 8f764923e79..b5a53166fc3 100644 --- a/contrib/libs/libunwind/src/DwarfParser.hpp +++ b/contrib/libs/libunwind/src/DwarfParser.hpp @@ -1,94 +1,94 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Parses DWARF CFIs (FDEs and CIEs). -// -//===----------------------------------------------------------------------===// - -#ifndef __DWARF_PARSER_HPP__ -#define __DWARF_PARSER_HPP__ - -#include -#include -#include -#include - -#include "libunwind.h" -#include "dwarf2.h" +// +// +// Parses DWARF CFIs (FDEs and CIEs). +// +//===----------------------------------------------------------------------===// + +#ifndef __DWARF_PARSER_HPP__ +#define __DWARF_PARSER_HPP__ + +#include +#include +#include +#include + +#include "libunwind.h" +#include "dwarf2.h" #include "Registers.hpp" - + #include "config.h" - -namespace libunwind { -/// CFI_Parser does basic parsing of a CFI (Call Frame Information) records. +namespace libunwind { + +/// CFI_Parser does basic parsing of a CFI (Call Frame Information) records. 
/// See DWARF Spec for details: -/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html -/// -template -class CFI_Parser { -public: - typedef typename A::pint_t pint_t; - - /// Information encoded in a CIE (Common Information Entry) - struct CIE_Info { - pint_t cieStart; - pint_t cieLength; - pint_t cieInstructions; - uint8_t pointerEncoding; - uint8_t lsdaEncoding; - uint8_t personalityEncoding; - uint8_t personalityOffsetInCIE; - pint_t personality; - uint32_t codeAlignFactor; - int dataAlignFactor; - bool isSignalFrame; - bool fdesHaveAugmentationData; - uint8_t returnAddressRegister; +/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +/// +template +class CFI_Parser { +public: + typedef typename A::pint_t pint_t; + + /// Information encoded in a CIE (Common Information Entry) + struct CIE_Info { + pint_t cieStart; + pint_t cieLength; + pint_t cieInstructions; + uint8_t pointerEncoding; + uint8_t lsdaEncoding; + uint8_t personalityEncoding; + uint8_t personalityOffsetInCIE; + pint_t personality; + uint32_t codeAlignFactor; + int dataAlignFactor; + bool isSignalFrame; + bool fdesHaveAugmentationData; + uint8_t returnAddressRegister; #if defined(_LIBUNWIND_TARGET_AARCH64) bool addressesSignedWithBKey; #endif - }; - - /// Information about an FDE (Frame Description Entry) - struct FDE_Info { - pint_t fdeStart; - pint_t fdeLength; - pint_t fdeInstructions; - pint_t pcStart; - pint_t pcEnd; - pint_t lsda; - }; - - enum { + }; + + /// Information about an FDE (Frame Description Entry) + struct FDE_Info { + pint_t fdeStart; + pint_t fdeLength; + pint_t fdeInstructions; + pint_t pcStart; + pint_t pcEnd; + pint_t lsda; + }; + + enum { kMaxRegisterNumber = _LIBUNWIND_HIGHEST_DWARF_REGISTER - }; - enum RegisterSavedWhere { - kRegisterUnused, + }; + enum RegisterSavedWhere { + kRegisterUnused, kRegisterUndefined, - kRegisterInCFA, + kRegisterInCFA, kRegisterInCFADecrypt, // sparc64 specific 
- kRegisterOffsetFromCFA, - kRegisterInRegister, - kRegisterAtExpression, - kRegisterIsExpression - }; - struct RegisterLocation { - RegisterSavedWhere location; + kRegisterOffsetFromCFA, + kRegisterInRegister, + kRegisterAtExpression, + kRegisterIsExpression + }; + struct RegisterLocation { + RegisterSavedWhere location; bool initialStateSaved; - int64_t value; - }; - /// Information about a frame layout and registers saved determined + int64_t value; + }; + /// Information about a frame layout and registers saved determined /// by "running" the DWARF FDE "instructions" - struct PrologInfo { - uint32_t cfaRegister; - int32_t cfaRegisterOffset; // CFA = (cfaRegister)+cfaRegisterOffset - int64_t cfaExpression; // CFA = expression - uint32_t spExtraArgSize; + struct PrologInfo { + uint32_t cfaRegister; + int32_t cfaRegisterOffset; // CFA = (cfaRegister)+cfaRegisterOffset + int64_t cfaExpression; // CFA = expression + uint32_t spExtraArgSize; RegisterLocation savedRegisters[kMaxRegisterNumber + 1]; enum class InitializeTime { kLazy, kNormal }; @@ -124,15 +124,15 @@ public: savedRegisters[reg] = initialState.savedRegisters[reg]; // else the register still holds its initial state } - }; - - struct PrologInfoStackEntry { - PrologInfoStackEntry(PrologInfoStackEntry *n, const PrologInfo &i) - : next(n), info(i) {} - PrologInfoStackEntry *next; - PrologInfo info; - }; - + }; + + struct PrologInfoStackEntry { + PrologInfoStackEntry(PrologInfoStackEntry *n, const PrologInfo &i) + : next(n), info(i) {} + PrologInfoStackEntry *next; + PrologInfo info; + }; + struct RememberStack { PrologInfoStackEntry *entry; RememberStack() : entry(nullptr) {} @@ -151,42 +151,42 @@ public: } }; - static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, + static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, size_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo, - CIE_Info *cieInfo); - static const char *decodeFDE(A &addressSpace, pint_t fdeStart, + CIE_Info 
*cieInfo); + static const char *decodeFDE(A &addressSpace, pint_t fdeStart, FDE_Info *fdeInfo, CIE_Info *cieInfo, bool useCIEInfo = false); - static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo, - const CIE_Info &cieInfo, pint_t upToPC, + static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo, + const CIE_Info &cieInfo, pint_t upToPC, int arch, PrologInfo *results); - - static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo); -}; - + + static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo); +}; + /// Parse a FDE into a CIE_Info and an FDE_Info. If useCIEInfo is /// true, treat cieInfo as already-parsed CIE_Info (whose start offset /// must match the one specified by the FDE) rather than parsing the /// one indicated within the FDE. -template -const char *CFI_Parser::decodeFDE(A &addressSpace, pint_t fdeStart, +template +const char *CFI_Parser::decodeFDE(A &addressSpace, pint_t fdeStart, FDE_Info *fdeInfo, CIE_Info *cieInfo, bool useCIEInfo) { - pint_t p = fdeStart; - pint_t cfiLength = (pint_t)addressSpace.get32(p); - p += 4; - if (cfiLength == 0xffffffff) { - // 0xffffffff means length is really next 8 bytes - cfiLength = (pint_t)addressSpace.get64(p); - p += 8; - } - if (cfiLength == 0) + pint_t p = fdeStart; + pint_t cfiLength = (pint_t)addressSpace.get32(p); + p += 4; + if (cfiLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cfiLength = (pint_t)addressSpace.get64(p); + p += 8; + } + if (cfiLength == 0) return "FDE has zero length"; // zero terminator - uint32_t ciePointer = addressSpace.get32(p); + uint32_t ciePointer = addressSpace.get32(p); if (ciePointer == 0) return "FDE is really a CIE"; // this is a CIE not an FDE - pint_t nextCFI = p + cfiLength; - pint_t cieStart = p - ciePointer; + pint_t nextCFI = p + cfiLength; + pint_t cieStart = p - ciePointer; if (useCIEInfo) { if (cieInfo->cieStart != cieStart) return "CIE start does not match"; @@ 
-195,242 +195,242 @@ const char *CFI_Parser::decodeFDE(A &addressSpace, pint_t fdeStart, if (err != NULL) return err; } - p += 4; + p += 4; // Parse pc begin and range. - pint_t pcStart = - addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); - pint_t pcRange = - addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding & 0x0F); + pint_t pcStart = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); + pint_t pcRange = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding & 0x0F); // Parse rest of info. - fdeInfo->lsda = 0; + fdeInfo->lsda = 0; // Check for augmentation length. - if (cieInfo->fdesHaveAugmentationData) { - pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); - pint_t endOfAug = p + augLen; - if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { + if (cieInfo->fdesHaveAugmentationData) { + pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); + pint_t endOfAug = p + augLen; + if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { // Peek at value (without indirection). Zero means no LSDA. - pint_t lsdaStart = p; - if (addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != - 0) { + pint_t lsdaStart = p; + if (addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != + 0) { // Reset pointer and re-parse LSDA address. 
- p = lsdaStart; - fdeInfo->lsda = - addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); - } - } - p = endOfAug; - } - fdeInfo->fdeStart = fdeStart; - fdeInfo->fdeLength = nextCFI - fdeStart; - fdeInfo->fdeInstructions = p; - fdeInfo->pcStart = pcStart; - fdeInfo->pcEnd = pcStart + pcRange; - return NULL; // success -} - -/// Scan an eh_frame section to find an FDE for a pc -template -bool CFI_Parser::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, + p = lsdaStart; + fdeInfo->lsda = + addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); + } + } + p = endOfAug; + } + fdeInfo->fdeStart = fdeStart; + fdeInfo->fdeLength = nextCFI - fdeStart; + fdeInfo->fdeInstructions = p; + fdeInfo->pcStart = pcStart; + fdeInfo->pcEnd = pcStart + pcRange; + return NULL; // success +} + +/// Scan an eh_frame section to find an FDE for a pc +template +bool CFI_Parser::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, size_t sectionLength, pint_t fdeHint, - FDE_Info *fdeInfo, CIE_Info *cieInfo) { - //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc); - pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart; + FDE_Info *fdeInfo, CIE_Info *cieInfo) { + //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc); + pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart; const pint_t ehSectionEnd = (sectionLength == SIZE_MAX) ? 
static_cast(-1) : (ehSectionStart + sectionLength); - while (p < ehSectionEnd) { - pint_t currentCFI = p; - //fprintf(stderr, "findFDE() CFI at 0x%llX\n", (long long)p); - pint_t cfiLength = addressSpace.get32(p); - p += 4; - if (cfiLength == 0xffffffff) { - // 0xffffffff means length is really next 8 bytes - cfiLength = (pint_t)addressSpace.get64(p); - p += 8; - } - if (cfiLength == 0) + while (p < ehSectionEnd) { + pint_t currentCFI = p; + //fprintf(stderr, "findFDE() CFI at 0x%llX\n", (long long)p); + pint_t cfiLength = addressSpace.get32(p); + p += 4; + if (cfiLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cfiLength = (pint_t)addressSpace.get64(p); + p += 8; + } + if (cfiLength == 0) return false; // zero terminator - uint32_t id = addressSpace.get32(p); - if (id == 0) { + uint32_t id = addressSpace.get32(p); + if (id == 0) { // Skip over CIEs. - p += cfiLength; - } else { + p += cfiLength; + } else { // Process FDE to see if it covers pc. - pint_t nextCFI = p + cfiLength; - uint32_t ciePointer = addressSpace.get32(p); - pint_t cieStart = p - ciePointer; + pint_t nextCFI = p + cfiLength; + uint32_t ciePointer = addressSpace.get32(p); + pint_t cieStart = p - ciePointer; // Validate pointer to CIE is within section. - if ((ehSectionStart <= cieStart) && (cieStart < ehSectionEnd)) { - if (parseCIE(addressSpace, cieStart, cieInfo) == NULL) { - p += 4; + if ((ehSectionStart <= cieStart) && (cieStart < ehSectionEnd)) { + if (parseCIE(addressSpace, cieStart, cieInfo) == NULL) { + p += 4; // Parse pc begin and range. - pint_t pcStart = - addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); - pint_t pcRange = addressSpace.getEncodedP( - p, nextCFI, cieInfo->pointerEncoding & 0x0F); + pint_t pcStart = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); + pint_t pcRange = addressSpace.getEncodedP( + p, nextCFI, cieInfo->pointerEncoding & 0x0F); // Test if pc is within the function this FDE covers. 
- if ((pcStart < pc) && (pc <= pcStart + pcRange)) { - // parse rest of info - fdeInfo->lsda = 0; - // check for augmentation length - if (cieInfo->fdesHaveAugmentationData) { - pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); - pint_t endOfAug = p + augLen; - if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { + if ((pcStart < pc) && (pc <= pcStart + pcRange)) { + // parse rest of info + fdeInfo->lsda = 0; + // check for augmentation length + if (cieInfo->fdesHaveAugmentationData) { + pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); + pint_t endOfAug = p + augLen; + if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { // Peek at value (without indirection). Zero means no LSDA. - pint_t lsdaStart = p; - if (addressSpace.getEncodedP( - p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != 0) { + pint_t lsdaStart = p; + if (addressSpace.getEncodedP( + p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != 0) { // Reset pointer and re-parse LSDA address. - p = lsdaStart; - fdeInfo->lsda = addressSpace - .getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); - } - } - p = endOfAug; - } - fdeInfo->fdeStart = currentCFI; - fdeInfo->fdeLength = nextCFI - currentCFI; - fdeInfo->fdeInstructions = p; - fdeInfo->pcStart = pcStart; - fdeInfo->pcEnd = pcStart + pcRange; - return true; - } else { - // pc is not in begin/range, skip this FDE - } - } else { + p = lsdaStart; + fdeInfo->lsda = addressSpace + .getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); + } + } + p = endOfAug; + } + fdeInfo->fdeStart = currentCFI; + fdeInfo->fdeLength = nextCFI - currentCFI; + fdeInfo->fdeInstructions = p; + fdeInfo->pcStart = pcStart; + fdeInfo->pcEnd = pcStart + pcRange; + return true; + } else { + // pc is not in begin/range, skip this FDE + } + } else { // Malformed CIE, now augmentation describing pc range encoding. - } - } else { - // malformed FDE. 
CIE is bad - } - p = nextCFI; - } - } - return false; -} - -/// Extract info from a CIE -template -const char *CFI_Parser::parseCIE(A &addressSpace, pint_t cie, - CIE_Info *cieInfo) { - cieInfo->pointerEncoding = 0; - cieInfo->lsdaEncoding = DW_EH_PE_omit; - cieInfo->personalityEncoding = 0; - cieInfo->personalityOffsetInCIE = 0; - cieInfo->personality = 0; - cieInfo->codeAlignFactor = 0; - cieInfo->dataAlignFactor = 0; - cieInfo->isSignalFrame = false; - cieInfo->fdesHaveAugmentationData = false; + } + } else { + // malformed FDE. CIE is bad + } + p = nextCFI; + } + } + return false; +} + +/// Extract info from a CIE +template +const char *CFI_Parser::parseCIE(A &addressSpace, pint_t cie, + CIE_Info *cieInfo) { + cieInfo->pointerEncoding = 0; + cieInfo->lsdaEncoding = DW_EH_PE_omit; + cieInfo->personalityEncoding = 0; + cieInfo->personalityOffsetInCIE = 0; + cieInfo->personality = 0; + cieInfo->codeAlignFactor = 0; + cieInfo->dataAlignFactor = 0; + cieInfo->isSignalFrame = false; + cieInfo->fdesHaveAugmentationData = false; #if defined(_LIBUNWIND_TARGET_AARCH64) cieInfo->addressesSignedWithBKey = false; #endif - cieInfo->cieStart = cie; - pint_t p = cie; - pint_t cieLength = (pint_t)addressSpace.get32(p); - p += 4; - pint_t cieContentEnd = p + cieLength; - if (cieLength == 0xffffffff) { - // 0xffffffff means length is really next 8 bytes - cieLength = (pint_t)addressSpace.get64(p); - p += 8; - cieContentEnd = p + cieLength; - } - if (cieLength == 0) - return NULL; - // CIE ID is always 0 - if (addressSpace.get32(p) != 0) - return "CIE ID is not zero"; - p += 4; - // Version is always 1 or 3 - uint8_t version = addressSpace.get8(p); - if ((version != 1) && (version != 3)) - return "CIE version is not 1 or 3"; - ++p; - // save start of augmentation string and find end - pint_t strStart = p; - while (addressSpace.get8(p) != 0) - ++p; - ++p; - // parse code aligment factor - cieInfo->codeAlignFactor = (uint32_t)addressSpace.getULEB128(p, cieContentEnd); - // parse 
data alignment factor - cieInfo->dataAlignFactor = (int)addressSpace.getSLEB128(p, cieContentEnd); - // parse return address register + cieInfo->cieStart = cie; + pint_t p = cie; + pint_t cieLength = (pint_t)addressSpace.get32(p); + p += 4; + pint_t cieContentEnd = p + cieLength; + if (cieLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cieLength = (pint_t)addressSpace.get64(p); + p += 8; + cieContentEnd = p + cieLength; + } + if (cieLength == 0) + return NULL; + // CIE ID is always 0 + if (addressSpace.get32(p) != 0) + return "CIE ID is not zero"; + p += 4; + // Version is always 1 or 3 + uint8_t version = addressSpace.get8(p); + if ((version != 1) && (version != 3)) + return "CIE version is not 1 or 3"; + ++p; + // save start of augmentation string and find end + pint_t strStart = p; + while (addressSpace.get8(p) != 0) + ++p; + ++p; + // parse code aligment factor + cieInfo->codeAlignFactor = (uint32_t)addressSpace.getULEB128(p, cieContentEnd); + // parse data alignment factor + cieInfo->dataAlignFactor = (int)addressSpace.getSLEB128(p, cieContentEnd); + // parse return address register uint64_t raReg = (version == 1) ? 
addressSpace.get8(p++) : addressSpace.getULEB128(p, cieContentEnd); - assert(raReg < 255 && "return address register too large"); - cieInfo->returnAddressRegister = (uint8_t)raReg; - // parse augmentation data based on augmentation string - const char *result = NULL; - if (addressSpace.get8(strStart) == 'z') { - // parse augmentation data length - addressSpace.getULEB128(p, cieContentEnd); - for (pint_t s = strStart; addressSpace.get8(s) != '\0'; ++s) { - switch (addressSpace.get8(s)) { - case 'z': - cieInfo->fdesHaveAugmentationData = true; - break; - case 'P': - cieInfo->personalityEncoding = addressSpace.get8(p); - ++p; - cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie); - cieInfo->personality = addressSpace - .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding); - break; - case 'L': - cieInfo->lsdaEncoding = addressSpace.get8(p); - ++p; - break; - case 'R': - cieInfo->pointerEncoding = addressSpace.get8(p); - ++p; - break; - case 'S': - cieInfo->isSignalFrame = true; - break; + assert(raReg < 255 && "return address register too large"); + cieInfo->returnAddressRegister = (uint8_t)raReg; + // parse augmentation data based on augmentation string + const char *result = NULL; + if (addressSpace.get8(strStart) == 'z') { + // parse augmentation data length + addressSpace.getULEB128(p, cieContentEnd); + for (pint_t s = strStart; addressSpace.get8(s) != '\0'; ++s) { + switch (addressSpace.get8(s)) { + case 'z': + cieInfo->fdesHaveAugmentationData = true; + break; + case 'P': + cieInfo->personalityEncoding = addressSpace.get8(p); + ++p; + cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie); + cieInfo->personality = addressSpace + .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding); + break; + case 'L': + cieInfo->lsdaEncoding = addressSpace.get8(p); + ++p; + break; + case 'R': + cieInfo->pointerEncoding = addressSpace.get8(p); + ++p; + break; + case 'S': + cieInfo->isSignalFrame = true; + break; #if defined(_LIBUNWIND_TARGET_AARCH64) case 'B': 
cieInfo->addressesSignedWithBKey = true; break; #endif - default: - // ignore unknown letters - break; - } - } - } - cieInfo->cieLength = cieContentEnd - cieInfo->cieStart; - cieInfo->cieInstructions = p; - return result; -} - - + default: + // ignore unknown letters + break; + } + } + } + cieInfo->cieLength = cieContentEnd - cieInfo->cieStart; + cieInfo->cieInstructions = p; + return result; +} + + /// "run" the DWARF instructions and create the abstact PrologInfo for an FDE -template -bool CFI_Parser::parseFDEInstructions(A &addressSpace, - const FDE_Info &fdeInfo, - const CIE_Info &cieInfo, pint_t upToPC, +template +bool CFI_Parser::parseFDEInstructions(A &addressSpace, + const FDE_Info &fdeInfo, + const CIE_Info &cieInfo, pint_t upToPC, int arch, PrologInfo *results) { // Alloca is used for the allocation of the rememberStack entries. It removes // the dependency on new/malloc but the below for loop can not be refactored // into functions. Entry could be saved during the processing of a CIE and // restored by an FDE. 
RememberStack rememberStack; - + struct ParseInfo { pint_t instructions; pint_t instructionsEnd; pint_t pcoffset; }; - + ParseInfo parseInfoArray[] = { {cieInfo.cieInstructions, cieInfo.cieStart + cieInfo.cieLength, (pint_t)(-1)}, {fdeInfo.fdeInstructions, fdeInfo.fdeStart + fdeInfo.fdeLength, upToPC - fdeInfo.pcStart}}; - + for (const auto &info : parseInfoArray) { pint_t p = info.instructions; pint_t instructionsEnd = info.instructionsEnd; @@ -588,7 +588,7 @@ bool CFI_Parser::parseFDEInstructions(A &addressSpace, _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa(reg=%" PRIu64 ", offset=%" PRIu64 ")\n", reg, offset); - break; + break; case DW_CFA_def_cfa_register: reg = addressSpace.getULEB128(p, instructionsEnd); if (reg > kMaxRegisterNumber) { @@ -598,7 +598,7 @@ bool CFI_Parser::parseFDEInstructions(A &addressSpace, } results->cfaRegister = (uint32_t)reg; _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_register(%" PRIu64 ")\n", reg); - break; + break; case DW_CFA_def_cfa_offset: results->cfaRegisterOffset = (int32_t)addressSpace.getULEB128(p, instructionsEnd); @@ -631,7 +631,7 @@ bool CFI_Parser::parseFDEInstructions(A &addressSpace, "expression=0x%" PRIx64 ", " "length=%" PRIu64 ")\n", reg, results->savedRegisters[reg].value, length); - break; + break; case DW_CFA_offset_extended_sf: reg = addressSpace.getULEB128(p, instructionsEnd); if (reg > kMaxRegisterNumber) { @@ -798,7 +798,7 @@ bool CFI_Parser::parseFDEInstructions(A &addressSpace, (void)arch; #endif - default: + default: operand = opcode & 0x3F; switch (opcode & 0xC0) { case DW_CFA_offset: @@ -837,12 +837,12 @@ bool CFI_Parser::parseFDEInstructions(A &addressSpace, _LIBUNWIND_TRACE_DWARF("unknown CFA opcode 0x%02X\n", opcode); return false; } - } - } - } - return true; -} - -} // namespace libunwind - -#endif // __DWARF_PARSER_HPP__ + } + } + } + return true; +} + +} // namespace libunwind + +#endif // __DWARF_PARSER_HPP__ diff --git a/contrib/libs/libunwind/src/EHHeaderParser.hpp 
b/contrib/libs/libunwind/src/EHHeaderParser.hpp index 188cb93269e..9a38070faba 100644 --- a/contrib/libs/libunwind/src/EHHeaderParser.hpp +++ b/contrib/libs/libunwind/src/EHHeaderParser.hpp @@ -1,169 +1,169 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Parses ELF .eh_frame_hdr sections. -// -//===----------------------------------------------------------------------===// - -#ifndef __EHHEADERPARSER_HPP__ -#define __EHHEADERPARSER_HPP__ - -#include "libunwind.h" - -#include "DwarfParser.hpp" - -namespace libunwind { - -/// \brief EHHeaderParser does basic parsing of an ELF .eh_frame_hdr section. -/// -/// See DWARF spec for details: -/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html -/// -template class EHHeaderParser { -public: - typedef typename A::pint_t pint_t; - - /// Information encoded in the EH frame header. - struct EHHeaderInfo { - pint_t eh_frame_ptr; - size_t fde_count; - pint_t table; - uint8_t table_enc; - }; - +// +// +// Parses ELF .eh_frame_hdr sections. +// +//===----------------------------------------------------------------------===// + +#ifndef __EHHEADERPARSER_HPP__ +#define __EHHEADERPARSER_HPP__ + +#include "libunwind.h" + +#include "DwarfParser.hpp" + +namespace libunwind { + +/// \brief EHHeaderParser does basic parsing of an ELF .eh_frame_hdr section. +/// +/// See DWARF spec for details: +/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +/// +template class EHHeaderParser { +public: + typedef typename A::pint_t pint_t; + + /// Information encoded in the EH frame header. 
+ struct EHHeaderInfo { + pint_t eh_frame_ptr; + size_t fde_count; + pint_t table; + uint8_t table_enc; + }; + static bool decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd, - EHHeaderInfo &ehHdrInfo); - static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, - uint32_t sectionLength, - typename CFI_Parser::FDE_Info *fdeInfo, - typename CFI_Parser::CIE_Info *cieInfo); - -private: - static bool decodeTableEntry(A &addressSpace, pint_t &tableEntry, - pint_t ehHdrStart, pint_t ehHdrEnd, - uint8_t tableEnc, - typename CFI_Parser::FDE_Info *fdeInfo, - typename CFI_Parser::CIE_Info *cieInfo); - static size_t getTableEntrySize(uint8_t tableEnc); -}; - -template + EHHeaderInfo &ehHdrInfo); + static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, + uint32_t sectionLength, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo); + +private: + static bool decodeTableEntry(A &addressSpace, pint_t &tableEntry, + pint_t ehHdrStart, pint_t ehHdrEnd, + uint8_t tableEnc, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo); + static size_t getTableEntrySize(uint8_t tableEnc); +}; + +template bool EHHeaderParser::decodeEHHdr(A &addressSpace, pint_t ehHdrStart, - pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) { - pint_t p = ehHdrStart; - uint8_t version = addressSpace.get8(p++); + pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) { + pint_t p = ehHdrStart; + uint8_t version = addressSpace.get8(p++); if (version != 1) { _LIBUNWIND_LOG0("Unsupported .eh_frame_hdr version"); return false; } - - uint8_t eh_frame_ptr_enc = addressSpace.get8(p++); - uint8_t fde_count_enc = addressSpace.get8(p++); - ehHdrInfo.table_enc = addressSpace.get8(p++); - - ehHdrInfo.eh_frame_ptr = - addressSpace.getEncodedP(p, ehHdrEnd, eh_frame_ptr_enc, ehHdrStart); - ehHdrInfo.fde_count = + + uint8_t eh_frame_ptr_enc = addressSpace.get8(p++); + uint8_t fde_count_enc = addressSpace.get8(p++); + ehHdrInfo.table_enc = 
addressSpace.get8(p++); + + ehHdrInfo.eh_frame_ptr = + addressSpace.getEncodedP(p, ehHdrEnd, eh_frame_ptr_enc, ehHdrStart); + ehHdrInfo.fde_count = fde_count_enc == DW_EH_PE_omit ? 0 : addressSpace.getEncodedP(p, ehHdrEnd, fde_count_enc, ehHdrStart); - ehHdrInfo.table = p; + ehHdrInfo.table = p; return true; -} - -template -bool EHHeaderParser::decodeTableEntry( - A &addressSpace, pint_t &tableEntry, pint_t ehHdrStart, pint_t ehHdrEnd, - uint8_t tableEnc, typename CFI_Parser::FDE_Info *fdeInfo, - typename CFI_Parser::CIE_Info *cieInfo) { - // Have to decode the whole FDE for the PC range anyway, so just throw away - // the PC start. - addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); - pint_t fde = - addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); - const char *message = - CFI_Parser::decodeFDE(addressSpace, fde, fdeInfo, cieInfo); - if (message != NULL) { +} + +template +bool EHHeaderParser::decodeTableEntry( + A &addressSpace, pint_t &tableEntry, pint_t ehHdrStart, pint_t ehHdrEnd, + uint8_t tableEnc, typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo) { + // Have to decode the whole FDE for the PC range anyway, so just throw away + // the PC start. 
+ addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); + pint_t fde = + addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); + const char *message = + CFI_Parser::decodeFDE(addressSpace, fde, fdeInfo, cieInfo); + if (message != NULL) { _LIBUNWIND_DEBUG_LOG("EHHeaderParser::decodeTableEntry: bad fde: %s", - message); - return false; - } - - return true; -} - -template -bool EHHeaderParser::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, - uint32_t sectionLength, - typename CFI_Parser::FDE_Info *fdeInfo, - typename CFI_Parser::CIE_Info *cieInfo) { - pint_t ehHdrEnd = ehHdrStart + sectionLength; - - EHHeaderParser::EHHeaderInfo hdrInfo; + message); + return false; + } + + return true; +} + +template +bool EHHeaderParser::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, + uint32_t sectionLength, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo) { + pint_t ehHdrEnd = ehHdrStart + sectionLength; + + EHHeaderParser::EHHeaderInfo hdrInfo; if (!EHHeaderParser::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd, hdrInfo)) return false; - + if (hdrInfo.fde_count == 0) return false; - size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc); - pint_t tableEntry; - - size_t low = 0; - for (size_t len = hdrInfo.fde_count; len > 1;) { - size_t mid = low + (len / 2); - tableEntry = hdrInfo.table + mid * tableEntrySize; - pint_t start = addressSpace.getEncodedP(tableEntry, ehHdrEnd, - hdrInfo.table_enc, ehHdrStart); - - if (start == pc) { - low = mid; - break; - } else if (start < pc) { - low = mid; - len -= (len / 2); - } else { - len /= 2; - } - } - - tableEntry = hdrInfo.table + low * tableEntrySize; - if (decodeTableEntry(addressSpace, tableEntry, ehHdrStart, ehHdrEnd, - hdrInfo.table_enc, fdeInfo, cieInfo)) { - if (pc >= fdeInfo->pcStart && pc < fdeInfo->pcEnd) - return true; - } - - return false; -} - -template -size_t EHHeaderParser::getTableEntrySize(uint8_t tableEnc) { - switch (tableEnc & 
0x0f) { - case DW_EH_PE_sdata2: - case DW_EH_PE_udata2: - return 4; - case DW_EH_PE_sdata4: - case DW_EH_PE_udata4: - return 8; - case DW_EH_PE_sdata8: - case DW_EH_PE_udata8: - return 16; - case DW_EH_PE_sleb128: - case DW_EH_PE_uleb128: - _LIBUNWIND_ABORT("Can't binary search on variable length encoded data."); - case DW_EH_PE_omit: - return 0; - default: - _LIBUNWIND_ABORT("Unknown DWARF encoding for search table."); - } -} - -} - -#endif + size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc); + pint_t tableEntry; + + size_t low = 0; + for (size_t len = hdrInfo.fde_count; len > 1;) { + size_t mid = low + (len / 2); + tableEntry = hdrInfo.table + mid * tableEntrySize; + pint_t start = addressSpace.getEncodedP(tableEntry, ehHdrEnd, + hdrInfo.table_enc, ehHdrStart); + + if (start == pc) { + low = mid; + break; + } else if (start < pc) { + low = mid; + len -= (len / 2); + } else { + len /= 2; + } + } + + tableEntry = hdrInfo.table + low * tableEntrySize; + if (decodeTableEntry(addressSpace, tableEntry, ehHdrStart, ehHdrEnd, + hdrInfo.table_enc, fdeInfo, cieInfo)) { + if (pc >= fdeInfo->pcStart && pc < fdeInfo->pcEnd) + return true; + } + + return false; +} + +template +size_t EHHeaderParser::getTableEntrySize(uint8_t tableEnc) { + switch (tableEnc & 0x0f) { + case DW_EH_PE_sdata2: + case DW_EH_PE_udata2: + return 4; + case DW_EH_PE_sdata4: + case DW_EH_PE_udata4: + return 8; + case DW_EH_PE_sdata8: + case DW_EH_PE_udata8: + return 16; + case DW_EH_PE_sleb128: + case DW_EH_PE_uleb128: + _LIBUNWIND_ABORT("Can't binary search on variable length encoded data."); + case DW_EH_PE_omit: + return 0; + default: + _LIBUNWIND_ABORT("Unknown DWARF encoding for search table."); + } +} + +} + +#endif diff --git a/contrib/libs/libunwind/src/Registers.hpp b/contrib/libs/libunwind/src/Registers.hpp index 3d03b815cfe..cbc3876d672 100644 --- a/contrib/libs/libunwind/src/Registers.hpp +++ b/contrib/libs/libunwind/src/Registers.hpp @@ -1,29 +1,29 @@ 
//===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Models register sets for supported processors. -// -//===----------------------------------------------------------------------===// - -#ifndef __REGISTERS_HPP__ -#define __REGISTERS_HPP__ - -#include -#include - +// +// +// Models register sets for supported processors. +// +//===----------------------------------------------------------------------===// + +#ifndef __REGISTERS_HPP__ +#define __REGISTERS_HPP__ + +#include +#include + #include "cet_unwind.h" #include "config.h" -#include "libunwind.h" - -namespace libunwind { - -// For emulating 128-bit registers -struct v128 { uint32_t vec[4]; }; - +#include "libunwind.h" + +namespace libunwind { + +// For emulating 128-bit registers +struct v128 { uint32_t vec[4]; }; + enum { REGISTERS_X86, REGISTERS_X86_64, @@ -40,1115 +40,1115 @@ enum { REGISTERS_RISCV, REGISTERS_VE, }; - + #if defined(_LIBUNWIND_TARGET_I386) class _LIBUNWIND_HIDDEN Registers_x86; extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *); -#if defined(_LIBUNWIND_USE_CET) -extern "C" void *__libunwind_cet_get_jump_target() { - return reinterpret_cast(&__libunwind_Registers_x86_jumpto); +#if defined(_LIBUNWIND_USE_CET) +extern "C" void *__libunwind_cet_get_jump_target() { + return reinterpret_cast(&__libunwind_Registers_x86_jumpto); +} +#endif + +/// Registers_x86 holds the register state of a thread in a 32-bit intel +/// process. 
+class _LIBUNWIND_HIDDEN Registers_x86 { +public: + Registers_x86(); + Registers_x86(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int) const { return false; } + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int) const { return false; } + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto() { __libunwind_Registers_x86_jumpto(this); } + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86; } + static int getArch() { return REGISTERS_X86; } + + uint32_t getSP() const { return _registers.__esp; } + void setSP(uint32_t value) { _registers.__esp = value; } + uint32_t getIP() const { return _registers.__eip; } + void setIP(uint32_t value) { _registers.__eip = value; } + uint32_t getEBP() const { return _registers.__ebp; } + void setEBP(uint32_t value) { _registers.__ebp = value; } + uint32_t getEBX() const { return _registers.__ebx; } + void setEBX(uint32_t value) { _registers.__ebx = value; } + uint32_t getECX() const { return _registers.__ecx; } + void setECX(uint32_t value) { _registers.__ecx = value; } + uint32_t getEDX() const { return _registers.__edx; } + void setEDX(uint32_t value) { _registers.__edx = value; } + uint32_t getESI() const { return _registers.__esi; } + void setESI(uint32_t value) { _registers.__esi = value; } + uint32_t getEDI() const { return _registers.__edi; } + void setEDI(uint32_t value) { _registers.__edi = value; } + +private: + struct GPRs { + unsigned int __eax; + unsigned int __ebx; + unsigned int __ecx; + unsigned int __edx; + unsigned int __edi; + unsigned int __esi; + unsigned int __ebp; + unsigned int __esp; + unsigned int __ss; + unsigned int __eflags; + unsigned int __eip; + unsigned int __cs; + unsigned int __ds; + 
unsigned int __es; + unsigned int __fs; + unsigned int __gs; + }; + + GPRs _registers; +}; + +inline Registers_x86::Registers_x86(const void *registers) { + static_assert((check_fit::does_fit), + "x86 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); +} + +inline Registers_x86::Registers_x86() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_x86::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 7) + return false; + return true; +} + +inline uint32_t Registers_x86::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__eip; + case UNW_REG_SP: + return _registers.__esp; + case UNW_X86_EAX: + return _registers.__eax; + case UNW_X86_ECX: + return _registers.__ecx; + case UNW_X86_EDX: + return _registers.__edx; + case UNW_X86_EBX: + return _registers.__ebx; +#if !defined(__APPLE__) + case UNW_X86_ESP: +#else + case UNW_X86_EBP: +#endif + return _registers.__ebp; +#if !defined(__APPLE__) + case UNW_X86_EBP: +#else + case UNW_X86_ESP: +#endif + return _registers.__esp; + case UNW_X86_ESI: + return _registers.__esi; + case UNW_X86_EDI: + return _registers.__edi; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline void Registers_x86::setRegister(int regNum, uint32_t value) { + switch (regNum) { + case UNW_REG_IP: + _registers.__eip = value; + return; + case UNW_REG_SP: + _registers.__esp = value; + return; + case UNW_X86_EAX: + _registers.__eax = value; + return; + case UNW_X86_ECX: + _registers.__ecx = value; + return; + case UNW_X86_EDX: + _registers.__edx = value; + return; + case UNW_X86_EBX: + _registers.__ebx = value; + return; +#if !defined(__APPLE__) + case UNW_X86_ESP: +#else + case UNW_X86_EBP: +#endif + _registers.__ebp = value; + return; +#if !defined(__APPLE__) + case UNW_X86_EBP: +#else + case UNW_X86_ESP: +#endif + 
_registers.__esp = value; + return; + case UNW_X86_ESI: + _registers.__esi = value; + return; + case UNW_X86_EDI: + _registers.__edi = value; + return; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline const char *Registers_x86::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "esp"; + case UNW_X86_EAX: + return "eax"; + case UNW_X86_ECX: + return "ecx"; + case UNW_X86_EDX: + return "edx"; + case UNW_X86_EBX: + return "ebx"; + case UNW_X86_EBP: + return "ebp"; + case UNW_X86_ESP: + return "esp"; + case UNW_X86_ESI: + return "esi"; + case UNW_X86_EDI: + return "edi"; + default: + return "unknown register"; + } +} + +inline double Registers_x86::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no x86 float registers"); +} + +inline void Registers_x86::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no x86 float registers"); +} + +inline v128 Registers_x86::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no x86 vector registers"); +} + +inline void Registers_x86::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no x86 vector registers"); +} +#endif // _LIBUNWIND_TARGET_I386 + + +#if defined(_LIBUNWIND_TARGET_X86_64) +/// Registers_x86_64 holds the register state of a thread in a 64-bit intel +/// process. 
+class _LIBUNWIND_HIDDEN Registers_x86_64; +extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *); + +#if defined(_LIBUNWIND_USE_CET) +extern "C" void *__libunwind_cet_get_jump_target() { + return reinterpret_cast(&__libunwind_Registers_x86_64_jumpto); +} +#endif + +class _LIBUNWIND_HIDDEN Registers_x86_64 { +public: + Registers_x86_64(); + Registers_x86_64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value); + bool validFloatRegister(int) const { return false; } + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto() { __libunwind_Registers_x86_64_jumpto(this); } + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64; } + static int getArch() { return REGISTERS_X86_64; } + + uint64_t getSP() const { return _registers.__rsp; } + void setSP(uint64_t value) { _registers.__rsp = value; } + uint64_t getIP() const { return _registers.__rip; } + void setIP(uint64_t value) { _registers.__rip = value; } + uint64_t getRBP() const { return _registers.__rbp; } + void setRBP(uint64_t value) { _registers.__rbp = value; } + uint64_t getRBX() const { return _registers.__rbx; } + void setRBX(uint64_t value) { _registers.__rbx = value; } + uint64_t getR12() const { return _registers.__r12; } + void setR12(uint64_t value) { _registers.__r12 = value; } + uint64_t getR13() const { return _registers.__r13; } + void setR13(uint64_t value) { _registers.__r13 = value; } + uint64_t getR14() const { return _registers.__r14; } + void setR14(uint64_t value) { _registers.__r14 = value; } + uint64_t getR15() const { return _registers.__r15; } + void setR15(uint64_t value) { _registers.__r15 = value; } + +private: + struct GPRs { + uint64_t 
__rax; + uint64_t __rbx; + uint64_t __rcx; + uint64_t __rdx; + uint64_t __rdi; + uint64_t __rsi; + uint64_t __rbp; + uint64_t __rsp; + uint64_t __r8; + uint64_t __r9; + uint64_t __r10; + uint64_t __r11; + uint64_t __r12; + uint64_t __r13; + uint64_t __r14; + uint64_t __r15; + uint64_t __rip; + uint64_t __rflags; + uint64_t __cs; + uint64_t __fs; + uint64_t __gs; +#if defined(_WIN64) + uint64_t __padding; // 16-byte align +#endif + }; + GPRs _registers; +#if defined(_WIN64) + v128 _xmm[16]; +#endif +}; + +inline Registers_x86_64::Registers_x86_64(const void *registers) { + static_assert((check_fit::does_fit), + "x86_64 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); +} + +inline Registers_x86_64::Registers_x86_64() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_x86_64::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 16) + return false; + return true; +} + +inline uint64_t Registers_x86_64::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + case UNW_X86_64_RIP: + return _registers.__rip; + case UNW_REG_SP: + return _registers.__rsp; + case UNW_X86_64_RAX: + return _registers.__rax; + case UNW_X86_64_RDX: + return _registers.__rdx; + case UNW_X86_64_RCX: + return _registers.__rcx; + case UNW_X86_64_RBX: + return _registers.__rbx; + case UNW_X86_64_RSI: + return _registers.__rsi; + case UNW_X86_64_RDI: + return _registers.__rdi; + case UNW_X86_64_RBP: + return _registers.__rbp; + case UNW_X86_64_RSP: + return _registers.__rsp; + case UNW_X86_64_R8: + return _registers.__r8; + case UNW_X86_64_R9: + return _registers.__r9; + case UNW_X86_64_R10: + return _registers.__r10; + case UNW_X86_64_R11: + return _registers.__r11; + case UNW_X86_64_R12: + return _registers.__r12; + case UNW_X86_64_R13: + return _registers.__r13; + case UNW_X86_64_R14: + return 
_registers.__r14; + case UNW_X86_64_R15: + return _registers.__r15; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline void Registers_x86_64::setRegister(int regNum, uint64_t value) { + switch (regNum) { + case UNW_REG_IP: + case UNW_X86_64_RIP: + _registers.__rip = value; + return; + case UNW_REG_SP: + _registers.__rsp = value; + return; + case UNW_X86_64_RAX: + _registers.__rax = value; + return; + case UNW_X86_64_RDX: + _registers.__rdx = value; + return; + case UNW_X86_64_RCX: + _registers.__rcx = value; + return; + case UNW_X86_64_RBX: + _registers.__rbx = value; + return; + case UNW_X86_64_RSI: + _registers.__rsi = value; + return; + case UNW_X86_64_RDI: + _registers.__rdi = value; + return; + case UNW_X86_64_RBP: + _registers.__rbp = value; + return; + case UNW_X86_64_RSP: + _registers.__rsp = value; + return; + case UNW_X86_64_R8: + _registers.__r8 = value; + return; + case UNW_X86_64_R9: + _registers.__r9 = value; + return; + case UNW_X86_64_R10: + _registers.__r10 = value; + return; + case UNW_X86_64_R11: + _registers.__r11 = value; + return; + case UNW_X86_64_R12: + _registers.__r12 = value; + return; + case UNW_X86_64_R13: + _registers.__r13 = value; + return; + case UNW_X86_64_R14: + _registers.__r14 = value; + return; + case UNW_X86_64_R15: + _registers.__r15 = value; + return; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline const char *Registers_x86_64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + case UNW_X86_64_RIP: + return "rip"; + case UNW_REG_SP: + return "rsp"; + case UNW_X86_64_RAX: + return "rax"; + case UNW_X86_64_RDX: + return "rdx"; + case UNW_X86_64_RCX: + return "rcx"; + case UNW_X86_64_RBX: + return "rbx"; + case UNW_X86_64_RSI: + return "rsi"; + case UNW_X86_64_RDI: + return "rdi"; + case UNW_X86_64_RBP: + return "rbp"; + case UNW_X86_64_RSP: + return "rsp"; + case UNW_X86_64_R8: + return "r8"; + case UNW_X86_64_R9: + return "r9"; + case UNW_X86_64_R10: + return "r10"; + 
case UNW_X86_64_R11: + return "r11"; + case UNW_X86_64_R12: + return "r12"; + case UNW_X86_64_R13: + return "r13"; + case UNW_X86_64_R14: + return "r14"; + case UNW_X86_64_R15: + return "r15"; + case UNW_X86_64_XMM0: + return "xmm0"; + case UNW_X86_64_XMM1: + return "xmm1"; + case UNW_X86_64_XMM2: + return "xmm2"; + case UNW_X86_64_XMM3: + return "xmm3"; + case UNW_X86_64_XMM4: + return "xmm4"; + case UNW_X86_64_XMM5: + return "xmm5"; + case UNW_X86_64_XMM6: + return "xmm6"; + case UNW_X86_64_XMM7: + return "xmm7"; + case UNW_X86_64_XMM8: + return "xmm8"; + case UNW_X86_64_XMM9: + return "xmm9"; + case UNW_X86_64_XMM10: + return "xmm10"; + case UNW_X86_64_XMM11: + return "xmm11"; + case UNW_X86_64_XMM12: + return "xmm12"; + case UNW_X86_64_XMM13: + return "xmm13"; + case UNW_X86_64_XMM14: + return "xmm14"; + case UNW_X86_64_XMM15: + return "xmm15"; + default: + return "unknown register"; + } +} + +inline double Registers_x86_64::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no x86_64 float registers"); +} + +inline void Registers_x86_64::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no x86_64 float registers"); +} + +inline bool Registers_x86_64::validVectorRegister(int regNum) const { +#if defined(_WIN64) + if (regNum < UNW_X86_64_XMM0) + return false; + if (regNum > UNW_X86_64_XMM15) + return false; + return true; +#else + (void)regNum; // suppress unused parameter warning + return false; +#endif +} + +inline v128 Registers_x86_64::getVectorRegister(int regNum) const { +#if defined(_WIN64) + assert(validVectorRegister(regNum)); + return _xmm[regNum - UNW_X86_64_XMM0]; +#else + (void)regNum; // suppress unused parameter warning + _LIBUNWIND_ABORT("no x86_64 vector registers"); +#endif +} + +inline void Registers_x86_64::setVectorRegister(int regNum, v128 value) { +#if defined(_WIN64) + assert(validVectorRegister(regNum)); + _xmm[regNum - UNW_X86_64_XMM0] = value; +#else + (void)regNum; (void)value; // suppress unused parameter warnings + 
_LIBUNWIND_ABORT("no x86_64 vector registers"); +#endif +} +#endif // _LIBUNWIND_TARGET_X86_64 + + +#if defined(_LIBUNWIND_TARGET_PPC) +/// Registers_ppc holds the register state of a thread in a 32-bit PowerPC +/// process. +class _LIBUNWIND_HIDDEN Registers_ppc { +public: + Registers_ppc(); + Registers_ppc(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC; } + static int getArch() { return REGISTERS_PPC; } + + uint64_t getSP() const { return _registers.__r1; } + void setSP(uint32_t value) { _registers.__r1 = value; } + uint64_t getIP() const { return _registers.__srr0; } + void setIP(uint32_t value) { _registers.__srr0 = value; } + +private: + struct ppc_thread_state_t { + unsigned int __srr0; /* Instruction address register (PC) */ + unsigned int __srr1; /* Machine state register (supervisor) */ + unsigned int __r0; + unsigned int __r1; + unsigned int __r2; + unsigned int __r3; + unsigned int __r4; + unsigned int __r5; + unsigned int __r6; + unsigned int __r7; + unsigned int __r8; + unsigned int __r9; + unsigned int __r10; + unsigned int __r11; + unsigned int __r12; + unsigned int __r13; + unsigned int __r14; + unsigned int __r15; + unsigned int __r16; + unsigned int __r17; + unsigned int __r18; + unsigned int __r19; + unsigned int __r20; + unsigned int __r21; + unsigned int __r22; + unsigned int __r23; + unsigned int __r24; + unsigned int __r25; + unsigned int __r26; + unsigned int __r27; + unsigned int __r28; + unsigned int __r29; + unsigned int __r30; + unsigned int 
__r31; + unsigned int __cr; /* Condition register */ + unsigned int __xer; /* User's integer exception register */ + unsigned int __lr; /* Link register */ + unsigned int __ctr; /* Count register */ + unsigned int __mq; /* MQ register (601 only) */ + unsigned int __vrsave; /* Vector Save Register */ + }; + + struct ppc_float_state_t { + double __fpregs[32]; + + unsigned int __fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */ + unsigned int __fpscr; /* floating point status register */ + }; + + ppc_thread_state_t _registers; + ppc_float_state_t _floatRegisters; + v128 _vectorRegisters[32]; // offset 424 +}; + +inline Registers_ppc::Registers_ppc(const void *registers) { + static_assert((check_fit::does_fit), + "ppc registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); + static_assert(sizeof(ppc_thread_state_t) == 160, + "expected float register offset to be 160"); + memcpy(&_floatRegisters, + static_cast(registers) + sizeof(ppc_thread_state_t), + sizeof(_floatRegisters)); + static_assert(sizeof(ppc_thread_state_t) + sizeof(ppc_float_state_t) == 424, + "expected vector register offset to be 424 bytes"); + memcpy(_vectorRegisters, + static_cast(registers) + sizeof(ppc_thread_state_t) + + sizeof(ppc_float_state_t), + sizeof(_vectorRegisters)); +} + +inline Registers_ppc::Registers_ppc() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_floatRegisters, 0, sizeof(_floatRegisters)); + memset(&_vectorRegisters, 0, sizeof(_vectorRegisters)); +} + +inline bool Registers_ppc::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum == UNW_PPC_VRSAVE) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_PPC_R31) + return true; + if (regNum == UNW_PPC_MQ) + return true; + if (regNum == UNW_PPC_LR) + return true; + if (regNum == UNW_PPC_CTR) + return true; + if ((UNW_PPC_CR0 <= regNum) && (regNum <= UNW_PPC_CR7)) + 
return true; + return false; +} + +inline uint32_t Registers_ppc::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__srr0; + case UNW_REG_SP: + return _registers.__r1; + case UNW_PPC_R0: + return _registers.__r0; + case UNW_PPC_R1: + return _registers.__r1; + case UNW_PPC_R2: + return _registers.__r2; + case UNW_PPC_R3: + return _registers.__r3; + case UNW_PPC_R4: + return _registers.__r4; + case UNW_PPC_R5: + return _registers.__r5; + case UNW_PPC_R6: + return _registers.__r6; + case UNW_PPC_R7: + return _registers.__r7; + case UNW_PPC_R8: + return _registers.__r8; + case UNW_PPC_R9: + return _registers.__r9; + case UNW_PPC_R10: + return _registers.__r10; + case UNW_PPC_R11: + return _registers.__r11; + case UNW_PPC_R12: + return _registers.__r12; + case UNW_PPC_R13: + return _registers.__r13; + case UNW_PPC_R14: + return _registers.__r14; + case UNW_PPC_R15: + return _registers.__r15; + case UNW_PPC_R16: + return _registers.__r16; + case UNW_PPC_R17: + return _registers.__r17; + case UNW_PPC_R18: + return _registers.__r18; + case UNW_PPC_R19: + return _registers.__r19; + case UNW_PPC_R20: + return _registers.__r20; + case UNW_PPC_R21: + return _registers.__r21; + case UNW_PPC_R22: + return _registers.__r22; + case UNW_PPC_R23: + return _registers.__r23; + case UNW_PPC_R24: + return _registers.__r24; + case UNW_PPC_R25: + return _registers.__r25; + case UNW_PPC_R26: + return _registers.__r26; + case UNW_PPC_R27: + return _registers.__r27; + case UNW_PPC_R28: + return _registers.__r28; + case UNW_PPC_R29: + return _registers.__r29; + case UNW_PPC_R30: + return _registers.__r30; + case UNW_PPC_R31: + return _registers.__r31; + case UNW_PPC_LR: + return _registers.__lr; + case UNW_PPC_CR0: + return (_registers.__cr & 0xF0000000); + case UNW_PPC_CR1: + return (_registers.__cr & 0x0F000000); + case UNW_PPC_CR2: + return (_registers.__cr & 0x00F00000); + case UNW_PPC_CR3: + return (_registers.__cr & 0x000F0000); + case 
UNW_PPC_CR4: + return (_registers.__cr & 0x0000F000); + case UNW_PPC_CR5: + return (_registers.__cr & 0x00000F00); + case UNW_PPC_CR6: + return (_registers.__cr & 0x000000F0); + case UNW_PPC_CR7: + return (_registers.__cr & 0x0000000F); + case UNW_PPC_VRSAVE: + return _registers.__vrsave; + } + _LIBUNWIND_ABORT("unsupported ppc register"); +} + +inline void Registers_ppc::setRegister(int regNum, uint32_t value) { + //fprintf(stderr, "Registers_ppc::setRegister(%d, 0x%08X)\n", regNum, value); + switch (regNum) { + case UNW_REG_IP: + _registers.__srr0 = value; + return; + case UNW_REG_SP: + _registers.__r1 = value; + return; + case UNW_PPC_R0: + _registers.__r0 = value; + return; + case UNW_PPC_R1: + _registers.__r1 = value; + return; + case UNW_PPC_R2: + _registers.__r2 = value; + return; + case UNW_PPC_R3: + _registers.__r3 = value; + return; + case UNW_PPC_R4: + _registers.__r4 = value; + return; + case UNW_PPC_R5: + _registers.__r5 = value; + return; + case UNW_PPC_R6: + _registers.__r6 = value; + return; + case UNW_PPC_R7: + _registers.__r7 = value; + return; + case UNW_PPC_R8: + _registers.__r8 = value; + return; + case UNW_PPC_R9: + _registers.__r9 = value; + return; + case UNW_PPC_R10: + _registers.__r10 = value; + return; + case UNW_PPC_R11: + _registers.__r11 = value; + return; + case UNW_PPC_R12: + _registers.__r12 = value; + return; + case UNW_PPC_R13: + _registers.__r13 = value; + return; + case UNW_PPC_R14: + _registers.__r14 = value; + return; + case UNW_PPC_R15: + _registers.__r15 = value; + return; + case UNW_PPC_R16: + _registers.__r16 = value; + return; + case UNW_PPC_R17: + _registers.__r17 = value; + return; + case UNW_PPC_R18: + _registers.__r18 = value; + return; + case UNW_PPC_R19: + _registers.__r19 = value; + return; + case UNW_PPC_R20: + _registers.__r20 = value; + return; + case UNW_PPC_R21: + _registers.__r21 = value; + return; + case UNW_PPC_R22: + _registers.__r22 = value; + return; + case UNW_PPC_R23: + _registers.__r23 = value; + 
return; + case UNW_PPC_R24: + _registers.__r24 = value; + return; + case UNW_PPC_R25: + _registers.__r25 = value; + return; + case UNW_PPC_R26: + _registers.__r26 = value; + return; + case UNW_PPC_R27: + _registers.__r27 = value; + return; + case UNW_PPC_R28: + _registers.__r28 = value; + return; + case UNW_PPC_R29: + _registers.__r29 = value; + return; + case UNW_PPC_R30: + _registers.__r30 = value; + return; + case UNW_PPC_R31: + _registers.__r31 = value; + return; + case UNW_PPC_MQ: + _registers.__mq = value; + return; + case UNW_PPC_LR: + _registers.__lr = value; + return; + case UNW_PPC_CTR: + _registers.__ctr = value; + return; + case UNW_PPC_CR0: + _registers.__cr &= 0x0FFFFFFF; + _registers.__cr |= (value & 0xF0000000); + return; + case UNW_PPC_CR1: + _registers.__cr &= 0xF0FFFFFF; + _registers.__cr |= (value & 0x0F000000); + return; + case UNW_PPC_CR2: + _registers.__cr &= 0xFF0FFFFF; + _registers.__cr |= (value & 0x00F00000); + return; + case UNW_PPC_CR3: + _registers.__cr &= 0xFFF0FFFF; + _registers.__cr |= (value & 0x000F0000); + return; + case UNW_PPC_CR4: + _registers.__cr &= 0xFFFF0FFF; + _registers.__cr |= (value & 0x0000F000); + return; + case UNW_PPC_CR5: + _registers.__cr &= 0xFFFFF0FF; + _registers.__cr |= (value & 0x00000F00); + return; + case UNW_PPC_CR6: + _registers.__cr &= 0xFFFFFF0F; + _registers.__cr |= (value & 0x000000F0); + return; + case UNW_PPC_CR7: + _registers.__cr &= 0xFFFFFFF0; + _registers.__cr |= (value & 0x0000000F); + return; + case UNW_PPC_VRSAVE: + _registers.__vrsave = value; + return; + // not saved + return; + case UNW_PPC_XER: + _registers.__xer = value; + return; + case UNW_PPC_AP: + case UNW_PPC_VSCR: + case UNW_PPC_SPEFSCR: + // not saved + return; + } + _LIBUNWIND_ABORT("unsupported ppc register"); +} + +inline bool Registers_ppc::validFloatRegister(int regNum) const { + if (regNum < UNW_PPC_F0) + return false; + if (regNum > UNW_PPC_F31) + return false; + return true; } -#endif -/// Registers_x86 holds the register 
state of a thread in a 32-bit intel -/// process. -class _LIBUNWIND_HIDDEN Registers_x86 { -public: - Registers_x86(); - Registers_x86(const void *registers); - - bool validRegister(int num) const; - uint32_t getRegister(int num) const; - void setRegister(int num, uint32_t value); - bool validFloatRegister(int) const { return false; } - double getFloatRegister(int num) const; - void setFloatRegister(int num, double value); - bool validVectorRegister(int) const { return false; } - v128 getVectorRegister(int num) const; - void setVectorRegister(int num, v128 value); - static const char *getRegisterName(int num); - void jumpto() { __libunwind_Registers_x86_jumpto(this); } - static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86; } - static int getArch() { return REGISTERS_X86; } - - uint32_t getSP() const { return _registers.__esp; } - void setSP(uint32_t value) { _registers.__esp = value; } - uint32_t getIP() const { return _registers.__eip; } - void setIP(uint32_t value) { _registers.__eip = value; } - uint32_t getEBP() const { return _registers.__ebp; } - void setEBP(uint32_t value) { _registers.__ebp = value; } - uint32_t getEBX() const { return _registers.__ebx; } - void setEBX(uint32_t value) { _registers.__ebx = value; } - uint32_t getECX() const { return _registers.__ecx; } - void setECX(uint32_t value) { _registers.__ecx = value; } - uint32_t getEDX() const { return _registers.__edx; } - void setEDX(uint32_t value) { _registers.__edx = value; } - uint32_t getESI() const { return _registers.__esi; } - void setESI(uint32_t value) { _registers.__esi = value; } - uint32_t getEDI() const { return _registers.__edi; } - void setEDI(uint32_t value) { _registers.__edi = value; } - -private: - struct GPRs { - unsigned int __eax; - unsigned int __ebx; - unsigned int __ecx; - unsigned int __edx; - unsigned int __edi; - unsigned int __esi; - unsigned int __ebp; - unsigned int __esp; - unsigned int __ss; - unsigned int __eflags; - unsigned int __eip; - 
unsigned int __cs; - unsigned int __ds; - unsigned int __es; - unsigned int __fs; - unsigned int __gs; - }; - - GPRs _registers; -}; - -inline Registers_x86::Registers_x86(const void *registers) { - static_assert((check_fit::does_fit), - "x86 registers do not fit into unw_context_t"); - memcpy(&_registers, registers, sizeof(_registers)); -} - -inline Registers_x86::Registers_x86() { - memset(&_registers, 0, sizeof(_registers)); -} - -inline bool Registers_x86::validRegister(int regNum) const { - if (regNum == UNW_REG_IP) - return true; - if (regNum == UNW_REG_SP) - return true; - if (regNum < 0) - return false; - if (regNum > 7) - return false; - return true; -} - -inline uint32_t Registers_x86::getRegister(int regNum) const { - switch (regNum) { - case UNW_REG_IP: - return _registers.__eip; - case UNW_REG_SP: - return _registers.__esp; - case UNW_X86_EAX: - return _registers.__eax; - case UNW_X86_ECX: - return _registers.__ecx; - case UNW_X86_EDX: - return _registers.__edx; - case UNW_X86_EBX: - return _registers.__ebx; -#if !defined(__APPLE__) - case UNW_X86_ESP: -#else - case UNW_X86_EBP: -#endif - return _registers.__ebp; -#if !defined(__APPLE__) - case UNW_X86_EBP: -#else - case UNW_X86_ESP: -#endif - return _registers.__esp; - case UNW_X86_ESI: - return _registers.__esi; - case UNW_X86_EDI: - return _registers.__edi; - } - _LIBUNWIND_ABORT("unsupported x86 register"); -} - -inline void Registers_x86::setRegister(int regNum, uint32_t value) { - switch (regNum) { - case UNW_REG_IP: - _registers.__eip = value; - return; - case UNW_REG_SP: - _registers.__esp = value; - return; - case UNW_X86_EAX: - _registers.__eax = value; - return; - case UNW_X86_ECX: - _registers.__ecx = value; - return; - case UNW_X86_EDX: - _registers.__edx = value; - return; - case UNW_X86_EBX: - _registers.__ebx = value; - return; -#if !defined(__APPLE__) - case UNW_X86_ESP: -#else - case UNW_X86_EBP: -#endif - _registers.__ebp = value; - return; -#if !defined(__APPLE__) - case 
UNW_X86_EBP: -#else - case UNW_X86_ESP: -#endif - _registers.__esp = value; - return; - case UNW_X86_ESI: - _registers.__esi = value; - return; - case UNW_X86_EDI: - _registers.__edi = value; - return; - } - _LIBUNWIND_ABORT("unsupported x86 register"); -} - -inline const char *Registers_x86::getRegisterName(int regNum) { - switch (regNum) { - case UNW_REG_IP: - return "ip"; - case UNW_REG_SP: - return "esp"; - case UNW_X86_EAX: - return "eax"; - case UNW_X86_ECX: - return "ecx"; - case UNW_X86_EDX: - return "edx"; - case UNW_X86_EBX: - return "ebx"; - case UNW_X86_EBP: - return "ebp"; - case UNW_X86_ESP: - return "esp"; - case UNW_X86_ESI: - return "esi"; - case UNW_X86_EDI: - return "edi"; - default: - return "unknown register"; - } -} - -inline double Registers_x86::getFloatRegister(int) const { - _LIBUNWIND_ABORT("no x86 float registers"); -} - -inline void Registers_x86::setFloatRegister(int, double) { - _LIBUNWIND_ABORT("no x86 float registers"); -} - -inline v128 Registers_x86::getVectorRegister(int) const { - _LIBUNWIND_ABORT("no x86 vector registers"); -} - -inline void Registers_x86::setVectorRegister(int, v128) { - _LIBUNWIND_ABORT("no x86 vector registers"); -} -#endif // _LIBUNWIND_TARGET_I386 - - -#if defined(_LIBUNWIND_TARGET_X86_64) -/// Registers_x86_64 holds the register state of a thread in a 64-bit intel -/// process. 
-class _LIBUNWIND_HIDDEN Registers_x86_64; -extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *); +inline double Registers_ppc::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); + return _floatRegisters.__fpregs[regNum - UNW_PPC_F0]; +} -#if defined(_LIBUNWIND_USE_CET) -extern "C" void *__libunwind_cet_get_jump_target() { - return reinterpret_cast(&__libunwind_Registers_x86_64_jumpto); +inline void Registers_ppc::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); + _floatRegisters.__fpregs[regNum - UNW_PPC_F0] = value; } -#endif -class _LIBUNWIND_HIDDEN Registers_x86_64 { -public: - Registers_x86_64(); - Registers_x86_64(const void *registers); - - bool validRegister(int num) const; - uint64_t getRegister(int num) const; - void setRegister(int num, uint64_t value); - bool validFloatRegister(int) const { return false; } - double getFloatRegister(int num) const; - void setFloatRegister(int num, double value); - bool validVectorRegister(int) const; - v128 getVectorRegister(int num) const; - void setVectorRegister(int num, v128 value); - static const char *getRegisterName(int num); - void jumpto() { __libunwind_Registers_x86_64_jumpto(this); } - static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64; } - static int getArch() { return REGISTERS_X86_64; } - - uint64_t getSP() const { return _registers.__rsp; } - void setSP(uint64_t value) { _registers.__rsp = value; } - uint64_t getIP() const { return _registers.__rip; } - void setIP(uint64_t value) { _registers.__rip = value; } - uint64_t getRBP() const { return _registers.__rbp; } - void setRBP(uint64_t value) { _registers.__rbp = value; } - uint64_t getRBX() const { return _registers.__rbx; } - void setRBX(uint64_t value) { _registers.__rbx = value; } - uint64_t getR12() const { return _registers.__r12; } - void setR12(uint64_t value) { _registers.__r12 = value; } - uint64_t getR13() const { return _registers.__r13; } - 
void setR13(uint64_t value) { _registers.__r13 = value; } - uint64_t getR14() const { return _registers.__r14; } - void setR14(uint64_t value) { _registers.__r14 = value; } - uint64_t getR15() const { return _registers.__r15; } - void setR15(uint64_t value) { _registers.__r15 = value; } - -private: - struct GPRs { - uint64_t __rax; - uint64_t __rbx; - uint64_t __rcx; - uint64_t __rdx; - uint64_t __rdi; - uint64_t __rsi; - uint64_t __rbp; - uint64_t __rsp; - uint64_t __r8; - uint64_t __r9; - uint64_t __r10; - uint64_t __r11; - uint64_t __r12; - uint64_t __r13; - uint64_t __r14; - uint64_t __r15; - uint64_t __rip; - uint64_t __rflags; - uint64_t __cs; - uint64_t __fs; - uint64_t __gs; -#if defined(_WIN64) - uint64_t __padding; // 16-byte align -#endif - }; - GPRs _registers; -#if defined(_WIN64) - v128 _xmm[16]; -#endif -}; - -inline Registers_x86_64::Registers_x86_64(const void *registers) { - static_assert((check_fit::does_fit), - "x86_64 registers do not fit into unw_context_t"); - memcpy(&_registers, registers, sizeof(_registers)); -} - -inline Registers_x86_64::Registers_x86_64() { - memset(&_registers, 0, sizeof(_registers)); -} - -inline bool Registers_x86_64::validRegister(int regNum) const { - if (regNum == UNW_REG_IP) - return true; - if (regNum == UNW_REG_SP) - return true; - if (regNum < 0) - return false; - if (regNum > 16) - return false; - return true; -} - -inline uint64_t Registers_x86_64::getRegister(int regNum) const { - switch (regNum) { - case UNW_REG_IP: - case UNW_X86_64_RIP: - return _registers.__rip; - case UNW_REG_SP: - return _registers.__rsp; - case UNW_X86_64_RAX: - return _registers.__rax; - case UNW_X86_64_RDX: - return _registers.__rdx; - case UNW_X86_64_RCX: - return _registers.__rcx; - case UNW_X86_64_RBX: - return _registers.__rbx; - case UNW_X86_64_RSI: - return _registers.__rsi; - case UNW_X86_64_RDI: - return _registers.__rdi; - case UNW_X86_64_RBP: - return _registers.__rbp; - case UNW_X86_64_RSP: - return _registers.__rsp; - 
case UNW_X86_64_R8: - return _registers.__r8; - case UNW_X86_64_R9: - return _registers.__r9; - case UNW_X86_64_R10: - return _registers.__r10; - case UNW_X86_64_R11: - return _registers.__r11; - case UNW_X86_64_R12: - return _registers.__r12; - case UNW_X86_64_R13: - return _registers.__r13; - case UNW_X86_64_R14: - return _registers.__r14; - case UNW_X86_64_R15: - return _registers.__r15; - } - _LIBUNWIND_ABORT("unsupported x86_64 register"); -} - -inline void Registers_x86_64::setRegister(int regNum, uint64_t value) { - switch (regNum) { - case UNW_REG_IP: - case UNW_X86_64_RIP: - _registers.__rip = value; - return; - case UNW_REG_SP: - _registers.__rsp = value; - return; - case UNW_X86_64_RAX: - _registers.__rax = value; - return; - case UNW_X86_64_RDX: - _registers.__rdx = value; - return; - case UNW_X86_64_RCX: - _registers.__rcx = value; - return; - case UNW_X86_64_RBX: - _registers.__rbx = value; - return; - case UNW_X86_64_RSI: - _registers.__rsi = value; - return; - case UNW_X86_64_RDI: - _registers.__rdi = value; - return; - case UNW_X86_64_RBP: - _registers.__rbp = value; - return; - case UNW_X86_64_RSP: - _registers.__rsp = value; - return; - case UNW_X86_64_R8: - _registers.__r8 = value; - return; - case UNW_X86_64_R9: - _registers.__r9 = value; - return; - case UNW_X86_64_R10: - _registers.__r10 = value; - return; - case UNW_X86_64_R11: - _registers.__r11 = value; - return; - case UNW_X86_64_R12: - _registers.__r12 = value; - return; - case UNW_X86_64_R13: - _registers.__r13 = value; - return; - case UNW_X86_64_R14: - _registers.__r14 = value; - return; - case UNW_X86_64_R15: - _registers.__r15 = value; - return; - } - _LIBUNWIND_ABORT("unsupported x86_64 register"); -} - -inline const char *Registers_x86_64::getRegisterName(int regNum) { - switch (regNum) { - case UNW_REG_IP: - case UNW_X86_64_RIP: - return "rip"; - case UNW_REG_SP: - return "rsp"; - case UNW_X86_64_RAX: - return "rax"; - case UNW_X86_64_RDX: - return "rdx"; - case UNW_X86_64_RCX: - 
return "rcx"; - case UNW_X86_64_RBX: - return "rbx"; - case UNW_X86_64_RSI: - return "rsi"; - case UNW_X86_64_RDI: - return "rdi"; - case UNW_X86_64_RBP: - return "rbp"; - case UNW_X86_64_RSP: - return "rsp"; - case UNW_X86_64_R8: - return "r8"; - case UNW_X86_64_R9: - return "r9"; - case UNW_X86_64_R10: - return "r10"; - case UNW_X86_64_R11: - return "r11"; - case UNW_X86_64_R12: - return "r12"; - case UNW_X86_64_R13: - return "r13"; - case UNW_X86_64_R14: - return "r14"; - case UNW_X86_64_R15: - return "r15"; - case UNW_X86_64_XMM0: - return "xmm0"; - case UNW_X86_64_XMM1: - return "xmm1"; - case UNW_X86_64_XMM2: - return "xmm2"; - case UNW_X86_64_XMM3: - return "xmm3"; - case UNW_X86_64_XMM4: - return "xmm4"; - case UNW_X86_64_XMM5: - return "xmm5"; - case UNW_X86_64_XMM6: - return "xmm6"; - case UNW_X86_64_XMM7: - return "xmm7"; - case UNW_X86_64_XMM8: - return "xmm8"; - case UNW_X86_64_XMM9: - return "xmm9"; - case UNW_X86_64_XMM10: - return "xmm10"; - case UNW_X86_64_XMM11: - return "xmm11"; - case UNW_X86_64_XMM12: - return "xmm12"; - case UNW_X86_64_XMM13: - return "xmm13"; - case UNW_X86_64_XMM14: - return "xmm14"; - case UNW_X86_64_XMM15: - return "xmm15"; - default: - return "unknown register"; - } -} - -inline double Registers_x86_64::getFloatRegister(int) const { - _LIBUNWIND_ABORT("no x86_64 float registers"); -} - -inline void Registers_x86_64::setFloatRegister(int, double) { - _LIBUNWIND_ABORT("no x86_64 float registers"); -} - -inline bool Registers_x86_64::validVectorRegister(int regNum) const { -#if defined(_WIN64) - if (regNum < UNW_X86_64_XMM0) +inline bool Registers_ppc::validVectorRegister(int regNum) const { + if (regNum < UNW_PPC_V0) return false; - if (regNum > UNW_X86_64_XMM15) + if (regNum > UNW_PPC_V31) return false; return true; -#else - (void)regNum; // suppress unused parameter warning - return false; -#endif } -inline v128 Registers_x86_64::getVectorRegister(int regNum) const { -#if defined(_WIN64) +inline v128 
Registers_ppc::getVectorRegister(int regNum) const { assert(validVectorRegister(regNum)); - return _xmm[regNum - UNW_X86_64_XMM0]; -#else - (void)regNum; // suppress unused parameter warning - _LIBUNWIND_ABORT("no x86_64 vector registers"); -#endif -} - -inline void Registers_x86_64::setVectorRegister(int regNum, v128 value) { -#if defined(_WIN64) + v128 result = _vectorRegisters[regNum - UNW_PPC_V0]; + return result; +} + +inline void Registers_ppc::setVectorRegister(int regNum, v128 value) { assert(validVectorRegister(regNum)); - _xmm[regNum - UNW_X86_64_XMM0] = value; -#else - (void)regNum; (void)value; // suppress unused parameter warnings - _LIBUNWIND_ABORT("no x86_64 vector registers"); -#endif -} -#endif // _LIBUNWIND_TARGET_X86_64 - - -#if defined(_LIBUNWIND_TARGET_PPC) -/// Registers_ppc holds the register state of a thread in a 32-bit PowerPC -/// process. -class _LIBUNWIND_HIDDEN Registers_ppc { -public: - Registers_ppc(); - Registers_ppc(const void *registers); - - bool validRegister(int num) const; - uint32_t getRegister(int num) const; - void setRegister(int num, uint32_t value); - bool validFloatRegister(int num) const; - double getFloatRegister(int num) const; - void setFloatRegister(int num, double value); - bool validVectorRegister(int num) const; - v128 getVectorRegister(int num) const; - void setVectorRegister(int num, v128 value); - static const char *getRegisterName(int num); - void jumpto(); - static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC; } - static int getArch() { return REGISTERS_PPC; } - - uint64_t getSP() const { return _registers.__r1; } - void setSP(uint32_t value) { _registers.__r1 = value; } - uint64_t getIP() const { return _registers.__srr0; } - void setIP(uint32_t value) { _registers.__srr0 = value; } - -private: - struct ppc_thread_state_t { - unsigned int __srr0; /* Instruction address register (PC) */ - unsigned int __srr1; /* Machine state register (supervisor) */ - unsigned int __r0; - unsigned 
int __r1; - unsigned int __r2; - unsigned int __r3; - unsigned int __r4; - unsigned int __r5; - unsigned int __r6; - unsigned int __r7; - unsigned int __r8; - unsigned int __r9; - unsigned int __r10; - unsigned int __r11; - unsigned int __r12; - unsigned int __r13; - unsigned int __r14; - unsigned int __r15; - unsigned int __r16; - unsigned int __r17; - unsigned int __r18; - unsigned int __r19; - unsigned int __r20; - unsigned int __r21; - unsigned int __r22; - unsigned int __r23; - unsigned int __r24; - unsigned int __r25; - unsigned int __r26; - unsigned int __r27; - unsigned int __r28; - unsigned int __r29; - unsigned int __r30; - unsigned int __r31; - unsigned int __cr; /* Condition register */ - unsigned int __xer; /* User's integer exception register */ - unsigned int __lr; /* Link register */ - unsigned int __ctr; /* Count register */ - unsigned int __mq; /* MQ register (601 only) */ - unsigned int __vrsave; /* Vector Save Register */ - }; - - struct ppc_float_state_t { - double __fpregs[32]; - - unsigned int __fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */ - unsigned int __fpscr; /* floating point status register */ - }; - - ppc_thread_state_t _registers; - ppc_float_state_t _floatRegisters; - v128 _vectorRegisters[32]; // offset 424 -}; - -inline Registers_ppc::Registers_ppc(const void *registers) { - static_assert((check_fit::does_fit), - "ppc registers do not fit into unw_context_t"); - memcpy(&_registers, static_cast(registers), - sizeof(_registers)); - static_assert(sizeof(ppc_thread_state_t) == 160, - "expected float register offset to be 160"); - memcpy(&_floatRegisters, - static_cast(registers) + sizeof(ppc_thread_state_t), - sizeof(_floatRegisters)); - static_assert(sizeof(ppc_thread_state_t) + sizeof(ppc_float_state_t) == 424, - "expected vector register offset to be 424 bytes"); - memcpy(_vectorRegisters, - static_cast(registers) + sizeof(ppc_thread_state_t) + - sizeof(ppc_float_state_t), - sizeof(_vectorRegisters)); -} - -inline 
Registers_ppc::Registers_ppc() { - memset(&_registers, 0, sizeof(_registers)); - memset(&_floatRegisters, 0, sizeof(_floatRegisters)); - memset(&_vectorRegisters, 0, sizeof(_vectorRegisters)); -} - -inline bool Registers_ppc::validRegister(int regNum) const { - if (regNum == UNW_REG_IP) - return true; - if (regNum == UNW_REG_SP) - return true; - if (regNum == UNW_PPC_VRSAVE) - return true; - if (regNum < 0) - return false; - if (regNum <= UNW_PPC_R31) - return true; - if (regNum == UNW_PPC_MQ) - return true; - if (regNum == UNW_PPC_LR) - return true; - if (regNum == UNW_PPC_CTR) - return true; - if ((UNW_PPC_CR0 <= regNum) && (regNum <= UNW_PPC_CR7)) - return true; - return false; -} - -inline uint32_t Registers_ppc::getRegister(int regNum) const { - switch (regNum) { - case UNW_REG_IP: - return _registers.__srr0; - case UNW_REG_SP: - return _registers.__r1; - case UNW_PPC_R0: - return _registers.__r0; - case UNW_PPC_R1: - return _registers.__r1; - case UNW_PPC_R2: - return _registers.__r2; - case UNW_PPC_R3: - return _registers.__r3; - case UNW_PPC_R4: - return _registers.__r4; - case UNW_PPC_R5: - return _registers.__r5; - case UNW_PPC_R6: - return _registers.__r6; - case UNW_PPC_R7: - return _registers.__r7; - case UNW_PPC_R8: - return _registers.__r8; - case UNW_PPC_R9: - return _registers.__r9; - case UNW_PPC_R10: - return _registers.__r10; - case UNW_PPC_R11: - return _registers.__r11; - case UNW_PPC_R12: - return _registers.__r12; - case UNW_PPC_R13: - return _registers.__r13; - case UNW_PPC_R14: - return _registers.__r14; - case UNW_PPC_R15: - return _registers.__r15; - case UNW_PPC_R16: - return _registers.__r16; - case UNW_PPC_R17: - return _registers.__r17; - case UNW_PPC_R18: - return _registers.__r18; - case UNW_PPC_R19: - return _registers.__r19; - case UNW_PPC_R20: - return _registers.__r20; - case UNW_PPC_R21: - return _registers.__r21; - case UNW_PPC_R22: - return _registers.__r22; - case UNW_PPC_R23: - return _registers.__r23; - case UNW_PPC_R24: 
- return _registers.__r24; - case UNW_PPC_R25: - return _registers.__r25; - case UNW_PPC_R26: - return _registers.__r26; - case UNW_PPC_R27: - return _registers.__r27; - case UNW_PPC_R28: - return _registers.__r28; - case UNW_PPC_R29: - return _registers.__r29; - case UNW_PPC_R30: - return _registers.__r30; - case UNW_PPC_R31: - return _registers.__r31; - case UNW_PPC_LR: - return _registers.__lr; - case UNW_PPC_CR0: - return (_registers.__cr & 0xF0000000); - case UNW_PPC_CR1: - return (_registers.__cr & 0x0F000000); - case UNW_PPC_CR2: - return (_registers.__cr & 0x00F00000); - case UNW_PPC_CR3: - return (_registers.__cr & 0x000F0000); - case UNW_PPC_CR4: - return (_registers.__cr & 0x0000F000); - case UNW_PPC_CR5: - return (_registers.__cr & 0x00000F00); - case UNW_PPC_CR6: - return (_registers.__cr & 0x000000F0); - case UNW_PPC_CR7: - return (_registers.__cr & 0x0000000F); - case UNW_PPC_VRSAVE: - return _registers.__vrsave; - } - _LIBUNWIND_ABORT("unsupported ppc register"); -} - -inline void Registers_ppc::setRegister(int regNum, uint32_t value) { - //fprintf(stderr, "Registers_ppc::setRegister(%d, 0x%08X)\n", regNum, value); - switch (regNum) { - case UNW_REG_IP: - _registers.__srr0 = value; - return; - case UNW_REG_SP: - _registers.__r1 = value; - return; - case UNW_PPC_R0: - _registers.__r0 = value; - return; - case UNW_PPC_R1: - _registers.__r1 = value; - return; - case UNW_PPC_R2: - _registers.__r2 = value; - return; - case UNW_PPC_R3: - _registers.__r3 = value; - return; - case UNW_PPC_R4: - _registers.__r4 = value; - return; - case UNW_PPC_R5: - _registers.__r5 = value; - return; - case UNW_PPC_R6: - _registers.__r6 = value; - return; - case UNW_PPC_R7: - _registers.__r7 = value; - return; - case UNW_PPC_R8: - _registers.__r8 = value; - return; - case UNW_PPC_R9: - _registers.__r9 = value; - return; - case UNW_PPC_R10: - _registers.__r10 = value; - return; - case UNW_PPC_R11: - _registers.__r11 = value; - return; - case UNW_PPC_R12: - _registers.__r12 = 
value; - return; - case UNW_PPC_R13: - _registers.__r13 = value; - return; - case UNW_PPC_R14: - _registers.__r14 = value; - return; - case UNW_PPC_R15: - _registers.__r15 = value; - return; - case UNW_PPC_R16: - _registers.__r16 = value; - return; - case UNW_PPC_R17: - _registers.__r17 = value; - return; - case UNW_PPC_R18: - _registers.__r18 = value; - return; - case UNW_PPC_R19: - _registers.__r19 = value; - return; - case UNW_PPC_R20: - _registers.__r20 = value; - return; - case UNW_PPC_R21: - _registers.__r21 = value; - return; - case UNW_PPC_R22: - _registers.__r22 = value; - return; - case UNW_PPC_R23: - _registers.__r23 = value; - return; - case UNW_PPC_R24: - _registers.__r24 = value; - return; - case UNW_PPC_R25: - _registers.__r25 = value; - return; - case UNW_PPC_R26: - _registers.__r26 = value; - return; - case UNW_PPC_R27: - _registers.__r27 = value; - return; - case UNW_PPC_R28: - _registers.__r28 = value; - return; - case UNW_PPC_R29: - _registers.__r29 = value; - return; - case UNW_PPC_R30: - _registers.__r30 = value; - return; - case UNW_PPC_R31: - _registers.__r31 = value; - return; - case UNW_PPC_MQ: - _registers.__mq = value; - return; - case UNW_PPC_LR: - _registers.__lr = value; - return; - case UNW_PPC_CTR: - _registers.__ctr = value; - return; - case UNW_PPC_CR0: - _registers.__cr &= 0x0FFFFFFF; - _registers.__cr |= (value & 0xF0000000); - return; - case UNW_PPC_CR1: - _registers.__cr &= 0xF0FFFFFF; - _registers.__cr |= (value & 0x0F000000); - return; - case UNW_PPC_CR2: - _registers.__cr &= 0xFF0FFFFF; - _registers.__cr |= (value & 0x00F00000); - return; - case UNW_PPC_CR3: - _registers.__cr &= 0xFFF0FFFF; - _registers.__cr |= (value & 0x000F0000); - return; - case UNW_PPC_CR4: - _registers.__cr &= 0xFFFF0FFF; - _registers.__cr |= (value & 0x0000F000); - return; - case UNW_PPC_CR5: - _registers.__cr &= 0xFFFFF0FF; - _registers.__cr |= (value & 0x00000F00); - return; - case UNW_PPC_CR6: - _registers.__cr &= 0xFFFFFF0F; - _registers.__cr |= 
(value & 0x000000F0); - return; - case UNW_PPC_CR7: - _registers.__cr &= 0xFFFFFFF0; - _registers.__cr |= (value & 0x0000000F); - return; - case UNW_PPC_VRSAVE: - _registers.__vrsave = value; - return; - // not saved - return; - case UNW_PPC_XER: - _registers.__xer = value; - return; - case UNW_PPC_AP: - case UNW_PPC_VSCR: - case UNW_PPC_SPEFSCR: - // not saved - return; - } - _LIBUNWIND_ABORT("unsupported ppc register"); -} - -inline bool Registers_ppc::validFloatRegister(int regNum) const { - if (regNum < UNW_PPC_F0) - return false; - if (regNum > UNW_PPC_F31) - return false; - return true; -} - -inline double Registers_ppc::getFloatRegister(int regNum) const { - assert(validFloatRegister(regNum)); - return _floatRegisters.__fpregs[regNum - UNW_PPC_F0]; -} - -inline void Registers_ppc::setFloatRegister(int regNum, double value) { - assert(validFloatRegister(regNum)); - _floatRegisters.__fpregs[regNum - UNW_PPC_F0] = value; -} - -inline bool Registers_ppc::validVectorRegister(int regNum) const { - if (regNum < UNW_PPC_V0) - return false; - if (regNum > UNW_PPC_V31) - return false; - return true; -} - -inline v128 Registers_ppc::getVectorRegister(int regNum) const { - assert(validVectorRegister(regNum)); - v128 result = _vectorRegisters[regNum - UNW_PPC_V0]; - return result; -} - -inline void Registers_ppc::setVectorRegister(int regNum, v128 value) { - assert(validVectorRegister(regNum)); - _vectorRegisters[regNum - UNW_PPC_V0] = value; -} - -inline const char *Registers_ppc::getRegisterName(int regNum) { - switch (regNum) { - case UNW_REG_IP: - return "ip"; - case UNW_REG_SP: - return "sp"; - case UNW_PPC_R0: - return "r0"; - case UNW_PPC_R1: - return "r1"; - case UNW_PPC_R2: - return "r2"; - case UNW_PPC_R3: - return "r3"; - case UNW_PPC_R4: - return "r4"; - case UNW_PPC_R5: - return "r5"; - case UNW_PPC_R6: - return "r6"; - case UNW_PPC_R7: - return "r7"; - case UNW_PPC_R8: - return "r8"; - case UNW_PPC_R9: - return "r9"; - case UNW_PPC_R10: - return "r10"; - 
case UNW_PPC_R11: - return "r11"; - case UNW_PPC_R12: - return "r12"; - case UNW_PPC_R13: - return "r13"; - case UNW_PPC_R14: - return "r14"; - case UNW_PPC_R15: - return "r15"; - case UNW_PPC_R16: - return "r16"; - case UNW_PPC_R17: - return "r17"; - case UNW_PPC_R18: - return "r18"; - case UNW_PPC_R19: - return "r19"; - case UNW_PPC_R20: - return "r20"; - case UNW_PPC_R21: - return "r21"; - case UNW_PPC_R22: - return "r22"; - case UNW_PPC_R23: - return "r23"; - case UNW_PPC_R24: - return "r24"; - case UNW_PPC_R25: - return "r25"; - case UNW_PPC_R26: - return "r26"; - case UNW_PPC_R27: - return "r27"; - case UNW_PPC_R28: - return "r28"; - case UNW_PPC_R29: - return "r29"; - case UNW_PPC_R30: - return "r30"; - case UNW_PPC_R31: - return "r31"; - case UNW_PPC_F0: - return "fp0"; - case UNW_PPC_F1: - return "fp1"; - case UNW_PPC_F2: - return "fp2"; - case UNW_PPC_F3: - return "fp3"; - case UNW_PPC_F4: - return "fp4"; - case UNW_PPC_F5: - return "fp5"; - case UNW_PPC_F6: - return "fp6"; - case UNW_PPC_F7: - return "fp7"; - case UNW_PPC_F8: - return "fp8"; - case UNW_PPC_F9: - return "fp9"; - case UNW_PPC_F10: - return "fp10"; - case UNW_PPC_F11: - return "fp11"; - case UNW_PPC_F12: - return "fp12"; - case UNW_PPC_F13: - return "fp13"; - case UNW_PPC_F14: - return "fp14"; - case UNW_PPC_F15: - return "fp15"; - case UNW_PPC_F16: - return "fp16"; - case UNW_PPC_F17: - return "fp17"; - case UNW_PPC_F18: - return "fp18"; - case UNW_PPC_F19: - return "fp19"; - case UNW_PPC_F20: - return "fp20"; - case UNW_PPC_F21: - return "fp21"; - case UNW_PPC_F22: - return "fp22"; - case UNW_PPC_F23: - return "fp23"; - case UNW_PPC_F24: - return "fp24"; - case UNW_PPC_F25: - return "fp25"; - case UNW_PPC_F26: - return "fp26"; - case UNW_PPC_F27: - return "fp27"; - case UNW_PPC_F28: - return "fp28"; - case UNW_PPC_F29: - return "fp29"; - case UNW_PPC_F30: - return "fp30"; - case UNW_PPC_F31: - return "fp31"; - case UNW_PPC_LR: - return "lr"; - default: - return "unknown register"; - } - 
-} + _vectorRegisters[regNum - UNW_PPC_V0] = value; +} + +inline const char *Registers_ppc::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "sp"; + case UNW_PPC_R0: + return "r0"; + case UNW_PPC_R1: + return "r1"; + case UNW_PPC_R2: + return "r2"; + case UNW_PPC_R3: + return "r3"; + case UNW_PPC_R4: + return "r4"; + case UNW_PPC_R5: + return "r5"; + case UNW_PPC_R6: + return "r6"; + case UNW_PPC_R7: + return "r7"; + case UNW_PPC_R8: + return "r8"; + case UNW_PPC_R9: + return "r9"; + case UNW_PPC_R10: + return "r10"; + case UNW_PPC_R11: + return "r11"; + case UNW_PPC_R12: + return "r12"; + case UNW_PPC_R13: + return "r13"; + case UNW_PPC_R14: + return "r14"; + case UNW_PPC_R15: + return "r15"; + case UNW_PPC_R16: + return "r16"; + case UNW_PPC_R17: + return "r17"; + case UNW_PPC_R18: + return "r18"; + case UNW_PPC_R19: + return "r19"; + case UNW_PPC_R20: + return "r20"; + case UNW_PPC_R21: + return "r21"; + case UNW_PPC_R22: + return "r22"; + case UNW_PPC_R23: + return "r23"; + case UNW_PPC_R24: + return "r24"; + case UNW_PPC_R25: + return "r25"; + case UNW_PPC_R26: + return "r26"; + case UNW_PPC_R27: + return "r27"; + case UNW_PPC_R28: + return "r28"; + case UNW_PPC_R29: + return "r29"; + case UNW_PPC_R30: + return "r30"; + case UNW_PPC_R31: + return "r31"; + case UNW_PPC_F0: + return "fp0"; + case UNW_PPC_F1: + return "fp1"; + case UNW_PPC_F2: + return "fp2"; + case UNW_PPC_F3: + return "fp3"; + case UNW_PPC_F4: + return "fp4"; + case UNW_PPC_F5: + return "fp5"; + case UNW_PPC_F6: + return "fp6"; + case UNW_PPC_F7: + return "fp7"; + case UNW_PPC_F8: + return "fp8"; + case UNW_PPC_F9: + return "fp9"; + case UNW_PPC_F10: + return "fp10"; + case UNW_PPC_F11: + return "fp11"; + case UNW_PPC_F12: + return "fp12"; + case UNW_PPC_F13: + return "fp13"; + case UNW_PPC_F14: + return "fp14"; + case UNW_PPC_F15: + return "fp15"; + case UNW_PPC_F16: + return "fp16"; + case UNW_PPC_F17: + return "fp17"; + case 
UNW_PPC_F18: + return "fp18"; + case UNW_PPC_F19: + return "fp19"; + case UNW_PPC_F20: + return "fp20"; + case UNW_PPC_F21: + return "fp21"; + case UNW_PPC_F22: + return "fp22"; + case UNW_PPC_F23: + return "fp23"; + case UNW_PPC_F24: + return "fp24"; + case UNW_PPC_F25: + return "fp25"; + case UNW_PPC_F26: + return "fp26"; + case UNW_PPC_F27: + return "fp27"; + case UNW_PPC_F28: + return "fp28"; + case UNW_PPC_F29: + return "fp29"; + case UNW_PPC_F30: + return "fp30"; + case UNW_PPC_F31: + return "fp31"; + case UNW_PPC_LR: + return "lr"; + default: + return "unknown register"; + } + +} #endif // _LIBUNWIND_TARGET_PPC - + #if defined(_LIBUNWIND_TARGET_PPC64) /// Registers_ppc64 holds the register state of a thread in a 64-bit PowerPC /// process. @@ -1156,7 +1156,7 @@ class _LIBUNWIND_HIDDEN Registers_ppc64 { public: Registers_ppc64(); Registers_ppc64(const void *registers); - + bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value); @@ -1793,91 +1793,91 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) { #if defined(_LIBUNWIND_TARGET_AARCH64) -/// Registers_arm64 holds the register state of a thread in a 64-bit arm -/// process. +/// Registers_arm64 holds the register state of a thread in a 64-bit arm +/// process. 
class _LIBUNWIND_HIDDEN Registers_arm64; extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); -class _LIBUNWIND_HIDDEN Registers_arm64 { -public: - Registers_arm64(); - Registers_arm64(const void *registers); - - bool validRegister(int num) const; - uint64_t getRegister(int num) const; - void setRegister(int num, uint64_t value); - bool validFloatRegister(int num) const; - double getFloatRegister(int num) const; - void setFloatRegister(int num, double value); - bool validVectorRegister(int num) const; - v128 getVectorRegister(int num) const; - void setVectorRegister(int num, v128 value); +class _LIBUNWIND_HIDDEN Registers_arm64 { +public: + Registers_arm64(); + Registers_arm64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); static const char *getRegisterName(int num); void jumpto() { __libunwind_Registers_arm64_jumpto(this); } static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } static int getArch() { return REGISTERS_ARM64; } - - uint64_t getSP() const { return _registers.__sp; } - void setSP(uint64_t value) { _registers.__sp = value; } - uint64_t getIP() const { return _registers.__pc; } - void setIP(uint64_t value) { _registers.__pc = value; } - uint64_t getFP() const { return _registers.__fp; } - void setFP(uint64_t value) { _registers.__fp = value; } - -private: - struct GPRs { - uint64_t __x[29]; // x0-x28 - uint64_t __fp; // Frame pointer x29 - uint64_t __lr; // Link register x30 - uint64_t __sp; // Stack pointer x31 - uint64_t __pc; // Program counter + + uint64_t getSP() const { return _registers.__sp; } + void setSP(uint64_t value) { _registers.__sp = 
value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint64_t value) { _registers.__pc = value; } + uint64_t getFP() const { return _registers.__fp; } + void setFP(uint64_t value) { _registers.__fp = value; } + +private: + struct GPRs { + uint64_t __x[29]; // x0-x28 + uint64_t __fp; // Frame pointer x29 + uint64_t __lr; // Link register x30 + uint64_t __sp; // Stack pointer x31 + uint64_t __pc; // Program counter uint64_t __ra_sign_state; // RA sign state register - }; - - GPRs _registers; - double _vectorHalfRegisters[32]; - // Currently only the lower double in 128-bit vectore registers - // is perserved during unwinding. We could define new register - // numbers (> 96) which mean whole vector registers, then this - // struct would need to change to contain whole vector registers. -}; - -inline Registers_arm64::Registers_arm64(const void *registers) { + }; + + GPRs _registers; + double _vectorHalfRegisters[32]; + // Currently only the lower double in 128-bit vectore registers + // is perserved during unwinding. We could define new register + // numbers (> 96) which mean whole vector registers, then this + // struct would need to change to contain whole vector registers. 
+}; + +inline Registers_arm64::Registers_arm64(const void *registers) { static_assert((check_fit::does_fit), "arm64 registers do not fit into unw_context_t"); - memcpy(&_registers, registers, sizeof(_registers)); - static_assert(sizeof(GPRs) == 0x110, - "expected VFP registers to be at offset 272"); - memcpy(_vectorHalfRegisters, - static_cast(registers) + sizeof(GPRs), - sizeof(_vectorHalfRegisters)); -} - -inline Registers_arm64::Registers_arm64() { - memset(&_registers, 0, sizeof(_registers)); - memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters)); -} - -inline bool Registers_arm64::validRegister(int regNum) const { - if (regNum == UNW_REG_IP) - return true; - if (regNum == UNW_REG_SP) - return true; - if (regNum < 0) - return false; - if (regNum > 95) - return false; + memcpy(&_registers, registers, sizeof(_registers)); + static_assert(sizeof(GPRs) == 0x110, + "expected VFP registers to be at offset 272"); + memcpy(_vectorHalfRegisters, + static_cast(registers) + sizeof(GPRs), + sizeof(_vectorHalfRegisters)); +} + +inline Registers_arm64::Registers_arm64() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters)); +} + +inline bool Registers_arm64::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 95) + return false; if (regNum == UNW_AARCH64_RA_SIGN_STATE) return true; if ((regNum > 32) && (regNum < 64)) - return false; - return true; -} - -inline uint64_t Registers_arm64::getRegister(int regNum) const { + return false; + return true; +} + +inline uint64_t Registers_arm64::getRegister(int regNum) const { if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC) - return _registers.__pc; + return _registers.__pc; if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP) - return _registers.__sp; + return _registers.__sp; if (regNum == UNW_AARCH64_RA_SIGN_STATE) return 
_registers.__ra_sign_state; if (regNum == UNW_AARCH64_FP) @@ -1885,15 +1885,15 @@ inline uint64_t Registers_arm64::getRegister(int regNum) const { if (regNum == UNW_AARCH64_LR) return _registers.__lr; if ((regNum >= 0) && (regNum < 29)) - return _registers.__x[regNum]; - _LIBUNWIND_ABORT("unsupported arm64 register"); -} - -inline void Registers_arm64::setRegister(int regNum, uint64_t value) { + return _registers.__x[regNum]; + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline void Registers_arm64::setRegister(int regNum, uint64_t value) { if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC) - _registers.__pc = value; + _registers.__pc = value; else if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP) - _registers.__sp = value; + _registers.__sp = value; else if (regNum == UNW_AARCH64_RA_SIGN_STATE) _registers.__ra_sign_state = value; else if (regNum == UNW_AARCH64_FP) @@ -1901,246 +1901,246 @@ inline void Registers_arm64::setRegister(int regNum, uint64_t value) { else if (regNum == UNW_AARCH64_LR) _registers.__lr = value; else if ((regNum >= 0) && (regNum < 29)) - _registers.__x[regNum] = value; - else - _LIBUNWIND_ABORT("unsupported arm64 register"); -} - -inline const char *Registers_arm64::getRegisterName(int regNum) { - switch (regNum) { - case UNW_REG_IP: - return "pc"; - case UNW_REG_SP: - return "sp"; + _registers.__x[regNum] = value; + else + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline const char *Registers_arm64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "pc"; + case UNW_REG_SP: + return "sp"; case UNW_AARCH64_X0: - return "x0"; + return "x0"; case UNW_AARCH64_X1: - return "x1"; + return "x1"; case UNW_AARCH64_X2: - return "x2"; + return "x2"; case UNW_AARCH64_X3: - return "x3"; + return "x3"; case UNW_AARCH64_X4: - return "x4"; + return "x4"; case UNW_AARCH64_X5: - return "x5"; + return "x5"; case UNW_AARCH64_X6: - return "x6"; + return "x6"; case UNW_AARCH64_X7: - return "x7"; + return 
"x7"; case UNW_AARCH64_X8: - return "x8"; + return "x8"; case UNW_AARCH64_X9: - return "x9"; + return "x9"; case UNW_AARCH64_X10: - return "x10"; + return "x10"; case UNW_AARCH64_X11: - return "x11"; + return "x11"; case UNW_AARCH64_X12: - return "x12"; + return "x12"; case UNW_AARCH64_X13: - return "x13"; + return "x13"; case UNW_AARCH64_X14: - return "x14"; + return "x14"; case UNW_AARCH64_X15: - return "x15"; + return "x15"; case UNW_AARCH64_X16: - return "x16"; + return "x16"; case UNW_AARCH64_X17: - return "x17"; + return "x17"; case UNW_AARCH64_X18: - return "x18"; + return "x18"; case UNW_AARCH64_X19: - return "x19"; + return "x19"; case UNW_AARCH64_X20: - return "x20"; + return "x20"; case UNW_AARCH64_X21: - return "x21"; + return "x21"; case UNW_AARCH64_X22: - return "x22"; + return "x22"; case UNW_AARCH64_X23: - return "x23"; + return "x23"; case UNW_AARCH64_X24: - return "x24"; + return "x24"; case UNW_AARCH64_X25: - return "x25"; + return "x25"; case UNW_AARCH64_X26: - return "x26"; + return "x26"; case UNW_AARCH64_X27: - return "x27"; + return "x27"; case UNW_AARCH64_X28: - return "x28"; + return "x28"; case UNW_AARCH64_FP: - return "fp"; + return "fp"; case UNW_AARCH64_LR: - return "lr"; + return "lr"; case UNW_AARCH64_SP: - return "sp"; + return "sp"; case UNW_AARCH64_PC: return "pc"; case UNW_AARCH64_V0: - return "d0"; + return "d0"; case UNW_AARCH64_V1: - return "d1"; + return "d1"; case UNW_AARCH64_V2: - return "d2"; + return "d2"; case UNW_AARCH64_V3: - return "d3"; + return "d3"; case UNW_AARCH64_V4: - return "d4"; + return "d4"; case UNW_AARCH64_V5: - return "d5"; + return "d5"; case UNW_AARCH64_V6: - return "d6"; + return "d6"; case UNW_AARCH64_V7: - return "d7"; + return "d7"; case UNW_AARCH64_V8: - return "d8"; + return "d8"; case UNW_AARCH64_V9: - return "d9"; + return "d9"; case UNW_AARCH64_V10: - return "d10"; + return "d10"; case UNW_AARCH64_V11: - return "d11"; + return "d11"; case UNW_AARCH64_V12: - return "d12"; + return "d12"; case 
UNW_AARCH64_V13: - return "d13"; + return "d13"; case UNW_AARCH64_V14: - return "d14"; + return "d14"; case UNW_AARCH64_V15: - return "d15"; + return "d15"; case UNW_AARCH64_V16: - return "d16"; + return "d16"; case UNW_AARCH64_V17: - return "d17"; + return "d17"; case UNW_AARCH64_V18: - return "d18"; + return "d18"; case UNW_AARCH64_V19: - return "d19"; + return "d19"; case UNW_AARCH64_V20: - return "d20"; + return "d20"; case UNW_AARCH64_V21: - return "d21"; + return "d21"; case UNW_AARCH64_V22: - return "d22"; + return "d22"; case UNW_AARCH64_V23: - return "d23"; + return "d23"; case UNW_AARCH64_V24: - return "d24"; + return "d24"; case UNW_AARCH64_V25: - return "d25"; + return "d25"; case UNW_AARCH64_V26: - return "d26"; + return "d26"; case UNW_AARCH64_V27: - return "d27"; + return "d27"; case UNW_AARCH64_V28: - return "d28"; + return "d28"; case UNW_AARCH64_V29: - return "d29"; + return "d29"; case UNW_AARCH64_V30: - return "d30"; + return "d30"; case UNW_AARCH64_V31: - return "d31"; - default: - return "unknown register"; - } -} - -inline bool Registers_arm64::validFloatRegister(int regNum) const { + return "d31"; + default: + return "unknown register"; + } +} + +inline bool Registers_arm64::validFloatRegister(int regNum) const { if (regNum < UNW_AARCH64_V0) - return false; + return false; if (regNum > UNW_AARCH64_V31) - return false; - return true; -} - -inline double Registers_arm64::getFloatRegister(int regNum) const { - assert(validFloatRegister(regNum)); + return false; + return true; +} + +inline double Registers_arm64::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); return _vectorHalfRegisters[regNum - UNW_AARCH64_V0]; -} - -inline void Registers_arm64::setFloatRegister(int regNum, double value) { - assert(validFloatRegister(regNum)); +} + +inline void Registers_arm64::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); _vectorHalfRegisters[regNum - UNW_AARCH64_V0] = value; -} - -inline bool 
Registers_arm64::validVectorRegister(int) const { - return false; -} - -inline v128 Registers_arm64::getVectorRegister(int) const { - _LIBUNWIND_ABORT("no arm64 vector register support yet"); -} - -inline void Registers_arm64::setVectorRegister(int, v128) { - _LIBUNWIND_ABORT("no arm64 vector register support yet"); -} +} + +inline bool Registers_arm64::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_arm64::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no arm64 vector register support yet"); +} + +inline void Registers_arm64::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no arm64 vector register support yet"); +} #endif // _LIBUNWIND_TARGET_AARCH64 - + #if defined(_LIBUNWIND_TARGET_ARM) -/// Registers_arm holds the register state of a thread in a 32-bit arm -/// process. -/// -/// NOTE: Assumes VFPv3. On ARM processors without a floating point unit, -/// this uses more memory than required. -class _LIBUNWIND_HIDDEN Registers_arm { -public: - Registers_arm(); - Registers_arm(const void *registers); - - bool validRegister(int num) const; +/// Registers_arm holds the register state of a thread in a 32-bit arm +/// process. +/// +/// NOTE: Assumes VFPv3. On ARM processors without a floating point unit, +/// this uses more memory than required. 
+class _LIBUNWIND_HIDDEN Registers_arm { +public: + Registers_arm(); + Registers_arm(const void *registers); + + bool validRegister(int num) const; uint32_t getRegister(int num) const; - void setRegister(int num, uint32_t value); - bool validFloatRegister(int num) const; - unw_fpreg_t getFloatRegister(int num); - void setFloatRegister(int num, unw_fpreg_t value); - bool validVectorRegister(int num) const; - v128 getVectorRegister(int num) const; - void setVectorRegister(int num, v128 value); + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + unw_fpreg_t getFloatRegister(int num); + void setFloatRegister(int num, unw_fpreg_t value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); static const char *getRegisterName(int num); - void jumpto() { - restoreSavedFloatRegisters(); - restoreCoreAndJumpTo(); - } + void jumpto() { + restoreSavedFloatRegisters(); + restoreCoreAndJumpTo(); + } static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; } static int getArch() { return REGISTERS_ARM; } - - uint32_t getSP() const { return _registers.__sp; } - void setSP(uint32_t value) { _registers.__sp = value; } - uint32_t getIP() const { return _registers.__pc; } - void setIP(uint32_t value) { _registers.__pc = value; } - - void saveVFPAsX() { - assert(_use_X_for_vfp_save || !_saved_vfp_d0_d15); - _use_X_for_vfp_save = true; - } - - void restoreSavedFloatRegisters() { - if (_saved_vfp_d0_d15) { - if (_use_X_for_vfp_save) - restoreVFPWithFLDMX(_vfp_d0_d15_pad); - else - restoreVFPWithFLDMD(_vfp_d0_d15_pad); - } - if (_saved_vfp_d16_d31) - restoreVFPv3(_vfp_d16_d31); + + uint32_t getSP() const { return _registers.__sp; } + void setSP(uint32_t value) { _registers.__sp = value; } + uint32_t getIP() const { return _registers.__pc; } + void setIP(uint32_t value) { _registers.__pc = value; } + + void saveVFPAsX() { + assert(_use_X_for_vfp_save || 
!_saved_vfp_d0_d15); + _use_X_for_vfp_save = true; + } + + void restoreSavedFloatRegisters() { + if (_saved_vfp_d0_d15) { + if (_use_X_for_vfp_save) + restoreVFPWithFLDMX(_vfp_d0_d15_pad); + else + restoreVFPWithFLDMD(_vfp_d0_d15_pad); + } + if (_saved_vfp_d16_d31) + restoreVFPv3(_vfp_d16_d31); #if defined(__ARM_WMMX) - if (_saved_iwmmx) - restoreiWMMX(_iwmmx); - if (_saved_iwmmx_control) - restoreiWMMXControl(_iwmmx_control); + if (_saved_iwmmx) + restoreiWMMX(_iwmmx); + if (_saved_iwmmx_control) + restoreiWMMXControl(_iwmmx_control); #endif - } - -private: - struct GPRs { - uint32_t __r[13]; // r0-r12 - uint32_t __sp; // Stack pointer r13 - uint32_t __lr; // Link register r14 - uint32_t __pc; // Program counter r15 - }; - + } + +private: + struct GPRs { + uint32_t __r[13]; // r0-r12 + uint32_t __sp; // Stack pointer r13 + uint32_t __lr; // Link register r14 + uint32_t __pc; // Program counter r15 + }; + struct PseudoRegisters { uint32_t __pac; // Return Authentication Code (PAC) }; @@ -2153,94 +2153,94 @@ private: static void restoreVFPv3(void*); #if defined(__ARM_WMMX) static void saveiWMMX(void*); - static void saveiWMMXControl(uint32_t*); + static void saveiWMMXControl(uint32_t*); static void restoreiWMMX(void*); - static void restoreiWMMXControl(uint32_t*); + static void restoreiWMMXControl(uint32_t*); #endif - void restoreCoreAndJumpTo(); - - // ARM registers - GPRs _registers; + void restoreCoreAndJumpTo(); + + // ARM registers + GPRs _registers; PseudoRegisters _pseudo_registers; - - // We save floating point registers lazily because we can't know ahead of - // time which ones are used. See EHABI #4.7. - - // Whether D0-D15 are saved in the FTSMX instead of FSTMD format. - // - // See EHABI #7.5 that explains how matching instruction sequences for load - // and store need to be used to correctly restore the exact register bits. - bool _use_X_for_vfp_save; - // Whether VFP D0-D15 are saved. - bool _saved_vfp_d0_d15; - // Whether VFPv3 D16-D31 are saved. 
- bool _saved_vfp_d16_d31; - // VFP registers D0-D15, + padding if saved using FSTMX - unw_fpreg_t _vfp_d0_d15_pad[17]; - // VFPv3 registers D16-D31, always saved using FSTMD - unw_fpreg_t _vfp_d16_d31[16]; + + // We save floating point registers lazily because we can't know ahead of + // time which ones are used. See EHABI #4.7. + + // Whether D0-D15 are saved in the FTSMX instead of FSTMD format. + // + // See EHABI #7.5 that explains how matching instruction sequences for load + // and store need to be used to correctly restore the exact register bits. + bool _use_X_for_vfp_save; + // Whether VFP D0-D15 are saved. + bool _saved_vfp_d0_d15; + // Whether VFPv3 D16-D31 are saved. + bool _saved_vfp_d16_d31; + // VFP registers D0-D15, + padding if saved using FSTMX + unw_fpreg_t _vfp_d0_d15_pad[17]; + // VFPv3 registers D16-D31, always saved using FSTMD + unw_fpreg_t _vfp_d16_d31[16]; #if defined(__ARM_WMMX) // Whether iWMMX data registers are saved. bool _saved_iwmmx; // Whether iWMMX control registers are saved. mutable bool _saved_iwmmx_control; - // iWMMX registers - unw_fpreg_t _iwmmx[16]; - // iWMMX control registers + // iWMMX registers + unw_fpreg_t _iwmmx[16]; + // iWMMX control registers mutable uint32_t _iwmmx_control[4]; #endif -}; - -inline Registers_arm::Registers_arm(const void *registers) - : _use_X_for_vfp_save(false), - _saved_vfp_d0_d15(false), +}; + +inline Registers_arm::Registers_arm(const void *registers) + : _use_X_for_vfp_save(false), + _saved_vfp_d0_d15(false), _saved_vfp_d16_d31(false) { static_assert((check_fit::does_fit), "arm registers do not fit into unw_context_t"); // See __unw_getcontext() note about data. 
- memcpy(&_registers, registers, sizeof(_registers)); + memcpy(&_registers, registers, sizeof(_registers)); memset(&_pseudo_registers, 0, sizeof(_pseudo_registers)); - memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); - memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); + memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); + memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); #if defined(__ARM_WMMX) _saved_iwmmx = false; _saved_iwmmx_control = false; - memset(&_iwmmx, 0, sizeof(_iwmmx)); - memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); + memset(&_iwmmx, 0, sizeof(_iwmmx)); + memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); #endif -} - -inline Registers_arm::Registers_arm() - : _use_X_for_vfp_save(false), - _saved_vfp_d0_d15(false), +} + +inline Registers_arm::Registers_arm() + : _use_X_for_vfp_save(false), + _saved_vfp_d0_d15(false), _saved_vfp_d16_d31(false) { - memset(&_registers, 0, sizeof(_registers)); + memset(&_registers, 0, sizeof(_registers)); memset(&_pseudo_registers, 0, sizeof(_pseudo_registers)); - memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); - memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); + memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); + memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); #if defined(__ARM_WMMX) _saved_iwmmx = false; _saved_iwmmx_control = false; - memset(&_iwmmx, 0, sizeof(_iwmmx)); - memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); + memset(&_iwmmx, 0, sizeof(_iwmmx)); + memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); #endif -} - -inline bool Registers_arm::validRegister(int regNum) const { - // Returns true for all non-VFP registers supported by the EHABI - // virtual register set (VRS). - if (regNum == UNW_REG_IP) - return true; +} - if (regNum == UNW_REG_SP) - return true; +inline bool Registers_arm::validRegister(int regNum) const { + // Returns true for all non-VFP registers supported by the EHABI + // virtual register set (VRS). 
+ if (regNum == UNW_REG_IP) + return true; + + if (regNum == UNW_REG_SP) + return true; - if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15) - return true; + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15) + return true; #if defined(__ARM_WMMX) - if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) - return true; + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) + return true; #endif #ifdef __ARM_FEATURE_PAUTH @@ -2248,30 +2248,30 @@ inline bool Registers_arm::validRegister(int regNum) const { return true; #endif - return false; -} - + return false; +} + inline uint32_t Registers_arm::getRegister(int regNum) const { - if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) - return _registers.__sp; + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) + return _registers.__sp; - if (regNum == UNW_ARM_LR) - return _registers.__lr; + if (regNum == UNW_ARM_LR) + return _registers.__lr; - if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) - return _registers.__pc; + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) + return _registers.__pc; - if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) - return _registers.__r[regNum]; + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) + return _registers.__r[regNum]; #if defined(__ARM_WMMX) - if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) { - if (!_saved_iwmmx_control) { - _saved_iwmmx_control = true; - saveiWMMXControl(_iwmmx_control); - } - return _iwmmx_control[regNum - UNW_ARM_WC0]; - } + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) { + if (!_saved_iwmmx_control) { + _saved_iwmmx_control = true; + saveiWMMXControl(_iwmmx_control); + } + return _iwmmx_control[regNum - UNW_ARM_WC0]; + } #endif #ifdef __ARM_FEATURE_PAUTH @@ -2279,37 +2279,37 @@ inline uint32_t Registers_arm::getRegister(int regNum) const { return _pseudo_registers.__pac; #endif - _LIBUNWIND_ABORT("unsupported arm register"); -} - -inline void Registers_arm::setRegister(int regNum, uint32_t value) { + _LIBUNWIND_ABORT("unsupported arm register"); +} + 
+inline void Registers_arm::setRegister(int regNum, uint32_t value) { if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) { - _registers.__sp = value; + _registers.__sp = value; return; } if (regNum == UNW_ARM_LR) { - _registers.__lr = value; + _registers.__lr = value; return; } if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) { - _registers.__pc = value; + _registers.__pc = value; return; } if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) { - _registers.__r[regNum] = value; + _registers.__r[regNum] = value; return; } #if defined(__ARM_WMMX) if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) { - if (!_saved_iwmmx_control) { - _saved_iwmmx_control = true; - saveiWMMXControl(_iwmmx_control); - } - _iwmmx_control[regNum - UNW_ARM_WC0] = value; + if (!_saved_iwmmx_control) { + _saved_iwmmx_control = true; + saveiWMMXControl(_iwmmx_control); + } + _iwmmx_control[regNum - UNW_ARM_WC0] = value; return; } #endif @@ -2320,465 +2320,465 @@ inline void Registers_arm::setRegister(int regNum, uint32_t value) { } _LIBUNWIND_ABORT("unsupported arm register"); -} - -inline const char *Registers_arm::getRegisterName(int regNum) { - switch (regNum) { - case UNW_REG_IP: - case UNW_ARM_IP: // UNW_ARM_R15 is alias - return "pc"; - case UNW_ARM_LR: // UNW_ARM_R14 is alias - return "lr"; - case UNW_REG_SP: - case UNW_ARM_SP: // UNW_ARM_R13 is alias - return "sp"; - case UNW_ARM_R0: - return "r0"; - case UNW_ARM_R1: - return "r1"; - case UNW_ARM_R2: - return "r2"; - case UNW_ARM_R3: - return "r3"; - case UNW_ARM_R4: - return "r4"; - case UNW_ARM_R5: - return "r5"; - case UNW_ARM_R6: - return "r6"; - case UNW_ARM_R7: - return "r7"; - case UNW_ARM_R8: - return "r8"; - case UNW_ARM_R9: - return "r9"; - case UNW_ARM_R10: - return "r10"; - case UNW_ARM_R11: - return "r11"; - case UNW_ARM_R12: - return "r12"; - case UNW_ARM_S0: - return "s0"; - case UNW_ARM_S1: - return "s1"; - case UNW_ARM_S2: - return "s2"; - case UNW_ARM_S3: - return "s3"; - case UNW_ARM_S4: - return "s4"; - case 
UNW_ARM_S5: - return "s5"; - case UNW_ARM_S6: - return "s6"; - case UNW_ARM_S7: - return "s7"; - case UNW_ARM_S8: - return "s8"; - case UNW_ARM_S9: - return "s9"; - case UNW_ARM_S10: - return "s10"; - case UNW_ARM_S11: - return "s11"; - case UNW_ARM_S12: - return "s12"; - case UNW_ARM_S13: - return "s13"; - case UNW_ARM_S14: - return "s14"; - case UNW_ARM_S15: - return "s15"; - case UNW_ARM_S16: - return "s16"; - case UNW_ARM_S17: - return "s17"; - case UNW_ARM_S18: - return "s18"; - case UNW_ARM_S19: - return "s19"; - case UNW_ARM_S20: - return "s20"; - case UNW_ARM_S21: - return "s21"; - case UNW_ARM_S22: - return "s22"; - case UNW_ARM_S23: - return "s23"; - case UNW_ARM_S24: - return "s24"; - case UNW_ARM_S25: - return "s25"; - case UNW_ARM_S26: - return "s26"; - case UNW_ARM_S27: - return "s27"; - case UNW_ARM_S28: - return "s28"; - case UNW_ARM_S29: - return "s29"; - case UNW_ARM_S30: - return "s30"; - case UNW_ARM_S31: - return "s31"; - case UNW_ARM_D0: - return "d0"; - case UNW_ARM_D1: - return "d1"; - case UNW_ARM_D2: - return "d2"; - case UNW_ARM_D3: - return "d3"; - case UNW_ARM_D4: - return "d4"; - case UNW_ARM_D5: - return "d5"; - case UNW_ARM_D6: - return "d6"; - case UNW_ARM_D7: - return "d7"; - case UNW_ARM_D8: - return "d8"; - case UNW_ARM_D9: - return "d9"; - case UNW_ARM_D10: - return "d10"; - case UNW_ARM_D11: - return "d11"; - case UNW_ARM_D12: - return "d12"; - case UNW_ARM_D13: - return "d13"; - case UNW_ARM_D14: - return "d14"; - case UNW_ARM_D15: - return "d15"; - case UNW_ARM_D16: - return "d16"; - case UNW_ARM_D17: - return "d17"; - case UNW_ARM_D18: - return "d18"; - case UNW_ARM_D19: - return "d19"; - case UNW_ARM_D20: - return "d20"; - case UNW_ARM_D21: - return "d21"; - case UNW_ARM_D22: - return "d22"; - case UNW_ARM_D23: - return "d23"; - case UNW_ARM_D24: - return "d24"; - case UNW_ARM_D25: - return "d25"; - case UNW_ARM_D26: - return "d26"; - case UNW_ARM_D27: - return "d27"; - case UNW_ARM_D28: - return "d28"; - case UNW_ARM_D29: 
- return "d29"; - case UNW_ARM_D30: - return "d30"; - case UNW_ARM_D31: - return "d31"; - default: - return "unknown register"; - } -} - -inline bool Registers_arm::validFloatRegister(int regNum) const { - // NOTE: Consider the intel MMX registers floating points so the +} + +inline const char *Registers_arm::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + case UNW_ARM_IP: // UNW_ARM_R15 is alias + return "pc"; + case UNW_ARM_LR: // UNW_ARM_R14 is alias + return "lr"; + case UNW_REG_SP: + case UNW_ARM_SP: // UNW_ARM_R13 is alias + return "sp"; + case UNW_ARM_R0: + return "r0"; + case UNW_ARM_R1: + return "r1"; + case UNW_ARM_R2: + return "r2"; + case UNW_ARM_R3: + return "r3"; + case UNW_ARM_R4: + return "r4"; + case UNW_ARM_R5: + return "r5"; + case UNW_ARM_R6: + return "r6"; + case UNW_ARM_R7: + return "r7"; + case UNW_ARM_R8: + return "r8"; + case UNW_ARM_R9: + return "r9"; + case UNW_ARM_R10: + return "r10"; + case UNW_ARM_R11: + return "r11"; + case UNW_ARM_R12: + return "r12"; + case UNW_ARM_S0: + return "s0"; + case UNW_ARM_S1: + return "s1"; + case UNW_ARM_S2: + return "s2"; + case UNW_ARM_S3: + return "s3"; + case UNW_ARM_S4: + return "s4"; + case UNW_ARM_S5: + return "s5"; + case UNW_ARM_S6: + return "s6"; + case UNW_ARM_S7: + return "s7"; + case UNW_ARM_S8: + return "s8"; + case UNW_ARM_S9: + return "s9"; + case UNW_ARM_S10: + return "s10"; + case UNW_ARM_S11: + return "s11"; + case UNW_ARM_S12: + return "s12"; + case UNW_ARM_S13: + return "s13"; + case UNW_ARM_S14: + return "s14"; + case UNW_ARM_S15: + return "s15"; + case UNW_ARM_S16: + return "s16"; + case UNW_ARM_S17: + return "s17"; + case UNW_ARM_S18: + return "s18"; + case UNW_ARM_S19: + return "s19"; + case UNW_ARM_S20: + return "s20"; + case UNW_ARM_S21: + return "s21"; + case UNW_ARM_S22: + return "s22"; + case UNW_ARM_S23: + return "s23"; + case UNW_ARM_S24: + return "s24"; + case UNW_ARM_S25: + return "s25"; + case UNW_ARM_S26: + return "s26"; + case UNW_ARM_S27: + 
return "s27"; + case UNW_ARM_S28: + return "s28"; + case UNW_ARM_S29: + return "s29"; + case UNW_ARM_S30: + return "s30"; + case UNW_ARM_S31: + return "s31"; + case UNW_ARM_D0: + return "d0"; + case UNW_ARM_D1: + return "d1"; + case UNW_ARM_D2: + return "d2"; + case UNW_ARM_D3: + return "d3"; + case UNW_ARM_D4: + return "d4"; + case UNW_ARM_D5: + return "d5"; + case UNW_ARM_D6: + return "d6"; + case UNW_ARM_D7: + return "d7"; + case UNW_ARM_D8: + return "d8"; + case UNW_ARM_D9: + return "d9"; + case UNW_ARM_D10: + return "d10"; + case UNW_ARM_D11: + return "d11"; + case UNW_ARM_D12: + return "d12"; + case UNW_ARM_D13: + return "d13"; + case UNW_ARM_D14: + return "d14"; + case UNW_ARM_D15: + return "d15"; + case UNW_ARM_D16: + return "d16"; + case UNW_ARM_D17: + return "d17"; + case UNW_ARM_D18: + return "d18"; + case UNW_ARM_D19: + return "d19"; + case UNW_ARM_D20: + return "d20"; + case UNW_ARM_D21: + return "d21"; + case UNW_ARM_D22: + return "d22"; + case UNW_ARM_D23: + return "d23"; + case UNW_ARM_D24: + return "d24"; + case UNW_ARM_D25: + return "d25"; + case UNW_ARM_D26: + return "d26"; + case UNW_ARM_D27: + return "d27"; + case UNW_ARM_D28: + return "d28"; + case UNW_ARM_D29: + return "d29"; + case UNW_ARM_D30: + return "d30"; + case UNW_ARM_D31: + return "d31"; + default: + return "unknown register"; + } +} + +inline bool Registers_arm::validFloatRegister(int regNum) const { + // NOTE: Consider the intel MMX registers floating points so the // __unw_get_fpreg can be used to transmit the 64-bit data back. 
- return ((regNum >= UNW_ARM_D0) && (regNum <= UNW_ARM_D31)) + return ((regNum >= UNW_ARM_D0) && (regNum <= UNW_ARM_D31)) #if defined(__ARM_WMMX) || ((regNum >= UNW_ARM_WR0) && (regNum <= UNW_ARM_WR15)) #endif ; -} - -inline unw_fpreg_t Registers_arm::getFloatRegister(int regNum) { - if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { - if (!_saved_vfp_d0_d15) { - _saved_vfp_d0_d15 = true; - if (_use_X_for_vfp_save) - saveVFPWithFSTMX(_vfp_d0_d15_pad); - else - saveVFPWithFSTMD(_vfp_d0_d15_pad); - } - return _vfp_d0_d15_pad[regNum - UNW_ARM_D0]; +} + +inline unw_fpreg_t Registers_arm::getFloatRegister(int regNum) { + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { + if (!_saved_vfp_d0_d15) { + _saved_vfp_d0_d15 = true; + if (_use_X_for_vfp_save) + saveVFPWithFSTMX(_vfp_d0_d15_pad); + else + saveVFPWithFSTMD(_vfp_d0_d15_pad); + } + return _vfp_d0_d15_pad[regNum - UNW_ARM_D0]; } if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) { - if (!_saved_vfp_d16_d31) { - _saved_vfp_d16_d31 = true; - saveVFPv3(_vfp_d16_d31); - } - return _vfp_d16_d31[regNum - UNW_ARM_D16]; + if (!_saved_vfp_d16_d31) { + _saved_vfp_d16_d31 = true; + saveVFPv3(_vfp_d16_d31); + } + return _vfp_d16_d31[regNum - UNW_ARM_D16]; } #if defined(__ARM_WMMX) if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) { - if (!_saved_iwmmx) { - _saved_iwmmx = true; - saveiWMMX(_iwmmx); - } - return _iwmmx[regNum - UNW_ARM_WR0]; - } + if (!_saved_iwmmx) { + _saved_iwmmx = true; + saveiWMMX(_iwmmx); + } + return _iwmmx[regNum - UNW_ARM_WR0]; + } #endif _LIBUNWIND_ABORT("Unknown ARM float register"); -} - -inline void Registers_arm::setFloatRegister(int regNum, unw_fpreg_t value) { - if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { - if (!_saved_vfp_d0_d15) { - _saved_vfp_d0_d15 = true; - if (_use_X_for_vfp_save) - saveVFPWithFSTMX(_vfp_d0_d15_pad); - else - saveVFPWithFSTMD(_vfp_d0_d15_pad); - } - _vfp_d0_d15_pad[regNum - UNW_ARM_D0] = value; +} + +inline void Registers_arm::setFloatRegister(int regNum, 
unw_fpreg_t value) { + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { + if (!_saved_vfp_d0_d15) { + _saved_vfp_d0_d15 = true; + if (_use_X_for_vfp_save) + saveVFPWithFSTMX(_vfp_d0_d15_pad); + else + saveVFPWithFSTMD(_vfp_d0_d15_pad); + } + _vfp_d0_d15_pad[regNum - UNW_ARM_D0] = value; return; } if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) { - if (!_saved_vfp_d16_d31) { - _saved_vfp_d16_d31 = true; - saveVFPv3(_vfp_d16_d31); - } - _vfp_d16_d31[regNum - UNW_ARM_D16] = value; + if (!_saved_vfp_d16_d31) { + _saved_vfp_d16_d31 = true; + saveVFPv3(_vfp_d16_d31); + } + _vfp_d16_d31[regNum - UNW_ARM_D16] = value; return; } #if defined(__ARM_WMMX) if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) { - if (!_saved_iwmmx) { - _saved_iwmmx = true; - saveiWMMX(_iwmmx); - } - _iwmmx[regNum - UNW_ARM_WR0] = value; + if (!_saved_iwmmx) { + _saved_iwmmx = true; + saveiWMMX(_iwmmx); + } + _iwmmx[regNum - UNW_ARM_WR0] = value; return; - } + } #endif _LIBUNWIND_ABORT("Unknown ARM float register"); -} - -inline bool Registers_arm::validVectorRegister(int) const { - return false; -} - -inline v128 Registers_arm::getVectorRegister(int) const { - _LIBUNWIND_ABORT("ARM vector support not implemented"); -} - -inline void Registers_arm::setVectorRegister(int, v128) { - _LIBUNWIND_ABORT("ARM vector support not implemented"); -} +} + +inline bool Registers_arm::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_arm::getVectorRegister(int) const { + _LIBUNWIND_ABORT("ARM vector support not implemented"); +} + +inline void Registers_arm::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("ARM vector support not implemented"); +} #endif // _LIBUNWIND_TARGET_ARM #if defined(_LIBUNWIND_TARGET_OR1K) -/// Registers_or1k holds the register state of a thread in an OpenRISC1000 -/// process. 
-class _LIBUNWIND_HIDDEN Registers_or1k { -public: - Registers_or1k(); - Registers_or1k(const void *registers); - - bool validRegister(int num) const; - uint32_t getRegister(int num) const; - void setRegister(int num, uint32_t value); - bool validFloatRegister(int num) const; - double getFloatRegister(int num) const; - void setFloatRegister(int num, double value); - bool validVectorRegister(int num) const; - v128 getVectorRegister(int num) const; - void setVectorRegister(int num, v128 value); +/// Registers_or1k holds the register state of a thread in an OpenRISC1000 +/// process. +class _LIBUNWIND_HIDDEN Registers_or1k { +public: + Registers_or1k(); + Registers_or1k(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); static const char *getRegisterName(int num); - void jumpto(); + void jumpto(); static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K; } static int getArch() { return REGISTERS_OR1K; } - - uint64_t getSP() const { return _registers.__r[1]; } - void setSP(uint32_t value) { _registers.__r[1] = value; } + + uint64_t getSP() const { return _registers.__r[1]; } + void setSP(uint32_t value) { _registers.__r[1] = value; } uint64_t getIP() const { return _registers.__pc; } void setIP(uint32_t value) { _registers.__pc = value; } - -private: - struct or1k_thread_state_t { + +private: + struct or1k_thread_state_t { unsigned int __r[32]; // r0-r31 unsigned int __pc; // Program counter unsigned int __epcr; // Program counter at exception - }; - - or1k_thread_state_t _registers; -}; - -inline Registers_or1k::Registers_or1k(const void *registers) { + }; + + or1k_thread_state_t _registers; +}; 
+ +inline Registers_or1k::Registers_or1k(const void *registers) { static_assert((check_fit::does_fit), "or1k registers do not fit into unw_context_t"); - memcpy(&_registers, static_cast(registers), - sizeof(_registers)); -} - -inline Registers_or1k::Registers_or1k() { - memset(&_registers, 0, sizeof(_registers)); -} - -inline bool Registers_or1k::validRegister(int regNum) const { - if (regNum == UNW_REG_IP) - return true; - if (regNum == UNW_REG_SP) - return true; - if (regNum < 0) - return false; - if (regNum <= UNW_OR1K_R31) - return true; + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_or1k::Registers_or1k() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_or1k::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_OR1K_R31) + return true; if (regNum == UNW_OR1K_EPCR) return true; - return false; -} - -inline uint32_t Registers_or1k::getRegister(int regNum) const { - if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) - return _registers.__r[regNum - UNW_OR1K_R0]; - - switch (regNum) { - case UNW_REG_IP: + return false; +} + +inline uint32_t Registers_or1k::getRegister(int regNum) const { + if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) + return _registers.__r[regNum - UNW_OR1K_R0]; + + switch (regNum) { + case UNW_REG_IP: return _registers.__pc; - case UNW_REG_SP: - return _registers.__r[1]; + case UNW_REG_SP: + return _registers.__r[1]; case UNW_OR1K_EPCR: return _registers.__epcr; - } - _LIBUNWIND_ABORT("unsupported or1k register"); -} - -inline void Registers_or1k::setRegister(int regNum, uint32_t value) { - if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) { - _registers.__r[regNum - UNW_OR1K_R0] = value; - return; - } - - switch (regNum) { - case UNW_REG_IP: + } + _LIBUNWIND_ABORT("unsupported or1k register"); +} + +inline void Registers_or1k::setRegister(int 
regNum, uint32_t value) { + if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) { + _registers.__r[regNum - UNW_OR1K_R0] = value; + return; + } + + switch (regNum) { + case UNW_REG_IP: _registers.__pc = value; - return; - case UNW_REG_SP: - _registers.__r[1] = value; - return; + return; + case UNW_REG_SP: + _registers.__r[1] = value; + return; case UNW_OR1K_EPCR: _registers.__epcr = value; return; - } - _LIBUNWIND_ABORT("unsupported or1k register"); -} - -inline bool Registers_or1k::validFloatRegister(int /* regNum */) const { - return false; -} - -inline double Registers_or1k::getFloatRegister(int /* regNum */) const { - _LIBUNWIND_ABORT("or1k float support not implemented"); -} - -inline void Registers_or1k::setFloatRegister(int /* regNum */, - double /* value */) { - _LIBUNWIND_ABORT("or1k float support not implemented"); -} - -inline bool Registers_or1k::validVectorRegister(int /* regNum */) const { - return false; -} - -inline v128 Registers_or1k::getVectorRegister(int /* regNum */) const { - _LIBUNWIND_ABORT("or1k vector support not implemented"); -} - -inline void Registers_or1k::setVectorRegister(int /* regNum */, v128 /* value */) { - _LIBUNWIND_ABORT("or1k vector support not implemented"); -} - -inline const char *Registers_or1k::getRegisterName(int regNum) { - switch (regNum) { - case UNW_OR1K_R0: - return "r0"; - case UNW_OR1K_R1: - return "r1"; - case UNW_OR1K_R2: - return "r2"; - case UNW_OR1K_R3: - return "r3"; - case UNW_OR1K_R4: - return "r4"; - case UNW_OR1K_R5: - return "r5"; - case UNW_OR1K_R6: - return "r6"; - case UNW_OR1K_R7: - return "r7"; - case UNW_OR1K_R8: - return "r8"; - case UNW_OR1K_R9: - return "r9"; - case UNW_OR1K_R10: - return "r10"; - case UNW_OR1K_R11: - return "r11"; - case UNW_OR1K_R12: - return "r12"; - case UNW_OR1K_R13: - return "r13"; - case UNW_OR1K_R14: - return "r14"; - case UNW_OR1K_R15: - return "r15"; - case UNW_OR1K_R16: - return "r16"; - case UNW_OR1K_R17: - return "r17"; - case UNW_OR1K_R18: - return "r18"; - 
case UNW_OR1K_R19: - return "r19"; - case UNW_OR1K_R20: - return "r20"; - case UNW_OR1K_R21: - return "r21"; - case UNW_OR1K_R22: - return "r22"; - case UNW_OR1K_R23: - return "r23"; - case UNW_OR1K_R24: - return "r24"; - case UNW_OR1K_R25: - return "r25"; - case UNW_OR1K_R26: - return "r26"; - case UNW_OR1K_R27: - return "r27"; - case UNW_OR1K_R28: - return "r28"; - case UNW_OR1K_R29: - return "r29"; - case UNW_OR1K_R30: - return "r30"; - case UNW_OR1K_R31: - return "r31"; + } + _LIBUNWIND_ABORT("unsupported or1k register"); +} + +inline bool Registers_or1k::validFloatRegister(int /* regNum */) const { + return false; +} + +inline double Registers_or1k::getFloatRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("or1k float support not implemented"); +} + +inline void Registers_or1k::setFloatRegister(int /* regNum */, + double /* value */) { + _LIBUNWIND_ABORT("or1k float support not implemented"); +} + +inline bool Registers_or1k::validVectorRegister(int /* regNum */) const { + return false; +} + +inline v128 Registers_or1k::getVectorRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("or1k vector support not implemented"); +} + +inline void Registers_or1k::setVectorRegister(int /* regNum */, v128 /* value */) { + _LIBUNWIND_ABORT("or1k vector support not implemented"); +} + +inline const char *Registers_or1k::getRegisterName(int regNum) { + switch (regNum) { + case UNW_OR1K_R0: + return "r0"; + case UNW_OR1K_R1: + return "r1"; + case UNW_OR1K_R2: + return "r2"; + case UNW_OR1K_R3: + return "r3"; + case UNW_OR1K_R4: + return "r4"; + case UNW_OR1K_R5: + return "r5"; + case UNW_OR1K_R6: + return "r6"; + case UNW_OR1K_R7: + return "r7"; + case UNW_OR1K_R8: + return "r8"; + case UNW_OR1K_R9: + return "r9"; + case UNW_OR1K_R10: + return "r10"; + case UNW_OR1K_R11: + return "r11"; + case UNW_OR1K_R12: + return "r12"; + case UNW_OR1K_R13: + return "r13"; + case UNW_OR1K_R14: + return "r14"; + case UNW_OR1K_R15: + return "r15"; + case UNW_OR1K_R16: + return "r16"; + 
case UNW_OR1K_R17: + return "r17"; + case UNW_OR1K_R18: + return "r18"; + case UNW_OR1K_R19: + return "r19"; + case UNW_OR1K_R20: + return "r20"; + case UNW_OR1K_R21: + return "r21"; + case UNW_OR1K_R22: + return "r22"; + case UNW_OR1K_R23: + return "r23"; + case UNW_OR1K_R24: + return "r24"; + case UNW_OR1K_R25: + return "r25"; + case UNW_OR1K_R26: + return "r26"; + case UNW_OR1K_R27: + return "r27"; + case UNW_OR1K_R28: + return "r28"; + case UNW_OR1K_R29: + return "r29"; + case UNW_OR1K_R30: + return "r30"; + case UNW_OR1K_R31: + return "r31"; case UNW_OR1K_EPCR: return "EPCR"; - default: - return "unknown register"; - } - -} + default: + return "unknown register"; + } + +} #endif // _LIBUNWIND_TARGET_OR1K #if defined(_LIBUNWIND_TARGET_MIPS_O32) @@ -4712,6 +4712,6 @@ inline const char *Registers_ve::getRegisterName(int regNum) { } #endif // _LIBUNWIND_TARGET_VE -} // namespace libunwind - -#endif // __REGISTERS_HPP__ +} // namespace libunwind + +#endif // __REGISTERS_HPP__ diff --git a/contrib/libs/libunwind/src/Unwind-EHABI.cpp b/contrib/libs/libunwind/src/Unwind-EHABI.cpp index 46b26f588f2..21c8b2777b8 100644 --- a/contrib/libs/libunwind/src/Unwind-EHABI.cpp +++ b/contrib/libs/libunwind/src/Unwind-EHABI.cpp @@ -1,432 +1,432 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Implements ARM zero-cost C++ exceptions -// -//===----------------------------------------------------------------------===// - -#include "Unwind-EHABI.h" - +// +// +// Implements ARM zero-cost C++ exceptions +// +//===----------------------------------------------------------------------===// + +#include "Unwind-EHABI.h" + #if defined(_LIBUNWIND_ARM_EHABI) - + #include -#include -#include -#include -#include -#include - -#include "config.h" -#include "libunwind.h" -#include "libunwind_ext.h" -#include "unwind.h" - -namespace { - -// Strange order: take words in order, but inside word, take from most to least -// signinficant byte. -uint8_t getByte(const uint32_t* data, size_t offset) { - const uint8_t* byteData = reinterpret_cast(data); +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind.h" +#include "libunwind_ext.h" +#include "unwind.h" + +namespace { + +// Strange order: take words in order, but inside word, take from most to least +// signinficant byte. 
+uint8_t getByte(const uint32_t* data, size_t offset) { + const uint8_t* byteData = reinterpret_cast(data); #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))]; + return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))]; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return byteData[offset]; #else #error "Unable to determine endianess" #endif -} - -const char* getNextWord(const char* data, uint32_t* out) { - *out = *reinterpret_cast(data); - return data + 4; -} - -const char* getNextNibble(const char* data, uint32_t* out) { - *out = *reinterpret_cast(data); - return data + 2; -} - -struct Descriptor { - // See # 9.2 - typedef enum { - SU16 = 0, // Short descriptor, 16-bit entries - LU16 = 1, // Long descriptor, 16-bit entries - LU32 = 3, // Long descriptor, 32-bit entries - RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7, - RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11, - RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15 - } Format; - - // See # 9.2 - typedef enum { - CLEANUP = 0x0, - FUNC = 0x1, - CATCH = 0x2, - INVALID = 0x4 - } Kind; -}; - -_Unwind_Reason_Code ProcessDescriptors( - _Unwind_State state, - _Unwind_Control_Block* ucbp, - struct _Unwind_Context* context, - Descriptor::Format format, - const char* descriptorStart, - uint32_t flags) { - - // EHT is inlined in the index using compact form. No descriptors. #5 - if (flags & 0x1) - return _URC_CONTINUE_UNWIND; - - // TODO: We should check the state here, and determine whether we need to - // perform phase1 or phase2 unwinding. - (void)state; - - const char* descriptor = descriptorStart; - uint32_t descriptorWord; - getNextWord(descriptor, &descriptorWord); - while (descriptorWord) { - // Read descriptor based on # 9.2. 
- uint32_t length; - uint32_t offset; - switch (format) { - case Descriptor::LU32: - descriptor = getNextWord(descriptor, &length); - descriptor = getNextWord(descriptor, &offset); +} + +const char* getNextWord(const char* data, uint32_t* out) { + *out = *reinterpret_cast(data); + return data + 4; +} + +const char* getNextNibble(const char* data, uint32_t* out) { + *out = *reinterpret_cast(data); + return data + 2; +} + +struct Descriptor { + // See # 9.2 + typedef enum { + SU16 = 0, // Short descriptor, 16-bit entries + LU16 = 1, // Long descriptor, 16-bit entries + LU32 = 3, // Long descriptor, 32-bit entries + RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7, + RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11, + RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15 + } Format; + + // See # 9.2 + typedef enum { + CLEANUP = 0x0, + FUNC = 0x1, + CATCH = 0x2, + INVALID = 0x4 + } Kind; +}; + +_Unwind_Reason_Code ProcessDescriptors( + _Unwind_State state, + _Unwind_Control_Block* ucbp, + struct _Unwind_Context* context, + Descriptor::Format format, + const char* descriptorStart, + uint32_t flags) { + + // EHT is inlined in the index using compact form. No descriptors. #5 + if (flags & 0x1) + return _URC_CONTINUE_UNWIND; + + // TODO: We should check the state here, and determine whether we need to + // perform phase1 or phase2 unwinding. + (void)state; + + const char* descriptor = descriptorStart; + uint32_t descriptorWord; + getNextWord(descriptor, &descriptorWord); + while (descriptorWord) { + // Read descriptor based on # 9.2. 
+ uint32_t length; + uint32_t offset; + switch (format) { + case Descriptor::LU32: + descriptor = getNextWord(descriptor, &length); + descriptor = getNextWord(descriptor, &offset); break; - case Descriptor::LU16: - descriptor = getNextNibble(descriptor, &length); - descriptor = getNextNibble(descriptor, &offset); + case Descriptor::LU16: + descriptor = getNextNibble(descriptor, &length); + descriptor = getNextNibble(descriptor, &offset); + break; + default: + assert(false); + return _URC_FAILURE; + } + + // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value. + Descriptor::Kind kind = + static_cast((length & 0x1) | ((offset & 0x1) << 1)); + + // Clear off flag from last bit. + length &= ~1u; + offset &= ~1u; + uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset; + uintptr_t scopeEnd = scopeStart + length; + uintptr_t pc = _Unwind_GetIP(context); + bool isInScope = (scopeStart <= pc) && (pc < scopeEnd); + + switch (kind) { + case Descriptor::CLEANUP: { + // TODO(ajwong): Handle cleanup descriptors. + break; + } + case Descriptor::FUNC: { + // TODO(ajwong): Handle function descriptors. + break; + } + case Descriptor::CATCH: { + // Catch descriptors require gobbling one more word. + uint32_t landing_pad; + descriptor = getNextWord(descriptor, &landing_pad); + + if (isInScope) { + // TODO(ajwong): This is only phase1 compatible logic. Implement + // phase2. 
+ landing_pad = signExtendPrel31(landing_pad & ~0x80000000); + if (landing_pad == 0xffffffff) { + return _URC_HANDLER_FOUND; + } else if (landing_pad == 0xfffffffe) { + return _URC_FAILURE; + } else { + /* + bool is_reference_type = landing_pad & 0x80000000; + void* matched_object; + if (__cxxabiv1::__cxa_type_match( + ucbp, reinterpret_cast(landing_pad), + is_reference_type, + &matched_object) != __cxxabiv1::ctm_failed) + return _URC_HANDLER_FOUND; + */ + _LIBUNWIND_ABORT("Type matching not implemented"); + } + } break; - default: - assert(false); - return _URC_FAILURE; - } - - // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value. - Descriptor::Kind kind = - static_cast((length & 0x1) | ((offset & 0x1) << 1)); - - // Clear off flag from last bit. - length &= ~1u; - offset &= ~1u; - uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset; - uintptr_t scopeEnd = scopeStart + length; - uintptr_t pc = _Unwind_GetIP(context); - bool isInScope = (scopeStart <= pc) && (pc < scopeEnd); - - switch (kind) { - case Descriptor::CLEANUP: { - // TODO(ajwong): Handle cleanup descriptors. - break; - } - case Descriptor::FUNC: { - // TODO(ajwong): Handle function descriptors. - break; - } - case Descriptor::CATCH: { - // Catch descriptors require gobbling one more word. - uint32_t landing_pad; - descriptor = getNextWord(descriptor, &landing_pad); - - if (isInScope) { - // TODO(ajwong): This is only phase1 compatible logic. Implement - // phase2. 
- landing_pad = signExtendPrel31(landing_pad & ~0x80000000); - if (landing_pad == 0xffffffff) { - return _URC_HANDLER_FOUND; - } else if (landing_pad == 0xfffffffe) { - return _URC_FAILURE; - } else { - /* - bool is_reference_type = landing_pad & 0x80000000; - void* matched_object; - if (__cxxabiv1::__cxa_type_match( - ucbp, reinterpret_cast(landing_pad), - is_reference_type, - &matched_object) != __cxxabiv1::ctm_failed) - return _URC_HANDLER_FOUND; - */ - _LIBUNWIND_ABORT("Type matching not implemented"); - } - } - break; - } - default: - _LIBUNWIND_ABORT("Invalid descriptor kind found."); - } - - getNextWord(descriptor, &descriptorWord); - } - - return _URC_CONTINUE_UNWIND; -} - -static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, - _Unwind_Control_Block* ucbp, - struct _Unwind_Context* context) { - // Read the compact model EHT entry's header # 6.3 - const uint32_t* unwindingData = ucbp->pr_cache.ehtp; - assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry"); - Descriptor::Format format = - static_cast((*unwindingData & 0x0f000000) >> 24); - - const char *lsda = - reinterpret_cast(_Unwind_GetLanguageSpecificData(context)); - - // Handle descriptors before unwinding so they are processed in the context - // of the correct stack frame. 
- _Unwind_Reason_Code result = - ProcessDescriptors(state, ucbp, context, format, lsda, - ucbp->pr_cache.additional); - - if (result != _URC_CONTINUE_UNWIND) - return result; - + } + default: + _LIBUNWIND_ABORT("Invalid descriptor kind found."); + } + + getNextWord(descriptor, &descriptorWord); + } + + return _URC_CONTINUE_UNWIND; +} + +static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, + _Unwind_Control_Block* ucbp, + struct _Unwind_Context* context) { + // Read the compact model EHT entry's header # 6.3 + const uint32_t* unwindingData = ucbp->pr_cache.ehtp; + assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry"); + Descriptor::Format format = + static_cast((*unwindingData & 0x0f000000) >> 24); + + const char *lsda = + reinterpret_cast(_Unwind_GetLanguageSpecificData(context)); + + // Handle descriptors before unwinding so they are processed in the context + // of the correct stack frame. + _Unwind_Reason_Code result = + ProcessDescriptors(state, ucbp, context, format, lsda, + ucbp->pr_cache.additional); + + if (result != _URC_CONTINUE_UNWIND) + return result; + switch (__unw_step(reinterpret_cast(context))) { case UNW_STEP_SUCCESS: return _URC_CONTINUE_UNWIND; case UNW_STEP_END: return _URC_END_OF_STACK; default: - return _URC_FAILURE; + return _URC_FAILURE; + } +} + +// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE / +// _UVRSD_UINT32. +uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) { + return ((1U << (count_minus_one + 1)) - 1) << start; +} + +// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP / +// _UVRSD_DOUBLE. +uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) { + return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1); +} + +} // end anonymous namespace + +/** + * Decodes an EHT entry. + * + * @param data Pointer to EHT. + * @param[out] off Offset from return value (in bytes) to begin interpretation. 
+ * @param[out] len Number of bytes in unwind code. + * @return Pointer to beginning of unwind code. + */ +extern "C" const uint32_t* +decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) { + if ((*data & 0x80000000) == 0) { + // 6.2: Generic Model + // + // EHT entry is a prel31 pointing to the PR, followed by data understood + // only by the personality routine. Fortunately, all existing assembler + // implementations, including GNU assembler, LLVM integrated assembler, + // and ARM assembler, assume that the unwind opcodes come after the + // personality rountine address. + *off = 1; // First byte is size data. + *len = (((data[1] >> 24) & 0xff) + 1) * 4; + data++; // Skip the first word, which is the prel31 offset. + } else { + // 6.3: ARM Compact Model + // + // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indeded + // by format: + Descriptor::Format format = + static_cast((*data & 0x0f000000) >> 24); + switch (format) { + case Descriptor::SU16: + *len = 4; + *off = 1; + break; + case Descriptor::LU16: + case Descriptor::LU32: + *len = 4 + 4 * ((*data & 0x00ff0000) >> 16); + *off = 2; + break; + default: + return nullptr; + } } -} - -// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE / -// _UVRSD_UINT32. -uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) { - return ((1U << (count_minus_one + 1)) - 1) << start; -} - -// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP / -// _UVRSD_DOUBLE. -uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) { - return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1); -} - -} // end anonymous namespace - -/** - * Decodes an EHT entry. - * - * @param data Pointer to EHT. - * @param[out] off Offset from return value (in bytes) to begin interpretation. - * @param[out] len Number of bytes in unwind code. - * @return Pointer to beginning of unwind code. 
- */ -extern "C" const uint32_t* -decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) { - if ((*data & 0x80000000) == 0) { - // 6.2: Generic Model - // - // EHT entry is a prel31 pointing to the PR, followed by data understood - // only by the personality routine. Fortunately, all existing assembler - // implementations, including GNU assembler, LLVM integrated assembler, - // and ARM assembler, assume that the unwind opcodes come after the - // personality rountine address. - *off = 1; // First byte is size data. - *len = (((data[1] >> 24) & 0xff) + 1) * 4; - data++; // Skip the first word, which is the prel31 offset. - } else { - // 6.3: ARM Compact Model - // - // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indeded - // by format: - Descriptor::Format format = - static_cast((*data & 0x0f000000) >> 24); - switch (format) { - case Descriptor::SU16: - *len = 4; - *off = 1; - break; - case Descriptor::LU16: - case Descriptor::LU32: - *len = 4 + 4 * ((*data & 0x00ff0000) >> 16); - *off = 2; - break; - default: - return nullptr; - } - } - return data; -} - + return data; +} + _LIBUNWIND_EXPORT _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data, size_t offset, size_t len) { - bool wrotePC = false; - bool finish = false; + bool wrotePC = false; + bool finish = false; bool hasReturnAddrAuthCode = false; - while (offset < len && !finish) { - uint8_t byte = getByte(data, offset++); - if ((byte & 0x80) == 0) { - uint32_t sp; - _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); - if (byte & 0x40) - sp -= (((uint32_t)byte & 0x3f) << 2) + 4; - else - sp += ((uint32_t)byte << 2) + 4; - _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); - } else { - switch (byte & 0xf0) { - case 0x80: { - if (offset >= len) - return _URC_FAILURE; - uint32_t registers = - (((uint32_t)byte & 0x0f) << 12) | - (((uint32_t)getByte(data, offset++)) << 4); - if (!registers) - return 
_URC_FAILURE; - if (registers & (1 << 15)) - wrotePC = true; - _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); - break; - } - case 0x90: { - uint8_t reg = byte & 0x0f; - if (reg == 13 || reg == 15) - return _URC_FAILURE; - uint32_t sp; - _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg, - _UVRSD_UINT32, &sp); - _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, - &sp); - break; - } - case 0xa0: { - uint32_t registers = RegisterMask(4, byte & 0x07); - if (byte & 0x08) - registers |= 1 << 14; - _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); - break; - } - case 0xb0: { - switch (byte) { - case 0xb0: - finish = true; - break; - case 0xb1: { - if (offset >= len) - return _URC_FAILURE; - uint8_t registers = getByte(data, offset++); - if (registers & 0xf0 || !registers) - return _URC_FAILURE; - _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); - break; - } - case 0xb2: { - uint32_t addend = 0; - uint32_t shift = 0; - // This decodes a uleb128 value. 
- while (true) { - if (offset >= len) - return _URC_FAILURE; - uint32_t v = getByte(data, offset++); - addend |= (v & 0x7f) << shift; - if ((v & 0x80) == 0) - break; - shift += 7; - } - uint32_t sp; - _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, - &sp); - sp += 0x204 + (addend << 2); - _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, - &sp); - break; - } - case 0xb3: { - uint8_t v = getByte(data, offset++); - _Unwind_VRS_Pop(context, _UVRSC_VFP, - RegisterRange(static_cast(v >> 4), - v & 0x0f), _UVRSD_VFPX); - break; - } - case 0xb4: + while (offset < len && !finish) { + uint8_t byte = getByte(data, offset++); + if ((byte & 0x80) == 0) { + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); + if (byte & 0x40) + sp -= (((uint32_t)byte & 0x3f) << 2) + 4; + else + sp += ((uint32_t)byte << 2) + 4; + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); + } else { + switch (byte & 0xf0) { + case 0x80: { + if (offset >= len) + return _URC_FAILURE; + uint32_t registers = + (((uint32_t)byte & 0x0f) << 12) | + (((uint32_t)getByte(data, offset++)) << 4); + if (!registers) + return _URC_FAILURE; + if (registers & (1 << 15)) + wrotePC = true; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0x90: { + uint8_t reg = byte & 0x0f; + if (reg == 13 || reg == 15) + return _URC_FAILURE; + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg, + _UVRSD_UINT32, &sp); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp); + break; + } + case 0xa0: { + uint32_t registers = RegisterMask(4, byte & 0x07); + if (byte & 0x08) + registers |= 1 << 14; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0xb0: { + switch (byte) { + case 0xb0: + finish = true; + break; + case 0xb1: { + if (offset >= len) + return _URC_FAILURE; + uint8_t registers = getByte(data, offset++); + if (registers & 0xf0 || 
!registers) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0xb2: { + uint32_t addend = 0; + uint32_t shift = 0; + // This decodes a uleb128 value. + while (true) { + if (offset >= len) + return _URC_FAILURE; + uint32_t v = getByte(data, offset++); + addend |= (v & 0x7f) << shift; + if ((v & 0x80) == 0) + break; + shift += 7; + } + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp); + sp += 0x204 + (addend << 2); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp); + break; + } + case 0xb3: { + uint8_t v = getByte(data, offset++); + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(static_cast(v >> 4), + v & 0x0f), _UVRSD_VFPX); + break; + } + case 0xb4: hasReturnAddrAuthCode = true; _Unwind_VRS_Pop(context, _UVRSC_PSEUDO, 0 /* Return Address Auth Code */, _UVRSD_UINT32); break; - case 0xb5: - case 0xb6: - case 0xb7: - return _URC_FAILURE; - default: - _Unwind_VRS_Pop(context, _UVRSC_VFP, - RegisterRange(8, byte & 0x07), _UVRSD_VFPX); - break; - } - break; - } - case 0xc0: { - switch (byte) { + case 0xb5: + case 0xb6: + case 0xb7: + return _URC_FAILURE; + default: + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(8, byte & 0x07), _UVRSD_VFPX); + break; + } + break; + } + case 0xc0: { + switch (byte) { #if defined(__ARM_WMMX) - case 0xc0: - case 0xc1: - case 0xc2: - case 0xc3: - case 0xc4: - case 0xc5: - _Unwind_VRS_Pop(context, _UVRSC_WMMXD, - RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE); - break; - case 0xc6: { - uint8_t v = getByte(data, offset++); - uint8_t start = static_cast(v >> 4); - uint8_t count_minus_one = v & 0xf; - if (start + count_minus_one >= 16) - return _URC_FAILURE; - _Unwind_VRS_Pop(context, _UVRSC_WMMXD, - RegisterRange(start, count_minus_one), - _UVRSD_DOUBLE); - break; - } - case 0xc7: { - uint8_t v = getByte(data, offset++); - if (!v || v & 0xf0) - return _URC_FAILURE; - _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, 
_UVRSD_DOUBLE); - break; - } + case 0xc0: + case 0xc1: + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + _Unwind_VRS_Pop(context, _UVRSC_WMMXD, + RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE); + break; + case 0xc6: { + uint8_t v = getByte(data, offset++); + uint8_t start = static_cast(v >> 4); + uint8_t count_minus_one = v & 0xf; + if (start + count_minus_one >= 16) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_WMMXD, + RegisterRange(start, count_minus_one), + _UVRSD_DOUBLE); + break; + } + case 0xc7: { + uint8_t v = getByte(data, offset++); + if (!v || v & 0xf0) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE); + break; + } #endif - case 0xc8: - case 0xc9: { - uint8_t v = getByte(data, offset++); - uint8_t start = - static_cast(((byte == 0xc8) ? 16 : 0) + (v >> 4)); - uint8_t count_minus_one = v & 0xf; - if (start + count_minus_one >= 32) - return _URC_FAILURE; - _Unwind_VRS_Pop(context, _UVRSC_VFP, - RegisterRange(start, count_minus_one), - _UVRSD_DOUBLE); - break; - } - default: - return _URC_FAILURE; - } - break; - } - case 0xd0: { - if (byte & 0x08) - return _URC_FAILURE; - _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7), - _UVRSD_DOUBLE); - break; - } - default: - return _URC_FAILURE; - } - } - } - if (!wrotePC) { - uint32_t lr; - _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr); + case 0xc8: + case 0xc9: { + uint8_t v = getByte(data, offset++); + uint8_t start = + static_cast(((byte == 0xc8) ? 
16 : 0) + (v >> 4)); + uint8_t count_minus_one = v & 0xf; + if (start + count_minus_one >= 32) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(start, count_minus_one), + _UVRSD_DOUBLE); + break; + } + default: + return _URC_FAILURE; + } + break; + } + case 0xd0: { + if (byte & 0x08) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7), + _UVRSD_DOUBLE); + break; + } + default: + return _URC_FAILURE; + } + } + } + if (!wrotePC) { + uint32_t lr; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr); #ifdef __ARM_FEATURE_PAUTH if (hasReturnAddrAuthCode) { uint32_t sp; @@ -437,263 +437,263 @@ _Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data, __asm__ __volatile__("autg %0, %1, %2" : : "r"(pac), "r"(lr), "r"(sp) :); } #endif - _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr); - } - return _URC_CONTINUE_UNWIND; -} - + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr); + } + return _URC_CONTINUE_UNWIND; +} + extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __aeabi_unwind_cpp_pr0(_Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { - return unwindOneFrame(state, ucbp, context); -} - + return unwindOneFrame(state, ucbp, context); +} + extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __aeabi_unwind_cpp_pr1(_Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { - return unwindOneFrame(state, ucbp, context); -} - + return unwindOneFrame(state, ucbp, context); +} + extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __aeabi_unwind_cpp_pr2(_Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { - return unwindOneFrame(state, ucbp, context); -} - -static _Unwind_Reason_Code + return unwindOneFrame(state, ucbp, context); +} + +static _Unwind_Reason_Code unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { - // EHABI #7.3 discusses 
preserving the VRS in a "temporary VRS" during - // phase 1 and then restoring it to the "primary VRS" for phase 2. The - // effect is phase 2 doesn't see any of the VRS manipulations from phase 1. - // In this implementation, the phases don't share the VRS backing store. - // Instead, they are passed the original |uc| and they create a new VRS - // from scratch thus achieving the same effect. + // EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during + // phase 1 and then restoring it to the "primary VRS" for phase 2. The + // effect is phase 2 doesn't see any of the VRS manipulations from phase 1. + // In this implementation, the phases don't share the VRS backing store. + // Instead, they are passed the original |uc| and they create a new VRS + // from scratch thus achieving the same effect. __unw_init_local(cursor, uc); - - // Walk each frame looking for a place to stop. - for (bool handlerNotFound = true; handlerNotFound;) { - - // See if frame has code to run (has personality routine). - unw_proc_info_t frameInfo; + + // Walk each frame looking for a place to stop. + for (bool handlerNotFound = true; handlerNotFound;) { + + // See if frame has code to run (has personality routine). + unw_proc_info_t frameInfo; if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " "failed => _URC_FATAL_PHASE1_ERROR", static_cast(exception_object)); - return _URC_FATAL_PHASE1_ERROR; - } - + return _URC_FATAL_PHASE1_ERROR; + } + #ifndef NDEBUG - // When tracing, print state information. - if (_LIBUNWIND_TRACING_UNWINDING) { - char functionBuf[512]; - const char *functionName = functionBuf; - unw_word_t offset; + // When tracing, print state information. 
+ if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || - (frameInfo.start_ip + offset > frameInfo.end_ip)) - functionName = ".anonymous."; - unw_word_t pc; + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + unw_word_t pc; __unw_get_reg(cursor, UNW_REG_IP, &pc); - _LIBUNWIND_TRACE_UNWINDING( + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, " "lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, static_cast(exception_object), pc, frameInfo.start_ip, functionName, frameInfo.lsda, frameInfo.handler); - } + } #endif - - // If there is a personality routine, ask it if it will want to stop at - // this frame. - if (frameInfo.handler != 0) { + + // If there is a personality routine, ask it if it will want to stop at + // this frame. + if (frameInfo.handler != 0) { _Unwind_Personality_Fn p = (_Unwind_Personality_Fn)(long)(frameInfo.handler); - _LIBUNWIND_TRACE_UNWINDING( + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): calling personality function %p", - static_cast(exception_object), - reinterpret_cast(reinterpret_cast(p))); + static_cast(exception_object), + reinterpret_cast(reinterpret_cast(p))); struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); - exception_object->pr_cache.fnstart = frameInfo.start_ip; - exception_object->pr_cache.ehtp = - (_Unwind_EHT_Header *)frameInfo.unwind_info; - exception_object->pr_cache.additional = frameInfo.flags; - _Unwind_Reason_Code personalityResult = - (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context); - _LIBUNWIND_TRACE_UNWINDING( - "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p " + exception_object->pr_cache.fnstart = frameInfo.start_ip; + exception_object->pr_cache.ehtp = + (_Unwind_EHT_Header *)frameInfo.unwind_info; + 
exception_object->pr_cache.additional = frameInfo.flags; + _Unwind_Reason_Code personalityResult = + (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p " "additional %x", - static_cast(exception_object), personalityResult, - exception_object->pr_cache.fnstart, - static_cast(exception_object->pr_cache.ehtp), - exception_object->pr_cache.additional); - switch (personalityResult) { - case _URC_HANDLER_FOUND: - // found a catch clause or locals that need destructing in this frame - // stop search and remember stack pointer at the frame - handlerNotFound = false; - // p should have initialized barrier_cache. EHABI #7.3.5 - _LIBUNWIND_TRACE_UNWINDING( + static_cast(exception_object), personalityResult, + exception_object->pr_cache.fnstart, + static_cast(exception_object->pr_cache.ehtp), + exception_object->pr_cache.additional); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember stack pointer at the frame + handlerNotFound = false; + // p should have initialized barrier_cache. 
EHABI #7.3.5 + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND", - static_cast(exception_object)); - return _URC_NO_REASON; - - case _URC_CONTINUE_UNWIND: - _LIBUNWIND_TRACE_UNWINDING( + static_cast(exception_object)); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND", - static_cast(exception_object)); - // continue unwinding - break; - - // EHABI #7.3.3 - case _URC_FAILURE: - return _URC_FAILURE; - - default: - // something went wrong - _LIBUNWIND_TRACE_UNWINDING( + static_cast(exception_object)); + // continue unwinding + break; + + // EHABI #7.3.3 + case _URC_FAILURE: + return _URC_FAILURE; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", - static_cast(exception_object)); - return _URC_FATAL_PHASE1_ERROR; - } - } - } - return _URC_NO_REASON; -} - + static_cast(exception_object)); + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, - _Unwind_Exception *exception_object, - bool resume) { - // See comment at the start of unwind_phase1 regarding VRS integrity. + _Unwind_Exception *exception_object, + bool resume) { + // See comment at the start of unwind_phase1 regarding VRS integrity. __unw_init_local(cursor, uc); - + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", - static_cast(exception_object)); - int frame_count = 0; - - // Walk each frame until we reach where search phase said to stop. - while (true) { + static_cast(exception_object)); + int frame_count = 0; + + // Walk each frame until we reach where search phase said to stop. + while (true) { // Ask libunwind to get next frame (skip over first which is - // _Unwind_RaiseException or _Unwind_Resume). - // - // Resume only ever makes sense for 1 frame. - _Unwind_State state = - resume ? 
_US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING; - if (resume && frame_count == 1) { - // On a resume, first unwind the _Unwind_Resume() frame. The next frame - // is now the landing pad for the cleanup from a previous execution of - // phase2. To continue unwindingly correctly, replace VRS[15] with the - // IP of the frame that the previous run of phase2 installed the context - // for. After this, continue unwinding as if normal. - // - // See #7.4.6 for details. + // _Unwind_RaiseException or _Unwind_Resume). + // + // Resume only ever makes sense for 1 frame. + _Unwind_State state = + resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING; + if (resume && frame_count == 1) { + // On a resume, first unwind the _Unwind_Resume() frame. The next frame + // is now the landing pad for the cleanup from a previous execution of + // phase2. To continue unwindingly correctly, replace VRS[15] with the + // IP of the frame that the previous run of phase2 installed the context + // for. After this, continue unwinding as if normal. + // + // See #7.4.6 for details. __unw_set_reg(cursor, UNW_REG_IP, exception_object->unwinder_cache.reserved2); - resume = false; - } - - // Get info about this frame. - unw_word_t sp; - unw_proc_info_t frameInfo; + resume = false; + } + + // Get info about this frame. + unw_word_t sp; + unw_proc_info_t frameInfo; __unw_get_reg(cursor, UNW_REG_SP, &sp); if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " "failed => _URC_FATAL_PHASE2_ERROR", static_cast(exception_object)); - return _URC_FATAL_PHASE2_ERROR; - } - + return _URC_FATAL_PHASE2_ERROR; + } + #ifndef NDEBUG - // When tracing, print state information. - if (_LIBUNWIND_TRACING_UNWINDING) { - char functionBuf[512]; - const char *functionName = functionBuf; - unw_word_t offset; + // When tracing, print state information. 
+ if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || - (frameInfo.start_ip + offset > frameInfo.end_ip)) - functionName = ".anonymous."; - _LIBUNWIND_TRACE_UNWINDING( + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, sp=0x%" PRIxPTR ", " "lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "", static_cast(exception_object), frameInfo.start_ip, functionName, sp, frameInfo.lsda, frameInfo.handler); - } + } #endif - - // If there is a personality routine, tell it we are unwinding. - if (frameInfo.handler != 0) { + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { _Unwind_Personality_Fn p = (_Unwind_Personality_Fn)(intptr_t)(frameInfo.handler); struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); - // EHABI #7.2 - exception_object->pr_cache.fnstart = frameInfo.start_ip; - exception_object->pr_cache.ehtp = - (_Unwind_EHT_Header *)frameInfo.unwind_info; - exception_object->pr_cache.additional = frameInfo.flags; - _Unwind_Reason_Code personalityResult = - (*p)(state, exception_object, context); - switch (personalityResult) { - case _URC_CONTINUE_UNWIND: - // Continue unwinding - _LIBUNWIND_TRACE_UNWINDING( + // EHABI #7.2 + exception_object->pr_cache.fnstart = frameInfo.start_ip; + exception_object->pr_cache.ehtp = + (_Unwind_EHT_Header *)frameInfo.unwind_info; + exception_object->pr_cache.additional = frameInfo.flags; + _Unwind_Reason_Code personalityResult = + (*p)(state, exception_object, context); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // Continue unwinding + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", - static_cast(exception_object)); - // EHABI #7.2 - if (sp == 
exception_object->barrier_cache.sp) { - // Phase 1 said we would stop at this frame, but we did not... - _LIBUNWIND_ABORT("during phase1 personality function said it would " - "stop here, but now in phase2 it did not stop here"); - } - break; - case _URC_INSTALL_CONTEXT: - _LIBUNWIND_TRACE_UNWINDING( + static_cast(exception_object)); + // EHABI #7.2 + if (sp == exception_object->barrier_cache.sp) { + // Phase 1 said we would stop at this frame, but we did not... + _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now in phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT", - static_cast(exception_object)); - // Personality routine says to transfer control to landing pad. - // We may get control back if landing pad calls _Unwind_Resume(). - if (_LIBUNWIND_TRACING_UNWINDING) { - unw_word_t pc; + static_cast(exception_object)); + // Personality routine says to transfer control to landing pad. + // We may get control back if landing pad calls _Unwind_Resume(). + if (_LIBUNWIND_TRACING_UNWINDING) { + unw_word_t pc; __unw_get_reg(cursor, UNW_REG_IP, &pc); __unw_get_reg(cursor, UNW_REG_SP, &sp); - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " "user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR, - static_cast(exception_object), + static_cast(exception_object), pc, sp); - } - - { - // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume - // is called back, to find this same frame. - unw_word_t pc; + } + + { + // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume + // is called back, to find this same frame. 
+ unw_word_t pc; __unw_get_reg(cursor, UNW_REG_IP, &pc); - exception_object->unwinder_cache.reserved2 = (uint32_t)pc; - } + exception_object->unwinder_cache.reserved2 = (uint32_t)pc; + } __unw_resume(cursor); // __unw_resume() only returns if there was an error. - return _URC_FATAL_PHASE2_ERROR; - - // # EHABI #7.4.3 - case _URC_FAILURE: - abort(); - - default: - // Personality routine returned an unknown result code. - _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", - personalityResult); - return _URC_FATAL_PHASE2_ERROR; - } - } - frame_count++; - } - - // Clean up phase did not resume at the frame that the search phase - // said it would... - return _URC_FATAL_PHASE2_ERROR; -} - + return _URC_FATAL_PHASE2_ERROR; + + // # EHABI #7.4.3 + case _URC_FAILURE: + abort(); + + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + frame_count++; + } + + // Clean up phase did not resume at the frame that the search phase + // said it would... + return _URC_FATAL_PHASE2_ERROR; +} + static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object, _Unwind_Stop_Fn stop, @@ -811,53 +811,53 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, return _URC_FATAL_PHASE2_ERROR; } -/// Called by __cxa_throw. Only returns if there is a fatal error. -_LIBUNWIND_EXPORT _Unwind_Reason_Code -_Unwind_RaiseException(_Unwind_Exception *exception_object) { +/// Called by __cxa_throw. Only returns if there is a fatal error. 
+_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", - static_cast(exception_object)); - unw_context_t uc; + static_cast(exception_object)); + unw_context_t uc; unw_cursor_t cursor; __unw_getcontext(&uc); - - // This field for is for compatibility with GCC to say this isn't a forced - // unwind. EHABI #7.2 - exception_object->unwinder_cache.reserved1 = 0; - - // phase 1: the search phase + + // This field for is for compatibility with GCC to say this isn't a forced + // unwind. EHABI #7.2 + exception_object->unwinder_cache.reserved1 = 0; + + // phase 1: the search phase _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); - if (phase1 != _URC_NO_REASON) - return phase1; - - // phase 2: the clean up phase + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase return unwind_phase2(&uc, &cursor, exception_object, false); -} - -_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) { - // This is to be called when exception handling completes to give us a chance - // to perform any housekeeping. EHABI #7.2. But we have nothing to do here. - (void)exception_object; -} - -/// When _Unwind_RaiseException() is in phase2, it hands control -/// to the personality function at each frame. The personality -/// may force a jump to a landing pad in that function, the landing -/// pad code may then call _Unwind_Resume() to continue with the -/// unwinding. Note: the call to _Unwind_Resume() is from compiler -/// geneated user code. All other _Unwind_* routines are called -/// by the C++ runtime __cxa_* routines. -/// -/// Note: re-throwing an exception (as opposed to continuing the unwind) -/// is implemented by having the code call __cxa_rethrow() which -/// in turn calls _Unwind_Resume_or_Rethrow(). 
-_LIBUNWIND_EXPORT void -_Unwind_Resume(_Unwind_Exception *exception_object) { +} + +_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) { + // This is to be called when exception handling completes to give us a chance + // to perform any housekeeping. EHABI #7.2. But we have nothing to do here. + (void)exception_object; +} + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. Note: the call to _Unwind_Resume() is from compiler +/// geneated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call __cxa_rethrow() which +/// in turn calls _Unwind_Resume_or_Rethrow(). +_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", - static_cast(exception_object)); - unw_context_t uc; + static_cast(exception_object)); + unw_context_t uc; unw_cursor_t cursor; __unw_getcontext(&uc); - + if (exception_object->unwinder_cache.reserved1) unwind_phase2_forced( &uc, &cursor, exception_object, @@ -865,77 +865,77 @@ _Unwind_Resume(_Unwind_Exception *exception_object) { (void *)exception_object->unwinder_cache.reserved3); else unwind_phase2(&uc, &cursor, exception_object, true); - - // Clients assume _Unwind_Resume() does not return, so all we can do is abort. - _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); -} - -/// Called by personality handler during phase 2 to get LSDA for current frame. 
-_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_proc_info_t frameInfo; - uintptr_t result = 0; + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + +/// Called by personality handler during phase 2 to get LSDA for current frame. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) - result = (uintptr_t)frameInfo.lsda; - _LIBUNWIND_TRACE_API( + result = (uintptr_t)frameInfo.lsda; + _LIBUNWIND_TRACE_API( "_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx", - static_cast(context), (long long)result); - return result; -} - + static_cast(context), (long long)result); + return result; +} + [[maybe_unused]] static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation, - void* valuep) { - uint64_t value = 0; - switch (representation) { - case _UVRSD_UINT32: - case _UVRSD_FLOAT: - memcpy(&value, valuep, sizeof(uint32_t)); - break; - - case _UVRSD_VFPX: - case _UVRSD_UINT64: - case _UVRSD_DOUBLE: - memcpy(&value, valuep, sizeof(uint64_t)); - break; - } - return value; -} - + void* valuep) { + uint64_t value = 0; + switch (representation) { + case _UVRSD_UINT32: + case _UVRSD_FLOAT: + memcpy(&value, valuep, sizeof(uint32_t)); + break; + + case _UVRSD_VFPX: + case _UVRSD_UINT64: + case _UVRSD_DOUBLE: + memcpy(&value, valuep, sizeof(uint64_t)); + break; + } + return value; +} + _LIBUNWIND_EXPORT _Unwind_VRS_Result -_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, - uint32_t regno, _Unwind_VRS_DataRepresentation representation, - void *valuep) { - _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, " 
+_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep) { + _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, " "rep=%d, value=0x%llX)", - static_cast(context), regclass, regno, - representation, - ValueAsBitPattern(representation, valuep)); - unw_cursor_t *cursor = (unw_cursor_t *)context; - switch (regclass) { - case _UVRSC_CORE: - if (representation != _UVRSD_UINT32 || regno > 15) - return _UVRSR_FAILED; + static_cast(context), regclass, regno, + representation, + ValueAsBitPattern(representation, valuep)); + unw_cursor_t *cursor = (unw_cursor_t *)context; + switch (regclass) { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 || regno > 15) + return _UVRSR_FAILED; return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), *(unw_word_t *)valuep) == UNW_ESUCCESS - ? _UVRSR_OK - : _UVRSR_FAILED; - case _UVRSC_VFP: - if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) - return _UVRSR_FAILED; - if (representation == _UVRSD_VFPX) { - // Can only touch d0-15 with FSTMFDX. - if (regno > 15) - return _UVRSR_FAILED; + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_VFP: + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + if (representation == _UVRSD_VFPX) { + // Can only touch d0-15 with FSTMFDX. + if (regno > 15) + return _UVRSR_FAILED; __unw_save_vfp_as_X(cursor); - } else { - if (regno > 31) - return _UVRSR_FAILED; - } + } else { + if (regno > 31) + return _UVRSR_FAILED; + } return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), *(unw_fpreg_t *)valuep) == UNW_ESUCCESS - ? _UVRSR_OK - : _UVRSR_FAILED; + ? _UVRSR_OK + : _UVRSR_FAILED; #if defined(__ARM_WMMX) case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) @@ -944,13 +944,13 @@ _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, *(unw_word_t *)valuep) == UNW_ESUCCESS ? 
_UVRSR_OK : _UVRSR_FAILED; - case _UVRSC_WMMXD: - if (representation != _UVRSD_DOUBLE || regno > 31) - return _UVRSR_FAILED; + case _UVRSC_WMMXD: + if (representation != _UVRSD_DOUBLE || regno > 31) + return _UVRSR_FAILED; return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), *(unw_fpreg_t *)valuep) == UNW_ESUCCESS - ? _UVRSR_OK - : _UVRSR_FAILED; + ? _UVRSR_OK + : _UVRSR_FAILED; #else case _UVRSC_WMMXC: case _UVRSC_WMMXD: @@ -965,40 +965,40 @@ _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, ? _UVRSR_OK : _UVRSR_FAILED; break; - } - _LIBUNWIND_ABORT("unsupported register class"); -} - -static _Unwind_VRS_Result -_Unwind_VRS_Get_Internal(_Unwind_Context *context, - _Unwind_VRS_RegClass regclass, uint32_t regno, - _Unwind_VRS_DataRepresentation representation, - void *valuep) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - switch (regclass) { - case _UVRSC_CORE: - if (representation != _UVRSD_UINT32 || regno > 15) - return _UVRSR_FAILED; + } + _LIBUNWIND_ABORT("unsupported register class"); +} + +static _Unwind_VRS_Result +_Unwind_VRS_Get_Internal(_Unwind_Context *context, + _Unwind_VRS_RegClass regclass, uint32_t regno, + _Unwind_VRS_DataRepresentation representation, + void *valuep) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + switch (regclass) { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 || regno > 15) + return _UVRSR_FAILED; return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), (unw_word_t *)valuep) == UNW_ESUCCESS - ? _UVRSR_OK - : _UVRSR_FAILED; - case _UVRSC_VFP: - if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) - return _UVRSR_FAILED; - if (representation == _UVRSD_VFPX) { - // Can only touch d0-15 with FSTMFDX. - if (regno > 15) - return _UVRSR_FAILED; + ? 
_UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_VFP: + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + if (representation == _UVRSD_VFPX) { + // Can only touch d0-15 with FSTMFDX. + if (regno > 15) + return _UVRSR_FAILED; __unw_save_vfp_as_X(cursor); - } else { - if (regno > 31) - return _UVRSR_FAILED; - } + } else { + if (regno > 31) + return _UVRSR_FAILED; + } return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), (unw_fpreg_t *)valuep) == UNW_ESUCCESS - ? _UVRSR_OK - : _UVRSR_FAILED; + ? _UVRSR_OK + : _UVRSR_FAILED; #if defined(__ARM_WMMX) case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) @@ -1007,13 +1007,13 @@ _Unwind_VRS_Get_Internal(_Unwind_Context *context, (unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; - case _UVRSC_WMMXD: - if (representation != _UVRSD_DOUBLE || regno > 31) - return _UVRSR_FAILED; + case _UVRSC_WMMXD: + if (representation != _UVRSD_DOUBLE || regno > 31) + return _UVRSR_FAILED; return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), (unw_fpreg_t *)valuep) == UNW_ESUCCESS - ? _UVRSR_OK - : _UVRSR_FAILED; + ? _UVRSR_OK + : _UVRSR_FAILED; #else case _UVRSC_WMMXC: case _UVRSC_WMMXD: @@ -1028,85 +1028,85 @@ _Unwind_VRS_Get_Internal(_Unwind_Context *context, ? 
_UVRSR_OK : _UVRSR_FAILED; break; - } - _LIBUNWIND_ABORT("unsupported register class"); -} - + } + _LIBUNWIND_ABORT("unsupported register class"); +} + _LIBUNWIND_EXPORT _Unwind_VRS_Result _Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t regno, _Unwind_VRS_DataRepresentation representation, void *valuep) { - _Unwind_VRS_Result result = - _Unwind_VRS_Get_Internal(context, regclass, regno, representation, - valuep); - _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, " + _Unwind_VRS_Result result = + _Unwind_VRS_Get_Internal(context, regclass, regno, representation, + valuep); + _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, " "rep=%d, value=0x%llX, result = %d)", - static_cast(context), regclass, regno, - representation, - ValueAsBitPattern(representation, valuep), result); - return result; -} - -_Unwind_VRS_Result -_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, - uint32_t discriminator, - _Unwind_VRS_DataRepresentation representation) { - _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, " + static_cast(context), regclass, regno, + representation, + ValueAsBitPattern(representation, valuep), result); + return result; +} + +_Unwind_VRS_Result +_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t discriminator, + _Unwind_VRS_DataRepresentation representation) { + _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, " "discriminator=%d, representation=%d)", - static_cast(context), regclass, discriminator, - representation); - switch (regclass) { + static_cast(context), regclass, discriminator, + representation); + switch (regclass) { case _UVRSC_WMMXC: #if !defined(__ARM_WMMX) break; #endif case _UVRSC_CORE: { - if (representation != _UVRSD_UINT32) - return _UVRSR_FAILED; - // When popping SP from the stack, we don't want to override it from the - // computed new stack location. See EHABI #7.5.4 table 3. 
- bool poppedSP = false; - uint32_t* sp; - if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, - _UVRSD_UINT32, &sp) != _UVRSR_OK) { - return _UVRSR_FAILED; - } - for (uint32_t i = 0; i < 16; ++i) { - if (!(discriminator & static_cast(1 << i))) - continue; - uint32_t value = *sp++; - if (regclass == _UVRSC_CORE && i == 13) - poppedSP = true; - if (_Unwind_VRS_Set(context, regclass, i, - _UVRSD_UINT32, &value) != _UVRSR_OK) { - return _UVRSR_FAILED; - } - } - if (!poppedSP) { - return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, - _UVRSD_UINT32, &sp); - } - return _UVRSR_OK; - } + if (representation != _UVRSD_UINT32) + return _UVRSR_FAILED; + // When popping SP from the stack, we don't want to override it from the + // computed new stack location. See EHABI #7.5.4 table 3. + bool poppedSP = false; + uint32_t* sp; + if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + for (uint32_t i = 0; i < 16; ++i) { + if (!(discriminator & static_cast(1 << i))) + continue; + uint32_t value = *sp++; + if (regclass == _UVRSC_CORE && i == 13) + poppedSP = true; + if (_Unwind_VRS_Set(context, regclass, i, + _UVRSD_UINT32, &value) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + } + if (!poppedSP) { + return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp); + } + return _UVRSR_OK; + } case _UVRSC_WMMXD: #if !defined(__ARM_WMMX) break; #endif case _UVRSC_VFP: { - if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) - return _UVRSR_FAILED; - uint32_t first = discriminator >> 16; - uint32_t count = discriminator & 0xffff; - uint32_t end = first+count; - uint32_t* sp; - if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, - _UVRSD_UINT32, &sp) != _UVRSR_OK) { - return _UVRSR_FAILED; - } - // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard - // format 1", which is equivalent to FSTMD + a padding word. 
- for (uint32_t i = first; i < end; ++i) { - // SP is only 32-bit aligned so don't copy 64-bit at a time. + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + uint32_t first = discriminator >> 16; + uint32_t count = discriminator & 0xffff; + uint32_t end = first+count; + uint32_t* sp; + if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard + // format 1", which is equivalent to FSTMD + a padding word. + for (uint32_t i = first; i < end; ++i) { + // SP is only 32-bit aligned so don't copy 64-bit at a time. uint64_t w0 = *sp++; uint64_t w1 = *sp++; #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ @@ -1116,15 +1116,15 @@ _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, #else #error "Unable to determine endianess" #endif - if (_Unwind_VRS_Set(context, regclass, i, representation, &value) != - _UVRSR_OK) - return _UVRSR_FAILED; - } - if (representation == _UVRSD_VFPX) - ++sp; - return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, - &sp); - } + if (_Unwind_VRS_Set(context, regclass, i, representation, &value) != + _UVRSR_OK) + return _UVRSR_FAILED; + } + if (representation == _UVRSD_VFPX) + ++sp; + return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp); + } case _UVRSC_PSEUDO: { if (representation != _UVRSD_UINT32 || discriminator != 0) return _UVRSR_FAILED; @@ -1139,10 +1139,10 @@ _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_RA_AUTH_CODE, _UVRSD_UINT32, &pac); } - } - _LIBUNWIND_ABORT("unsupported register class"); -} - + } + _LIBUNWIND_ABORT("unsupported register class"); +} + /// Not used by C++. /// Unwinds stack, calling "stop" function at each frame. /// Could be used to implement longjmp(). 
@@ -1164,44 +1164,44 @@ _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, _Unwind_Stop_Fn stop, stop_parameter); } -/// Called by personality handler during phase 2 to find the start of the -/// function. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetRegionStart(struct _Unwind_Context *context) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_proc_info_t frameInfo; - uintptr_t result = 0; +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) - result = (uintptr_t)frameInfo.start_ip; + result = (uintptr_t)frameInfo.start_ip; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX", - static_cast(context), (long long)result); - return result; -} - - -/// Called by personality handler during phase 2 if a foreign exception -// is caught. -_LIBUNWIND_EXPORT void -_Unwind_DeleteException(_Unwind_Exception *exception_object) { + static_cast(context), (long long)result); + return result; +} + + +/// Called by personality handler during phase 2 if a foreign exception +// is caught. 
+_LIBUNWIND_EXPORT void +_Unwind_DeleteException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", - static_cast(exception_object)); - if (exception_object->exception_cleanup != NULL) - (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, - exception_object); -} - -extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code + static_cast(exception_object)); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __gnu_unwind_frame(_Unwind_Exception * /* exception_object */, - struct _Unwind_Context *context) { - unw_cursor_t *cursor = (unw_cursor_t *)context; + struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; switch (__unw_step(cursor)) { case UNW_STEP_SUCCESS: return _URC_OK; case UNW_STEP_END: return _URC_END_OF_STACK; default: - return _URC_FAILURE; + return _URC_FAILURE; } -} - +} + #endif // defined(_LIBUNWIND_ARM_EHABI) diff --git a/contrib/libs/libunwind/src/Unwind-EHABI.h b/contrib/libs/libunwind/src/Unwind-EHABI.h index f24def91edd..ff3b5fc6fea 100644 --- a/contrib/libs/libunwind/src/Unwind-EHABI.h +++ b/contrib/libs/libunwind/src/Unwind-EHABI.h @@ -1,50 +1,50 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -//===----------------------------------------------------------------------===// - -#ifndef __UNWIND_EHABI_H__ -#define __UNWIND_EHABI_H__ - -#include <__libunwind_config.h> - +// +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_EHABI_H__ +#define __UNWIND_EHABI_H__ + +#include <__libunwind_config.h> + #if defined(_LIBUNWIND_ARM_EHABI) - -#include -#include - -// Unable to unwind in the ARM index table (section 5 EHABI). -#define UNW_EXIDX_CANTUNWIND 0x1 - -static inline uint32_t signExtendPrel31(uint32_t data) { - return data | ((data & 0x40000000u) << 1); -} - -static inline uint32_t readPrel31(const uint32_t *data) { - return (((uint32_t)(uintptr_t)data) + signExtendPrel31(*data)); -} - -#if defined(__cplusplus) -extern "C" { -#endif - -extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0( - _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); - -extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1( - _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); - -extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2( - _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); - -#if defined(__cplusplus) -} // extern "C" -#endif - + +#include +#include + +// Unable to unwind in the ARM index table (section 5 EHABI). 
+#define UNW_EXIDX_CANTUNWIND 0x1 + +static inline uint32_t signExtendPrel31(uint32_t data) { + return data | ((data & 0x40000000u) << 1); +} + +static inline uint32_t readPrel31(const uint32_t *data) { + return (((uint32_t)(uintptr_t)data) + signExtendPrel31(*data)); +} + +#if defined(__cplusplus) +extern "C" { +#endif + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +#if defined(__cplusplus) +} // extern "C" +#endif + #endif // defined(_LIBUNWIND_ARM_EHABI) - -#endif // __UNWIND_EHABI_H__ + +#endif // __UNWIND_EHABI_H__ diff --git a/contrib/libs/libunwind/src/Unwind-sjlj.c b/contrib/libs/libunwind/src/Unwind-sjlj.c index 18ece59862d..d487995bb78 100644 --- a/contrib/libs/libunwind/src/Unwind-sjlj.c +++ b/contrib/libs/libunwind/src/Unwind-sjlj.c @@ -1,23 +1,23 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Implements setjump-longjump based C++ exceptions -// -//===----------------------------------------------------------------------===// - -#include - +// +// +// Implements setjump-longjump based C++ exceptions +// +//===----------------------------------------------------------------------===// + +#include + #include -#include -#include -#include - -#include "config.h" - +#include +#include +#include + +#include "config.h" + /// With SJLJ based exceptions, any function that has a catch clause or needs to /// do any clean up when an exception propagates through it, needs to call /// \c _Unwind_SjLj_Register at the start of the function and @@ -25,19 +25,19 @@ /// the address of a block of memory in the function's stack frame. The runtime /// keeps a linked list (stack) of these blocks - one per thread. The calling /// function also sets the personality and lsda fields of the block. - + #if defined(_LIBUNWIND_BUILD_SJLJ_APIS) - -struct _Unwind_FunctionContext { - // next function in stack of handlers - struct _Unwind_FunctionContext *prev; - + +struct _Unwind_FunctionContext { + // next function in stack of handlers + struct _Unwind_FunctionContext *prev; + #if defined(__ve__) // VE requires to store 64 bit pointers in the buffer for SjLj execption. // We expand the size of values defined here. This size must be matched // to the size returned by TargetMachine::getSjLjDataSize(). 
- // set by calling function before registering to be the landing pad + // set by calling function before registering to be the landing pad uint64_t resumeLocation; // set by personality handler to be parameters passed to landing pad function @@ -45,20 +45,20 @@ struct _Unwind_FunctionContext { #else // set by calling function before registering to be the landing pad uint32_t resumeLocation; - - // set by personality handler to be parameters passed to landing pad function + + // set by personality handler to be parameters passed to landing pad function uint32_t resumeParameters[4]; #endif - - // set by calling function before registering + + // set by calling function before registering _Unwind_Personality_Fn personality; // arm offset=24 - uintptr_t lsda; // arm offset=28 - - // variable length array, contains registers to restore - // 0 = r7, 1 = pc, 2 = sp - void *jbuf[]; -}; - + uintptr_t lsda; // arm offset=28 + + // variable length array, contains registers to restore + // 0 = r7, 1 = pc, 2 = sp + void *jbuf[]; +}; + #if defined(_LIBUNWIND_HAS_NO_THREADS) # define _LIBUNWIND_THREAD_LOCAL #else @@ -72,7 +72,7 @@ struct _Unwind_FunctionContext { # error Unable to create thread local storage # endif #endif - + #if !defined(FOR_DYLD) @@ -102,427 +102,427 @@ __Unwind_SjLj_SetTopOfFunctionStack(struct _Unwind_FunctionContext *fc) { #endif -/// Called at start of each function that catches exceptions -_LIBUNWIND_EXPORT void -_Unwind_SjLj_Register(struct _Unwind_FunctionContext *fc) { - fc->prev = __Unwind_SjLj_GetTopOfFunctionStack(); - __Unwind_SjLj_SetTopOfFunctionStack(fc); -} - - -/// Called at end of each function that catches exceptions -_LIBUNWIND_EXPORT void -_Unwind_SjLj_Unregister(struct _Unwind_FunctionContext *fc) { - __Unwind_SjLj_SetTopOfFunctionStack(fc->prev); -} - - -static _Unwind_Reason_Code -unwind_phase1(struct _Unwind_Exception *exception_object) { - _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); +/// Called at start of 
each function that catches exceptions +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Register(struct _Unwind_FunctionContext *fc) { + fc->prev = __Unwind_SjLj_GetTopOfFunctionStack(); + __Unwind_SjLj_SetTopOfFunctionStack(fc); +} + + +/// Called at end of each function that catches exceptions +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Unregister(struct _Unwind_FunctionContext *fc) { + __Unwind_SjLj_SetTopOfFunctionStack(fc->prev); +} + + +static _Unwind_Reason_Code +unwind_phase1(struct _Unwind_Exception *exception_object) { + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: initial function-context=%p", (void *)c); - - // walk each frame looking for a place to stop - for (bool handlerNotFound = true; handlerNotFound; c = c->prev) { - - // check for no more frames - if (c == NULL) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): reached " + + // walk each frame looking for a place to stop + for (bool handlerNotFound = true; handlerNotFound; c = c->prev) { + + // check for no more frames + if (c == NULL) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): reached " "bottom => _URC_END_OF_STACK", (void *)exception_object); - return _URC_END_OF_STACK; - } - + return _URC_END_OF_STACK; + } + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: function-context=%p", (void *)c); - // if there is a personality routine, ask it if it will want to stop at this - // frame - if (c->personality != NULL) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): calling " + // if there is a personality routine, ask it if it will want to stop at this + // frame + if (c->personality != NULL) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): calling " "personality function %p", (void *)exception_object, (void *)c->personality); - _Unwind_Reason_Code personalityResult = (*c->personality)( - 1, _UA_SEARCH_PHASE, exception_object->exception_class, - exception_object, (struct _Unwind_Context *)c); - switch (personalityResult) 
{ - case _URC_HANDLER_FOUND: - // found a catch clause or locals that need destructing in this frame - // stop search and remember function context - handlerNotFound = false; - exception_object->private_2 = (uintptr_t) c; - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " + _Unwind_Reason_Code personalityResult = (*c->personality)( + 1, _UA_SEARCH_PHASE, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember function context + handlerNotFound = false; + exception_object->private_2 = (uintptr_t) c; + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " "_URC_HANDLER_FOUND", (void *)exception_object); - return _URC_NO_REASON; - - case _URC_CONTINUE_UNWIND: - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " "_URC_CONTINUE_UNWIND", (void *)exception_object); - // continue unwinding - break; - - default: - // something went wrong - _LIBUNWIND_TRACE_UNWINDING( + // continue unwinding + break; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", (void *)exception_object); - return _URC_FATAL_PHASE1_ERROR; - } - } - } - return _URC_NO_REASON; -} - - -static _Unwind_Reason_Code -unwind_phase2(struct _Unwind_Exception *exception_object) { + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + + +static _Unwind_Reason_Code +unwind_phase2(struct _Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", (void *)exception_object); - - // walk each frame until we reach where search phase said to stop - _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); - while (true) { + + // walk each frame until we reach where search 
phase said to stop + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + while (true) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase2s(ex_ojb=%p): context=%p", (void *)exception_object, (void *)c); - - // check for no more frames - if (c == NULL) { + + // check for no more frames + if (c == NULL) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): __unw_step() reached " "bottom => _URC_END_OF_STACK", (void *)exception_object); - return _URC_END_OF_STACK; - } - - // if there is a personality routine, tell it we are unwinding - if (c->personality != NULL) { - _Unwind_Action action = _UA_CLEANUP_PHASE; - if ((uintptr_t) c == exception_object->private_2) - action = (_Unwind_Action)( - _UA_CLEANUP_PHASE | - _UA_HANDLER_FRAME); // tell personality this was the frame it marked - // in phase 1 - _Unwind_Reason_Code personalityResult = - (*c->personality)(1, action, exception_object->exception_class, - exception_object, (struct _Unwind_Context *)c); - switch (personalityResult) { - case _URC_CONTINUE_UNWIND: - // continue unwinding - _LIBUNWIND_TRACE_UNWINDING( + return _URC_END_OF_STACK; + } + + // if there is a personality routine, tell it we are unwinding + if (c->personality != NULL) { + _Unwind_Action action = _UA_CLEANUP_PHASE; + if ((uintptr_t) c == exception_object->private_2) + action = (_Unwind_Action)( + _UA_CLEANUP_PHASE | + _UA_HANDLER_FRAME); // tell personality this was the frame it marked + // in phase 1 + _Unwind_Reason_Code personalityResult = + (*c->personality)(1, action, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // continue unwinding + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", (void *)exception_object); - if ((uintptr_t) c == exception_object->private_2) { - // phase 1 said we would stop at this frame, but we did not... 
- _LIBUNWIND_ABORT("during phase1 personality function said it would " - "stop here, but now if phase2 it did not stop here"); - } - break; - case _URC_INSTALL_CONTEXT: - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): " + if ((uintptr_t) c == exception_object->private_2) { + // phase 1 said we would stop at this frame, but we did not... + _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now if phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): " "_URC_INSTALL_CONTEXT, will resume at " "landing pad %p", (void *)exception_object, c->jbuf[1]); - // personality routine says to transfer control to landing pad - // we may get control back if landing pad calls _Unwind_Resume() - __Unwind_SjLj_SetTopOfFunctionStack(c); - __builtin_longjmp(c->jbuf, 1); + // personality routine says to transfer control to landing pad + // we may get control back if landing pad calls _Unwind_Resume() + __Unwind_SjLj_SetTopOfFunctionStack(c); + __builtin_longjmp(c->jbuf, 1); // __unw_resume() only returns if there was an error - return _URC_FATAL_PHASE2_ERROR; - default: - // something went wrong - _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", - personalityResult); - return _URC_FATAL_PHASE2_ERROR; - } - } - c = c->prev; - } - - // clean up phase did not resume at the frame that the search phase said it - // would - return _URC_FATAL_PHASE2_ERROR; -} - - -static _Unwind_Reason_Code -unwind_phase2_forced(struct _Unwind_Exception *exception_object, - _Unwind_Stop_Fn stop, void *stop_parameter) { - // walk each frame until we reach where search phase said to stop - _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); - while (true) { - - // get next frame (skip over first which is _Unwind_RaiseException) - if (c == NULL) { + return _URC_FATAL_PHASE2_ERROR; + default: + // something went wrong + _LIBUNWIND_DEBUG_LOG("personality 
function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + c = c->prev; + } + + // clean up phase did not resume at the frame that the search phase said it + // would + return _URC_FATAL_PHASE2_ERROR; +} + + +static _Unwind_Reason_Code +unwind_phase2_forced(struct _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + // walk each frame until we reach where search phase said to stop + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + while (true) { + + // get next frame (skip over first which is _Unwind_RaiseException) + if (c == NULL) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): __unw_step() reached " "bottom => _URC_END_OF_STACK", (void *)exception_object); - return _URC_END_OF_STACK; - } - - // call stop function at each frame - _Unwind_Action action = - (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); - _Unwind_Reason_Code stopResult = - (*stop)(1, action, exception_object->exception_class, exception_object, - (struct _Unwind_Context *)c, stop_parameter); - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + return _URC_END_OF_STACK; + } + + // call stop function at each frame + _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c, stop_parameter); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "stop function returned %d", (void *)exception_object, stopResult); - if (stopResult != _URC_NO_REASON) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "stopped by stop function", (void *)exception_object); - return _URC_FATAL_PHASE2_ERROR; - } - - // if there is a personality routine, tell it we are unwinding - if (c->personality != NULL) { 
+ return _URC_FATAL_PHASE2_ERROR; + } + + // if there is a personality routine, tell it we are unwinding + if (c->personality != NULL) { _Unwind_Personality_Fn p = (_Unwind_Personality_Fn)c->personality; - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "calling personality function %p", (void *)exception_object, (void *)p); - _Unwind_Reason_Code personalityResult = - (*p)(1, action, exception_object->exception_class, exception_object, - (struct _Unwind_Context *)c); - switch (personalityResult) { - case _URC_CONTINUE_UNWIND: - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "personality returned _URC_CONTINUE_UNWIND", (void *)exception_object); - // destructors called, continue unwinding - break; - case _URC_INSTALL_CONTEXT: - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + // destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "personality returned _URC_INSTALL_CONTEXT", (void *)exception_object); - // we may get control back if landing pad calls _Unwind_Resume() - __Unwind_SjLj_SetTopOfFunctionStack(c); - __builtin_longjmp(c->jbuf, 1); - break; - default: - // something went wrong - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " - "personality returned %d, " + // we may get control back if landing pad calls _Unwind_Resume() + __Unwind_SjLj_SetTopOfFunctionStack(c); + __builtin_longjmp(c->jbuf, 1); + break; + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " "_URC_FATAL_PHASE2_ERROR", (void 
*)exception_object, personalityResult); - return _URC_FATAL_PHASE2_ERROR; - } - } - c = c->prev; - } - - // call stop function one last time and tell it we've reached the end of the - // stack - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + return _URC_FATAL_PHASE2_ERROR; + } + } + c = c->prev; + } + + // call stop function one last time and tell it we've reached the end of the + // stack + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " "function with _UA_END_OF_STACK", (void *)exception_object); - _Unwind_Action lastAction = - (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); - (*stop)(1, lastAction, exception_object->exception_class, exception_object, - (struct _Unwind_Context *)c, stop_parameter); - - // clean up phase did not resume at the frame that the search phase said it - // would - return _URC_FATAL_PHASE2_ERROR; -} - - -/// Called by __cxa_throw. Only returns if there is a fatal error -_LIBUNWIND_EXPORT _Unwind_Reason_Code -_Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) { + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c, stop_parameter); + + // clean up phase did not resume at the frame that the search phase said it + // would + return _URC_FATAL_PHASE2_ERROR; +} + + +/// Called by __cxa_throw. 
Only returns if there is a fatal error +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_SjLj_RaiseException(ex_obj=%p)", (void *)exception_object); - - // mark that this is a non-forced unwind, so _Unwind_Resume() can do the right - // thing - exception_object->private_1 = 0; - exception_object->private_2 = 0; - - // phase 1: the search phase - _Unwind_Reason_Code phase1 = unwind_phase1(exception_object); - if (phase1 != _URC_NO_REASON) - return phase1; - - // phase 2: the clean up phase - return unwind_phase2(exception_object); -} - - - -/// When _Unwind_RaiseException() is in phase2, it hands control -/// to the personality function at each frame. The personality -/// may force a jump to a landing pad in that function, the landing -/// pad code may then call _Unwind_Resume() to continue with the -/// unwinding. Note: the call to _Unwind_Resume() is from compiler -/// geneated user code. All other _Unwind_* routines are called -/// by the C++ runtime __cxa_* routines. -/// -/// Re-throwing an exception is implemented by having the code call -/// __cxa_rethrow() which in turn calls _Unwind_Resume_or_Rethrow() -_LIBUNWIND_EXPORT void -_Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) { + + // mark that this is a non-forced unwind, so _Unwind_Resume() can do the right + // thing + exception_object->private_1 = 0; + exception_object->private_2 = 0; + + // phase 1: the search phase + _Unwind_Reason_Code phase1 = unwind_phase1(exception_object); + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase + return unwind_phase2(exception_object); +} + + + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. 
Note: the call to _Unwind_Resume() is from compiler +/// geneated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Re-throwing an exception is implemented by having the code call +/// __cxa_rethrow() which in turn calls _Unwind_Resume_or_Rethrow() +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_SjLj_Resume(ex_obj=%p)", (void *)exception_object); - - if (exception_object->private_1 != 0) - unwind_phase2_forced(exception_object, - (_Unwind_Stop_Fn) exception_object->private_1, - (void *)exception_object->private_2); - else - unwind_phase2(exception_object); - - // clients assume _Unwind_Resume() does not return, so all we can do is abort. - _LIBUNWIND_ABORT("_Unwind_SjLj_Resume() can't return"); -} - - -/// Called by __cxa_rethrow(). -_LIBUNWIND_EXPORT _Unwind_Reason_Code -_Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *exception_object) { - _LIBUNWIND_TRACE_API("__Unwind_SjLj_Resume_or_Rethrow(ex_obj=%p), " + + if (exception_object->private_1 != 0) + unwind_phase2_forced(exception_object, + (_Unwind_Stop_Fn) exception_object->private_1, + (void *)exception_object->private_2); + else + unwind_phase2(exception_object); + + // clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_SjLj_Resume() can't return"); +} + + +/// Called by __cxa_rethrow(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("__Unwind_SjLj_Resume_or_Rethrow(ex_obj=%p), " "private_1=%" PRIuPTR, (void *)exception_object, exception_object->private_1); - // If this is non-forced and a stopping place was found, then this is a - // re-throw. - // Call _Unwind_RaiseException() as if this was a new exception. 
- if (exception_object->private_1 == 0) { - return _Unwind_SjLj_RaiseException(exception_object); - // should return if there is no catch clause, so that __cxa_rethrow can call - // std::terminate() - } - - // Call through to _Unwind_Resume() which distiguishes between forced and - // regular exceptions. - _Unwind_SjLj_Resume(exception_object); - _LIBUNWIND_ABORT("__Unwind_SjLj_Resume_or_Rethrow() called " - "_Unwind_SjLj_Resume() which unexpectedly returned"); -} - - -/// Called by personality handler during phase 2 to get LSDA for current frame. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { - _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; - _LIBUNWIND_TRACE_API("_Unwind_GetLanguageSpecificData(context=%p) " + // If this is non-forced and a stopping place was found, then this is a + // re-throw. + // Call _Unwind_RaiseException() as if this was a new exception. + if (exception_object->private_1 == 0) { + return _Unwind_SjLj_RaiseException(exception_object); + // should return if there is no catch clause, so that __cxa_rethrow can call + // std::terminate() + } + + // Call through to _Unwind_Resume() which distiguishes between forced and + // regular exceptions. + _Unwind_SjLj_Resume(exception_object); + _LIBUNWIND_ABORT("__Unwind_SjLj_Resume_or_Rethrow() called " + "_Unwind_SjLj_Resume() which unexpectedly returned"); +} + + +/// Called by personality handler during phase 2 to get LSDA for current frame. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + _LIBUNWIND_TRACE_API("_Unwind_GetLanguageSpecificData(context=%p) " "=> 0x%" PRIuPTR, (void *)context, ufc->lsda); - return ufc->lsda; -} - - -/// Called by personality handler during phase 2 to get register values. 
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, - int index) { + return ufc->lsda; +} + + +/// Called by personality handler during phase 2 to get register values. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, + int index) { _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d)", (void *)context, index); - _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; - return ufc->resumeParameters[index]; -} - - -/// Called by personality handler during phase 2 to alter register values. -_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, - uintptr_t new_value) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + return ufc->resumeParameters[index]; +} + + +/// Called by personality handler during phase 2 to alter register values. +_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t new_value) { _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%" PRIuPTR ")", (void *)context, index, new_value); - _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; - ufc->resumeParameters[index] = new_value; -} - - -/// Called by personality handler during phase 2 to get instruction pointer. -_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { - _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + ufc->resumeParameters[index] = new_value; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. 
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIu32, (void *)context, ufc->resumeLocation + 1); - return ufc->resumeLocation + 1; -} - - -/// Called by personality handler during phase 2 to get instruction pointer. -/// ipBefore is a boolean that says if IP is already adjusted to be the call -/// site address. Normally IP is the return address. -_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, - int *ipBefore) { - _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; - *ipBefore = 0; + return ufc->resumeLocation + 1; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +/// ipBefore is a boolean that says if IP is already adjusted to be the call +/// site address. Normally IP is the return address. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + *ipBefore = 0; _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p, %p) => 0x%" PRIu32, (void *)context, (void *)ipBefore, ufc->resumeLocation + 1); - return ufc->resumeLocation + 1; -} - - -/// Called by personality handler during phase 2 to alter instruction pointer. -_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, - uintptr_t new_value) { + return ufc->resumeLocation + 1; +} + + +/// Called by personality handler during phase 2 to alter instruction pointer. 
+_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, + uintptr_t new_value) { _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%" PRIuPTR ")", (void *)context, new_value); - _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; - ufc->resumeLocation = new_value - 1; -} - - -/// Called by personality handler during phase 2 to find the start of the -/// function. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetRegionStart(struct _Unwind_Context *context) { - // Not supported or needed for sjlj based unwinding - (void)context; + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + ufc->resumeLocation = new_value - 1; +} + + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p)", (void *)context); - return 0; -} - - -/// Called by personality handler during phase 2 if a foreign exception -/// is caught. -_LIBUNWIND_EXPORT void -_Unwind_DeleteException(struct _Unwind_Exception *exception_object) { + return 0; +} + + +/// Called by personality handler during phase 2 if a foreign exception +/// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(struct _Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", (void *)exception_object); - if (exception_object->exception_cleanup != NULL) - (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, - exception_object); -} - - - -/// Called by personality handler during phase 2 to get base address for data -/// relative encodings. 
-_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetDataRelBase(struct _Unwind_Context *context) { - // Not supported or needed for sjlj based unwinding - (void)context; + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + + + +/// Called by personality handler during phase 2 to get base address for data +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetDataRelBase(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context); - _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); -} - - -/// Called by personality handler during phase 2 to get base address for text -/// relative encodings. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetTextRelBase(struct _Unwind_Context *context) { - // Not supported or needed for sjlj based unwinding - (void)context; + _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); +} + + +/// Called by personality handler during phase 2 to get base address for text +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetTextRelBase(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context); - _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); -} - - -/// Called by personality handler to get "Call Frame Area" for current frame. -_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { + _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); +} + + +/// Called by personality handler to get "Call Frame Area" for current frame. 
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p)", (void *)context); - if (context != NULL) { - _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; - // Setjmp/longjmp based exceptions don't have a true CFA. - // Instead, the SP in the jmpbuf is the closest approximation. - return (uintptr_t) ufc->jbuf[2]; - } - return 0; -} - + if (context != NULL) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + // Setjmp/longjmp based exceptions don't have a true CFA. + // Instead, the SP in the jmpbuf is the closest approximation. + return (uintptr_t) ufc->jbuf[2]; + } + return 0; +} + #endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS) diff --git a/contrib/libs/libunwind/src/UnwindCursor.hpp b/contrib/libs/libunwind/src/UnwindCursor.hpp index d751111dd2d..1ca842f33aa 100644 --- a/contrib/libs/libunwind/src/UnwindCursor.hpp +++ b/contrib/libs/libunwind/src/UnwindCursor.hpp @@ -1,30 +1,30 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// +// +// // C++ interface to lower levels of libunwind -//===----------------------------------------------------------------------===// - -#ifndef __UNWINDCURSOR_HPP__ -#define __UNWINDCURSOR_HPP__ - +//===----------------------------------------------------------------------===// + +#ifndef __UNWINDCURSOR_HPP__ +#define __UNWINDCURSOR_HPP__ + #include "cet_unwind.h" -#include -#include -#include -#include - +#include +#include +#include +#include + #ifdef _WIN32 #include #include #endif -#ifdef __APPLE__ - #include -#endif - +#ifdef __APPLE__ + #include +#endif + #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) // Provide a definition for the DISPATCHER_CONTEXT struct for old (Win7 and // earlier) SDKs. @@ -62,402 +62,402 @@ extern "C" _Unwind_Reason_Code __libunwind_seh_personality( #endif -#include "config.h" - -#include "AddressSpace.hpp" -#include "CompactUnwinder.hpp" -#include "config.h" -#include "DwarfInstructions.hpp" -#include "EHHeaderParser.hpp" -#include "libunwind.h" -#include "Registers.hpp" +#include "config.h" + +#include "AddressSpace.hpp" +#include "CompactUnwinder.hpp" +#include "config.h" +#include "DwarfInstructions.hpp" +#include "EHHeaderParser.hpp" +#include "libunwind.h" +#include "Registers.hpp" #include "RWMutex.hpp" -#include "Unwind-EHABI.h" - -namespace libunwind { - +#include "Unwind-EHABI.h" + +namespace libunwind { + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) -/// Cache of recently found FDEs. -template -class _LIBUNWIND_HIDDEN DwarfFDECache { - typedef typename A::pint_t pint_t; -public: +/// Cache of recently found FDEs. 
+template +class _LIBUNWIND_HIDDEN DwarfFDECache { + typedef typename A::pint_t pint_t; +public: static constexpr pint_t kSearchAll = static_cast(-1); - static pint_t findFDE(pint_t mh, pint_t pc); - static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde); - static void removeAllIn(pint_t mh); - static void iterateCacheEntries(void (*func)(unw_word_t ip_start, - unw_word_t ip_end, - unw_word_t fde, unw_word_t mh)); - -private: - - struct entry { - pint_t mh; - pint_t ip_start; - pint_t ip_end; - pint_t fde; - }; - - // These fields are all static to avoid needing an initializer. - // There is only one instance of this class per process. + static pint_t findFDE(pint_t mh, pint_t pc); + static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde); + static void removeAllIn(pint_t mh); + static void iterateCacheEntries(void (*func)(unw_word_t ip_start, + unw_word_t ip_end, + unw_word_t fde, unw_word_t mh)); + +private: + + struct entry { + pint_t mh; + pint_t ip_start; + pint_t ip_end; + pint_t fde; + }; + + // These fields are all static to avoid needing an initializer. + // There is only one instance of this class per process. 
static RWMutex _lock; -#ifdef __APPLE__ - static void dyldUnloadHook(const struct mach_header *mh, intptr_t slide); - static bool _registeredForDyldUnloads; -#endif - static entry *_buffer; - static entry *_bufferUsed; - static entry *_bufferEnd; - static entry _initialBuffer[64]; -}; - -template -typename DwarfFDECache::entry * -DwarfFDECache::_buffer = _initialBuffer; - -template -typename DwarfFDECache::entry * -DwarfFDECache::_bufferUsed = _initialBuffer; - -template -typename DwarfFDECache::entry * -DwarfFDECache::_bufferEnd = &_initialBuffer[64]; - -template -typename DwarfFDECache::entry DwarfFDECache::_initialBuffer[64]; - -template +#ifdef __APPLE__ + static void dyldUnloadHook(const struct mach_header *mh, intptr_t slide); + static bool _registeredForDyldUnloads; +#endif + static entry *_buffer; + static entry *_bufferUsed; + static entry *_bufferEnd; + static entry _initialBuffer[64]; +}; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_buffer = _initialBuffer; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_bufferUsed = _initialBuffer; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_bufferEnd = &_initialBuffer[64]; + +template +typename DwarfFDECache::entry DwarfFDECache::_initialBuffer[64]; + +template RWMutex DwarfFDECache::_lock; - -#ifdef __APPLE__ -template -bool DwarfFDECache::_registeredForDyldUnloads = false; -#endif - -template -typename A::pint_t DwarfFDECache::findFDE(pint_t mh, pint_t pc) { - pint_t result = 0; + +#ifdef __APPLE__ +template +bool DwarfFDECache::_registeredForDyldUnloads = false; +#endif + +template +typename A::pint_t DwarfFDECache::findFDE(pint_t mh, pint_t pc) { + pint_t result = 0; _LIBUNWIND_LOG_IF_FALSE(_lock.lock_shared()); - for (entry *p = _buffer; p < _bufferUsed; ++p) { + for (entry *p = _buffer; p < _bufferUsed; ++p) { if ((mh == p->mh) || (mh == kSearchAll)) { - if ((p->ip_start <= pc) && (pc < p->ip_end)) { - result = p->fde; - break; - } - } - } + if ((p->ip_start <= pc) 
&& (pc < p->ip_end)) { + result = p->fde; + break; + } + } + } _LIBUNWIND_LOG_IF_FALSE(_lock.unlock_shared()); - return result; -} - -template -void DwarfFDECache::add(pint_t mh, pint_t ip_start, pint_t ip_end, - pint_t fde) { -#if !defined(_LIBUNWIND_NO_HEAP) + return result; +} + +template +void DwarfFDECache::add(pint_t mh, pint_t ip_start, pint_t ip_end, + pint_t fde) { +#if !defined(_LIBUNWIND_NO_HEAP) _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); - if (_bufferUsed >= _bufferEnd) { - size_t oldSize = (size_t)(_bufferEnd - _buffer); - size_t newSize = oldSize * 4; - // Can't use operator new (we are below it). - entry *newBuffer = (entry *)malloc(newSize * sizeof(entry)); - memcpy(newBuffer, _buffer, oldSize * sizeof(entry)); - if (_buffer != _initialBuffer) - free(_buffer); - _buffer = newBuffer; - _bufferUsed = &newBuffer[oldSize]; - _bufferEnd = &newBuffer[newSize]; - } - _bufferUsed->mh = mh; - _bufferUsed->ip_start = ip_start; - _bufferUsed->ip_end = ip_end; - _bufferUsed->fde = fde; - ++_bufferUsed; -#ifdef __APPLE__ - if (!_registeredForDyldUnloads) { - _dyld_register_func_for_remove_image(&dyldUnloadHook); - _registeredForDyldUnloads = true; - } -#endif + if (_bufferUsed >= _bufferEnd) { + size_t oldSize = (size_t)(_bufferEnd - _buffer); + size_t newSize = oldSize * 4; + // Can't use operator new (we are below it). 
+ entry *newBuffer = (entry *)malloc(newSize * sizeof(entry)); + memcpy(newBuffer, _buffer, oldSize * sizeof(entry)); + if (_buffer != _initialBuffer) + free(_buffer); + _buffer = newBuffer; + _bufferUsed = &newBuffer[oldSize]; + _bufferEnd = &newBuffer[newSize]; + } + _bufferUsed->mh = mh; + _bufferUsed->ip_start = ip_start; + _bufferUsed->ip_end = ip_end; + _bufferUsed->fde = fde; + ++_bufferUsed; +#ifdef __APPLE__ + if (!_registeredForDyldUnloads) { + _dyld_register_func_for_remove_image(&dyldUnloadHook); + _registeredForDyldUnloads = true; + } +#endif _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); -#endif -} - -template -void DwarfFDECache::removeAllIn(pint_t mh) { +#endif +} + +template +void DwarfFDECache::removeAllIn(pint_t mh) { _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); - entry *d = _buffer; - for (const entry *s = _buffer; s < _bufferUsed; ++s) { - if (s->mh != mh) { - if (d != s) - *d = *s; - ++d; - } - } - _bufferUsed = d; + entry *d = _buffer; + for (const entry *s = _buffer; s < _bufferUsed; ++s) { + if (s->mh != mh) { + if (d != s) + *d = *s; + ++d; + } + } + _bufferUsed = d; _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); -} - -#ifdef __APPLE__ -template -void DwarfFDECache::dyldUnloadHook(const struct mach_header *mh, intptr_t ) { - removeAllIn((pint_t) mh); -} -#endif - -template -void DwarfFDECache::iterateCacheEntries(void (*func)( - unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { +} + +#ifdef __APPLE__ +template +void DwarfFDECache::dyldUnloadHook(const struct mach_header *mh, intptr_t ) { + removeAllIn((pint_t) mh); +} +#endif + +template +void DwarfFDECache::iterateCacheEntries(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); - for (entry *p = _buffer; p < _bufferUsed; ++p) { - (*func)(p->ip_start, p->ip_end, p->fde, p->mh); - } + for (entry *p = _buffer; p < _bufferUsed; ++p) { + (*func)(p->ip_start, p->ip_end, p->fde, p->mh); + } 
_LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); -} +} #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - - -#define arrayoffsetof(type, index, field) ((size_t)(&((type *)0)[index].field)) - + + +#define arrayoffsetof(type, index, field) ((size_t)(&((type *)0)[index].field)) + #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) -template class UnwindSectionHeader { -public: - UnwindSectionHeader(A &addressSpace, typename A::pint_t addr) - : _addressSpace(addressSpace), _addr(addr) {} - - uint32_t version() const { - return _addressSpace.get32(_addr + - offsetof(unwind_info_section_header, version)); - } - uint32_t commonEncodingsArraySectionOffset() const { - return _addressSpace.get32(_addr + - offsetof(unwind_info_section_header, - commonEncodingsArraySectionOffset)); - } - uint32_t commonEncodingsArrayCount() const { - return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, - commonEncodingsArrayCount)); - } - uint32_t personalityArraySectionOffset() const { - return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, - personalityArraySectionOffset)); - } - uint32_t personalityArrayCount() const { - return _addressSpace.get32( - _addr + offsetof(unwind_info_section_header, personalityArrayCount)); - } - uint32_t indexSectionOffset() const { - return _addressSpace.get32( - _addr + offsetof(unwind_info_section_header, indexSectionOffset)); - } - uint32_t indexCount() const { - return _addressSpace.get32( - _addr + offsetof(unwind_info_section_header, indexCount)); - } - -private: - A &_addressSpace; - typename A::pint_t _addr; -}; - -template class UnwindSectionIndexArray { -public: - UnwindSectionIndexArray(A &addressSpace, typename A::pint_t addr) - : _addressSpace(addressSpace), _addr(addr) {} - - uint32_t functionOffset(uint32_t index) const { - return _addressSpace.get32( - _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, - functionOffset)); - } - uint32_t secondLevelPagesSectionOffset(uint32_t index) const { - return 
_addressSpace.get32( - _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, - secondLevelPagesSectionOffset)); - } - uint32_t lsdaIndexArraySectionOffset(uint32_t index) const { - return _addressSpace.get32( - _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, - lsdaIndexArraySectionOffset)); - } - -private: - A &_addressSpace; - typename A::pint_t _addr; -}; - -template class UnwindSectionRegularPageHeader { -public: - UnwindSectionRegularPageHeader(A &addressSpace, typename A::pint_t addr) - : _addressSpace(addressSpace), _addr(addr) {} - - uint32_t kind() const { - return _addressSpace.get32( - _addr + offsetof(unwind_info_regular_second_level_page_header, kind)); - } - uint16_t entryPageOffset() const { - return _addressSpace.get16( - _addr + offsetof(unwind_info_regular_second_level_page_header, - entryPageOffset)); - } - uint16_t entryCount() const { - return _addressSpace.get16( - _addr + - offsetof(unwind_info_regular_second_level_page_header, entryCount)); - } - -private: - A &_addressSpace; - typename A::pint_t _addr; -}; - -template class UnwindSectionRegularArray { -public: - UnwindSectionRegularArray(A &addressSpace, typename A::pint_t addr) - : _addressSpace(addressSpace), _addr(addr) {} - - uint32_t functionOffset(uint32_t index) const { - return _addressSpace.get32( - _addr + arrayoffsetof(unwind_info_regular_second_level_entry, index, - functionOffset)); - } - uint32_t encoding(uint32_t index) const { - return _addressSpace.get32( - _addr + - arrayoffsetof(unwind_info_regular_second_level_entry, index, encoding)); - } - -private: - A &_addressSpace; - typename A::pint_t _addr; -}; - -template class UnwindSectionCompressedPageHeader { -public: - UnwindSectionCompressedPageHeader(A &addressSpace, typename A::pint_t addr) - : _addressSpace(addressSpace), _addr(addr) {} - - uint32_t kind() const { - return _addressSpace.get32( - _addr + - offsetof(unwind_info_compressed_second_level_page_header, kind)); - } - uint16_t 
entryPageOffset() const { - return _addressSpace.get16( - _addr + offsetof(unwind_info_compressed_second_level_page_header, - entryPageOffset)); - } - uint16_t entryCount() const { - return _addressSpace.get16( - _addr + - offsetof(unwind_info_compressed_second_level_page_header, entryCount)); - } - uint16_t encodingsPageOffset() const { - return _addressSpace.get16( - _addr + offsetof(unwind_info_compressed_second_level_page_header, - encodingsPageOffset)); - } - uint16_t encodingsCount() const { - return _addressSpace.get16( - _addr + offsetof(unwind_info_compressed_second_level_page_header, - encodingsCount)); - } - -private: - A &_addressSpace; - typename A::pint_t _addr; -}; - -template class UnwindSectionCompressedArray { -public: - UnwindSectionCompressedArray(A &addressSpace, typename A::pint_t addr) - : _addressSpace(addressSpace), _addr(addr) {} - - uint32_t functionOffset(uint32_t index) const { - return UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET( - _addressSpace.get32(_addr + index * sizeof(uint32_t))); - } - uint16_t encodingIndex(uint32_t index) const { - return UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX( - _addressSpace.get32(_addr + index * sizeof(uint32_t))); - } - -private: - A &_addressSpace; - typename A::pint_t _addr; -}; - -template class UnwindSectionLsdaArray { -public: - UnwindSectionLsdaArray(A &addressSpace, typename A::pint_t addr) - : _addressSpace(addressSpace), _addr(addr) {} - - uint32_t functionOffset(uint32_t index) const { - return _addressSpace.get32( - _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, - index, functionOffset)); - } - uint32_t lsdaOffset(uint32_t index) const { - return _addressSpace.get32( - _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, - index, lsdaOffset)); - } - -private: - A &_addressSpace; - typename A::pint_t _addr; -}; +template class UnwindSectionHeader { +public: + UnwindSectionHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), 
_addr(addr) {} + + uint32_t version() const { + return _addressSpace.get32(_addr + + offsetof(unwind_info_section_header, version)); + } + uint32_t commonEncodingsArraySectionOffset() const { + return _addressSpace.get32(_addr + + offsetof(unwind_info_section_header, + commonEncodingsArraySectionOffset)); + } + uint32_t commonEncodingsArrayCount() const { + return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, + commonEncodingsArrayCount)); + } + uint32_t personalityArraySectionOffset() const { + return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, + personalityArraySectionOffset)); + } + uint32_t personalityArrayCount() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, personalityArrayCount)); + } + uint32_t indexSectionOffset() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, indexSectionOffset)); + } + uint32_t indexCount() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, indexCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionIndexArray { +public: + UnwindSectionIndexArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, + functionOffset)); + } + uint32_t secondLevelPagesSectionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, + secondLevelPagesSectionOffset)); + } + uint32_t lsdaIndexArraySectionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, + lsdaIndexArraySectionOffset)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionRegularPageHeader { +public: + 
UnwindSectionRegularPageHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t kind() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_regular_second_level_page_header, kind)); + } + uint16_t entryPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_regular_second_level_page_header, + entryPageOffset)); + } + uint16_t entryCount() const { + return _addressSpace.get16( + _addr + + offsetof(unwind_info_regular_second_level_page_header, entryCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionRegularArray { +public: + UnwindSectionRegularArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_regular_second_level_entry, index, + functionOffset)); + } + uint32_t encoding(uint32_t index) const { + return _addressSpace.get32( + _addr + + arrayoffsetof(unwind_info_regular_second_level_entry, index, encoding)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionCompressedPageHeader { +public: + UnwindSectionCompressedPageHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t kind() const { + return _addressSpace.get32( + _addr + + offsetof(unwind_info_compressed_second_level_page_header, kind)); + } + uint16_t entryPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + entryPageOffset)); + } + uint16_t entryCount() const { + return _addressSpace.get16( + _addr + + offsetof(unwind_info_compressed_second_level_page_header, entryCount)); + } + uint16_t encodingsPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + 
encodingsPageOffset)); + } + uint16_t encodingsCount() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + encodingsCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionCompressedArray { +public: + UnwindSectionCompressedArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET( + _addressSpace.get32(_addr + index * sizeof(uint32_t))); + } + uint16_t encodingIndex(uint32_t index) const { + return UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX( + _addressSpace.get32(_addr + index * sizeof(uint32_t))); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionLsdaArray { +public: + UnwindSectionLsdaArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, + index, functionOffset)); + } + uint32_t lsdaOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, + index, lsdaOffset)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; #endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) - -class _LIBUNWIND_HIDDEN AbstractUnwindCursor { -public: - // NOTE: provide a class specific placement deallocation function (S5.3.4 p20) - // This avoids an unnecessary dependency to libc++abi. 
- void operator delete(void *, size_t) {} - - virtual ~AbstractUnwindCursor() {} - virtual bool validReg(int) { _LIBUNWIND_ABORT("validReg not implemented"); } - virtual unw_word_t getReg(int) { _LIBUNWIND_ABORT("getReg not implemented"); } - virtual void setReg(int, unw_word_t) { - _LIBUNWIND_ABORT("setReg not implemented"); - } - virtual bool validFloatReg(int) { - _LIBUNWIND_ABORT("validFloatReg not implemented"); - } - virtual unw_fpreg_t getFloatReg(int) { - _LIBUNWIND_ABORT("getFloatReg not implemented"); - } - virtual void setFloatReg(int, unw_fpreg_t) { - _LIBUNWIND_ABORT("setFloatReg not implemented"); - } - virtual int step() { _LIBUNWIND_ABORT("step not implemented"); } - virtual void getInfo(unw_proc_info_t *) { - _LIBUNWIND_ABORT("getInfo not implemented"); - } - virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } - virtual bool isSignalFrame() { - _LIBUNWIND_ABORT("isSignalFrame not implemented"); - } - virtual bool getFunctionName(char *, size_t, unw_word_t *) { - _LIBUNWIND_ABORT("getFunctionName not implemented"); - } - virtual void setInfoBasedOnIPRegister(bool = false) { - _LIBUNWIND_ABORT("setInfoBasedOnIPRegister not implemented"); - } - virtual const char *getRegisterName(int) { - _LIBUNWIND_ABORT("getRegisterName not implemented"); - } -#ifdef __arm__ - virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); } -#endif + +class _LIBUNWIND_HIDDEN AbstractUnwindCursor { +public: + // NOTE: provide a class specific placement deallocation function (S5.3.4 p20) + // This avoids an unnecessary dependency to libc++abi. 
+ void operator delete(void *, size_t) {} + + virtual ~AbstractUnwindCursor() {} + virtual bool validReg(int) { _LIBUNWIND_ABORT("validReg not implemented"); } + virtual unw_word_t getReg(int) { _LIBUNWIND_ABORT("getReg not implemented"); } + virtual void setReg(int, unw_word_t) { + _LIBUNWIND_ABORT("setReg not implemented"); + } + virtual bool validFloatReg(int) { + _LIBUNWIND_ABORT("validFloatReg not implemented"); + } + virtual unw_fpreg_t getFloatReg(int) { + _LIBUNWIND_ABORT("getFloatReg not implemented"); + } + virtual void setFloatReg(int, unw_fpreg_t) { + _LIBUNWIND_ABORT("setFloatReg not implemented"); + } + virtual int step() { _LIBUNWIND_ABORT("step not implemented"); } + virtual void getInfo(unw_proc_info_t *) { + _LIBUNWIND_ABORT("getInfo not implemented"); + } + virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } + virtual bool isSignalFrame() { + _LIBUNWIND_ABORT("isSignalFrame not implemented"); + } + virtual bool getFunctionName(char *, size_t, unw_word_t *) { + _LIBUNWIND_ABORT("getFunctionName not implemented"); + } + virtual void setInfoBasedOnIPRegister(bool = false) { + _LIBUNWIND_ABORT("setInfoBasedOnIPRegister not implemented"); + } + virtual const char *getRegisterName(int) { + _LIBUNWIND_ABORT("getRegisterName not implemented"); + } +#ifdef __arm__ + virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); } +#endif #if defined(_LIBUNWIND_USE_CET) virtual void *get_registers() { _LIBUNWIND_ABORT("get_registers not implemented"); } #endif -}; - +}; + #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) /// \c UnwindCursor contains all state (including all register values) during @@ -884,32 +884,32 @@ template bool UnwindCursor::isSignalFrame() { #else // !defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) || !defined(_WIN32) -/// UnwindCursor contains all state (including all register values) during -/// an unwind. This is normally stack allocated inside a unw_cursor_t. 
-template -class UnwindCursor : public AbstractUnwindCursor{ - typedef typename A::pint_t pint_t; -public: - UnwindCursor(unw_context_t *context, A &as); - UnwindCursor(A &as, void *threadArg); - virtual ~UnwindCursor() {} - virtual bool validReg(int); - virtual unw_word_t getReg(int); - virtual void setReg(int, unw_word_t); - virtual bool validFloatReg(int); - virtual unw_fpreg_t getFloatReg(int); - virtual void setFloatReg(int, unw_fpreg_t); - virtual int step(); - virtual void getInfo(unw_proc_info_t *); - virtual void jumpto(); - virtual bool isSignalFrame(); - virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); - virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); - virtual const char *getRegisterName(int num); -#ifdef __arm__ - virtual void saveVFPAsX(); -#endif - +/// UnwindCursor contains all state (including all register values) during +/// an unwind. This is normally stack allocated inside a unw_cursor_t. +template +class UnwindCursor : public AbstractUnwindCursor{ + typedef typename A::pint_t pint_t; +public: + UnwindCursor(unw_context_t *context, A &as); + UnwindCursor(A &as, void *threadArg); + virtual ~UnwindCursor() {} + virtual bool validReg(int); + virtual unw_word_t getReg(int); + virtual void setReg(int, unw_word_t); + virtual bool validFloatReg(int); + virtual unw_fpreg_t getFloatReg(int); + virtual void setFloatReg(int, unw_fpreg_t); + virtual int step(); + virtual void getInfo(unw_proc_info_t *); + virtual void jumpto(); + virtual bool isSignalFrame(); + virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); + virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); + virtual const char *getRegisterName(int num); +#ifdef __arm__ + virtual void saveVFPAsX(); +#endif + #if defined(_LIBUNWIND_USE_CET) virtual void *get_registers() { return &_registers; } #endif @@ -917,26 +917,26 @@ public: // need our own defition of inline placement new. 
static void *operator new(size_t, UnwindCursor *p) { return p; } -private: - +private: + #if defined(_LIBUNWIND_ARM_EHABI) - bool getInfoFromEHABISection(pint_t pc, const UnwindInfoSections §s); - - int stepWithEHABI() { - size_t len = 0; - size_t off = 0; - // FIXME: Calling decode_eht_entry() here is violating the libunwind - // abstraction layer. - const uint32_t *ehtp = - decode_eht_entry(reinterpret_cast(_info.unwind_info), - &off, &len); - if (_Unwind_VRS_Interpret((_Unwind_Context *)this, ehtp, off, len) != - _URC_CONTINUE_UNWIND) - return UNW_STEP_END; - return UNW_STEP_SUCCESS; - } -#endif - + bool getInfoFromEHABISection(pint_t pc, const UnwindInfoSections §s); + + int stepWithEHABI() { + size_t len = 0; + size_t off = 0; + // FIXME: Calling decode_eht_entry() here is violating the libunwind + // abstraction layer. + const uint32_t *ehtp = + decode_eht_entry(reinterpret_cast(_info.unwind_info), + &off, &len); + if (_Unwind_VRS_Interpret((_Unwind_Context *)this, ehtp, off, len) != + _URC_CONTINUE_UNWIND) + return UNW_STEP_END; + return UNW_STEP_SUCCESS; + } +#endif + #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) bool setInfoForSigReturn() { R dummy; @@ -960,48 +960,48 @@ private: bool getInfoFromFdeCie(const typename CFI_Parser::FDE_Info &fdeInfo, const typename CFI_Parser::CIE_Info &cieInfo, pint_t pc, uintptr_t dso_base); - bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections §s, - uint32_t fdeSectionOffsetHint=0); - int stepWithDwarfFDE() { - return DwarfInstructions::stepWithDwarf(_addressSpace, - (pint_t)this->getReg(UNW_REG_IP), - (pint_t)_info.unwind_info, + bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint=0); + int stepWithDwarfFDE() { + return DwarfInstructions::stepWithDwarf(_addressSpace, + (pint_t)this->getReg(UNW_REG_IP), + (pint_t)_info.unwind_info, _registers, _isSignalFrame); - } -#endif - + } +#endif + #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) 
- bool getInfoFromCompactEncodingSection(pint_t pc, - const UnwindInfoSections §s); - int stepWithCompactEncoding() { + bool getInfoFromCompactEncodingSection(pint_t pc, + const UnwindInfoSections §s); + int stepWithCompactEncoding() { #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - if ( compactSaysUseDwarf() ) - return stepWithDwarfFDE(); - #endif - R dummy; - return stepWithCompactEncoding(dummy); - } - + if ( compactSaysUseDwarf() ) + return stepWithDwarfFDE(); + #endif + R dummy; + return stepWithCompactEncoding(dummy); + } + #if defined(_LIBUNWIND_TARGET_X86_64) - int stepWithCompactEncoding(Registers_x86_64 &) { - return CompactUnwinder_x86_64::stepWithCompactEncoding( - _info.format, _info.start_ip, _addressSpace, _registers); - } + int stepWithCompactEncoding(Registers_x86_64 &) { + return CompactUnwinder_x86_64::stepWithCompactEncoding( + _info.format, _info.start_ip, _addressSpace, _registers); + } #endif - + #if defined(_LIBUNWIND_TARGET_I386) - int stepWithCompactEncoding(Registers_x86 &) { - return CompactUnwinder_x86::stepWithCompactEncoding( - _info.format, (uint32_t)_info.start_ip, _addressSpace, _registers); - } + int stepWithCompactEncoding(Registers_x86 &) { + return CompactUnwinder_x86::stepWithCompactEncoding( + _info.format, (uint32_t)_info.start_ip, _addressSpace, _registers); + } #endif - + #if defined(_LIBUNWIND_TARGET_PPC) - int stepWithCompactEncoding(Registers_ppc &) { - return UNW_EINVAL; - } + int stepWithCompactEncoding(Registers_ppc &) { + return UNW_EINVAL; + } #endif - + #if defined(_LIBUNWIND_TARGET_PPC64) int stepWithCompactEncoding(Registers_ppc64 &) { return UNW_EINVAL; @@ -1010,12 +1010,12 @@ private: #if defined(_LIBUNWIND_TARGET_AARCH64) - int stepWithCompactEncoding(Registers_arm64 &) { - return CompactUnwinder_arm64::stepWithCompactEncoding( - _info.format, _info.start_ip, _addressSpace, _registers); - } + int stepWithCompactEncoding(Registers_arm64 &) { + return CompactUnwinder_arm64::stepWithCompactEncoding( + 
_info.format, _info.start_ip, _addressSpace, _registers); + } #endif - + #if defined(_LIBUNWIND_TARGET_MIPS_O32) int stepWithCompactEncoding(Registers_mips_o32 &) { return UNW_EINVAL; @@ -1042,39 +1042,39 @@ private: } #endif - bool compactSaysUseDwarf(uint32_t *offset=NULL) const { - R dummy; - return compactSaysUseDwarf(dummy, offset); - } - + bool compactSaysUseDwarf(uint32_t *offset=NULL) const { + R dummy; + return compactSaysUseDwarf(dummy, offset); + } + #if defined(_LIBUNWIND_TARGET_X86_64) - bool compactSaysUseDwarf(Registers_x86_64 &, uint32_t *offset) const { - if ((_info.format & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF) { - if (offset) - *offset = (_info.format & UNWIND_X86_64_DWARF_SECTION_OFFSET); - return true; - } - return false; - } -#endif - + bool compactSaysUseDwarf(Registers_x86_64 &, uint32_t *offset) const { + if ((_info.format & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_X86_64_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + #if defined(_LIBUNWIND_TARGET_I386) - bool compactSaysUseDwarf(Registers_x86 &, uint32_t *offset) const { - if ((_info.format & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF) { - if (offset) - *offset = (_info.format & UNWIND_X86_DWARF_SECTION_OFFSET); - return true; - } - return false; - } -#endif - + bool compactSaysUseDwarf(Registers_x86 &, uint32_t *offset) const { + if ((_info.format & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_X86_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + #if defined(_LIBUNWIND_TARGET_PPC) - bool compactSaysUseDwarf(Registers_ppc &, uint32_t *) const { - return true; - } + bool compactSaysUseDwarf(Registers_ppc &, uint32_t *) const { + return true; + } #endif - + #if defined(_LIBUNWIND_TARGET_PPC64) bool compactSaysUseDwarf(Registers_ppc64 &, uint32_t *) const { return true; @@ -1082,16 +1082,16 @@ private: #endif 
#if defined(_LIBUNWIND_TARGET_AARCH64) - bool compactSaysUseDwarf(Registers_arm64 &, uint32_t *offset) const { - if ((_info.format & UNWIND_ARM64_MODE_MASK) == UNWIND_ARM64_MODE_DWARF) { - if (offset) - *offset = (_info.format & UNWIND_ARM64_DWARF_SECTION_OFFSET); - return true; - } - return false; - } -#endif - + bool compactSaysUseDwarf(Registers_arm64 &, uint32_t *offset) const { + if ((_info.format & UNWIND_ARM64_MODE_MASK) == UNWIND_ARM64_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_ARM64_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + #if defined(_LIBUNWIND_TARGET_MIPS_O32) bool compactSaysUseDwarf(Registers_mips_o32 &, uint32_t *) const { return true; @@ -1123,29 +1123,29 @@ private: #endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - compact_unwind_encoding_t dwarfEncoding() const { - R dummy; - return dwarfEncoding(dummy); - } - + compact_unwind_encoding_t dwarfEncoding() const { + R dummy; + return dwarfEncoding(dummy); + } + #if defined(_LIBUNWIND_TARGET_X86_64) - compact_unwind_encoding_t dwarfEncoding(Registers_x86_64 &) const { - return UNWIND_X86_64_MODE_DWARF; - } + compact_unwind_encoding_t dwarfEncoding(Registers_x86_64 &) const { + return UNWIND_X86_64_MODE_DWARF; + } #endif - + #if defined(_LIBUNWIND_TARGET_I386) - compact_unwind_encoding_t dwarfEncoding(Registers_x86 &) const { - return UNWIND_X86_MODE_DWARF; - } + compact_unwind_encoding_t dwarfEncoding(Registers_x86 &) const { + return UNWIND_X86_MODE_DWARF; + } #endif - + #if defined(_LIBUNWIND_TARGET_PPC) - compact_unwind_encoding_t dwarfEncoding(Registers_ppc &) const { - return 0; - } + compact_unwind_encoding_t dwarfEncoding(Registers_ppc &) const { + return 0; + } #endif - + #if defined(_LIBUNWIND_TARGET_PPC64) compact_unwind_encoding_t dwarfEncoding(Registers_ppc64 &) const { return 0; @@ -1153,11 +1153,11 @@ private: #endif #if defined(_LIBUNWIND_TARGET_AARCH64) - compact_unwind_encoding_t 
dwarfEncoding(Registers_arm64 &) const { - return UNWIND_ARM64_MODE_DWARF; - } + compact_unwind_encoding_t dwarfEncoding(Registers_arm64 &) const { + return UNWIND_ARM64_MODE_DWARF; + } #endif - + #if defined(_LIBUNWIND_TARGET_ARM) compact_unwind_encoding_t dwarfEncoding(Registers_arm &) const { return 0; @@ -1165,11 +1165,11 @@ private: #endif #if defined (_LIBUNWIND_TARGET_OR1K) - compact_unwind_encoding_t dwarfEncoding(Registers_or1k &) const { - return 0; - } + compact_unwind_encoding_t dwarfEncoding(Registers_or1k &) const { + return 0; + } #endif - + #if defined (_LIBUNWIND_TARGET_HEXAGON) compact_unwind_encoding_t dwarfEncoding(Registers_hexagon &) const { return 0; @@ -1181,7 +1181,7 @@ private: return 0; } #endif - + #if defined (_LIBUNWIND_TARGET_MIPS_NEWABI) compact_unwind_encoding_t dwarfEncoding(Registers_mips_newabi &) const { return 0; @@ -1221,152 +1221,152 @@ private: #endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) - A &_addressSpace; - R _registers; - unw_proc_info_t _info; - bool _unwindInfoMissing; - bool _isSignalFrame; + A &_addressSpace; + R _registers; + unw_proc_info_t _info; + bool _unwindInfoMissing; + bool _isSignalFrame; #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) bool _isSigReturn = false; #endif -}; - - -template -UnwindCursor::UnwindCursor(unw_context_t *context, A &as) - : _addressSpace(as), _registers(context), _unwindInfoMissing(false), - _isSignalFrame(false) { +}; + + +template +UnwindCursor::UnwindCursor(unw_context_t *context, A &as) + : _addressSpace(as), _registers(context), _unwindInfoMissing(false), + _isSignalFrame(false) { static_assert((check_fit, unw_cursor_t>::does_fit), - "UnwindCursor<> does not fit in unw_cursor_t"); + "UnwindCursor<> does not fit in unw_cursor_t"); static_assert((alignof(UnwindCursor) <= alignof(unw_cursor_t)), "UnwindCursor<> requires more alignment than unw_cursor_t"); - memset(&_info, 0, sizeof(_info)); -} - -template -UnwindCursor::UnwindCursor(A &as, void *) - 
: _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) { - memset(&_info, 0, sizeof(_info)); - // FIXME - // fill in _registers from thread arg -} - - -template -bool UnwindCursor::validReg(int regNum) { - return _registers.validRegister(regNum); -} - -template -unw_word_t UnwindCursor::getReg(int regNum) { - return _registers.getRegister(regNum); -} - -template -void UnwindCursor::setReg(int regNum, unw_word_t value) { - _registers.setRegister(regNum, (typename A::pint_t)value); -} - -template -bool UnwindCursor::validFloatReg(int regNum) { - return _registers.validFloatRegister(regNum); -} - -template -unw_fpreg_t UnwindCursor::getFloatReg(int regNum) { - return _registers.getFloatRegister(regNum); -} - -template -void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { - _registers.setFloatRegister(regNum, value); -} - -template void UnwindCursor::jumpto() { - _registers.jumpto(); -} - -#ifdef __arm__ -template void UnwindCursor::saveVFPAsX() { - _registers.saveVFPAsX(); -} -#endif - -template -const char *UnwindCursor::getRegisterName(int regNum) { - return _registers.getRegisterName(regNum); -} - -template bool UnwindCursor::isSignalFrame() { - return _isSignalFrame; -} - + memset(&_info, 0, sizeof(_info)); +} + +template +UnwindCursor::UnwindCursor(A &as, void *) + : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) { + memset(&_info, 0, sizeof(_info)); + // FIXME + // fill in _registers from thread arg +} + + +template +bool UnwindCursor::validReg(int regNum) { + return _registers.validRegister(regNum); +} + +template +unw_word_t UnwindCursor::getReg(int regNum) { + return _registers.getRegister(regNum); +} + +template +void UnwindCursor::setReg(int regNum, unw_word_t value) { + _registers.setRegister(regNum, (typename A::pint_t)value); +} + +template +bool UnwindCursor::validFloatReg(int regNum) { + return _registers.validFloatRegister(regNum); +} + +template +unw_fpreg_t UnwindCursor::getFloatReg(int regNum) { + 
return _registers.getFloatRegister(regNum); +} + +template +void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { + _registers.setFloatRegister(regNum, value); +} + +template void UnwindCursor::jumpto() { + _registers.jumpto(); +} + +#ifdef __arm__ +template void UnwindCursor::saveVFPAsX() { + _registers.saveVFPAsX(); +} +#endif + +template +const char *UnwindCursor::getRegisterName(int regNum) { + return _registers.getRegisterName(regNum); +} + +template bool UnwindCursor::isSignalFrame() { + return _isSignalFrame; +} + #endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) #if defined(_LIBUNWIND_ARM_EHABI) -template -struct EHABISectionIterator { - typedef EHABISectionIterator _Self; - - typedef typename A::pint_t value_type; - typedef typename A::pint_t* pointer; - typedef typename A::pint_t& reference; - typedef size_t size_type; - typedef size_t difference_type; - - static _Self begin(A& addressSpace, const UnwindInfoSections& sects) { - return _Self(addressSpace, sects, 0); - } - static _Self end(A& addressSpace, const UnwindInfoSections& sects) { +template +struct EHABISectionIterator { + typedef EHABISectionIterator _Self; + + typedef typename A::pint_t value_type; + typedef typename A::pint_t* pointer; + typedef typename A::pint_t& reference; + typedef size_t size_type; + typedef size_t difference_type; + + static _Self begin(A& addressSpace, const UnwindInfoSections& sects) { + return _Self(addressSpace, sects, 0); + } + static _Self end(A& addressSpace, const UnwindInfoSections& sects) { return _Self(addressSpace, sects, sects.arm_section_length / sizeof(EHABIIndexEntry)); - } - - EHABISectionIterator(A& addressSpace, const UnwindInfoSections& sects, size_t i) - : _i(i), _addressSpace(&addressSpace), _sects(§s) {} - - _Self& operator++() { ++_i; return *this; } - _Self& operator+=(size_t a) { _i += a; return *this; } - _Self& operator--() { assert(_i > 0); --_i; return *this; } - _Self& operator-=(size_t a) { assert(_i >= a); _i -= a; return *this; } 
- - _Self operator+(size_t a) { _Self out = *this; out._i += a; return out; } - _Self operator-(size_t a) { assert(_i >= a); _Self out = *this; out._i -= a; return out; } - + } + + EHABISectionIterator(A& addressSpace, const UnwindInfoSections& sects, size_t i) + : _i(i), _addressSpace(&addressSpace), _sects(§s) {} + + _Self& operator++() { ++_i; return *this; } + _Self& operator+=(size_t a) { _i += a; return *this; } + _Self& operator--() { assert(_i > 0); --_i; return *this; } + _Self& operator-=(size_t a) { assert(_i >= a); _i -= a; return *this; } + + _Self operator+(size_t a) { _Self out = *this; out._i += a; return out; } + _Self operator-(size_t a) { assert(_i >= a); _Self out = *this; out._i -= a; return out; } + size_t operator-(const _Self& other) const { return _i - other._i; } - - bool operator==(const _Self& other) const { - assert(_addressSpace == other._addressSpace); - assert(_sects == other._sects); - return _i == other._i; - } - + + bool operator==(const _Self& other) const { + assert(_addressSpace == other._addressSpace); + assert(_sects == other._sects); + return _i == other._i; + } + bool operator!=(const _Self& other) const { assert(_addressSpace == other._addressSpace); assert(_sects == other._sects); return _i != other._i; } - typename A::pint_t operator*() const { return functionAddress(); } - - typename A::pint_t functionAddress() const { - typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof( - EHABIIndexEntry, _i, functionOffset); - return indexAddr + signExtendPrel31(_addressSpace->get32(indexAddr)); - } - - typename A::pint_t dataAddress() { - typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof( - EHABIIndexEntry, _i, data); - return indexAddr; - } - - private: - size_t _i; - A* _addressSpace; - const UnwindInfoSections* _sects; -}; - + typename A::pint_t operator*() const { return functionAddress(); } + + typename A::pint_t functionAddress() const { + typename A::pint_t indexAddr = _sects->arm_section + 
arrayoffsetof( + EHABIIndexEntry, _i, functionOffset); + return indexAddr + signExtendPrel31(_addressSpace->get32(indexAddr)); + } + + typename A::pint_t dataAddress() { + typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof( + EHABIIndexEntry, _i, data); + return indexAddr; + } + + private: + size_t _i; + A* _addressSpace; + const UnwindInfoSections* _sects; +}; + namespace { template @@ -1390,147 +1390,147 @@ EHABISectionIterator EHABISectionUpperBound( } -template -bool UnwindCursor::getInfoFromEHABISection( - pint_t pc, - const UnwindInfoSections §s) { - EHABISectionIterator begin = - EHABISectionIterator::begin(_addressSpace, sects); - EHABISectionIterator end = - EHABISectionIterator::end(_addressSpace, sects); +template +bool UnwindCursor::getInfoFromEHABISection( + pint_t pc, + const UnwindInfoSections §s) { + EHABISectionIterator begin = + EHABISectionIterator::begin(_addressSpace, sects); + EHABISectionIterator end = + EHABISectionIterator::end(_addressSpace, sects); if (begin == end) return false; - + EHABISectionIterator itNextPC = EHABISectionUpperBound(begin, end, pc); if (itNextPC == begin) - return false; - EHABISectionIterator itThisPC = itNextPC - 1; - - pint_t thisPC = itThisPC.functionAddress(); + return false; + EHABISectionIterator itThisPC = itNextPC - 1; + + pint_t thisPC = itThisPC.functionAddress(); // If an exception is thrown from a function, corresponding to the last entry // in the table, we don't really know the function extent and have to choose a // value for nextPC. Choosing max() will allow the range check during trace to // succeed. pint_t nextPC = (itNextPC == end) ? 
UINTPTR_MAX : itNextPC.functionAddress(); - pint_t indexDataAddr = itThisPC.dataAddress(); - - if (indexDataAddr == 0) - return false; - - uint32_t indexData = _addressSpace.get32(indexDataAddr); - if (indexData == UNW_EXIDX_CANTUNWIND) - return false; - - // If the high bit is set, the exception handling table entry is inline inside - // the index table entry on the second word (aka |indexDataAddr|). Otherwise, + pint_t indexDataAddr = itThisPC.dataAddress(); + + if (indexDataAddr == 0) + return false; + + uint32_t indexData = _addressSpace.get32(indexDataAddr); + if (indexData == UNW_EXIDX_CANTUNWIND) + return false; + + // If the high bit is set, the exception handling table entry is inline inside + // the index table entry on the second word (aka |indexDataAddr|). Otherwise, // the table points at an offset in the exception handling table (section 5 // EHABI). - pint_t exceptionTableAddr; - uint32_t exceptionTableData; - bool isSingleWordEHT; - if (indexData & 0x80000000) { - exceptionTableAddr = indexDataAddr; - // TODO(ajwong): Should this data be 0? - exceptionTableData = indexData; - isSingleWordEHT = true; - } else { - exceptionTableAddr = indexDataAddr + signExtendPrel31(indexData); - exceptionTableData = _addressSpace.get32(exceptionTableAddr); - isSingleWordEHT = false; - } - - // Now we know the 3 things: - // exceptionTableAddr -- exception handler table entry. - // exceptionTableData -- the data inside the first word of the eht entry. - // isSingleWordEHT -- whether the entry is in the index. - unw_word_t personalityRoutine = 0xbadf00d; - bool scope32 = false; - uintptr_t lsda; - - // If the high bit in the exception handling table entry is set, the entry is - // in compact form (section 6.3 EHABI). - if (exceptionTableData & 0x80000000) { - // Grab the index of the personality routine from the compact form. 
- uint32_t choice = (exceptionTableData & 0x0f000000) >> 24; - uint32_t extraWords = 0; - switch (choice) { - case 0: - personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr0; - extraWords = 0; - scope32 = false; - lsda = isSingleWordEHT ? 0 : (exceptionTableAddr + 4); - break; - case 1: - personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr1; - extraWords = (exceptionTableData & 0x00ff0000) >> 16; - scope32 = false; - lsda = exceptionTableAddr + (extraWords + 1) * 4; - break; - case 2: - personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr2; - extraWords = (exceptionTableData & 0x00ff0000) >> 16; - scope32 = true; - lsda = exceptionTableAddr + (extraWords + 1) * 4; - break; - default: - _LIBUNWIND_ABORT("unknown personality routine"); - return false; - } - - if (isSingleWordEHT) { - if (extraWords != 0) { - _LIBUNWIND_ABORT("index inlined table detected but pr function " - "requires extra words"); - return false; - } - } - } else { - pint_t personalityAddr = - exceptionTableAddr + signExtendPrel31(exceptionTableData); - personalityRoutine = personalityAddr; - - // ARM EHABI # 6.2, # 9.2 - // - // +---- ehtp - // v - // +--------------------------------------+ - // | +--------+--------+--------+-------+ | - // | |0| prel31 to personalityRoutine | | - // | +--------+--------+--------+-------+ | - // | | N | unwind opcodes | | <-- UnwindData - // | +--------+--------+--------+-------+ | - // | | Word 2 unwind opcodes | | - // | +--------+--------+--------+-------+ | - // | ... | - // | +--------+--------+--------+-------+ | - // | | Word N unwind opcodes | | - // | +--------+--------+--------+-------+ | - // | | LSDA | | <-- lsda - // | | ... 
| | - // | +--------+--------+--------+-------+ | - // +--------------------------------------+ - - uint32_t *UnwindData = reinterpret_cast(exceptionTableAddr) + 1; - uint32_t FirstDataWord = *UnwindData; - size_t N = ((FirstDataWord >> 24) & 0xff); - size_t NDataWords = N + 1; - lsda = reinterpret_cast(UnwindData + NDataWords); - } - - _info.start_ip = thisPC; - _info.end_ip = nextPC; - _info.handler = personalityRoutine; - _info.unwind_info = exceptionTableAddr; - _info.lsda = lsda; - // flags is pr_cache.additional. See EHABI #7.2 for definition of bit 0. + pint_t exceptionTableAddr; + uint32_t exceptionTableData; + bool isSingleWordEHT; + if (indexData & 0x80000000) { + exceptionTableAddr = indexDataAddr; + // TODO(ajwong): Should this data be 0? + exceptionTableData = indexData; + isSingleWordEHT = true; + } else { + exceptionTableAddr = indexDataAddr + signExtendPrel31(indexData); + exceptionTableData = _addressSpace.get32(exceptionTableAddr); + isSingleWordEHT = false; + } + + // Now we know the 3 things: + // exceptionTableAddr -- exception handler table entry. + // exceptionTableData -- the data inside the first word of the eht entry. + // isSingleWordEHT -- whether the entry is in the index. + unw_word_t personalityRoutine = 0xbadf00d; + bool scope32 = false; + uintptr_t lsda; + + // If the high bit in the exception handling table entry is set, the entry is + // in compact form (section 6.3 EHABI). + if (exceptionTableData & 0x80000000) { + // Grab the index of the personality routine from the compact form. + uint32_t choice = (exceptionTableData & 0x0f000000) >> 24; + uint32_t extraWords = 0; + switch (choice) { + case 0: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr0; + extraWords = 0; + scope32 = false; + lsda = isSingleWordEHT ? 
0 : (exceptionTableAddr + 4); + break; + case 1: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr1; + extraWords = (exceptionTableData & 0x00ff0000) >> 16; + scope32 = false; + lsda = exceptionTableAddr + (extraWords + 1) * 4; + break; + case 2: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr2; + extraWords = (exceptionTableData & 0x00ff0000) >> 16; + scope32 = true; + lsda = exceptionTableAddr + (extraWords + 1) * 4; + break; + default: + _LIBUNWIND_ABORT("unknown personality routine"); + return false; + } + + if (isSingleWordEHT) { + if (extraWords != 0) { + _LIBUNWIND_ABORT("index inlined table detected but pr function " + "requires extra words"); + return false; + } + } + } else { + pint_t personalityAddr = + exceptionTableAddr + signExtendPrel31(exceptionTableData); + personalityRoutine = personalityAddr; + + // ARM EHABI # 6.2, # 9.2 + // + // +---- ehtp + // v + // +--------------------------------------+ + // | +--------+--------+--------+-------+ | + // | |0| prel31 to personalityRoutine | | + // | +--------+--------+--------+-------+ | + // | | N | unwind opcodes | | <-- UnwindData + // | +--------+--------+--------+-------+ | + // | | Word 2 unwind opcodes | | + // | +--------+--------+--------+-------+ | + // | ... | + // | +--------+--------+--------+-------+ | + // | | Word N unwind opcodes | | + // | +--------+--------+--------+-------+ | + // | | LSDA | | <-- lsda + // | | ... | | + // | +--------+--------+--------+-------+ | + // +--------------------------------------+ + + uint32_t *UnwindData = reinterpret_cast(exceptionTableAddr) + 1; + uint32_t FirstDataWord = *UnwindData; + size_t N = ((FirstDataWord >> 24) & 0xff); + size_t NDataWords = N + 1; + lsda = reinterpret_cast(UnwindData + NDataWords); + } + + _info.start_ip = thisPC; + _info.end_ip = nextPC; + _info.handler = personalityRoutine; + _info.unwind_info = exceptionTableAddr; + _info.lsda = lsda; + // flags is pr_cache.additional. 
See EHABI #7.2 for definition of bit 0. _info.flags = (isSingleWordEHT ? 1 : 0) | (scope32 ? 0x2 : 0); // Use enum? - - return true; -} -#endif - + + return true; +} +#endif + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) -template +template bool UnwindCursor::getInfoFromFdeCie( const typename CFI_Parser::FDE_Info &fdeInfo, const typename CFI_Parser::CIE_Info &cieInfo, pint_t pc, @@ -1557,325 +1557,325 @@ bool UnwindCursor::getInfoFromFdeCie( } template -bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, - const UnwindInfoSections §s, - uint32_t fdeSectionOffsetHint) { - typename CFI_Parser::FDE_Info fdeInfo; - typename CFI_Parser::CIE_Info cieInfo; - bool foundFDE = false; - bool foundInCache = false; - // If compact encoding table gave offset into dwarf section, go directly there - if (fdeSectionOffsetHint != 0) { - foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, +bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, + const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint) { + typename CFI_Parser::FDE_Info fdeInfo; + typename CFI_Parser::CIE_Info cieInfo; + bool foundFDE = false; + bool foundInCache = false; + // If compact encoding table gave offset into dwarf section, go directly there + if (fdeSectionOffsetHint != 0) { + foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, sects.dwarf_section_length, - sects.dwarf_section + fdeSectionOffsetHint, - &fdeInfo, &cieInfo); - } + sects.dwarf_section + fdeSectionOffsetHint, + &fdeInfo, &cieInfo); + } #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) - if (!foundFDE && (sects.dwarf_index_section != 0)) { - foundFDE = EHHeaderParser::findFDE( - _addressSpace, pc, sects.dwarf_index_section, - (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo); - } -#endif - if (!foundFDE) { - // otherwise, search cache of previously found FDEs. 
- pint_t cachedFDE = DwarfFDECache::findFDE(sects.dso_base, pc); - if (cachedFDE != 0) { - foundFDE = - CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + if (!foundFDE && (sects.dwarf_index_section != 0)) { + foundFDE = EHHeaderParser::findFDE( + _addressSpace, pc, sects.dwarf_index_section, + (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo); + } +#endif + if (!foundFDE) { + // otherwise, search cache of previously found FDEs. + pint_t cachedFDE = DwarfFDECache::findFDE(sects.dso_base, pc); + if (cachedFDE != 0) { + foundFDE = + CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, sects.dwarf_section_length, - cachedFDE, &fdeInfo, &cieInfo); - foundInCache = foundFDE; - } - } - if (!foundFDE) { - // Still not found, do full scan of __eh_frame section. - foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + cachedFDE, &fdeInfo, &cieInfo); + foundInCache = foundFDE; + } + } + if (!foundFDE) { + // Still not found, do full scan of __eh_frame section. + foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, sects.dwarf_section_length, 0, - &fdeInfo, &cieInfo); - } - if (foundFDE) { + &fdeInfo, &cieInfo); + } + if (foundFDE) { if (getInfoFromFdeCie(fdeInfo, cieInfo, pc, sects.dso_base)) { - // Add to cache (to make next lookup faster) if we had no hint - // and there was no index. - if (!foundInCache && (fdeSectionOffsetHint == 0)) { + // Add to cache (to make next lookup faster) if we had no hint + // and there was no index. 
+ if (!foundInCache && (fdeSectionOffsetHint == 0)) { #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) - if (sects.dwarf_index_section == 0) - #endif - DwarfFDECache::add(sects.dso_base, fdeInfo.pcStart, fdeInfo.pcEnd, - fdeInfo.fdeStart); - } - return true; - } - } + if (sects.dwarf_index_section == 0) + #endif + DwarfFDECache::add(sects.dso_base, fdeInfo.pcStart, fdeInfo.pcEnd, + fdeInfo.fdeStart); + } + return true; + } + } //_LIBUNWIND_DEBUG_LOG("can't find/use FDE for pc=0x%llX", (uint64_t)pc); - return false; -} + return false; +} #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - - + + #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) -template -bool UnwindCursor::getInfoFromCompactEncodingSection(pint_t pc, - const UnwindInfoSections §s) { - const bool log = false; - if (log) - fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n", - (uint64_t)pc, (uint64_t)sects.dso_base); - - const UnwindSectionHeader sectionHeader(_addressSpace, - sects.compact_unwind_section); - if (sectionHeader.version() != UNWIND_SECTION_VERSION) - return false; - - // do a binary search of top level index to find page with unwind info - pint_t targetFunctionOffset = pc - sects.dso_base; - const UnwindSectionIndexArray topIndex(_addressSpace, - sects.compact_unwind_section - + sectionHeader.indexSectionOffset()); - uint32_t low = 0; - uint32_t high = sectionHeader.indexCount(); - uint32_t last = high - 1; - while (low < high) { - uint32_t mid = (low + high) / 2; - //if ( log ) fprintf(stderr, "\tmid=%d, low=%d, high=%d, *mid=0x%08X\n", - //mid, low, high, topIndex.functionOffset(mid)); - if (topIndex.functionOffset(mid) <= targetFunctionOffset) { - if ((mid == last) || - (topIndex.functionOffset(mid + 1) > targetFunctionOffset)) { - low = mid; - break; - } else { - low = mid + 1; - } - } else { - high = mid; - } - } - const uint32_t firstLevelFunctionOffset = topIndex.functionOffset(low); - const uint32_t firstLevelNextPageFunctionOffset = - 
topIndex.functionOffset(low + 1); - const pint_t secondLevelAddr = - sects.compact_unwind_section + topIndex.secondLevelPagesSectionOffset(low); - const pint_t lsdaArrayStartAddr = - sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low); - const pint_t lsdaArrayEndAddr = - sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low+1); - if (log) - fprintf(stderr, "\tfirst level search for result index=%d " - "to secondLevelAddr=0x%llX\n", - low, (uint64_t) secondLevelAddr); - // do a binary search of second level page index - uint32_t encoding = 0; - pint_t funcStart = 0; - pint_t funcEnd = 0; - pint_t lsda = 0; - pint_t personality = 0; - uint32_t pageKind = _addressSpace.get32(secondLevelAddr); - if (pageKind == UNWIND_SECOND_LEVEL_REGULAR) { - // regular page - UnwindSectionRegularPageHeader pageHeader(_addressSpace, - secondLevelAddr); - UnwindSectionRegularArray pageIndex( - _addressSpace, secondLevelAddr + pageHeader.entryPageOffset()); - // binary search looks for entry with e where index[e].offset <= pc < - // index[e+1].offset - if (log) - fprintf(stderr, "\tbinary search for targetFunctionOffset=0x%08llX in " - "regular page starting at secondLevelAddr=0x%llX\n", - (uint64_t) targetFunctionOffset, (uint64_t) secondLevelAddr); - low = 0; - high = pageHeader.entryCount(); - while (low < high) { - uint32_t mid = (low + high) / 2; - if (pageIndex.functionOffset(mid) <= targetFunctionOffset) { - if (mid == (uint32_t)(pageHeader.entryCount() - 1)) { - // at end of table - low = mid; - funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; - break; - } else if (pageIndex.functionOffset(mid + 1) > targetFunctionOffset) { - // next is too big, so we found it - low = mid; - funcEnd = pageIndex.functionOffset(low + 1) + sects.dso_base; - break; - } else { - low = mid + 1; - } - } else { - high = mid; - } - } - encoding = pageIndex.encoding(low); - funcStart = pageIndex.functionOffset(low) + sects.dso_base; - if (pc < funcStart) 
{ - if (log) - fprintf( - stderr, - "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", - (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); - return false; - } - if (pc > funcEnd) { - if (log) - fprintf( - stderr, - "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", - (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); - return false; - } - } else if (pageKind == UNWIND_SECOND_LEVEL_COMPRESSED) { - // compressed page - UnwindSectionCompressedPageHeader pageHeader(_addressSpace, - secondLevelAddr); - UnwindSectionCompressedArray pageIndex( - _addressSpace, secondLevelAddr + pageHeader.entryPageOffset()); - const uint32_t targetFunctionPageOffset = - (uint32_t)(targetFunctionOffset - firstLevelFunctionOffset); - // binary search looks for entry with e where index[e].offset <= pc < - // index[e+1].offset - if (log) - fprintf(stderr, "\tbinary search of compressed page starting at " - "secondLevelAddr=0x%llX\n", - (uint64_t) secondLevelAddr); - low = 0; - last = pageHeader.entryCount() - 1; - high = pageHeader.entryCount(); - while (low < high) { - uint32_t mid = (low + high) / 2; - if (pageIndex.functionOffset(mid) <= targetFunctionPageOffset) { - if ((mid == last) || - (pageIndex.functionOffset(mid + 1) > targetFunctionPageOffset)) { - low = mid; - break; - } else { - low = mid + 1; - } - } else { - high = mid; - } - } - funcStart = pageIndex.functionOffset(low) + firstLevelFunctionOffset - + sects.dso_base; - if (low < last) - funcEnd = - pageIndex.functionOffset(low + 1) + firstLevelFunctionOffset - + sects.dso_base; - else - funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; - if (pc < funcStart) { +template +bool UnwindCursor::getInfoFromCompactEncodingSection(pint_t pc, + const UnwindInfoSections §s) { + const bool log = false; + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n", + (uint64_t)pc, (uint64_t)sects.dso_base); + + const UnwindSectionHeader 
sectionHeader(_addressSpace, + sects.compact_unwind_section); + if (sectionHeader.version() != UNWIND_SECTION_VERSION) + return false; + + // do a binary search of top level index to find page with unwind info + pint_t targetFunctionOffset = pc - sects.dso_base; + const UnwindSectionIndexArray topIndex(_addressSpace, + sects.compact_unwind_section + + sectionHeader.indexSectionOffset()); + uint32_t low = 0; + uint32_t high = sectionHeader.indexCount(); + uint32_t last = high - 1; + while (low < high) { + uint32_t mid = (low + high) / 2; + //if ( log ) fprintf(stderr, "\tmid=%d, low=%d, high=%d, *mid=0x%08X\n", + //mid, low, high, topIndex.functionOffset(mid)); + if (topIndex.functionOffset(mid) <= targetFunctionOffset) { + if ((mid == last) || + (topIndex.functionOffset(mid + 1) > targetFunctionOffset)) { + low = mid; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + const uint32_t firstLevelFunctionOffset = topIndex.functionOffset(low); + const uint32_t firstLevelNextPageFunctionOffset = + topIndex.functionOffset(low + 1); + const pint_t secondLevelAddr = + sects.compact_unwind_section + topIndex.secondLevelPagesSectionOffset(low); + const pint_t lsdaArrayStartAddr = + sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low); + const pint_t lsdaArrayEndAddr = + sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low+1); + if (log) + fprintf(stderr, "\tfirst level search for result index=%d " + "to secondLevelAddr=0x%llX\n", + low, (uint64_t) secondLevelAddr); + // do a binary search of second level page index + uint32_t encoding = 0; + pint_t funcStart = 0; + pint_t funcEnd = 0; + pint_t lsda = 0; + pint_t personality = 0; + uint32_t pageKind = _addressSpace.get32(secondLevelAddr); + if (pageKind == UNWIND_SECOND_LEVEL_REGULAR) { + // regular page + UnwindSectionRegularPageHeader pageHeader(_addressSpace, + secondLevelAddr); + UnwindSectionRegularArray pageIndex( + _addressSpace, secondLevelAddr + 
pageHeader.entryPageOffset()); + // binary search looks for entry with e where index[e].offset <= pc < + // index[e+1].offset + if (log) + fprintf(stderr, "\tbinary search for targetFunctionOffset=0x%08llX in " + "regular page starting at secondLevelAddr=0x%llX\n", + (uint64_t) targetFunctionOffset, (uint64_t) secondLevelAddr); + low = 0; + high = pageHeader.entryCount(); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (pageIndex.functionOffset(mid) <= targetFunctionOffset) { + if (mid == (uint32_t)(pageHeader.entryCount() - 1)) { + // at end of table + low = mid; + funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; + break; + } else if (pageIndex.functionOffset(mid + 1) > targetFunctionOffset) { + // next is too big, so we found it + low = mid; + funcEnd = pageIndex.functionOffset(low + 1) + sects.dso_base; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + encoding = pageIndex.encoding(low); + funcStart = pageIndex.functionOffset(low) + sects.dso_base; + if (pc < funcStart) { + if (log) + fprintf( + stderr, + "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", + (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); + return false; + } + if (pc > funcEnd) { + if (log) + fprintf( + stderr, + "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", + (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); + return false; + } + } else if (pageKind == UNWIND_SECOND_LEVEL_COMPRESSED) { + // compressed page + UnwindSectionCompressedPageHeader pageHeader(_addressSpace, + secondLevelAddr); + UnwindSectionCompressedArray pageIndex( + _addressSpace, secondLevelAddr + pageHeader.entryPageOffset()); + const uint32_t targetFunctionPageOffset = + (uint32_t)(targetFunctionOffset - firstLevelFunctionOffset); + // binary search looks for entry with e where index[e].offset <= pc < + // index[e+1].offset + if (log) + fprintf(stderr, "\tbinary search of compressed page starting at " + 
"secondLevelAddr=0x%llX\n", + (uint64_t) secondLevelAddr); + low = 0; + last = pageHeader.entryCount() - 1; + high = pageHeader.entryCount(); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (pageIndex.functionOffset(mid) <= targetFunctionPageOffset) { + if ((mid == last) || + (pageIndex.functionOffset(mid + 1) > targetFunctionPageOffset)) { + low = mid; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + funcStart = pageIndex.functionOffset(low) + firstLevelFunctionOffset + + sects.dso_base; + if (low < last) + funcEnd = + pageIndex.functionOffset(low + 1) + firstLevelFunctionOffset + + sects.dso_base; + else + funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; + if (pc < funcStart) { _LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX " "not in second level compressed unwind table. " "funcStart=0x%llX", - (uint64_t) pc, (uint64_t) funcStart); - return false; - } - if (pc > funcEnd) { + (uint64_t) pc, (uint64_t) funcStart); + return false; + } + if (pc > funcEnd) { _LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX " "not in second level compressed unwind table. 
" "funcEnd=0x%llX", - (uint64_t) pc, (uint64_t) funcEnd); - return false; - } - uint16_t encodingIndex = pageIndex.encodingIndex(low); - if (encodingIndex < sectionHeader.commonEncodingsArrayCount()) { - // encoding is in common table in section header - encoding = _addressSpace.get32( - sects.compact_unwind_section + - sectionHeader.commonEncodingsArraySectionOffset() + - encodingIndex * sizeof(uint32_t)); - } else { - // encoding is in page specific table - uint16_t pageEncodingIndex = - encodingIndex - (uint16_t)sectionHeader.commonEncodingsArrayCount(); - encoding = _addressSpace.get32(secondLevelAddr + - pageHeader.encodingsPageOffset() + - pageEncodingIndex * sizeof(uint32_t)); - } - } else { + (uint64_t) pc, (uint64_t) funcEnd); + return false; + } + uint16_t encodingIndex = pageIndex.encodingIndex(low); + if (encodingIndex < sectionHeader.commonEncodingsArrayCount()) { + // encoding is in common table in section header + encoding = _addressSpace.get32( + sects.compact_unwind_section + + sectionHeader.commonEncodingsArraySectionOffset() + + encodingIndex * sizeof(uint32_t)); + } else { + // encoding is in page specific table + uint16_t pageEncodingIndex = + encodingIndex - (uint16_t)sectionHeader.commonEncodingsArrayCount(); + encoding = _addressSpace.get32(secondLevelAddr + + pageHeader.encodingsPageOffset() + + pageEncodingIndex * sizeof(uint32_t)); + } + } else { _LIBUNWIND_DEBUG_LOG( "malformed __unwind_info at 0x%0llX bad second level page", (uint64_t)sects.compact_unwind_section); - return false; - } - - // look up LSDA, if encoding says function has one - if (encoding & UNWIND_HAS_LSDA) { - UnwindSectionLsdaArray lsdaIndex(_addressSpace, lsdaArrayStartAddr); - uint32_t funcStartOffset = (uint32_t)(funcStart - sects.dso_base); - low = 0; - high = (uint32_t)(lsdaArrayEndAddr - lsdaArrayStartAddr) / - sizeof(unwind_info_section_header_lsda_index_entry); - // binary search looks for entry with exact match for functionOffset - if (log) - fprintf(stderr, - 
"\tbinary search of lsda table for targetFunctionOffset=0x%08X\n", - funcStartOffset); - while (low < high) { - uint32_t mid = (low + high) / 2; - if (lsdaIndex.functionOffset(mid) == funcStartOffset) { - lsda = lsdaIndex.lsdaOffset(mid) + sects.dso_base; - break; - } else if (lsdaIndex.functionOffset(mid) < funcStartOffset) { - low = mid + 1; - } else { - high = mid; - } - } - if (lsda == 0) { - _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with HAS_LSDA bit set for " + return false; + } + + // look up LSDA, if encoding says function has one + if (encoding & UNWIND_HAS_LSDA) { + UnwindSectionLsdaArray lsdaIndex(_addressSpace, lsdaArrayStartAddr); + uint32_t funcStartOffset = (uint32_t)(funcStart - sects.dso_base); + low = 0; + high = (uint32_t)(lsdaArrayEndAddr - lsdaArrayStartAddr) / + sizeof(unwind_info_section_header_lsda_index_entry); + // binary search looks for entry with exact match for functionOffset + if (log) + fprintf(stderr, + "\tbinary search of lsda table for targetFunctionOffset=0x%08X\n", + funcStartOffset); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (lsdaIndex.functionOffset(mid) == funcStartOffset) { + lsda = lsdaIndex.lsdaOffset(mid) + sects.dso_base; + break; + } else if (lsdaIndex.functionOffset(mid) < funcStartOffset) { + low = mid + 1; + } else { + high = mid; + } + } + if (lsda == 0) { + _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with HAS_LSDA bit set for " "pc=0x%0llX, but lsda table has no entry", - encoding, (uint64_t) pc); - return false; - } - } - + encoding, (uint64_t) pc); + return false; + } + } + // extract personality routine, if encoding says function has one - uint32_t personalityIndex = (encoding & UNWIND_PERSONALITY_MASK) >> - (__builtin_ctz(UNWIND_PERSONALITY_MASK)); - if (personalityIndex != 0) { - --personalityIndex; // change 1-based to zero-based index + uint32_t personalityIndex = (encoding & UNWIND_PERSONALITY_MASK) >> + (__builtin_ctz(UNWIND_PERSONALITY_MASK)); + if (personalityIndex != 0) { + 
--personalityIndex; // change 1-based to zero-based index if (personalityIndex >= sectionHeader.personalityArrayCount()) { - _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with personality index %d, " + _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with personality index %d, " "but personality table has only %d entries", - encoding, personalityIndex, - sectionHeader.personalityArrayCount()); - return false; - } - int32_t personalityDelta = (int32_t)_addressSpace.get32( - sects.compact_unwind_section + - sectionHeader.personalityArraySectionOffset() + - personalityIndex * sizeof(uint32_t)); - pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta; - personality = _addressSpace.getP(personalityPointer); - if (log) - fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " - "personalityDelta=0x%08X, personality=0x%08llX\n", - (uint64_t) pc, personalityDelta, (uint64_t) personality); - } - - if (log) - fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " - "encoding=0x%08X, lsda=0x%08llX for funcStart=0x%llX\n", - (uint64_t) pc, encoding, (uint64_t) lsda, (uint64_t) funcStart); - _info.start_ip = funcStart; - _info.end_ip = funcEnd; - _info.lsda = lsda; - _info.handler = personality; - _info.gp = 0; - _info.flags = 0; - _info.format = encoding; - _info.unwind_info = 0; - _info.unwind_info_size = 0; - _info.extra = sects.dso_base; - return true; -} + encoding, personalityIndex, + sectionHeader.personalityArrayCount()); + return false; + } + int32_t personalityDelta = (int32_t)_addressSpace.get32( + sects.compact_unwind_section + + sectionHeader.personalityArraySectionOffset() + + personalityIndex * sizeof(uint32_t)); + pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta; + personality = _addressSpace.getP(personalityPointer); + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " + "personalityDelta=0x%08X, personality=0x%08llX\n", + (uint64_t) pc, personalityDelta, (uint64_t) personality); + } + + 
if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " + "encoding=0x%08X, lsda=0x%08llX for funcStart=0x%llX\n", + (uint64_t) pc, encoding, (uint64_t) lsda, (uint64_t) funcStart); + _info.start_ip = funcStart; + _info.end_ip = funcEnd; + _info.lsda = lsda; + _info.handler = personality; + _info.gp = 0; + _info.flags = 0; + _info.format = encoding; + _info.unwind_info = 0; + _info.unwind_info_size = 0; + _info.extra = sects.dso_base; + return true; +} #endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) - - + + #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) -template +template bool UnwindCursor::getInfoFromSEH(pint_t pc) { pint_t base; RUNTIME_FUNCTION *unwindEntry = lookUpSEHUnwindInfo(pc, &base); @@ -1924,58 +1924,58 @@ bool UnwindCursor::getInfoFromSEH(pint_t pc) { template -void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { +void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) _isSigReturn = false; #endif pint_t pc = static_cast(this->getReg(UNW_REG_IP)); #if defined(_LIBUNWIND_ARM_EHABI) - // Remove the thumb bit so the IP represents the actual instruction address. - // This matches the behaviour of _Unwind_GetIP on arm. - pc &= (pint_t)~0x1; -#endif - + // Remove the thumb bit so the IP represents the actual instruction address. + // This matches the behaviour of _Unwind_GetIP on arm. + pc &= (pint_t)~0x1; +#endif + // Exit early if at the top of the stack. if (pc == 0) { _unwindInfoMissing = true; return; } - // If the last line of a function is a "throw" the compiler sometimes - // emits no instructions after the call to __cxa_throw. This means - // the return address is actually the start of the next function. - // To disambiguate this, back up the pc when we know it is a return - // address. - if (isReturnAddress) - --pc; - - // Ask address space object to find unwind sections for this pc. 
- UnwindInfoSections sects; - if (_addressSpace.findUnwindSections(pc, sects)) { + // If the last line of a function is a "throw" the compiler sometimes + // emits no instructions after the call to __cxa_throw. This means + // the return address is actually the start of the next function. + // To disambiguate this, back up the pc when we know it is a return + // address. + if (isReturnAddress) + --pc; + + // Ask address space object to find unwind sections for this pc. + UnwindInfoSections sects; + if (_addressSpace.findUnwindSections(pc, sects)) { #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) - // If there is a compact unwind encoding table, look there first. - if (sects.compact_unwind_section != 0) { - if (this->getInfoFromCompactEncodingSection(pc, sects)) { + // If there is a compact unwind encoding table, look there first. + if (sects.compact_unwind_section != 0) { + if (this->getInfoFromCompactEncodingSection(pc, sects)) { #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - // Found info in table, done unless encoding says to use dwarf. - uint32_t dwarfOffset; - if ((sects.dwarf_section != 0) && compactSaysUseDwarf(&dwarfOffset)) { - if (this->getInfoFromDwarfSection(pc, sects, dwarfOffset)) { - // found info in dwarf, done - return; - } - } - #endif - // If unwind table has entry, but entry says there is no unwind info, - // record that we have no unwind info. - if (_info.format == 0) - _unwindInfoMissing = true; - return; - } - } + // Found info in table, done unless encoding says to use dwarf. + uint32_t dwarfOffset; + if ((sects.dwarf_section != 0) && compactSaysUseDwarf(&dwarfOffset)) { + if (this->getInfoFromDwarfSection(pc, sects, dwarfOffset)) { + // found info in dwarf, done + return; + } + } + #endif + // If unwind table has entry, but entry says there is no unwind info, + // record that we have no unwind info. 
+ if (_info.format == 0) + _unwindInfoMissing = true; + return; + } + } #endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) - + #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) // If there is SEH unwind info, look there next. if (this->getInfoFromSEH(pc)) @@ -1983,61 +1983,61 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { #endif #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - // If there is dwarf unwind info, look there next. - if (sects.dwarf_section != 0) { - if (this->getInfoFromDwarfSection(pc, sects)) { - // found info in dwarf, done - return; - } - } -#endif - + // If there is dwarf unwind info, look there next. + if (sects.dwarf_section != 0) { + if (this->getInfoFromDwarfSection(pc, sects)) { + // found info in dwarf, done + return; + } + } +#endif + #if defined(_LIBUNWIND_ARM_EHABI) - // If there is ARM EHABI unwind info, look there next. - if (sects.arm_section != 0 && this->getInfoFromEHABISection(pc, sects)) - return; -#endif - } - + // If there is ARM EHABI unwind info, look there next. + if (sects.arm_section != 0 && this->getInfoFromEHABISection(pc, sects)) + return; +#endif + } + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - // There is no static unwind info for this pc. Look to see if an FDE was - // dynamically registered for it. + // There is no static unwind info for this pc. Look to see if an FDE was + // dynamically registered for it. pint_t cachedFDE = DwarfFDECache::findFDE(DwarfFDECache::kSearchAll, pc); - if (cachedFDE != 0) { + if (cachedFDE != 0) { typename CFI_Parser::FDE_Info fdeInfo; typename CFI_Parser::CIE_Info cieInfo; if (!CFI_Parser::decodeFDE(_addressSpace, cachedFDE, &fdeInfo, &cieInfo)) if (getInfoFromFdeCie(fdeInfo, cieInfo, pc, 0)) - return; - } - - // Lastly, ask AddressSpace object about platform specific ways to locate - // other FDEs. - pint_t fde; - if (_addressSpace.findOtherFDE(pc, fde)) { + return; + } + + // Lastly, ask AddressSpace object about platform specific ways to locate + // other FDEs. 
+ pint_t fde; + if (_addressSpace.findOtherFDE(pc, fde)) { typename CFI_Parser::FDE_Info fdeInfo; typename CFI_Parser::CIE_Info cieInfo; - if (!CFI_Parser::decodeFDE(_addressSpace, fde, &fdeInfo, &cieInfo)) { - // Double check this FDE is for a function that includes the pc. + if (!CFI_Parser::decodeFDE(_addressSpace, fde, &fdeInfo, &cieInfo)) { + // Double check this FDE is for a function that includes the pc. if ((fdeInfo.pcStart <= pc) && (pc < fdeInfo.pcEnd)) if (getInfoFromFdeCie(fdeInfo, cieInfo, pc, 0)) - return; - } - } + return; + } + } #endif // #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - + #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) if (setInfoForSigReturn()) return; #endif - // no unwind info, flag that we can't reliably unwind - _unwindInfoMissing = true; -} - + // no unwind info, flag that we can't reliably unwind + _unwindInfoMissing = true; +} + #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) -template +template bool UnwindCursor::setInfoForSigReturn(Registers_arm64 &) { // Look for the sigreturn trampoline. The trampoline's body is two // specific instructions (see below). Typically the trampoline comes from the @@ -2099,13 +2099,13 @@ int UnwindCursor::stepThroughSigReturn(Registers_arm64 &) { #endif // defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) template -int UnwindCursor::step() { - // Bottom of stack is defined is when unwind info cannot be found. - if (_unwindInfoMissing) - return UNW_STEP_END; - - // Use unwinding info to modify register set as if function returned. - int result; +int UnwindCursor::step() { + // Bottom of stack is defined is when unwind info cannot be found. + if (_unwindInfoMissing) + return UNW_STEP_END; + + // Use unwinding info to modify register set as if function returned. 
+ int result; #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) if (_isSigReturn) { result = this->stepThroughSigReturn(); @@ -2120,45 +2120,45 @@ int UnwindCursor::step() { result = this->stepWithDwarfFDE(); #elif defined(_LIBUNWIND_ARM_EHABI) result = this->stepWithEHABI(); -#else - #error Need _LIBUNWIND_SUPPORT_COMPACT_UNWIND or \ +#else + #error Need _LIBUNWIND_SUPPORT_COMPACT_UNWIND or \ _LIBUNWIND_SUPPORT_SEH_UNWIND or \ - _LIBUNWIND_SUPPORT_DWARF_UNWIND or \ - _LIBUNWIND_ARM_EHABI -#endif - } - - // update info based on new PC - if (result == UNW_STEP_SUCCESS) { - this->setInfoBasedOnIPRegister(true); - if (_unwindInfoMissing) - return UNW_STEP_END; - } - - return result; -} - -template -void UnwindCursor::getInfo(unw_proc_info_t *info) { + _LIBUNWIND_SUPPORT_DWARF_UNWIND or \ + _LIBUNWIND_ARM_EHABI +#endif + } + + // update info based on new PC + if (result == UNW_STEP_SUCCESS) { + this->setInfoBasedOnIPRegister(true); + if (_unwindInfoMissing) + return UNW_STEP_END; + } + + return result; +} + +template +void UnwindCursor::getInfo(unw_proc_info_t *info) { if (_unwindInfoMissing) memset(info, 0, sizeof(*info)); else *info = _info; -} - -template -bool UnwindCursor::getFunctionName(char *buf, size_t bufLen, - unw_word_t *offset) { - return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP), - buf, bufLen, offset); -} - +} + +template +bool UnwindCursor::getFunctionName(char *buf, size_t bufLen, + unw_word_t *offset) { + return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP), + buf, bufLen, offset); +} + #if defined(_LIBUNWIND_USE_CET) extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) { AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->get_registers(); } #endif -} // namespace libunwind - -#endif // __UNWINDCURSOR_HPP__ +} // namespace libunwind + +#endif // __UNWINDCURSOR_HPP__ diff --git a/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c 
b/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c index 1032fbf688c..951d5d219a3 100644 --- a/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c +++ b/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c @@ -1,29 +1,29 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Implements gcc extensions to the C++ ABI Exception Handling Level 1. -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include -#include - -#include "config.h" -#include "libunwind_ext.h" -#include "libunwind.h" -#include "Unwind-EHABI.h" -#include "unwind.h" - +// +// +// Implements gcc extensions to the C++ ABI Exception Handling Level 1. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind_ext.h" +#include "libunwind.h" +#include "Unwind-EHABI.h" +#include "unwind.h" + #if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) - + #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) #define PRIVATE_1 private_[0] #elif defined(_LIBUNWIND_ARM_EHABI) @@ -32,185 +32,185 @@ #define PRIVATE_1 private_1 #endif -/// Called by __cxa_rethrow(). -_LIBUNWIND_EXPORT _Unwind_Reason_Code -_Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) { +/// Called by __cxa_rethrow(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API( "_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%" PRIdPTR, (void *)exception_object, (intptr_t)exception_object->PRIVATE_1); - - // If this is non-forced and a stopping place was found, then this is a - // re-throw. 
- // Call _Unwind_RaiseException() as if this was a new exception + + // If this is non-forced and a stopping place was found, then this is a + // re-throw. + // Call _Unwind_RaiseException() as if this was a new exception if (exception_object->PRIVATE_1 == 0) { - return _Unwind_RaiseException(exception_object); - // Will return if there is no catch clause, so that __cxa_rethrow can call - // std::terminate(). - } - - // Call through to _Unwind_Resume() which distiguishes between forced and - // regular exceptions. - _Unwind_Resume(exception_object); - _LIBUNWIND_ABORT("_Unwind_Resume_or_Rethrow() called _Unwind_RaiseException()" - " which unexpectedly returned"); -} - -/// Called by personality handler during phase 2 to get base address for data -/// relative encodings. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetDataRelBase(struct _Unwind_Context *context) { - (void)context; + return _Unwind_RaiseException(exception_object); + // Will return if there is no catch clause, so that __cxa_rethrow can call + // std::terminate(). + } + + // Call through to _Unwind_Resume() which distiguishes between forced and + // regular exceptions. + _Unwind_Resume(exception_object); + _LIBUNWIND_ABORT("_Unwind_Resume_or_Rethrow() called _Unwind_RaiseException()" + " which unexpectedly returned"); +} + +/// Called by personality handler during phase 2 to get base address for data +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetDataRelBase(struct _Unwind_Context *context) { + (void)context; _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context); - _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); -} - - -/// Called by personality handler during phase 2 to get base address for text -/// relative encodings. 
-_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetTextRelBase(struct _Unwind_Context *context) { - (void)context; + _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); +} + + +/// Called by personality handler during phase 2 to get base address for text +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetTextRelBase(struct _Unwind_Context *context) { + (void)context; _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context); - _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); -} - - -/// Scans unwind information to find the function that contains the -/// specified code address "pc". -_LIBUNWIND_EXPORT void *_Unwind_FindEnclosingFunction(void *pc) { + _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); +} + + +/// Scans unwind information to find the function that contains the +/// specified code address "pc". +_LIBUNWIND_EXPORT void *_Unwind_FindEnclosingFunction(void *pc) { _LIBUNWIND_TRACE_API("_Unwind_FindEnclosingFunction(pc=%p)", pc); - // This is slow, but works. - // We create an unwind cursor then alter the IP to be pc - unw_cursor_t cursor; - unw_context_t uc; - unw_proc_info_t info; + // This is slow, but works. + // We create an unwind cursor then alter the IP to be pc + unw_cursor_t cursor; + unw_context_t uc; + unw_proc_info_t info; __unw_getcontext(&uc); __unw_init_local(&cursor, &uc); __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); if (__unw_get_proc_info(&cursor, &info) == UNW_ESUCCESS) return (void *)(intptr_t) info.start_ip; - else - return NULL; -} - -/// Walk every frame and call trace function at each one. If trace function -/// returns anything other than _URC_NO_REASON, then walk is terminated. -_LIBUNWIND_EXPORT _Unwind_Reason_Code -_Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { - unw_cursor_t cursor; - unw_context_t uc; + else + return NULL; +} + +/// Walk every frame and call trace function at each one. 
If trace function +/// returns anything other than _URC_NO_REASON, then walk is terminated. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { + unw_cursor_t cursor; + unw_context_t uc; __unw_getcontext(&uc); __unw_init_local(&cursor, &uc); - + _LIBUNWIND_TRACE_API("_Unwind_Backtrace(callback=%p)", - (void *)(uintptr_t)callback); - + (void *)(uintptr_t)callback); + #if defined(_LIBUNWIND_ARM_EHABI) - // Create a mock exception object for force unwinding. - _Unwind_Exception ex; - memset(&ex, '\0', sizeof(ex)); + // Create a mock exception object for force unwinding. + _Unwind_Exception ex; + memset(&ex, '\0', sizeof(ex)); strcpy((char *)&ex.exception_class, "CLNGUNW"); -#endif - - // walk each frame - while (true) { - _Unwind_Reason_Code result; - +#endif + + // walk each frame + while (true) { + _Unwind_Reason_Code result; + #if !defined(_LIBUNWIND_ARM_EHABI) // ask libunwind to get next frame (skip over first frame which is - // _Unwind_Backtrace()) + // _Unwind_Backtrace()) if (__unw_step(&cursor) <= 0) { - _LIBUNWIND_TRACE_UNWINDING(" _backtrace: ended because cursor reached " + _LIBUNWIND_TRACE_UNWINDING(" _backtrace: ended because cursor reached " "bottom of stack, returning %d", - _URC_END_OF_STACK); - return _URC_END_OF_STACK; - } -#else - // Get the information for this frame. - unw_proc_info_t frameInfo; + _URC_END_OF_STACK); + return _URC_END_OF_STACK; + } +#else + // Get the information for this frame. + unw_proc_info_t frameInfo; if (__unw_get_proc_info(&cursor, &frameInfo) != UNW_ESUCCESS) { - return _URC_END_OF_STACK; - } - - // Update the pr_cache in the mock exception object. 
- const uint32_t* unwindInfo = (uint32_t *) frameInfo.unwind_info; - ex.pr_cache.fnstart = frameInfo.start_ip; - ex.pr_cache.ehtp = (_Unwind_EHT_Header *) unwindInfo; - ex.pr_cache.additional= frameInfo.flags; - - struct _Unwind_Context *context = (struct _Unwind_Context *)&cursor; - // Get and call the personality function to unwind the frame. + return _URC_END_OF_STACK; + } + + // Update the pr_cache in the mock exception object. + const uint32_t* unwindInfo = (uint32_t *) frameInfo.unwind_info; + ex.pr_cache.fnstart = frameInfo.start_ip; + ex.pr_cache.ehtp = (_Unwind_EHT_Header *) unwindInfo; + ex.pr_cache.additional= frameInfo.flags; + + struct _Unwind_Context *context = (struct _Unwind_Context *)&cursor; + // Get and call the personality function to unwind the frame. _Unwind_Personality_Fn handler = (_Unwind_Personality_Fn)frameInfo.handler; - if (handler == NULL) { - return _URC_END_OF_STACK; - } - if (handler(_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, &ex, context) != - _URC_CONTINUE_UNWIND) { - return _URC_END_OF_STACK; - } + if (handler == NULL) { + return _URC_END_OF_STACK; + } + if (handler(_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, &ex, context) != + _URC_CONTINUE_UNWIND) { + return _URC_END_OF_STACK; + } #endif // defined(_LIBUNWIND_ARM_EHABI) - - // debugging - if (_LIBUNWIND_TRACING_UNWINDING) { - char functionName[512]; - unw_proc_info_t frame; - unw_word_t offset; + + // debugging + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionName[512]; + unw_proc_info_t frame; + unw_word_t offset; __unw_get_proc_name(&cursor, functionName, 512, &offset); __unw_get_proc_info(&cursor, &frame); - _LIBUNWIND_TRACE_UNWINDING( + _LIBUNWIND_TRACE_UNWINDING( " _backtrace: start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", context=%p", frame.start_ip, functionName, frame.lsda, - (void *)&cursor); - } - - // call trace function with this frame - result = (*callback)((struct _Unwind_Context *)(&cursor), ref); - if (result != _URC_NO_REASON) { - 
_LIBUNWIND_TRACE_UNWINDING( + (void *)&cursor); + } + + // call trace function with this frame + result = (*callback)((struct _Unwind_Context *)(&cursor), ref); + if (result != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( " _backtrace: ended because callback returned %d", result); - return result; - } - } -} - - + return result; + } + } +} + + /// Find DWARF unwind info for an address 'pc' in some function. -_LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc, - struct dwarf_eh_bases *bases) { - // This is slow, but works. - // We create an unwind cursor then alter the IP to be pc - unw_cursor_t cursor; - unw_context_t uc; - unw_proc_info_t info; +_LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc, + struct dwarf_eh_bases *bases) { + // This is slow, but works. + // We create an unwind cursor then alter the IP to be pc + unw_cursor_t cursor; + unw_context_t uc; + unw_proc_info_t info; __unw_getcontext(&uc); __unw_init_local(&cursor, &uc); __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); __unw_get_proc_info(&cursor, &info); - bases->tbase = (uintptr_t)info.extra; - bases->dbase = 0; // dbase not used on Mac OS X - bases->func = (uintptr_t)info.start_ip; + bases->tbase = (uintptr_t)info.extra; + bases->dbase = 0; // dbase not used on Mac OS X + bases->func = (uintptr_t)info.start_ip; _LIBUNWIND_TRACE_API("_Unwind_Find_FDE(pc=%p) => %p", pc, (void *)(intptr_t) info.unwind_info); return (void *)(intptr_t) info.unwind_info; -} - -/// Returns the CFA (call frame area, or stack pointer at start of function) -/// for the current context. -_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_word_t result; +} + +/// Returns the CFA (call frame area, or stack pointer at start of function) +/// for the current context. 
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; __unw_get_reg(cursor, UNW_REG_SP, &result); _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p) => 0x%" PRIxPTR, (void *)context, result); - return (uintptr_t)result; -} - - -/// Called by personality handler during phase 2 to get instruction pointer. -/// ipBefore is a boolean that says if IP is already adjusted to be the call -/// site address. Normally IP is the return address. -_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, - int *ipBefore) { + return (uintptr_t)result; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +/// ipBefore is a boolean that says if IP is already adjusted to be the call +/// site address. Normally IP is the return address. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore) { _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p)", (void *)context); int isSignalFrame = __unw_is_signal_frame((unw_cursor_t *)context); // Negative means some kind of error (probably UNW_ENOINFO), but we have no @@ -220,98 +220,98 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, *ipBefore = 0; else *ipBefore = 1; - return _Unwind_GetIP(context); -} - + return _Unwind_GetIP(context); +} + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - -/// Called by programs with dynamic code generators that want -/// to register a dynamically generated FDE. -/// This function has existed on Mac OS X since 10.4, but -/// was broken until 10.6. -_LIBUNWIND_EXPORT void __register_frame(const void *fde) { + +/// Called by programs with dynamic code generators that want +/// to register a dynamically generated FDE. +/// This function has existed on Mac OS X since 10.4, but +/// was broken until 10.6. 
+_LIBUNWIND_EXPORT void __register_frame(const void *fde) { _LIBUNWIND_TRACE_API("__register_frame(%p)", fde); __unw_add_dynamic_fde((unw_word_t)(uintptr_t)fde); -} - - -/// Called by programs with dynamic code generators that want -/// to unregister a dynamically generated FDE. -/// This function has existed on Mac OS X since 10.4, but -/// was broken until 10.6. -_LIBUNWIND_EXPORT void __deregister_frame(const void *fde) { +} + + +/// Called by programs with dynamic code generators that want +/// to unregister a dynamically generated FDE. +/// This function has existed on Mac OS X since 10.4, but +/// was broken until 10.6. +_LIBUNWIND_EXPORT void __deregister_frame(const void *fde) { _LIBUNWIND_TRACE_API("__deregister_frame(%p)", fde); __unw_remove_dynamic_fde((unw_word_t)(uintptr_t)fde); -} - - -// The following register/deregister functions are gcc extensions. -// They have existed on Mac OS X, but have never worked because Mac OS X -// before 10.6 used keymgr to track known FDEs, but these functions -// never got updated to use keymgr. -// For now, we implement these as do-nothing functions to keep any existing -// applications working. We also add the not in 10.6 symbol so that nwe -// application won't be able to use them. - +} + + +// The following register/deregister functions are gcc extensions. +// They have existed on Mac OS X, but have never worked because Mac OS X +// before 10.6 used keymgr to track known FDEs, but these functions +// never got updated to use keymgr. +// For now, we implement these as do-nothing functions to keep any existing +// applications working. We also add the not in 10.6 symbol so that nwe +// application won't be able to use them. 
+ #if defined(_LIBUNWIND_SUPPORT_FRAME_APIS) -_LIBUNWIND_EXPORT void __register_frame_info_bases(const void *fde, void *ob, - void *tb, void *db) { - (void)fde; - (void)ob; - (void)tb; - (void)db; +_LIBUNWIND_EXPORT void __register_frame_info_bases(const void *fde, void *ob, + void *tb, void *db) { + (void)fde; + (void)ob; + (void)tb; + (void)db; _LIBUNWIND_TRACE_API("__register_frame_info_bases(%p,%p, %p, %p)", - fde, ob, tb, db); - // do nothing, this function never worked in Mac OS X -} - -_LIBUNWIND_EXPORT void __register_frame_info(const void *fde, void *ob) { - (void)fde; - (void)ob; + fde, ob, tb, db); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info(const void *fde, void *ob) { + (void)fde; + (void)ob; _LIBUNWIND_TRACE_API("__register_frame_info(%p, %p)", fde, ob); - // do nothing, this function never worked in Mac OS X -} - -_LIBUNWIND_EXPORT void __register_frame_info_table_bases(const void *fde, - void *ob, void *tb, - void *db) { - (void)fde; - (void)ob; - (void)tb; - (void)db; - _LIBUNWIND_TRACE_API("__register_frame_info_table_bases" + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info_table_bases(const void *fde, + void *ob, void *tb, + void *db) { + (void)fde; + (void)ob; + (void)tb; + (void)db; + _LIBUNWIND_TRACE_API("__register_frame_info_table_bases" "(%p,%p, %p, %p)", fde, ob, tb, db); - // do nothing, this function never worked in Mac OS X -} - -_LIBUNWIND_EXPORT void __register_frame_info_table(const void *fde, void *ob) { - (void)fde; - (void)ob; + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info_table(const void *fde, void *ob) { + (void)fde; + (void)ob; _LIBUNWIND_TRACE_API("__register_frame_info_table(%p, %p)", fde, ob); - // do nothing, this function never worked in Mac OS X -} - -_LIBUNWIND_EXPORT void __register_frame_table(const void *fde) { - (void)fde; + // do nothing, 
this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_table(const void *fde) { + (void)fde; _LIBUNWIND_TRACE_API("__register_frame_table(%p)", fde); - // do nothing, this function never worked in Mac OS X -} - -_LIBUNWIND_EXPORT void *__deregister_frame_info(const void *fde) { - (void)fde; + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void *__deregister_frame_info(const void *fde) { + (void)fde; _LIBUNWIND_TRACE_API("__deregister_frame_info(%p)", fde); - // do nothing, this function never worked in Mac OS X - return NULL; -} - -_LIBUNWIND_EXPORT void *__deregister_frame_info_bases(const void *fde) { - (void)fde; + // do nothing, this function never worked in Mac OS X + return NULL; +} + +_LIBUNWIND_EXPORT void *__deregister_frame_info_bases(const void *fde) { + (void)fde; _LIBUNWIND_TRACE_API("__deregister_frame_info_bases(%p)", fde); - // do nothing, this function never worked in Mac OS X - return NULL; -} + // do nothing, this function never worked in Mac OS X + return NULL; +} #endif // defined(_LIBUNWIND_SUPPORT_FRAME_APIS) - + #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - + #endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) diff --git a/contrib/libs/libunwind/src/UnwindLevel1.c b/contrib/libs/libunwind/src/UnwindLevel1.c index 82338e7d365..13ca17cc6a1 100644 --- a/contrib/libs/libunwind/src/UnwindLevel1.c +++ b/contrib/libs/libunwind/src/UnwindLevel1.c @@ -1,38 +1,38 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Implements C++ ABI Exception Handling Level 1 as documented at: +// +// +// Implements C++ ABI Exception Handling Level 1 as documented at: // https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html -// using libunwind -// -//===----------------------------------------------------------------------===// - -// ARM EHABI does not specify _Unwind_{Get,Set}{GR,IP}(). Thus, we are -// defining inline functions to delegate the function calls to -// _Unwind_VRS_{Get,Set}(). However, some applications might declare the -// function protetype directly (instead of including ), thus we need -// to export these functions from libunwind.so as well. -#define _LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE 1 - -#include -#include -#include -#include -#include -#include - +// using libunwind +// +//===----------------------------------------------------------------------===// + +// ARM EHABI does not specify _Unwind_{Get,Set}{GR,IP}(). Thus, we are +// defining inline functions to delegate the function calls to +// _Unwind_VRS_{Get,Set}(). However, some applications might declare the +// function protetype directly (instead of including ), thus we need +// to export these functions from libunwind.so as well. 
+#define _LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE 1 + +#include +#include +#include +#include +#include +#include + #include "cet_unwind.h" #include "config.h" -#include "libunwind.h" +#include "libunwind.h" #include "libunwind_ext.h" -#include "unwind.h" - +#include "unwind.h" + #if !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) - + #ifndef _LIBUNWIND_SUPPORT_SEH_UNWIND // When CET is enabled, each "call" instruction will push return address to @@ -67,10 +67,10 @@ } while (0) #endif -static _Unwind_Reason_Code +static _Unwind_Reason_Code unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { __unw_init_local(cursor, uc); - + #ifdef _YNDX_LIBUNWIND_ENABLE_EXCEPTION_BACKTRACE _Unwind_Backtrace_Buffer* backtrace_buffer = exception_object->exception_class == _YNDX_LIBUNWIND_EXCEPTION_BACKTRACE_PRIMARY_CLASS || @@ -80,25 +80,25 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except : NULL; #endif - // Walk each frame looking for a place to stop. + // Walk each frame looking for a place to stop. while (true) { // Ask libunwind to get next frame (skip over first which is - // _Unwind_RaiseException). + // _Unwind_RaiseException). 
int stepResult = __unw_step(cursor); - if (stepResult == 0) { + if (stepResult == 0) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): __unw_step() reached " "bottom => _URC_END_OF_STACK", (void *)exception_object); - return _URC_END_OF_STACK; - } else if (stepResult < 0) { + return _URC_END_OF_STACK; + } else if (stepResult < 0) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): __unw_step failed => " "_URC_FATAL_PHASE1_ERROR", (void *)exception_object); - return _URC_FATAL_PHASE1_ERROR; - } - + return _URC_FATAL_PHASE1_ERROR; + } + #ifdef _YNDX_LIBUNWIND_ENABLE_EXCEPTION_BACKTRACE if (backtrace_buffer && backtrace_buffer->size < _YNDX_LIBUNWIND_EXCEPTION_BACKTRACE_SIZE) { unw_word_t pc; @@ -107,472 +107,472 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except } #endif - // See if frame has code to run (has personality routine). - unw_proc_info_t frameInfo; - unw_word_t sp; + // See if frame has code to run (has personality routine). + unw_proc_info_t frameInfo; + unw_word_t sp; if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " "failed => _URC_FATAL_PHASE1_ERROR", (void *)exception_object); - return _URC_FATAL_PHASE1_ERROR; - } - + return _URC_FATAL_PHASE1_ERROR; + } + #ifndef NDEBUG - // When tracing, print state information. - if (_LIBUNWIND_TRACING_UNWINDING) { - char functionBuf[512]; - const char *functionName = functionBuf; - unw_word_t offset; + // When tracing, print state information. 
+ if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || - (frameInfo.start_ip + offset > frameInfo.end_ip)) - functionName = ".anonymous."; - unw_word_t pc; + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + unw_word_t pc; __unw_get_reg(cursor, UNW_REG_IP, &pc); - _LIBUNWIND_TRACE_UNWINDING( + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "", - (void *)exception_object, pc, frameInfo.start_ip, functionName, - frameInfo.lsda, frameInfo.handler); - } + (void *)exception_object, pc, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } #endif - - // If there is a personality routine, ask it if it will want to stop at - // this frame. - if (frameInfo.handler != 0) { + + // If there is a personality routine, ask it if it will want to stop at + // this frame. 
+ if (frameInfo.handler != 0) { _Unwind_Personality_Fn p = (_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler); - _LIBUNWIND_TRACE_UNWINDING( + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): calling personality function %p", - (void *)exception_object, (void *)(uintptr_t)p); - _Unwind_Reason_Code personalityResult = - (*p)(1, _UA_SEARCH_PHASE, exception_object->exception_class, + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, _UA_SEARCH_PHASE, exception_object->exception_class, exception_object, (struct _Unwind_Context *)(cursor)); - switch (personalityResult) { - case _URC_HANDLER_FOUND: - // found a catch clause or locals that need destructing in this frame - // stop search and remember stack pointer at the frame + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember stack pointer at the frame __unw_get_reg(cursor, UNW_REG_SP, &sp); - exception_object->private_2 = (uintptr_t)sp; - _LIBUNWIND_TRACE_UNWINDING( + exception_object->private_2 = (uintptr_t)sp; + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND", - (void *)exception_object); - return _URC_NO_REASON; - - case _URC_CONTINUE_UNWIND: - _LIBUNWIND_TRACE_UNWINDING( + (void *)exception_object); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND", - (void *)exception_object); - // continue unwinding - break; - - default: - // something went wrong - _LIBUNWIND_TRACE_UNWINDING( + (void *)exception_object); + // continue unwinding + break; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", - (void *)exception_object); - return _URC_FATAL_PHASE1_ERROR; - } - } - } - return _URC_NO_REASON; -} - - -static _Unwind_Reason_Code + (void *)exception_object); + return 
_URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + + +static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { __unw_init_local(cursor, uc); - + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", - (void *)exception_object); - + (void *)exception_object); + // uc is initialized by __unw_getcontext in the parent frame. The first stack // frame walked is unwind_phase2. unsigned framesWalked = 1; - // Walk each frame until we reach where search phase said to stop. - while (true) { - + // Walk each frame until we reach where search phase said to stop. + while (true) { + // Ask libunwind to get next frame (skip over first which is - // _Unwind_RaiseException). + // _Unwind_RaiseException). int stepResult = __unw_step(cursor); - if (stepResult == 0) { + if (stepResult == 0) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): __unw_step() reached " "bottom => _URC_END_OF_STACK", (void *)exception_object); - return _URC_END_OF_STACK; - } else if (stepResult < 0) { + return _URC_END_OF_STACK; + } else if (stepResult < 0) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): __unw_step failed => " "_URC_FATAL_PHASE1_ERROR", (void *)exception_object); - return _URC_FATAL_PHASE2_ERROR; - } - - // Get info about this frame. - unw_word_t sp; - unw_proc_info_t frameInfo; + return _URC_FATAL_PHASE2_ERROR; + } + + // Get info about this frame. + unw_word_t sp; + unw_proc_info_t frameInfo; __unw_get_reg(cursor, UNW_REG_SP, &sp); if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " "failed => _URC_FATAL_PHASE1_ERROR", (void *)exception_object); - return _URC_FATAL_PHASE2_ERROR; - } - + return _URC_FATAL_PHASE2_ERROR; + } + #ifndef NDEBUG - // When tracing, print state information. 
- if (_LIBUNWIND_TRACING_UNWINDING) { - char functionBuf[512]; - const char *functionName = functionBuf; - unw_word_t offset; + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || - (frameInfo.start_ip + offset > frameInfo.end_ip)) - functionName = ".anonymous."; + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, sp=0x%" PRIxPTR ", lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, - (void *)exception_object, frameInfo.start_ip, - functionName, sp, frameInfo.lsda, - frameInfo.handler); - } + (void *)exception_object, frameInfo.start_ip, + functionName, sp, frameInfo.lsda, + frameInfo.handler); + } #endif - + ++framesWalked; - // If there is a personality routine, tell it we are unwinding. - if (frameInfo.handler != 0) { + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { _Unwind_Personality_Fn p = (_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler); - _Unwind_Action action = _UA_CLEANUP_PHASE; - if (sp == exception_object->private_2) { - // Tell personality this was the frame it marked in phase 1. - action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME); - } - _Unwind_Reason_Code personalityResult = - (*p)(1, action, exception_object->exception_class, exception_object, + _Unwind_Action action = _UA_CLEANUP_PHASE; + if (sp == exception_object->private_2) { + // Tell personality this was the frame it marked in phase 1. 
+ action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME); + } + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, (struct _Unwind_Context *)(cursor)); - switch (personalityResult) { - case _URC_CONTINUE_UNWIND: - // Continue unwinding - _LIBUNWIND_TRACE_UNWINDING( + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // Continue unwinding + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", - (void *)exception_object); - if (sp == exception_object->private_2) { - // Phase 1 said we would stop at this frame, but we did not... - _LIBUNWIND_ABORT("during phase1 personality function said it would " - "stop here, but now in phase2 it did not stop here"); - } - break; - case _URC_INSTALL_CONTEXT: - _LIBUNWIND_TRACE_UNWINDING( + (void *)exception_object); + if (sp == exception_object->private_2) { + // Phase 1 said we would stop at this frame, but we did not... + _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now in phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT", - (void *)exception_object); - // Personality routine says to transfer control to landing pad. - // We may get control back if landing pad calls _Unwind_Resume(). - if (_LIBUNWIND_TRACING_UNWINDING) { - unw_word_t pc; + (void *)exception_object); + // Personality routine says to transfer control to landing pad. + // We may get control back if landing pad calls _Unwind_Resume(). 
+ if (_LIBUNWIND_TRACING_UNWINDING) { + unw_word_t pc; __unw_get_reg(cursor, UNW_REG_IP, &pc); __unw_get_reg(cursor, UNW_REG_SP, &sp); - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " "user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR, - (void *)exception_object, pc, sp); - } + (void *)exception_object, pc, sp); + } __unw_phase2_resume(cursor, framesWalked); // __unw_phase2_resume() only returns if there was an error. - return _URC_FATAL_PHASE2_ERROR; - default: - // Personality routine returned an unknown result code. - _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", - personalityResult); - return _URC_FATAL_PHASE2_ERROR; - } - } - } - - // Clean up phase did not resume at the frame that the search phase - // said it would... - return _URC_FATAL_PHASE2_ERROR; -} - -static _Unwind_Reason_Code + return _URC_FATAL_PHASE2_ERROR; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Clean up phase did not resume at the frame that the search phase + // said it would... + return _URC_FATAL_PHASE2_ERROR; +} + +static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, - _Unwind_Exception *exception_object, - _Unwind_Stop_Fn stop, void *stop_parameter) { + _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { __unw_init_local(cursor, uc); - + // uc is initialized by __unw_getcontext in the parent frame. The first stack // frame walked is unwind_phase2_forced. unsigned framesWalked = 1; - // Walk each frame until we reach where search phase said to stop + // Walk each frame until we reach where search phase said to stop while (__unw_step(cursor) > 0) { - - // Update info about this frame. 
- unw_proc_info_t frameInfo; + + // Update info about this frame. + unw_proc_info_t frameInfo; if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step " "failed => _URC_END_OF_STACK", - (void *)exception_object); - return _URC_FATAL_PHASE2_ERROR; - } - + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + #ifndef NDEBUG - // When tracing, print state information. - if (_LIBUNWIND_TRACING_UNWINDING) { - char functionBuf[512]; - const char *functionName = functionBuf; - unw_word_t offset; + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || - (frameInfo.start_ip + offset > frameInfo.end_ip)) - functionName = ".anonymous."; - _LIBUNWIND_TRACE_UNWINDING( + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2_forced(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, - (void *)exception_object, frameInfo.start_ip, functionName, - frameInfo.lsda, frameInfo.handler); - } + (void *)exception_object, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } #endif - - // Call stop function at each frame. - _Unwind_Action action = - (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); - _Unwind_Reason_Code stopResult = - (*stop)(1, action, exception_object->exception_class, exception_object, + + // Call stop function at each frame. 
+ _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, (struct _Unwind_Context *)(cursor), stop_parameter); - _LIBUNWIND_TRACE_UNWINDING( + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2_forced(ex_ojb=%p): stop function returned %d", - (void *)exception_object, stopResult); - if (stopResult != _URC_NO_REASON) { - _LIBUNWIND_TRACE_UNWINDING( + (void *)exception_object, stopResult); + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2_forced(ex_ojb=%p): stopped by stop function", - (void *)exception_object); - return _URC_FATAL_PHASE2_ERROR; - } - + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + ++framesWalked; - // If there is a personality routine, tell it we are unwinding. - if (frameInfo.handler != 0) { + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { _Unwind_Personality_Fn p = (_Unwind_Personality_Fn)(intptr_t)(frameInfo.handler); - _LIBUNWIND_TRACE_UNWINDING( + _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2_forced(ex_ojb=%p): calling personality function %p", - (void *)exception_object, (void *)(uintptr_t)p); - _Unwind_Reason_Code personalityResult = - (*p)(1, action, exception_object->exception_class, exception_object, + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, (struct _Unwind_Context *)(cursor)); - switch (personalityResult) { - case _URC_CONTINUE_UNWIND: - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " - "personality returned " + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " "_URC_CONTINUE_UNWIND", - (void *)exception_object); - // Destructors called, continue unwinding - break; - case 
_URC_INSTALL_CONTEXT: - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " - "personality returned " + (void *)exception_object); + // Destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " "_URC_INSTALL_CONTEXT", - (void *)exception_object); - // We may get control back if landing pad calls _Unwind_Resume(). + (void *)exception_object); + // We may get control back if landing pad calls _Unwind_Resume(). __unw_phase2_resume(cursor, framesWalked); - break; - default: - // Personality routine returned an unknown result code. - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " - "personality returned %d, " + break; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " "_URC_FATAL_PHASE2_ERROR", - (void *)exception_object, personalityResult); - return _URC_FATAL_PHASE2_ERROR; - } - } - } - - // Call stop function one last time and tell it we've reached the end - // of the stack. - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + (void *)exception_object, personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Call stop function one last time and tell it we've reached the end + // of the stack. 
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " "function with _UA_END_OF_STACK", - (void *)exception_object); - _Unwind_Action lastAction = - (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); - (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (void *)exception_object); + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, (struct _Unwind_Context *)(cursor), stop_parameter); - - // Clean up phase did not resume at the frame that the search phase said it - // would. - return _URC_FATAL_PHASE2_ERROR; -} - - -/// Called by __cxa_throw. Only returns if there is a fatal error. -_LIBUNWIND_EXPORT _Unwind_Reason_Code -_Unwind_RaiseException(_Unwind_Exception *exception_object) { + + // Clean up phase did not resume at the frame that the search phase said it + // would. + return _URC_FATAL_PHASE2_ERROR; +} + + +/// Called by __cxa_throw. Only returns if there is a fatal error. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", - (void *)exception_object); - unw_context_t uc; + (void *)exception_object); + unw_context_t uc; unw_cursor_t cursor; __unw_getcontext(&uc); - - // Mark that this is a non-forced unwind, so _Unwind_Resume() - // can do the right thing. - exception_object->private_1 = 0; - exception_object->private_2 = 0; - - // phase 1: the search phase + + // Mark that this is a non-forced unwind, so _Unwind_Resume() + // can do the right thing. 
+ exception_object->private_1 = 0; + exception_object->private_2 = 0; + + // phase 1: the search phase _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); - if (phase1 != _URC_NO_REASON) - return phase1; - - // phase 2: the clean up phase + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase return unwind_phase2(&uc, &cursor, exception_object); -} - - - -/// When _Unwind_RaiseException() is in phase2, it hands control -/// to the personality function at each frame. The personality -/// may force a jump to a landing pad in that function, the landing -/// pad code may then call _Unwind_Resume() to continue with the -/// unwinding. Note: the call to _Unwind_Resume() is from compiler -/// geneated user code. All other _Unwind_* routines are called -/// by the C++ runtime __cxa_* routines. -/// -/// Note: re-throwing an exception (as opposed to continuing the unwind) -/// is implemented by having the code call __cxa_rethrow() which -/// in turn calls _Unwind_Resume_or_Rethrow(). -_LIBUNWIND_EXPORT void -_Unwind_Resume(_Unwind_Exception *exception_object) { +} + + + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. Note: the call to _Unwind_Resume() is from compiler +/// geneated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call __cxa_rethrow() which +/// in turn calls _Unwind_Resume_or_Rethrow(). 
+_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", (void *)exception_object); - unw_context_t uc; + unw_context_t uc; unw_cursor_t cursor; __unw_getcontext(&uc); - - if (exception_object->private_1 != 0) + + if (exception_object->private_1 != 0) unwind_phase2_forced(&uc, &cursor, exception_object, - (_Unwind_Stop_Fn) exception_object->private_1, - (void *)exception_object->private_2); - else + (_Unwind_Stop_Fn) exception_object->private_1, + (void *)exception_object->private_2); + else unwind_phase2(&uc, &cursor, exception_object); - - // Clients assume _Unwind_Resume() does not return, so all we can do is abort. - _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); -} - - - -/// Not used by C++. -/// Unwinds stack, calling "stop" function at each frame. -/// Could be used to implement longjmp(). -_LIBUNWIND_EXPORT _Unwind_Reason_Code -_Unwind_ForcedUnwind(_Unwind_Exception *exception_object, - _Unwind_Stop_Fn stop, void *stop_parameter) { + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + + + +/// Not used by C++. +/// Unwinds stack, calling "stop" function at each frame. +/// Could be used to implement longjmp(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { _LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)", - (void *)exception_object, (void *)(uintptr_t)stop); - unw_context_t uc; + (void *)exception_object, (void *)(uintptr_t)stop); + unw_context_t uc; unw_cursor_t cursor; __unw_getcontext(&uc); - - // Mark that this is a forced unwind, so _Unwind_Resume() can do - // the right thing. 
- exception_object->private_1 = (uintptr_t) stop; - exception_object->private_2 = (uintptr_t) stop_parameter; - - // do it + + // Mark that this is a forced unwind, so _Unwind_Resume() can do + // the right thing. + exception_object->private_1 = (uintptr_t) stop; + exception_object->private_2 = (uintptr_t) stop_parameter; + + // do it return unwind_phase2_forced(&uc, &cursor, exception_object, stop, stop_parameter); -} - - -/// Called by personality handler during phase 2 to get LSDA for current frame. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_proc_info_t frameInfo; - uintptr_t result = 0; +} + + +/// Called by personality handler during phase 2 to get LSDA for current frame. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) - result = (uintptr_t)frameInfo.lsda; - _LIBUNWIND_TRACE_API( + result = (uintptr_t)frameInfo.lsda; + _LIBUNWIND_TRACE_API( "_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR, - (void *)context, result); - if (result != 0) { - if (*((uint8_t *)result) != 0xFF) + (void *)context, result); + if (result != 0) { + if (*((uint8_t *)result) != 0xFF) _LIBUNWIND_DEBUG_LOG("lsda at 0x%" PRIxPTR " does not start with 0xFF", - result); - } - return result; -} - - -/// Called by personality handler during phase 2 to find the start of the -/// function. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetRegionStart(struct _Unwind_Context *context) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_proc_info_t frameInfo; - uintptr_t result = 0; + result); + } + return result; +} + + +/// Called by personality handler during phase 2 to find the start of the +/// function. 
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) - result = (uintptr_t)frameInfo.start_ip; + result = (uintptr_t)frameInfo.start_ip; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR, - (void *)context, result); - return result; -} - + (void *)context, result); + return result; +} + #endif // !_LIBUNWIND_SUPPORT_SEH_UNWIND - -/// Called by personality handler during phase 2 if a foreign exception -// is caught. -_LIBUNWIND_EXPORT void -_Unwind_DeleteException(_Unwind_Exception *exception_object) { + +/// Called by personality handler during phase 2 if a foreign exception +// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", - (void *)exception_object); - if (exception_object->exception_cleanup != NULL) - (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, - exception_object); -} - -/// Called by personality handler during phase 2 to get register values. -_LIBUNWIND_EXPORT uintptr_t -_Unwind_GetGR(struct _Unwind_Context *context, int index) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_word_t result; + (void *)exception_object); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + +/// Called by personality handler during phase 2 to get register values. 
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetGR(struct _Unwind_Context *context, int index) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; __unw_get_reg(cursor, index, &result); _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d) => 0x%" PRIxPTR, (void *)context, index, result); - return (uintptr_t)result; -} - -/// Called by personality handler during phase 2 to alter register values. -_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, - uintptr_t value) { + return (uintptr_t)result; +} + +/// Called by personality handler during phase 2 to alter register values. +_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t value) { _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%0" PRIxPTR ")", (void *)context, index, value); - unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_cursor_t *cursor = (unw_cursor_t *)context; __unw_set_reg(cursor, index, value); -} - -/// Called by personality handler during phase 2 to get instruction pointer. -_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { - unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_word_t result; +} + +/// Called by personality handler during phase 2 to get instruction pointer. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; __unw_get_reg(cursor, UNW_REG_IP, &result); _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIxPTR, (void *)context, result); - return (uintptr_t)result; -} - -/// Called by personality handler during phase 2 to alter instruction pointer, -/// such as setting where the landing pad is, so _Unwind_Resume() will -/// start executing in the landing pad. 
-_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, - uintptr_t value) { + return (uintptr_t)result; +} + +/// Called by personality handler during phase 2 to alter instruction pointer, +/// such as setting where the landing pad is, so _Unwind_Resume() will +/// start executing in the landing pad. +_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, + uintptr_t value) { _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%0" PRIxPTR ")", (void *)context, value); - unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_cursor_t *cursor = (unw_cursor_t *)context; __unw_set_reg(cursor, UNW_REG_IP, value); -} - +} + #endif // !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) diff --git a/contrib/libs/libunwind/src/UnwindRegistersRestore.S b/contrib/libs/libunwind/src/UnwindRegistersRestore.S index 0c10e2b0e48..1df97f5fc41 100644 --- a/contrib/libs/libunwind/src/UnwindRegistersRestore.S +++ b/contrib/libs/libunwind/src/UnwindRegistersRestore.S @@ -1,67 +1,67 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "assembly.h" - - .text - +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .text + #if !defined(__USING_SJLJ_EXCEPTIONS__) -#if defined(__i386__) +#if defined(__i386__) DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto) -# +# # extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *); -# -# On entry: -# + + -# +-----------------------+ -# + thread_state pointer + -# +-----------------------+ -# + return address + -# +-----------------------+ <-- SP -# + + +# +# On entry: +# + + +# +-----------------------+ +# + thread_state pointer + +# +-----------------------+ +# + return address + +# +-----------------------+ <-- SP +# + + _LIBUNWIND_CET_ENDBR - movl 4(%esp), %eax - # set up eax and ret on new stack location - movl 28(%eax), %edx # edx holds new stack pointer - subl $8,%edx - movl %edx, 28(%eax) - movl 0(%eax), %ebx - movl %ebx, 0(%edx) - movl 40(%eax), %ebx - movl %ebx, 4(%edx) - # we now have ret and eax pushed onto where new stack will be - # restore all registers - movl 4(%eax), %ebx - movl 8(%eax), %ecx - movl 12(%eax), %edx - movl 16(%eax), %edi - movl 20(%eax), %esi - movl 24(%eax), %ebp - movl 28(%eax), %esp - # skip ss - # skip eflags - pop %eax # eax was already pushed on new stack + movl 4(%esp), %eax + # set up eax and ret on new stack location + movl 28(%eax), %edx # edx holds new stack pointer + subl $8,%edx + movl %edx, 28(%eax) + movl 0(%eax), %ebx + movl %ebx, 0(%edx) + movl 40(%eax), %ebx + movl %ebx, 4(%edx) + # we now have ret and eax pushed onto where new stack will be + # restore all registers + movl 4(%eax), %ebx + movl 8(%eax), %ecx + movl 12(%eax), %edx + movl 16(%eax), %edi + movl 20(%eax), %esi + movl 24(%eax), %ebp + movl 28(%eax), %esp + # skip ss + # skip eflags + pop %eax # eax was already pushed 
on new stack pop %ecx jmp *%ecx - # skip cs - # skip ds - # skip es - # skip fs - # skip gs - -#elif defined(__x86_64__) - + # skip cs + # skip ds + # skip es + # skip fs + # skip gs + +#elif defined(__x86_64__) + DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto) -# +# # extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *); -# +# #if defined(_WIN64) # On entry, thread_state pointer is in rcx; move it into rdi # to share restore code below. Since this routine restores and @@ -70,38 +70,38 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto) # mustn't clobber some of them. movq %rcx, %rdi #else -# On entry, thread_state pointer is in rdi +# On entry, thread_state pointer is in rdi #endif - + _LIBUNWIND_CET_ENDBR - movq 56(%rdi), %rax # rax holds new stack pointer - subq $16, %rax - movq %rax, 56(%rdi) - movq 32(%rdi), %rbx # store new rdi on new stack - movq %rbx, 0(%rax) - movq 128(%rdi), %rbx # store new rip on new stack - movq %rbx, 8(%rax) - # restore all registers - movq 0(%rdi), %rax - movq 8(%rdi), %rbx - movq 16(%rdi), %rcx - movq 24(%rdi), %rdx - # restore rdi later - movq 40(%rdi), %rsi - movq 48(%rdi), %rbp - # restore rsp later - movq 64(%rdi), %r8 - movq 72(%rdi), %r9 - movq 80(%rdi), %r10 - movq 88(%rdi), %r11 - movq 96(%rdi), %r12 - movq 104(%rdi), %r13 - movq 112(%rdi), %r14 - movq 120(%rdi), %r15 - # skip rflags - # skip cs - # skip fs - # skip gs + movq 56(%rdi), %rax # rax holds new stack pointer + subq $16, %rax + movq %rax, 56(%rdi) + movq 32(%rdi), %rbx # store new rdi on new stack + movq %rbx, 0(%rax) + movq 128(%rdi), %rbx # store new rip on new stack + movq %rbx, 8(%rax) + # restore all registers + movq 0(%rdi), %rax + movq 8(%rdi), %rbx + movq 16(%rdi), %rcx + movq 24(%rdi), %rdx + # restore rdi later + movq 40(%rdi), %rsi + movq 48(%rdi), %rbp + # restore rsp later + movq 64(%rdi), %r8 + movq 72(%rdi), %r9 + movq 80(%rdi), %r10 + movq 88(%rdi), %r11 + movq 96(%rdi), %r12 + movq 104(%rdi), %r13 + 
movq 112(%rdi), %r14 + movq 120(%rdi), %r15 + # skip rflags + # skip cs + # skip fs + # skip gs #if defined(_WIN64) movdqu 176(%rdi),%xmm0 @@ -121,12 +121,12 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto) movdqu 400(%rdi),%xmm14 movdqu 416(%rdi),%xmm15 #endif - movq 56(%rdi), %rsp # cut back rsp to new location - pop %rdi # rdi was saved here earlier + movq 56(%rdi), %rsp # cut back rsp to new location + pop %rdi # rdi was saved here earlier pop %rcx jmpq *%rcx - - + + #elif defined(__powerpc64__) DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_ppc646jumptoEv) @@ -395,7 +395,7 @@ Lnovec: bctr #elif defined(__powerpc__) - + DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) // // void libunwind::Registers_ppc::jumpto() @@ -403,7 +403,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) // On entry: // thread_state pointer is in r3 // - + // restore integral registerrs // skip r0 for now // skip r1 for now @@ -437,7 +437,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) lwz 29,124(3) lwz 30,128(3) lwz 31,132(3) - + #ifndef __NO_FPRS__ // restore float registers lfd 0, 160(3) @@ -473,18 +473,18 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) lfd 30,400(3) lfd 31,408(3) #endif - + #if defined(__ALTIVEC__) // restore vector registers if any are in use lwz 5, 156(3) // test VRsave cmpwi 5, 0 beq Lnovec - + subi 4, 1, 16 rlwinm 4, 4, 0, 0, 27 // mask low 4-bits // r4 is now a 16-byte aligned pointer into the red zone // the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer - + #define LOAD_VECTOR_UNALIGNEDl(_index) \ andis. 0, 5, (1 PPC_LEFT_SHIFT(15-_index)) SEPARATOR \ @@ -499,7 +499,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) stw 0, 12(%r4) SEPARATOR \ lvx _index, 0, 4 SEPARATOR \ Ldone ## _index: - + #define LOAD_VECTOR_UNALIGNEDh(_index) \ andi. 
0, 5, (1 PPC_LEFT_SHIFT(31-_index)) SEPARATOR \ beq Ldone ## _index SEPARATOR \ @@ -513,43 +513,43 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) stw 0, 12(4) SEPARATOR \ lvx _index, 0, 4 SEPARATOR \ Ldone ## _index: - - - LOAD_VECTOR_UNALIGNEDl(0) - LOAD_VECTOR_UNALIGNEDl(1) - LOAD_VECTOR_UNALIGNEDl(2) - LOAD_VECTOR_UNALIGNEDl(3) - LOAD_VECTOR_UNALIGNEDl(4) - LOAD_VECTOR_UNALIGNEDl(5) - LOAD_VECTOR_UNALIGNEDl(6) - LOAD_VECTOR_UNALIGNEDl(7) - LOAD_VECTOR_UNALIGNEDl(8) - LOAD_VECTOR_UNALIGNEDl(9) - LOAD_VECTOR_UNALIGNEDl(10) - LOAD_VECTOR_UNALIGNEDl(11) - LOAD_VECTOR_UNALIGNEDl(12) - LOAD_VECTOR_UNALIGNEDl(13) - LOAD_VECTOR_UNALIGNEDl(14) - LOAD_VECTOR_UNALIGNEDl(15) - LOAD_VECTOR_UNALIGNEDh(16) - LOAD_VECTOR_UNALIGNEDh(17) - LOAD_VECTOR_UNALIGNEDh(18) - LOAD_VECTOR_UNALIGNEDh(19) - LOAD_VECTOR_UNALIGNEDh(20) - LOAD_VECTOR_UNALIGNEDh(21) - LOAD_VECTOR_UNALIGNEDh(22) - LOAD_VECTOR_UNALIGNEDh(23) - LOAD_VECTOR_UNALIGNEDh(24) - LOAD_VECTOR_UNALIGNEDh(25) - LOAD_VECTOR_UNALIGNEDh(26) - LOAD_VECTOR_UNALIGNEDh(27) - LOAD_VECTOR_UNALIGNEDh(28) - LOAD_VECTOR_UNALIGNEDh(29) - LOAD_VECTOR_UNALIGNEDh(30) - LOAD_VECTOR_UNALIGNEDh(31) + + + LOAD_VECTOR_UNALIGNEDl(0) + LOAD_VECTOR_UNALIGNEDl(1) + LOAD_VECTOR_UNALIGNEDl(2) + LOAD_VECTOR_UNALIGNEDl(3) + LOAD_VECTOR_UNALIGNEDl(4) + LOAD_VECTOR_UNALIGNEDl(5) + LOAD_VECTOR_UNALIGNEDl(6) + LOAD_VECTOR_UNALIGNEDl(7) + LOAD_VECTOR_UNALIGNEDl(8) + LOAD_VECTOR_UNALIGNEDl(9) + LOAD_VECTOR_UNALIGNEDl(10) + LOAD_VECTOR_UNALIGNEDl(11) + LOAD_VECTOR_UNALIGNEDl(12) + LOAD_VECTOR_UNALIGNEDl(13) + LOAD_VECTOR_UNALIGNEDl(14) + LOAD_VECTOR_UNALIGNEDl(15) + LOAD_VECTOR_UNALIGNEDh(16) + LOAD_VECTOR_UNALIGNEDh(17) + LOAD_VECTOR_UNALIGNEDh(18) + LOAD_VECTOR_UNALIGNEDh(19) + LOAD_VECTOR_UNALIGNEDh(20) + LOAD_VECTOR_UNALIGNEDh(21) + LOAD_VECTOR_UNALIGNEDh(22) + LOAD_VECTOR_UNALIGNEDh(23) + LOAD_VECTOR_UNALIGNEDh(24) + LOAD_VECTOR_UNALIGNEDh(25) + LOAD_VECTOR_UNALIGNEDh(26) + LOAD_VECTOR_UNALIGNEDh(27) + LOAD_VECTOR_UNALIGNEDh(28) + 
LOAD_VECTOR_UNALIGNEDh(29) + LOAD_VECTOR_UNALIGNEDh(30) + LOAD_VECTOR_UNALIGNEDh(31) #endif - -Lnovec: + +Lnovec: lwz 0, 136(3) // __cr mtcr 0 lwz 0, 148(3) // __ctr @@ -561,79 +561,79 @@ Lnovec: lwz 4, 24(3) // do r4 now lwz 1, 12(3) // do sp now lwz 3, 20(3) // do r3 last - bctr - + bctr + #elif defined(__aarch64__) - -// + +// // extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); -// -// On entry: -// thread_state pointer is in x0 -// - .p2align 2 +// +// On entry: +// thread_state pointer is in x0 +// + .p2align 2 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) - // skip restore of x0,x1 for now - ldp x2, x3, [x0, #0x010] - ldp x4, x5, [x0, #0x020] - ldp x6, x7, [x0, #0x030] - ldp x8, x9, [x0, #0x040] - ldp x10,x11, [x0, #0x050] - ldp x12,x13, [x0, #0x060] - ldp x14,x15, [x0, #0x070] + // skip restore of x0,x1 for now + ldp x2, x3, [x0, #0x010] + ldp x4, x5, [x0, #0x020] + ldp x6, x7, [x0, #0x030] + ldp x8, x9, [x0, #0x040] + ldp x10,x11, [x0, #0x050] + ldp x12,x13, [x0, #0x060] + ldp x14,x15, [x0, #0x070] // x16 and x17 were clobbered by the call into the unwinder, so no point in // restoring them. 
- ldp x18,x19, [x0, #0x090] - ldp x20,x21, [x0, #0x0A0] - ldp x22,x23, [x0, #0x0B0] - ldp x24,x25, [x0, #0x0C0] - ldp x26,x27, [x0, #0x0D0] + ldp x18,x19, [x0, #0x090] + ldp x20,x21, [x0, #0x0A0] + ldp x22,x23, [x0, #0x0B0] + ldp x24,x25, [x0, #0x0C0] + ldp x26,x27, [x0, #0x0D0] ldp x28,x29, [x0, #0x0E0] ldr x30, [x0, #0x100] // restore pc into lr - - ldp d0, d1, [x0, #0x110] - ldp d2, d3, [x0, #0x120] - ldp d4, d5, [x0, #0x130] - ldp d6, d7, [x0, #0x140] - ldp d8, d9, [x0, #0x150] - ldp d10,d11, [x0, #0x160] - ldp d12,d13, [x0, #0x170] - ldp d14,d15, [x0, #0x180] - ldp d16,d17, [x0, #0x190] - ldp d18,d19, [x0, #0x1A0] - ldp d20,d21, [x0, #0x1B0] - ldp d22,d23, [x0, #0x1C0] - ldp d24,d25, [x0, #0x1D0] - ldp d26,d27, [x0, #0x1E0] - ldp d28,d29, [x0, #0x1F0] - ldr d30, [x0, #0x200] - ldr d31, [x0, #0x208] - + + ldp d0, d1, [x0, #0x110] + ldp d2, d3, [x0, #0x120] + ldp d4, d5, [x0, #0x130] + ldp d6, d7, [x0, #0x140] + ldp d8, d9, [x0, #0x150] + ldp d10,d11, [x0, #0x160] + ldp d12,d13, [x0, #0x170] + ldp d14,d15, [x0, #0x180] + ldp d16,d17, [x0, #0x190] + ldp d18,d19, [x0, #0x1A0] + ldp d20,d21, [x0, #0x1B0] + ldp d22,d23, [x0, #0x1C0] + ldp d24,d25, [x0, #0x1D0] + ldp d26,d27, [x0, #0x1E0] + ldp d28,d29, [x0, #0x1F0] + ldr d30, [x0, #0x200] + ldr d31, [x0, #0x208] + // Finally, restore sp. This must be done after the the last read from the // context struct, because it is allocated on the stack, and an exception // could clobber the de-allocated portion of the stack after sp has been // restored. 
ldr x16, [x0, #0x0F8] - ldp x0, x1, [x0, #0x000] // restore x0,x1 + ldp x0, x1, [x0, #0x000] // restore x0,x1 mov sp,x16 // restore sp ret x30 // jump to pc - -#elif defined(__arm__) && !defined(__APPLE__) - -#if !defined(__ARM_ARCH_ISA_ARM) + +#elif defined(__arm__) && !defined(__APPLE__) + +#if !defined(__ARM_ARCH_ISA_ARM) #if (__ARM_ARCH_ISA_THUMB == 2) .syntax unified #endif - .thumb -#endif - -@ -@ void libunwind::Registers_arm::restoreCoreAndJumpTo() -@ -@ On entry: -@ thread_state pointer is in r0 -@ - .p2align 2 + .thumb +#endif + +@ +@ void libunwind::Registers_arm::restoreCoreAndJumpTo() +@ +@ On entry: +@ thread_state pointer is in r0 +@ + .p2align 2 DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv) #if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 @ r8-r11: ldm into r1-r4, then mov to r8-r11 @@ -647,180 +647,180 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv) @ r12 does not need loading, it it the intra-procedure-call scratch register ldr r2, [r0, #0x34] ldr r3, [r0, #0x3c] - mov sp, r2 - mov lr, r3 @ restore pc into lr - ldm r0, {r0-r7} -#else - @ Use lr as base so that r0 can be restored. - mov lr, r0 - @ 32bit thumb-2 restrictions for ldm: - @ . the sp (r13) cannot be in the list - @ . the pc (r15) and lr (r14) cannot both be in the list in an LDM instruction - ldm lr, {r0-r12} - ldr sp, [lr, #52] - ldr lr, [lr, #60] @ restore pc into lr -#endif + mov sp, r2 + mov lr, r3 @ restore pc into lr + ldm r0, {r0-r7} +#else + @ Use lr as base so that r0 can be restored. + mov lr, r0 + @ 32bit thumb-2 restrictions for ldm: + @ . the sp (r13) cannot be in the list + @ . 
the pc (r15) and lr (r14) cannot both be in the list in an LDM instruction + ldm lr, {r0-r12} + ldr sp, [lr, #52] + ldr lr, [lr, #60] @ restore pc into lr +#endif #if defined(__ARM_FEATURE_BTI_DEFAULT) && !defined(__ARM_ARCH_ISA_ARM) // 'bx' is not BTI setting when used with lr, therefore r12 is used instead mov r12, lr JMP(r12) #else - JMP(lr) + JMP(lr) #endif - -@ -@ static void libunwind::Registers_arm::restoreVFPWithFLDMD(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 + +@ +@ static void libunwind::Registers_arm::restoreVFPWithFLDMD(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) - .fpu vfpv3-d16 + .fpu vfpv3-d16 #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMDEPv) - @ VFP and iwMMX instructions are only available when compiling with the flags - @ that enable them. We do not want to do that in the library (because we do not - @ want the compiler to generate instructions that access those) but this is - @ only accessed if the personality routine needs these registers. Use of - @ these registers implies they are, actually, available on the target, so - @ it's ok to execute. - @ So, generate the instruction using the corresponding coprocessor mnemonic. - vldmia r0, {d0-d15} - JMP(lr) - -@ -@ static void libunwind::Registers_arm::restoreVFPWithFLDMX(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 + @ VFP and iwMMX instructions are only available when compiling with the flags + @ that enable them. We do not want to do that in the library (because we do not + @ want the compiler to generate instructions that access those) but this is + @ only accessed if the personality routine needs these registers. Use of + @ these registers implies they are, actually, available on the target, so + @ it's ok to execute. + @ So, generate the instruction using the corresponding coprocessor mnemonic. 
+ vldmia r0, {d0-d15} + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPWithFLDMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) - .fpu vfpv3-d16 + .fpu vfpv3-d16 #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMXEPv) - vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia - JMP(lr) - -@ -@ static void libunwind::Registers_arm::restoreVFPv3(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 + vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPv3(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) - .fpu vfpv3 + .fpu vfpv3 #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPv) - vldmia r0, {d16-d31} - JMP(lr) - + vldmia r0, {d16-d31} + JMP(lr) + #if defined(__ARM_WMMX) -@ -@ static void libunwind::Registers_arm::restoreiWMMX(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 +@ +@ static void libunwind::Registers_arm::restoreiWMMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) .arch armv5te #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPv) - ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8 - ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8 - ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8 - ldcl p1, cr3, [r0], #8 @ wldrd wR3, [r0], #8 - ldcl p1, cr4, [r0], #8 @ wldrd wR4, [r0], #8 - ldcl p1, cr5, [r0], #8 @ wldrd wR5, [r0], #8 - ldcl p1, cr6, [r0], #8 @ wldrd wR6, [r0], #8 - ldcl p1, cr7, [r0], #8 @ wldrd wR7, [r0], #8 - ldcl p1, cr8, [r0], #8 @ wldrd wR8, [r0], #8 - ldcl p1, cr9, [r0], #8 @ wldrd wR9, [r0], #8 - ldcl p1, cr10, [r0], #8 @ wldrd wR10, [r0], #8 - ldcl p1, cr11, [r0], #8 @ wldrd wR11, [r0], #8 - ldcl p1, cr12, [r0], #8 @ wldrd 
wR12, [r0], #8 - ldcl p1, cr13, [r0], #8 @ wldrd wR13, [r0], #8 - ldcl p1, cr14, [r0], #8 @ wldrd wR14, [r0], #8 - ldcl p1, cr15, [r0], #8 @ wldrd wR15, [r0], #8 - JMP(lr) - -@ -@ static void libunwind::Registers_arm::restoreiWMMXControl(unw_uint32_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 + ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8 + ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8 + ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8 + ldcl p1, cr3, [r0], #8 @ wldrd wR3, [r0], #8 + ldcl p1, cr4, [r0], #8 @ wldrd wR4, [r0], #8 + ldcl p1, cr5, [r0], #8 @ wldrd wR5, [r0], #8 + ldcl p1, cr6, [r0], #8 @ wldrd wR6, [r0], #8 + ldcl p1, cr7, [r0], #8 @ wldrd wR7, [r0], #8 + ldcl p1, cr8, [r0], #8 @ wldrd wR8, [r0], #8 + ldcl p1, cr9, [r0], #8 @ wldrd wR9, [r0], #8 + ldcl p1, cr10, [r0], #8 @ wldrd wR10, [r0], #8 + ldcl p1, cr11, [r0], #8 @ wldrd wR11, [r0], #8 + ldcl p1, cr12, [r0], #8 @ wldrd wR12, [r0], #8 + ldcl p1, cr13, [r0], #8 @ wldrd wR13, [r0], #8 + ldcl p1, cr14, [r0], #8 @ wldrd wR14, [r0], #8 + ldcl p1, cr15, [r0], #8 @ wldrd wR15, [r0], #8 + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreiWMMXControl(unw_uint32_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) .arch armv5te #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXControlEPj) - ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4 - ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4 - ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4 - ldc2 p1, cr11, [r0], #4 @ wldrw wCGR3, [r0], #4 - JMP(lr) - + ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4 + ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4 + ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4 + ldc2 p1, cr11, [r0], #4 @ wldrw wCGR3, [r0], #4 + JMP(lr) + #endif -#elif defined(__or1k__) - +#elif defined(__or1k__) + DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv) -# -# void libunwind::Registers_or1k::jumpto() -# -# On entry: -# thread_state pointer is 
in r3 -# - +# +# void libunwind::Registers_or1k::jumpto() +# +# On entry: +# thread_state pointer is in r3 +# + # restore integral registers - l.lwz r0, 0(r3) - l.lwz r1, 4(r3) - l.lwz r2, 8(r3) - # skip r3 for now - l.lwz r4, 16(r3) - l.lwz r5, 20(r3) - l.lwz r6, 24(r3) - l.lwz r7, 28(r3) - l.lwz r8, 32(r3) + l.lwz r0, 0(r3) + l.lwz r1, 4(r3) + l.lwz r2, 8(r3) + # skip r3 for now + l.lwz r4, 16(r3) + l.lwz r5, 20(r3) + l.lwz r6, 24(r3) + l.lwz r7, 28(r3) + l.lwz r8, 32(r3) # skip r9 - l.lwz r10, 40(r3) - l.lwz r11, 44(r3) - l.lwz r12, 48(r3) - l.lwz r13, 52(r3) - l.lwz r14, 56(r3) - l.lwz r15, 60(r3) - l.lwz r16, 64(r3) - l.lwz r17, 68(r3) - l.lwz r18, 72(r3) - l.lwz r19, 76(r3) - l.lwz r20, 80(r3) - l.lwz r21, 84(r3) - l.lwz r22, 88(r3) - l.lwz r23, 92(r3) - l.lwz r24, 96(r3) - l.lwz r25,100(r3) - l.lwz r26,104(r3) - l.lwz r27,108(r3) - l.lwz r28,112(r3) - l.lwz r29,116(r3) - l.lwz r30,120(r3) - l.lwz r31,124(r3) - + l.lwz r10, 40(r3) + l.lwz r11, 44(r3) + l.lwz r12, 48(r3) + l.lwz r13, 52(r3) + l.lwz r14, 56(r3) + l.lwz r15, 60(r3) + l.lwz r16, 64(r3) + l.lwz r17, 68(r3) + l.lwz r18, 72(r3) + l.lwz r19, 76(r3) + l.lwz r20, 80(r3) + l.lwz r21, 84(r3) + l.lwz r22, 88(r3) + l.lwz r23, 92(r3) + l.lwz r24, 96(r3) + l.lwz r25,100(r3) + l.lwz r26,104(r3) + l.lwz r27,108(r3) + l.lwz r28,112(r3) + l.lwz r29,116(r3) + l.lwz r30,120(r3) + l.lwz r31,124(r3) + # load new pc into ra l.lwz r9, 128(r3) - # at last, restore r3 - l.lwz r3, 12(r3) - - # jump to pc - l.jr r9 - l.nop - + # at last, restore r3 + l.lwz r3, 12(r3) + + # jump to pc + l.jr r9 + l.nop + #elif defined(__hexagon__) # On entry: # thread_state pointer is in r2 @@ -927,7 +927,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind18Registers_mips_o326jumptoEv) ldc1 $f29, (4 * 36 + 8 * 29)($4) ldc1 $f30, (4 * 36 + 8 * 30)($4) ldc1 $f31, (4 * 36 + 8 * 31)($4) -#endif +#endif #endif // restore hi and lo lw $8, (4 * 33)($4) diff --git a/contrib/libs/libunwind/src/UnwindRegistersSave.S 
b/contrib/libs/libunwind/src/UnwindRegistersSave.S index b6170bcdc68..9566bb0335f 100644 --- a/contrib/libs/libunwind/src/UnwindRegistersSave.S +++ b/contrib/libs/libunwind/src/UnwindRegistersSave.S @@ -1,68 +1,68 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "assembly.h" - - .text - +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .text + #if !defined(__USING_SJLJ_EXCEPTIONS__) -#if defined(__i386__) - -# +#if defined(__i386__) + +# # extern int __unw_getcontext(unw_context_t* thread_state) -# -# On entry: -# + + -# +-----------------------+ -# + thread_state pointer + -# +-----------------------+ -# + return address + -# +-----------------------+ <-- SP -# + + -# +# +# On entry: +# + + +# +-----------------------+ +# + thread_state pointer + +# +-----------------------+ +# + return address + +# +-----------------------+ <-- SP +# + + +# DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) _LIBUNWIND_CET_ENDBR - push %eax - movl 8(%esp), %eax - movl %ebx, 4(%eax) - movl %ecx, 8(%eax) - movl %edx, 12(%eax) - movl %edi, 16(%eax) - movl %esi, 20(%eax) - movl %ebp, 24(%eax) - movl %esp, %edx - addl $8, %edx - movl %edx, 28(%eax) # store what sp was at call site as esp - # skip ss - # skip eflags - movl 4(%esp), %edx - movl %edx, 40(%eax) # store return address as eip - # skip cs - # skip ds - # skip es - # skip fs - # skip gs - movl (%esp), %edx - movl %edx, (%eax) # store original eax - popl %eax - xorl %eax, %eax # return UNW_ESUCCESS - ret - -#elif defined(__x86_64__) - -# + push %eax + movl 8(%esp), %eax + movl %ebx, 4(%eax) + movl %ecx, 8(%eax) + movl %edx, 12(%eax) + 
movl %edi, 16(%eax) + movl %esi, 20(%eax) + movl %ebp, 24(%eax) + movl %esp, %edx + addl $8, %edx + movl %edx, 28(%eax) # store what sp was at call site as esp + # skip ss + # skip eflags + movl 4(%esp), %edx + movl %edx, 40(%eax) # store return address as eip + # skip cs + # skip ds + # skip es + # skip fs + # skip gs + movl (%esp), %edx + movl %edx, (%eax) # store original eax + popl %eax + xorl %eax, %eax # return UNW_ESUCCESS + ret + +#elif defined(__x86_64__) + +# # extern int __unw_getcontext(unw_context_t* thread_state) -# -# On entry: -# thread_state pointer is in rdi -# +# +# On entry: +# thread_state pointer is in rdi +# DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) #if defined(_WIN64) #define PTR %rcx @@ -92,10 +92,10 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) movq %r15,120(PTR) movq (%rsp),TMP movq TMP,128(PTR) # store return address as rip - # skip rflags - # skip cs - # skip fs - # skip gs + # skip rflags + # skip cs + # skip fs + # skip gs #if defined(_WIN64) movdqu %xmm0,176(PTR) @@ -115,9 +115,9 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) movdqu %xmm14,400(PTR) movdqu %xmm15,416(PTR) #endif - xorl %eax, %eax # return UNW_ESUCCESS - ret - + xorl %eax, %eax # return UNW_ESUCCESS + ret + #elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 # @@ -317,15 +317,15 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) or $2, $0, $0 .set pop -# elif defined(__mips__) - -# +# elif defined(__mips__) + +# # extern int __unw_getcontext(unw_context_t* thread_state) -# -# Just trap for the time being. +# +# Just trap for the time being. 
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) - teq $0, $0 - + teq $0, $0 + #elif defined(__powerpc64__) // @@ -560,7 +560,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) #elif defined(__powerpc__) - + // // extern int unw_getcontext(unw_context_t* thread_state) // @@ -602,7 +602,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) stw 29,124(3) stw 30,128(3) stw 31,132(3) - + // save VRSave register mfspr 0, 256 stw 0, 156(3) @@ -612,7 +612,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) // save CTR register mfctr 0 stw 0, 148(3) - + #if !defined(__NO_FPRS__) // save float registers stfd 0, 160(3) @@ -648,15 +648,15 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) stfd 30,400(3) stfd 31,408(3) #endif - + #if defined(__ALTIVEC__) // save vector registers - + subi 4, 1, 16 rlwinm 4, 4, 0, 0, 27 // mask low 4-bits // r4 is now a 16-byte aligned pointer into the red zone - -#define SAVE_VECTOR_UNALIGNED(_vec, _offset) \ + +#define SAVE_VECTOR_UNALIGNED(_vec, _offset) \ stvx _vec, 0, 4 SEPARATOR \ lwz 5, 0(4) SEPARATOR \ stw 5, _offset(3) SEPARATOR \ @@ -666,7 +666,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) stw 5, _offset+8(3) SEPARATOR \ lwz 5, 12(4) SEPARATOR \ stw 5, _offset+12(3) - + SAVE_VECTOR_UNALIGNED( 0, 424+0x000) SAVE_VECTOR_UNALIGNED( 1, 424+0x010) SAVE_VECTOR_UNALIGNED( 2, 424+0x020) @@ -700,83 +700,83 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) SAVE_VECTOR_UNALIGNED(30, 424+0x1E0) SAVE_VECTOR_UNALIGNED(31, 424+0x1F0) #endif - + li 3, 0 // return UNW_ESUCCESS - blr - - + blr + + #elif defined(__aarch64__) - -// + +// // extern int __unw_getcontext(unw_context_t* thread_state) -// -// On entry: -// thread_state pointer is in x0 -// - .p2align 2 +// +// On entry: +// thread_state pointer is in x0 +// + .p2align 2 DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) - stp x0, x1, [x0, #0x000] - stp x2, x3, [x0, #0x010] - stp x4, x5, [x0, #0x020] - stp x6, x7, [x0, #0x030] - stp x8, x9, [x0, #0x040] - stp x10,x11, [x0, #0x050] - stp x12,x13, [x0, #0x060] - stp 
x14,x15, [x0, #0x070] - stp x16,x17, [x0, #0x080] - stp x18,x19, [x0, #0x090] - stp x20,x21, [x0, #0x0A0] - stp x22,x23, [x0, #0x0B0] - stp x24,x25, [x0, #0x0C0] - stp x26,x27, [x0, #0x0D0] + stp x0, x1, [x0, #0x000] + stp x2, x3, [x0, #0x010] + stp x4, x5, [x0, #0x020] + stp x6, x7, [x0, #0x030] + stp x8, x9, [x0, #0x040] + stp x10,x11, [x0, #0x050] + stp x12,x13, [x0, #0x060] + stp x14,x15, [x0, #0x070] + stp x16,x17, [x0, #0x080] + stp x18,x19, [x0, #0x090] + stp x20,x21, [x0, #0x0A0] + stp x22,x23, [x0, #0x0B0] + stp x24,x25, [x0, #0x0C0] + stp x26,x27, [x0, #0x0D0] stp x28,x29, [x0, #0x0E0] str x30, [x0, #0x0F0] - mov x1,sp - str x1, [x0, #0x0F8] + mov x1,sp + str x1, [x0, #0x0F8] str x30, [x0, #0x100] // store return address as pc - // skip cpsr - stp d0, d1, [x0, #0x110] - stp d2, d3, [x0, #0x120] - stp d4, d5, [x0, #0x130] - stp d6, d7, [x0, #0x140] - stp d8, d9, [x0, #0x150] - stp d10,d11, [x0, #0x160] - stp d12,d13, [x0, #0x170] - stp d14,d15, [x0, #0x180] - stp d16,d17, [x0, #0x190] - stp d18,d19, [x0, #0x1A0] - stp d20,d21, [x0, #0x1B0] - stp d22,d23, [x0, #0x1C0] - stp d24,d25, [x0, #0x1D0] - stp d26,d27, [x0, #0x1E0] - stp d28,d29, [x0, #0x1F0] - str d30, [x0, #0x200] - str d31, [x0, #0x208] - mov x0, #0 // return UNW_ESUCCESS - ret - -#elif defined(__arm__) && !defined(__APPLE__) - -#if !defined(__ARM_ARCH_ISA_ARM) + // skip cpsr + stp d0, d1, [x0, #0x110] + stp d2, d3, [x0, #0x120] + stp d4, d5, [x0, #0x130] + stp d6, d7, [x0, #0x140] + stp d8, d9, [x0, #0x150] + stp d10,d11, [x0, #0x160] + stp d12,d13, [x0, #0x170] + stp d14,d15, [x0, #0x180] + stp d16,d17, [x0, #0x190] + stp d18,d19, [x0, #0x1A0] + stp d20,d21, [x0, #0x1B0] + stp d22,d23, [x0, #0x1C0] + stp d24,d25, [x0, #0x1D0] + stp d26,d27, [x0, #0x1E0] + stp d28,d29, [x0, #0x1F0] + str d30, [x0, #0x200] + str d31, [x0, #0x208] + mov x0, #0 // return UNW_ESUCCESS + ret + +#elif defined(__arm__) && !defined(__APPLE__) + +#if !defined(__ARM_ARCH_ISA_ARM) #if (__ARM_ARCH_ISA_THUMB == 2) .syntax 
unified #endif - .thumb -#endif - -@ + .thumb +#endif + +@ @ extern int __unw_getcontext(unw_context_t* thread_state) +@ +@ On entry: +@ thread_state pointer is in r0 @ -@ On entry: -@ thread_state pointer is in r0 -@ -@ Per EHABI #4.7 this only saves the core integer registers. -@ EHABI #7.4.5 notes that in general all VRS registers should be restored -@ however this is very hard to do for VFP registers because it is unknown -@ to the library how many registers are implemented by the architecture. +@ Per EHABI #4.7 this only saves the core integer registers. +@ EHABI #7.4.5 notes that in general all VRS registers should be restored +@ however this is very hard to do for VFP registers because it is unknown +@ to the library how many registers are implemented by the architecture. @ Instead, VFP registers are demand saved by logic external to __unw_getcontext. -@ - .p2align 2 +@ + .p2align 2 DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) #if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 stm r0!, {r0-r7} @@ -785,8 +785,8 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) mov r3, r10 stm r0!, {r1-r3} mov r1, r11 - mov r2, sp - mov r3, lr + mov r2, sp + mov r3, lr str r1, [r0, #0] @ r11 @ r12 does not need storing, it it the intra-procedure-call scratch register str r2, [r0, #8] @ sp @@ -796,158 +796,158 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) @ It is safe to use here though because we are about to return, and cpsr is @ not expected to be preserved. movs r0, #0 @ return UNW_ESUCCESS -#else - @ 32bit thumb-2 restrictions for stm: - @ . the sp (r13) cannot be in the list - @ . 
the pc (r15) cannot be in the list in an STM instruction - stm r0, {r0-r12} - str sp, [r0, #52] - str lr, [r0, #56] - str lr, [r0, #60] @ store return address as pc - mov r0, #0 @ return UNW_ESUCCESS -#endif - JMP(lr) - -@ -@ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 +#else + @ 32bit thumb-2 restrictions for stm: + @ . the sp (r13) cannot be in the list + @ . the pc (r15) cannot be in the list in an STM instruction + stm r0, {r0-r12} + str sp, [r0, #52] + str lr, [r0, #56] + str lr, [r0, #60] @ store return address as pc + mov r0, #0 @ return UNW_ESUCCESS +#endif + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) - .fpu vfpv3-d16 + .fpu vfpv3-d16 #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPv) - vstmia r0, {d0-d15} - JMP(lr) - -@ -@ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 + vstmia r0, {d0-d15} + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) - .fpu vfpv3-d16 + .fpu vfpv3-d16 #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPv) - vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia - JMP(lr) - -@ -@ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 + vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) - .fpu vfpv3 + .fpu vfpv3 #endif 
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPv) - @ VFP and iwMMX instructions are only available when compiling with the flags - @ that enable them. We do not want to do that in the library (because we do not - @ want the compiler to generate instructions that access those) but this is - @ only accessed if the personality routine needs these registers. Use of - @ these registers implies they are, actually, available on the target, so - @ it's ok to execute. - @ So, generate the instructions using the corresponding coprocessor mnemonic. - vstmia r0, {d16-d31} - JMP(lr) - + @ VFP and iwMMX instructions are only available when compiling with the flags + @ that enable them. We do not want to do that in the library (because we do not + @ want the compiler to generate instructions that access those) but this is + @ only accessed if the personality routine needs these registers. Use of + @ these registers implies they are, actually, available on the target, so + @ it's ok to execute. + @ So, generate the instructions using the corresponding coprocessor mnemonic. 
+ vstmia r0, {d16-d31} + JMP(lr) + #if defined(_LIBUNWIND_ARM_WMMX) -@ -@ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 +@ +@ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) .arch armv5te #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPv) - stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8 - stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8 - stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8 - stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8 - stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8 - stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8 - stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8 - stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8 - stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8 - stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8 - stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8 - stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8 - stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8 - stcl p1, cr13, [r0], #8 @ wstrd wR13, [r0], #8 - stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8 - stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8 - JMP(lr) - -@ -@ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values) -@ -@ On entry: -@ values pointer is in r0 -@ - .p2align 2 + stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8 + stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8 + stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8 + stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8 + stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8 + stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8 + stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8 + stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8 + stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8 + stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8 + stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8 + stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8 + stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8 + stcl p1, cr13, 
[r0], #8 @ wstrd wR13, [r0], #8 + stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8 + stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8 + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 #if defined(__ELF__) .arch armv5te #endif DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj) - stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4 - stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4 - stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4 - stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4 - JMP(lr) - + stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4 + stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4 + stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4 + stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4 + JMP(lr) + #endif -#elif defined(__or1k__) - -# +#elif defined(__or1k__) + +# # extern int __unw_getcontext(unw_context_t* thread_state) -# -# On entry: -# thread_state pointer is in r3 -# +# +# On entry: +# thread_state pointer is in r3 +# DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) - l.sw 0(r3), r0 - l.sw 4(r3), r1 - l.sw 8(r3), r2 - l.sw 12(r3), r3 - l.sw 16(r3), r4 - l.sw 20(r3), r5 - l.sw 24(r3), r6 - l.sw 28(r3), r7 - l.sw 32(r3), r8 - l.sw 36(r3), r9 - l.sw 40(r3), r10 - l.sw 44(r3), r11 - l.sw 48(r3), r12 - l.sw 52(r3), r13 - l.sw 56(r3), r14 - l.sw 60(r3), r15 - l.sw 64(r3), r16 - l.sw 68(r3), r17 - l.sw 72(r3), r18 - l.sw 76(r3), r19 - l.sw 80(r3), r20 - l.sw 84(r3), r21 - l.sw 88(r3), r22 - l.sw 92(r3), r23 - l.sw 96(r3), r24 - l.sw 100(r3), r25 - l.sw 104(r3), r26 - l.sw 108(r3), r27 - l.sw 112(r3), r28 - l.sw 116(r3), r29 - l.sw 120(r3), r30 - l.sw 124(r3), r31 + l.sw 0(r3), r0 + l.sw 4(r3), r1 + l.sw 8(r3), r2 + l.sw 12(r3), r3 + l.sw 16(r3), r4 + l.sw 20(r3), r5 + l.sw 24(r3), r6 + l.sw 28(r3), r7 + l.sw 32(r3), r8 + l.sw 36(r3), r9 + l.sw 40(r3), r10 + l.sw 44(r3), r11 + l.sw 48(r3), r12 + l.sw 52(r3), r13 + l.sw 56(r3), r14 + l.sw 60(r3), r15 + l.sw 
64(r3), r16 + l.sw 68(r3), r17 + l.sw 72(r3), r18 + l.sw 76(r3), r19 + l.sw 80(r3), r20 + l.sw 84(r3), r21 + l.sw 88(r3), r22 + l.sw 92(r3), r23 + l.sw 96(r3), r24 + l.sw 100(r3), r25 + l.sw 104(r3), r26 + l.sw 108(r3), r27 + l.sw 112(r3), r28 + l.sw 116(r3), r29 + l.sw 120(r3), r30 + l.sw 124(r3), r31 # store ra to pc l.sw 128(r3), r9 # zero epcr diff --git a/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp b/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp index 1a0b61f6cbc..ffb49a89e54 100644 --- a/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp +++ b/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp @@ -1,113 +1,113 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -//===----------------------------------------------------------------------===// - -#include "config.h" - - -// static linker symbols to prevent wrong two level namespace for _Unwind symbols -#if defined(__arm__) - #define NOT_HERE_BEFORE_5_0(sym) \ - extern const char sym##_tmp30 __asm("$ld$hide$os3.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp30 = 0; \ - extern const char sym##_tmp31 __asm("$ld$hide$os3.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp31 = 0; \ - extern const char sym##_tmp32 __asm("$ld$hide$os3.2$_" #sym );\ - __attribute__((visibility("default"))) const char sym##_tmp32 = 0; \ - extern const char sym##_tmp40 __asm("$ld$hide$os4.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp40 = 0; \ - extern const char sym##_tmp41 __asm("$ld$hide$os4.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp41 = 0; \ - extern const char sym##_tmp42 __asm("$ld$hide$os4.2$_" #sym ); \ - __attribute__((visibility("default"))) const char 
sym##_tmp42 = 0; \ - extern const char sym##_tmp43 __asm("$ld$hide$os4.3$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp43 = 0; +// +// +//===----------------------------------------------------------------------===// + +#include "config.h" + + +// static linker symbols to prevent wrong two level namespace for _Unwind symbols +#if defined(__arm__) + #define NOT_HERE_BEFORE_5_0(sym) \ + extern const char sym##_tmp30 __asm("$ld$hide$os3.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp30 = 0; \ + extern const char sym##_tmp31 __asm("$ld$hide$os3.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp31 = 0; \ + extern const char sym##_tmp32 __asm("$ld$hide$os3.2$_" #sym );\ + __attribute__((visibility("default"))) const char sym##_tmp32 = 0; \ + extern const char sym##_tmp40 __asm("$ld$hide$os4.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp40 = 0; \ + extern const char sym##_tmp41 __asm("$ld$hide$os4.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp41 = 0; \ + extern const char sym##_tmp42 __asm("$ld$hide$os4.2$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp42 = 0; \ + extern const char sym##_tmp43 __asm("$ld$hide$os4.3$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp43 = 0; #elif defined(__aarch64__) - #define NOT_HERE_BEFORE_10_6(sym) - #define NEVER_HERE(sym) -#else - #define NOT_HERE_BEFORE_10_6(sym) \ - extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ - extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp5 = 0; - #define NEVER_HERE(sym) \ - extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ - extern const char sym##_tmp5 
__asm("$ld$hide$os10.5$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ - extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp6 = 0; -#endif - - + #define NOT_HERE_BEFORE_10_6(sym) + #define NEVER_HERE(sym) +#else + #define NOT_HERE_BEFORE_10_6(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; + #define NEVER_HERE(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; +#endif + + #if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) - -// -// symbols in libSystem.dylib in 10.6 and later, but are in libgcc_s.dylib in -// earlier versions -// -NOT_HERE_BEFORE_10_6(_Unwind_DeleteException) -NOT_HERE_BEFORE_10_6(_Unwind_Find_FDE) -NOT_HERE_BEFORE_10_6(_Unwind_ForcedUnwind) -NOT_HERE_BEFORE_10_6(_Unwind_GetGR) -NOT_HERE_BEFORE_10_6(_Unwind_GetIP) -NOT_HERE_BEFORE_10_6(_Unwind_GetLanguageSpecificData) -NOT_HERE_BEFORE_10_6(_Unwind_GetRegionStart) -NOT_HERE_BEFORE_10_6(_Unwind_RaiseException) -NOT_HERE_BEFORE_10_6(_Unwind_Resume) -NOT_HERE_BEFORE_10_6(_Unwind_SetGR) -NOT_HERE_BEFORE_10_6(_Unwind_SetIP) -NOT_HERE_BEFORE_10_6(_Unwind_Backtrace) -NOT_HERE_BEFORE_10_6(_Unwind_FindEnclosingFunction) -NOT_HERE_BEFORE_10_6(_Unwind_GetCFA) -NOT_HERE_BEFORE_10_6(_Unwind_GetDataRelBase) -NOT_HERE_BEFORE_10_6(_Unwind_GetTextRelBase) -NOT_HERE_BEFORE_10_6(_Unwind_Resume_or_Rethrow) 
-NOT_HERE_BEFORE_10_6(_Unwind_GetIPInfo) -NOT_HERE_BEFORE_10_6(__register_frame) -NOT_HERE_BEFORE_10_6(__deregister_frame) - -// -// symbols in libSystem.dylib for compatibility, but we don't want any new code -// using them -// -NEVER_HERE(__register_frame_info_bases) -NEVER_HERE(__register_frame_info) -NEVER_HERE(__register_frame_info_table_bases) -NEVER_HERE(__register_frame_info_table) -NEVER_HERE(__register_frame_table) -NEVER_HERE(__deregister_frame_info) -NEVER_HERE(__deregister_frame_info_bases) - + +// +// symbols in libSystem.dylib in 10.6 and later, but are in libgcc_s.dylib in +// earlier versions +// +NOT_HERE_BEFORE_10_6(_Unwind_DeleteException) +NOT_HERE_BEFORE_10_6(_Unwind_Find_FDE) +NOT_HERE_BEFORE_10_6(_Unwind_ForcedUnwind) +NOT_HERE_BEFORE_10_6(_Unwind_GetGR) +NOT_HERE_BEFORE_10_6(_Unwind_GetIP) +NOT_HERE_BEFORE_10_6(_Unwind_GetLanguageSpecificData) +NOT_HERE_BEFORE_10_6(_Unwind_GetRegionStart) +NOT_HERE_BEFORE_10_6(_Unwind_RaiseException) +NOT_HERE_BEFORE_10_6(_Unwind_Resume) +NOT_HERE_BEFORE_10_6(_Unwind_SetGR) +NOT_HERE_BEFORE_10_6(_Unwind_SetIP) +NOT_HERE_BEFORE_10_6(_Unwind_Backtrace) +NOT_HERE_BEFORE_10_6(_Unwind_FindEnclosingFunction) +NOT_HERE_BEFORE_10_6(_Unwind_GetCFA) +NOT_HERE_BEFORE_10_6(_Unwind_GetDataRelBase) +NOT_HERE_BEFORE_10_6(_Unwind_GetTextRelBase) +NOT_HERE_BEFORE_10_6(_Unwind_Resume_or_Rethrow) +NOT_HERE_BEFORE_10_6(_Unwind_GetIPInfo) +NOT_HERE_BEFORE_10_6(__register_frame) +NOT_HERE_BEFORE_10_6(__deregister_frame) + +// +// symbols in libSystem.dylib for compatibility, but we don't want any new code +// using them +// +NEVER_HERE(__register_frame_info_bases) +NEVER_HERE(__register_frame_info) +NEVER_HERE(__register_frame_info_table_bases) +NEVER_HERE(__register_frame_info_table) +NEVER_HERE(__register_frame_table) +NEVER_HERE(__deregister_frame_info) +NEVER_HERE(__deregister_frame_info_bases) + #endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) - - - - + + + + #if defined(_LIBUNWIND_BUILD_SJLJ_APIS) -// -// symbols in 
libSystem.dylib in iOS 5.0 and later, but are in libgcc_s.dylib in -// earlier versions -// -NOT_HERE_BEFORE_5_0(_Unwind_GetLanguageSpecificData) -NOT_HERE_BEFORE_5_0(_Unwind_GetRegionStart) -NOT_HERE_BEFORE_5_0(_Unwind_GetIP) -NOT_HERE_BEFORE_5_0(_Unwind_SetGR) -NOT_HERE_BEFORE_5_0(_Unwind_SetIP) -NOT_HERE_BEFORE_5_0(_Unwind_DeleteException) -NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Register) -NOT_HERE_BEFORE_5_0(_Unwind_GetGR) -NOT_HERE_BEFORE_5_0(_Unwind_GetIPInfo) -NOT_HERE_BEFORE_5_0(_Unwind_GetCFA) -NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume) -NOT_HERE_BEFORE_5_0(_Unwind_SjLj_RaiseException) -NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume_or_Rethrow) -NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Unregister) - +// +// symbols in libSystem.dylib in iOS 5.0 and later, but are in libgcc_s.dylib in +// earlier versions +// +NOT_HERE_BEFORE_5_0(_Unwind_GetLanguageSpecificData) +NOT_HERE_BEFORE_5_0(_Unwind_GetRegionStart) +NOT_HERE_BEFORE_5_0(_Unwind_GetIP) +NOT_HERE_BEFORE_5_0(_Unwind_SetGR) +NOT_HERE_BEFORE_5_0(_Unwind_SetIP) +NOT_HERE_BEFORE_5_0(_Unwind_DeleteException) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Register) +NOT_HERE_BEFORE_5_0(_Unwind_GetGR) +NOT_HERE_BEFORE_5_0(_Unwind_GetIPInfo) +NOT_HERE_BEFORE_5_0(_Unwind_GetCFA) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_RaiseException) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume_or_Rethrow) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Unregister) + #endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS) diff --git a/contrib/libs/libunwind/src/assembly.h b/contrib/libs/libunwind/src/assembly.h index ab0b191b9d6..978f6bd619b 100644 --- a/contrib/libs/libunwind/src/assembly.h +++ b/contrib/libs/libunwind/src/assembly.h @@ -1,20 +1,20 @@ -/* ===-- assembly.h - libUnwind assembler support macros -------------------=== - * +/* ===-- assembly.h - libUnwind assembler support macros -------------------=== + * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * ===----------------------------------------------------------------------=== - * - * This file defines macros for use in libUnwind assembler source. - * This file is not part of the interface of this library. - * - * ===----------------------------------------------------------------------=== - */ - -#ifndef UNWIND_ASSEMBLY_H -#define UNWIND_ASSEMBLY_H - + * + * ===----------------------------------------------------------------------=== + * + * This file defines macros for use in libUnwind assembler source. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef UNWIND_ASSEMBLY_H +#define UNWIND_ASSEMBLY_H + #if (defined(__i386__) || defined(__x86_64__)) && defined(__linux__) #include #define _LIBUNWIND_CET_ENDBR _CET_ENDBR @@ -33,7 +33,7 @@ #define PPC64_OFFS_FP 312 #define PPC64_OFFS_V 824 #elif defined(__APPLE__) && defined(__aarch64__) -#define SEPARATOR %% +#define SEPARATOR %% #elif defined(__riscv) # define RISCV_ISIZE (__riscv_xlen / 8) # define RISCV_FOFFSET (RISCV_ISIZE * 32) @@ -63,10 +63,10 @@ # endif # endif # define SEPARATOR ; -#else -#define SEPARATOR ; -#endif - +#else +#define SEPARATOR ; +#endif + #if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) #define PPC64_OPD1 .section .opd,"aw",@progbits SEPARATOR #define PPC64_OPD2 SEPARATOR \ @@ -76,11 +76,11 @@ .quad 0 SEPARATOR \ .text SEPARATOR \ .Lfunc_begin0: -#else +#else #define PPC64_OPD1 #define PPC64_OPD2 -#endif - +#endif + #if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT) .pushsection ".note.gnu.property", "a" SEPARATOR \ .balign 8 SEPARATOR \ @@ -110,13 +110,13 @@ #endif #endif -#define GLUE2(a, b) a ## b -#define GLUE(a, b) GLUE2(a, b) -#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) - -#if defined(__APPLE__) +#define GLUE2(a, b) a ## b +#define GLUE(a, b) GLUE2(a, b) +#define 
SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#if defined(__APPLE__) -#define SYMBOL_IS_FUNC(name) +#define SYMBOL_IS_FUNC(name) #define HIDDEN_SYMBOL(name) .private_extern name #if defined(_LIBUNWIND_HIDE_SYMBOLS) #define EXPORT_SYMBOL(name) HIDDEN_SYMBOL(name) @@ -130,13 +130,13 @@ #define NO_EXEC_STACK_DIRECTIVE -#elif defined(__ELF__) +#elif defined(__ELF__) -#if defined(__arm__) -#define SYMBOL_IS_FUNC(name) .type name,%function -#else -#define SYMBOL_IS_FUNC(name) .type name,@function -#endif +#if defined(__arm__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif #define HIDDEN_SYMBOL(name) .hidden name #if defined(_LIBUNWIND_HIDE_SYMBOLS) #define EXPORT_SYMBOL(name) HIDDEN_SYMBOL(name) @@ -160,17 +160,17 @@ #if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ defined(__linux__) #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits -#else +#else #define NO_EXEC_STACK_DIRECTIVE #endif #elif defined(_WIN32) -#define SYMBOL_IS_FUNC(name) \ - .def name SEPARATOR \ - .scl 2 SEPARATOR \ - .type 32 SEPARATOR \ - .endef +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef #define EXPORT_SYMBOL2(name) \ .section .drectve,"yn" SEPARATOR \ .ascii "-export:", #name, "\0" SEPARATOR \ @@ -179,9 +179,9 @@ #define EXPORT_SYMBOL(name) #else #define EXPORT_SYMBOL(name) EXPORT_SYMBOL2(name) -#endif +#endif #define HIDDEN_SYMBOL(name) - + #if defined(__MINGW32__) #define WEAK_ALIAS(name, aliasname) \ .globl SYMBOL_NAME(aliasname) SEPARATOR \ @@ -198,9 +198,9 @@ EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \ WEAK_ALIAS2(SYMBOL_NAME(name), SYMBOL_NAME(aliasname)) #endif - + #define NO_EXEC_STACK_DIRECTIVE - + #elif defined(__sparc__) #else @@ -218,24 +218,24 @@ PPC64_OPD2 \ AARCH64_BTI -#if defined(__arm__) -#if !defined(__ARM_ARCH) -#define __ARM_ARCH 4 -#endif - -#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 
-#define ARM_HAS_BX -#endif - -#ifdef ARM_HAS_BX -#define JMP(r) bx r -#else -#define JMP(r) mov pc, r -#endif -#endif /* __arm__ */ - +#if defined(__arm__) +#if !defined(__ARM_ARCH) +#define __ARM_ARCH 4 +#endif + +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#else +#define JMP(r) mov pc, r +#endif +#endif /* __arm__ */ + #if defined(__powerpc__) #define PPC_LEFT_SHIFT(index) << (index) #endif -#endif /* UNWIND_ASSEMBLY_H */ +#endif /* UNWIND_ASSEMBLY_H */ diff --git a/contrib/libs/libunwind/src/config.h b/contrib/libs/libunwind/src/config.h index 850a1606572..560edda04ea 100644 --- a/contrib/libs/libunwind/src/config.h +++ b/contrib/libs/libunwind/src/config.h @@ -1,39 +1,39 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// +// +// // Defines macros used within libunwind project. -// -//===----------------------------------------------------------------------===// - - -#ifndef LIBUNWIND_CONFIG_H -#define LIBUNWIND_CONFIG_H - -#include -#include +// +//===----------------------------------------------------------------------===// + + +#ifndef LIBUNWIND_CONFIG_H +#define LIBUNWIND_CONFIG_H + +#include +#include #include #include - + #include <__libunwind_config.h> - -// Platform specific configuration defines. -#ifdef __APPLE__ + +// Platform specific configuration defines. 
+#ifdef __APPLE__ #if defined(FOR_DYLD) #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1 #else #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 - #endif + #endif #elif defined(_WIN32) #ifdef __SEH__ #define _LIBUNWIND_SUPPORT_SEH_UNWIND 1 #else #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 - #endif + #endif #elif defined(_LIBUNWIND_IS_BAREMETAL) #if !defined(_LIBUNWIND_ARM_EHABI) #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 @@ -51,7 +51,7 @@ #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 #endif #endif - + #if defined(_LIBUNWIND_HIDE_SYMBOLS) // The CMake file passes -fvisibility=hidden to control ELF/Mach-O visibility. #define _LIBUNWIND_EXPORT @@ -60,12 +60,12 @@ #if !defined(__ELF__) && !defined(__MACH__) #define _LIBUNWIND_EXPORT __declspec(dllexport) #define _LIBUNWIND_HIDDEN - #else + #else #define _LIBUNWIND_EXPORT __attribute__((visibility("default"))) #define _LIBUNWIND_HIDDEN __attribute__((visibility("hidden"))) - #endif + #endif #endif - + #define STR(a) #a #define XSTR(a) STR(a) #define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name @@ -89,7 +89,7 @@ #define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ __attribute__((alias(#name))); -#else +#else #define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ __pragma(comment(linker, "/alternatename:" SYMBOL_NAME(aliasname) "=" \ SYMBOL_NAME(name))) \ @@ -98,17 +98,17 @@ #else #error Unsupported target #endif - + // Apple/armv7k defaults to DWARF/Compact unwinding, but its libunwind also // needs to include the SJLJ APIs. 
#if (defined(__APPLE__) && defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__) #define _LIBUNWIND_BUILD_SJLJ_APIS #endif - + #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) #define _LIBUNWIND_SUPPORT_FRAME_APIS #endif - + #if defined(__i386__) || defined(__x86_64__) || \ defined(__ppc__) || defined(__ppc64__) || defined(__powerpc64__) || \ (!defined(__APPLE__) && defined(__arm__)) || \ @@ -118,9 +118,9 @@ defined(__hexagon__) #if !defined(_LIBUNWIND_BUILD_SJLJ_APIS) #define _LIBUNWIND_BUILD_ZERO_COST_APIS -#endif #endif - +#endif + #ifndef _LIBUNWIND_REMEMBER_HEAP_ALLOC #if defined(_LIBUNWIND_REMEMBER_STACK_ALLOC) || defined(__APPLE__) || \ defined(__linux__) || defined(__ANDROID__) || defined(__MINGW32__) || \ @@ -143,7 +143,7 @@ #define _LIBUNWIND_REMEMBER_FREE(_ptr) free(_ptr) #define _LIBUNWIND_REMEMBER_CLEANUP_NEEDED #endif - + #if defined(NDEBUG) && defined(_LIBUNWIND_IS_BAREMETAL) #define _LIBUNWIND_ABORT(msg) \ do { \ @@ -179,31 +179,31 @@ } while (0) #endif -// Macros that define away in non-Debug builds -#ifdef NDEBUG - #define _LIBUNWIND_DEBUG_LOG(msg, ...) - #define _LIBUNWIND_TRACE_API(msg, ...) +// Macros that define away in non-Debug builds +#ifdef NDEBUG + #define _LIBUNWIND_DEBUG_LOG(msg, ...) + #define _LIBUNWIND_TRACE_API(msg, ...) #define _LIBUNWIND_TRACING_UNWINDING (0) #define _LIBUNWIND_TRACING_DWARF (0) - #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) + #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) #define _LIBUNWIND_TRACE_DWARF(...) -#else - #ifdef __cplusplus - extern "C" { - #endif - extern bool logAPIs(); - extern bool logUnwinding(); +#else + #ifdef __cplusplus + extern "C" { + #endif + extern bool logAPIs(); + extern bool logUnwinding(); extern bool logDWARF(); - #ifdef __cplusplus - } - #endif - #define _LIBUNWIND_DEBUG_LOG(msg, ...) _LIBUNWIND_LOG(msg, __VA_ARGS__) + #ifdef __cplusplus + } + #endif + #define _LIBUNWIND_DEBUG_LOG(msg, ...) _LIBUNWIND_LOG(msg, __VA_ARGS__) #define _LIBUNWIND_TRACE_API(msg, ...) 
\ do { \ if (logAPIs()) \ _LIBUNWIND_LOG(msg, __VA_ARGS__); \ } while (0) - #define _LIBUNWIND_TRACING_UNWINDING logUnwinding() + #define _LIBUNWIND_TRACING_UNWINDING logUnwinding() #define _LIBUNWIND_TRACING_DWARF logDWARF() #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) \ do { \ @@ -215,8 +215,8 @@ if (logDWARF()) \ fprintf(stderr, __VA_ARGS__); \ } while (0) -#endif - +#endif + #ifdef __cplusplus // Used to fit UnwindCursor and Registers_xxx types against unw_context_t / // unw_cursor_t sized memory blocks. @@ -237,5 +237,5 @@ struct check_fit { }; #undef COMP_OP #endif // __cplusplus - -#endif // LIBUNWIND_CONFIG_H + +#endif // LIBUNWIND_CONFIG_H diff --git a/contrib/libs/libunwind/src/dwarf2.h b/contrib/libs/libunwind/src/dwarf2.h index ec099388b26..174277d5a79 100644 --- a/contrib/libs/libunwind/src/dwarf2.h +++ b/contrib/libs/libunwind/src/dwarf2.h @@ -1,239 +1,239 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - - -/* - These constants were taken from version 3 of the DWARF standard, - which is Copyright (c) 2005 Free Standards Group, and - Copyright (c) 1992, 1993 UNIX International, Inc. 
-*/ - -#ifndef __DWARF2__ -#define __DWARF2__ - -// DWARF unwind instructions -enum { - DW_CFA_nop = 0x0, - DW_CFA_set_loc = 0x1, - DW_CFA_advance_loc1 = 0x2, - DW_CFA_advance_loc2 = 0x3, - DW_CFA_advance_loc4 = 0x4, - DW_CFA_offset_extended = 0x5, - DW_CFA_restore_extended = 0x6, - DW_CFA_undefined = 0x7, - DW_CFA_same_value = 0x8, - DW_CFA_register = 0x9, - DW_CFA_remember_state = 0xA, - DW_CFA_restore_state = 0xB, - DW_CFA_def_cfa = 0xC, - DW_CFA_def_cfa_register = 0xD, - DW_CFA_def_cfa_offset = 0xE, - DW_CFA_def_cfa_expression = 0xF, - DW_CFA_expression = 0x10, - DW_CFA_offset_extended_sf = 0x11, - DW_CFA_def_cfa_sf = 0x12, - DW_CFA_def_cfa_offset_sf = 0x13, - DW_CFA_val_offset = 0x14, - DW_CFA_val_offset_sf = 0x15, - DW_CFA_val_expression = 0x16, - DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta - DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register - DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register - - // GNU extensions - DW_CFA_GNU_window_save = 0x2D, - DW_CFA_GNU_args_size = 0x2E, +// +//===----------------------------------------------------------------------===// + + +/* + These constants were taken from version 3 of the DWARF standard, + which is Copyright (c) 2005 Free Standards Group, and + Copyright (c) 1992, 1993 UNIX International, Inc. 
+*/ + +#ifndef __DWARF2__ +#define __DWARF2__ + +// DWARF unwind instructions +enum { + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xA, + DW_CFA_restore_state = 0xB, + DW_CFA_def_cfa = 0xC, + DW_CFA_def_cfa_register = 0xD, + DW_CFA_def_cfa_offset = 0xE, + DW_CFA_def_cfa_expression = 0xF, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta + DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register + DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register + + // GNU extensions + DW_CFA_GNU_window_save = 0x2D, + DW_CFA_GNU_args_size = 0x2E, DW_CFA_GNU_negative_offset_extended = 0x2F, // AARCH64 extensions DW_CFA_AARCH64_negate_ra_state = 0x2D -}; - - -// FSF exception handling Pointer-Encoding constants -// Used in CFI augmentation by GCC -enum { - DW_EH_PE_ptr = 0x00, - DW_EH_PE_uleb128 = 0x01, - DW_EH_PE_udata2 = 0x02, - DW_EH_PE_udata4 = 0x03, - DW_EH_PE_udata8 = 0x04, - DW_EH_PE_signed = 0x08, - DW_EH_PE_sleb128 = 0x09, - DW_EH_PE_sdata2 = 0x0A, - DW_EH_PE_sdata4 = 0x0B, - DW_EH_PE_sdata8 = 0x0C, - DW_EH_PE_absptr = 0x00, - DW_EH_PE_pcrel = 0x10, - DW_EH_PE_textrel = 0x20, - DW_EH_PE_datarel = 0x30, - DW_EH_PE_funcrel = 0x40, - DW_EH_PE_aligned = 0x50, - DW_EH_PE_indirect = 0x80, - DW_EH_PE_omit = 0xFF -}; - - -// DWARF expressions -enum { - DW_OP_addr = 0x03, // constant address (size target specific) - DW_OP_deref = 0x06, - DW_OP_const1u = 0x08, // 1-byte constant - DW_OP_const1s = 0x09, // 1-byte constant - DW_OP_const2u = 0x0A, // 2-byte 
constant - DW_OP_const2s = 0x0B, // 2-byte constant - DW_OP_const4u = 0x0C, // 4-byte constant - DW_OP_const4s = 0x0D, // 4-byte constant - DW_OP_const8u = 0x0E, // 8-byte constant - DW_OP_const8s = 0x0F, // 8-byte constant - DW_OP_constu = 0x10, // ULEB128 constant - DW_OP_consts = 0x11, // SLEB128 constant - DW_OP_dup = 0x12, - DW_OP_drop = 0x13, - DW_OP_over = 0x14, - DW_OP_pick = 0x15, // 1-byte stack index - DW_OP_swap = 0x16, - DW_OP_rot = 0x17, - DW_OP_xderef = 0x18, - DW_OP_abs = 0x19, - DW_OP_and = 0x1A, - DW_OP_div = 0x1B, - DW_OP_minus = 0x1C, - DW_OP_mod = 0x1D, - DW_OP_mul = 0x1E, - DW_OP_neg = 0x1F, - DW_OP_not = 0x20, - DW_OP_or = 0x21, - DW_OP_plus = 0x22, - DW_OP_plus_uconst = 0x23, // ULEB128 addend - DW_OP_shl = 0x24, - DW_OP_shr = 0x25, - DW_OP_shra = 0x26, - DW_OP_xor = 0x27, - DW_OP_skip = 0x2F, // signed 2-byte constant - DW_OP_bra = 0x28, // signed 2-byte constant - DW_OP_eq = 0x29, - DW_OP_ge = 0x2A, - DW_OP_gt = 0x2B, - DW_OP_le = 0x2C, - DW_OP_lt = 0x2D, - DW_OP_ne = 0x2E, - DW_OP_lit0 = 0x30, // Literal 0 - DW_OP_lit1 = 0x31, // Literal 1 - DW_OP_lit2 = 0x32, // Literal 2 - DW_OP_lit3 = 0x33, // Literal 3 - DW_OP_lit4 = 0x34, // Literal 4 - DW_OP_lit5 = 0x35, // Literal 5 - DW_OP_lit6 = 0x36, // Literal 6 - DW_OP_lit7 = 0x37, // Literal 7 - DW_OP_lit8 = 0x38, // Literal 8 - DW_OP_lit9 = 0x39, // Literal 9 - DW_OP_lit10 = 0x3A, // Literal 10 - DW_OP_lit11 = 0x3B, // Literal 11 - DW_OP_lit12 = 0x3C, // Literal 12 - DW_OP_lit13 = 0x3D, // Literal 13 - DW_OP_lit14 = 0x3E, // Literal 14 - DW_OP_lit15 = 0x3F, // Literal 15 - DW_OP_lit16 = 0x40, // Literal 16 - DW_OP_lit17 = 0x41, // Literal 17 - DW_OP_lit18 = 0x42, // Literal 18 - DW_OP_lit19 = 0x43, // Literal 19 - DW_OP_lit20 = 0x44, // Literal 20 - DW_OP_lit21 = 0x45, // Literal 21 - DW_OP_lit22 = 0x46, // Literal 22 - DW_OP_lit23 = 0x47, // Literal 23 - DW_OP_lit24 = 0x48, // Literal 24 - DW_OP_lit25 = 0x49, // Literal 25 - DW_OP_lit26 = 0x4A, // Literal 26 - DW_OP_lit27 = 0x4B, // Literal 
27 - DW_OP_lit28 = 0x4C, // Literal 28 - DW_OP_lit29 = 0x4D, // Literal 29 - DW_OP_lit30 = 0x4E, // Literal 30 - DW_OP_lit31 = 0x4F, // Literal 31 - DW_OP_reg0 = 0x50, // Contents of reg0 - DW_OP_reg1 = 0x51, // Contents of reg1 - DW_OP_reg2 = 0x52, // Contents of reg2 - DW_OP_reg3 = 0x53, // Contents of reg3 - DW_OP_reg4 = 0x54, // Contents of reg4 - DW_OP_reg5 = 0x55, // Contents of reg5 - DW_OP_reg6 = 0x56, // Contents of reg6 - DW_OP_reg7 = 0x57, // Contents of reg7 - DW_OP_reg8 = 0x58, // Contents of reg8 - DW_OP_reg9 = 0x59, // Contents of reg9 - DW_OP_reg10 = 0x5A, // Contents of reg10 - DW_OP_reg11 = 0x5B, // Contents of reg11 - DW_OP_reg12 = 0x5C, // Contents of reg12 - DW_OP_reg13 = 0x5D, // Contents of reg13 - DW_OP_reg14 = 0x5E, // Contents of reg14 - DW_OP_reg15 = 0x5F, // Contents of reg15 - DW_OP_reg16 = 0x60, // Contents of reg16 - DW_OP_reg17 = 0x61, // Contents of reg17 - DW_OP_reg18 = 0x62, // Contents of reg18 - DW_OP_reg19 = 0x63, // Contents of reg19 - DW_OP_reg20 = 0x64, // Contents of reg20 - DW_OP_reg21 = 0x65, // Contents of reg21 - DW_OP_reg22 = 0x66, // Contents of reg22 - DW_OP_reg23 = 0x67, // Contents of reg23 - DW_OP_reg24 = 0x68, // Contents of reg24 - DW_OP_reg25 = 0x69, // Contents of reg25 - DW_OP_reg26 = 0x6A, // Contents of reg26 - DW_OP_reg27 = 0x6B, // Contents of reg27 - DW_OP_reg28 = 0x6C, // Contents of reg28 - DW_OP_reg29 = 0x6D, // Contents of reg29 - DW_OP_reg30 = 0x6E, // Contents of reg30 - DW_OP_reg31 = 0x6F, // Contents of reg31 - DW_OP_breg0 = 0x70, // base register 0 + SLEB128 offset - DW_OP_breg1 = 0x71, // base register 1 + SLEB128 offset - DW_OP_breg2 = 0x72, // base register 2 + SLEB128 offset - DW_OP_breg3 = 0x73, // base register 3 + SLEB128 offset - DW_OP_breg4 = 0x74, // base register 4 + SLEB128 offset - DW_OP_breg5 = 0x75, // base register 5 + SLEB128 offset - DW_OP_breg6 = 0x76, // base register 6 + SLEB128 offset - DW_OP_breg7 = 0x77, // base register 7 + SLEB128 offset - DW_OP_breg8 = 0x78, // base 
register 8 + SLEB128 offset - DW_OP_breg9 = 0x79, // base register 9 + SLEB128 offset - DW_OP_breg10 = 0x7A, // base register 10 + SLEB128 offset - DW_OP_breg11 = 0x7B, // base register 11 + SLEB128 offset - DW_OP_breg12 = 0x7C, // base register 12 + SLEB128 offset - DW_OP_breg13 = 0x7D, // base register 13 + SLEB128 offset - DW_OP_breg14 = 0x7E, // base register 14 + SLEB128 offset - DW_OP_breg15 = 0x7F, // base register 15 + SLEB128 offset - DW_OP_breg16 = 0x80, // base register 16 + SLEB128 offset - DW_OP_breg17 = 0x81, // base register 17 + SLEB128 offset - DW_OP_breg18 = 0x82, // base register 18 + SLEB128 offset - DW_OP_breg19 = 0x83, // base register 19 + SLEB128 offset - DW_OP_breg20 = 0x84, // base register 20 + SLEB128 offset - DW_OP_breg21 = 0x85, // base register 21 + SLEB128 offset - DW_OP_breg22 = 0x86, // base register 22 + SLEB128 offset - DW_OP_breg23 = 0x87, // base register 23 + SLEB128 offset - DW_OP_breg24 = 0x88, // base register 24 + SLEB128 offset - DW_OP_breg25 = 0x89, // base register 25 + SLEB128 offset - DW_OP_breg26 = 0x8A, // base register 26 + SLEB128 offset - DW_OP_breg27 = 0x8B, // base register 27 + SLEB128 offset - DW_OP_breg28 = 0x8C, // base register 28 + SLEB128 offset - DW_OP_breg29 = 0x8D, // base register 29 + SLEB128 offset - DW_OP_breg30 = 0x8E, // base register 30 + SLEB128 offset - DW_OP_breg31 = 0x8F, // base register 31 + SLEB128 offset - DW_OP_regx = 0x90, // ULEB128 register - DW_OP_fbreg = 0x91, // SLEB128 offset - DW_OP_bregx = 0x92, // ULEB128 register followed by SLEB128 offset - DW_OP_piece = 0x93, // ULEB128 size of piece addressed - DW_OP_deref_size = 0x94, // 1-byte size of data retrieved - DW_OP_xderef_size = 0x95, // 1-byte size of data retrieved - DW_OP_nop = 0x96, - DW_OP_push_object_addres = 0x97, - DW_OP_call2 = 0x98, // 2-byte offset of DIE - DW_OP_call4 = 0x99, // 4-byte offset of DIE - DW_OP_call_ref = 0x9A, // 4- or 8-byte offset of DIE - DW_OP_lo_user = 0xE0, - DW_OP_APPLE_uninit = 0xF0, - 
DW_OP_hi_user = 0xFF -}; - - -#endif +}; + + +// FSF exception handling Pointer-Encoding constants +// Used in CFI augmentation by GCC +enum { + DW_EH_PE_ptr = 0x00, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_signed = 0x08, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_absptr = 0x00, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + DW_EH_PE_indirect = 0x80, + DW_EH_PE_omit = 0xFF +}; + + +// DWARF expressions +enum { + DW_OP_addr = 0x03, // constant address (size target specific) + DW_OP_deref = 0x06, + DW_OP_const1u = 0x08, // 1-byte constant + DW_OP_const1s = 0x09, // 1-byte constant + DW_OP_const2u = 0x0A, // 2-byte constant + DW_OP_const2s = 0x0B, // 2-byte constant + DW_OP_const4u = 0x0C, // 4-byte constant + DW_OP_const4s = 0x0D, // 4-byte constant + DW_OP_const8u = 0x0E, // 8-byte constant + DW_OP_const8s = 0x0F, // 8-byte constant + DW_OP_constu = 0x10, // ULEB128 constant + DW_OP_consts = 0x11, // SLEB128 constant + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, // 1-byte stack index + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1A, + DW_OP_div = 0x1B, + DW_OP_minus = 0x1C, + DW_OP_mod = 0x1D, + DW_OP_mul = 0x1E, + DW_OP_neg = 0x1F, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, // ULEB128 addend + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2F, // signed 2-byte constant + DW_OP_bra = 0x28, // signed 2-byte constant + DW_OP_eq = 0x29, + DW_OP_ge = 0x2A, + DW_OP_gt = 0x2B, + DW_OP_le = 0x2C, + DW_OP_lt = 0x2D, + DW_OP_ne = 0x2E, + DW_OP_lit0 = 0x30, // Literal 0 + DW_OP_lit1 = 0x31, // Literal 1 + DW_OP_lit2 = 0x32, // Literal 2 + DW_OP_lit3 = 0x33, // 
Literal 3 + DW_OP_lit4 = 0x34, // Literal 4 + DW_OP_lit5 = 0x35, // Literal 5 + DW_OP_lit6 = 0x36, // Literal 6 + DW_OP_lit7 = 0x37, // Literal 7 + DW_OP_lit8 = 0x38, // Literal 8 + DW_OP_lit9 = 0x39, // Literal 9 + DW_OP_lit10 = 0x3A, // Literal 10 + DW_OP_lit11 = 0x3B, // Literal 11 + DW_OP_lit12 = 0x3C, // Literal 12 + DW_OP_lit13 = 0x3D, // Literal 13 + DW_OP_lit14 = 0x3E, // Literal 14 + DW_OP_lit15 = 0x3F, // Literal 15 + DW_OP_lit16 = 0x40, // Literal 16 + DW_OP_lit17 = 0x41, // Literal 17 + DW_OP_lit18 = 0x42, // Literal 18 + DW_OP_lit19 = 0x43, // Literal 19 + DW_OP_lit20 = 0x44, // Literal 20 + DW_OP_lit21 = 0x45, // Literal 21 + DW_OP_lit22 = 0x46, // Literal 22 + DW_OP_lit23 = 0x47, // Literal 23 + DW_OP_lit24 = 0x48, // Literal 24 + DW_OP_lit25 = 0x49, // Literal 25 + DW_OP_lit26 = 0x4A, // Literal 26 + DW_OP_lit27 = 0x4B, // Literal 27 + DW_OP_lit28 = 0x4C, // Literal 28 + DW_OP_lit29 = 0x4D, // Literal 29 + DW_OP_lit30 = 0x4E, // Literal 30 + DW_OP_lit31 = 0x4F, // Literal 31 + DW_OP_reg0 = 0x50, // Contents of reg0 + DW_OP_reg1 = 0x51, // Contents of reg1 + DW_OP_reg2 = 0x52, // Contents of reg2 + DW_OP_reg3 = 0x53, // Contents of reg3 + DW_OP_reg4 = 0x54, // Contents of reg4 + DW_OP_reg5 = 0x55, // Contents of reg5 + DW_OP_reg6 = 0x56, // Contents of reg6 + DW_OP_reg7 = 0x57, // Contents of reg7 + DW_OP_reg8 = 0x58, // Contents of reg8 + DW_OP_reg9 = 0x59, // Contents of reg9 + DW_OP_reg10 = 0x5A, // Contents of reg10 + DW_OP_reg11 = 0x5B, // Contents of reg11 + DW_OP_reg12 = 0x5C, // Contents of reg12 + DW_OP_reg13 = 0x5D, // Contents of reg13 + DW_OP_reg14 = 0x5E, // Contents of reg14 + DW_OP_reg15 = 0x5F, // Contents of reg15 + DW_OP_reg16 = 0x60, // Contents of reg16 + DW_OP_reg17 = 0x61, // Contents of reg17 + DW_OP_reg18 = 0x62, // Contents of reg18 + DW_OP_reg19 = 0x63, // Contents of reg19 + DW_OP_reg20 = 0x64, // Contents of reg20 + DW_OP_reg21 = 0x65, // Contents of reg21 + DW_OP_reg22 = 0x66, // Contents of reg22 + DW_OP_reg23 = 0x67, // 
Contents of reg23 + DW_OP_reg24 = 0x68, // Contents of reg24 + DW_OP_reg25 = 0x69, // Contents of reg25 + DW_OP_reg26 = 0x6A, // Contents of reg26 + DW_OP_reg27 = 0x6B, // Contents of reg27 + DW_OP_reg28 = 0x6C, // Contents of reg28 + DW_OP_reg29 = 0x6D, // Contents of reg29 + DW_OP_reg30 = 0x6E, // Contents of reg30 + DW_OP_reg31 = 0x6F, // Contents of reg31 + DW_OP_breg0 = 0x70, // base register 0 + SLEB128 offset + DW_OP_breg1 = 0x71, // base register 1 + SLEB128 offset + DW_OP_breg2 = 0x72, // base register 2 + SLEB128 offset + DW_OP_breg3 = 0x73, // base register 3 + SLEB128 offset + DW_OP_breg4 = 0x74, // base register 4 + SLEB128 offset + DW_OP_breg5 = 0x75, // base register 5 + SLEB128 offset + DW_OP_breg6 = 0x76, // base register 6 + SLEB128 offset + DW_OP_breg7 = 0x77, // base register 7 + SLEB128 offset + DW_OP_breg8 = 0x78, // base register 8 + SLEB128 offset + DW_OP_breg9 = 0x79, // base register 9 + SLEB128 offset + DW_OP_breg10 = 0x7A, // base register 10 + SLEB128 offset + DW_OP_breg11 = 0x7B, // base register 11 + SLEB128 offset + DW_OP_breg12 = 0x7C, // base register 12 + SLEB128 offset + DW_OP_breg13 = 0x7D, // base register 13 + SLEB128 offset + DW_OP_breg14 = 0x7E, // base register 14 + SLEB128 offset + DW_OP_breg15 = 0x7F, // base register 15 + SLEB128 offset + DW_OP_breg16 = 0x80, // base register 16 + SLEB128 offset + DW_OP_breg17 = 0x81, // base register 17 + SLEB128 offset + DW_OP_breg18 = 0x82, // base register 18 + SLEB128 offset + DW_OP_breg19 = 0x83, // base register 19 + SLEB128 offset + DW_OP_breg20 = 0x84, // base register 20 + SLEB128 offset + DW_OP_breg21 = 0x85, // base register 21 + SLEB128 offset + DW_OP_breg22 = 0x86, // base register 22 + SLEB128 offset + DW_OP_breg23 = 0x87, // base register 23 + SLEB128 offset + DW_OP_breg24 = 0x88, // base register 24 + SLEB128 offset + DW_OP_breg25 = 0x89, // base register 25 + SLEB128 offset + DW_OP_breg26 = 0x8A, // base register 26 + SLEB128 offset + DW_OP_breg27 = 0x8B, // base 
register 27 + SLEB128 offset + DW_OP_breg28 = 0x8C, // base register 28 + SLEB128 offset + DW_OP_breg29 = 0x8D, // base register 29 + SLEB128 offset + DW_OP_breg30 = 0x8E, // base register 30 + SLEB128 offset + DW_OP_breg31 = 0x8F, // base register 31 + SLEB128 offset + DW_OP_regx = 0x90, // ULEB128 register + DW_OP_fbreg = 0x91, // SLEB128 offset + DW_OP_bregx = 0x92, // ULEB128 register followed by SLEB128 offset + DW_OP_piece = 0x93, // ULEB128 size of piece addressed + DW_OP_deref_size = 0x94, // 1-byte size of data retrieved + DW_OP_xderef_size = 0x95, // 1-byte size of data retrieved + DW_OP_nop = 0x96, + DW_OP_push_object_addres = 0x97, + DW_OP_call2 = 0x98, // 2-byte offset of DIE + DW_OP_call4 = 0x99, // 4-byte offset of DIE + DW_OP_call_ref = 0x9A, // 4- or 8-byte offset of DIE + DW_OP_lo_user = 0xE0, + DW_OP_APPLE_uninit = 0xF0, + DW_OP_hi_user = 0xFF +}; + + +#endif diff --git a/contrib/libs/libunwind/src/libunwind.cpp b/contrib/libs/libunwind/src/libunwind.cpp index 7c47a767997..03f8b75b5bb 100644 --- a/contrib/libs/libunwind/src/libunwind.cpp +++ b/contrib/libs/libunwind/src/libunwind.cpp @@ -1,21 +1,21 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Implements unw_* functions from -// -//===----------------------------------------------------------------------===// - -#include - +// +// +// Implements unw_* functions from +// +//===----------------------------------------------------------------------===// + +#include + #include "config.h" -#include "libunwind_ext.h" - -#include - +#include "libunwind_ext.h" + +#include + // Define the __has_feature extension for compilers that do not support it so // that we can later check for the presence of ASan in a compiler-neutral way. 
#if !defined(__has_feature) @@ -28,26 +28,26 @@ #if !defined(__USING_SJLJ_EXCEPTIONS__) #include "AddressSpace.hpp" -#include "UnwindCursor.hpp" - -using namespace libunwind; - -/// internal object to represent this processes address space -LocalAddressSpace LocalAddressSpace::sThisAddressSpace; - -_LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space = - (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace; - -/// Create a cursor of a thread in this process given 'context' recorded by +#include "UnwindCursor.hpp" + +using namespace libunwind; + +/// internal object to represent this processes address space +LocalAddressSpace LocalAddressSpace::sThisAddressSpace; + +_LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space = + (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace; + +/// Create a cursor of a thread in this process given 'context' recorded by /// __unw_getcontext(). _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, unw_context_t *context) { _LIBUNWIND_TRACE_API("__unw_init_local(cursor=%p, context=%p)", - static_cast(cursor), - static_cast(context)); -#if defined(__i386__) + static_cast(cursor), + static_cast(context)); +#if defined(__i386__) # define REGISTER_KIND Registers_x86 -#elif defined(__x86_64__) +#elif defined(__x86_64__) # define REGISTER_KIND Registers_x86_64 #elif defined(__powerpc64__) # define REGISTER_KIND Registers_ppc64 @@ -57,7 +57,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, # define REGISTER_KIND Registers_arm64 #elif defined(__arm__) # define REGISTER_KIND Registers_arm -#elif defined(__or1k__) +#elif defined(__or1k__) # define REGISTER_KIND Registers_or1k #elif defined(__hexagon__) # define REGISTER_KIND Registers_hexagon @@ -65,7 +65,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, # define REGISTER_KIND Registers_mips_o32 #elif defined(__mips64) # define REGISTER_KIND Registers_mips_newabi -#elif defined(__mips__) +#elif defined(__mips__) # warning The MIPS architecture 
is not supported with this ABI and environment! #elif defined(__sparc__) && defined(__arch64__) #define REGISTER_KIND Registers_sparc64 @@ -75,53 +75,53 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, # define REGISTER_KIND Registers_riscv #elif defined(__ve__) # define REGISTER_KIND Registers_ve -#else +#else # error Architecture not supported -#endif +#endif // Use "placement new" to allocate UnwindCursor in the cursor buffer. new (reinterpret_cast *>(cursor)) UnwindCursor( context, LocalAddressSpace::sThisAddressSpace); #undef REGISTER_KIND - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - co->setInfoBasedOnIPRegister(); - - return UNW_ESUCCESS; -} + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->setInfoBasedOnIPRegister(); + + return UNW_ESUCCESS; +} _LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local) - -/// Get value of specified register at cursor position in stack frame. + +/// Get value of specified register at cursor position in stack frame. _LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_word_t *value) { _LIBUNWIND_TRACE_API("__unw_get_reg(cursor=%p, regNum=%d, &value=%p)", - static_cast(cursor), regNum, - static_cast(value)); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - if (co->validReg(regNum)) { - *value = co->getReg(regNum); - return UNW_ESUCCESS; - } - return UNW_EBADREG; -} + static_cast(cursor), regNum, + static_cast(value)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + *value = co->getReg(regNum); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} _LIBUNWIND_WEAK_ALIAS(__unw_get_reg, unw_get_reg) - -/// Set value of specified register at cursor position in stack frame. + +/// Set value of specified register at cursor position in stack frame. 
_LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_word_t value) { _LIBUNWIND_TRACE_API("__unw_set_reg(cursor=%p, regNum=%d, value=0x%" PRIxPTR ")", static_cast(cursor), regNum, value); - typedef LocalAddressSpace::pint_t pint_t; - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - if (co->validReg(regNum)) { - co->setReg(regNum, (pint_t)value); - // specical case altering IP to re-find info (being called by personality - // function) + typedef LocalAddressSpace::pint_t pint_t; + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + co->setReg(regNum, (pint_t)value); + // specical case altering IP to re-find info (being called by personality + // function) if (regNum == UNW_REG_IP) { unw_proc_info_t info; // First, get the FDE for the old location and then update it. co->getInfo(&info); - co->setInfoBasedOnIPRegister(false); + co->setInfoBasedOnIPRegister(false); // If the original call expects stack adjustment, perform this now. // Normal frame unwinding would have included the offset already in the // CFA computation. @@ -131,169 +131,169 @@ _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, if (info.gp) co->setReg(UNW_REG_SP, co->getReg(UNW_REG_SP) + info.gp); } - return UNW_ESUCCESS; - } - return UNW_EBADREG; -} + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} _LIBUNWIND_WEAK_ALIAS(__unw_set_reg, unw_set_reg) - -/// Get value of specified float register at cursor position in stack frame. + +/// Get value of specified float register at cursor position in stack frame. 
_LIBUNWIND_HIDDEN int __unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_fpreg_t *value) { _LIBUNWIND_TRACE_API("__unw_get_fpreg(cursor=%p, regNum=%d, &value=%p)", - static_cast(cursor), regNum, - static_cast(value)); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - if (co->validFloatReg(regNum)) { - *value = co->getFloatReg(regNum); - return UNW_ESUCCESS; - } - return UNW_EBADREG; -} + static_cast(cursor), regNum, + static_cast(value)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validFloatReg(regNum)) { + *value = co->getFloatReg(regNum); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} _LIBUNWIND_WEAK_ALIAS(__unw_get_fpreg, unw_get_fpreg) - -/// Set value of specified float register at cursor position in stack frame. + +/// Set value of specified float register at cursor position in stack frame. _LIBUNWIND_HIDDEN int __unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, unw_fpreg_t value) { #if defined(_LIBUNWIND_ARM_EHABI) _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%llX)", - static_cast(cursor), regNum, value); -#else + static_cast(cursor), regNum, value); +#else _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%g)", - static_cast(cursor), regNum, value); -#endif - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - if (co->validFloatReg(regNum)) { - co->setFloatReg(regNum, value); - return UNW_ESUCCESS; - } - return UNW_EBADREG; -} + static_cast(cursor), regNum, value); +#endif + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validFloatReg(regNum)) { + co->setFloatReg(regNum, value); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} _LIBUNWIND_WEAK_ALIAS(__unw_set_fpreg, unw_set_fpreg) - -/// Move cursor to next frame. + +/// Move cursor to next frame. 
_LIBUNWIND_HIDDEN int __unw_step(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("__unw_step(cursor=%p)", static_cast(cursor)); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - return co->step(); -} + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->step(); +} _LIBUNWIND_WEAK_ALIAS(__unw_step, unw_step) - -/// Get unwind info at cursor position in stack frame. + +/// Get unwind info at cursor position in stack frame. _LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor, unw_proc_info_t *info) { _LIBUNWIND_TRACE_API("__unw_get_proc_info(cursor=%p, &info=%p)", - static_cast(cursor), static_cast(info)); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - co->getInfo(info); - if (info->end_ip == 0) - return UNW_ENOINFO; + static_cast(cursor), static_cast(info)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->getInfo(info); + if (info->end_ip == 0) + return UNW_ENOINFO; return UNW_ESUCCESS; -} +} _LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info) - -/// Resume execution at cursor position (aka longjump). + +/// Resume execution at cursor position (aka longjump). _LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast(cursor)); #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) // Inform the ASan runtime that now might be a good time to clean stuff up. __asan_handle_no_return(); #endif - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - co->jumpto(); - return UNW_EUNSPEC; -} + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->jumpto(); + return UNW_EUNSPEC; +} _LIBUNWIND_WEAK_ALIAS(__unw_resume, unw_resume) - -/// Get name of function at cursor position in stack frame. + +/// Get name of function at cursor position in stack frame. 
_LIBUNWIND_HIDDEN int __unw_get_proc_name(unw_cursor_t *cursor, char *buf, size_t bufLen, unw_word_t *offset) { _LIBUNWIND_TRACE_API("__unw_get_proc_name(cursor=%p, &buf=%p, bufLen=%lu)", - static_cast(cursor), static_cast(buf), - static_cast(bufLen)); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - if (co->getFunctionName(buf, bufLen, offset)) - return UNW_ESUCCESS; + static_cast(cursor), static_cast(buf), + static_cast(bufLen)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->getFunctionName(buf, bufLen, offset)) + return UNW_ESUCCESS; return UNW_EUNSPEC; -} +} _LIBUNWIND_WEAK_ALIAS(__unw_get_proc_name, unw_get_proc_name) - -/// Checks if a register is a floating-point register. + +/// Checks if a register is a floating-point register. _LIBUNWIND_HIDDEN int __unw_is_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum) { _LIBUNWIND_TRACE_API("__unw_is_fpreg(cursor=%p, regNum=%d)", - static_cast(cursor), regNum); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - return co->validFloatReg(regNum); -} + static_cast(cursor), regNum); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->validFloatReg(regNum); +} _LIBUNWIND_WEAK_ALIAS(__unw_is_fpreg, unw_is_fpreg) - -/// Checks if a register is a floating-point register. + +/// Checks if a register is a floating-point register. _LIBUNWIND_HIDDEN const char *__unw_regname(unw_cursor_t *cursor, unw_regnum_t regNum) { _LIBUNWIND_TRACE_API("__unw_regname(cursor=%p, regNum=%d)", - static_cast(cursor), regNum); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - return co->getRegisterName(regNum); -} + static_cast(cursor), regNum); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->getRegisterName(regNum); +} _LIBUNWIND_WEAK_ALIAS(__unw_regname, unw_regname) - -/// Checks if current frame is signal trampoline. + +/// Checks if current frame is signal trampoline. 
_LIBUNWIND_HIDDEN int __unw_is_signal_frame(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("__unw_is_signal_frame(cursor=%p)", - static_cast(cursor)); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - return co->isSignalFrame(); -} + static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->isSignalFrame(); +} _LIBUNWIND_WEAK_ALIAS(__unw_is_signal_frame, unw_is_signal_frame) - -#ifdef __arm__ -// Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD + +#ifdef __arm__ +// Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD _LIBUNWIND_HIDDEN void __unw_save_vfp_as_X(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("__unw_get_fpreg_save_vfp_as_X(cursor=%p)", - static_cast(cursor)); - AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; - return co->saveVFPAsX(); -} + static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->saveVFPAsX(); +} _LIBUNWIND_WEAK_ALIAS(__unw_save_vfp_as_X, unw_save_vfp_as_X) -#endif - - +#endif + + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) /// SPI: walks cached DWARF entries _LIBUNWIND_HIDDEN void __unw_iterate_dwarf_unwind_cache(void (*func)( - unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { _LIBUNWIND_TRACE_API("__unw_iterate_dwarf_unwind_cache(func=%p)", - reinterpret_cast(func)); - DwarfFDECache::iterateCacheEntries(func); -} + reinterpret_cast(func)); + DwarfFDECache::iterateCacheEntries(func); +} _LIBUNWIND_WEAK_ALIAS(__unw_iterate_dwarf_unwind_cache, unw_iterate_dwarf_unwind_cache) - -/// IPI: for __register_frame() + +/// IPI: for __register_frame() void __unw_add_dynamic_fde(unw_word_t fde) { - CFI_Parser::FDE_Info fdeInfo; - CFI_Parser::CIE_Info cieInfo; - const char *message = CFI_Parser::decodeFDE( - LocalAddressSpace::sThisAddressSpace, - (LocalAddressSpace::pint_t) fde, &fdeInfo, &cieInfo); - if (message == NULL) { - // 
dynamically registered FDEs don't have a mach_header group they are in. - // Use fde as mh_group - unw_word_t mh_group = fdeInfo.fdeStart; - DwarfFDECache::add((LocalAddressSpace::pint_t)mh_group, - fdeInfo.pcStart, fdeInfo.pcEnd, - fdeInfo.fdeStart); - } else { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + const char *message = CFI_Parser::decodeFDE( + LocalAddressSpace::sThisAddressSpace, + (LocalAddressSpace::pint_t) fde, &fdeInfo, &cieInfo); + if (message == NULL) { + // dynamically registered FDEs don't have a mach_header group they are in. + // Use fde as mh_group + unw_word_t mh_group = fdeInfo.fdeStart; + DwarfFDECache::add((LocalAddressSpace::pint_t)mh_group, + fdeInfo.pcStart, fdeInfo.pcEnd, + fdeInfo.fdeStart); + } else { _LIBUNWIND_DEBUG_LOG("__unw_add_dynamic_fde: bad fde: %s", message); - } -} - -/// IPI: for __deregister_frame() + } +} + +/// IPI: for __deregister_frame() void __unw_remove_dynamic_fde(unw_word_t fde) { - // fde is own mh_group + // fde is own mh_group DwarfFDECache::removeAllIn((LocalAddressSpace::pint_t)fde); -} +} void __unw_add_dynamic_eh_frame_section(unw_word_t eh_frame_start) { // The eh_frame section start serves as the mh_group @@ -325,37 +325,37 @@ void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start) { #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) #endif // !defined(__USING_SJLJ_EXCEPTIONS__) - - - -// Add logging hooks in Debug builds only -#ifndef NDEBUG -#include - -_LIBUNWIND_HIDDEN -bool logAPIs() { - // do manual lock to avoid use of _cxa_guard_acquire or initializers - static bool checked = false; - static bool log = false; - if (!checked) { - log = (getenv("LIBUNWIND_PRINT_APIS") != NULL); - checked = true; - } - return log; -} - -_LIBUNWIND_HIDDEN -bool logUnwinding() { - // do manual lock to avoid use of _cxa_guard_acquire or initializers - static bool checked = false; - static bool log = false; - if (!checked) { - log = (getenv("LIBUNWIND_PRINT_UNWINDING") != NULL); - 
checked = true; - } - return log; -} - + + + +// Add logging hooks in Debug builds only +#ifndef NDEBUG +#include + +_LIBUNWIND_HIDDEN +bool logAPIs() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_APIS") != NULL); + checked = true; + } + return log; +} + +_LIBUNWIND_HIDDEN +bool logUnwinding() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_UNWINDING") != NULL); + checked = true; + } + return log; +} + _LIBUNWIND_HIDDEN bool logDWARF() { // do manual lock to avoid use of _cxa_guard_acquire or initializers @@ -368,5 +368,5 @@ bool logDWARF() { return log; } -#endif // NDEBUG +#endif // NDEBUG diff --git a/contrib/libs/libunwind/src/libunwind_ext.h b/contrib/libs/libunwind/src/libunwind_ext.h index d543ce129e7..7065ffcdaef 100644 --- a/contrib/libs/libunwind/src/libunwind_ext.h +++ b/contrib/libs/libunwind/src/libunwind_ext.h @@ -1,27 +1,27 @@ //===----------------------------------------------------------------------===// -// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// -// Extensions to libunwind API. -// -//===----------------------------------------------------------------------===// - -#ifndef __LIBUNWIND_EXT__ -#define __LIBUNWIND_EXT__ - -#include "config.h" -#include -#include - -#define UNW_STEP_SUCCESS 1 -#define UNW_STEP_END 0 - -#ifdef __cplusplus -extern "C" { -#endif +// +// +// Extensions to libunwind API. 
+// +//===----------------------------------------------------------------------===// + +#ifndef __LIBUNWIND_EXT__ +#define __LIBUNWIND_EXT__ + +#include "config.h" +#include +#include + +#define UNW_STEP_SUCCESS 1 +#define UNW_STEP_END 0 + +#ifdef __cplusplus +extern "C" { +#endif extern int __unw_getcontext(unw_context_t *); extern int __unw_init_local(unw_cursor_t *, unw_context_t *); @@ -43,26 +43,26 @@ extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t); extern int __unw_is_signal_frame(unw_cursor_t *); extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); -// SPI +// SPI extern void __unw_iterate_dwarf_unwind_cache(void (*func)( unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)); - -// IPI + +// IPI extern void __unw_add_dynamic_fde(unw_word_t fde); extern void __unw_remove_dynamic_fde(unw_word_t fde); - + extern void __unw_add_dynamic_eh_frame_section(unw_word_t eh_frame_start); extern void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start); #if defined(_LIBUNWIND_ARM_EHABI) -extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*); -extern _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context, - const uint32_t *data, - size_t offset, size_t len); -#endif - -#ifdef __cplusplus -} -#endif - -#endif // __LIBUNWIND_EXT__ +extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*); +extern _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context, + const uint32_t *data, + size_t offset, size_t len); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __LIBUNWIND_EXT__ diff --git a/contrib/libs/libunwind/ya.make b/contrib/libs/libunwind/ya.make index 39141abcdb3..2d1aa635d47 100644 --- a/contrib/libs/libunwind/ya.make +++ b/contrib/libs/libunwind/ya.make @@ -1,15 +1,15 @@ # Generated by devtools/yamaker from nixpkgs 21.11. 
-LIBRARY() - +LIBRARY() + OWNER( pg somov g:cpp-contrib ) - + VERSION(2022-02-05) - + ORIGINAL_SOURCE(https://github.com/llvm/llvm-project/archive/2b9554b8850192bdd86c02eb671de1d866df8d87.tar.gz) LICENSE( @@ -22,11 +22,11 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) DISABLE(USE_LTO) - + ADDINCL( contrib/libs/libunwind/include ) - + NO_RUNTIME() NO_UTIL() @@ -39,17 +39,17 @@ CFLAGS( GLOBAL -D_libunwind_ -D_LIBUNWIND_IS_NATIVE_ONLY -fno-exceptions - -fno-rtti - -funwind-tables -) - + -fno-rtti + -funwind-tables +) + IF (SANITIZER_TYPE == memory) CFLAGS( -fPIC ) ENDIF() - -SRCS( + +SRCS( src/Unwind-EHABI.cpp src/Unwind-seh.cpp src/Unwind-sjlj.c @@ -58,16 +58,16 @@ SRCS( src/UnwindRegistersRestore.S src/UnwindRegistersSave.S src/libunwind.cpp -) - +) + IF (OS_DARWIN OR OS_IOS) - SRCS( + SRCS( src/Unwind_AppleExtras.cpp - ) + ) ENDIF() - + END() - + RECURSE_FOR_TESTS( ut ) diff --git a/contrib/libs/linuxvdso/fake.cpp b/contrib/libs/linuxvdso/fake.cpp index 395c4c9cb65..f75627feaae 100644 --- a/contrib/libs/linuxvdso/fake.cpp +++ b/contrib/libs/linuxvdso/fake.cpp @@ -1,8 +1,8 @@ -#include "interface.h" - -size_t NVdso::Enumerate(TSymbol*, size_t) { - return 0; -} +#include "interface.h" + +size_t NVdso::Enumerate(TSymbol*, size_t) { + return 0; +} void* NVdso::Function(const char*, const char*) { return nullptr; diff --git a/contrib/libs/linuxvdso/interface.cpp b/contrib/libs/linuxvdso/interface.cpp index 49bf3b6707f..1c8b92ad25f 100644 --- a/contrib/libs/linuxvdso/interface.cpp +++ b/contrib/libs/linuxvdso/interface.cpp @@ -1,32 +1,32 @@ -#include "interface.h" -#include "original/vdso_support.h" - +#include "interface.h" +#include "original/vdso_support.h" + #ifdef HAVE_VDSO_SUPPORT - -size_t NVdso::Enumerate(TSymbol* s, size_t len) { - if (!len) { - return 0; - } - - base::VDSOSupport vdso; - - if (!vdso.IsPresent()) { - return 0; - } - - size_t n = 0; - - for (base::VDSOSupport::SymbolIterator it = vdso.begin(); it != vdso.end(); ++it) { - *s++ = 
TSymbol(it->name, (void*)it->address); - ++n; - - if (!--len) { - break; - } - } - - return n; -} + +size_t NVdso::Enumerate(TSymbol* s, size_t len) { + if (!len) { + return 0; + } + + base::VDSOSupport vdso; + + if (!vdso.IsPresent()) { + return 0; + } + + size_t n = 0; + + for (base::VDSOSupport::SymbolIterator it = vdso.begin(); it != vdso.end(); ++it) { + *s++ = TSymbol(it->name, (void*)it->address); + ++n; + + if (!--len) { + break; + } + } + + return n; +} void* NVdso::Function(const char* name, const char* version) { base::VDSOSupport::SymbolInfo info; diff --git a/contrib/libs/linuxvdso/interface.h b/contrib/libs/linuxvdso/interface.h index 2435ae1e7dd..97d7f113c11 100644 --- a/contrib/libs/linuxvdso/interface.h +++ b/contrib/libs/linuxvdso/interface.h @@ -1,26 +1,26 @@ -#pragma once - -#include - -namespace NVdso { - struct TSymbol { - inline TSymbol() - : Name(0) - , Address(0) - { - } - - inline TSymbol(const char* name, void* addr) - : Name(name) - , Address(addr) - { - } - - const char* Name; - void* Address; - }; - - size_t Enumerate(TSymbol* s, size_t len); +#pragma once + +#include + +namespace NVdso { + struct TSymbol { + inline TSymbol() + : Name(0) + , Address(0) + { + } + + inline TSymbol(const char* name, void* addr) + : Name(name) + , Address(addr) + { + } + + const char* Name; + void* Address; + }; + + size_t Enumerate(TSymbol* s, size_t len); void* Function(const char* name, const char* version); -} +} diff --git a/contrib/libs/linuxvdso/original/config.h b/contrib/libs/linuxvdso/original/config.h index 9ac2e91d8b7..06598e84c00 100644 --- a/contrib/libs/linuxvdso/original/config.h +++ b/contrib/libs/linuxvdso/original/config.h @@ -1,11 +1,11 @@ -#pragma once - -#include - -#if !defined(__WORDSIZE) - #define __WORDSIZE (sizeof(unsigned long) * 8) -#endif - -#define DISALLOW_COPY_AND_ASSIGN(x) -#define RunningOnValgrind() false -#define COMPILE_ASSERT(x, y) +#pragma once + +#include + +#if !defined(__WORDSIZE) + #define __WORDSIZE 
(sizeof(unsigned long) * 8) +#endif + +#define DISALLOW_COPY_AND_ASSIGN(x) +#define RunningOnValgrind() false +#define COMPILE_ASSERT(x, y) diff --git a/contrib/libs/linuxvdso/original/elf_mem_image.cc b/contrib/libs/linuxvdso/original/elf_mem_image.cc index d18ef43f108..066fcba4e3f 100644 --- a/contrib/libs/linuxvdso/original/elf_mem_image.cc +++ b/contrib/libs/linuxvdso/original/elf_mem_image.cc @@ -1,436 +1,436 @@ -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -// --- -// Author: Paul Pluzhnikov -// -// Allow dynamic symbol lookup in an in-memory Elf image. -// - -#include "elf_mem_image.h" -#include "logging.h" - -#ifdef HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h - -#if defined(_musl_) - #include -#endif - -#include // for size_t, ptrdiff_t - -// From binutils/include/elf/common.h (this doesn't appear to be documented -// anywhere else). -// -// /* This flag appears in a Versym structure. It means that the symbol -// is hidden, and is only visible with an explicit version number. -// This is a GNU extension. */ -// #define VERSYM_HIDDEN 0x8000 -// -// /* This is the mask for the rest of the Versym information. */ -// #define VERSYM_VERSION 0x7fff - -#define VERSYM_VERSION 0x7fff - -namespace base { - -namespace { -template class ElfClass { - public: - static const int kElfClass = -1; - static int ElfBind(const ElfW(Sym) *) { - CHECK(false); // << "Unexpected word size"; - return 0; - } - static int ElfType(const ElfW(Sym) *) { - CHECK(false); // << "Unexpected word size"; - return 0; - } -}; - -template <> class ElfClass<32> { - public: - static const int kElfClass = ELFCLASS32; - static int ElfBind(const ElfW(Sym) *symbol) { - return ELF32_ST_BIND(symbol->st_info); - } - static int ElfType(const ElfW(Sym) *symbol) { - return ELF32_ST_TYPE(symbol->st_info); - } -}; - -template <> class ElfClass<64> { - public: - static const int kElfClass = ELFCLASS64; - static int ElfBind(const ElfW(Sym) *symbol) { - return ELF64_ST_BIND(symbol->st_info); - } - static int ElfType(const ElfW(Sym) *symbol) { - return ELF64_ST_TYPE(symbol->st_info); - } -}; - -typedef ElfClass<__WORDSIZE> CurrentElfClass; - -// Extract an element from one of the ELF tables, cast it to desired type. -// This is just a simple arithmetic and a glorified cast. -// Callers are responsible for bounds checking. 
-template -const T* GetTableElement(const ElfW(Ehdr) *ehdr, - ElfW(Off) table_offset, - ElfW(Word) element_size, - size_t index) { - return reinterpret_cast(reinterpret_cast(ehdr) - + table_offset - + index * element_size); -} -} // namespace - -const void *const ElfMemImage::kInvalidBase = - reinterpret_cast(~0L); - -ElfMemImage::ElfMemImage(const void *base) { - Init(base); -} - -int ElfMemImage::GetNumSymbols() const { - if (!hash_) { - return 0; - } - // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash - return hash_[1]; -} - -const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { - CHECK_LT(index, GetNumSymbols()); - return dynsym_ + index; -} - -const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { - CHECK_LT(index, GetNumSymbols()); - return versym_ + index; -} - -const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { - CHECK_LT(index, ehdr_->e_phnum); - return GetTableElement(ehdr_, - ehdr_->e_phoff, - ehdr_->e_phentsize, - index); -} - -const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { - CHECK_LT(offset, strsize_); - return dynstr_ + offset; -} - -const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { - if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { - // Symbol corresponds to "special" (e.g. SHN_ABS) section. - return reinterpret_cast(sym->st_value); - } - CHECK_LT(link_base_, sym->st_value); - return GetTableElement(ehdr_, 0, 1, sym->st_value) - link_base_; -} - -const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { - CHECK_LE(index, verdefnum_); - const ElfW(Verdef) *version_definition = verdef_; - while (version_definition->vd_ndx < index && version_definition->vd_next) { - const char *const version_definition_as_char = - reinterpret_cast(version_definition); - version_definition = - reinterpret_cast(version_definition_as_char + - version_definition->vd_next); - } - return version_definition->vd_ndx == index ? 
version_definition : NULL; -} - -const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( - const ElfW(Verdef) *verdef) const { - return reinterpret_cast(verdef+1); -} - -const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { - CHECK_LT(offset, strsize_); - return dynstr_ + offset; -} - -void ElfMemImage::Init(const void *base) { - ehdr_ = NULL; - dynsym_ = NULL; - dynstr_ = NULL; - versym_ = NULL; - verdef_ = NULL; - hash_ = NULL; - strsize_ = 0; - verdefnum_ = 0; - link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in an in-memory Elf image. +// + +#include "elf_mem_image.h" +#include "logging.h" + +#ifdef HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h + +#if defined(_musl_) + #include +#endif + +#include // for size_t, ptrdiff_t + +// From binutils/include/elf/common.h (this doesn't appear to be documented +// anywhere else). +// +// /* This flag appears in a Versym structure. It means that the symbol +// is hidden, and is only visible with an explicit version number. +// This is a GNU extension. */ +// #define VERSYM_HIDDEN 0x8000 +// +// /* This is the mask for the rest of the Versym information. 
*/ +// #define VERSYM_VERSION 0x7fff + +#define VERSYM_VERSION 0x7fff + +namespace base { + +namespace { +template class ElfClass { + public: + static const int kElfClass = -1; + static int ElfBind(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } + static int ElfType(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } +}; + +template <> class ElfClass<32> { + public: + static const int kElfClass = ELFCLASS32; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF32_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF32_ST_TYPE(symbol->st_info); + } +}; + +template <> class ElfClass<64> { + public: + static const int kElfClass = ELFCLASS64; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF64_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF64_ST_TYPE(symbol->st_info); + } +}; + +typedef ElfClass<__WORDSIZE> CurrentElfClass; + +// Extract an element from one of the ELF tables, cast it to desired type. +// This is just a simple arithmetic and a glorified cast. +// Callers are responsible for bounds checking. 
+template +const T* GetTableElement(const ElfW(Ehdr) *ehdr, + ElfW(Off) table_offset, + ElfW(Word) element_size, + size_t index) { + return reinterpret_cast(reinterpret_cast(ehdr) + + table_offset + + index * element_size); +} +} // namespace + +const void *const ElfMemImage::kInvalidBase = + reinterpret_cast(~0L); + +ElfMemImage::ElfMemImage(const void *base) { + Init(base); +} + +int ElfMemImage::GetNumSymbols() const { + if (!hash_) { + return 0; + } + // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash + return hash_[1]; +} + +const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return dynsym_ + index; +} + +const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return versym_ + index; +} + +const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { + CHECK_LT(index, ehdr_->e_phnum); + return GetTableElement(ehdr_, + ehdr_->e_phoff, + ehdr_->e_phentsize, + index); +} + +const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { + if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { + // Symbol corresponds to "special" (e.g. SHN_ABS) section. + return reinterpret_cast(sym->st_value); + } + CHECK_LT(link_base_, sym->st_value); + return GetTableElement(ehdr_, 0, 1, sym->st_value) - link_base_; +} + +const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { + CHECK_LE(index, verdefnum_); + const ElfW(Verdef) *version_definition = verdef_; + while (version_definition->vd_ndx < index && version_definition->vd_next) { + const char *const version_definition_as_char = + reinterpret_cast(version_definition); + version_definition = + reinterpret_cast(version_definition_as_char + + version_definition->vd_next); + } + return version_definition->vd_ndx == index ? 
version_definition : NULL; +} + +const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( + const ElfW(Verdef) *verdef) const { + return reinterpret_cast(verdef+1); +} + +const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +void ElfMemImage::Init(const void *base) { + ehdr_ = NULL; + dynsym_ = NULL; + dynstr_ = NULL; + versym_ = NULL; + verdef_ = NULL; + hash_ = NULL; + strsize_ = 0; + verdefnum_ = 0; + link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. if (!base || base == kInvalidBase) { - return; - } - const intptr_t base_as_uintptr_t = reinterpret_cast(base); - // Fake VDSO has low bit set. - const bool fake_vdso = ((base_as_uintptr_t & 1) != 0); - base = reinterpret_cast(base_as_uintptr_t & ~1); - const char *const base_as_char = reinterpret_cast(base); - if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || - base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { - RAW_DCHECK(false, "no ELF magic"); // at %p", base); - return; - } - int elf_class = base_as_char[EI_CLASS]; - if (elf_class != CurrentElfClass::kElfClass) { - DCHECK_EQ(elf_class, CurrentElfClass::kElfClass); - return; - } - switch (base_as_char[EI_DATA]) { - case ELFDATA2LSB: { - if (__LITTLE_ENDIAN != __BYTE_ORDER) { - DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; - return; - } - break; - } - case ELFDATA2MSB: { - if (__BIG_ENDIAN != __BYTE_ORDER) { - DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; - return; - } - break; - } - default: { - RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA]; - return; - } - } - - ehdr_ = reinterpret_cast(base); - const ElfW(Phdr) *dynamic_program_header = NULL; - for (int i = 0; i < ehdr_->e_phnum; ++i) { - const ElfW(Phdr) *const program_header = GetPhdr(i); - switch (program_header->p_type) { - case PT_LOAD: - if (link_base_ == ~0L) { - link_base_ = program_header->p_vaddr; 
- } - break; - case PT_DYNAMIC: - dynamic_program_header = program_header; - break; - } - } - if (link_base_ == ~0L || !dynamic_program_header) { - RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO"); - RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO"); - // Mark this image as not present. Can not recur infinitely. - Init(0); - return; - } - ptrdiff_t relocation = - base_as_char - reinterpret_cast(link_base_); - ElfW(Dyn) *dynamic_entry = - reinterpret_cast(dynamic_program_header->p_vaddr + - relocation); - for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { - ElfW(Xword) value = dynamic_entry->d_un.d_val; - if (fake_vdso) { - // A complication: in the real VDSO, dynamic entries are not relocated - // (it wasn't loaded by a dynamic loader). But when testing with a - // "fake" dlopen()ed vdso library, the loader relocates some (but - // not all!) of them before we get here. - if (dynamic_entry->d_tag == DT_VERDEF) { - // The only dynamic entry (of the ones we care about) libc-2.3.6 - // loader doesn't relocate. - value += relocation; - } - } else { - // Real VDSO. Everything needs to be relocated. - value += relocation; - } - switch (dynamic_entry->d_tag) { - case DT_HASH: - hash_ = reinterpret_cast(value); - break; - case DT_SYMTAB: - dynsym_ = reinterpret_cast(value); - break; - case DT_STRTAB: - dynstr_ = reinterpret_cast(value); - break; - case DT_VERSYM: - versym_ = reinterpret_cast(value); - break; - case DT_VERDEF: - verdef_ = reinterpret_cast(value); - break; - case DT_VERDEFNUM: - verdefnum_ = dynamic_entry->d_un.d_val; - break; - case DT_STRSZ: - strsize_ = dynamic_entry->d_un.d_val; - break; - default: - // Unrecognized entries explicitly ignored. 
- break; - } - } - if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || - !verdef_ || !verdefnum_ || !strsize_) { - RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)"); - RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)"); - RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)"); - RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)"); - RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)"); - RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)"); - RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)"); - // Mark this image as not present. Can not recur infinitely. - Init(0); - return; - } -} - -bool ElfMemImage::LookupSymbol(const char *name, - const char *version, - int type, - SymbolInfo *info) const { - for (SymbolIterator it = begin(); it != end(); ++it) { - if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 && - CurrentElfClass::ElfType(it->symbol) == type) { - if (info) { - *info = *it; - } - return true; - } - } - return false; -} - -bool ElfMemImage::LookupSymbolByAddress(const void *address, - SymbolInfo *info_out) const { - for (SymbolIterator it = begin(); it != end(); ++it) { - const char *const symbol_start = - reinterpret_cast(it->address); - const char *const symbol_end = symbol_start + it->symbol->st_size; - if (symbol_start <= address && address < symbol_end) { - if (info_out) { - // Client wants to know details for that symbol (the usual case). - if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) { - // Strong symbol; just return it. - *info_out = *it; - return true; - } else { - // Weak or local. Record it, but keep looking for a strong one. - *info_out = *it; - } - } else { - // Client only cares if there is an overlapping symbol. 
- return true; - } - } - } - return false; -} - -ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) - : index_(index), image_(image) { -} - -const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { - return &info_; -} - -const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { - return info_; -} - -bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { - return this->image_ == rhs.image_ && this->index_ == rhs.index_; -} - -bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { - return !(*this == rhs); -} - -ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { - this->Update(1); - return *this; -} - -ElfMemImage::SymbolIterator ElfMemImage::begin() const { - SymbolIterator it(this, 0); - it.Update(0); - return it; -} - -ElfMemImage::SymbolIterator ElfMemImage::end() const { - return SymbolIterator(this, GetNumSymbols()); -} - -void ElfMemImage::SymbolIterator::Update(int increment) { - const ElfMemImage *image = reinterpret_cast(image_); - CHECK(image->IsPresent() || increment == 0); - if (!image->IsPresent()) { - return; - } - index_ += increment; - if (index_ >= image->GetNumSymbols()) { - index_ = image->GetNumSymbols(); - return; - } - const ElfW(Sym) *symbol = image->GetDynsym(index_); - const ElfW(Versym) *version_symbol = image->GetVersym(index_); - CHECK(symbol && version_symbol); - const char *const symbol_name = image->GetDynstr(symbol->st_name); - const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; - const ElfW(Verdef) *version_definition = NULL; - const char *version_name = ""; - if (symbol->st_shndx == SHN_UNDEF) { - // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and - // version_index could well be greater than verdefnum_, so calling - // GetVerdef(version_index) may trigger assertion. 
- } else { - version_definition = image->GetVerdef(version_index); - } - if (version_definition) { - // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, - // optional 2nd if the version has a parent. - CHECK_LE(1, version_definition->vd_cnt); - CHECK_LE(version_definition->vd_cnt, 2); - const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); - version_name = image->GetVerstr(version_aux->vda_name); - } - info_.name = symbol_name; - info_.version = version_name; - info_.address = image->GetSymAddr(symbol); - info_.symbol = symbol; -} - -} // namespace base - -#endif // HAVE_ELF_MEM_IMAGE + return; + } + const intptr_t base_as_uintptr_t = reinterpret_cast(base); + // Fake VDSO has low bit set. + const bool fake_vdso = ((base_as_uintptr_t & 1) != 0); + base = reinterpret_cast(base_as_uintptr_t & ~1); + const char *const base_as_char = reinterpret_cast(base); + if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || + base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { + RAW_DCHECK(false, "no ELF magic"); // at %p", base); + return; + } + int elf_class = base_as_char[EI_CLASS]; + if (elf_class != CurrentElfClass::kElfClass) { + DCHECK_EQ(elf_class, CurrentElfClass::kElfClass); + return; + } + switch (base_as_char[EI_DATA]) { + case ELFDATA2LSB: { + if (__LITTLE_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + case ELFDATA2MSB: { + if (__BIG_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + default: { + RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA]; + return; + } + } + + ehdr_ = reinterpret_cast(base); + const ElfW(Phdr) *dynamic_program_header = NULL; + for (int i = 0; i < ehdr_->e_phnum; ++i) { + const ElfW(Phdr) *const program_header = GetPhdr(i); + switch (program_header->p_type) { + case PT_LOAD: + if 
(link_base_ == ~0L) { + link_base_ = program_header->p_vaddr; + } + break; + case PT_DYNAMIC: + dynamic_program_header = program_header; + break; + } + } + if (link_base_ == ~0L || !dynamic_program_header) { + RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO"); + RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO"); + // Mark this image as not present. Can not recur infinitely. + Init(0); + return; + } + ptrdiff_t relocation = + base_as_char - reinterpret_cast(link_base_); + ElfW(Dyn) *dynamic_entry = + reinterpret_cast(dynamic_program_header->p_vaddr + + relocation); + for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { + ElfW(Xword) value = dynamic_entry->d_un.d_val; + if (fake_vdso) { + // A complication: in the real VDSO, dynamic entries are not relocated + // (it wasn't loaded by a dynamic loader). But when testing with a + // "fake" dlopen()ed vdso library, the loader relocates some (but + // not all!) of them before we get here. + if (dynamic_entry->d_tag == DT_VERDEF) { + // The only dynamic entry (of the ones we care about) libc-2.3.6 + // loader doesn't relocate. + value += relocation; + } + } else { + // Real VDSO. Everything needs to be relocated. + value += relocation; + } + switch (dynamic_entry->d_tag) { + case DT_HASH: + hash_ = reinterpret_cast(value); + break; + case DT_SYMTAB: + dynsym_ = reinterpret_cast(value); + break; + case DT_STRTAB: + dynstr_ = reinterpret_cast(value); + break; + case DT_VERSYM: + versym_ = reinterpret_cast(value); + break; + case DT_VERDEF: + verdef_ = reinterpret_cast(value); + break; + case DT_VERDEFNUM: + verdefnum_ = dynamic_entry->d_un.d_val; + break; + case DT_STRSZ: + strsize_ = dynamic_entry->d_un.d_val; + break; + default: + // Unrecognized entries explicitly ignored. 
+ break; + } + } + if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || + !verdef_ || !verdefnum_ || !strsize_) { + RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)"); + RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)"); + RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)"); + RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)"); + RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)"); + RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)"); + RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)"); + // Mark this image as not present. Can not recur infinitely. + Init(0); + return; + } +} + +bool ElfMemImage::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 && + CurrentElfClass::ElfType(it->symbol) == type) { + if (info) { + *info = *it; + } + return true; + } + } + return false; +} + +bool ElfMemImage::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + const char *const symbol_start = + reinterpret_cast(it->address); + const char *const symbol_end = symbol_start + it->symbol->st_size; + if (symbol_start <= address && address < symbol_end) { + if (info_out) { + // Client wants to know details for that symbol (the usual case). + if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) { + // Strong symbol; just return it. + *info_out = *it; + return true; + } else { + // Weak or local. Record it, but keep looking for a strong one. + *info_out = *it; + } + } else { + // Client only cares if there is an overlapping symbol. 
+ return true; + } + } + } + return false; +} + +ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) + : index_(index), image_(image) { +} + +const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { + return &info_; +} + +const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { + return info_; +} + +bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { + return this->image_ == rhs.image_ && this->index_ == rhs.index_; +} + +bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { + return !(*this == rhs); +} + +ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { + this->Update(1); + return *this; +} + +ElfMemImage::SymbolIterator ElfMemImage::begin() const { + SymbolIterator it(this, 0); + it.Update(0); + return it; +} + +ElfMemImage::SymbolIterator ElfMemImage::end() const { + return SymbolIterator(this, GetNumSymbols()); +} + +void ElfMemImage::SymbolIterator::Update(int increment) { + const ElfMemImage *image = reinterpret_cast(image_); + CHECK(image->IsPresent() || increment == 0); + if (!image->IsPresent()) { + return; + } + index_ += increment; + if (index_ >= image->GetNumSymbols()) { + index_ = image->GetNumSymbols(); + return; + } + const ElfW(Sym) *symbol = image->GetDynsym(index_); + const ElfW(Versym) *version_symbol = image->GetVersym(index_); + CHECK(symbol && version_symbol); + const char *const symbol_name = image->GetDynstr(symbol->st_name); + const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; + const ElfW(Verdef) *version_definition = NULL; + const char *version_name = ""; + if (symbol->st_shndx == SHN_UNDEF) { + // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and + // version_index could well be greater than verdefnum_, so calling + // GetVerdef(version_index) may trigger assertion. 
+ } else { + version_definition = image->GetVerdef(version_index); + } + if (version_definition) { + // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, + // optional 2nd if the version has a parent. + CHECK_LE(1, version_definition->vd_cnt); + CHECK_LE(version_definition->vd_cnt, 2); + const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); + version_name = image->GetVerstr(version_aux->vda_name); + } + info_.name = symbol_name; + info_.version = version_name; + info_.address = image->GetSymAddr(symbol); + info_.symbol = symbol; +} + +} // namespace base + +#endif // HAVE_ELF_MEM_IMAGE diff --git a/contrib/libs/linuxvdso/original/elf_mem_image.h b/contrib/libs/linuxvdso/original/elf_mem_image.h index b6619bcab9a..580184cb37d 100644 --- a/contrib/libs/linuxvdso/original/elf_mem_image.h +++ b/contrib/libs/linuxvdso/original/elf_mem_image.h @@ -1,135 +1,135 @@ -#pragma once - -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Paul Pluzhnikov -// -// Allow dynamic symbol lookup for in-memory Elf images. - -#ifndef BASE_ELF_MEM_IMAGE_H_ -#define BASE_ELF_MEM_IMAGE_H_ - -#include "config.h" - -#include // for __GLIBC__ - -// Maybe one day we can rewrite this file not to require the elf -// symbol extensions in glibc, but for right now we need them. -#if (defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__)) || defined(_musl_) - -#define HAVE_ELF_MEM_IMAGE 1 - -#include -#include // for ElfW - -namespace base { - -// An in-memory ELF image (may not exist on disk). -class ElfMemImage { - public: - // Sentinel: there could never be an elf image at this address. - static const void *const kInvalidBase; - - // Information about a single vdso symbol. - // All pointers are into .dynsym, .dynstr, or .text of the VDSO. - // Do not free() them or modify through them. - struct SymbolInfo { - const char *name; // E.g. "__vdso_getcpu" - const char *version; // E.g. "LINUX_2.6", could be "" - // for unversioned symbol. - const void *address; // Relocated symbol address. - const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table. - }; - - // Supports iteration over all dynamic symbols. 
- class SymbolIterator { - public: - friend class ElfMemImage; - const SymbolInfo *operator->() const; - const SymbolInfo &operator*() const; - SymbolIterator& operator++(); - bool operator!=(const SymbolIterator &rhs) const; - bool operator==(const SymbolIterator &rhs) const; - private: - SymbolIterator(const void *const image, int index); - void Update(int incr); - SymbolInfo info_; - int index_; - const void *const image_; - }; - - - explicit ElfMemImage(const void *base); - void Init(const void *base); - bool IsPresent() const { return ehdr_ != NULL; } - const ElfW(Phdr)* GetPhdr(int index) const; - const ElfW(Sym)* GetDynsym(int index) const; - const ElfW(Versym)* GetVersym(int index) const; - const ElfW(Verdef)* GetVerdef(int index) const; - const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; - const char* GetDynstr(ElfW(Word) offset) const; - const void* GetSymAddr(const ElfW(Sym) *sym) const; - const char* GetVerstr(ElfW(Word) offset) const; - int GetNumSymbols() const; - - SymbolIterator begin() const; - SymbolIterator end() const; - - // Look up versioned dynamic symbol in the image. - // Returns false if image is not present, or doesn't contain given - // symbol/version/type combination. - // If info_out != NULL, additional details are filled in. - bool LookupSymbol(const char *name, const char *version, - int symbol_type, SymbolInfo *info_out) const; - - // Find info about symbol (if any) which overlaps given address. - // Returns true if symbol was found; false if image isn't present - // or doesn't have a symbol overlapping given address. - // If info_out != NULL, additional details are filled in. 
- bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; - - private: - const ElfW(Ehdr) *ehdr_; - const ElfW(Sym) *dynsym_; - const ElfW(Versym) *versym_; - const ElfW(Verdef) *verdef_; - const ElfW(Word) *hash_; - const char *dynstr_; - size_t strsize_; - size_t verdefnum_; - ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). -}; - -} // namespace base - -#endif // __ELF__ and __GLIBC__ and !__native_client__ - -#endif // BASE_ELF_MEM_IMAGE_H_ +#pragma once + +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup for in-memory Elf images. + +#ifndef BASE_ELF_MEM_IMAGE_H_ +#define BASE_ELF_MEM_IMAGE_H_ + +#include "config.h" + +#include // for __GLIBC__ + +// Maybe one day we can rewrite this file not to require the elf +// symbol extensions in glibc, but for right now we need them. +#if (defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__)) || defined(_musl_) + +#define HAVE_ELF_MEM_IMAGE 1 + +#include +#include // for ElfW + +namespace base { + +// An in-memory ELF image (may not exist on disk). +class ElfMemImage { + public: + // Sentinel: there could never be an elf image at this address. + static const void *const kInvalidBase; + + // Information about a single vdso symbol. + // All pointers are into .dynsym, .dynstr, or .text of the VDSO. + // Do not free() them or modify through them. + struct SymbolInfo { + const char *name; // E.g. "__vdso_getcpu" + const char *version; // E.g. "LINUX_2.6", could be "" + // for unversioned symbol. + const void *address; // Relocated symbol address. + const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table. + }; + + // Supports iteration over all dynamic symbols. 
+ class SymbolIterator { + public: + friend class ElfMemImage; + const SymbolInfo *operator->() const; + const SymbolInfo &operator*() const; + SymbolIterator& operator++(); + bool operator!=(const SymbolIterator &rhs) const; + bool operator==(const SymbolIterator &rhs) const; + private: + SymbolIterator(const void *const image, int index); + void Update(int incr); + SymbolInfo info_; + int index_; + const void *const image_; + }; + + + explicit ElfMemImage(const void *base); + void Init(const void *base); + bool IsPresent() const { return ehdr_ != NULL; } + const ElfW(Phdr)* GetPhdr(int index) const; + const ElfW(Sym)* GetDynsym(int index) const; + const ElfW(Versym)* GetVersym(int index) const; + const ElfW(Verdef)* GetVerdef(int index) const; + const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; + const char* GetDynstr(ElfW(Word) offset) const; + const void* GetSymAddr(const ElfW(Sym) *sym) const; + const char* GetVerstr(ElfW(Word) offset) const; + int GetNumSymbols() const; + + SymbolIterator begin() const; + SymbolIterator end() const; + + // Look up versioned dynamic symbol in the image. + // Returns false if image is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out != NULL, additional details are filled in. + bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if image isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != NULL, additional details are filled in. 
+ bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + private: + const ElfW(Ehdr) *ehdr_; + const ElfW(Sym) *dynsym_; + const ElfW(Versym) *versym_; + const ElfW(Verdef) *verdef_; + const ElfW(Word) *hash_; + const char *dynstr_; + size_t strsize_; + size_t verdefnum_; + ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). +}; + +} // namespace base + +#endif // __ELF__ and __GLIBC__ and !__native_client__ + +#endif // BASE_ELF_MEM_IMAGE_H_ diff --git a/contrib/libs/linuxvdso/original/linux_syscall_support.h b/contrib/libs/linuxvdso/original/linux_syscall_support.h index 37d182ad3c0..0edb951c541 100644 --- a/contrib/libs/linuxvdso/original/linux_syscall_support.h +++ b/contrib/libs/linuxvdso/original/linux_syscall_support.h @@ -1,1644 +1,1644 @@ -#pragma once - -/* Copyright (c) 2005-2008, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Markus Gutschke - */ - -/* This file includes Linux-specific support functions common to the - * coredumper and the thread lister; primarily, this is a collection - * of direct system calls, and a couple of symbols missing from - * standard header files. - * There are a few options that the including file can set to control - * the behavior of this file: - * - * SYS_CPLUSPLUS: - * The entire header file will normally be wrapped in 'extern "C" { }", - * making it suitable for compilation as both C and C++ source. If you - * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit - * the wrapping. N.B. doing so will suppress inclusion of all prerequisite - * system header files, too. It is the caller's responsibility to provide - * the necessary definitions. - * - * SYS_ERRNO: - * All system calls will update "errno" unless overriden by setting the - * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be - * an l-value. - * - * SYS_INLINE: - * New symbols will be defined "static inline", unless overridden by - * the SYS_INLINE macro. - * - * SYS_LINUX_SYSCALL_SUPPORT_H - * This macro is used to avoid multiple inclusions of this header file. - * If you need to include this file more than once, make sure to - * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. - * - * SYS_PREFIX: - * New system calls will have a prefix of "sys_" unless overridden by - * the SYS_PREFIX macro. 
Valid values for this macro are [0..9] which - * results in prefixes "sys[0..9]_". It is also possible to set this - * macro to -1, which avoids all prefixes. - * - * This file defines a few internal symbols that all start with "LSS_". - * Do not access these symbols from outside this file. They are not part - * of the supported API. - * - * NOTE: This is a stripped down version of the official opensource - * version of linux_syscall_support.h, which lives at - * http://code.google.com/p/linux-syscall-support/ - * It includes only the syscalls that are used in perftools, plus a - * few extra. Here's the breakdown: - * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u - * sys__exit( - * sys_clone( - * sys_close( - * sys_fcntl( - * sys_fstat( - * sys_futex( - * sys_futex1( - * sys_getcpu( - * sys_getdents( - * sys_getppid( - * sys_gettid( - * sys_lseek( - * sys_mmap( - * sys_mremap( - * sys_munmap( - * sys_open( - * sys_pipe( - * sys_prctl( - * sys_ptrace( - * sys_ptrace_detach( - * sys_read( - * sys_sched_yield( - * sys_sigaction( - * sys_sigaltstack( - * sys_sigdelset( - * sys_sigfillset( - * sys_sigprocmask( - * sys_socket( - * sys_stat( - * sys_waitpid( - * 2) These are used as subroutines of the above: - * sys_getpid -- gettid - * sys_kill -- ptrace_detach - * sys_restore -- sigaction - * sys_restore_rt -- sigaction - * sys_socketcall -- socket - * sys_wait4 -- waitpid - * 3) I left these in even though they're not used. They either - * complement the above (write vs read) or are variants (rt_sigaction): - * sys_fstat64 - * sys_getdents64 - * sys_llseek - * sys_mmap2 - * sys_openat - * sys_rt_sigaction - * sys_rt_sigprocmask - * sys_sigaddset - * sys_sigemptyset - * sys_stat64 - * sys_write - */ -#ifndef SYS_LINUX_SYSCALL_SUPPORT_H -#define SYS_LINUX_SYSCALL_SUPPORT_H - -/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux. - * Porting to other related platforms should not be difficult. 
- */ -#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ - defined(__mips__) || defined(__PPC__)) && defined(__linux) - -#ifndef SYS_CPLUSPLUS -#ifdef __cplusplus -/* Some system header files in older versions of gcc neglect to properly - * handle being included from C++. As it appears to be harmless to have - * multiple nested 'extern "C"' blocks, just add another one here. - */ -extern "C" { -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __mips__ -/* Include definitions of the ABI currently in use. */ -#include -#endif - -#endif - -/* As glibc often provides subtly incompatible data structures (and implicit - * wrapper functions that convert them), we provide our own kernel data - * structures for use by the system calls. - * These structures have been developed by using Linux 2.6.23 headers for - * reference. Note though, we do not care about exact API compatibility - * with the kernel, and in fact the kernel often does not have a single - * API that works across architectures. Instead, we try to mimic the glibc - * API where reasonable, and only guarantee ABI compatibility with the - * kernel headers. - * Most notably, here are a few changes that were made to the structures - * defined by kernel headers: - * - * - we only define structures, but not symbolic names for kernel data - * types. For the latter, we directly use the native C datatype - * (i.e. "unsigned" instead of "mode_t"). - * - in a few cases, it is possible to define identical structures for - * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by - * standardizing on the 64bit version of the data types. In particular, - * this means that we use "unsigned" where the 32bit headers say - * "unsigned long". - * - overall, we try to minimize the number of cases where we need to - * conditionally define different structures. 
- * - the "struct kernel_sigaction" class of structures have been - * modified to more closely mimic glibc's API by introducing an - * anonymous union for the function pointer. - * - a small number of field names had to have an underscore appended to - * them, because glibc defines a global macro by the same name. - */ - -/* include/linux/dirent.h */ -struct kernel_dirent64 { - unsigned long long d_ino; - long long d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[256]; -}; - -/* include/linux/dirent.h */ -struct kernel_dirent { - long d_ino; - long d_off; - unsigned short d_reclen; - char d_name[256]; -}; - -/* include/linux/time.h */ -struct kernel_timespec { - long tv_sec; - long tv_nsec; -}; - -/* include/linux/time.h */ -struct kernel_timeval { - long tv_sec; - long tv_usec; -}; - -/* include/linux/resource.h */ -struct kernel_rusage { - struct kernel_timeval ru_utime; - struct kernel_timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; -}; - -struct siginfo; -#if defined(__i386__) || defined(__arm__) || defined(__PPC__) - -/* include/asm-{arm,i386,mips,ppc}/signal.h */ -struct kernel_old_sigaction { - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); - }; - unsigned long sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -} __attribute__((packed,aligned(4))); -#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - #define kernel_old_sigaction kernel_sigaction -#endif - -/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the - * exactly match the size of the signal set, even though the API was - * intended to be extensible. We define our own KERNEL_NSIG to deal with - * this. 
- * Please note that glibc provides signals [1.._NSIG-1], whereas the - * kernel (and this header) provides the range [1..KERNEL_NSIG]. The - * actual number of signals is obviously the same, but the constants - * differ by one. - */ -#ifdef __mips__ -#define KERNEL_NSIG 128 -#else -#define KERNEL_NSIG 64 -#endif - -/* include/asm-{arm,i386,mips,x86_64}/signal.h */ -struct kernel_sigset_t { - unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ - (8*sizeof(unsigned long))]; -}; - -/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */ -struct kernel_sigaction { -#ifdef __mips__ - unsigned long sa_flags; - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); - }; - struct kernel_sigset_t sa_mask; -#else - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); - }; - unsigned long sa_flags; - void (*sa_restorer)(void); - struct kernel_sigset_t sa_mask; -#endif -}; - -/* include/asm-{arm,i386,mips,ppc}/stat.h */ -#ifdef __mips__ -#if _MIPS_SIM == _MIPS_SIM_ABI64 -struct kernel_stat { -#else -struct kernel_stat64 { -#endif - unsigned st_dev; - unsigned __pad0[3]; - unsigned long long st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; - unsigned __pad1[3]; - long long st_size; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned st_blksize; - unsigned __pad2; - unsigned long long st_blocks; -}; -#elif defined __PPC__ -struct kernel_stat64 { - unsigned long long st_dev; - unsigned long long st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned long long st_rdev; - unsigned short int __pad2; - long long st_size; - long st_blksize; - long long st_blocks; - long st_atime_; - unsigned long st_atime_nsec_; - long st_mtime_; - unsigned long st_mtime_nsec_; - long st_ctime_; - unsigned long 
st_ctime_nsec_; - unsigned long __unused4; - unsigned long __unused5; -}; -#else -struct kernel_stat64 { - unsigned long long st_dev; - unsigned char __pad0[4]; - unsigned __st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned long long st_rdev; - unsigned char __pad3[4]; - long long st_size; - unsigned st_blksize; - unsigned long long st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned long long st_ino; -}; -#endif - -/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */ -#if defined(__i386__) || defined(__arm__) -struct kernel_stat { - /* The kernel headers suggest that st_dev and st_rdev should be 32bit - * quantities encoding 12bit major and 20bit minor numbers in an interleaved - * format. In reality, we do not see useful data in the top bits. So, - * we'll leave the padding in here, until we find a better solution. - */ - unsigned short st_dev; - short pad1; - unsigned st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - short pad2; - unsigned st_size; - unsigned st_blksize; - unsigned st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned __unused4; - unsigned __unused5; -}; -#elif defined(__x86_64__) -struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned st_mode; - unsigned st_uid; - unsigned st_gid; - unsigned __pad0; - unsigned long st_rdev; - long st_size; - long st_blksize; - long st_blocks; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - long __unused[3]; -}; -#elif defined(__PPC__) -struct kernel_stat { - unsigned 
st_dev; - unsigned long st_ino; // ino_t - unsigned long st_mode; // mode_t - unsigned short st_nlink; // nlink_t - unsigned st_uid; // uid_t - unsigned st_gid; // gid_t - unsigned st_rdev; - long st_size; // off_t - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned long __unused4; - unsigned long __unused5; -}; -#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) -struct kernel_stat { - unsigned st_dev; - int st_pad1[3]; - unsigned st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; - int st_pad2[2]; - long st_size; - int st_pad3; - long st_atime_; - long st_atime_nsec_; - long st_mtime_; - long st_mtime_nsec_; - long st_ctime_; - long st_ctime_nsec_; - int st_blksize; - int st_blocks; - int st_pad4[14]; -}; -#endif - - -/* Definitions missing from the standard header files */ -#ifndef O_DIRECTORY -#if defined(__arm__) -#define O_DIRECTORY 0040000 -#else -#define O_DIRECTORY 0200000 -#endif -#endif -#ifndef PR_GET_DUMPABLE -#define PR_GET_DUMPABLE 3 -#endif -#ifndef PR_SET_DUMPABLE -#define PR_SET_DUMPABLE 4 -#endif -#ifndef AT_FDCWD -#define AT_FDCWD (-100) -#endif -#ifndef AT_SYMLINK_NOFOLLOW -#define AT_SYMLINK_NOFOLLOW 0x100 -#endif -#ifndef AT_REMOVEDIR -#define AT_REMOVEDIR 0x200 -#endif -#ifndef MREMAP_FIXED -#define MREMAP_FIXED 2 -#endif -#ifndef SA_RESTORER -#define SA_RESTORER 0x04000000 -#endif - -#if defined(__i386__) -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#endif -#ifndef __NR_stat64 -#define __NR_stat64 195 -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 197 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 220 -#endif -#ifndef __NR_gettid -#define __NR_gettid 224 -#endif -#ifndef __NR_futex -#define __NR_futex 240 -#endif -#ifndef 
__NR_openat -#define __NR_openat 295 -#endif -#ifndef __NR_getcpu -#define __NR_getcpu 318 -#endif -/* End of i386 definitions */ -#elif defined(__arm__) -#ifndef __syscall -#if defined(__thumb__) || defined(__ARM_EABI__) -#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name; -#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs -#define __syscall(name) "swi\t0" -#define __syscall_safe(name) \ - "push {r7}\n" \ - "mov r7,%[sysreg]\n" \ - __syscall(name)"\n" \ - "pop {r7}" -#else -#define __SYS_REG(name) -#define __SYS_REG_LIST(regs...) regs -#define __syscall(name) "swi\t" __sys1(__NR_##name) "" -#define __syscall_safe(name) __syscall(name) -#endif -#endif -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) -#endif -#ifndef __NR_stat64 -#define __NR_stat64 (__NR_SYSCALL_BASE + 195) -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) -#endif -#ifndef __NR_gettid -#define __NR_gettid (__NR_SYSCALL_BASE + 224) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_SYSCALL_BASE + 240) -#endif -/* End of ARM definitions */ -#elif defined(__x86_64__) -#ifndef __NR_gettid -#define __NR_gettid 186 -#endif -#ifndef __NR_futex -#define __NR_futex 202 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 217 -#endif -#ifndef __NR_openat -#define __NR_openat 257 -#endif -/* End of x86-64 definitions */ -#elif defined(__mips__) -#if _MIPS_SIM == _MIPS_SIM_ABI32 -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction (__NR_Linux + 194) -#define __NR_rt_sigprocmask (__NR_Linux + 195) -#endif -#ifndef __NR_stat64 -#define __NR_stat64 (__NR_Linux + 213) -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 (__NR_Linux + 215) -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_Linux + 219) -#endif -#ifndef __NR_gettid -#define __NR_gettid 
(__NR_Linux + 222) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 238) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 288) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 293) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 312) -#endif -/* End of MIPS (old 32bit API) definitions */ -#elif _MIPS_SIM == _MIPS_SIM_ABI64 -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 178) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 247) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 252) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 271) -#endif -/* End of MIPS (64bit API) definitions */ -#else -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 178) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 251) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 256) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 275) -#endif -/* End of MIPS (new 32bit API) definitions */ -#endif -/* End of MIPS definitions */ -#elif defined(__PPC__) -#ifndef __NR_rt_sigaction -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 -#endif -#ifndef __NR_stat64 -#define __NR_stat64 195 -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 197 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 202 -#endif -#ifndef __NR_gettid -#define __NR_gettid 207 -#endif -#ifndef __NR_futex -#define __NR_futex 221 -#endif -#ifndef __NR_openat -#define __NR_openat 286 -#endif -#ifndef __NR_getcpu -#define __NR_getcpu 302 -#endif -/* End of powerpc defininitions */ -#endif - - -/* After forking, we must make sure to only call system calls. 
*/ -#if __BOUNDED_POINTERS__ - #error "Need to port invocations of syscalls for bounded ptrs" -#else - /* The core dumper and the thread lister get executed after threads - * have been suspended. As a consequence, we cannot call any functions - * that acquire locks. Unfortunately, libc wraps most system calls - * (e.g. in order to implement pthread_atfork, and to make calls - * cancellable), which means we cannot call these functions. Instead, - * we have to call syscall() directly. - */ - #undef LSS_ERRNO - #ifdef SYS_ERRNO - /* Allow the including file to override the location of errno. This can - * be useful when using clone() with the CLONE_VM option. - */ - #define LSS_ERRNO SYS_ERRNO - #else - #define LSS_ERRNO errno - #endif - - #undef LSS_INLINE - #ifdef SYS_INLINE - #define LSS_INLINE SYS_INLINE - #else - #define LSS_INLINE static inline - #endif - - /* Allow the including file to override the prefix used for all new - * system calls. By default, it will be set to "sys_". - */ - #undef LSS_NAME - #ifndef SYS_PREFIX - #define LSS_NAME(name) sys_##name - #elif SYS_PREFIX < 0 - #define LSS_NAME(name) name - #elif SYS_PREFIX == 0 - #define LSS_NAME(name) sys0_##name - #elif SYS_PREFIX == 1 - #define LSS_NAME(name) sys1_##name - #elif SYS_PREFIX == 2 - #define LSS_NAME(name) sys2_##name - #elif SYS_PREFIX == 3 - #define LSS_NAME(name) sys3_##name - #elif SYS_PREFIX == 4 - #define LSS_NAME(name) sys4_##name - #elif SYS_PREFIX == 5 - #define LSS_NAME(name) sys5_##name - #elif SYS_PREFIX == 6 - #define LSS_NAME(name) sys6_##name - #elif SYS_PREFIX == 7 - #define LSS_NAME(name) sys7_##name - #elif SYS_PREFIX == 8 - #define LSS_NAME(name) sys8_##name - #elif SYS_PREFIX == 9 - #define LSS_NAME(name) sys9_##name - #endif - - #undef LSS_RETURN - #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__)) - /* Failing system calls return a negative result in the range of - * -1..-4095. These are "errno" values with the sign inverted. 
- */ - #define LSS_RETURN(type, res) \ - do { \ - if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ - LSS_ERRNO = -(res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #elif defined(__mips__) - /* On MIPS, failing system calls return -1, and set errno in a - * separate CPU register. - */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #elif defined(__PPC__) - /* On PPC, failing system calls return -1, and set errno in a - * separate CPU register. See linux/unistd.h. - */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err & 0x10000000 ) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #endif - #if defined(__i386__) - #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) - /* This only works for GCC-4.4 and above -- the first version to use - .cfi directives for dwarf unwind info. */ - #define CFI_ADJUST_CFA_OFFSET(adjust) \ - ".cfi_adjust_cfa_offset " #adjust "\n" - #else - #define CFI_ADJUST_CFA_OFFSET(adjust) /**/ - #endif - - /* In PIC mode (e.g. when building shared libraries), gcc for i386 - * reserves ebx. Unfortunately, most distribution ship with implementations - * of _syscallX() which clobber ebx. - * Also, most definitions of _syscallX() neglect to mark "memory" as being - * clobbered. This causes problems with compilers, that do a better job - * at optimizing across __asm__ calls. - * So, we just have to redefine all of the _syscallX() macros. - */ - #undef LSS_BODY - #define LSS_BODY(type,args...) 
\ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ - CFI_ADJUST_CFA_OFFSET(4) \ - "movl %2,%%ebx\n" \ - "int $0x80\n" \ - "pop %%ebx\n" \ - CFI_ADJUST_CFA_OFFSET(-4) \ - args \ - : "esp", "memory"); \ - LSS_RETURN(type,__res) - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)(void) { \ - long __res; \ - __asm__ volatile("int $0x80" \ - : "=a" (__res) \ - : "0" (__NR_##name) \ - : "memory"); \ - LSS_RETURN(type,__res); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1))); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1,type2 arg2) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3))); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4))); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ - "movl %2,%%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx" \ - : "=a" (__res) \ - : "i" (__NR_##name), "ri" ((long)(arg1)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - 
LSS_RETURN(type,__res); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - long __res; \ - struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ - __asm__ __volatile__("push %%ebp\n" \ - "push %%ebx\n" \ - "movl 4(%2),%%ebp\n" \ - "movl 0(%2), %%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx\n" \ - "pop %%ebp" \ - : "=a" (__res) \ - : "i" (__NR_##name), "0" ((long)(&__s)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - LSS_RETURN(type,__res); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - __asm__ __volatile__(/* if (fn == NULL) - * return -EINVAL; - */ - "movl %3,%%ecx\n" - "jecxz 1f\n" - - /* if (child_stack == NULL) - * return -EINVAL; - */ - "movl %4,%%ecx\n" - "jecxz 1f\n" - - /* Set up alignment of the child stack: - * child_stack = (child_stack & ~0xF) - 20; - */ - "andl $-16,%%ecx\n" - "subl $20,%%ecx\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "movl %6,%%eax\n" - "movl %%eax,4(%%ecx)\n" - "movl %3,%%eax\n" - "movl %%eax,(%%ecx)\n" - - /* %eax = syscall(%eax = __NR_clone, - * %ebx = flags, - * %ecx = child_stack, - * %edx = parent_tidptr, - * %esi = newtls, - * %edi = child_tidptr) - * Also, make sure that %ebx gets preserved as it is - * used in PIC mode. - */ - "movl %8,%%esi\n" - "movl %7,%%edx\n" - "movl %5,%%eax\n" - "movl %9,%%edi\n" - "pushl %%ebx\n" - "movl %%eax,%%ebx\n" - "movl %2,%%eax\n" - "int $0x80\n" - - /* In the parent: restore %ebx - * In the child: move "fn" into %ebx - */ - "popl %%ebx\n" - - /* if (%eax != 0) - * return %eax; - */ - "test %%eax,%%eax\n" - "jnz 1f\n" - - /* In the child, now. 
Terminate frame pointer chain. - */ - "movl $0,%%ebp\n" - - /* Call "fn". "arg" is already on the stack. - */ - "call *%%ebx\n" - - /* Call _exit(%ebx). Unfortunately older versions - * of gcc restrict the number of arguments that can - * be passed to asm(). So, we need to hard-code the - * system call number. - */ - "movl %%eax,%%ebx\n" - "movl $1,%%eax\n" - "int $0x80\n" - - /* Return to parent. - */ - "1:\n" - : "=a" (__res) - : "0"(-EINVAL), "i"(__NR_clone), - "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), - "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) - : "esp", "memory", "ecx", "edx", "esi", "edi"); - LSS_RETURN(int, __res); - } - - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* On i386, the kernel does not know how to return from a signal - * handler. Instead, it relies on user space to provide a - * restorer function that calls the {rt_,}sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return res; - } - LSS_INLINE void (*LSS_NAME(restore)(void))(void) { - /* On i386, the kernel does not know how to return from a signal - * handler. Instead, it relies on user space to provide a - * restorer function that calls the {rt_,}sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. 
- */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:pop %%eax\n" - "movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_sigreturn)); - return res; - } - #elif defined(__x86_64__) - /* There are no known problems with any of the _syscallX() macros - * currently shipping for x86_64, but we still need to be able to define - * our own version so that we can override the location of the errno - * location (e.g. when using the clone() system call with the CLONE_VM - * option). - */ - #undef LSS_BODY - #define LSS_BODY(type,name, ...) \ - long __res; \ - __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \ - ##__VA_ARGS__ : "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)() { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(type, name, "D" ((long)(arg1))); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \ - "d" ((long)(arg3))); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - 
type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)), "r" ((long)(arg5)) : \ - "r8", "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \ - "syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \ - "r8", "r9", "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - { - __asm__ __volatile__(/* if (fn == NULL) - * return -EINVAL; - */ - "testq %4,%4\n" - "jz 1f\n" - - /* if (child_stack == NULL) - * return -EINVAL; - */ - "testq %5,%5\n" - "jz 1f\n" - - /* Set up alignment of the child stack: - * child_stack = (child_stack & ~0xF) - 16; - */ - "andq $-16,%5\n" - "subq $16,%5\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "movq %7,8(%5)\n" - "movq %4,0(%5)\n" - - /* %rax = syscall(%rax = __NR_clone, - * %rdi = flags, - * %rsi = child_stack, - * %rdx = parent_tidptr, - * %r8 = new_tls, - * %r10 = child_tidptr) - */ - "movq %2,%%rax\n" - "movq %9,%%r8\n" - "movq %10,%%r10\n" - "syscall\n" - - /* if (%rax != 0) - * return; - */ - "testq %%rax,%%rax\n" - "jnz 1f\n" - - /* In the child. Terminate frame pointer chain. 
- */ - "xorq %%rbp,%%rbp\n" - - /* Call "fn(arg)". - */ - "popq %%rax\n" - "popq %%rdi\n" - "call *%%rax\n" - - /* Call _exit(%ebx). - */ - "movq %%rax,%%rdi\n" - "movq %3,%%rax\n" - "syscall\n" - - /* Return to parent. - */ - "1:\n" - : "=a" (__res) - : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(fn), "S"(child_stack), "D"(flags), "r"(arg), - "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr) - : "rsp", "memory", "r8", "r10", "r11", "rcx"); - } - LSS_RETURN(int, __res); - } - - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* On x86-64, the kernel does not know how to return from - * a signal handler. Instead, it relies on user space to provide a - * restorer function that calls the rt_sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movq %1,%%rax\n" - "syscall\n" - "2:popq %0\n" - "addq $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return res; - } - #elif defined(__arm__) - /* Most definitions of _syscallX() neglect to mark "memory" as being - * clobbered. This causes problems with compilers, that do a better job - * at optimizing across __asm__ calls. - * So, we just have to redefine all fo the _syscallX() macros. - */ - #undef LSS_REG - #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a - - /* r0..r3 are scratch registers and not preserved across function - * calls. We need to first evaluate the first 4 syscall arguments - * and store them on stack. They must be loaded into r0..r3 after - * all function calls to avoid r0..r3 being clobbered. - */ - #undef LSS_SAVE_ARG - #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a - #undef LSS_LOAD_ARG - #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r - - #undef LSS_BODY - #define LSS_BODY(type, name, args...) 
\ +#pragma once + +/* Copyright (c) 2005-2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +/* This file includes Linux-specific support functions common to the + * coredumper and the thread lister; primarily, this is a collection + * of direct system calls, and a couple of symbols missing from + * standard header files. 
+ * There are a few options that the including file can set to control + * the behavior of this file: + * + * SYS_CPLUSPLUS: + * The entire header file will normally be wrapped in 'extern "C" { }", + * making it suitable for compilation as both C and C++ source. If you + * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit + * the wrapping. N.B. doing so will suppress inclusion of all prerequisite + * system header files, too. It is the caller's responsibility to provide + * the necessary definitions. + * + * SYS_ERRNO: + * All system calls will update "errno" unless overriden by setting the + * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be + * an l-value. + * + * SYS_INLINE: + * New symbols will be defined "static inline", unless overridden by + * the SYS_INLINE macro. + * + * SYS_LINUX_SYSCALL_SUPPORT_H + * This macro is used to avoid multiple inclusions of this header file. + * If you need to include this file more than once, make sure to + * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. + * + * SYS_PREFIX: + * New system calls will have a prefix of "sys_" unless overridden by + * the SYS_PREFIX macro. Valid values for this macro are [0..9] which + * results in prefixes "sys[0..9]_". It is also possible to set this + * macro to -1, which avoids all prefixes. + * + * This file defines a few internal symbols that all start with "LSS_". + * Do not access these symbols from outside this file. They are not part + * of the supported API. + * + * NOTE: This is a stripped down version of the official opensource + * version of linux_syscall_support.h, which lives at + * http://code.google.com/p/linux-syscall-support/ + * It includes only the syscalls that are used in perftools, plus a + * few extra. 
Here's the breakdown: + * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u + * sys__exit( + * sys_clone( + * sys_close( + * sys_fcntl( + * sys_fstat( + * sys_futex( + * sys_futex1( + * sys_getcpu( + * sys_getdents( + * sys_getppid( + * sys_gettid( + * sys_lseek( + * sys_mmap( + * sys_mremap( + * sys_munmap( + * sys_open( + * sys_pipe( + * sys_prctl( + * sys_ptrace( + * sys_ptrace_detach( + * sys_read( + * sys_sched_yield( + * sys_sigaction( + * sys_sigaltstack( + * sys_sigdelset( + * sys_sigfillset( + * sys_sigprocmask( + * sys_socket( + * sys_stat( + * sys_waitpid( + * 2) These are used as subroutines of the above: + * sys_getpid -- gettid + * sys_kill -- ptrace_detach + * sys_restore -- sigaction + * sys_restore_rt -- sigaction + * sys_socketcall -- socket + * sys_wait4 -- waitpid + * 3) I left these in even though they're not used. They either + * complement the above (write vs read) or are variants (rt_sigaction): + * sys_fstat64 + * sys_getdents64 + * sys_llseek + * sys_mmap2 + * sys_openat + * sys_rt_sigaction + * sys_rt_sigprocmask + * sys_sigaddset + * sys_sigemptyset + * sys_stat64 + * sys_write + */ +#ifndef SYS_LINUX_SYSCALL_SUPPORT_H +#define SYS_LINUX_SYSCALL_SUPPORT_H + +/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__mips__) || defined(__PPC__)) && defined(__linux) + +#ifndef SYS_CPLUSPLUS +#ifdef __cplusplus +/* Some system header files in older versions of gcc neglect to properly + * handle being included from C++. As it appears to be harmless to have + * multiple nested 'extern "C"' blocks, just add another one here. + */ +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __mips__ +/* Include definitions of the ABI currently in use. 
*/ +#include +#endif + +#endif + +/* As glibc often provides subtly incompatible data structures (and implicit + * wrapper functions that convert them), we provide our own kernel data + * structures for use by the system calls. + * These structures have been developed by using Linux 2.6.23 headers for + * reference. Note though, we do not care about exact API compatibility + * with the kernel, and in fact the kernel often does not have a single + * API that works across architectures. Instead, we try to mimic the glibc + * API where reasonable, and only guarantee ABI compatibility with the + * kernel headers. + * Most notably, here are a few changes that were made to the structures + * defined by kernel headers: + * + * - we only define structures, but not symbolic names for kernel data + * types. For the latter, we directly use the native C datatype + * (i.e. "unsigned" instead of "mode_t"). + * - in a few cases, it is possible to define identical structures for + * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by + * standardizing on the 64bit version of the data types. In particular, + * this means that we use "unsigned" where the 32bit headers say + * "unsigned long". + * - overall, we try to minimize the number of cases where we need to + * conditionally define different structures. + * - the "struct kernel_sigaction" class of structures have been + * modified to more closely mimic glibc's API by introducing an + * anonymous union for the function pointer. + * - a small number of field names had to have an underscore appended to + * them, because glibc defines a global macro by the same name. 
+ */ + +/* include/linux/dirent.h */ +struct kernel_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[256]; +}; + +/* include/linux/dirent.h */ +struct kernel_dirent { + long d_ino; + long d_off; + unsigned short d_reclen; + char d_name[256]; +}; + +/* include/linux/time.h */ +struct kernel_timespec { + long tv_sec; + long tv_nsec; +}; + +/* include/linux/time.h */ +struct kernel_timeval { + long tv_sec; + long tv_usec; +}; + +/* include/linux/resource.h */ +struct kernel_rusage { + struct kernel_timeval ru_utime; + struct kernel_timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +struct siginfo; +#if defined(__i386__) || defined(__arm__) || defined(__PPC__) + +/* include/asm-{arm,i386,mips,ppc}/signal.h */ +struct kernel_old_sigaction { + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + unsigned long sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +} __attribute__((packed,aligned(4))); +#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +#endif + +/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the + * exactly match the size of the signal set, even though the API was + * intended to be extensible. We define our own KERNEL_NSIG to deal with + * this. + * Please note that glibc provides signals [1.._NSIG-1], whereas the + * kernel (and this header) provides the range [1..KERNEL_NSIG]. The + * actual number of signals is obviously the same, but the constants + * differ by one. 
+ */ +#ifdef __mips__ +#define KERNEL_NSIG 128 +#else +#define KERNEL_NSIG 64 +#endif + +/* include/asm-{arm,i386,mips,x86_64}/signal.h */ +struct kernel_sigset_t { + unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ + (8*sizeof(unsigned long))]; +}; + +/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */ +struct kernel_sigaction { +#ifdef __mips__ + unsigned long sa_flags; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + struct kernel_sigset_t sa_mask; +#else + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + unsigned long sa_flags; + void (*sa_restorer)(void); + struct kernel_sigset_t sa_mask; +#endif +}; + +/* include/asm-{arm,i386,mips,ppc}/stat.h */ +#ifdef __mips__ +#if _MIPS_SIM == _MIPS_SIM_ABI64 +struct kernel_stat { +#else +struct kernel_stat64 { +#endif + unsigned st_dev; + unsigned __pad0[3]; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + unsigned __pad1[3]; + long long st_size; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned st_blksize; + unsigned __pad2; + unsigned long long st_blocks; +}; +#elif defined __PPC__ +struct kernel_stat64 { + unsigned long long st_dev; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned short int __pad2; + long long st_size; + long st_blksize; + long long st_blocks; + long st_atime_; + unsigned long st_atime_nsec_; + long st_mtime_; + unsigned long st_mtime_nsec_; + long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#else +struct kernel_stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + unsigned __st_ino; + unsigned st_mode; + unsigned st_nlink; + 
unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + long long st_size; + unsigned st_blksize; + unsigned long long st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned long long st_ino; +}; +#endif + +/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */ +#if defined(__i386__) || defined(__arm__) +struct kernel_stat { + /* The kernel headers suggest that st_dev and st_rdev should be 32bit + * quantities encoding 12bit major and 20bit minor numbers in an interleaved + * format. In reality, we do not see useful data in the top bits. So, + * we'll leave the padding in here, until we find a better solution. + */ + unsigned short st_dev; + short pad1; + unsigned st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + short pad2; + unsigned st_size; + unsigned st_blksize; + unsigned st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned __unused4; + unsigned __unused5; +}; +#elif defined(__x86_64__) +struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + unsigned __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + long __unused[3]; +}; +#elif defined(__PPC__) +struct kernel_stat { + unsigned st_dev; + unsigned long st_ino; // ino_t + unsigned long st_mode; // mode_t + unsigned short st_nlink; // nlink_t + unsigned st_uid; // uid_t + unsigned st_gid; // gid_t + unsigned st_rdev; + long st_size; // off_t + unsigned 
long st_blksize; + unsigned long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) +struct kernel_stat { + unsigned st_dev; + int st_pad1[3]; + unsigned st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + int st_pad2[2]; + long st_size; + int st_pad3; + long st_atime_; + long st_atime_nsec_; + long st_mtime_; + long st_mtime_nsec_; + long st_ctime_; + long st_ctime_nsec_; + int st_blksize; + int st_blocks; + int st_pad4[14]; +}; +#endif + + +/* Definitions missing from the standard header files */ +#ifndef O_DIRECTORY +#if defined(__arm__) +#define O_DIRECTORY 0040000 +#else +#define O_DIRECTORY 0200000 +#endif +#endif +#ifndef PR_GET_DUMPABLE +#define PR_GET_DUMPABLE 3 +#endif +#ifndef PR_SET_DUMPABLE +#define PR_SET_DUMPABLE 4 +#endif +#ifndef AT_FDCWD +#define AT_FDCWD (-100) +#endif +#ifndef AT_SYMLINK_NOFOLLOW +#define AT_SYMLINK_NOFOLLOW 0x100 +#endif +#ifndef AT_REMOVEDIR +#define AT_REMOVEDIR 0x200 +#endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif +#ifndef SA_RESTORER +#define SA_RESTORER 0x04000000 +#endif + +#if defined(__i386__) +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 220 +#endif +#ifndef __NR_gettid +#define __NR_gettid 224 +#endif +#ifndef __NR_futex +#define __NR_futex 240 +#endif +#ifndef __NR_openat +#define __NR_openat 295 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 318 +#endif +/* End of i386 definitions */ +#elif defined(__arm__) +#ifndef __syscall +#if defined(__thumb__) || defined(__ARM_EABI__) +#define 
__SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name; +#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs +#define __syscall(name) "swi\t0" +#define __syscall_safe(name) \ + "push {r7}\n" \ + "mov r7,%[sysreg]\n" \ + __syscall(name)"\n" \ + "pop {r7}" +#else +#define __SYS_REG(name) +#define __SYS_REG_LIST(regs...) regs +#define __syscall(name) "swi\t" __sys1(__NR_##name) "" +#define __syscall_safe(name) __syscall(name) +#endif +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#endif +/* End of ARM definitions */ +#elif defined(__x86_64__) +#ifndef __NR_gettid +#define __NR_gettid 186 +#endif +#ifndef __NR_futex +#define __NR_futex 202 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 217 +#endif +#ifndef __NR_openat +#define __NR_openat 257 +#endif +/* End of x86-64 definitions */ +#elif defined(__mips__) +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_Linux + 213) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_Linux + 215) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_Linux + 219) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 222) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 238) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 288) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 293) +#endif 
+#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 312) +#endif +/* End of MIPS (old 32bit API) definitions */ +#elif _MIPS_SIM == _MIPS_SIM_ABI64 +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 247) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 252) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 271) +#endif +/* End of MIPS (64bit API) definitions */ +#else +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 251) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 256) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 275) +#endif +/* End of MIPS (new 32bit API) definitions */ +#endif +/* End of MIPS definitions */ +#elif defined(__PPC__) +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 202 +#endif +#ifndef __NR_gettid +#define __NR_gettid 207 +#endif +#ifndef __NR_futex +#define __NR_futex 221 +#endif +#ifndef __NR_openat +#define __NR_openat 286 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 302 +#endif +/* End of powerpc defininitions */ +#endif + + +/* After forking, we must make sure to only call system calls. */ +#if __BOUNDED_POINTERS__ + #error "Need to port invocations of syscalls for bounded ptrs" +#else + /* The core dumper and the thread lister get executed after threads + * have been suspended. As a consequence, we cannot call any functions + * that acquire locks. Unfortunately, libc wraps most system calls + * (e.g. 
in order to implement pthread_atfork, and to make calls + * cancellable), which means we cannot call these functions. Instead, + * we have to call syscall() directly. + */ + #undef LSS_ERRNO + #ifdef SYS_ERRNO + /* Allow the including file to override the location of errno. This can + * be useful when using clone() with the CLONE_VM option. + */ + #define LSS_ERRNO SYS_ERRNO + #else + #define LSS_ERRNO errno + #endif + + #undef LSS_INLINE + #ifdef SYS_INLINE + #define LSS_INLINE SYS_INLINE + #else + #define LSS_INLINE static inline + #endif + + /* Allow the including file to override the prefix used for all new + * system calls. By default, it will be set to "sys_". + */ + #undef LSS_NAME + #ifndef SYS_PREFIX + #define LSS_NAME(name) sys_##name + #elif SYS_PREFIX < 0 + #define LSS_NAME(name) name + #elif SYS_PREFIX == 0 + #define LSS_NAME(name) sys0_##name + #elif SYS_PREFIX == 1 + #define LSS_NAME(name) sys1_##name + #elif SYS_PREFIX == 2 + #define LSS_NAME(name) sys2_##name + #elif SYS_PREFIX == 3 + #define LSS_NAME(name) sys3_##name + #elif SYS_PREFIX == 4 + #define LSS_NAME(name) sys4_##name + #elif SYS_PREFIX == 5 + #define LSS_NAME(name) sys5_##name + #elif SYS_PREFIX == 6 + #define LSS_NAME(name) sys6_##name + #elif SYS_PREFIX == 7 + #define LSS_NAME(name) sys7_##name + #elif SYS_PREFIX == 8 + #define LSS_NAME(name) sys8_##name + #elif SYS_PREFIX == 9 + #define LSS_NAME(name) sys9_##name + #endif + + #undef LSS_RETURN + #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__)) + /* Failing system calls return a negative result in the range of + * -1..-4095. These are "errno" values with the sign inverted. + */ + #define LSS_RETURN(type, res) \ + do { \ + if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__mips__) + /* On MIPS, failing system calls return -1, and set errno in a + * separate CPU register. 
+ */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__PPC__) + /* On PPC, failing system calls return -1, and set errno in a + * separate CPU register. See linux/unistd.h. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err & 0x10000000 ) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #endif + #if defined(__i386__) + #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) + /* This only works for GCC-4.4 and above -- the first version to use + .cfi directives for dwarf unwind info. */ + #define CFI_ADJUST_CFA_OFFSET(adjust) \ + ".cfi_adjust_cfa_offset " #adjust "\n" + #else + #define CFI_ADJUST_CFA_OFFSET(adjust) /**/ + #endif + + /* In PIC mode (e.g. when building shared libraries), gcc for i386 + * reserves ebx. Unfortunately, most distribution ship with implementations + * of _syscallX() which clobber ebx. + * Also, most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_BODY + #define LSS_BODY(type,args...) 
\ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(4) \ + "movl %2,%%ebx\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(-4) \ + args \ + : "esp", "memory"); \ + LSS_RETURN(type,__res) + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)(void) { \ + long __res; \ + __asm__ volatile("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1))); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1,type2 arg2) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3))); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4))); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + "movl %2,%%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx" \ + : "=a" (__res) \ + : "i" (__NR_##name), "ri" ((long)(arg1)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "esp", "memory"); \ + 
LSS_RETURN(type,__res); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + long __res; \ + struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ + __asm__ __volatile__("push %%ebp\n" \ + "push %%ebx\n" \ + "movl 4(%2),%%ebp\n" \ + "movl 0(%2), %%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + "pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name), "0" ((long)(&__s)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "esp", "memory"); \ + LSS_RETURN(type,__res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "movl %3,%%ecx\n" + "jecxz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "movl %4,%%ecx\n" + "jecxz 1f\n" + + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 20; + */ + "andl $-16,%%ecx\n" + "subl $20,%%ecx\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movl %6,%%eax\n" + "movl %%eax,4(%%ecx)\n" + "movl %3,%%eax\n" + "movl %%eax,(%%ecx)\n" + + /* %eax = syscall(%eax = __NR_clone, + * %ebx = flags, + * %ecx = child_stack, + * %edx = parent_tidptr, + * %esi = newtls, + * %edi = child_tidptr) + * Also, make sure that %ebx gets preserved as it is + * used in PIC mode. + */ + "movl %8,%%esi\n" + "movl %7,%%edx\n" + "movl %5,%%eax\n" + "movl %9,%%edi\n" + "pushl %%ebx\n" + "movl %%eax,%%ebx\n" + "movl %2,%%eax\n" + "int $0x80\n" + + /* In the parent: restore %ebx + * In the child: move "fn" into %ebx + */ + "popl %%ebx\n" + + /* if (%eax != 0) + * return %eax; + */ + "test %%eax,%%eax\n" + "jnz 1f\n" + + /* In the child, now. 
Terminate frame pointer chain. + */ + "movl $0,%%ebp\n" + + /* Call "fn". "arg" is already on the stack. + */ + "call *%%ebx\n" + + /* Call _exit(%ebx). Unfortunately older versions + * of gcc restrict the number of arguments that can + * be passed to asm(). So, we need to hard-code the + * system call number. + */ + "movl %%eax,%%ebx\n" + "movl $1,%%eax\n" + "int $0x80\n" + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), + "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), + "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) + : "esp", "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); + } + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } + LSS_INLINE void (*LSS_NAME(restore)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. 
+ */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:pop %%eax\n" + "movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_sigreturn)); + return res; + } + #elif defined(__x86_64__) + /* There are no known problems with any of the _syscallX() macros + * currently shipping for x86_64, but we still need to be able to define + * our own version so that we can override the location of the errno + * location (e.g. when using the clone() system call with the CLONE_VM + * option). + */ + #undef LSS_BODY + #define LSS_BODY(type,name, ...) \ + long __res; \ + __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \ + ##__VA_ARGS__ : "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)() { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, name, "D" ((long)(arg1))); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \ + "d" ((long)(arg3))); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + long __res; \ + __asm__ __volatile__("movq %5,%%r10; syscall" : \ + "=a" (__res) : "0" (__NR_##name), \ + "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ + "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + 
type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + long __res; \ + __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \ + "=a" (__res) : "0" (__NR_##name), \ + "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ + "r" ((long)(arg4)), "r" ((long)(arg5)) : \ + "r8", "r10", "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + long __res; \ + __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \ + "syscall" : \ + "=a" (__res) : "0" (__NR_##name), \ + "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ + "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \ + "r8", "r9", "r10", "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + { + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "testq %4,%4\n" + "jz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "testq %5,%5\n" + "jz 1f\n" + + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 16; + */ + "andq $-16,%5\n" + "subq $16,%5\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movq %7,8(%5)\n" + "movq %4,0(%5)\n" + + /* %rax = syscall(%rax = __NR_clone, + * %rdi = flags, + * %rsi = child_stack, + * %rdx = parent_tidptr, + * %r8 = new_tls, + * %r10 = child_tidptr) + */ + "movq %2,%%rax\n" + "movq %9,%%r8\n" + "movq %10,%%r10\n" + "syscall\n" + + /* if (%rax != 0) + * return; + */ + "testq %%rax,%%rax\n" + "jnz 1f\n" + + /* In the child. Terminate frame pointer chain. 
+ */ + "xorq %%rbp,%%rbp\n" + + /* Call "fn(arg)". + */ + "popq %%rax\n" + "popq %%rdi\n" + "call *%%rax\n" + + /* Call _exit(%ebx). + */ + "movq %%rax,%%rdi\n" + "movq %3,%%rax\n" + "syscall\n" + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(fn), "S"(child_stack), "D"(flags), "r"(arg), + "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr) + : "rsp", "memory", "r8", "r10", "r11", "rcx"); + } + LSS_RETURN(int, __res); + } + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On x86-64, the kernel does not know how to return from + * a signal handler. Instead, it relies on user space to provide a + * restorer function that calls the rt_sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movq %1,%%rax\n" + "syscall\n" + "2:popq %0\n" + "addq $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } + #elif defined(__arm__) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all fo the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a + + /* r0..r3 are scratch registers and not preserved across function + * calls. We need to first evaluate the first 4 syscall arguments + * and store them on stack. They must be loaded into r0..r3 after + * all function calls to avoid r0..r3 being clobbered. + */ + #undef LSS_SAVE_ARG + #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a + #undef LSS_LOAD_ARG + #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r + + #undef LSS_BODY + #define LSS_BODY(type, name, args...) 
\ long __res_r0 __asm__("r0"); \ - long __res; \ - __SYS_REG(name) \ - __asm__ __volatile__ (__syscall_safe(name) \ - : "=r"(__res_r0) \ - : __SYS_REG_LIST(args) \ - : "lr", "memory"); \ - __res = __res_r0; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - /* There is no need for using a volatile temp. */ \ - LSS_REG(0, arg1); \ - LSS_BODY(type, name, "r"(__r0)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_REG(4, arg5); \ - 
LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4)); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_SAVE_ARG(0, arg1); \ - LSS_SAVE_ARG(1, arg2); \ - LSS_SAVE_ARG(2, arg3); \ - LSS_SAVE_ARG(3, arg4); \ - LSS_REG(4, arg5); \ - LSS_REG(5, arg6); \ - LSS_LOAD_ARG(0); \ - LSS_LOAD_ARG(1); \ - LSS_LOAD_ARG(2); \ - LSS_LOAD_ARG(3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4), "r"(__r5)); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { + long __res; \ + __SYS_REG(name) \ + __asm__ __volatile__ (__syscall_safe(name) \ + : "=r"(__res_r0) \ + : __SYS_REG_LIST(args) \ + : "lr", "memory"); \ + __res = __res_r0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + /* There is no need for using a volatile temp. 
*/ \ + LSS_REG(0, arg1); \ + LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + 
LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_REG(5, arg6); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { long __res __asm__("r5"); - { - if (fn == NULL || child_stack == NULL) { - __res = -EINVAL; - goto clone_exit; - } - - /* stash first 4 arguments on stack first because we can only load - * them after all function calls. - */ - int tmp_flags = flags; - int * tmp_stack = (int*) child_stack; - void * tmp_ptid = parent_tidptr; - void * tmp_tls = newtls; - + { + if (fn == NULL || child_stack == NULL) { + __res = -EINVAL; + goto clone_exit; + } + + /* stash first 4 arguments on stack first because we can only load + * them after all function calls. + */ + int tmp_flags = flags; + int * tmp_stack = (int*) child_stack; + void * tmp_ptid = parent_tidptr; + void * tmp_tls = newtls; + int *__ctid __asm__("r4") = child_tidptr; - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - *(--tmp_stack) = (int) arg; - *(--tmp_stack) = (int) fn; - - /* We must load r0..r3 last after all possible function calls. */ + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + *(--tmp_stack) = (int) arg; + *(--tmp_stack) = (int) fn; + + /* We must load r0..r3 last after all possible function calls. 
*/ int __flags __asm__("r0") = tmp_flags; void *__stack __asm__("r1") = tmp_stack; void *__ptid __asm__("r2") = tmp_ptid; void *__tls __asm__("r3") = tmp_tls; - - /* %r0 = syscall(%r0 = flags, - * %r1 = child_stack, - * %r2 = parent_tidptr, - * %r3 = newtls, - * %r4 = child_tidptr) - */ - __SYS_REG(clone) - __asm__ __volatile__(/* %r0 = syscall(%r0 = flags, - * %r1 = child_stack, - * %r2 = parent_tidptr, - * %r3 = newtls, - * %r4 = child_tidptr) - */ - "push {r7}\n" - "mov r7,%1\n" - __syscall(clone)"\n" - - /* if (%r0 != 0) - * return %r0; - */ - "movs %0,r0\n" - "bne 1f\n" - - /* In the child, now. Call "fn(arg)". - */ - "ldr r0,[sp, #4]\n" - "mov lr,pc\n" - "ldr pc,[sp]\n" - - /* Call _exit(%r0), which never returns. We only - * need to set r7 for EABI syscall ABI but we do - * this always to simplify code sharing between - * old and new syscall ABIs. - */ - "mov r7,%2\n" - __syscall(exit)"\n" - - /* Pop r7 from the stack only in the parent. - */ - "1: pop {r7}\n" - : "=r" (__res) - : "r"(__sysreg), - "i"(__NR_exit), "r"(__stack), "r"(__flags), - "r"(__ptid), "r"(__tls), "r"(__ctid) - : "cc", "lr", "memory"); - } - clone_exit: - LSS_RETURN(int, __res); - } - #elif defined(__mips__) - #undef LSS_REG - #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ - (unsigned long)(a) - - #if _MIPS_SIM == _MIPS_SIM_ABI32 - // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html - // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html - #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\ - "$13", "$14", "$15", "$24", "$25", "memory" - #else - #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \ - "$14", "$15", "$24", "$25", "memory" - #endif - - #undef LSS_BODY - #define LSS_BODY(type,name,r7,...) 
\ + + /* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + __SYS_REG(clone) + __asm__ __volatile__(/* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + "push {r7}\n" + "mov r7,%1\n" + __syscall(clone)"\n" + + /* if (%r0 != 0) + * return %r0; + */ + "movs %0,r0\n" + "bne 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldr r0,[sp, #4]\n" + "mov lr,pc\n" + "ldr pc,[sp]\n" + + /* Call _exit(%r0), which never returns. We only + * need to set r7 for EABI syscall ABI but we do + * this always to simplify code sharing between + * old and new syscall ABIs. + */ + "mov r7,%2\n" + __syscall(exit)"\n" + + /* Pop r7 from the stack only in the parent. + */ + "1: pop {r7}\n" + : "=r" (__res) + : "r"(__sysreg), + "i"(__NR_exit), "r"(__stack), "r"(__flags), + "r"(__ptid), "r"(__tls), "r"(__ctid) + : "cc", "lr", "memory"); + } + clone_exit: + LSS_RETURN(int, __res); + } + #elif defined(__mips__) + #undef LSS_REG + #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ + (unsigned long)(a) + + #if _MIPS_SIM == _MIPS_SIM_ABI32 + // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html + // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html + #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\ + "$13", "$14", "$15", "$24", "$25", "memory" + #else + #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \ + "$14", "$15", "$24", "$25", "memory" + #endif + + #undef LSS_BODY + #define LSS_BODY(type,name,r7,...) 
\ unsigned long __v0 __asm__("$2") = __NR_##name; \ - __asm__ __volatile__ ("syscall\n" \ - : "=&r"(__v0), r7 (__r7) \ - : "0"(__v0), ##__VA_ARGS__ \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ + __asm__ __volatile__ ("syscall\n" \ + : "=&r"(__v0), r7 (__r7) \ + : "0"(__v0), ##__VA_ARGS__ \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ unsigned long __r7 __asm__("$7"); \ - LSS_BODY(type, name, "=r"); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, name, "=r"); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall4 - 
#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall5 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument - * on the stack, whereas the new APIs use registers "r8" and "r9". - */ - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall5 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". 
+ */ + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "m" ((unsigned long)arg5) \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8)); \ - } - #endif - #undef _syscall6 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument - * on the stack, whereas the new APIs use registers "r8" and "r9". 
- */ - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "m" ((unsigned long)arg5) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8)); \ + } + #endif + #undef _syscall6 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". 
+ */ + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "lw $8, %7\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "sw $8, 20($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "r" ((unsigned long)arg5), \ - "r" ((unsigned long)arg6) \ - : MIPS_SYSCALL_CLOBBERS); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5,type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8), "r"(__r9)); \ - } - #endif - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "lw $8, %7\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "sw $8, 20($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "r" ((unsigned long)arg5), \ + "r" ((unsigned long)arg6) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 
arg5,type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8), "r"(__r9)); \ + } + #endif + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { unsigned long __v0 __asm__("$2"); unsigned long __r7 __asm__("$7") = (unsigned long)newtls; - { + { int __flags __asm__("$4") = flags; void *__stack __asm__("$5") = child_stack; void *__ptid __asm__("$6") = parent_tidptr; int *__ctid __asm__("$8") = child_tidptr; - __asm__ __volatile__( - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu $29,24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub $29,16\n" - #else - "dsubu $29,16\n" - #endif - - /* if (fn == NULL || child_stack == NULL) - * return -EINVAL; - */ - "li %0,%2\n" - "beqz %5,1f\n" - "beqz %6,1f\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu %6,32\n" - "sw %5,0(%6)\n" - "sw %8,4(%6)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub %6,32\n" - "sw %5,0(%6)\n" - "sw %8,8(%6)\n" - #else - "dsubu %6,32\n" - "sd %5,0(%6)\n" - "sd %8,8(%6)\n" - #endif - - /* $7 = syscall($4 = flags, - * $5 = child_stack, - * $6 = parent_tidptr, - * $7 = newtls, - * $8 = child_tidptr) - */ - "li $2,%3\n" - "syscall\n" - - /* if ($7 != 0) - * return $2; - */ - "bnez $7,1f\n" - "bnez $2,1f\n" - - /* In the child, now. Call "fn(arg)". 
- */ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "lw $25,0($29)\n" - "lw $4,4($29)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "lw $25,0($29)\n" - "lw $4,8($29)\n" - #else - "ld $25,0($29)\n" - "ld $4,8($29)\n" - #endif - "jalr $25\n" - - /* Call _exit($2) - */ - "move $4,$2\n" - "li $2,%4\n" - "syscall\n" - - "1:\n" - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "addu $29, 24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "add $29, 16\n" - #else - "daddu $29,16\n" - #endif - : "=&r" (__v0), "=r" (__r7) - : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), - "r"(__ptid), "r"(__r7), "r"(__ctid) - : "$9", "$10", "$11", "$12", "$13", "$14", "$15", - "$24", "memory"); - } - LSS_RETURN(int, __v0, __r7); - } - #elif defined (__PPC__) - #undef LSS_LOADARGS_0 - #define LSS_LOADARGS_0(name, dummy...) \ - __sc_0 = __NR_##name - #undef LSS_LOADARGS_1 - #define LSS_LOADARGS_1(name, arg1) \ - LSS_LOADARGS_0(name); \ - __sc_3 = (unsigned long) (arg1) - #undef LSS_LOADARGS_2 - #define LSS_LOADARGS_2(name, arg1, arg2) \ - LSS_LOADARGS_1(name, arg1); \ - __sc_4 = (unsigned long) (arg2) - #undef LSS_LOADARGS_3 - #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ - LSS_LOADARGS_2(name, arg1, arg2); \ - __sc_5 = (unsigned long) (arg3) - #undef LSS_LOADARGS_4 - #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ - LSS_LOADARGS_3(name, arg1, arg2, arg3); \ - __sc_6 = (unsigned long) (arg4) - #undef LSS_LOADARGS_5 - #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ - LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ - __sc_7 = (unsigned long) (arg5) - #undef LSS_LOADARGS_6 - #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ - LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ - __sc_8 = (unsigned long) (arg6) - #undef LSS_ASMINPUT_0 - #define LSS_ASMINPUT_0 "0" (__sc_0) - #undef LSS_ASMINPUT_1 - #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) - #undef LSS_ASMINPUT_2 - #define 
LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) - #undef LSS_ASMINPUT_3 - #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) - #undef LSS_ASMINPUT_4 - #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) - #undef LSS_ASMINPUT_5 - #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) - #undef LSS_ASMINPUT_6 - #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) - #undef LSS_BODY - #define LSS_BODY(nr, type, name, args...) \ - long __sc_ret, __sc_err; \ - { \ + __asm__ __volatile__( + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu $29,24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub $29,16\n" + #else + "dsubu $29,16\n" + #endif + + /* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "li %0,%2\n" + "beqz %5,1f\n" + "beqz %6,1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu %6,32\n" + "sw %5,0(%6)\n" + "sw %8,4(%6)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub %6,32\n" + "sw %5,0(%6)\n" + "sw %8,8(%6)\n" + #else + "dsubu %6,32\n" + "sd %5,0(%6)\n" + "sd %8,8(%6)\n" + #endif + + /* $7 = syscall($4 = flags, + * $5 = child_stack, + * $6 = parent_tidptr, + * $7 = newtls, + * $8 = child_tidptr) + */ + "li $2,%3\n" + "syscall\n" + + /* if ($7 != 0) + * return $2; + */ + "bnez $7,1f\n" + "bnez $2,1f\n" + + /* In the child, now. Call "fn(arg)". 
+ */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "lw $25,0($29)\n" + "lw $4,4($29)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "lw $25,0($29)\n" + "lw $4,8($29)\n" + #else + "ld $25,0($29)\n" + "ld $4,8($29)\n" + #endif + "jalr $25\n" + + /* Call _exit($2) + */ + "move $4,$2\n" + "li $2,%4\n" + "syscall\n" + + "1:\n" + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "addu $29, 24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "add $29, 16\n" + #else + "daddu $29,16\n" + #endif + : "=&r" (__v0), "=r" (__r7) + : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__r7), "r"(__ctid) + : "$9", "$10", "$11", "$12", "$13", "$14", "$15", + "$24", "memory"); + } + LSS_RETURN(int, __v0, __r7); + } + #elif defined (__PPC__) + #undef LSS_LOADARGS_0 + #define LSS_LOADARGS_0(name, dummy...) \ + __sc_0 = __NR_##name + #undef LSS_LOADARGS_1 + #define LSS_LOADARGS_1(name, arg1) \ + LSS_LOADARGS_0(name); \ + __sc_3 = (unsigned long) (arg1) + #undef LSS_LOADARGS_2 + #define LSS_LOADARGS_2(name, arg1, arg2) \ + LSS_LOADARGS_1(name, arg1); \ + __sc_4 = (unsigned long) (arg2) + #undef LSS_LOADARGS_3 + #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ + LSS_LOADARGS_2(name, arg1, arg2); \ + __sc_5 = (unsigned long) (arg3) + #undef LSS_LOADARGS_4 + #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ + LSS_LOADARGS_3(name, arg1, arg2, arg3); \ + __sc_6 = (unsigned long) (arg4) + #undef LSS_LOADARGS_5 + #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ + LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ + __sc_7 = (unsigned long) (arg5) + #undef LSS_LOADARGS_6 + #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ + __sc_8 = (unsigned long) (arg6) + #undef LSS_ASMINPUT_0 + #define LSS_ASMINPUT_0 "0" (__sc_0) + #undef LSS_ASMINPUT_1 + #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) + #undef LSS_ASMINPUT_2 + #define 
LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) + #undef LSS_ASMINPUT_3 + #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) + #undef LSS_ASMINPUT_4 + #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) + #undef LSS_ASMINPUT_5 + #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) + #undef LSS_ASMINPUT_6 + #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) \ + long __sc_ret, __sc_err; \ + { \ unsigned long __sc_0 __asm__ ("r0"); \ unsigned long __sc_3 __asm__ ("r3"); \ unsigned long __sc_4 __asm__ ("r4"); \ @@ -1646,71 +1646,71 @@ struct kernel_stat { unsigned long __sc_6 __asm__ ("r6"); \ unsigned long __sc_7 __asm__ ("r7"); \ unsigned long __sc_8 __asm__ ("r8"); \ - \ - LSS_LOADARGS_##nr(name, args); \ - __asm__ __volatile__ \ - ("sc\n\t" \ - "mfcr %0" \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : LSS_ASMINPUT_##nr \ - : "cr0", "ctr", "memory", \ - "r9", "r10", "r11", "r12"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - LSS_RETURN(type, __sc_ret, __sc_err) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(0, type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(1, type, name, arg1); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(2, type, name, arg1, arg2); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(3, type, name, arg1, arg2, arg3); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(4, type, name, arg1, arg2, arg3, 
arg4); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ - } - /* clone function adapted from glibc 2.3.6 clone.S */ - /* TODO(csilvers): consider wrapping some args up in a struct, like we - * do for i386's _syscall6, so we can compile successfully on gcc 2.95 - */ - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __ret, __err; - { + \ + LSS_LOADARGS_##nr(name, args); \ + __asm__ __volatile__ \ + ("sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory", \ + "r9", "r10", "r11", "r12"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, 
arg2, arg3); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ + } + /* clone function adapted from glibc 2.3.6 clone.S */ + /* TODO(csilvers): consider wrapping some args up in a struct, like we + * do for i386's _syscall6, so we can compile successfully on gcc 2.95 + */ + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret, __err; + { int (*__fn)(void *) __asm__ ("r8") = fn; void *__cstack __asm__ ("r4") = child_stack; int __flags __asm__ ("r3") = flags; @@ -1718,342 +1718,342 @@ struct kernel_stat { int * __ptidptr __asm__ ("r5") = parent_tidptr; void * __newtls __asm__ ("r6") = newtls; int * __ctidptr __asm__ ("r7") = child_tidptr; - __asm__ __volatile__( - /* check for fn == NULL - * and child_stack == NULL - */ - "cmpwi cr0, %6, 0\n\t" - "cmpwi cr1, %7, 0\n\t" - "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" - "beq- cr0, 1f\n\t" - - /* set up stack frame for child */ - "clrrwi %7, %7, 4\n\t" - "li 0, 0\n\t" - "stwu 0, -16(%7)\n\t" - - /* fn, arg, child_stack are saved across the syscall: r28-30 */ - "mr 28, %6\n\t" - "mr 29, %7\n\t" - "mr 27, %9\n\t" - - /* syscall */ - "li 0, %4\n\t" - /* flags 
already in r3 - * child_stack already in r4 - * ptidptr already in r5 - * newtls already in r6 - * ctidptr already in r7 - */ - "sc\n\t" - - /* Test if syscall was successful */ - "cmpwi cr1, 3, 0\n\t" - "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" - "bne- cr1, 1f\n\t" - - /* Do the function call */ - "mtctr 28\n\t" - "mr 3, 27\n\t" - "bctrl\n\t" - - /* Call _exit(r3) */ - "li 0, %5\n\t" - "sc\n\t" - - /* Return to parent */ - "1:\n" - "mfcr %1\n\t" - "mr %0, 3\n\t" - : "=r" (__ret), "=r" (__err) - : "0" (-1), "1" (EINVAL), - "i" (__NR_clone), "i" (__NR_exit), - "r" (__fn), "r" (__cstack), "r" (__flags), - "r" (__arg), "r" (__ptidptr), "r" (__newtls), - "r" (__ctidptr) - : "cr0", "cr1", "memory", "ctr", - "r0", "r29", "r27", "r28"); - } - LSS_RETURN(int, __ret, __err); - } - #endif - #define __NR__exit __NR_exit - #define __NR__gettid __NR_gettid - #define __NR__mremap __NR_mremap - LSS_INLINE _syscall1(int, close, int, f) - LSS_INLINE _syscall1(int, _exit, int, e) - LSS_INLINE _syscall3(int, fcntl, int, f, - int, c, long, a) - LSS_INLINE _syscall2(int, fstat, int, f, - struct kernel_stat*, b) - LSS_INLINE _syscall4(int, futex, int*, a, - int, o, int, v, - struct kernel_timespec*, t) - LSS_INLINE _syscall3(int, getdents, int, f, - struct kernel_dirent*, d, int, c) -#ifdef __NR_getdents64 - LSS_INLINE _syscall3(int, getdents64, int, f, - struct kernel_dirent64*, d, int, c) -#endif - LSS_INLINE _syscall0(pid_t, getpid) - LSS_INLINE _syscall0(pid_t, getppid) - LSS_INLINE _syscall0(pid_t, _gettid) - LSS_INLINE _syscall2(int, kill, pid_t, p, - int, s) - LSS_INLINE _syscall3(off_t, lseek, int, f, - off_t, o, int, w) - LSS_INLINE _syscall2(int, munmap, void*, s, - size_t, l) - LSS_INLINE _syscall5(void*, _mremap, void*, o, - size_t, os, size_t, ns, - unsigned long, f, void *, a) - LSS_INLINE _syscall3(int, open, const char*, p, - int, f, int, m) - LSS_INLINE _syscall2(int, prctl, int, o, - long, a) - LSS_INLINE _syscall4(long, ptrace, int, r, - pid_t, p, void *, a, void *, 
d) - LSS_INLINE _syscall3(ssize_t, read, int, f, - void *, b, size_t, c) - LSS_INLINE _syscall4(int, rt_sigaction, int, s, - const struct kernel_sigaction*, a, - struct kernel_sigaction*, o, size_t, c) - LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, - const struct kernel_sigset_t*, s, - struct kernel_sigset_t*, o, size_t, c); - LSS_INLINE _syscall0(int, sched_yield) - LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, - const stack_t*, o) - LSS_INLINE _syscall2(int, stat, const char*, f, - struct kernel_stat*, b) - LSS_INLINE _syscall3(ssize_t, write, int, f, - const void *, b, size_t, c) - #if defined(__NR_getcpu) - LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, - unsigned *, node, void *, unused); - #endif - #if defined(__x86_64__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - #endif - #if defined(__x86_64__) - LSS_INLINE _syscall6(void*, mmap, void*, s, - size_t, l, int, p, - int, f, int, d, - off64_t, o) - - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - /* On x86_64, the kernel requires us to always set our own - * SA_RESTORER in order to be able to return from a signal handler. - * This function must have a "magic" signature that the "gdb" - * (and maybe the kernel?) can recognize. 
- */ - if (act != NULL && !(act->sa_flags & SA_RESTORER)) { - struct kernel_sigaction a = *act; - a.sa_flags |= SA_RESTORER; - a.sa_restorer = LSS_NAME(restore_rt)(); - return LSS_NAME(rt_sigaction)(signum, &a, oldact, - (KERNEL_NSIG+7)/8); - } else { - return LSS_NAME(rt_sigaction)(signum, act, oldact, - (KERNEL_NSIG+7)/8); - } - } - - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - } - #endif - #if defined(__x86_64__) || \ - defined(__arm__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, - int*, s, int, o, - struct kernel_rusage*, r) - LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ - return LSS_NAME(wait4)(pid, status, options, 0); - } - #endif - #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) - LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) - #endif - LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { - memset(&set->sig, 0, sizeof(set->sig)); - return 0; - } - - LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { - memset(&set->sig, -1, sizeof(set->sig)); - return 0; - } - - LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); - return 0; - } - } - - LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); - return 0; - } - } - - #if defined(__i386__) || \ - defined(__arm__) || \ - (defined(__mips__) && 
_MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__) - #define __NR__sigaction __NR_sigaction - #define __NR__sigprocmask __NR_sigprocmask - LSS_INLINE _syscall2(int, fstat64, int, f, - struct kernel_stat64 *, b) - LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, - loff_t *, res, uint, wh) -#ifdef __PPC64__ - LSS_INLINE _syscall6(void*, mmap, void*, s, - size_t, l, int, p, - int, f, int, d, - off_t, o) -#else - #ifndef __ARM_EABI__ - /* Not available on ARM EABI Linux. */ - LSS_INLINE _syscall1(void*, mmap, void*, a) - #endif - LSS_INLINE _syscall6(void*, mmap2, void*, s, - size_t, l, int, p, - int, f, int, d, - off_t, o) -#endif - LSS_INLINE _syscall3(int, _sigaction, int, s, - const struct kernel_old_sigaction*, a, - struct kernel_old_sigaction*, o) - LSS_INLINE _syscall3(int, _sigprocmask, int, h, - const unsigned long*, s, - unsigned long*, o) - LSS_INLINE _syscall2(int, stat64, const char *, p, - struct kernel_stat64 *, b) - - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - int old_errno = LSS_ERRNO; - int rc; - struct kernel_sigaction a; - if (act != NULL) { - a = *act; - #ifdef __i386__ - /* On i386, the kernel requires us to always set our own - * SA_RESTORER when using realtime signals. Otherwise, it does not - * know how to return from a signal handler. This function must have - * a "magic" signature that the "gdb" (and maybe the kernel?) can - * recognize. - * Apparently, a SA_RESTORER is implicitly set by the kernel, when - * using non-realtime signals. - * - * TODO: Test whether ARM needs a restorer - */ - if (!(a.sa_flags & SA_RESTORER)) { - a.sa_flags |= SA_RESTORER; - a.sa_restorer = (a.sa_flags & SA_SIGINFO) - ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); - } - #endif - } - rc = LSS_NAME(rt_sigaction)(signum, act ? 
&a : act, oldact, - (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; - if (!act) { - ptr_a = NULL; - } else { - oa.sa_handler_ = act->sa_handler_; - memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); - #ifndef __mips__ - oa.sa_restorer = act->sa_restorer; - #endif - oa.sa_flags = act->sa_flags; - } - if (!oldact) { - ptr_oa = NULL; - } - LSS_ERRNO = old_errno; - rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); - if (rc == 0 && oldact) { - if (act) { - memcpy(oldact, act, sizeof(*act)); - } else { - memset(oldact, 0, sizeof(*oldact)); - } - oldact->sa_handler_ = ptr_oa->sa_handler_; - oldact->sa_flags = ptr_oa->sa_flags; - memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); - #ifndef __mips__ - oldact->sa_restorer = ptr_oa->sa_restorer; - #endif - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - int olderrno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - LSS_ERRNO = olderrno; - if (oldset) { - LSS_NAME(sigemptyset)(oldset); - } - rc = LSS_NAME(_sigprocmask)(how, - set ? &set->sig[0] : NULL, - oldset ? &oldset->sig[0] : NULL); - } - return rc; - } - #endif - #if defined(__PPC__) - #undef LSS_SC_LOADARGS_0 - #define LSS_SC_LOADARGS_0(dummy...) 
- #undef LSS_SC_LOADARGS_1 - #define LSS_SC_LOADARGS_1(arg1) \ - __sc_4 = (unsigned long) (arg1) - #undef LSS_SC_LOADARGS_2 - #define LSS_SC_LOADARGS_2(arg1, arg2) \ - LSS_SC_LOADARGS_1(arg1); \ - __sc_5 = (unsigned long) (arg2) - #undef LSS_SC_LOADARGS_3 - #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \ - LSS_SC_LOADARGS_2(arg1, arg2); \ - __sc_6 = (unsigned long) (arg3) - #undef LSS_SC_LOADARGS_4 - #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \ - LSS_SC_LOADARGS_3(arg1, arg2, arg3); \ - __sc_7 = (unsigned long) (arg4) - #undef LSS_SC_LOADARGS_5 - #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \ - LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \ - __sc_8 = (unsigned long) (arg5) - #undef LSS_SC_BODY - #define LSS_SC_BODY(nr, type, opt, args...) \ - long __sc_ret, __sc_err; \ - { \ + __asm__ __volatile__( + /* check for fn == NULL + * and child_stack == NULL + */ + "cmpwi cr0, %6, 0\n\t" + "cmpwi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrwi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stwu 0, -16(%7)\n\t" + + /* fn, arg, child_stack are saved across the syscall: r28-30 */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall */ + "li 0, %4\n\t" + /* flags already in r3 + * child_stack already in r4 + * ptidptr already in r5 + * newtls already in r6 + * ctidptr already in r7 + */ + "sc\n\t" + + /* Test if syscall was successful */ + "cmpwi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "mtctr 28\n\t" + "mr 3, 27\n\t" + "bctrl\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n" + "mfcr %1\n\t" + "mr %0, 3\n\t" + : "=r" (__ret), "=r" (__err) + : "0" (-1), "1" (EINVAL), + "i" (__NR_clone), "i" (__NR_exit), + "r" (__fn), "r" (__cstack), "r" (__flags), + "r" (__arg), "r" (__ptidptr), "r" (__newtls), + "r" (__ctidptr) + : "cr0", "cr1", "memory", "ctr", + "r0", "r29", 
"r27", "r28"); + } + LSS_RETURN(int, __ret, __err); + } + #endif + #define __NR__exit __NR_exit + #define __NR__gettid __NR_gettid + #define __NR__mremap __NR_mremap + LSS_INLINE _syscall1(int, close, int, f) + LSS_INLINE _syscall1(int, _exit, int, e) + LSS_INLINE _syscall3(int, fcntl, int, f, + int, c, long, a) + LSS_INLINE _syscall2(int, fstat, int, f, + struct kernel_stat*, b) + LSS_INLINE _syscall4(int, futex, int*, a, + int, o, int, v, + struct kernel_timespec*, t) + LSS_INLINE _syscall3(int, getdents, int, f, + struct kernel_dirent*, d, int, c) +#ifdef __NR_getdents64 + LSS_INLINE _syscall3(int, getdents64, int, f, + struct kernel_dirent64*, d, int, c) +#endif + LSS_INLINE _syscall0(pid_t, getpid) + LSS_INLINE _syscall0(pid_t, getppid) + LSS_INLINE _syscall0(pid_t, _gettid) + LSS_INLINE _syscall2(int, kill, pid_t, p, + int, s) + LSS_INLINE _syscall3(off_t, lseek, int, f, + off_t, o, int, w) + LSS_INLINE _syscall2(int, munmap, void*, s, + size_t, l) + LSS_INLINE _syscall5(void*, _mremap, void*, o, + size_t, os, size_t, ns, + unsigned long, f, void *, a) + LSS_INLINE _syscall3(int, open, const char*, p, + int, f, int, m) + LSS_INLINE _syscall2(int, prctl, int, o, + long, a) + LSS_INLINE _syscall4(long, ptrace, int, r, + pid_t, p, void *, a, void *, d) + LSS_INLINE _syscall3(ssize_t, read, int, f, + void *, b, size_t, c) + LSS_INLINE _syscall4(int, rt_sigaction, int, s, + const struct kernel_sigaction*, a, + struct kernel_sigaction*, o, size_t, c) + LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, + const struct kernel_sigset_t*, s, + struct kernel_sigset_t*, o, size_t, c); + LSS_INLINE _syscall0(int, sched_yield) + LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, + const stack_t*, o) + LSS_INLINE _syscall2(int, stat, const char*, f, + struct kernel_stat*, b) + LSS_INLINE _syscall3(ssize_t, write, int, f, + const void *, b, size_t, c) + #if defined(__NR_getcpu) + LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, + unsigned *, node, void *, unused); + 
#endif + #if defined(__x86_64__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) + #endif + #if defined(__x86_64__) + LSS_INLINE _syscall6(void*, mmap, void*, s, + size_t, l, int, p, + int, f, int, d, + off64_t, o) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + /* On x86_64, the kernel requires us to always set our own + * SA_RESTORER in order to be able to return from a signal handler. + * This function must have a "magic" signature that the "gdb" + * (and maybe the kernel?) can recognize. + */ + if (act != NULL && !(act->sa_flags & SA_RESTORER)) { + struct kernel_sigaction a = *act; + a.sa_flags |= SA_RESTORER; + a.sa_restorer = LSS_NAME(restore_rt)(); + return LSS_NAME(rt_sigaction)(signum, &a, oldact, + (KERNEL_NSIG+7)/8); + } else { + return LSS_NAME(rt_sigaction)(signum, act, oldact, + (KERNEL_NSIG+7)/8); + } + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + } + #endif + #if defined(__x86_64__) || \ + defined(__arm__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) + LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, + int*, s, int, o, + struct kernel_rusage*, r) + LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ + return LSS_NAME(wait4)(pid, status, options, 0); + } + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) + LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) + #endif + LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { + memset(&set->sig, 0, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { + memset(&set->sig, -1, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigaddset)(struct 
kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); + return 0; + } + } + + #if defined(__i386__) || \ + defined(__arm__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__) + #define __NR__sigaction __NR_sigaction + #define __NR__sigprocmask __NR_sigprocmask + LSS_INLINE _syscall2(int, fstat64, int, f, + struct kernel_stat64 *, b) + LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, + loff_t *, res, uint, wh) +#ifdef __PPC64__ + LSS_INLINE _syscall6(void*, mmap, void*, s, + size_t, l, int, p, + int, f, int, d, + off_t, o) +#else + #ifndef __ARM_EABI__ + /* Not available on ARM EABI Linux. */ + LSS_INLINE _syscall1(void*, mmap, void*, a) + #endif + LSS_INLINE _syscall6(void*, mmap2, void*, s, + size_t, l, int, p, + int, f, int, d, + off_t, o) +#endif + LSS_INLINE _syscall3(int, _sigaction, int, s, + const struct kernel_old_sigaction*, a, + struct kernel_old_sigaction*, o) + LSS_INLINE _syscall3(int, _sigprocmask, int, h, + const unsigned long*, s, + unsigned long*, o) + LSS_INLINE _syscall2(int, stat64, const char *, p, + struct kernel_stat64 *, b) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + int old_errno = LSS_ERRNO; + int rc; + struct kernel_sigaction a; + if (act != NULL) { + a = *act; + #ifdef __i386__ + /* On i386, the kernel requires us to always set our own + * SA_RESTORER when using realtime signals. 
Otherwise, it does not + * know how to return from a signal handler. This function must have + * a "magic" signature that the "gdb" (and maybe the kernel?) can + * recognize. + * Apparently, a SA_RESTORER is implicitly set by the kernel, when + * using non-realtime signals. + * + * TODO: Test whether ARM needs a restorer + */ + if (!(a.sa_flags & SA_RESTORER)) { + a.sa_flags |= SA_RESTORER; + a.sa_restorer = (a.sa_flags & SA_SIGINFO) + ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); + } + #endif + } + rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, + (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; + if (!act) { + ptr_a = NULL; + } else { + oa.sa_handler_ = act->sa_handler_; + memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); + #ifndef __mips__ + oa.sa_restorer = act->sa_restorer; + #endif + oa.sa_flags = act->sa_flags; + } + if (!oldact) { + ptr_oa = NULL; + } + LSS_ERRNO = old_errno; + rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); + if (rc == 0 && oldact) { + if (act) { + memcpy(oldact, act, sizeof(*act)); + } else { + memset(oldact, 0, sizeof(*oldact)); + } + oldact->sa_handler_ = ptr_oa->sa_handler_; + oldact->sa_flags = ptr_oa->sa_flags; + memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); + #ifndef __mips__ + oldact->sa_restorer = ptr_oa->sa_restorer; + #endif + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + if (oldset) { + LSS_NAME(sigemptyset)(oldset); + } + rc = LSS_NAME(_sigprocmask)(how, + set ? &set->sig[0] : NULL, + oldset ? &oldset->sig[0] : NULL); + } + return rc; + } + #endif + #if defined(__PPC__) + #undef LSS_SC_LOADARGS_0 + #define LSS_SC_LOADARGS_0(dummy...) 
+ #undef LSS_SC_LOADARGS_1 + #define LSS_SC_LOADARGS_1(arg1) \ + __sc_4 = (unsigned long) (arg1) + #undef LSS_SC_LOADARGS_2 + #define LSS_SC_LOADARGS_2(arg1, arg2) \ + LSS_SC_LOADARGS_1(arg1); \ + __sc_5 = (unsigned long) (arg2) + #undef LSS_SC_LOADARGS_3 + #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \ + LSS_SC_LOADARGS_2(arg1, arg2); \ + __sc_6 = (unsigned long) (arg3) + #undef LSS_SC_LOADARGS_4 + #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \ + LSS_SC_LOADARGS_3(arg1, arg2, arg3); \ + __sc_7 = (unsigned long) (arg4) + #undef LSS_SC_LOADARGS_5 + #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \ + LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \ + __sc_8 = (unsigned long) (arg5) + #undef LSS_SC_BODY + #define LSS_SC_BODY(nr, type, opt, args...) \ + long __sc_ret, __sc_err; \ + { \ unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \ unsigned long __sc_3 __asm__ ("r3") = opt; \ unsigned long __sc_4 __asm__ ("r4"); \ @@ -2061,125 +2061,125 @@ struct kernel_stat { unsigned long __sc_6 __asm__ ("r6"); \ unsigned long __sc_7 __asm__ ("r7"); \ unsigned long __sc_8 __asm__ ("r8"); \ - LSS_SC_LOADARGS_##nr(args); \ - __asm__ __volatile__ \ - ("stwu 1, -48(1)\n\t" \ - "stw 4, 20(1)\n\t" \ - "stw 5, 24(1)\n\t" \ - "stw 6, 28(1)\n\t" \ - "stw 7, 32(1)\n\t" \ - "stw 8, 36(1)\n\t" \ - "addi 4, 1, 20\n\t" \ - "sc\n\t" \ - "mfcr %0" \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : LSS_ASMINPUT_##nr \ - : "cr0", "ctr", "memory"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - LSS_RETURN(type, __sc_ret, __sc_err) - - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { - LSS_SC_BODY(3, int, 1, domain, type, protocol); - } - #endif - #if defined(__i386__) || \ - (defined(__arm__) && !defined(__ARM_EABI__)) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - - /* See sys_socketcall in net/socket.c in kernel source. 
- * It de-multiplexes on its first arg and unpacks the arglist - * array in its second arg. - */ - LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a) - - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { - unsigned long args[3] = { - (unsigned long) domain, - (unsigned long) type, - (unsigned long) protocol - }; - return LSS_NAME(socketcall)(1, args); - } - #elif defined(__ARM_EABI__) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - #endif - #if defined(__i386__) || defined(__PPC__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, - int*, s, int, o) - #endif - #if defined(__mips__) - /* sys_pipe() on MIPS has non-standard calling conventions, as it returns - * both file handles through CPU registers. - */ - LSS_INLINE int LSS_NAME(pipe)(int *p) { + LSS_SC_LOADARGS_##nr(args); \ + __asm__ __volatile__ \ + ("stwu 1, -48(1)\n\t" \ + "stw 4, 20(1)\n\t" \ + "stw 5, 24(1)\n\t" \ + "stw 6, 28(1)\n\t" \ + "stw 7, 32(1)\n\t" \ + "stw 8, 36(1)\n\t" \ + "addi 4, 1, 20\n\t" \ + "sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + LSS_SC_BODY(3, int, 1, domain, type, protocol); + } + #endif + #if defined(__i386__) || \ + (defined(__arm__) && !defined(__ARM_EABI__)) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + + /* See sys_socketcall in net/socket.c in kernel source. + * It de-multiplexes on its first arg and unpacks the arglist + * array in its second arg. 
+ */ + LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a) + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + unsigned long args[3] = { + (unsigned long) domain, + (unsigned long) type, + (unsigned long) protocol + }; + return LSS_NAME(socketcall)(1, args); + } + #elif defined(__ARM_EABI__) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) + #endif + #if defined(__i386__) || defined(__PPC__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, + int*, s, int, o) + #endif + #if defined(__mips__) + /* sys_pipe() on MIPS has non-standard calling conventions, as it returns + * both file handles through CPU registers. + */ + LSS_INLINE int LSS_NAME(pipe)(int *p) { unsigned long __v0 __asm__("$2") = __NR_pipe; unsigned long __v1 __asm__("$3"); unsigned long __r7 __asm__("$7"); - __asm__ __volatile__ ("syscall\n" - : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) - : "0"(__v0) - : "$8", "$9", "$10", "$11", "$12", - "$13", "$14", "$15", "$24", "memory"); - if (__r7) { - LSS_ERRNO = __v0; - return -1; - } else { - p[0] = __v0; - p[1] = __v1; - return 0; - } - } - #else - LSS_INLINE _syscall1(int, pipe, int *, p) - #endif - - LSS_INLINE pid_t LSS_NAME(gettid)() { - pid_t tid = LSS_NAME(_gettid)(); - if (tid != -1) { - return tid; - } - return LSS_NAME(getpid)(); - } - - LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, - size_t new_size, int flags, ...) { - va_list ap; - void *new_address, *rc; - va_start(ap, flags); - new_address = va_arg(ap, void *); - rc = LSS_NAME(_mremap)(old_address, old_size, new_size, - flags, new_address); - va_end(ap); - return rc; - } - - LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { - /* PTRACE_DETACH can sometimes forget to wake up the tracee and it - * then sends job control signals to the real parent, rather than to - * the tracer. 
We reduce the risk of this happening by starting a - * whole new time slice, and then quickly sending a SIGCONT signal - * right after detaching from the tracee. - */ - int rc, err; - LSS_NAME(sched_yield)(); - rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); - err = LSS_ERRNO; - LSS_NAME(kill)(pid, SIGCONT); - LSS_ERRNO = err; - return rc; - } -#endif - -#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) -} -#endif - -#endif -#endif + __asm__ __volatile__ ("syscall\n" + : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) + : "0"(__v0) + : "$8", "$9", "$10", "$11", "$12", + "$13", "$14", "$15", "$24", "memory"); + if (__r7) { + LSS_ERRNO = __v0; + return -1; + } else { + p[0] = __v0; + p[1] = __v1; + return 0; + } + } + #else + LSS_INLINE _syscall1(int, pipe, int *, p) + #endif + + LSS_INLINE pid_t LSS_NAME(gettid)() { + pid_t tid = LSS_NAME(_gettid)(); + if (tid != -1) { + return tid; + } + return LSS_NAME(getpid)(); + } + + LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, + size_t new_size, int flags, ...) { + va_list ap; + void *new_address, *rc; + va_start(ap, flags); + new_address = va_arg(ap, void *); + rc = LSS_NAME(_mremap)(old_address, old_size, new_size, + flags, new_address); + va_end(ap); + return rc; + } + + LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { + /* PTRACE_DETACH can sometimes forget to wake up the tracee and it + * then sends job control signals to the real parent, rather than to + * the tracer. We reduce the risk of this happening by starting a + * whole new time slice, and then quickly sending a SIGCONT signal + * right after detaching from the tracee. 
+ */ + int rc, err; + LSS_NAME(sched_yield)(); + rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); + err = LSS_ERRNO; + LSS_NAME(kill)(pid, SIGCONT); + LSS_ERRNO = err; + return rc; + } +#endif + +#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) +} +#endif + +#endif +#endif diff --git a/contrib/libs/linuxvdso/original/logging.h b/contrib/libs/linuxvdso/original/logging.h index c61eab18e35..209714ccd0d 100644 --- a/contrib/libs/linuxvdso/original/logging.h +++ b/contrib/libs/linuxvdso/original/logging.h @@ -1,154 +1,154 @@ -#pragma once - -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// This file contains #include information about logging-related stuff. -// Pretty much everybody needs to #include this file so that they can -// log various happenings. -// -#ifndef _LOGGING_H_ -#define _LOGGING_H_ - -#include -#include -#include - -#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len) - -#define CHECK(condition) \ - do { \ - if (!(condition)) { \ - WRITE_TO_STDERR("Check failed: " #condition "\n", \ - sizeof("Check failed: " #condition "\n")-1); \ - abort(); \ - } \ - } while (0) - -// This takes a message to print. The name is historical. -#define RAW_CHECK(condition, message) \ - do { \ - if (!(condition)) { \ - WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \ - sizeof("Check failed: " #condition ": " message "\n")-1);\ - abort(); \ - } \ - } while (0) - -// This is like RAW_CHECK, but only in debug-mode -#ifdef NDEBUG -enum { DEBUG_MODE = 0 }; -#define RAW_DCHECK(condition, message) -#else -enum { DEBUG_MODE = 1 }; -#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message) -#endif - -// This prints errno as well. Note we use write instead of printf/puts to -// avoid the risk we'll call malloc(). 
-#define PCHECK(condition) \ - do { \ - if (!(condition)) { \ - const int err_no = errno; \ - WRITE_TO_STDERR("Check failed: " #condition ": ", \ - sizeof("Check failed: " #condition ": ")-1); \ - WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \ - WRITE_TO_STDERR("\n", sizeof("\n")-1); \ - abort(); \ - } \ - } while (0) - -// Helper macro for binary operators; prints the two values on error -// Don't use this macro directly in your code, use CHECK_EQ et al below - -// WARNING: These don't compile correctly if one of the arguments is a pointer -// and the other is NULL. To work around this, simply static_cast NULL to the -// type of the desired pointer. - -// TODO(jandrews): Also print the values in case of failure. Requires some -// sort of type-sensitive ToString() function. -#define CHECK_OP(op, val1, val2) \ - do { \ - if (!((val1) op (val2))) { \ - fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \ - abort(); \ - } \ - } while (0) - -#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) -#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) -#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) -#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) -#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) -#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) - -// Synonyms for CHECK_* that are used in some unittests. 
-#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2) -#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2) -#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2) -#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2) -#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2) -#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2) -#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2) -#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2) -#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2) -#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2) -#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2) -#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2) -// As are these variants. -#define EXPECT_TRUE(cond) CHECK(cond) -#define EXPECT_FALSE(cond) CHECK(!(cond)) -#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) -#define ASSERT_TRUE(cond) EXPECT_TRUE(cond) -#define ASSERT_FALSE(cond) EXPECT_FALSE(cond) -#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b) - -// Used for (libc) functions that return -1 and set errno -#define CHECK_ERR(invocation) PCHECK((invocation) != -1) - -// A few more checks that only happen in debug mode -#ifdef NDEBUG -#define DCHECK_EQ(val1, val2) -#define DCHECK_NE(val1, val2) -#define DCHECK_LE(val1, val2) -#define DCHECK_LT(val1, val2) -#define DCHECK_GE(val1, val2) -#define DCHECK_GT(val1, val2) -#else -#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) -#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) -#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) -#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) -#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) -#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) -#endif - - -#endif // _LOGGING_H_ +#pragma once + +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file contains #include information about logging-related stuff. +// Pretty much everybody needs to #include this file so that they can +// log various happenings. 
+// +#ifndef _LOGGING_H_ +#define _LOGGING_H_ + +#include +#include +#include + +#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len) + +#define CHECK(condition) \ + do { \ + if (!(condition)) { \ + WRITE_TO_STDERR("Check failed: " #condition "\n", \ + sizeof("Check failed: " #condition "\n")-1); \ + abort(); \ + } \ + } while (0) + +// This takes a message to print. The name is historical. +#define RAW_CHECK(condition, message) \ + do { \ + if (!(condition)) { \ + WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \ + sizeof("Check failed: " #condition ": " message "\n")-1);\ + abort(); \ + } \ + } while (0) + +// This is like RAW_CHECK, but only in debug-mode +#ifdef NDEBUG +enum { DEBUG_MODE = 0 }; +#define RAW_DCHECK(condition, message) +#else +enum { DEBUG_MODE = 1 }; +#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message) +#endif + +// This prints errno as well. Note we use write instead of printf/puts to +// avoid the risk we'll call malloc(). +#define PCHECK(condition) \ + do { \ + if (!(condition)) { \ + const int err_no = errno; \ + WRITE_TO_STDERR("Check failed: " #condition ": ", \ + sizeof("Check failed: " #condition ": ")-1); \ + WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \ + WRITE_TO_STDERR("\n", sizeof("\n")-1); \ + abort(); \ + } \ + } while (0) + +// Helper macro for binary operators; prints the two values on error +// Don't use this macro directly in your code, use CHECK_EQ et al below + +// WARNING: These don't compile correctly if one of the arguments is a pointer +// and the other is NULL. To work around this, simply static_cast NULL to the +// type of the desired pointer. + +// TODO(jandrews): Also print the values in case of failure. Requires some +// sort of type-sensitive ToString() function. 
+#define CHECK_OP(op, val1, val2) \ + do { \ + if (!((val1) op (val2))) { \ + fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \ + abort(); \ + } \ + } while (0) + +#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) +#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) +#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) +#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) +#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) +#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) + +// Synonyms for CHECK_* that are used in some unittests. +#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2) +#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2) +#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2) +#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2) +#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2) +#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2) +#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2) +#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2) +#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2) +#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2) +#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2) +#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2) +// As are these variants. 
+#define EXPECT_TRUE(cond) CHECK(cond) +#define EXPECT_FALSE(cond) CHECK(!(cond)) +#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) +#define ASSERT_TRUE(cond) EXPECT_TRUE(cond) +#define ASSERT_FALSE(cond) EXPECT_FALSE(cond) +#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b) + +// Used for (libc) functions that return -1 and set errno +#define CHECK_ERR(invocation) PCHECK((invocation) != -1) + +// A few more checks that only happen in debug mode +#ifdef NDEBUG +#define DCHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) +#else +#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) +#endif + + +#endif // _LOGGING_H_ diff --git a/contrib/libs/linuxvdso/original/vdso_support.cc b/contrib/libs/linuxvdso/original/vdso_support.cc index d1763f38dba..2977477398a 100644 --- a/contrib/libs/linuxvdso/original/vdso_support.cc +++ b/contrib/libs/linuxvdso/original/vdso_support.cc @@ -1,139 +1,139 @@ -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Paul Pluzhnikov -// -// Allow dynamic symbol lookup in the kernel VDSO page. -// -// VDSOSupport -- a class representing kernel VDSO (if present). -// - -#include "vdso_support.h" - -#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h - -#include -#include // for ptrdiff_t - -#include "linux_syscall_support.h" -#include "logging.h" - -#ifndef AT_SYSINFO_EHDR -#define AT_SYSINFO_EHDR 33 -#endif - -namespace base { - +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// + +#include "vdso_support.h" + +#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h + +#include +#include // for ptrdiff_t + +#include "linux_syscall_support.h" +#include "logging.h" + +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR 33 +#endif + +namespace base { + const void *VDSOSupport::vdso_base_ = NULL; - -VDSOSupport::VDSOSupport() + +VDSOSupport::VDSOSupport() // If vdso_base_ is still set to NULL, we got here - // before VDSOSupport::Init has been called. Call it now. + // before VDSOSupport::Init has been called. Call it now. : image_(Init()) { -} - -// NOTE: we can't use GoogleOnceInit() below, because we can be -// called by tcmalloc, and none of the *once* stuff may be functional yet. 
-// -// In addition, we hope that the VDSOSupportHelper constructor -// causes this code to run before there are any threads, and before -// InitGoogle() has executed any chroot or setuid calls. -// -// Finally, even if there is a race here, it is harmless, because -// the operation should be idempotent. -const void *VDSOSupport::Init() { +} + +// NOTE: we can't use GoogleOnceInit() below, because we can be +// called by tcmalloc, and none of the *once* stuff may be functional yet. +// +// In addition, we hope that the VDSOSupportHelper constructor +// causes this code to run before there are any threads, and before +// InitGoogle() has executed any chroot or setuid calls. +// +// Finally, even if there is a race here, it is harmless, because +// the operation should be idempotent. +const void *VDSOSupport::Init() { if (vdso_base_ == NULL) { - // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[] - // on stack, and so glibc works as if VDSO was not present. - // But going directly to kernel via /proc/self/auxv below bypasses - // Valgrind zapping. So we check for Valgrind separately. - if (RunningOnValgrind()) { + // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[] + // on stack, and so glibc works as if VDSO was not present. + // But going directly to kernel via /proc/self/auxv below bypasses + // Valgrind zapping. So we check for Valgrind separately. + if (RunningOnValgrind()) { vdso_base_ = ElfMemImage::kInvalidBase; return vdso_base_; - } - int fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - // Kernel too old to have a VDSO. + } + int fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + // Kernel too old to have a VDSO. 
vdso_base_ = ElfMemImage::kInvalidBase; return vdso_base_; - } - ElfW(auxv_t) aux; - while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { - if (aux.a_type == AT_SYSINFO_EHDR) { - COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val), - unexpected_sizeof_pointer_NE_sizeof_a_val); - vdso_base_ = reinterpret_cast(aux.a_un.a_val); - break; - } - } - close(fd); + } + ElfW(auxv_t) aux; + while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { + if (aux.a_type == AT_SYSINFO_EHDR) { + COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val), + unexpected_sizeof_pointer_NE_sizeof_a_val); + vdso_base_ = reinterpret_cast(aux.a_un.a_val); + break; + } + } + close(fd); if (vdso_base_ == NULL) { - // Didn't find AT_SYSINFO_EHDR in auxv[]. + // Didn't find AT_SYSINFO_EHDR in auxv[]. vdso_base_ = ElfMemImage::kInvalidBase; - } - } - return vdso_base_; -} - -const void *VDSOSupport::SetBase(const void *base) { + } + } + return vdso_base_; +} + +const void *VDSOSupport::SetBase(const void *base) { CHECK(base != NULL); - const void *old_base = vdso_base_; - vdso_base_ = base; - image_.Init(base); - return old_base; -} - -bool VDSOSupport::LookupSymbol(const char *name, - const char *version, - int type, - SymbolInfo *info) const { - return image_.LookupSymbol(name, version, type, info); -} - -bool VDSOSupport::LookupSymbolByAddress(const void *address, - SymbolInfo *info_out) const { - return image_.LookupSymbolByAddress(address, info_out); -} - -// We need to make sure VDSOSupport::Init() is called before -// the main() runs, since it might do something like setuid or -// chroot. If VDSOSupport -// is used in any global constructor, this will happen, since -// VDSOSupport's constructor calls Init. But if not, we need to -// ensure it here, with a global constructor of our own. This -// is an allowed exception to the normal rule against non-trivial -// global constructors. 
-static class VDSOInitHelper { - public: - VDSOInitHelper() { VDSOSupport::Init(); } -} vdso_init_helper; -} - -#endif // HAVE_VDSO_SUPPORT + const void *old_base = vdso_base_; + vdso_base_ = base; + image_.Init(base); + return old_base; +} + +bool VDSOSupport::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info) const { + return image_.LookupSymbol(name, version, type, info); +} + +bool VDSOSupport::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + return image_.LookupSymbolByAddress(address, info_out); +} + +// We need to make sure VDSOSupport::Init() is called before +// the main() runs, since it might do something like setuid or +// chroot. If VDSOSupport +// is used in any global constructor, this will happen, since +// VDSOSupport's constructor calls Init. But if not, we need to +// ensure it here, with a global constructor of our own. This +// is an allowed exception to the normal rule against non-trivial +// global constructors. +static class VDSOInitHelper { + public: + VDSOInitHelper() { VDSOSupport::Init(); } +} vdso_init_helper; +} + +#endif // HAVE_VDSO_SUPPORT diff --git a/contrib/libs/linuxvdso/original/vdso_support.h b/contrib/libs/linuxvdso/original/vdso_support.h index 99623338d2c..1ccf32c23b9 100644 --- a/contrib/libs/linuxvdso/original/vdso_support.h +++ b/contrib/libs/linuxvdso/original/vdso_support.h @@ -1,132 +1,132 @@ -#pragma once - -// Copyright (c) 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Paul Pluzhnikov -// -// Allow dynamic symbol lookup in the kernel VDSO page. -// -// VDSO stands for "Virtual Dynamic Shared Object" -- a page of -// executable code, which looks like a shared library, but doesn't -// necessarily exist anywhere on disk, and which gets mmap()ed into -// every process by kernels which support VDSO, such as 2.6.x for 32-bit -// executables, and 2.6.24 and above for 64-bit executables. -// -// More details could be found here: -// http://www.trilithium.com/johan/2005/08/linux-gate/ -// -// VDSOSupport -- a class representing kernel VDSO (if present). 
-// -// Example usage: -// VDSOSupport vdso; -// VDSOSupport::SymbolInfo info; -// typedef (*FN)(unsigned *, void *, void *); -// FN fn = NULL; -// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { -// fn = reinterpret_cast(info.address); -// } - -#ifndef BASE_VDSO_SUPPORT_H_ -#define BASE_VDSO_SUPPORT_H_ - -#include "config.h" -#include "elf_mem_image.h" - -#ifdef HAVE_ELF_MEM_IMAGE - -#define HAVE_VDSO_SUPPORT 1 - -#include // for NULL - -namespace base { - -// NOTE: this class may be used from within tcmalloc, and can not -// use any memory allocation routines. -class VDSOSupport { - public: - VDSOSupport(); - - typedef ElfMemImage::SymbolInfo SymbolInfo; - typedef ElfMemImage::SymbolIterator SymbolIterator; - - // Answers whether we have a vdso at all. - bool IsPresent() const { return image_.IsPresent(); } - - // Allow to iterate over all VDSO symbols. - SymbolIterator begin() const { return image_.begin(); } - SymbolIterator end() const { return image_.end(); } - - // Look up versioned dynamic symbol in the kernel VDSO. - // Returns false if VDSO is not present, or doesn't contain given - // symbol/version/type combination. - // If info_out != NULL, additional details are filled in. - bool LookupSymbol(const char *name, const char *version, - int symbol_type, SymbolInfo *info_out) const; - - // Find info about symbol (if any) which overlaps given address. - // Returns true if symbol was found; false if VDSO isn't present - // or doesn't have a symbol overlapping given address. - // If info_out != NULL, additional details are filled in. - bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; - - // Used only for testing. Replace real VDSO base with a mock. - // Returns previous value of vdso_base_. After you are done testing, - // you are expected to call SetBase() with previous value, in order to - // reset state to the way it was. - const void *SetBase(const void *s); - - // Computes vdso_base_ and returns it. 
Should be called as early as - // possible; before any thread creation, chroot or setuid. - static const void *Init(); - - private: - // image_ represents VDSO ELF image in memory. - // image_.ehdr_ == NULL implies there is no VDSO. - ElfMemImage image_; - - // Cached value of auxv AT_SYSINFO_EHDR, computed once. - // This is a tri-state: +#pragma once + +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSO stands for "Virtual Dynamic Shared Object" -- a page of +// executable code, which looks like a shared library, but doesn't +// necessarily exist anywhere on disk, and which gets mmap()ed into +// every process by kernels which support VDSO, such as 2.6.x for 32-bit +// executables, and 2.6.24 and above for 64-bit executables. +// +// More details could be found here: +// http://www.trilithium.com/johan/2005/08/linux-gate/ +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// +// Example usage: +// VDSOSupport vdso; +// VDSOSupport::SymbolInfo info; +// typedef (*FN)(unsigned *, void *, void *); +// FN fn = NULL; +// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { +// fn = reinterpret_cast(info.address); +// } + +#ifndef BASE_VDSO_SUPPORT_H_ +#define BASE_VDSO_SUPPORT_H_ + +#include "config.h" +#include "elf_mem_image.h" + +#ifdef HAVE_ELF_MEM_IMAGE + +#define HAVE_VDSO_SUPPORT 1 + +#include // for NULL + +namespace base { + +// NOTE: this class may be used from within tcmalloc, and can not +// use any memory allocation routines. +class VDSOSupport { + public: + VDSOSupport(); + + typedef ElfMemImage::SymbolInfo SymbolInfo; + typedef ElfMemImage::SymbolIterator SymbolIterator; + + // Answers whether we have a vdso at all. + bool IsPresent() const { return image_.IsPresent(); } + + // Allow to iterate over all VDSO symbols. + SymbolIterator begin() const { return image_.begin(); } + SymbolIterator end() const { return image_.end(); } + + // Look up versioned dynamic symbol in the kernel VDSO. + // Returns false if VDSO is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out != NULL, additional details are filled in. 
+ bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if VDSO isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != NULL, additional details are filled in. + bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + // Used only for testing. Replace real VDSO base with a mock. + // Returns previous value of vdso_base_. After you are done testing, + // you are expected to call SetBase() with previous value, in order to + // reset state to the way it was. + const void *SetBase(const void *s); + + // Computes vdso_base_ and returns it. Should be called as early as + // possible; before any thread creation, chroot or setuid. + static const void *Init(); + + private: + // image_ represents VDSO ELF image in memory. + // image_.ehdr_ == NULL implies there is no VDSO. + ElfMemImage image_; + + // Cached value of auxv AT_SYSINFO_EHDR, computed once. + // This is a tri-state: // 0 => value hasn't been determined yet. // kInvalidBase => there is no VDSO. - // else => vma of VDSO Elf{32,64}_Ehdr. - // - // When testing with mock VDSO, low bit is set. - // The low bit is always available because vdso_base_ is - // page-aligned. - static const void *vdso_base_; - - DISALLOW_COPY_AND_ASSIGN(VDSOSupport); -}; -} // namespace base - -#endif // HAVE_ELF_MEM_IMAGE - -#endif // BASE_VDSO_SUPPORT_H_ + // else => vma of VDSO Elf{32,64}_Ehdr. + // + // When testing with mock VDSO, low bit is set. + // The low bit is always available because vdso_base_ is + // page-aligned. 
+ static const void *vdso_base_; + + DISALLOW_COPY_AND_ASSIGN(VDSOSupport); +}; +} // namespace base + +#endif // HAVE_ELF_MEM_IMAGE + +#endif // BASE_VDSO_SUPPORT_H_ diff --git a/contrib/libs/linuxvdso/original/ya.make b/contrib/libs/linuxvdso/original/ya.make index e545457d875..c5b41c3586e 100644 --- a/contrib/libs/linuxvdso/original/ya.make +++ b/contrib/libs/linuxvdso/original/ya.make @@ -1,6 +1,6 @@ -LIBRARY() +LIBRARY() -LICENSE(BSD-3-Clause) +LICENSE(BSD-3-Clause) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) @@ -8,16 +8,16 @@ OWNER( g:contrib g:cpp-contrib ) - + NO_UTIL() NO_RUNTIME() NO_COMPILER_WARNINGS() - -SRCS( - vdso_support.cc - elf_mem_image.cc -) - -END() + +SRCS( + vdso_support.cc + elf_mem_image.cc +) + +END() diff --git a/contrib/libs/linuxvdso/ya.make b/contrib/libs/linuxvdso/ya.make index d26073c6cc9..4da8d3d0769 100644 --- a/contrib/libs/linuxvdso/ya.make +++ b/contrib/libs/linuxvdso/ya.make @@ -1,8 +1,8 @@ -LIBRARY() +LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(BSD-3-Clause) +LICENSE(BSD-3-Clause) VERSION(2.0) @@ -12,22 +12,22 @@ OWNER( g:contrib g:cpp-contrib ) - + NO_UTIL() NO_RUNTIME() - + IF (OS_LINUX) - PEERDIR( - contrib/libs/linuxvdso/original - ) - SRCS( - interface.cpp - ) + PEERDIR( + contrib/libs/linuxvdso/original + ) + SRCS( + interface.cpp + ) ELSE() - SRCS( - fake.cpp - ) + SRCS( + fake.cpp + ) ENDIF() - -END() + +END() diff --git a/contrib/libs/lz4/generated/gen.py b/contrib/libs/lz4/generated/gen.py index 6bee186e938..24dec0555c2 100644 --- a/contrib/libs/lz4/generated/gen.py +++ b/contrib/libs/lz4/generated/gen.py @@ -1,5 +1,5 @@ import os - + lz4 = ''' #define LZ4_MEMORY_USAGE {i} #define LZ4_NAMESPACE lz4_{i} @@ -32,14 +32,14 @@ cases = [] os.chdir(os.path.dirname(__file__)) -for i in range(10, 21): +for i in range(10, 21): name = 'lz4_{}.cpp'.format(i) namespaces.append(lz4_namespace.format(i=i)) cases.append(lz4_case.format(i=i)) print ' ' + name - + with open(name, 'w') as f: f.write(lz4.format(i=i)) - + with 
open('lz4methods.cpp', 'w') as f: f.write(lz4methods % ('\n'.join(namespaces), '\n'.join(cases))) diff --git a/contrib/libs/lz4/generated/iface.h b/contrib/libs/lz4/generated/iface.h index c30f2da8538..e299d44014c 100644 --- a/contrib/libs/lz4/generated/iface.h +++ b/contrib/libs/lz4/generated/iface.h @@ -1,15 +1,15 @@ -#pragma once - -#if defined(__cplusplus) -extern "C" { -#endif - -struct TLZ4Methods { - int (*LZ4CompressLimited)(const char* source, char* dest, int isize, int maxOut); -}; - -struct TLZ4Methods* LZ4Methods(int memory); - -#if defined(__cplusplus) -} -#endif +#pragma once + +#if defined(__cplusplus) +extern "C" { +#endif + +struct TLZ4Methods { + int (*LZ4CompressLimited)(const char* source, char* dest, int isize, int maxOut); +}; + +struct TLZ4Methods* LZ4Methods(int memory); + +#if defined(__cplusplus) +} +#endif diff --git a/contrib/libs/lz4/generated/lz4_ns.h b/contrib/libs/lz4/generated/lz4_ns.h index 8da45bceb58..2dfbbf8ccc4 100644 --- a/contrib/libs/lz4/generated/lz4_ns.h +++ b/contrib/libs/lz4/generated/lz4_ns.h @@ -1,7 +1,7 @@ #pragma once - + #include "iface.h" - + #include #include #include @@ -14,6 +14,6 @@ namespace LZ4_NAMESPACE { struct TLZ4Methods ytbl = { LZ4_compress_default, -}; - +}; + } diff --git a/contrib/libs/lz4/generated/ya.make b/contrib/libs/lz4/generated/ya.make index ed4cb874d3f..f37d13bddb6 100644 --- a/contrib/libs/lz4/generated/ya.make +++ b/contrib/libs/lz4/generated/ya.make @@ -2,8 +2,8 @@ LIBRARY() WITHOUT_LICENSE_TEXTS() -LICENSE(BSD-2-Clause) - +LICENSE(BSD-2-Clause) + OWNER( orivej g:contrib diff --git a/contrib/libs/lz4/lz4.c b/contrib/libs/lz4/lz4.c index 657b8f1bafb..c864ba73baa 100644 --- a/contrib/libs/lz4/lz4.c +++ b/contrib/libs/lz4/lz4.c @@ -42,7 +42,7 @@ */ #ifndef LZ4_HEAPMODE # define LZ4_HEAPMODE 0 -#endif +#endif /* * LZ4_ACCELERATION_DEFAULT : diff --git a/contrib/libs/lzmasdk/7zStream.c b/contrib/libs/lzmasdk/7zStream.c index 61b8ad60d80..6b5aa1621dd 100644 --- a/contrib/libs/lzmasdk/7zStream.c 
+++ b/contrib/libs/lzmasdk/7zStream.c @@ -1,176 +1,176 @@ -/* 7zStream.c -- 7z Stream functions -2017-04-03 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include - -#include "7zTypes.h" - -SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType) -{ - while (size != 0) - { - size_t processed = size; - RINOK(ISeqInStream_Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void *)((Byte *)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size) -{ - return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf) -{ - size_t processed = 1; - RINOK(ISeqInStream_Read(stream, buf, &processed)); - return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF; -} - - - -SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) -{ - Int64 t = offset; - return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); -} - -SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size) -{ - const void *lookBuf; - if (*size == 0) - return SZ_OK; - RINOK(ILookInStream_Look(stream, &lookBuf, size)); - memcpy(buf, lookBuf, *size); - return ILookInStream_Skip(stream, *size); -} - -SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType) -{ - while (size != 0) - { - size_t processed = size; - RINOK(ILookInStream_Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void *)((Byte *)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size) -{ - return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - - - -#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt); - -static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size) -{ 
- SRes res = SZ_OK; - GET_LookToRead2 - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size != 0) - { - p->pos = 0; - p->size = 0; - size2 = p->bufSize; - res = ISeekInStream_Read(p->realStream, p->buf, &size2); - p->size = size2; - } - if (*size > size2) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size) -{ - SRes res = SZ_OK; - GET_LookToRead2 - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size != 0) - { - p->pos = 0; - p->size = 0; - if (*size > p->bufSize) - *size = p->bufSize; - res = ISeekInStream_Read(p->realStream, p->buf, size); - size2 = p->size = *size; - } - if (*size > size2) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset) -{ - GET_LookToRead2 - p->pos += offset; - return SZ_OK; -} - -static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size) -{ - GET_LookToRead2 - size_t rem = p->size - p->pos; - if (rem == 0) - return ISeekInStream_Read(p->realStream, buf, size); - if (rem > *size) - rem = *size; - memcpy(buf, p->buf + p->pos, rem); - p->pos += rem; - *size = rem; - return SZ_OK; -} - -static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin) -{ - GET_LookToRead2 - p->pos = p->size = 0; - return ISeekInStream_Seek(p->realStream, pos, origin); -} - -void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead) -{ - p->vt.Look = lookahead ? 
- LookToRead2_Look_Lookahead : - LookToRead2_Look_Exact; - p->vt.Skip = LookToRead2_Skip; - p->vt.Read = LookToRead2_Read; - p->vt.Seek = LookToRead2_Seek; -} - - - -static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size) -{ - CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt); - return LookInStream_LookRead(p->realStream, buf, size); -} - -void SecToLook_CreateVTable(CSecToLook *p) -{ - p->vt.Read = SecToLook_Read; -} - -static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size) -{ - CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt); - return ILookInStream_Read(p->realStream, buf, size); -} - -void SecToRead_CreateVTable(CSecToRead *p) -{ - p->vt.Read = SecToRead_Read; -} +/* 7zStream.c -- 7z Stream functions +2017-04-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zTypes.h" + +SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ISeqInStream_Read(stream, buf, &processed)); + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size) +{ + return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + +SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf) +{ + size_t processed = 1; + RINOK(ISeqInStream_Read(stream, buf, &processed)); + return (processed == 1) ? 
SZ_OK : SZ_ERROR_INPUT_EOF; +} + + + +SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) +{ + Int64 t = offset; + return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); +} + +SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size) +{ + const void *lookBuf; + if (*size == 0) + return SZ_OK; + RINOK(ILookInStream_Look(stream, &lookBuf, size)); + memcpy(buf, lookBuf, *size); + return ILookInStream_Skip(stream, *size); +} + +SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ILookInStream_Read(stream, buf, &processed)); + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size) +{ + return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + + + +#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt); + +static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + size2 = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, &size2); + p->size = size2; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + if (*size > p->bufSize) + *size = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, size); + size2 = p->size = *size; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset) +{ + 
GET_LookToRead2 + p->pos += offset; + return SZ_OK; +} + +static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size) +{ + GET_LookToRead2 + size_t rem = p->size - p->pos; + if (rem == 0) + return ISeekInStream_Read(p->realStream, buf, size); + if (rem > *size) + rem = *size; + memcpy(buf, p->buf + p->pos, rem); + p->pos += rem; + *size = rem; + return SZ_OK; +} + +static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin) +{ + GET_LookToRead2 + p->pos = p->size = 0; + return ISeekInStream_Seek(p->realStream, pos, origin); +} + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead) +{ + p->vt.Look = lookahead ? + LookToRead2_Look_Lookahead : + LookToRead2_Look_Exact; + p->vt.Skip = LookToRead2_Skip; + p->vt.Read = LookToRead2_Read; + p->vt.Seek = LookToRead2_Seek; +} + + + +static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size) +{ + CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt); + return LookInStream_LookRead(p->realStream, buf, size); +} + +void SecToLook_CreateVTable(CSecToLook *p) +{ + p->vt.Read = SecToLook_Read; +} + +static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size) +{ + CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt); + return ILookInStream_Read(p->realStream, buf, size); +} + +void SecToRead_CreateVTable(CSecToRead *p) +{ + p->vt.Read = SecToRead_Read; +} diff --git a/contrib/libs/lzmasdk/7zTypes.h b/contrib/libs/lzmasdk/7zTypes.h index adf08fb279b..65b3af63c75 100644 --- a/contrib/libs/lzmasdk/7zTypes.h +++ b/contrib/libs/lzmasdk/7zTypes.h @@ -1,375 +1,375 @@ -/* 7zTypes.h -- Basic types +/* 7zTypes.h -- Basic types 2018-08-04 : Igor Pavlov : Public domain */ - -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H - -#ifdef _WIN32 -/* #include */ -#endif - -#include - -#ifndef EXTERN_C_BEGIN -#ifdef __cplusplus -#define EXTERN_C_BEGIN extern "C" { -#define EXTERN_C_END } -#else -#define EXTERN_C_BEGIN -#define EXTERN_C_END -#endif -#endif - 
-EXTERN_C_BEGIN - -#define SZ_OK 0 - -#define SZ_ERROR_DATA 1 -#define SZ_ERROR_MEM 2 -#define SZ_ERROR_CRC 3 -#define SZ_ERROR_UNSUPPORTED 4 -#define SZ_ERROR_PARAM 5 -#define SZ_ERROR_INPUT_EOF 6 -#define SZ_ERROR_OUTPUT_EOF 7 -#define SZ_ERROR_READ 8 -#define SZ_ERROR_WRITE 9 -#define SZ_ERROR_PROGRESS 10 -#define SZ_ERROR_FAIL 11 -#define SZ_ERROR_THREAD 12 - -#define SZ_ERROR_ARCHIVE 16 -#define SZ_ERROR_NO_ARCHIVE 17 - -typedef int SRes; - - -#ifdef _WIN32 - -/* typedef DWORD WRes; */ -typedef unsigned WRes; -#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) - -#else - -typedef int WRes; -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_WIN32 -#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) - -#endif - - -#ifndef RINOK -#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } -#endif - -typedef unsigned char Byte; -typedef short Int16; -typedef unsigned short UInt16; - -#ifdef _LZMA_UINT32_IS_ULONG -typedef long Int32; -typedef unsigned long UInt32; -#else -typedef int Int32; -typedef unsigned int UInt32; -#endif - -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! 
*/ - -typedef long Int64; -typedef unsigned long UInt64; - -#else - -#if defined(_MSC_VER) || defined(__BORLANDC__) -typedef __int64 Int64; -typedef unsigned __int64 UInt64; -#define UINT64_CONST(n) n -#else -typedef long long int Int64; -typedef unsigned long long int UInt64; -#define UINT64_CONST(n) n ## ULL -#endif - -#endif - -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; -#else -typedef size_t SizeT; -#endif - + +#ifndef __7Z_TYPES_H +#define __7Z_TYPES_H + +#ifdef _WIN32 +/* #include */ +#endif + +#include + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + + +#ifdef _WIN32 + +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + +#else + +typedef int WRes; +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_WIN32 +#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) + +#endif + + +#ifndef RINOK +#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + +#ifdef _SZ_NO_INT_64 + +/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. 
+ NOTES: Some code will work incorrectly in that case! */ + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#define UINT64_CONST(n) n +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +#define UINT64_CONST(n) n ## ULL +#endif + +#endif + +#ifdef _LZMA_NO_SYSTEM_SIZE_T +typedef UInt32 SizeT; +#else +typedef size_t SizeT; +#endif + typedef int BoolInt; /* typedef BoolInt Bool; */ -#define True 1 -#define False 0 - - -#ifdef _WIN32 -#define MY_STD_CALL __stdcall -#else -#define MY_STD_CALL -#endif - -#ifdef _MSC_VER - -#if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) -#else -#define MY_NO_INLINE -#endif - -#define MY_FORCE_INLINE __forceinline - -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall - -#else - -#define MY_NO_INLINE -#define MY_FORCE_INLINE -#define MY_CDECL -#define MY_FAST_CALL - -/* inline keyword : for C++ / C99 */ - -/* GCC, clang: */ -/* -#if defined (__GNUC__) && (__GNUC__ >= 4) -#define MY_FORCE_INLINE __attribute__((always_inline)) -#define MY_NO_INLINE __attribute__((noinline)) -#endif -*/ - -#endif - - -/* The following interfaces use first parameter as pointer to structure */ - -typedef struct IByteIn IByteIn; -struct IByteIn -{ - Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ -}; -#define IByteIn_Read(p) (p)->Read(p) - - -typedef struct IByteOut IByteOut; -struct IByteOut -{ - void (*Write)(const IByteOut *p, Byte b); -}; -#define IByteOut_Write(p, b) (p)->Write(p, b) - - -typedef struct ISeqInStream ISeqInStream; -struct ISeqInStream -{ - SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
- (output(*size) < input(*size)) is allowed */ -}; -#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) - -/* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); - - -typedef struct ISeqOutStream ISeqOutStream; -struct ISeqOutStream -{ - size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); - /* Returns: result - the number of actually written bytes. - (result < size) means error */ -}; -#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) - -typedef enum -{ - SZ_SEEK_SET = 0, - SZ_SEEK_CUR = 1, - SZ_SEEK_END = 2 -} ESzSeek; - - -typedef struct ISeekInStream ISeekInStream; -struct ISeekInStream -{ - SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); -}; -#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) -#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) - - -typedef struct ILookInStream ILookInStream; -struct ILookInStream -{ - SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) > input(*size)) is not allowed - (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(const ILookInStream *p, size_t offset); - /* offset must be <= output(*size) of Look */ - - SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); - /* reads directly (without buffer). 
It's same as ISeqInStream::Read */ - SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); -}; - -#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) -#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) -#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) -#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) - - -SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); - -/* reads via ILookInStream::Read */ -SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); - - - -typedef struct -{ - ILookInStream vt; - const ISeekInStream *realStream; +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define MY_STD_CALL __stdcall +#else +#define MY_STD_CALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_FORCE_INLINE __forceinline + +#define MY_CDECL __cdecl +#define MY_FAST_CALL __fastcall + +#else + +#define MY_NO_INLINE +#define MY_FORCE_INLINE +#define MY_CDECL +#define MY_FAST_CALL + +/* inline keyword : for C++ / C99 */ + +/* GCC, clang: */ +/* +#if defined (__GNUC__) && (__GNUC__ >= 4) +#define MY_FORCE_INLINE __attribute__((always_inline)) +#define MY_NO_INLINE __attribute__((noinline)) +#endif +*/ + +#endif + + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct IByteIn IByteIn; +struct IByteIn +{ + Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) + + +typedef struct IByteOut IByteOut; +struct IByteOut +{ + void (*Write)(const IByteOut *p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) + + +typedef struct ISeqInStream ISeqInStream; +struct ISeqInStream +{ + SRes 
(*Read)(const ISeqInStream *p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) + +/* it can return SZ_ERROR_INPUT_EOF */ +SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); +SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); + + +typedef struct ISeqOutStream ISeqOutStream; +struct ISeqOutStream +{ + size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + + +typedef struct ISeekInStream ISeekInStream; +struct ISeekInStream +{ + SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +typedef struct ILookInStream ILookInStream; +struct ILookInStream +{ + SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(const ILookInStream *p, size_t offset); + /* offset must be <= output(*size) of Look */ + + SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); + /* reads directly (without buffer). 
It's same as ISeqInStream::Read */ + SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); +}; + +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); + + + +typedef struct +{ + ILookInStream vt; + const ISeekInStream *realStream; + + size_t pos; + size_t size; /* it's data size */ - size_t pos; - size_t size; /* it's data size */ - - /* the following variables must be set outside */ - Byte *buf; - size_t bufSize; -} CLookToRead2; - -void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); - -#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } - - -typedef struct -{ - ISeqInStream vt; - const ILookInStream *realStream; -} CSecToLook; - -void SecToLook_CreateVTable(CSecToLook *p); - - - -typedef struct -{ - ISeqInStream vt; - const ILookInStream *realStream; -} CSecToRead; - -void SecToRead_CreateVTable(CSecToRead *p); - - -typedef struct ICompressProgress ICompressProgress; - -struct ICompressProgress -{ - SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); - /* Returns: result. (result != SZ_OK) means break. - Value (UInt64)(Int64)-1 for size means unknown value. 
*/ -}; -#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) - - - -typedef struct ISzAlloc ISzAlloc; -typedef const ISzAlloc * ISzAllocPtr; - -struct ISzAlloc -{ - void *(*Alloc)(ISzAllocPtr p, size_t size); - void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ -}; - -#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) -#define ISzAlloc_Free(p, a) (p)->Free(p, a) - -/* deprecated */ -#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) -#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) - - - - - -#ifndef MY_offsetof - #ifdef offsetof - #define MY_offsetof(type, m) offsetof(type, m) - /* - #define MY_offsetof(type, m) FIELD_OFFSET(type, m) - */ - #else - #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) - #endif -#endif - - - -#ifndef MY_container_of - -/* -#define MY_container_of(ptr, type, m) container_of(ptr, type, m) -#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) -#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) -#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) -*/ - -/* - GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" - GCC 3.4.4 : classes with constructor - GCC 4.8.1 : classes with non-public variable members" -*/ - -#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? 
(ptr) : &((type *)0)->m) - MY_offsetof(type, m))) - - -#endif - -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) - -/* -#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) -*/ -#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) - -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) -/* -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) -*/ - - - -#ifdef _WIN32 - -#define CHAR_PATH_SEPARATOR '\\' -#define WCHAR_PATH_SEPARATOR L'\\' -#define STRING_PATH_SEPARATOR "\\" -#define WSTRING_PATH_SEPARATOR L"\\" - -#else - -#define CHAR_PATH_SEPARATOR '/' -#define WCHAR_PATH_SEPARATOR L'/' -#define STRING_PATH_SEPARATOR "/" -#define WSTRING_PATH_SEPARATOR L"/" - -#endif - -EXTERN_C_END - -#endif + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + + +typedef struct ICompressProgress ICompressProgress; + +struct ICompressProgress +{ + SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. 
*/ +}; +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) + + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc +{ + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef MY_container_of + +/* +#define MY_container_of(ptr, type, m) container_of(ptr, type, m) +#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? 
(ptr) : &((type *)0)->m) - MY_offsetof(type, m))) + + +#endif + +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) + +/* +#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +*/ +#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) + +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) +*/ + + + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define WSTRING_PATH_SEPARATOR L"/" + +#endif + +EXTERN_C_END + +#endif diff --git a/contrib/libs/lzmasdk/Alloc.c b/contrib/libs/lzmasdk/Alloc.c index f2cd4c546b9..bcede4b8563 100644 --- a/contrib/libs/lzmasdk/Alloc.c +++ b/contrib/libs/lzmasdk/Alloc.c @@ -1,26 +1,26 @@ -/* Alloc.c -- Memory allocation functions +/* Alloc.c -- Memory allocation functions 2018-04-27 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - + +#include "Precomp.h" + #include -#ifdef _WIN32 -#include -#endif -#include - -#include "Alloc.h" - -/* #define _SZ_ALLOC_DEBUG */ - -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG +#ifdef _WIN32 +#include +#endif +#include + +#include "Alloc.h" -#include -int g_allocCount = 0; -int g_allocCountMid = 0; -int g_allocCountBig = 0; +/* #define _SZ_ALLOC_DEBUG */ + +/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ +#ifdef _SZ_ALLOC_DEBUG + +#include +int g_allocCount = 0; +int g_allocCountMid = 0; +int g_allocCountBig = 0; #define CONVERT_INT_TO_STR(charType, tempSize) \ @@ -125,128 +125,128 @@ static void PrintAddr(void *p) #define PrintDec(v, align) #define PrintAddr(p) -#endif - +#endif -void *MyAlloc(size_t size) -{ - if (size == 0) - return NULL; - #ifdef 
_SZ_ALLOC_DEBUG - { - void *p = malloc(size); + +void *MyAlloc(size_t size) +{ + if (size == 0) + return NULL; + #ifdef _SZ_ALLOC_DEBUG + { + void *p = malloc(size); PRINT_ALLOC("Alloc ", g_allocCount, size, p); - return p; - } - #else - return malloc(size); - #endif -} - -void MyFree(void *address) -{ + return p; + } + #else + return malloc(size); + #endif +} + +void MyFree(void *address) +{ PRINT_FREE("Free ", g_allocCount, address); - free(address); -} - -#ifdef _WIN32 - -void *MidAlloc(size_t size) -{ - if (size == 0) - return NULL; + free(address); +} + +#ifdef _WIN32 + +void *MidAlloc(size_t size) +{ + if (size == 0) + return NULL; PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); - return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); -} - -void MidFree(void *address) -{ + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +} + +void MidFree(void *address) +{ PRINT_FREE("Free-Mid", g_allocCountMid, address); - if (!address) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#ifndef MEM_LARGE_PAGES -#undef _7ZIP_LARGE_PAGES -#endif - -#ifdef _7ZIP_LARGE_PAGES -SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); -#endif - -void SetLargePageSize() -{ - #ifdef _7ZIP_LARGE_PAGES - SIZE_T size; - GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) - GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (!largePageMinimum) - return; - size = largePageMinimum(); - if (size == 0 || (size & (size - 1)) != 0) - return; - g_LargePageSize = size; - #endif -} - - -void *BigAlloc(size_t size) -{ - if (size == 0) - return NULL; + if (!address) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#ifndef MEM_LARGE_PAGES +#undef _7ZIP_LARGE_PAGES +#endif + +#ifdef _7ZIP_LARGE_PAGES +SIZE_T g_LargePageSize = 0; +typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); +#endif + +void SetLargePageSize() +{ + #ifdef _7ZIP_LARGE_PAGES + SIZE_T size; + GetLargePageMinimumP largePageMinimum = 
(GetLargePageMinimumP) + GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); + if (!largePageMinimum) + return; + size = largePageMinimum(); + if (size == 0 || (size & (size - 1)) != 0) + return; + g_LargePageSize = size; + #endif +} + + +void *BigAlloc(size_t size) +{ + if (size == 0) + return NULL; PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); - - #ifdef _7ZIP_LARGE_PAGES - { - SIZE_T ps = g_LargePageSize; - if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) - { - size_t size2; - ps--; - size2 = (size + ps) & ~ps; - if (size2 >= size) - { - void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); - if (res) - return res; - } - } - } - #endif - - return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); -} - -void BigFree(void *address) -{ + + #ifdef _7ZIP_LARGE_PAGES + { + SIZE_T ps = g_LargePageSize; + if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) + { + size_t size2; + ps--; + size2 = (size + ps) & ~ps; + if (size2 >= size) + { + void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + if (res) + return res; + } + } + } + #endif + + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +} + +void BigFree(void *address) +{ PRINT_FREE("Free-Big", g_allocCountBig, address); - - if (!address) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#endif - - -static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } -static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } + + if (!address) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#endif + + +static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } const ISzAlloc g_Alloc = { SzAlloc, SzFree }; - + static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } static void 
SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; -static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } -static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } +static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } +static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; diff --git a/contrib/libs/lzmasdk/Alloc.h b/contrib/libs/lzmasdk/Alloc.h index fa1e873e071..648237646f2 100644 --- a/contrib/libs/lzmasdk/Alloc.h +++ b/contrib/libs/lzmasdk/Alloc.h @@ -1,39 +1,39 @@ -/* Alloc.h -- Memory allocation functions +/* Alloc.h -- Memory allocation functions 2018-02-19 : Igor Pavlov : Public domain */ - -#ifndef __COMMON_ALLOC_H -#define __COMMON_ALLOC_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -void *MyAlloc(size_t size); -void MyFree(void *address); - -#ifdef _WIN32 - -void SetLargePageSize(); - -void *MidAlloc(size_t size); -void MidFree(void *address); -void *BigAlloc(size_t size); -void BigFree(void *address); - -#else - -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) - -#endif - -extern const ISzAlloc g_Alloc; -extern const ISzAlloc g_BigAlloc; + +#ifndef __COMMON_ALLOC_H +#define __COMMON_ALLOC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void *MyAlloc(size_t size); +void MyFree(void *address); + +#ifdef _WIN32 + +void SetLargePageSize(); + +void *MidAlloc(size_t size); +void MidFree(void *address); +void *BigAlloc(size_t size); +void BigFree(void *address); + +#else + +#define MidAlloc(size) MyAlloc(size) +#define MidFree(address) MyFree(address) +#define BigAlloc(size) MyAlloc(size) +#define BigFree(address) MyFree(address) + +#endif + +extern const ISzAlloc g_Alloc; 
+extern const ISzAlloc g_BigAlloc; extern const ISzAlloc g_MidAlloc; extern const ISzAlloc g_AlignedAlloc; - + typedef struct { @@ -46,6 +46,6 @@ typedef struct void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p); -EXTERN_C_END - -#endif +EXTERN_C_END + +#endif diff --git a/contrib/libs/lzmasdk/Compiler.h b/contrib/libs/lzmasdk/Compiler.h index 625d96a6c86..0cc409d8a86 100644 --- a/contrib/libs/lzmasdk/Compiler.h +++ b/contrib/libs/lzmasdk/Compiler.h @@ -1,33 +1,33 @@ -/* Compiler.h -2017-04-03 : Igor Pavlov : Public domain */ - -#ifndef __7Z_COMPILER_H -#define __7Z_COMPILER_H - -#ifdef _MSC_VER - - #ifdef UNDER_CE - #define RPC_NO_WINDOWS_H - /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ - #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union - #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int - #endif - - #if _MSC_VER >= 1300 - #pragma warning(disable : 4996) // This function or variable may be unsafe - #else - #pragma warning(disable : 4511) // copy constructor could not be generated - #pragma warning(disable : 4512) // assignment operator could not be generated - #pragma warning(disable : 4514) // unreferenced inline function has been removed - #pragma warning(disable : 4702) // unreachable code - #pragma warning(disable : 4710) // not inlined - #pragma warning(disable : 4714) // function marked as __forceinline not inlined - #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information - #endif - -#endif - -#define UNUSED_VAR(x) (void)x; -/* #define UNUSED_VAR(x) x=x; */ - -#endif +/* Compiler.h +2017-04-03 : Igor Pavlov : Public domain */ + +#ifndef __7Z_COMPILER_H +#define __7Z_COMPILER_H + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 
4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + + #if _MSC_VER >= 1300 + #pragma warning(disable : 4996) // This function or variable may be unsafe + #else + #pragma warning(disable : 4511) // copy constructor could not be generated + #pragma warning(disable : 4512) // assignment operator could not be generated + #pragma warning(disable : 4514) // unreferenced inline function has been removed + #pragma warning(disable : 4702) // unreachable code + #pragma warning(disable : 4710) // not inlined + #pragma warning(disable : 4714) // function marked as __forceinline not inlined + #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information + #endif + +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/contrib/libs/lzmasdk/LzFind.c b/contrib/libs/lzmasdk/LzFind.c index 6758a4478be..df55e86c146 100644 --- a/contrib/libs/lzmasdk/LzFind.c +++ b/contrib/libs/lzmasdk/LzFind.c @@ -1,422 +1,422 @@ -/* LzFind.c -- Match finder for LZ algorithms +/* LzFind.c -- Match finder for LZ algorithms 2018-07-08 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include - -#include "LzFind.h" -#include "LzHash.h" - -#define kEmptyHashValue 0 -#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) -#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) -#define kMaxHistorySize ((UInt32)7 << 29) - -#define kStartMaxLen 3 - -static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) -{ - if (!p->directInput) - { - ISzAlloc_Free(alloc, p->bufferBase); - p->bufferBase = NULL; - } -} - -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ - -static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) -{ - UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + 
keepSizeReserv; - if (p->directInput) - { - p->blockSize = blockSize; - return 1; - } - if (!p->bufferBase || p->blockSize != blockSize) - { - LzInWindow_Free(p, alloc); - p->blockSize = blockSize; - p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); - } - return (p->bufferBase != NULL); -} - -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } - -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } - -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) -{ - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} - -static void MatchFinder_ReadBlock(CMatchFinder *p) -{ - if (p->streamEndWasReached || p->result != SZ_OK) - return; - - /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */ - - if (p->directInput) - { - UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); - if (curSize > p->directInputRem) - curSize = (UInt32)p->directInputRem; - p->directInputRem -= curSize; - p->streamPos += curSize; - if (p->directInputRem == 0) - p->streamEndWasReached = 1; - return; - } - - for (;;) - { - Byte *dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); - if (size == 0) - return; - - p->result = ISeqInStream_Read(p->stream, dest, &size); - if (p->result != SZ_OK) - return; - if (size == 0) - { - p->streamEndWasReached = 1; - return; - } - p->streamPos += (UInt32)size; - if (p->streamPos - p->pos > p->keepSizeAfter) - return; - } -} - -void MatchFinder_MoveBlock(CMatchFinder *p) -{ - memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos) + p->keepSizeBefore); - p->buffer = p->bufferBase + p->keepSizeBefore; -} - -int MatchFinder_NeedMove(CMatchFinder *p) -{ - if (p->directInput) - return 0; - /* if (p->streamEndWasReached) return 0; */ - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); -} - -void 
MatchFinder_ReadIfRequired(CMatchFinder *p) -{ - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) -{ - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_SetDefaultSettings(CMatchFinder *p) -{ - p->cutValue = 32; - p->btMode = 1; - p->numHashBytes = 4; - p->bigHash = 0; -} - -#define kCrcPoly 0xEDB88320 - -void MatchFinder_Construct(CMatchFinder *p) -{ + +#include "Precomp.h" + +#include + +#include "LzFind.h" +#include "LzHash.h" + +#define kEmptyHashValue 0 +#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) +#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ +#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) +#define kMaxHistorySize ((UInt32)7 << 29) + +#define kStartMaxLen 3 + +static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) +{ + if (!p->directInput) + { + ISzAlloc_Free(alloc, p->bufferBase); + p->bufferBase = NULL; + } +} + +/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ + +static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) +{ + UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; + if (p->directInput) + { + p->blockSize = blockSize; + return 1; + } + if (!p->bufferBase || p->blockSize != blockSize) + { + LzInWindow_Free(p, alloc); + p->blockSize = blockSize; + p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); + } + return (p->bufferBase != NULL); +} + +Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } + +UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } + +void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) +{ + p->posLimit -= subValue; + p->pos -= subValue; + p->streamPos -= subValue; +} + +static void MatchFinder_ReadBlock(CMatchFinder *p) +{ + if 
(p->streamEndWasReached || p->result != SZ_OK) + return; + + /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */ + + if (p->directInput) + { + UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); + if (curSize > p->directInputRem) + curSize = (UInt32)p->directInputRem; + p->directInputRem -= curSize; + p->streamPos += curSize; + if (p->directInputRem == 0) + p->streamEndWasReached = 1; + return; + } + + for (;;) + { + Byte *dest = p->buffer + (p->streamPos - p->pos); + size_t size = (p->bufferBase + p->blockSize - dest); + if (size == 0) + return; + + p->result = ISeqInStream_Read(p->stream, dest, &size); + if (p->result != SZ_OK) + return; + if (size == 0) + { + p->streamEndWasReached = 1; + return; + } + p->streamPos += (UInt32)size; + if (p->streamPos - p->pos > p->keepSizeAfter) + return; + } +} + +void MatchFinder_MoveBlock(CMatchFinder *p) +{ + memmove(p->bufferBase, + p->buffer - p->keepSizeBefore, + (size_t)(p->streamPos - p->pos) + p->keepSizeBefore); + p->buffer = p->bufferBase + p->keepSizeBefore; +} + +int MatchFinder_NeedMove(CMatchFinder *p) +{ + if (p->directInput) + return 0; + /* if (p->streamEndWasReached) return 0; */ + return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); +} + +void MatchFinder_ReadIfRequired(CMatchFinder *p) +{ + if (p->streamEndWasReached) + return; + if (p->keepSizeAfter >= p->streamPos - p->pos) + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) +{ + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_SetDefaultSettings(CMatchFinder *p) +{ + p->cutValue = 32; + p->btMode = 1; + p->numHashBytes = 4; + p->bigHash = 0; +} + +#define kCrcPoly 0xEDB88320 + +void MatchFinder_Construct(CMatchFinder *p) +{ unsigned i; - p->bufferBase = NULL; - p->directInput = 0; - p->hash = NULL; - p->expectedDataSize = (UInt64)(Int64)-1; - MatchFinder_SetDefaultSettings(p); - - for (i = 
0; i < 256; i++) - { + p->bufferBase = NULL; + p->directInput = 0; + p->hash = NULL; + p->expectedDataSize = (UInt64)(Int64)-1; + MatchFinder_SetDefaultSettings(p); + + for (i = 0; i < 256; i++) + { UInt32 r = (UInt32)i; - unsigned j; - for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); - p->crc[i] = r; - } -} - -static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->hash); - p->hash = NULL; -} - -void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) -{ - MatchFinder_FreeThisClassMemory(p, alloc); - LzInWindow_Free(p, alloc); -} - -static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) -{ - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); - if (sizeInBytes / sizeof(CLzRef) != num) - return NULL; - return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); -} - -int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAllocPtr alloc) -{ - UInt32 sizeReserv; - - if (historySize > kMaxHistorySize) - { - MatchFinder_Free(p, alloc); - return 0; - } - - sizeReserv = historySize >> 1; - if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3; - else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2; - - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); - - p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - - if (LzInWindow_Create(p, sizeReserv, alloc)) - { - UInt32 newCyclicBufferSize = historySize + 1; - UInt32 hs; - p->matchMaxLen = matchMaxLen; - { - p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else - { - hs = historySize; - if (hs > p->expectedDataSize) - hs = (UInt32)p->expectedDataSize; - if (hs != 0) - hs--; - hs |= (hs >> 1); - hs |= (hs >> 
2); - hs |= (hs >> 4); - hs |= (hs >> 8); - hs >>= 1; - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) - { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ - } - } - p->hashMask = hs; - hs++; - if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; - hs += p->fixedHashSize; - } - - { - size_t newSize; - size_t numSons; - p->historySize = historySize; - p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; - - numSons = newCyclicBufferSize; - if (p->btMode) - numSons <<= 1; - newSize = hs + numSons; - - if (p->hash && p->numRefs == newSize) - return 1; - - MatchFinder_FreeThisClassMemory(p, alloc); - p->numRefs = newSize; - p->hash = AllocRefs(newSize, alloc); - - if (p->hash) - { - p->son = p->hash + p->hashSizeSum; - return 1; - } - } - } - - MatchFinder_Free(p, alloc); - return 0; -} - -static void MatchFinder_SetLimits(CMatchFinder *p) -{ - UInt32 limit = kMaxValForNormalize - p->pos; - UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; - - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; - - if (limit2 <= p->keepSizeAfter) - { - if (limit2 > 0) - limit2 = 1; - } - else - limit2 -= p->keepSizeAfter; - - if (limit2 < limit) - limit = limit2; - - { - UInt32 lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; - } - p->posLimit = p->pos + limit; -} - - -void MatchFinder_Init_LowHash(CMatchFinder *p) -{ - size_t i; - CLzRef *items = p->hash; - size_t numItems = p->fixedHashSize; - for (i = 0; i < numItems; i++) - items[i] = kEmptyHashValue; -} - - -void MatchFinder_Init_HighHash(CMatchFinder *p) -{ - size_t i; - CLzRef *items = p->hash + p->fixedHashSize; - size_t numItems = (size_t)p->hashMask + 1; 
- for (i = 0; i < numItems; i++) - items[i] = kEmptyHashValue; -} - - -void MatchFinder_Init_3(CMatchFinder *p, int readData) -{ - p->cyclicBufferPos = 0; - p->buffer = p->bufferBase; - p->pos = - p->streamPos = p->cyclicBufferSize; - p->result = SZ_OK; - p->streamEndWasReached = 0; - - if (readData) - MatchFinder_ReadBlock(p); - - MatchFinder_SetLimits(p); -} - - -void MatchFinder_Init(CMatchFinder *p) -{ - MatchFinder_Init_HighHash(p); - MatchFinder_Init_LowHash(p); - MatchFinder_Init_3(p, True); -} - - -static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) -{ - return (p->pos - p->historySize - 1) & kNormalizeMask; -} - -void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) -{ - size_t i; - for (i = 0; i < numItems; i++) - { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; - } -} - -static void MatchFinder_Normalize(CMatchFinder *p) -{ - UInt32 subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->numRefs); - MatchFinder_ReduceOffsets(p, subValue); -} - + unsigned j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); + p->crc[i] = r; + } +} + +static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->hash); + p->hash = NULL; +} + +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) +{ + MatchFinder_FreeThisClassMemory(p, alloc); + LzInWindow_Free(p, alloc); +} + +static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) +{ + size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + if (sizeInBytes / sizeof(CLzRef) != num) + return NULL; + return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); +} + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc) +{ + UInt32 sizeReserv; + + if (historySize > kMaxHistorySize) + { + MatchFinder_Free(p, alloc); + 
return 0; + } + + sizeReserv = historySize >> 1; + if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3; + else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2; + + sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); + + p->keepSizeBefore = historySize + keepAddBufferBefore + 1; + p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; + + /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ + + if (LzInWindow_Create(p, sizeReserv, alloc)) + { + UInt32 newCyclicBufferSize = historySize + 1; + UInt32 hs; + p->matchMaxLen = matchMaxLen; + { + p->fixedHashSize = 0; + if (p->numHashBytes == 2) + hs = (1 << 16) - 1; + else + { + hs = historySize; + if (hs > p->expectedDataSize) + hs = (UInt32)p->expectedDataSize; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + hs |= 0xFFFF; /* don't change it! It's required for Deflate */ + if (hs > (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + } + p->hashMask = hs; + hs++; + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; + if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; + if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; + hs += p->fixedHashSize; + } + + { + size_t newSize; + size_t numSons; + p->historySize = historySize; + p->hashSizeSum = hs; + p->cyclicBufferSize = newCyclicBufferSize; + + numSons = newCyclicBufferSize; + if (p->btMode) + numSons <<= 1; + newSize = hs + numSons; + + if (p->hash && p->numRefs == newSize) + return 1; + + MatchFinder_FreeThisClassMemory(p, alloc); + p->numRefs = newSize; + p->hash = AllocRefs(newSize, alloc); + + if (p->hash) + { + p->son = p->hash + p->hashSizeSum; + return 1; + } + } + } + + MatchFinder_Free(p, alloc); + return 0; +} + +static void 
MatchFinder_SetLimits(CMatchFinder *p) +{ + UInt32 limit = kMaxValForNormalize - p->pos; + UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; + + if (limit2 < limit) + limit = limit2; + limit2 = p->streamPos - p->pos; + + if (limit2 <= p->keepSizeAfter) + { + if (limit2 > 0) + limit2 = 1; + } + else + limit2 -= p->keepSizeAfter; + + if (limit2 < limit) + limit = limit2; + + { + UInt32 lenLimit = p->streamPos - p->pos; + if (lenLimit > p->matchMaxLen) + lenLimit = p->matchMaxLen; + p->lenLimit = lenLimit; + } + p->posLimit = p->pos + limit; +} + + +void MatchFinder_Init_LowHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash; + size_t numItems = p->fixedHashSize; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_HighHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash + p->fixedHashSize; + size_t numItems = (size_t)p->hashMask + 1; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_3(CMatchFinder *p, int readData) +{ + p->cyclicBufferPos = 0; + p->buffer = p->bufferBase; + p->pos = + p->streamPos = p->cyclicBufferSize; + p->result = SZ_OK; + p->streamEndWasReached = 0; + + if (readData) + MatchFinder_ReadBlock(p); + + MatchFinder_SetLimits(p); +} + + +void MatchFinder_Init(CMatchFinder *p) +{ + MatchFinder_Init_HighHash(p); + MatchFinder_Init_LowHash(p); + MatchFinder_Init_3(p, True); +} + + +static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) +{ + return (p->pos - p->historySize - 1) & kNormalizeMask; +} + +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) +{ + size_t i; + for (i = 0; i < numItems; i++) + { + UInt32 value = items[i]; + if (value <= subValue) + value = kEmptyHashValue; + else + value -= subValue; + items[i] = value; + } +} + +static void MatchFinder_Normalize(CMatchFinder *p) +{ + UInt32 subValue = MatchFinder_GetSubValue(p); + MatchFinder_Normalize3(subValue, p->hash, p->numRefs); + 
MatchFinder_ReduceOffsets(p, subValue); +} + MY_NO_INLINE -static void MatchFinder_CheckLimits(CMatchFinder *p) -{ - if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); - if (p->cyclicBufferPos == p->cyclicBufferSize) - p->cyclicBufferPos = 0; - MatchFinder_SetLimits(p); -} - +static void MatchFinder_CheckLimits(CMatchFinder *p) +{ + if (p->pos == kMaxValForNormalize) + MatchFinder_Normalize(p); + if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) + MatchFinder_CheckAndMoveAndRead(p); + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); +} + /* (lenLimit > maxLen) */ MY_FORCE_INLINE static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *distances, unsigned maxLen) -{ +{ /* - son[_cyclicBufferPos] = curMatch; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances; - { - const Byte *pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - if (pb[maxLen] == cur[maxLen] && *pb == *cur) - { - UInt32 len = 0; - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { + son[_cyclicBufferPos] = curMatch; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return distances; + { + const Byte *pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? 
_cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) + { + UInt32 len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { maxLen = len; *distances++ = len; - *distances++ = delta - 1; - if (len == lenLimit) - return distances; - } - } - } - } + *distances++ = delta - 1; + if (len == lenLimit) + return distances; + } + } + } + } */ const Byte *lim = cur + lenLimit; @@ -458,670 +458,670 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos while (--cutValue); return distances; -} - +} + MY_FORCE_INLINE -UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, UInt32 maxLen) -{ +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *distances, UInt32 maxLen) +{ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); unsigned len0 = 0, len1 = 0; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; - } - { + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return distances; + } + { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; + const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? 
len0 : len1); UInt32 pair0 = pair[0]; - if (pb[len] == cur[len]) - { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { maxLen = (UInt32)len; *distances++ = (UInt32)len; - *distances++ = delta - 1; - if (len == lenLimit) - { + *distances++ = delta - 1; + if (len == lenLimit) + { *ptr1 = pair0; - *ptr0 = pair[1]; - return distances; - } - } - } - if (pb[len] < cur[len]) - { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } - else - { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) -{ + *ptr0 = pair[1]; + return distances; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) +{ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); unsigned len0 = 0, len1 = 0; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } - { + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return; + } + { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? 
_cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; + const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); - if (pb[len] == cur[len]) - { - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - { - if (len == lenLimit) - { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return; - } - } - } - if (pb[len] < cur[len]) - { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } - else - { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -#define MOVE_POS \ - ++p->cyclicBufferPos; \ - p->buffer++; \ - if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); - + if (pb[len] == cur[len]) + { + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + { + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +#define MOVE_POS \ + ++p->cyclicBufferPos; \ + p->buffer++; \ + if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); + #define MOVE_POS_RET MOVE_POS return (UInt32)offset; - -static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } - -#define GET_MATCHES_HEADER2(minLen, ret_op) \ + +static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } + +#define GET_MATCHES_HEADER2(minLen, ret_op) \ unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ - cur = p->buffer; - -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) - -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue - -#define GET_MATCHES_FOOTER(offset, maxLen) \ + cur = p->buffer; + +#define GET_MATCHES_HEADER(minLen) 
GET_MATCHES_HEADER2(minLen, return 0) +#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define GET_MATCHES_FOOTER(offset, maxLen) \ offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET; - -#define SKIP_FOOTER \ + +#define SKIP_FOOTER \ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; - -#define UPDATE_maxLen { \ - ptrdiff_t diff = (ptrdiff_t)0 - d2; \ - const Byte *c = cur + maxLen; \ - const Byte *lim = cur + lenLimit; \ - for (; c != lim; c++) if (*(c + diff) != *c) break; \ + +#define UPDATE_maxLen { \ + ptrdiff_t diff = (ptrdiff_t)0 - d2; \ + const Byte *c = cur + maxLen; \ + const Byte *lim = cur + lenLimit; \ + for (; c != lim; c++) if (*(c + diff) != *c) break; \ maxLen = (unsigned)(c - cur); } - -static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ + +static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ unsigned offset; - GET_MATCHES_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 1) -} - -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ + GET_MATCHES_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 1) +} + +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ unsigned offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 2) -} - -static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 2) +} + +static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder 
*p, UInt32 *distances) +{ UInt32 h2, d2, pos; unsigned maxLen, offset; - UInt32 *hash; - GET_MATCHES_HEADER(3) - - HASH3_CALC; - - hash = p->hash; - pos = p->pos; - - d2 = pos - hash[h2]; - - curMatch = (hash + kFix3HashSize)[hv]; - - hash[h2] = pos; - (hash + kFix3HashSize)[hv] = pos; - - maxLen = 2; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - UPDATE_maxLen + UInt32 *hash; + GET_MATCHES_HEADER(3) + + HASH3_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash[h2]; + + curMatch = (hash + kFix3HashSize)[hv]; + + hash[h2] = pos; + (hash + kFix3HashSize)[hv] = pos; + + maxLen = 2; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + UPDATE_maxLen distances[0] = (UInt32)maxLen; - distances[1] = d2 - 1; - offset = 2; - if (maxLen == lenLimit) - { + distances[1] = d2 - 1; + offset = 2; + if (maxLen == lenLimit) + { SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ + MOVE_POS_RET; + } + } + + GET_MATCHES_FOOTER(offset, maxLen) +} + +static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ UInt32 h2, h3, d2, d3, pos; unsigned maxLen, offset; - UInt32 *hash; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - hash = p->hash; - pos = p->pos; - + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + hash = p->hash; + pos = p->pos; + d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - - curMatch = (hash + kFix4HashSize)[hv]; - + d3 = pos - (hash + kFix3HashSize)[h3]; + + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < p->cyclicBufferSize && 
*(cur - d2) == *cur) + { maxLen = 2; distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) - { - UPDATE_maxLen + distances[1] = d2 - 1; + offset = 2; + } + + if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + maxLen = 3; + distances[(size_t)offset + 1] = d3 - 1; + offset += 2; + d2 = d3; + } + + if (offset != 0) + { + UPDATE_maxLen distances[(size_t)offset - 2] = (UInt32)maxLen; - if (maxLen == lenLimit) - { + if (maxLen == lenLimit) + { SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - - if (maxLen < 3) - maxLen = 3; - - GET_MATCHES_FOOTER(offset, maxLen) -} - -/* -static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; - UInt32 *hash; - GET_MATCHES_HEADER(5) - - HASH5_CALC; - - hash = p->hash; - pos = p->pos; - + MOVE_POS_RET; + } + } + + if (maxLen < 3) + maxLen = 3; + + GET_MATCHES_FOOTER(offset, maxLen) +} + +/* +static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; - - curMatch = (hash + kFix5HashSize)[hv]; - + d3 = pos - (hash + kFix3HashSize)[h3]; + d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; - (hash + kFix5HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; 
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; - d2 = d3; - } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { - UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; - if (maxLen == lenLimit) - { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - - if (maxLen < 4) - maxLen = 4; - - GET_MATCHES_FOOTER(offset, maxLen) -} -*/ - -static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + distances[0] = maxLen = 3; + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[2] = maxLen = 3; + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[0] = maxLen = 3; + distances[1] = d3 - 1; + offset = 2; + d2 = d3; + } + + if (d2 != d4 && d4 < p->cyclicBufferSize + && *(cur - d4) == *cur + && *(cur - d4 + 3) == *(cur + 3)) + { + maxLen = 4; + distances[(size_t)offset + 1] = d4 - 1; + offset += 2; + d2 = d4; + } + + if (offset != 0) + { + UPDATE_maxLen + distances[(size_t)offset - 2] = maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + + if (maxLen < 4) + maxLen = 4; + + GET_MATCHES_FOOTER(offset, maxLen) +} +*/ + +static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder 
*p, UInt32 *distances) +{ UInt32 h2, h3, d2, d3, pos; unsigned maxLen, offset; - UInt32 *hash; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - hash = p->hash; - pos = p->pos; - + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + hash = p->hash; + pos = p->pos; + d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - curMatch = (hash + kFix4HashSize)[hv]; - + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { maxLen = 2; distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) - { - UPDATE_maxLen + distances[1] = d2 - 1; + offset = 2; + } + + if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + maxLen = 3; + distances[(size_t)offset + 1] = d3 - 1; + offset += 2; + d2 = d3; + } + + if (offset != 0) + { + UPDATE_maxLen distances[(size_t)offset - 2] = (UInt32)maxLen; - if (maxLen == lenLimit) - { - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; - } - } - - if (maxLen < 3) - maxLen = 3; - + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + } + + if (maxLen < 3) + maxLen = 3; + offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET -} - -/* -static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos - UInt32 *hash; - GET_MATCHES_HEADER(5) - - HASH5_CALC; - - hash = p->hash; - pos = p->pos; - + 
distances + offset, maxLen) - (distances)); + MOVE_POS_RET +} + +/* +static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; - - curMatch = (hash + kFix5HashSize)[hv]; - + d3 = pos - (hash + kFix3HashSize)[h3]; + d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; - (hash + kFix5HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; - d2 = d3; - } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { - UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; - if (maxLen == lenLimit) - { - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; - } - } - - if (maxLen < 4) - maxLen = 4; - - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET -} -*/ - -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < 
p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + distances[0] = maxLen = 3; + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[2] = maxLen = 3; + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[0] = maxLen = 3; + distances[1] = d3 - 1; + offset = 2; + d2 = d3; + } + + if (d2 != d4 && d4 < p->cyclicBufferSize + && *(cur - d4) == *cur + && *(cur - d4 + 3) == *(cur + 3)) + { + maxLen = 4; + distances[(size_t)offset + 1] = d4 - 1; + offset += 2; + d2 = d4; + } + + if (offset != 0) + { + UPDATE_maxLen + distances[(size_t)offset - 2] = maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + } + + if (maxLen < 4) + maxLen = 4; + + offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances + offset, maxLen) - (distances)); + MOVE_POS_RET +} +*/ + +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ unsigned offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - (distances)); - MOVE_POS_RET -} - -static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2; - UInt32 *hash; - SKIP_HEADER(3) - HASH3_CALC; - hash = p->hash; - curMatch = 
(hash + kFix3HashSize)[hv]; - hash[h2] = - (hash + kFix3HashSize)[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3; - UInt32 *hash; - SKIP_HEADER(4) - HASH4_CALC; - hash = p->hash; - curMatch = (hash + kFix4HashSize)[hv]; + distances, 2) - (distances)); + MOVE_POS_RET +} + +static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2; + UInt32 *hash; + SKIP_HEADER(3) + HASH3_CALC; + hash = p->hash; + curMatch = (hash + kFix3HashSize)[hv]; + hash[h2] = + (hash + kFix3HashSize)[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3; + UInt32 *hash; + SKIP_HEADER(4) + HASH4_CALC; + hash = p->hash; + curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -/* -static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3, h4; - UInt32 *hash; - SKIP_HEADER(5) - HASH5_CALC; - hash = p->hash; - curMatch = (hash + kFix5HashSize)[hv]; + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +/* +static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3, h4; + UInt32 *hash; + SKIP_HEADER(5) + HASH5_CALC; + hash = p->hash; + curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = - 
(hash + kFix5HashSize)[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} -*/ - -static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3; - UInt32 *hash; - SKIP_HEADER(4) - HASH4_CALC; - hash = p->hash; - curMatch = (hash + kFix4HashSize)[hv]; + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} +*/ + +static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3; + UInt32 *hash; + SKIP_HEADER(4) + HASH4_CALC; + hash = p->hash; + curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); -} - -/* -static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3, h4; - UInt32 *hash; - SKIP_HEADER(5) - HASH5_CALC; - hash = p->hash; - curMatch = hash + kFix5HashSize)[hv]; + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} + +/* +static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3, h4; + UInt32 *hash; + SKIP_HEADER(5) + HASH5_CALC; + hash = p->hash; + curMatch = hash + kFix5HashSize)[hv]; hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = - (hash + kFix5HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); -} -*/ - -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); -} - -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) -{ - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetNumAvailableBytes = 
(Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; - if (!p->btMode) - { - /* if (p->numHashBytes <= 4) */ - { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; - } - /* - else - { - vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; - } - */ - } - else if (p->numHashBytes == 2) - { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; - } - else if (p->numHashBytes == 3) - { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; - } - else /* if (p->numHashBytes == 4) */ - { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; - } - /* - else - { - vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; - } - */ -} + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} +*/ + +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) +{ + vTable->Init = (Mf_Init_Func)MatchFinder_Init; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + if (!p->btMode) + { + /* if (p->numHashBytes <= 4) */ + { + vTable->GetMatches = 
(Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + } + /* + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + } + */ + } + else if (p->numHashBytes == 2) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + } + else if (p->numHashBytes == 3) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + } + else /* if (p->numHashBytes == 4) */ + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + } + /* + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + } + */ +} diff --git a/contrib/libs/lzmasdk/LzFind.h b/contrib/libs/lzmasdk/LzFind.h index d77158bd85c..42c13be157c 100644 --- a/contrib/libs/lzmasdk/LzFind.h +++ b/contrib/libs/lzmasdk/LzFind.h @@ -1,121 +1,121 @@ -/* LzFind.h -- Match finder for LZ algorithms -2017-06-10 : Igor Pavlov : Public domain */ - -#ifndef __LZ_FIND_H -#define __LZ_FIND_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -typedef UInt32 CLzRef; - -typedef struct _CMatchFinder -{ - Byte *buffer; - UInt32 pos; - UInt32 posLimit; - UInt32 streamPos; - UInt32 lenLimit; - - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ - - Byte streamEndWasReached; - Byte btMode; - Byte bigHash; - Byte directInput; - - UInt32 matchMaxLen; - CLzRef *hash; - CLzRef *son; - UInt32 hashMask; - UInt32 cutValue; - - Byte *bufferBase; - ISeqInStream *stream; - - UInt32 blockSize; - UInt32 keepSizeBefore; - UInt32 keepSizeAfter; - - UInt32 numHashBytes; - size_t directInputRem; - UInt32 historySize; - UInt32 fixedHashSize; - UInt32 hashSizeSum; - SRes result; - UInt32 crc[256]; - size_t numRefs; - - 
UInt64 expectedDataSize; -} CMatchFinder; - -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) - -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) - -#define Inline_MatchFinder_IsFinishedOK(p) \ - ((p)->streamEndWasReached \ - && (p)->streamPos == (p)->pos \ - && (!(p)->directInput || (p)->directInputRem == 0)) - -int MatchFinder_NeedMove(CMatchFinder *p); -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); -void MatchFinder_MoveBlock(CMatchFinder *p); -void MatchFinder_ReadIfRequired(CMatchFinder *p); - -void MatchFinder_Construct(CMatchFinder *p); - -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB -*/ -int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAllocPtr alloc); -void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); -void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); - -UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *distances, UInt32 maxLen); - -/* -Conditions: - Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. 
- Mf_GetPointerToCurrentPos_Func's result must be used only before any other function -*/ - -typedef void (*Mf_Init_Func)(void *object); -typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); -typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); -typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); -typedef void (*Mf_Skip_Func)(void *object, UInt32); - -typedef struct _IMatchFinder -{ - Mf_Init_Func Init; - Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; - Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; - Mf_GetMatches_Func GetMatches; - Mf_Skip_Func Skip; -} IMatchFinder; - -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); - -void MatchFinder_Init_LowHash(CMatchFinder *p); -void MatchFinder_Init_HighHash(CMatchFinder *p); -void MatchFinder_Init_3(CMatchFinder *p, int readData); -void MatchFinder_Init(CMatchFinder *p); - -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); - -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); - -EXTERN_C_END - -#endif +/* LzFind.h -- Match finder for LZ algorithms +2017-06-10 : Igor Pavlov : Public domain */ + +#ifndef __LZ_FIND_H +#define __LZ_FIND_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef UInt32 CLzRef; + +typedef struct _CMatchFinder +{ + Byte *buffer; + UInt32 pos; + UInt32 posLimit; + UInt32 streamPos; + UInt32 lenLimit; + + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + + Byte streamEndWasReached; + Byte btMode; + Byte bigHash; + Byte directInput; + + UInt32 matchMaxLen; + CLzRef *hash; + CLzRef *son; + UInt32 hashMask; + UInt32 cutValue; + + Byte *bufferBase; + ISeqInStream *stream; + + UInt32 blockSize; + UInt32 keepSizeBefore; + UInt32 keepSizeAfter; + + UInt32 numHashBytes; + size_t directInputRem; + UInt32 historySize; + UInt32 
fixedHashSize; + UInt32 hashSizeSum; + SRes result; + UInt32 crc[256]; + size_t numRefs; + + UInt64 expectedDataSize; +} CMatchFinder; + +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) + +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) + +#define Inline_MatchFinder_IsFinishedOK(p) \ + ((p)->streamEndWasReached \ + && (p)->streamPos == (p)->pos \ + && (!(p)->directInput || (p)->directInputRem == 0)) + +int MatchFinder_NeedMove(CMatchFinder *p); +Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +void MatchFinder_MoveBlock(CMatchFinder *p); +void MatchFinder_ReadIfRequired(CMatchFinder *p); + +void MatchFinder_Construct(CMatchFinder *p); + +/* Conditions: + historySize <= 3 GB + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +*/ +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc); +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); +void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); + +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *distances, UInt32 maxLen); + +/* +Conditions: + Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. 
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function +*/ + +typedef void (*Mf_Init_Func)(void *object); +typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); +typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); +typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef void (*Mf_Skip_Func)(void *object, UInt32); + +typedef struct _IMatchFinder +{ + Mf_Init_Func Init; + Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; + Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; + Mf_GetMatches_Func GetMatches; + Mf_Skip_Func Skip; +} IMatchFinder; + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); + +void MatchFinder_Init_LowHash(CMatchFinder *p); +void MatchFinder_Init_HighHash(CMatchFinder *p); +void MatchFinder_Init_3(CMatchFinder *p, int readData); +void MatchFinder_Init(CMatchFinder *p); + +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); + +EXTERN_C_END + +#endif diff --git a/contrib/libs/lzmasdk/LzHash.h b/contrib/libs/lzmasdk/LzHash.h index 61d9a5d80e6..e7c942303dd 100644 --- a/contrib/libs/lzmasdk/LzHash.h +++ b/contrib/libs/lzmasdk/LzHash.h @@ -1,57 +1,57 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2015-04-12 : Igor Pavlov : Public domain */ - -#ifndef __LZ_HASH_H -#define __LZ_HASH_H - -#define kHash2Size (1 << 10) -#define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) - -#define kFix3HashSize (kHash2Size) -#define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) - -#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); - -#define HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - hv = (temp ^ ((UInt32)cur[2] << 8)) & 
p->hashMask; } - -#define HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } - -#define HASH5_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - temp ^= (p->crc[cur[3]] << 5); \ - h4 = temp & (kHash4Size - 1); \ - hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } - -/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - - -#define MT_HASH2_CALC \ - h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } - -#define MT_HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } - -#endif +/* LzHash.h -- HASH functions for LZ algorithms +2015-04-12 : Igor Pavlov : Public domain */ + +#ifndef __LZ_HASH_H +#define __LZ_HASH_H + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +#define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) +#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); + +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + hv = (temp 
^ (p->crc[cur[3]] << 5)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + temp ^= (p->crc[cur[3]] << 5); \ + h4 = temp & (kHash4Size - 1); \ + hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } + +/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ +#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + + +#define MT_HASH2_CALC \ + h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +#define MT_HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } + +#endif diff --git a/contrib/libs/lzmasdk/LzmaDec.c b/contrib/libs/lzmasdk/LzmaDec.c index 100e0a691fd..ba3e1dd50ef 100644 --- a/contrib/libs/lzmasdk/LzmaDec.c +++ b/contrib/libs/lzmasdk/LzmaDec.c @@ -1,31 +1,31 @@ -/* LzmaDec.c -- LZMA Decoder +/* LzmaDec.c -- LZMA Decoder 2018-07-04 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - + +#include "Precomp.h" + #include /* #include "CpuArch.h" */ -#include "LzmaDec.h" - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_INIT_SIZE 5 - -#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } - +#include "LzmaDec.h" + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +#define RC_INIT_SIZE 5 + +#define NORMALIZE 
if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) -#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); -#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); -#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ - { UPDATE_0(p); i = (i + i); A0; } else \ - { UPDATE_1(p); i = (i + i) + 1; A1; } - +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p); i = (i + i); A0; } else \ + { UPDATE_1(p); i = (i + i) + 1; A1; } + #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ @@ -35,85 +35,85 @@ #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) -#define TREE_DECODE(probs, limit, i) \ - { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } - -/* #define _LZMA_SIZE_OPT */ - -#ifdef _LZMA_SIZE_OPT -#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) -#else -#define TREE_6_DECODE(probs, i) \ - { i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - i -= 0x40; } -#endif - +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define _LZMA_SIZE_OPT */ + +#ifdef _LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + 
TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + i -= 0x40; } +#endif + #define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) -#define MATCHED_LITER_DEC \ +#define MATCHED_LITER_DEC \ matchByte += matchByte; \ bit = offs; \ offs &= matchByte; \ probLit = prob + (offs + bit + symbol); \ GET_BIT2(probLit, symbol, offs ^= bit; , ;) - -#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } - + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } + #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) -#define UPDATE_0_CHECK range = bound; -#define UPDATE_1_CHECK range -= bound; code -= bound; -#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ - { UPDATE_0_CHECK; i = (i + i); A0; } else \ - { UPDATE_1_CHECK; i = (i + i) + 1; A1; } -#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) -#define TREE_DECODE_CHECK(probs, limit, i) \ - { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } - - +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK; i = (i + i); A0; } else \ + { UPDATE_1_CHECK; i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ { UPDATE_0_CHECK; i += m; m += m; } else \ { UPDATE_1_CHECK; m += m; i += m; } -#define kNumPosBitsMax 4 -#define kNumPosStatesMax (1 << kNumPosBitsMax) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - 
+#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + #define LenLow 0 #define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) -#define kNumLenProbs (LenHigh + kLenNumHighSymbols) - +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + #define LenChoice LenLow #define LenChoice2 (LenLow + (1 << kLenNumLowBits)) - -#define kNumStates 12 + +#define kNumStates 12 #define kNumStates2 16 -#define kNumLitStates 7 - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -#define kNumPosSlotBits 6 -#define kNumLenToPosStates 4 - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) - -#define kMatchMinLen 2 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) - + /* External ASM code needs same CLzmaProb array layout. So don't change it. 
*/ /* (probs_1664) is faster and better for code size at some platforms */ @@ -137,22 +137,22 @@ #define IsMatch (LenCoder + kNumLenProbs) #define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) #define IsRep (Align + kAlignTableSize) -#define IsRepG0 (IsRep + kNumStates) -#define IsRepG1 (IsRepG0 + kNumStates) -#define IsRepG2 (IsRepG1 + kNumStates) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) #define PosSlot (IsRepG2 + kNumStates) #define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) #define NUM_BASE_PROBS (Literal + kStartOffset) - + #if Align != 0 && kStartOffset != 0 #error Stop_Compiling_Bad_LZMA_kAlign #endif - + #if NUM_BASE_PROBS != 1984 #error Stop_Compiling_Bad_LZMA_PROBS -#endif - - +#endif + + #define LZMA_LIT_SIZE 0x300 #define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) @@ -162,8 +162,8 @@ #define COMBINED_PS_STATE (posState + state) #define GET_LEN_STATE (posState) -#define LZMA_DIC_MIN (1 << 12) - +#define LZMA_DIC_MIN (1 << 12) + /* p->remainLen : shows status of LZMA decoder: < kMatchSpecLenStart : normal remain @@ -197,16 +197,16 @@ Processing: It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. 
-Out: +Out: RangeCoder is normalized - Result: - SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished -*/ - + Result: + SZ_OK - OK + SZ_ERROR_DATA - Error + p->remainLen: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished +*/ + #ifdef _LZMA_DEC_OPT @@ -216,403 +216,403 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit static int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ +{ CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; - UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; - unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lc = p->prop.lc; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lc = p->prop.lc; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); - - Byte *dic = p->dic; - SizeT dicBufSize = p->dicBufSize; - SizeT dicPos = p->dicPos; - - UInt32 processedPos = p->processedPos; - UInt32 checkDicSize = p->checkDicSize; - unsigned len = 0; - - const Byte *buf = p->buf; - UInt32 range = p->range; - UInt32 code = p->code; - - do - { - CLzmaProb *prob; - UInt32 bound; - unsigned ttt; + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; unsigned posState = CALC_POS_STATE(processedPos, pbMask); - + prob = probs + IsMatch + COMBINED_PS_STATE; - IF_BIT_0(prob) - { - unsigned symbol; - UPDATE_0(prob); - prob = probs + Literal; - if (processedPos != 0 || checkDicSize != 0) + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob); + prob = 
probs + Literal; + if (processedPos != 0 || checkDicSize != 0) prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); - processedPos++; - - if (state < kNumLitStates) - { - state -= (state < 4) ? state : 3; - symbol = 1; - #ifdef _LZMA_SIZE_OPT - do { NORMAL_LITER_DEC } while (symbol < 0x100); - #else - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - #endif - } - else - { - unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; - unsigned offs = 0x100; - state -= (state < 10) ? 3 : 6; - symbol = 1; - #ifdef _LZMA_SIZE_OPT - do - { - unsigned bit; - CLzmaProb *probLit; - MATCHED_LITER_DEC - } - while (symbol < 0x100); - #else - { - unsigned bit; - CLzmaProb *probLit; - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - } - #endif - } - - dic[dicPos++] = (Byte)symbol; - continue; - } - - { - UPDATE_1(prob); - prob = probs + IsRep + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - state += kNumStates; - prob = probs + LenCoder; - } - else - { - UPDATE_1(prob); + processedPos++; + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif + } + else + { + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 
3 : 6; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + } + while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif + } + + dic[dicPos++] = (Byte)symbol; + continue; + } + + { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob); /* // that case was checked before with kBadRepCode - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; + if (checkDicSize == 0 && processedPos == 0) + return SZ_ERROR_DATA; */ - prob = probs + IsRepG0 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); prob = probs + IsRep0Long + COMBINED_PS_STATE; - IF_BIT_0(prob) - { - UPDATE_0(prob); - dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; - dicPos++; - processedPos++; - state = state < kNumLitStates ? 9 : 11; - continue; - } - UPDATE_1(prob); - } - else - { - UInt32 distance; - UPDATE_1(prob); - prob = probs + IsRepG1 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep1; - } - else - { - UPDATE_1(prob); - prob = probs + IsRepG2 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep2; - } - else - { - UPDATE_1(prob); - distance = rep3; - rep3 = rep2; - } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - state = state < kNumLitStates ? 8 : 11; - prob = probs + RepLenCoder; - } - - #ifdef _LZMA_SIZE_OPT - { - unsigned lim, offset; - CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); + IF_BIT_0(prob) + { + UPDATE_0(prob); + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 
9 : 11; + continue; + } + UPDATE_1(prob); + } + else + { + UInt32 distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep1; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep2; + } + else + { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 8 : 11; + prob = probs + RepLenCoder; + } + + #ifdef _LZMA_SIZE_OPT + { + unsigned lim, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); probLen = prob + LenLow + GET_LEN_STATE; - offset = 0; - lim = (1 << kLenNumLowBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); + offset = 0; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); - offset = kLenNumLowSymbols; + offset = kLenNumLowSymbols; lim = (1 << kLenNumLowBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenHigh; + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; - lim = (1 << kLenNumHighBits); - } - } - TREE_DECODE(probLen, lim, len); - len += offset; - } - #else - { - CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); + lim = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, lim, len); + len += offset; + } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); probLen = prob + LenLow + GET_LEN_STATE; - len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - len -= 8; - } - else - { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); + len = 1; + 
TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + len -= 8; + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); - len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenHigh; - TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len); len += kLenNumLowSymbols * 2; - } - } - } - #endif - - if (state >= kNumStates) - { - UInt32 distance; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); - if (distance >= kStartPosModelIndex) - { - unsigned posSlot = (unsigned)distance; - unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); - distance = (2 | (distance & 1)); - if (posSlot < kEndPosModelIndex) - { - distance <<= numDirectBits; + } + } + } + #endif + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; prob = probs + SpecPos; - { + { UInt32 m = 1; distance++; - do - { + do + { REV_BIT_VAR(prob, distance, m); - } + } while (--numDirectBits); distance -= m; - } - } - else - { - numDirectBits -= kNumAlignBits; - do - { - NORMALIZE - range >>= 1; - - { - UInt32 t; - code -= range; - t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ - distance = (distance << 1) + (t + 1); - code += range & t; - } - /* - distance <<= 1; - if (code >= range) - { - code -= range; - distance |= 1; - } - */ - } + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } while (--numDirectBits); - prob = probs + Align; - distance <<= kNumAlignBits; - { - unsigned i = 1; + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; REV_BIT_CONST(prob, i, 1); REV_BIT_CONST(prob, i, 2); REV_BIT_CONST(prob, i, 4); REV_BIT_LAST (prob, i, 8); distance |= i; - } - if (distance == (UInt32)0xFFFFFFFF) - { + } + if (distance == (UInt32)0xFFFFFFFF) + { len = kMatchSpecLenStart; - state -= kNumStates; - break; - } - } - } - - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance + 1; + state -= kNumStates; + break; + } + } + } + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; if (distance >= (checkDicSize == 0 ? 
processedPos: checkDicSize)) - { - p->dicPos = dicPos; - return SZ_ERROR_DATA; - } - } - - len += kMatchMinLen; - - { - SizeT rem; - unsigned curLen; - SizeT pos; - - if ((rem = limit - dicPos) == 0) - { - p->dicPos = dicPos; - return SZ_ERROR_DATA; - } - - curLen = ((rem < len) ? (unsigned)rem : len); - pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); - + { + p->dicPos = dicPos; + return SZ_ERROR_DATA; + } + } + + len += kMatchMinLen; + + { + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + { + p->dicPos = dicPos; + return SZ_ERROR_DATA; + } + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); + processedPos += (UInt32)curLen; - - len -= curLen; - if (curLen <= dicBufSize - pos) - { - Byte *dest = dic + dicPos; - ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; - const Byte *lim = dest + curLen; + + len -= curLen; + if (curLen <= dicBufSize - pos) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; dicPos += (SizeT)curLen; - do - *(dest) = (Byte)*(dest + src); - while (++dest != lim); - } - else - { - do - { - dic[dicPos++] = dic[pos]; - if (++pos == dicBufSize) - pos = 0; - } - while (--curLen != 0); - } - } - } - } - while (dicPos < limit && buf < bufLimit); - - NORMALIZE; - - p->buf = buf; - p->range = range; - p->code = code; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + + NORMALIZE; + + p->buf = buf; + p->range = range; + p->code = code; p->remainLen = (UInt32)len; - p->dicPos = dicPos; - p->processedPos = processedPos; - p->reps[0] = rep0; - p->reps[1] = rep1; - p->reps[2] = rep2; - p->reps[3] = rep3; + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + 
p->reps[2] = rep2; + p->reps[3] = rep3; p->state = (UInt32)state; - - return SZ_OK; -} + + return SZ_OK; +} #endif - -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) -{ - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) - { - Byte *dic = p->dic; - SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; + +static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + { + Byte *dic = p->dic; + SizeT dicPos = p->dicPos; + SizeT dicBufSize = p->dicBufSize; unsigned len = (unsigned)p->remainLen; - SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ - SizeT rem = limit - dicPos; - if (rem < len) - len = (unsigned)(rem); - - if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) - p->checkDicSize = p->prop.dicSize; - + SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + SizeT rem = limit - dicPos; + if (rem < len) + len = (unsigned)(rem); + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + p->processedPos += (UInt32)len; p->remainLen -= (UInt32)len; - while (len != 0) - { - len--; - dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; - dicPos++; - } - p->dicPos = dicPos; - } -} - + while (len != 0) + { + len--; + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? 
dicBufSize : 0)]; + dicPos++; + } + p->dicPos = dicPos; + } +} + #define kRange0 0xFFFFFFFF #define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) @@ -621,265 +621,265 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) #error Stop_Compiling_Bad_LZMA_Check #endif -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ - do - { - SizeT limit2 = limit; - if (p->checkDicSize == 0) - { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; +static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + do + { + SizeT limit2 = limit; + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit2 = p->dicPos + rem; if (p->processedPos == 0) if (p->code >= kBadRepCode) return SZ_ERROR_DATA; - } + } RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); - - if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) - p->checkDicSize = p->prop.dicSize; - - LzmaDec_WriteRem(p, limit); - } - while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - return 0; -} - -typedef enum -{ - DUMMY_ERROR, /* unexpected end of input stream */ - DUMMY_LIT, - DUMMY_MATCH, - DUMMY_REP -} ELzmaDummy; - -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) -{ - UInt32 range = p->range; - UInt32 code = p->code; - const Byte *bufLimit = buf + inSize; + + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + + LzmaDec_WriteRem(p, limit); + } + while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); + + return 0; +} + +typedef enum +{ + DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) +{ + 
UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = buf + inSize; const CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; - ELzmaDummy res; - - { - const CLzmaProb *prob; - UInt32 bound; - unsigned ttt; + ELzmaDummy res; + + { + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); - + prob = probs + IsMatch + COMBINED_PS_STATE; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK - - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - - prob = probs + Literal; - if (p->checkDicSize != 0 || p->processedPos != 0) - prob += ((UInt32)LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); - - if (state < kNumLitStates) - { - unsigned symbol = 1; - do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); - } - else - { - unsigned matchByte = p->dic[p->dicPos - p->reps[0] + - (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; - unsigned offs = 0x100; - unsigned symbol = 1; - do - { - unsigned bit; - const CLzmaProb *probLit; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? 
p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + const CLzmaProb *probLit; matchByte += matchByte; bit = offs; offs &= matchByte; probLit = prob + (offs + bit + symbol); GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) - } - while (symbol < 0x100); - } - res = DUMMY_LIT; - } - else - { - unsigned len; - UPDATE_1_CHECK; - - prob = probs + IsRep + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - state = 0; - prob = probs + LenCoder; - res = DUMMY_MATCH; - } - else - { - UPDATE_1_CHECK; - res = DUMMY_REP; - prob = probs + IsRepG0 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; prob = probs + IsRep0Long + COMBINED_PS_STATE; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; - } - else - { - UPDATE_1_CHECK; - } - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG1 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG2 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - } - } - } - state = kNumStates; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - const CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + NORMALIZE_CHECK; + return DUMMY_REP; + } + else + { + UPDATE_1_CHECK; + } + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + 
UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; probLen = prob + LenLow + GET_LEN_STATE; - offset = 0; - limit = 1 << kLenNumLowBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenChoice2; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); - offset = kLenNumLowSymbols; + offset = kLenNumLowSymbols; limit = 1 << kLenNumLowBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenHigh; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; - limit = 1 << kLenNumHighBits; - } - } - TREE_DECODE_CHECK(probLen, limit, len); - len += offset; - } - - if (state < 4) - { - unsigned posSlot; - prob = probs + PosSlot + + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + ((len < kNumLenToPosStates - 1 ? 
len : kNumLenToPosStates - 1) << - kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); - if (posSlot >= kStartPosModelIndex) - { - unsigned numDirectBits = ((posSlot >> 1) - 1); - - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - - if (posSlot < kEndPosModelIndex) - { + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) + { + unsigned numDirectBits = ((posSlot >> 1) - 1); + + /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + + if (posSlot < kEndPosModelIndex) + { prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); - } - else - { - numDirectBits -= kNumAlignBits; - do - { - NORMALIZE_CHECK - range >>= 1; - code -= range & (((code - range) >> 31) - 1); - /* if (code >= range) code -= range; */ - } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } while (--numDirectBits); - prob = probs + Align; - numDirectBits = kNumAlignBits; - } - { - unsigned i = 1; + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; unsigned m = 1; - do - { + do + { REV_BIT_CHECK(prob, i, m); - } + } while (--numDirectBits); - } - } - } - } - } - NORMALIZE_CHECK; - return res; -} - - + } + } + } + } + } + NORMALIZE_CHECK; + return res; +} + + void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) -{ +{ p->remainLen = kMatchSpecLenStart + 1; - p->tempBufSize = 0; - - if (initDic) - { - p->processedPos = 0; - p->checkDicSize = 0; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; p->remainLen = kMatchSpecLenStart + 2; - } - if (initState) + } + if (initState) p->remainLen = kMatchSpecLenStart + 2; -} - -void LzmaDec_Init(CLzmaDec *p) -{ - p->dicPos = 0; - LzmaDec_InitDicAndState(p, True, True); -} - - -SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte 
*src, SizeT *srcLen, - ELzmaFinishMode finishMode, ELzmaStatus *status) -{ - SizeT inSize = *srcLen; - (*srcLen) = 0; - - *status = LZMA_STATUS_NOT_SPECIFIED; - +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + (*srcLen) = 0; + + *status = LZMA_STATUS_NOT_SPECIFIED; + if (p->remainLen > kMatchSpecLenStart) { for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) @@ -915,271 +915,271 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr LzmaDec_WriteRem(p, dicLimit); - while (p->remainLen != kMatchSpecLenStart) - { + while (p->remainLen != kMatchSpecLenStart) + { int checkEndMarkNow = 0; - - if (p->dicPos >= dicLimit) - { - if (p->remainLen == 0 && p->code == 0) - { - *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; - return SZ_OK; - } - if (finishMode == LZMA_FINISH_ANY) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_OK; - } - if (p->remainLen != 0) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - checkEndMarkNow = 1; - } - - if (p->tempBufSize == 0) - { - SizeT processed; - const Byte *bufLimit; - if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) - { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) - { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; - (*srcLen) += inSize; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - bufLimit = src; - } - else - bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; - p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += 
processed; - inSize -= processed; - } - else - { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) - { + + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + checkEndMarkNow = 1; + } + + if (p->tempBufSize == 0) + { + SizeT processed; + const Byte *bufLimit; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, src, inSize); + if (dummyRes == DUMMY_ERROR) + { + memcpy(p->tempBuf, src, inSize); + p->tempBufSize = (unsigned)inSize; + (*srcLen) += inSize; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + bufLimit = src; + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; + if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) + return SZ_ERROR_DATA; + processed = (SizeT)(p->buf - src); + (*srcLen) += processed; + src += processed; + inSize -= processed; + } + else + { + unsigned rem = p->tempBufSize, lookAhead = 0; + while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) + p->tempBuf[rem++] = src[lookAhead++]; + p->tempBufSize = rem; + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem); - if (dummyRes == DUMMY_ERROR) - { + if (dummyRes == DUMMY_ERROR) + { (*srcLen) += (SizeT)lookAhead; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) - { - *status = 
LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - } - p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; - - { - unsigned kkk = (unsigned)(p->buf - p->tempBuf); - if (rem < kkk) - return SZ_ERROR_FAIL; /* some internal error */ - rem -= kkk; - if (lookAhead < rem) - return SZ_ERROR_FAIL; /* some internal error */ - lookAhead -= rem; - } + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + } + p->buf = p->tempBuf; + if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) + return SZ_ERROR_DATA; + + { + unsigned kkk = (unsigned)(p->buf - p->tempBuf); + if (rem < kkk) + return SZ_ERROR_FAIL; /* some internal error */ + rem -= kkk; + if (lookAhead < rem) + return SZ_ERROR_FAIL; /* some internal error */ + lookAhead -= rem; + } (*srcLen) += (SizeT)lookAhead; - src += lookAhead; + src += lookAhead; inSize -= (SizeT)lookAhead; - p->tempBufSize = 0; - } - } + p->tempBufSize = 0; + } + } if (p->code != 0) return SZ_ERROR_DATA; *status = LZMA_STATUS_FINISHED_WITH_MARK; return SZ_OK; -} - +} -SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) -{ - SizeT outSize = *destLen; - SizeT inSize = *srcLen; - *srcLen = *destLen = 0; - for (;;) - { - SizeT inSizeCur = inSize, outSizeCur, dicPos; - ELzmaFinishMode curFinishMode; - SRes res; - if (p->dicPos == p->dicBufSize) - p->dicPos = 0; - dicPos = p->dicPos; - if (outSize > p->dicBufSize - dicPos) - { - outSizeCur = p->dicBufSize; - curFinishMode = LZMA_FINISH_ANY; - } - else - { - outSizeCur = dicPos + outSize; - curFinishMode = finishMode; - } - - res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); - src += inSizeCur; - inSize -= inSizeCur; - *srcLen += inSizeCur; - outSizeCur = p->dicPos - dicPos; - memcpy(dest, p->dic + dicPos, 
outSizeCur); - dest += outSizeCur; - outSize -= outSizeCur; - *destLen += outSizeCur; - if (res != 0) - return res; - if (outSizeCur == 0 || outSize == 0) - return SZ_OK; - } -} - -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->probs); - p->probs = NULL; -} - -static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->dic); - p->dic = NULL; -} - -void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) -{ - LzmaDec_FreeProbs(p, alloc); - LzmaDec_FreeDict(p, alloc); -} - -SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) -{ - UInt32 dicSize; - Byte d; - - if (size < LZMA_PROPS_SIZE) - return SZ_ERROR_UNSUPPORTED; - else - dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->probs); + p->probs = NULL; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) +{ + 
ISzAlloc_Free(alloc, p->dic); + p->dic = NULL; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; - if (dicSize < LZMA_DIC_MIN) - dicSize = LZMA_DIC_MIN; - p->dicSize = dicSize; - - d = data[0]; - if (d >= (9 * 5 * 5)) - return SZ_ERROR_UNSUPPORTED; - + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + p->lc = (Byte)(d % 9); - d /= 9; + d /= 9; p->pb = (Byte)(d / 5); p->lp = (Byte)(d % 5); - - return SZ_OK; -} - -static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) -{ - UInt32 numProbs = LzmaProps_GetNumProbs(propNew); - if (!p->probs || numProbs != p->numProbs) - { - LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); - if (!p->probs) - return SZ_ERROR_MEM; + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); + if (!p->probs) + return SZ_ERROR_MEM; p->probs_1664 = p->probs + 1664; p->numProbs = numProbs; - } - return SZ_OK; -} - -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) -{ - CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, 
unsigned propsSize, ISzAllocPtr alloc) -{ - CLzmaProps propNew; - SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - - { - UInt32 dictSize = propNew.dicSize; - SizeT mask = ((UInt32)1 << 12) - 1; - if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; - else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; - dicBufSize = ((SizeT)dictSize + mask) & ~mask; - if (dicBufSize < dictSize) - dicBufSize = dictSize; - } - - if (!p->dic || dicBufSize != p->dicBufSize) - { - LzmaDec_FreeDict(p, alloc); - p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); - if (!p->dic) - { - LzmaDec_FreeProbs(p, alloc); - return SZ_ERROR_MEM; - } - } - p->dicBufSize = dicBufSize; - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, - const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAllocPtr alloc) -{ - CLzmaDec p; - SRes res; - SizeT outSize = *destLen, inSize = *srcLen; - *destLen = *srcLen = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; - if (inSize < RC_INIT_SIZE) - return SZ_ERROR_INPUT_EOF; - LzmaDec_Construct(&p); - RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); - p.dic = dest; - p.dicBufSize = outSize; - LzmaDec_Init(&p); - *srcLen = inSize; - res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); - *destLen = p.dicPos; - if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) - res = SZ_ERROR_INPUT_EOF; - LzmaDec_FreeProbs(&p, alloc); - return res; -} + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) 
+{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); + if (!p->dic) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzmaDec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + LzmaDec_Construct(&p); + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); + p.dic = dest; + p.dicBufSize = outSize; + LzmaDec_Init(&p); + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + LzmaDec_FreeProbs(&p, alloc); + return res; +} diff --git a/contrib/libs/lzmasdk/LzmaDec.h b/contrib/libs/lzmasdk/LzmaDec.h index 4d922f23dc9..1f0927ab132 100644 --- a/contrib/libs/lzmasdk/LzmaDec.h +++ b/contrib/libs/lzmasdk/LzmaDec.h @@ -1,234 +1,234 @@ -/* LzmaDec.h -- LZMA Decoder +/* LzmaDec.h -- LZMA Decoder 2018-04-21 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_DEC_H -#define __LZMA_DEC_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -/* #define _LZMA_PROB32 */ 
-/* _LZMA_PROB32 can increase the speed on some CPUs, - but memory usage for CLzmaDec::probs will be doubled in that case */ - + +#ifndef __LZMA_DEC_H +#define __LZMA_DEC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define _LZMA_PROB32 */ +/* _LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + typedef -#ifdef _LZMA_PROB32 +#ifdef _LZMA_PROB32 UInt32 -#else +#else UInt16 -#endif +#endif CLzmaProb; - - -/* ---------- LZMA Properties ---------- */ - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaProps -{ + + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaProps +{ Byte lc; Byte lp; Byte pb; Byte _pad_; - UInt32 dicSize; -} CLzmaProps; - -/* LzmaProps_Decode - decodes properties -Returns: - SZ_OK - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); - - -/* ---------- LZMA Decoder state ---------- */ - -/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. - Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ - -#define LZMA_REQUIRED_INPUT_MAX 20 - -typedef struct -{ + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ /* Don't change this structure. ASM code can use it. 
*/ - CLzmaProps prop; - CLzmaProb *probs; + CLzmaProps prop; + CLzmaProb *probs; CLzmaProb *probs_1664; - Byte *dic; + Byte *dic; SizeT dicBufSize; SizeT dicPos; - const Byte *buf; + const Byte *buf; UInt32 range; UInt32 code; - UInt32 processedPos; - UInt32 checkDicSize; - UInt32 reps[4]; + UInt32 processedPos; + UInt32 checkDicSize; + UInt32 reps[4]; UInt32 state; UInt32 remainLen; - UInt32 numProbs; - unsigned tempBufSize; - Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; -} CLzmaDec; - + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + #define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } - -void LzmaDec_Init(CLzmaDec *p); - -/* There are two types of LZMA streams: + +void LzmaDec_Init(CLzmaDec *p); + +/* There are two types of LZMA streams: - Stream with end mark. That end mark adds about 6 bytes to compressed size. - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ - -typedef enum -{ - LZMA_FINISH_ANY, /* finish at any point */ - LZMA_FINISH_END /* block must be finished at the end */ -} ELzmaFinishMode; - -/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! - - You must use LZMA_FINISH_END, when you know that current output buffer - covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. - - If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, - and output value of destLen will be less than output buffer size limit. - You can check status result also. - - You can use multiple checks to test data integrity after full decompression: - 1) Check Result and "status" variable. - 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. - 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. - You must use correct finish mode in that case. 
*/ - -typedef enum -{ - LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ - LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ - LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ - LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ -} ELzmaStatus; - -/* ELzmaStatus is used only as output value for function call */ - - -/* ---------- Interfaces ---------- */ - -/* There are 3 levels of interfaces: - 1) Dictionary Interface - 2) Buffer Interface - 3) One Call Interface - You can select any of these interfaces, but don't mix functions from different - groups for same object. */ - - -/* There are two variants to allocate state for Dictionary Interface: - 1) LzmaDec_Allocate / LzmaDec_Free - 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs - You can use variant 2, if you set dictionary buffer manually. - For Buffer Interface you must always use variant 1. - -LzmaDec_Allocate* can return: - SZ_OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); - + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. 
+ + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. + +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); + SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); - -/* ---------- Dictionary Interface ---------- */ - -/* You can use it, if you want to eliminate the overhead for data copying from - dictionary to some other external buffer. 
- You must work with CLzmaDec variables directly in this interface. - - STEPS: + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: LzmaDec_Construct() - LzmaDec_Allocate() - for (each new stream) - { - LzmaDec_Init() - while (it needs more decompression) - { - LzmaDec_DecodeToDic() - use data from CLzmaDec::dic and update CLzmaDec::dicPos - } - } - LzmaDec_Free() -*/ - -/* LzmaDec_DecodeToDic - - The decoding to internal dictionary buffer (CLzmaDec::dic). - You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! - -finishMode: - It has meaning only if the decoding reaches output limit (dicLimit). - LZMA_FINISH_ANY - Decode just dicLimit bytes. - LZMA_FINISH_END - Stream must be finished after dicLimit. - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_NEEDS_MORE_INPUT - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error -*/ - -SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, - const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); - - -/* ---------- Buffer Interface ---------- */ - -/* It's zlib-like interface. - See LzmaDec_DecodeToDic description for information about STEPS and return results, - but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need - to work with CLzmaDec variables manually. - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). 
-*/ - -SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, - const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); - - -/* ---------- One Call Interface ---------- */ - -/* LzmaDecode - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties - SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). -*/ - -SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, - const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAllocPtr alloc); - -EXTERN_C_END - -#endif + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! + +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. 
+ See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 
+*/ + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/contrib/libs/lzmasdk/LzmaEnc.c b/contrib/libs/lzmasdk/LzmaEnc.c index f6a6795d301..0ee29223908 100644 --- a/contrib/libs/lzmasdk/LzmaEnc.c +++ b/contrib/libs/lzmasdk/LzmaEnc.c @@ -1,180 +1,180 @@ -/* LzmaEnc.c -- LZMA Encoder +/* LzmaEnc.c -- LZMA Encoder 2019-01-10: Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include - -/* #define SHOW_STAT */ -/* #define SHOW_STAT2 */ - -#if defined(SHOW_STAT) || defined(SHOW_STAT2) -#include -#endif - -#include "LzmaEnc.h" - -#include "LzFind.h" -#ifndef _7ZIP_ST -#include "LzFindMt.h" -#endif - -#ifdef SHOW_STAT -static unsigned g_STAT_OFFSET = 0; -#endif - -#define kLzmaMaxHistorySize ((UInt32)3 << 29) -/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 -#define kProbInitValue (kBitModelTotal >> 1) - -#define kNumMoveReducingBits 4 -#define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) - + +#include "Precomp.h" + +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#if defined(SHOW_STAT) || defined(SHOW_STAT2) +#include +#endif + +#include "LzmaEnc.h" + +#include "LzFind.h" +#ifndef _7ZIP_ST +#include "LzFindMt.h" +#endif + +#ifdef SHOW_STAT +static unsigned g_STAT_OFFSET = 0; +#endif + +#define kLzmaMaxHistorySize ((UInt32)3 << 29) +/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define 
kNumBitPriceShiftBits 4 +#define kBitPrice (1 << kNumBitPriceShiftBits) + #define REP_LEN_COUNT 64 -void LzmaEncProps_Init(CLzmaEncProps *p) -{ - p->level = 5; - p->dictSize = p->mc = 0; - p->reduceSize = (UInt64)(Int64)-1; - p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; - p->writeEndMark = 0; -} - -void LzmaEncProps_Normalize(CLzmaEncProps *p) -{ - int level = p->level; - if (level < 0) level = 5; - p->level = level; - - if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); - if (p->dictSize > p->reduceSize) - { - unsigned i; - UInt32 reduceSize = (UInt32)p->reduceSize; - for (i = 11; i <= 30; i++) - { - if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } - if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } - } - } - - if (p->lc < 0) p->lc = 3; - if (p->lp < 0) p->lp = 0; - if (p->pb < 0) p->pb = 2; - - if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); - if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) p->numHashBytes = 4; - if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); - - if (p->numThreads < 0) - p->numThreads = - #ifndef _7ZIP_ST - ((p->btMode && p->algo) ? 
2 : 1); - #else - 1; - #endif -} - -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) -{ - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - return props.dictSize; -} - -#if (_MSC_VER >= 1400) -/* BSR code is fast for some new CPUs */ -/* #define LZMA_LOG_BSR */ -#endif - -#ifdef LZMA_LOG_BSR - -#define kDicLogSizeMaxCompress 32 - -#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } - +void LzmaEncProps_Init(CLzmaEncProps *p) +{ + p->level = 5; + p->dictSize = p->mc = 0; + p->reduceSize = (UInt64)(Int64)-1; + p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->writeEndMark = 0; +} + +void LzmaEncProps_Normalize(CLzmaEncProps *p) +{ + int level = p->level; + if (level < 0) level = 5; + p->level = level; + + if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); + if (p->dictSize > p->reduceSize) + { + unsigned i; + UInt32 reduceSize = (UInt32)p->reduceSize; + for (i = 11; i <= 30; i++) + { + if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } + if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } + } + } + + if (p->lc < 0) p->lc = 3; + if (p->lp < 0) p->lp = 0; + if (p->pb < 0) p->pb = 2; + + if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); + if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); + if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = 4; + if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + + if (p->numThreads < 0) + p->numThreads = + #ifndef _7ZIP_ST + ((p->btMode && p->algo) ? 
2 : 1); + #else + 1; + #endif +} + +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) +{ + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + return props.dictSize; +} + +#if (_MSC_VER >= 1400) +/* BSR code is fast for some new CPUs */ +/* #define LZMA_LOG_BSR */ +#endif + +#ifdef LZMA_LOG_BSR + +#define kDicLogSizeMaxCompress 32 + +#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } + static unsigned GetPosSlot1(UInt32 pos) -{ +{ unsigned res; - BSR2_RET(pos, res); - return res; -} -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } - -#else - -#define kNumLogBits (9 + sizeof(size_t) / 2) -/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */ - -#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) - -static void LzmaEnc_FastPosInit(Byte *g_FastPos) -{ - unsigned slot; - g_FastPos[0] = 0; - g_FastPos[1] = 1; - g_FastPos += 2; - - for (slot = 2; slot < kNumLogBits * 2; slot++) - { - size_t k = ((size_t)1 << ((slot >> 1) - 1)); - size_t j; - for (j = 0; j < k; j++) - g_FastPos[j] = (Byte)slot; - g_FastPos += k; - } -} - -/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ -/* + BSR2_RET(pos, res); + return res; +} +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } + +#else + +#define kNumLogBits (9 + sizeof(size_t) / 2) +/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */ + +#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) + +static void LzmaEnc_FastPosInit(Byte *g_FastPos) +{ + unsigned slot; + g_FastPos[0] = 0; + g_FastPos[1] = 1; + g_FastPos += 2; + + for (slot = 2; slot < kNumLogBits * 2; slot++) + { + size_t k = ((size_t)1 << ((slot >> 1) - 1)); + size_t j; + for (j = 0; j < k; j++) + g_FastPos[j] = (Byte)slot; + g_FastPos += k; + } +} + +/* we can use 
((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ +/* #define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ - (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ - res = p->g_FastPos[pos >> zz] + (zz * 2); } -*/ - -/* + (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +/* #define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ - (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ - res = p->g_FastPos[pos >> zz] + (zz * 2); } -*/ - + (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + #define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ - res = p->g_FastPos[pos >> zz] + (zz * 2); } - -/* -#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ - p->g_FastPos[pos >> 6] + 12 : \ - p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } -*/ - -#define GetPosSlot1(pos) p->g_FastPos[pos] -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } + res = p->g_FastPos[pos >> zz] + (zz * 2); } + +/* +#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? 
\ + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } +*/ + +#define GetPosSlot1(pos) p->g_FastPos[pos] +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } - -#endif - - -#define LZMA_NUM_REPS 4 - + +#endif + + +#define LZMA_NUM_REPS 4 + typedef UInt16 CState; typedef UInt16 CExtra; - -typedef struct -{ - UInt32 price; - CState state; + +typedef struct +{ + UInt32 price; + CState state; CExtra extra; // 0 : normal // 1 : LIT : MATCH @@ -183,194 +183,194 @@ typedef struct UInt32 dist; UInt32 reps[LZMA_NUM_REPS]; } COptimal; - - + + // 18.06 #define kNumOpts (1 << 11) #define kPackReserve (kNumOpts * 8) // #define kNumOpts (1 << 12) // #define kPackReserve (1 + kNumOpts * 2) - -#define kNumLenToPosStates 4 -#define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 -#define kDicLogSizeMax 32 -#define kDistTableSizeMax (kDicLogSizeMax * 2) - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) -#define kAlignMask (kAlignTableSize - 1) - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - + +#define kNumLenToPosStates 4 +#define kNumPosSlotBits 6 +#define kDicLogSizeMin 0 +#define kDicLogSizeMax 32 +#define kDistTableSizeMax (kDicLogSizeMax * 2) + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) +#define kAlignMask (kAlignTableSize - 1) + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + typedef -#ifdef _LZMA_PROB32 +#ifdef _LZMA_PROB32 UInt32 -#else +#else UInt16 -#endif +#endif CLzmaProb; - -#define LZMA_PB_MAX 4 -#define LZMA_LC_MAX 8 -#define LZMA_LP_MAX 4 - -#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define 
kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LZMA_PB_MAX 4 +#define LZMA_LC_MAX 8 +#define LZMA_LP_MAX 4 + +#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) #define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) - -#define LZMA_MATCH_LEN_MIN 2 -#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) - -#define kNumStates 12 - - -typedef struct -{ + +#define LZMA_MATCH_LEN_MIN 2 +#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) + +#define kNumStates 12 + + +typedef struct +{ CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; - CLzmaProb high[kLenNumHighSymbols]; -} CLenEnc; - - -typedef struct -{ + CLzmaProb high[kLenNumHighSymbols]; +} CLenEnc; + + +typedef struct +{ unsigned tableSize; - UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2]; // UInt32 prices2[kLenNumSymbolsTotal]; -} CLenPriceEnc; - +} CLenPriceEnc; + #define GET_PRICE_LEN(p, posState, len) \ ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN]) - + /* #define GET_PRICE_LEN(p, posState, len) \ ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9))) */ -typedef struct -{ - UInt32 range; +typedef struct +{ + UInt32 range; unsigned cache; - UInt64 low; - UInt64 cacheSize; - Byte *buf; - Byte *bufLim; - Byte *bufBase; - ISeqOutStream *outStream; - UInt64 processed; - SRes res; -} CRangeEnc; - - -typedef struct -{ - CLzmaProb *litProbs; - + UInt64 low; + UInt64 cacheSize; + Byte *buf; + Byte *bufLim; + Byte *bufBase; + ISeqOutStream *outStream; + UInt64 processed; + SRes res; +} CRangeEnc; + + +typedef struct +{ + 
CLzmaProb *litProbs; + unsigned state; - UInt32 reps[LZMA_NUM_REPS]; - + UInt32 reps[LZMA_NUM_REPS]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; CLzmaProb posEncoders[kNumFullDistances]; - + CLenEnc lenProbs; CLenEnc repLenProbs; -} CSaveState; - - +} CSaveState; + + typedef UInt32 CProbPrice; -typedef struct -{ - void *matchFinderObj; - IMatchFinder matchFinder; - +typedef struct +{ + void *matchFinderObj; + IMatchFinder matchFinder; + unsigned optCur; unsigned optEnd; - + unsigned longestMatchLen; unsigned numPairs; - UInt32 numAvail; - + UInt32 numAvail; + unsigned state; unsigned numFastBytes; unsigned additionalOffset; - UInt32 reps[LZMA_NUM_REPS]; + UInt32 reps[LZMA_NUM_REPS]; unsigned lpMask, pbMask; CLzmaProb *litProbs; CRangeEnc rc; - + UInt32 backRes; - unsigned lc, lp, pb; - unsigned lclp; - + unsigned lc, lp, pb; + unsigned lclp; + BoolInt fastMode; BoolInt writeEndMark; BoolInt finished; BoolInt multiThread; BoolInt needInit; // BoolInt _maxMode; - - UInt64 nowPos64; - + + UInt64 nowPos64; + unsigned matchPriceCount; // unsigned alignPriceCount; int repLenEncCounter; - + unsigned distTableSize; - - UInt32 dictSize; - SRes result; - - #ifndef _7ZIP_ST + + UInt32 dictSize; + SRes result; + + #ifndef _7ZIP_ST BoolInt mtMode; // begin of CMatchFinderMt is used in LZ thread - CMatchFinderMt matchFinderMt; + CMatchFinderMt matchFinderMt; // end of CMatchFinderMt is used in BT and HASH 
threads - #endif - - CMatchFinder matchFinderBase; - - #ifndef _7ZIP_ST - Byte pad[128]; - #endif - + #endif + + CMatchFinder matchFinderBase; + + #ifndef _7ZIP_ST + Byte pad[128]; + #endif + // LZ thread CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - - UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; - + + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + UInt32 alignPrices[kAlignTableSize]; - UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; - UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; - + UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; CLzmaProb posEncoders[kNumFullDistances]; - + CLenEnc lenProbs; CLenEnc repLenProbs; @@ -378,32 +378,32 @@ typedef struct Byte g_FastPos[1 << kNumLogBits]; #endif - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + COptimal opt[kNumOpts]; - CSaveState saveState; - - #ifndef _7ZIP_ST - Byte pad2[128]; - #endif -} CLzmaEnc; - - + CSaveState saveState; + + #ifndef _7ZIP_ST + Byte pad2[128]; + #endif +} CLzmaEnc; + + #define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); -void LzmaEnc_SaveState(CLzmaEncHandle pp) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - CSaveState *dest = &p->saveState; +void LzmaEnc_SaveState(CLzmaEncHandle pp) 
+{ + CLzmaEnc *p = (CLzmaEnc *)pp; + CSaveState *dest = &p->saveState; - dest->state = p->state; + dest->state = p->state; dest->lenProbs = p->lenProbs; dest->repLenProbs = p->repLenProbs; - + COPY_ARR(dest, p, reps); COPY_ARR(dest, p, posAlignEncoder); @@ -416,17 +416,17 @@ void LzmaEnc_SaveState(CLzmaEncHandle pp) COPY_ARR(dest, p, posSlotEncoder); COPY_ARR(dest, p, posEncoders); - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); -} - + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); +} -void LzmaEnc_RestoreState(CLzmaEncHandle pp) -{ - CLzmaEnc *dest = (CLzmaEnc *)pp; - const CSaveState *p = &dest->saveState; - dest->state = p->state; - +void LzmaEnc_RestoreState(CLzmaEncHandle pp) +{ + CLzmaEnc *dest = (CLzmaEnc *)pp; + const CSaveState *p = &dest->saveState; + + dest->state = p->state; + dest->lenProbs = p->lenProbs; dest->repLenProbs = p->repLenProbs; @@ -442,166 +442,166 @@ void LzmaEnc_RestoreState(CLzmaEncHandle pp) COPY_ARR(dest, p, posSlotEncoder); COPY_ARR(dest, p, posEncoders); - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); -} - + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); +} -SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - - if (props.lc > LZMA_LC_MAX - || props.lp > LZMA_LP_MAX - || props.pb > LZMA_PB_MAX - || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress) - || props.dictSize > kLzmaMaxHistorySize) - return SZ_ERROR_PARAM; - - p->dictSize = props.dictSize; - { - unsigned fb = props.fb; - if (fb < 5) - fb = 5; - if (fb > LZMA_MATCH_LEN_MAX) - fb = LZMA_MATCH_LEN_MAX; - p->numFastBytes = fb; - } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; - p->fastMode = (props.algo == 0); + +SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) +{ 
+ CLzmaEnc *p = (CLzmaEnc *)pp; + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + + if (props.lc > LZMA_LC_MAX + || props.lp > LZMA_LP_MAX + || props.pb > LZMA_PB_MAX + || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress) + || props.dictSize > kLzmaMaxHistorySize) + return SZ_ERROR_PARAM; + + p->dictSize = props.dictSize; + { + unsigned fb = props.fb; + if (fb < 5) + fb = 5; + if (fb > LZMA_MATCH_LEN_MAX) + fb = LZMA_MATCH_LEN_MAX; + p->numFastBytes = fb; + } + p->lc = props.lc; + p->lp = props.lp; + p->pb = props.pb; + p->fastMode = (props.algo == 0); // p->_maxMode = True; - p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); - { + p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); + { unsigned numHashBytes = 4; - if (props.btMode) - { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; - } - p->matchFinderBase.numHashBytes = numHashBytes; - } - - p->matchFinderBase.cutValue = props.mc; - - p->writeEndMark = props.writeEndMark; - - #ifndef _7ZIP_ST - /* - if (newMultiThread != _multiThread) - { - ReleaseMatchFinder(); - _multiThread = newMultiThread; - } - */ - p->multiThread = (props.numThreads > 1); - #endif - - return SZ_OK; -} - - -void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.expectedDataSize = expectedDataSiize; -} - - -#define kState_Start 0 -#define kState_LitAfterMatch 4 -#define kState_LitAfterRep 5 -#define kState_MatchAfterLit 7 -#define kState_RepAfterLit 8 - -static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; -static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; -static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; -static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; - -#define IsLitState(s) ((s) < 7) -#define 
GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) -#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) - -#define kInfinityPrice (1 << 30) - -static void RangeEnc_Construct(CRangeEnc *p) -{ - p->outStream = NULL; - p->bufBase = NULL; -} - + if (props.btMode) + { + if (props.numHashBytes < 2) + numHashBytes = 2; + else if (props.numHashBytes < 4) + numHashBytes = props.numHashBytes; + } + p->matchFinderBase.numHashBytes = numHashBytes; + } + + p->matchFinderBase.cutValue = props.mc; + + p->writeEndMark = props.writeEndMark; + + #ifndef _7ZIP_ST + /* + if (newMultiThread != _multiThread) + { + ReleaseMatchFinder(); + _multiThread = newMultiThread; + } + */ + p->multiThread = (props.numThreads > 1); + #endif + + return SZ_OK; +} + + +void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->matchFinderBase.expectedDataSize = expectedDataSiize; +} + + +#define kState_Start 0 +#define kState_LitAfterMatch 4 +#define kState_LitAfterRep 5 +#define kState_MatchAfterLit 7 +#define kState_RepAfterLit 8 + +static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + +#define IsLitState(s) ((s) < 7) +#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) +#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? 
(len) - 2 : kNumLenToPosStates - 1) + +#define kInfinityPrice (1 << 30) + +static void RangeEnc_Construct(CRangeEnc *p) +{ + p->outStream = NULL; + p->bufBase = NULL; +} + #define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) #define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) - -#define RC_BUF_SIZE (1 << 16) - -static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) -{ - if (!p->bufBase) - { - p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); - if (!p->bufBase) - return 0; - p->bufLim = p->bufBase + RC_BUF_SIZE; - } - return 1; -} - -static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->bufBase); - p->bufBase = 0; -} - -static void RangeEnc_Init(CRangeEnc *p) -{ - /* Stream.Init(); */ - p->range = 0xFFFFFFFF; - p->cache = 0; + +#define RC_BUF_SIZE (1 << 16) + +static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) +{ + if (!p->bufBase) + { + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); + if (!p->bufBase) + return 0; + p->bufLim = p->bufBase + RC_BUF_SIZE; + } + return 1; +} + +static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = 0; +} + +static void RangeEnc_Init(CRangeEnc *p) +{ + /* Stream.Init(); */ + p->range = 0xFFFFFFFF; + p->cache = 0; p->low = 0; p->cacheSize = 0; - - p->buf = p->bufBase; - - p->processed = 0; - p->res = SZ_OK; -} - + + p->buf = p->bufBase; + + p->processed = 0; + p->res = SZ_OK; +} + MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) -{ - size_t num; - if (p->res != SZ_OK) - return; - num = p->buf - p->bufBase; - if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) - p->res = SZ_ERROR_WRITE; - p->processed += num; - p->buf = p->bufBase; -} - +{ + size_t num; + if (p->res != SZ_OK) + return; + num = p->buf - p->bufBase; + if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) + p->res = 
SZ_ERROR_WRITE; + p->processed += num; + p->buf = p->bufBase; +} + MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) -{ +{ UInt32 low = (UInt32)p->low; unsigned high = (unsigned)(p->low >> 32); p->low = (UInt32)(low << 8); if (low < (UInt32)0xFF000000 || high != 0) - { - { - Byte *buf = p->buf; + { + { + Byte *buf = p->buf; *buf++ = (Byte)(p->cache + high); p->cache = (unsigned)(low >> 24); - p->buf = buf; - if (buf == p->bufLim) - RangeEnc_FlushStream(p); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); if (p->cacheSize == 0) return; - } + } high += 0xFF; for (;;) { @@ -613,17 +613,17 @@ MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) if (--p->cacheSize == 0) return; } - } - p->cacheSize++; -} - -static void RangeEnc_FlushData(CRangeEnc *p) -{ - int i; - for (i = 0; i < 5; i++) - RangeEnc_ShiftLow(p); -} - + } + p->cacheSize++; +} + +static void RangeEnc_FlushData(CRangeEnc *p) +{ + int i; + for (i = 0; i < 5; i++) + RangeEnc_ShiftLow(p); +} + #define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } #define RC_BIT_PRE(p, prob) \ @@ -640,8 +640,8 @@ static void RangeEnc_FlushData(CRangeEnc *p) else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ *(prob) = (CLzmaProb)ttt; \ RC_NORM(p) \ - } - + } + #else #define RC_BIT(p, prob, bit) { \ @@ -681,177 +681,177 @@ static void RangeEnc_FlushData(CRangeEnc *p) RC_NORM(p) static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) -{ +{ UInt32 range, ttt, newBound; range = p->range; RC_BIT_PRE(p, prob) RC_BIT_0(p, prob) p->range = range; -} - +} + static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) -{ +{ UInt32 range = p->range; sym |= 0x100; - do - { + do + { UInt32 ttt, newBound; // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1); CLzmaProb *prob = probs + (sym >> 8); UInt32 bit = (sym >> 7) & 1; sym <<= 1; RC_BIT(p, prob, bit); - } + } while (sym < 0x10000); p->range = range; -} 
- +} + static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte) -{ +{ UInt32 range = p->range; - UInt32 offs = 0x100; + UInt32 offs = 0x100; sym |= 0x100; - do - { + do + { UInt32 ttt, newBound; CLzmaProb *prob; UInt32 bit; - matchByte <<= 1; + matchByte <<= 1; // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1); prob = probs + (offs + (matchByte & offs) + (sym >> 8)); bit = (sym >> 7) & 1; sym <<= 1; offs &= ~(matchByte ^ sym); RC_BIT(p, prob, bit); - } + } while (sym < 0x10000); p->range = range; -} - +} + static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) -{ - UInt32 i; +{ + UInt32 i; for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) - { + { const unsigned kCyclesBits = kNumBitPriceShiftBits; UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); unsigned bitCount = 0; unsigned j; - for (j = 0; j < kCyclesBits; j++) - { - w = w * w; - bitCount <<= 1; - while (w >= ((UInt32)1 << 16)) - { - w >>= 1; - bitCount++; - } - } + for (j = 0; j < kCyclesBits; j++) + { + w = w * w; + bitCount <<= 1; + while (w >= ((UInt32)1 << 16)) + { + w >>= 1; + bitCount++; + } + } ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); // printf("\n%3d: %5d", i, ProbPrices[i]); - } -} - - + } +} + + #define GET_PRICE(prob, bit) \ p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - + #define GET_PRICEa(prob, bit) \ ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - + +#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + #define GET_PRICEa_0(prob) 
ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - + static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices) -{ - UInt32 price = 0; +{ + UInt32 price = 0; sym |= 0x100; - do - { + do + { unsigned bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); - } + } while (sym >= 2); - return price; -} - + return price; +} + static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices) -{ - UInt32 price = 0; - UInt32 offs = 0x100; +{ + UInt32 price = 0; + UInt32 offs = 0x100; sym |= 0x100; - do - { - matchByte <<= 1; + do + { + matchByte <<= 1; price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1); sym <<= 1; offs &= ~(matchByte ^ sym); - } + } while (sym < 0x10000); - return price; -} - - + return price; +} + + static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym) -{ +{ UInt32 range = rc->range; unsigned m = 1; do - { + { UInt32 ttt, newBound; unsigned bit = sym & 1; // RangeEnc_EncodeBit(rc, probs + m, bit); sym >>= 1; RC_BIT(rc, probs + m, bit); - m = (m << 1) | bit; - } + m = (m << 1) | bit; + } while (--numBits); rc->range = range; -} - - - -static void LenEnc_Init(CLenEnc *p) -{ - unsigned i; +} + + + +static void LenEnc_Init(CLenEnc *p) +{ + unsigned i; for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) - p->low[i] = kProbInitValue; - for (i = 0; i < kLenNumHighSymbols; i++) - p->high[i] = kProbInitValue; -} - + p->low[i] = kProbInitValue; + for (i = 0; i < kLenNumHighSymbols; i++) + p->high[i] = kProbInitValue; +} + static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState) -{ +{ UInt32 range, ttt, newBound; CLzmaProb *probs = p->low; range = rc->range; RC_BIT_PRE(rc, probs); if (sym >= kLenNumLowSymbols) - { + { RC_BIT_1(rc, probs); probs += 
kLenNumLowSymbols; RC_BIT_PRE(rc, probs); if (sym >= kLenNumLowSymbols * 2) - { + { RC_BIT_1(rc, probs); rc->range = range; // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); return; - } + } sym -= kLenNumLowSymbols; - } - + } + // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym); - { + { unsigned m; unsigned bit; RC_BIT_0(rc, probs); @@ -860,11 +860,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; bit = sym & 1; RC_BIT(rc, probs + m, bit); rc->range = range; - } -} - + } +} + static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) -{ +{ unsigned i; for (i = 0; i < 8; i += 2) { @@ -876,15 +876,15 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price prices[i ] = price + GET_PRICEa_0(prob); prices[i + 1] = price + GET_PRICEa_1(prob); } -} - - +} + + MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( CLenPriceEnc *p, unsigned numPosStates, const CLenEnc *enc, const CProbPrice *ProbPrices) -{ +{ UInt32 b; { @@ -902,7 +902,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices); } } - + /* { unsigned i; @@ -920,7 +920,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( // p->counter = numSymbols; // p->counter = 64; - + { unsigned i = p->tableSize; @@ -948,7 +948,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( price += GET_PRICEa(probs[sym], bit); } while (sym >= 2); - + { unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); @@ -956,7 +956,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( } } while (i); - + { unsigned posState; size_t num = 
(p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); @@ -968,66 +968,66 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( } /* - #ifdef SHOW_STAT - g_STAT_OFFSET += num; - printf("\n MovePos %u", num); - #endif + #ifdef SHOW_STAT + g_STAT_OFFSET += num; + printf("\n MovePos %u", num); + #endif */ - + #define MOVE_POS(p, num) { \ p->additionalOffset += (num); \ p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); } - + static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) -{ +{ unsigned numPairs; p->additionalOffset++; - p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); + numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); *numPairsRes = numPairs; - - #ifdef SHOW_STAT - printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); - g_STAT_OFFSET++; - { + + #ifdef SHOW_STAT + printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); + g_STAT_OFFSET++; + { unsigned i; - for (i = 0; i < numPairs; i += 2) - printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); - } - #endif - + for (i = 0; i < numPairs; i += 2) + printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); + } + #endif + if (numPairs == 0) return 0; - { + { unsigned len = p->matches[(size_t)numPairs - 2]; if (len != p->numFastBytes) return len; - { - UInt32 numAvail = p->numAvail; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - { + { + UInt32 numAvail = p->numAvail; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + { const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; const Byte *p2 = p1 + len; - ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; + ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; const Byte *lim = p1 + numAvail; for (; p2 != lim && *p2 == p2[dif]; 
p2++) {} return (unsigned)(p2 - p1); - } - } - } -} - + } + } + } +} + #define MARK_LIT ((UInt32)(Int32)-1) - + #define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } #define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } #define IsShortRep(p) ((p)->dist == 0) - - + + #define GetPrice_ShortRep(p, state, posState) \ ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) @@ -1039,53 +1039,53 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) MY_FORCE_INLINE static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) -{ - UInt32 price; +{ + UInt32 price; UInt32 prob = p->isRepG0[state]; - if (repIndex == 0) - { + if (repIndex == 0) + { price = GET_PRICE_0(prob); - price += GET_PRICE_1(p->isRep0Long[state][posState]); - } - else - { + price += GET_PRICE_1(p->isRep0Long[state][posState]); + } + else + { price = GET_PRICE_1(prob); prob = p->isRepG1[state]; - if (repIndex == 1) + if (repIndex == 1) price += GET_PRICE_0(prob); - else - { + else + { price += GET_PRICE_1(prob); - price += GET_PRICE(p->isRepG2[state], repIndex - 2); - } - } - return price; -} - + price += GET_PRICE(p->isRepG2[state], repIndex - 2); + } + } + return price; +} + static unsigned Backward(CLzmaEnc *p, unsigned cur) -{ +{ unsigned wr = cur + 1; p->optEnd = wr; - + for (;;) - { + { UInt32 dist = p->opt[cur].dist; unsigned len = (unsigned)p->opt[cur].len; unsigned extra = (unsigned)p->opt[cur].extra; cur -= len; if (extra) - { + { wr--; p->opt[wr].len = (UInt32)len; cur -= extra; len = extra; if (extra == 1) - { + { p->opt[wr].dist = dist; dist = MARK_LIT; - } + } else { p->opt[wr].dist = 0; @@ -1094,35 +1094,35 @@ static unsigned Backward(CLzmaEnc *p, unsigned cur) p->opt[wr].dist = MARK_LIT; p->opt[wr].len = 1; } - } + } if (cur == 0) - { + { p->backRes = dist; p->optCur = wr; return len; - } + } wr--; p->opt[wr].dist = dist; p->opt[wr].len = (UInt32)len; - } -} - - - + } +} + + + #define 
LIT_PROBS(pos, prevByte) \ (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc)) - - + + static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { unsigned last, cur; UInt32 reps[LZMA_NUM_REPS]; unsigned repLens[LZMA_NUM_REPS]; UInt32 *matches; - - { + + { UInt32 numAvail; unsigned numPairs, mainLen, repMaxIndex, i, posState; UInt32 matchPrice, repMatchPrice; @@ -1134,17 +1134,17 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (p->additionalOffset == 0) mainLen = ReadMatchDistances(p, &numPairs); else - { + { mainLen = p->longestMatchLen; numPairs = p->numPairs; - } + } numAvail = p->numAvail; if (numAvail < 2) - { + { p->backRes = MARK_LIT; return 1; - } + } if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; @@ -1152,22 +1152,22 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) repMaxIndex = 0; for (i = 0; i < LZMA_NUM_REPS; i++) - { + { unsigned len; const Byte *data2; reps[i] = p->reps[i]; data2 = data - reps[i]; if (data[0] != data2[0] || data[1] != data2[1]) - { + { repLens[i] = 0; continue; - } + } for (len = 2; len < numAvail && data[len] == data2[len]; len++) {} repLens[i] = len; if (len > repLens[repMaxIndex]) repMaxIndex = i; - } + } if (repLens[repMaxIndex] >= p->numFastBytes) { @@ -1189,7 +1189,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) curByte = *data; matchByte = *(data - reps[0]); - + last = repLens[repMaxIndex]; if (last <= mainLen) last = mainLen; @@ -1211,7 +1211,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); } - + MakeAs_Lit(&p->opt[1]); matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); @@ -1219,18 +1219,18 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) // 18.06 if (matchByte == curByte && repLens[0] == 0) - { + { UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState); if (shortRepPrice 
< p->opt[1].price) - { + { p->opt[1].price = shortRepPrice; MakeAs_ShortRep(&p->opt[1]); - } + } if (last < 2) - { + { p->backRes = p->opt[1].dist; return 1; - } + } } p->opt[1].len = 1; @@ -1250,7 +1250,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) continue; price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState); do - { + { UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen); COptimal *opt = &p->opt[repLen]; if (price2 < opt->price) @@ -1260,9 +1260,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) opt->dist = (UInt32)i; opt->extra = 0; } - } + } while (--repLen >= 2); - } + } // ---------- MATCH ---------- @@ -1272,7 +1272,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { unsigned offs = 0; UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - + if (len < 2) len = 2; else @@ -1316,35 +1316,35 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } } - + cur = 0; - #ifdef SHOW_STAT2 - /* if (position >= 0) */ - { - unsigned i; - printf("\n pos = %4X", position); + #ifdef SHOW_STAT2 + /* if (position >= 0) */ + { + unsigned i; + printf("\n pos = %4X", position); for (i = cur; i <= last; i++) - printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); - } - #endif - } - + printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); + } + #endif + } + // ---------- Optimal Parsing ---------- - for (;;) - { + for (;;) + { unsigned numAvail; UInt32 numAvailFull; unsigned newLen, numPairs, prev, state, posState, startLen; UInt32 litPrice, matchPrice, repMatchPrice; BoolInt nextIsLit; - Byte curByte, matchByte; - const Byte *data; + Byte curByte, matchByte; + const Byte *data; COptimal *curOpt, *nextOpt; - + if (++cur == last) break; @@ -1373,19 +1373,19 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) cur = best; break; } - - newLen = ReadMatchDistances(p, &numPairs); + + newLen = ReadMatchDistances(p, &numPairs); - if (newLen >= p->numFastBytes) - 
{ - p->numPairs = numPairs; + if (newLen >= p->numFastBytes) + { + p->numPairs = numPairs; p->longestMatchLen = newLen; break; - } + } curOpt = &p->opt[cur]; - position++; + position++; // we need that check here, if skip_items in p->opt are possible /* @@ -1396,40 +1396,40 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) prev = cur - curOpt->len; if (curOpt->len == 1) - { + { state = (unsigned)p->opt[prev].state; - if (IsShortRep(curOpt)) - state = kShortRepNextStates[state]; - else - state = kLiteralNextStates[state]; - } - else - { - const COptimal *prevOpt; + if (IsShortRep(curOpt)) + state = kShortRepNextStates[state]; + else + state = kLiteralNextStates[state]; + } + else + { + const COptimal *prevOpt; UInt32 b0; UInt32 dist = curOpt->dist; if (curOpt->extra) - { + { prev -= (unsigned)curOpt->extra; state = kState_RepAfterLit; if (curOpt->extra == 1) state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit); - } - else - { + } + else + { state = (unsigned)p->opt[prev].state; if (dist < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } prevOpt = &p->opt[prev]; b0 = prevOpt->reps[0]; if (dist < LZMA_NUM_REPS) - { + { if (dist == 0) { reps[0] = b0; @@ -1454,28 +1454,28 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) reps[3] = prevOpt->reps[dist ^ 1]; } } - } - else - { + } + else + { reps[0] = (dist - LZMA_NUM_REPS + 1); reps[1] = b0; reps[2] = prevOpt->reps[1]; reps[3] = prevOpt->reps[2]; - } - } + } + } - curOpt->state = (CState)state; + curOpt->state = (CState)state; curOpt->reps[0] = reps[0]; curOpt->reps[1] = reps[1]; curOpt->reps[2] = reps[2]; curOpt->reps[3] = reps[3]; - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - curByte = *data; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + curByte = *data; matchByte = *(data - reps[0]); - - posState = 
(position & p->pbMask); - + + posState = (position & p->pbMask); + /* The order of Price checks: < LIT @@ -1485,16 +1485,16 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) < MATCH [ : LIT : REP_0 ] */ - { + { UInt32 curPrice = curOpt->price; unsigned prob = p->isMatch[state][posState]; matchPrice = curPrice + GET_PRICE_1(prob); litPrice = curPrice + GET_PRICE_0(prob); - } - - nextOpt = &p->opt[(size_t)cur + 1]; + } + + nextOpt = &p->opt[(size_t)cur + 1]; nextIsLit = False; - + // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice) // 18.new.06 if ((nextOpt->price < kInfinityPrice @@ -1504,7 +1504,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) ) litPrice = 0; else - { + { const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); litPrice += (!IsLitState(state) ? LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : @@ -1517,12 +1517,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) MakeAs_Lit(nextOpt); nextIsLit = True; } - } - - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); - + } + + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + numAvailFull = p->numAvail; - { + { unsigned temp = kNumOpts - 1 - cur; if (numAvailFull > temp) numAvailFull = (UInt32)temp; @@ -1545,18 +1545,18 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState); // if (shortRepPrice <= nextOpt->price) // 17.old if (shortRepPrice < nextOpt->price) // 18.new - { - nextOpt->price = shortRepPrice; + { + nextOpt->price = shortRepPrice; nextOpt->len = 1; MakeAs_ShortRep(nextOpt); nextIsLit = False; - } - } + } + } - if (numAvailFull < 2) - continue; - numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - + if (numAvailFull < 2) + continue; + numAvail = (numAvailFull <= p->numFastBytes ? 
numAvailFull : p->numFastBytes); + // numAvail <= p->numFastBytes // ---------- LIT : REP_0 ---------- @@ -1565,10 +1565,10 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) && litPrice != 0 // 18.new && matchByte != curByte && numAvailFull > 2) - { + { const Byte *data2 = data - reps[0]; if (data[1] == data2[1] && data[2] == data2[2]) - { + { unsigned len; unsigned limit = p->numFastBytes + 1; if (limit > numAvailFull) @@ -1576,11 +1576,11 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) for (len = 3; len < limit && data[len] == data2[len]; len++) {} - { + { unsigned state2 = kLiteralNextStates[state]; unsigned posState2 = (position + 1) & p->pbMask; UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2); - { + { unsigned offset = cur + len; if (last < offset) @@ -1605,19 +1605,19 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } } // while (len >= 3); - } - } - } - } - - startLen = 2; /* speed optimization */ - - { + } + } + } + } + + startLen = 2; /* speed optimization */ + + { // ---------- REP ---------- unsigned repIndex = 0; // 17.old // unsigned repIndex = IsLitState(state) ? 
0 : 1; // 18.notused for (; repIndex < LZMA_NUM_REPS; repIndex++) - { + { unsigned len; UInt32 price; const Byte *data2 = data - reps[repIndex]; @@ -1629,11 +1629,11 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) // if (len < startLen) continue; // 18.new: speed optimization - { + { unsigned offset = cur + len; if (last < offset) last = offset; - } + } { unsigned len2 = len; price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState); @@ -1651,32 +1651,32 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } while (--len2 >= 2); } - + if (repIndex == 0) startLen = len + 1; // 17.old // startLen = len + 1; // 18.new /* if (_maxMode) */ - { + { // ---------- REP : LIT : REP_0 ---------- // numFastBytes + 1 + numFastBytes unsigned len2 = len + 1; unsigned limit = len2 + p->numFastBytes; - if (limit > numAvailFull) - limit = numAvailFull; + if (limit > numAvailFull) + limit = numAvailFull; len2 += 2; if (len2 <= limit) if (data[len2 - 2] == data2[len2 - 2]) if (data[len2 - 1] == data2[len2 - 1]) - { + { unsigned state2 = kRepNextStates[state]; unsigned posState2 = (position + len) & p->pbMask; price += GET_PRICE_LEN(&p->repLenEnc, posState, len) + GET_PRICE_0(p->isMatch[state2][posState2]) + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), data[len], data2[len], p->ProbPrices); - + // state2 = kLiteralNextStates[state2]; state2 = kState_LitAfterRep; posState2 = (posState2 + 1) & p->pbMask; @@ -1690,13 +1690,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) len2 -= len; // if (len2 >= 3) { - { + { unsigned offset = cur + len + len2; if (last < offset) last = offset; // do - { + { UInt32 price2; COptimal *opt; len2--; @@ -1712,31 +1712,31 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) opt->extra = (CExtra)(len + 1); opt->dist = (UInt32)repIndex; } - } + } // while (len2 >= 3); - } - } + } } - } + } + } } - } + } // ---------- MATCH ---------- /* for (unsigned len = 2; len <= newLen; len++) 
*/ - if (newLen > numAvail) - { - newLen = numAvail; - for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2); + if (newLen > numAvail) + { + newLen = numAvail; + for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2); matches[numPairs] = (UInt32)newLen; - numPairs += 2; - } + numPairs += 2; + } // startLen = 2; /* speed optimization */ - if (newLen >= startLen) - { - UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + if (newLen >= startLen) + { + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); UInt32 dist; unsigned offs, posSlot, len; @@ -1745,19 +1745,19 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (last < offset) last = offset; } - - offs = 0; - while (startLen > matches[offs]) - offs += 2; + + offs = 0; + while (startLen > matches[offs]) + offs += 2; dist = matches[(size_t)offs + 1]; // if (dist >= kNumFullDistances) GetPosSlot2(dist, posSlot); for (len = /*2*/ startLen; ; len++) - { + { UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); - { + { COptimal *opt; unsigned lenNorm = len - 2; lenNorm = GetLenToPosState2(lenNorm); @@ -1774,24 +1774,24 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) opt->dist = dist + LZMA_NUM_REPS; opt->extra = 0; } - } - + } + if (len == matches[offs]) - { + { // if (p->_maxMode) { // MATCH : LIT : REP_0 const Byte *data2 = data - dist - 1; unsigned len2 = len + 1; unsigned limit = len2 + p->numFastBytes; - if (limit > numAvailFull) - limit = numAvailFull; + if (limit > numAvailFull) + limit = numAvailFull; len2 += 2; if (len2 <= limit) if (data[len2 - 2] == data2[len2 - 2]) if (data[len2 - 1] == data2[len2 - 1]) - { + { for (; len2 < limit && data[len2] == data2[len2]; len2++) {} @@ -1817,138 +1817,138 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (last < offset) last = offset; // do - { + { UInt32 price2; - COptimal *opt; + COptimal *opt; len2--; // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, 
posState2); price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); - opt = &p->opt[offset]; + opt = &p->opt[offset]; // offset--; if (price2 < opt->price) - { + { opt->price = price2; opt->len = (UInt32)len2; opt->extra = (CExtra)(len + 1); opt->dist = dist + LZMA_NUM_REPS; - } - } + } + } // while (len2 >= 3); - } + } } - offs += 2; - if (offs == numPairs) - break; + offs += 2; + if (offs == numPairs) + break; dist = matches[(size_t)offs + 1]; // if (dist >= kNumFullDistances) GetPosSlot2(dist, posSlot); - } - } - } - } + } + } + } + } do p->opt[last].price = kInfinityPrice; while (--last); return Backward(p, cur); -} - +} -#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) - + +#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) + static unsigned GetOptimumFast(CLzmaEnc *p) -{ +{ UInt32 numAvail, mainDist; unsigned mainLen, numPairs, repIndex, repLen, i; - const Byte *data; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { + const Byte *data; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { mainLen = p->longestMatchLen; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; p->backRes = MARK_LIT; - if (numAvail < 2) - return 1; + if (numAvail < 2) + return 1; // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repLen = repIndex = 0; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repLen = repIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - { + for (i = 0; i < LZMA_NUM_REPS; i++) + { unsigned len; const Byte *data2 = data - p->reps[i]; - if (data[0] != data2[0] || data[1] != data2[1]) - continue; + if (data[0] != data2[0] || 
data[1] != data2[1]) + continue; for (len = 2; len < numAvail && data[len] == data2[len]; len++) {} - if (len >= p->numFastBytes) - { + if (len >= p->numFastBytes) + { p->backRes = (UInt32)i; MOVE_POS(p, len - 1) - return len; - } - if (len > repLen) - { - repIndex = i; - repLen = len; - } - } - - if (mainLen >= p->numFastBytes) - { + return len; + } + if (len > repLen) + { + repIndex = i; + repLen = len; + } + } + + if (mainLen >= p->numFastBytes) + { p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; MOVE_POS(p, mainLen - 1) - return mainLen; - } - - mainDist = 0; /* for GCC */ + return mainLen; + } + + mainDist = 0; /* for GCC */ - if (mainLen >= 2) - { + if (mainLen >= 2) + { mainDist = p->matches[(size_t)numPairs - 1]; while (numPairs > 2) - { + { UInt32 dist2; if (mainLen != p->matches[(size_t)numPairs - 4] + 1) - break; + break; dist2 = p->matches[(size_t)numPairs - 3]; if (!ChangePair(dist2, mainDist)) break; - numPairs -= 2; + numPairs -= 2; mainLen--; mainDist = dist2; - } - if (mainLen == 2 && mainDist >= 0x80) - mainLen = 1; - } - + } + if (mainLen == 2 && mainDist >= 0x80) + mainLen = 1; + } + if (repLen >= 2) if ( repLen + 1 >= mainLen || (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || (repLen + 3 >= mainLen && mainDist >= (1 << 15))) - { + { p->backRes = (UInt32)repIndex; MOVE_POS(p, repLen - 1) - return repLen; - } - - if (mainLen < 2 || numAvail <= 2) - return 1; - - { + return repLen; + } + + if (mainLen < 2 || numAvail <= 2) + return 1; + + { unsigned len1 = ReadMatchDistances(p, &p->numPairs); p->longestMatchLen = len1; @@ -1961,17 +1961,17 @@ static unsigned GetOptimumFast(CLzmaEnc *p) || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist))) return 1; } - } - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + } - for (i = 0; i < LZMA_NUM_REPS; i++) - { + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { unsigned len, limit; 
const Byte *data2 = data - p->reps[i]; - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - limit = mainLen - 1; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + limit = mainLen - 1; for (len = 2;; len++) { if (len >= limit) @@ -1979,21 +1979,21 @@ static unsigned GetOptimumFast(CLzmaEnc *p) if (data[len] != data2[len]) break; } - } + } p->backRes = mainDist + LZMA_NUM_REPS; if (mainLen != 2) { MOVE_POS(p, mainLen - 2) } - return mainLen; -} - + return mainLen; +} + static void WriteEndMarker(CLzmaEnc *p, unsigned posState) -{ +{ UInt32 range; range = p->rc.range; { @@ -2005,7 +2005,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) RC_BIT_PRE(&p->rc, prob) RC_BIT_0(&p->rc, prob) } - p->state = kMatchNextStates[p->state]; + p->state = kMatchNextStates[p->state]; p->rc.range = range; LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState); @@ -2050,37 +2050,37 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) while (m < kAlignTableSize); } p->rc.range = range; -} - +} + + +static SRes CheckErrors(CLzmaEnc *p) +{ + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + if (p->matchFinderBase.result != SZ_OK) + p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) + p->finished = True; + return p->result; +} -static SRes CheckErrors(CLzmaEnc *p) -{ - if (p->result != SZ_OK) - return p->result; - if (p->rc.res != SZ_OK) - p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) - p->result = SZ_ERROR_READ; - if (p->result != SZ_OK) - p->finished = True; - return p->result; -} - MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) -{ - /* ReleaseMFStream(); */ - p->finished = True; - if (p->writeEndMark) - WriteEndMarker(p, nowPos & p->pbMask); - RangeEnc_FlushData(&p->rc); - RangeEnc_FlushStream(&p->rc); - return CheckErrors(p); -} - +{ + /* ReleaseMFStream(); */ + p->finished = True; + if (p->writeEndMark) + WriteEndMarker(p, nowPos & p->pbMask); + 
RangeEnc_FlushData(&p->rc); + RangeEnc_FlushStream(&p->rc); + return CheckErrors(p); +} + MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) -{ +{ unsigned i; const CProbPrice *ProbPrices = p->ProbPrices; const CLzmaProb *probs = p->posAlignEncoder; @@ -2100,21 +2100,21 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); } -} - +} + MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) -{ +{ // int y; for (y = 0; y < 100; y++) { - UInt32 tempPrices[kNumFullDistances]; + UInt32 tempPrices[kNumFullDistances]; unsigned i, lps; const CProbPrice *ProbPrices = p->ProbPrices; p->matchPriceCount = 0; for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++) - { + { unsigned posSlot = GetPosSlot1(i); unsigned footerBits = (posSlot >> 1) - 1; unsigned base = ((2 | (posSlot & 1)) << footerBits); @@ -2141,10 +2141,10 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) tempPrices[base ] = price + GET_PRICEa_0(prob); tempPrices[base + offset] = price + GET_PRICEa_1(prob); } - } - + } + for (lps = 0; lps < kNumLenToPosStates; lps++) - { + { unsigned slot; unsigned distTableSize2 = (p->distTableSize + 1) >> 1; UInt32 *posSlotPrices = p->posSlotPrices[lps]; @@ -2176,8 +2176,8 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) delta += ((UInt32)1 << kNumBitPriceShiftBits); } } - - { + + { UInt32 *dp = p->distancesPrices[lps]; dp[0] = posSlotPrices[0]; @@ -2191,118 +2191,118 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) dp[i ] = slotPrice + tempPrices[i]; dp[i + 1] = slotPrice + tempPrices[i + 1]; } - } - } + } + } // } -} - +} -void LzmaEnc_Construct(CLzmaEnc *p) -{ - RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); - - #ifndef _7ZIP_ST - MatchFinderMt_Construct(&p->matchFinderMt); - p->matchFinderMt.MatchFinder = &p->matchFinderBase; - #endif - - { - 
CLzmaEncProps props; - LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); - } - - #ifndef LZMA_LOG_BSR - LzmaEnc_FastPosInit(p->g_FastPos); - #endif - - LzmaEnc_InitPriceTables(p->ProbPrices); - p->litProbs = NULL; - p->saveState.litProbs = NULL; -} - -CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) -{ - void *p; - p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); - if (p) - LzmaEnc_Construct((CLzmaEnc *)p); - return p; -} - -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->litProbs); - ISzAlloc_Free(alloc, p->saveState.litProbs); - p->litProbs = NULL; - p->saveState.litProbs = NULL; -} - -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - #ifndef _7ZIP_ST - MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); - #endif - - MatchFinder_Free(&p->matchFinderBase, allocBig); - LzmaEnc_FreeLits(p, alloc); - RangeEnc_Free(&p->rc, alloc); -} - -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); - ISzAlloc_Free(alloc, p); -} - +void LzmaEnc_Construct(CLzmaEnc *p) +{ + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&p->matchFinderBase); + + #ifndef _7ZIP_ST + MatchFinderMt_Construct(&p->matchFinderMt); + p->matchFinderMt.MatchFinder = &p->matchFinderBase; + #endif + + { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + LzmaEnc_SetProps(p, &props); + } + + #ifndef LZMA_LOG_BSR + LzmaEnc_FastPosInit(p->g_FastPos); + #endif + + LzmaEnc_InitPriceTables(p->ProbPrices); + p->litProbs = NULL; + p->saveState.litProbs = NULL; + +} + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) +{ + void *p; + p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); + if (p) + LzmaEnc_Construct((CLzmaEnc *)p); + return p; +} + +void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->litProbs); + ISzAlloc_Free(alloc, p->saveState.litProbs); + p->litProbs = NULL; + p->saveState.litProbs = NULL; +} + +void 
LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + #ifndef _7ZIP_ST + MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); + #endif + + MatchFinder_Free(&p->matchFinderBase, allocBig); + LzmaEnc_FreeLits(p, alloc); + RangeEnc_Free(&p->rc, alloc); +} + +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); + ISzAlloc_Free(alloc, p); +} + SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpackSize) -{ +{ CLzmaEnc *p = (CLzmaEnc *) pp; - UInt32 nowPos32, startPos32; - if (p->needInit) - { - p->matchFinder.Init(p->matchFinderObj); - p->needInit = 0; - } - - if (p->finished) - return p->result; - RINOK(CheckErrors(p)); - - nowPos32 = (UInt32)p->nowPos64; - startPos32 = nowPos32; - - if (p->nowPos64 == 0) - { + UInt32 nowPos32, startPos32; + if (p->needInit) + { + p->matchFinder.Init(p->matchFinderObj); + p->needInit = 0; + } + + if (p->finished) + return p->result; + RINOK(CheckErrors(p)); + + nowPos32 = (UInt32)p->nowPos64; + startPos32 = nowPos32; + + if (p->nowPos64 == 0) + { unsigned numPairs; - Byte curByte; - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - return Flush(p, nowPos32); - ReadMatchDistances(p, &numPairs); + Byte curByte; + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + return Flush(p, nowPos32); + ReadMatchDistances(p, &numPairs); RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]); // p->state = kLiteralNextStates[p->state]; - curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); - LitEnc_Encode(&p->rc, p->litProbs, curByte); - p->additionalOffset--; - nowPos32++; - } - - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); + LitEnc_Encode(&p->rc, p->litProbs, curByte); + p->additionalOffset--; + nowPos32++; + } + + if 
(p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) - for (;;) - { + for (;;) + { UInt32 dist; unsigned len, posState; UInt32 range, ttt, newBound; CLzmaProb *probs; - if (p->fastMode) + if (p->fastMode) len = GetOptimumFast(p); - else + else { unsigned oci = p->optCur; if (p->optEnd == oci) @@ -2315,7 +2315,7 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac p->optCur = oci + 1; } } - + posState = (unsigned)nowPos32 & p->pbMask; range = p->rc.range; probs = &p->isMatch[p->state][posState]; @@ -2324,41 +2324,41 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac dist = p->backRes; - #ifdef SHOW_STAT2 + #ifdef SHOW_STAT2 printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist); - #endif - + #endif + if (dist == MARK_LIT) - { - Byte curByte; - const Byte *data; + { + Byte curByte; + const Byte *data; unsigned state; - + RC_BIT_0(&p->rc, probs); p->rc.range = range; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; probs = LIT_PROBS(nowPos32, *(data - 1)); - curByte = *data; + curByte = *data; state = p->state; p->state = kLiteralNextStates[state]; if (IsLitState(state)) - LitEnc_Encode(&p->rc, probs, curByte); - else + LitEnc_Encode(&p->rc, probs, curByte); + else LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0])); - } - else - { - RC_BIT_1(&p->rc, probs); + } + else + { + RC_BIT_1(&p->rc, probs); probs = &p->isRep[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist < LZMA_NUM_REPS) - { + { RC_BIT_1(&p->rc, probs); probs = &p->isRepG0[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 0) - { + { RC_BIT_0(&p->rc, probs); probs = &p->isRep0Long[p->state][posState]; RC_BIT_PRE(&p->rc, probs) @@ -2371,9 +2371,9 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac RC_BIT_0_BASE(&p->rc, probs); p->state = 
kShortRepNextStates[p->state]; } - } - else - { + } + else + { RC_BIT_1(&p->rc, probs); probs = &p->isRepG1[p->state]; RC_BIT_PRE(&p->rc, probs) @@ -2382,8 +2382,8 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac RC_BIT_0_BASE(&p->rc, probs); dist = p->reps[1]; } - else - { + else + { RC_BIT_1(&p->rc, probs); probs = &p->isRepG2[p->state]; RC_BIT_PRE(&p->rc, probs) @@ -2396,31 +2396,31 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac { RC_BIT_1_BASE(&p->rc, probs); dist = p->reps[3]; - p->reps[3] = p->reps[2]; + p->reps[3] = p->reps[2]; } - p->reps[2] = p->reps[1]; - } - p->reps[1] = p->reps[0]; + p->reps[2] = p->reps[1]; + } + p->reps[1] = p->reps[0]; p->reps[0] = dist; - } + } RC_NORM(&p->rc) p->rc.range = range; if (len != 1) - { + { LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); --p->repLenEncCounter; - p->state = kRepNextStates[p->state]; - } - } - else - { + p->state = kRepNextStates[p->state]; + } + } + else + { unsigned posSlot; RC_BIT_0(&p->rc, probs); p->rc.range = range; - p->state = kMatchNextStates[p->state]; + p->state = kMatchNextStates[p->state]; LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); // --p->lenEnc.counter; @@ -2430,11 +2430,11 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac p->reps[2] = p->reps[1]; p->reps[1] = p->reps[0]; p->reps[0] = dist + 1; - + p->matchPriceCount++; GetPosSlot(dist, posSlot); // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); - { + { UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); range = p->rc.range; probs = p->posSlotEncoder[GetLenToPosState(len)]; @@ -2452,14 +2452,14 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac if (dist >= kStartPosModelIndex) { unsigned footerBits = ((posSlot >> 1) - 1); - + if (dist < kNumFullDistances) { unsigned base = ((2 | (posSlot & 1)) << footerBits); 
RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */)); } - else - { + else + { UInt32 pos2 = (dist | 0xF) << (32 - footerBits); range = p->rc.range; // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); @@ -2494,31 +2494,31 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac p->rc.range = range; // p->alignPriceCount++; } - } - } - } - } + } + } + } + } nowPos32 += (UInt32)len; - p->additionalOffset -= len; + p->additionalOffset -= len; - if (p->additionalOffset == 0) - { - UInt32 processed; + if (p->additionalOffset == 0) + { + UInt32 processed; - if (!p->fastMode) - { + if (!p->fastMode) + { /* if (p->alignPriceCount >= 16) // kAlignTableSize FillAlignPrices(p); if (p->matchPriceCount >= 128) - FillDistancesPrices(p); + FillDistancesPrices(p); if (p->lenEnc.counter <= 0) LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); */ if (p->matchPriceCount >= 64) { - FillAlignPrices(p); + FillAlignPrices(p); // { int y; for (y = 0; y < 100; y++) { FillDistancesPrices(p); // }} @@ -2529,131 +2529,131 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac p->repLenEncCounter = REP_LEN_COUNT; LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); } - } + } - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - break; - processed = nowPos32 - startPos32; + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + break; + processed = nowPos32 - startPos32; if (maxPackSize) - { + { if (processed + kNumOpts + 300 >= maxUnpackSize || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize) - break; - } - else if (processed >= (1 << 17)) - { - p->nowPos64 += nowPos32 - startPos32; - return CheckErrors(p); - } - } - } - - p->nowPos64 += nowPos32 - startPos32; - return Flush(p, nowPos32); -} - + break; + } + else if (processed >= (1 << 17)) + { + 
p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } + } + } + p->nowPos64 += nowPos32 - startPos32; + return Flush(p, nowPos32); +} -#define kBigHashDicLimit ((UInt32)1 << 24) - -static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - UInt32 beforeSize = kNumOpts; - if (!RangeEnc_Alloc(&p->rc, alloc)) - return SZ_ERROR_MEM; - - #ifndef _7ZIP_ST - p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0)); - #endif - - { - unsigned lclp = p->lc + p->lp; - if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) - { - LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - if (!p->litProbs || !p->saveState.litProbs) - { - LzmaEnc_FreeLits(p, alloc); - return SZ_ERROR_MEM; - } - p->lclp = lclp; - } - } - - p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 
1 : 0); - - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; - - #ifndef _7ZIP_ST - if (p->mtMode) - { + + +#define kBigHashDicLimit ((UInt32)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + UInt32 beforeSize = kNumOpts; + if (!RangeEnc_Alloc(&p->rc, alloc)) + return SZ_ERROR_MEM; + + #ifndef _7ZIP_ST + p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0)); + #endif + + { + unsigned lclp = p->lc + p->lp; + if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) + { + LzmaEnc_FreeLits(p, alloc); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + if (!p->litProbs || !p->saveState.litProbs) + { + LzmaEnc_FreeLits(p, alloc); + return SZ_ERROR_MEM; + } + p->lclp = lclp; + } + } + + p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); + + if (beforeSize + p->dictSize < keepWindowSize) + beforeSize = keepWindowSize - p->dictSize; + + #ifndef _7ZIP_ST + if (p->mtMode) + { RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ , allocBig)); - p->matchFinderObj = &p->matchFinderMt; - p->matchFinderBase.bigHash = (Byte)( - (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 
1 : 0); - MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); - } - else - #endif - { - if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) - return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); - } - - return SZ_OK; -} - -void LzmaEnc_Init(CLzmaEnc *p) -{ + p->matchFinderObj = &p->matchFinderMt; + p->matchFinderBase.bigHash = (Byte)( + (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0); + MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); + } + else + #endif + { + if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) + return SZ_ERROR_MEM; + p->matchFinderObj = &p->matchFinderBase; + MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + } + + return SZ_OK; +} + +void LzmaEnc_Init(CLzmaEnc *p) +{ unsigned i; - p->state = 0; + p->state = 0; p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - - RangeEnc_Init(&p->rc); - + + RangeEnc_Init(&p->rc); + for (i = 0; i < (1 << kNumAlignBits); i++) p->posAlignEncoder[i] = kProbInitValue; - - for (i = 0; i < kNumStates; i++) - { + + for (i = 0; i < kNumStates; i++) + { unsigned j; - for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) - { - p->isMatch[i][j] = kProbInitValue; - p->isRep0Long[i][j] = kProbInitValue; - } - p->isRep[i] = kProbInitValue; - p->isRepG0[i] = kProbInitValue; - p->isRepG1[i] = kProbInitValue; - p->isRepG2[i] = kProbInitValue; - } - - { - for (i = 0; i < kNumLenToPosStates; i++) - { - CLzmaProb *probs = p->posSlotEncoder[i]; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) + { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + + { + for (i = 0; i < kNumLenToPosStates; 
i++) + { + CLzmaProb *probs = p->posSlotEncoder[i]; unsigned j; - for (j = 0; j < (1 << kNumPosSlotBits); j++) - probs[j] = kProbInitValue; - } - } - { + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + } + { for (i = 0; i < kNumFullDistances; i++) - p->posEncoders[i] = kProbInitValue; - } - + p->posEncoders[i] = kProbInitValue; + } + { UInt32 num = (UInt32)0x300 << (p->lp + p->lc); UInt32 k; @@ -2661,8 +2661,8 @@ void LzmaEnc_Init(CLzmaEnc *p) for (k = 0; k < num; k++) probs[k] = kProbInitValue; } - - + + LenEnc_Init(&p->lenProbs); LenEnc_Init(&p->repLenProbs); @@ -2674,307 +2674,307 @@ void LzmaEnc_Init(CLzmaEnc *p) p->opt[i].price = kInfinityPrice; } - p->additionalOffset = 0; - - p->pbMask = (1 << p->pb) - 1; + p->additionalOffset = 0; + + p->pbMask = (1 << p->pb) - 1; p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); -} - +} -void LzmaEnc_InitPrices(CLzmaEnc *p) -{ - if (!p->fastMode) - { - FillDistancesPrices(p); - FillAlignPrices(p); - } - - p->lenEnc.tableSize = - p->repLenEnc.tableSize = - p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + +void LzmaEnc_InitPrices(CLzmaEnc *p) +{ + if (!p->fastMode) + { + FillDistancesPrices(p); + FillAlignPrices(p); + } + + p->lenEnc.tableSize = + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; p->repLenEncCounter = REP_LEN_COUNT; LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); -} - -static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ +} + +static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ unsigned i; for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++) - if (p->dictSize <= ((UInt32)1 << i)) - break; - p->distTableSize = i * 2; - - p->finished = False; - p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, 
allocBig)); - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - p->nowPos64 = 0; - return SZ_OK; -} - + if (p->dictSize <= ((UInt32)1 << i)) + break; + p->distTableSize = i * 2; + + p->finished = False; + p->result = SZ_OK; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + p->nowPos64 = 0; + return SZ_OK; +} + SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, - ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; - p->needInit = 1; - p->rc.outStream = outStream; - return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); -} - -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream *inStream, UInt32 keepWindowSize, - ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; - p->needInit = 1; - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) -{ - p->matchFinderBase.directInput = 1; - p->matchFinderBase.bufferBase = (Byte *)src; - p->matchFinderBase.directInputRem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->needInit = 1; - - LzmaEnc_SetDataSize(pp, srcLen); - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -void LzmaEnc_Finish(CLzmaEncHandle pp) -{ - #ifndef _7ZIP_ST - CLzmaEnc *p = (CLzmaEnc *)pp; - if (p->mtMode) - MatchFinderMt_ReleaseStream(&p->matchFinderMt); - #else - UNUSED_VAR(pp); - #endif -} - - -typedef struct -{ - ISeqOutStream vt; - Byte *data; - SizeT rem; + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->matchFinderBase.stream = inStream; + p->needInit = 1; + p->rc.outStream = outStream; + return 
LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); +} + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, + ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->matchFinderBase.stream = inStream; + p->needInit = 1; + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) +{ + p->matchFinderBase.directInput = 1; + p->matchFinderBase.bufferBase = (Byte *)src; + p->matchFinderBase.directInputRem = srcLen; +} + +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + LzmaEnc_SetInputBuf(p, src, srcLen); + p->needInit = 1; + + LzmaEnc_SetDataSize(pp, srcLen); + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +void LzmaEnc_Finish(CLzmaEncHandle pp) +{ + #ifndef _7ZIP_ST + CLzmaEnc *p = (CLzmaEnc *)pp; + if (p->mtMode) + MatchFinderMt_ReleaseStream(&p->matchFinderMt); + #else + UNUSED_VAR(pp); + #endif +} + + +typedef struct +{ + ISeqOutStream vt; + Byte *data; + SizeT rem; BoolInt overflow; -} CLzmaEnc_SeqOutStreamBuf; - -static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) -{ - CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); - if (p->rem < size) - { - size = p->rem; - p->overflow = True; - } - memcpy(p->data, data, size); - p->rem -= size; - p->data += size; - return size; -} - - -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) -{ - const CLzmaEnc *p = (CLzmaEnc *)pp; - return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); -} - - -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) -{ - const CLzmaEnc *p = (CLzmaEnc *)pp; - return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; -} - - +} CLzmaEnc_SeqOutStreamBuf; + +static size_t 
SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) +{ + CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); + if (p->rem < size) + { + size = p->rem; + p->overflow = True; + } + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + return size; +} + + +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); +} + + +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +} + + SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, - Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - UInt64 nowPos64; - SRes res; - CLzmaEnc_SeqOutStreamBuf outStream; - - outStream.vt.Write = SeqOutStreamBuf_Write; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = False; - p->finished = False; - p->result = SZ_OK; - - if (reInit) - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + UInt64 nowPos64; + SRes res; + CLzmaEnc_SeqOutStreamBuf outStream; + + outStream.vt.Write = SeqOutStreamBuf_Write; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = False; + p->finished = False; + p->result = SZ_OK; + + if (reInit) + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + + nowPos64 = p->nowPos64; + RangeEnc_Init(&p->rc); + p->rc.outStream = &outStream.vt; - nowPos64 = p->nowPos64; - RangeEnc_Init(&p->rc); - p->rc.outStream = &outStream.vt; - if (desiredPackSize == 0) return SZ_ERROR_OUTPUT_EOF; res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); - - *unpackSize = (UInt32)(p->nowPos64 - nowPos64); - *destLen -= 
outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - - return res; -} - - -static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) -{ - SRes res = SZ_OK; - - #ifndef _7ZIP_ST - Byte allocaDummy[0x300]; - allocaDummy[0] = 0; - allocaDummy[1] = allocaDummy[0]; - #endif - - for (;;) - { + + *unpackSize = (UInt32)(p->nowPos64 - nowPos64); + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + + return res; +} + + +static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) +{ + SRes res = SZ_OK; + + #ifndef _7ZIP_ST + Byte allocaDummy[0x300]; + allocaDummy[0] = 0; + allocaDummy[1] = allocaDummy[0]; + #endif + + for (;;) + { res = LzmaEnc_CodeOneBlock(p, 0, 0); - if (res != SZ_OK || p->finished) - break; - if (progress) - { - res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); - if (res != SZ_OK) - { - res = SZ_ERROR_PROGRESS; - break; - } - } - } - - LzmaEnc_Finish(p); - - /* - if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase)) - res = SZ_ERROR_FAIL; - } - */ - - return res; -} - - -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, - ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); - return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); -} - - -SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - unsigned i; - UInt32 dictSize = p->dictSize; - if (*size < LZMA_PROPS_SIZE) - return SZ_ERROR_PARAM; - *size = LZMA_PROPS_SIZE; - props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); - - if (dictSize >= ((UInt32)1 << 22)) - { - UInt32 kDictMask = ((UInt32)1 << 20) - 1; - if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) - dictSize = (dictSize + kDictMask) & ~kDictMask; - } - else for (i = 11; i <= 30; i++) - { - if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << 
i); break; } - if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } - } - - for (i = 0; i < 4; i++) - props[1 + i] = (Byte)(dictSize >> (8 * i)); - return SZ_OK; -} - - -unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) -{ - return ((CLzmaEnc *)pp)->writeEndMark; -} - - -SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - SRes res; - CLzmaEnc *p = (CLzmaEnc *)pp; - - CLzmaEnc_SeqOutStreamBuf outStream; - - outStream.vt.Write = SeqOutStreamBuf_Write; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = writeEndMark; - p->rc.outStream = &outStream.vt; - - res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); - - if (res == SZ_OK) - { - res = LzmaEnc_Encode2(p, progress); - if (res == SZ_OK && p->nowPos64 != srcLen) - res = SZ_ERROR_FAIL; - } - - *destLen -= outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - return res; -} - - -SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); - SRes res; - if (!p) - return SZ_ERROR_MEM; - - res = LzmaEnc_SetProps(p, props); - if (res == SZ_OK) - { - res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); - if (res == SZ_OK) - res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, - writeEndMark, progress, alloc, allocBig); - } - - LzmaEnc_Destroy(p, alloc, allocBig); - return res; -} + if (res != SZ_OK || p->finished) + break; + if (progress) + { + res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + if (res != SZ_OK) + { + res = SZ_ERROR_PROGRESS; + break; + } + } + } + + LzmaEnc_Finish(p); + + /* + if (res == SZ_OK && 
!Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase)) + res = SZ_ERROR_FAIL; + } + */ + + return res; +} + + +SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); + return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); +} + + +SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + unsigned i; + UInt32 dictSize = p->dictSize; + if (*size < LZMA_PROPS_SIZE) + return SZ_ERROR_PARAM; + *size = LZMA_PROPS_SIZE; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + if (dictSize >= ((UInt32)1 << 22)) + { + UInt32 kDictMask = ((UInt32)1 << 20) - 1; + if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) + dictSize = (dictSize + kDictMask) & ~kDictMask; + } + else for (i = 11; i <= 30; i++) + { + if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } + if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } + } + + for (i = 0; i < 4; i++) + props[1 + i] = (Byte)(dictSize >> (8 * i)); + return SZ_OK; +} + + +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) +{ + return ((CLzmaEnc *)pp)->writeEndMark; +} + + +SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + SRes res; + CLzmaEnc *p = (CLzmaEnc *)pp; + + CLzmaEnc_SeqOutStreamBuf outStream; + + outStream.vt.Write = SeqOutStreamBuf_Write; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = writeEndMark; + p->rc.outStream = &outStream.vt; + + res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); + + if (res == SZ_OK) + { + res = LzmaEnc_Encode2(p, progress); + if (res == SZ_OK && p->nowPos64 != srcLen) + res = SZ_ERROR_FAIL; + } + + *destLen -= outStream.rem; + if 
(outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + return res; +} + + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); + SRes res; + if (!p) + return SZ_ERROR_MEM; + + res = LzmaEnc_SetProps(p, props); + if (res == SZ_OK) + { + res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); + if (res == SZ_OK) + res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, + writeEndMark, progress, alloc, allocBig); + } + + LzmaEnc_Destroy(p, alloc, allocBig); + return res; +} BoolInt LzmaEnc_IsFinished(CLzmaEncHandle pp) { diff --git a/contrib/libs/lzmasdk/LzmaEnc.h b/contrib/libs/lzmasdk/LzmaEnc.h index 55c257d54d3..37a0906c7e9 100644 --- a/contrib/libs/lzmasdk/LzmaEnc.h +++ b/contrib/libs/lzmasdk/LzmaEnc.h @@ -1,78 +1,78 @@ -/* LzmaEnc.h -- LZMA Encoder -2017-07-27 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_ENC_H -#define __LZMA_ENC_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaEncProps -{ - int level; /* 0 <= level <= 9 */ - UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version - (1 << 12) <= dictSize <= (3 << 29) for 64-bit version - default = (1 << 24) */ - int lc; /* 0 <= lc <= 8, default = 3 */ - int lp; /* 0 <= lp <= 4, default = 0 */ - int pb; /* 0 <= pb <= 4, default = 2 */ - int algo; /* 0 - fast, 1 - normal, default = 1 */ - int fb; /* 5 <= fb <= 273, default = 32 */ - int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ - int numHashBytes; /* 2, 3 or 4, default = 4 */ - UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ - unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ - int numThreads; /* 1 or 2, default = 2 */ - - UInt64 reduceSize; /* estimated size of data that will be compressed. 
default = (UInt64)(Int64)-1. - Encoder uses this value to reduce dictionary size */ -} CLzmaEncProps; - -void LzmaEncProps_Init(CLzmaEncProps *p); -void LzmaEncProps_Normalize(CLzmaEncProps *p); -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); - - -/* ---------- CLzmaEncHandle Interface ---------- */ - -/* LzmaEnc* functions can return the following exit codes: -SRes: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater in props - SZ_ERROR_WRITE - ISeqOutStream write callback error - SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output - SZ_ERROR_PROGRESS - some break from progress callback - SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) -*/ - -typedef void * CLzmaEncHandle; - -CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); - -SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); -void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); -SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); -unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); - -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); - - -/* ---------- One Call Interface ---------- */ - -SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); - -EXTERN_C_END - +/* LzmaEnc.h -- LZMA Encoder +2017-07-27 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_ENC_H +#define __LZMA_ENC_H + +#include 
"7zTypes.h" + +EXTERN_C_BEGIN + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaEncProps +{ + int level; /* 0 <= level <= 9 */ + UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version + (1 << 12) <= dictSize <= (3 << 29) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ + + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. + Encoder uses this value to reduce dictionary size */ +} CLzmaEncProps; + +void LzmaEncProps_Init(CLzmaEncProps *p); +void LzmaEncProps_Normalize(CLzmaEncProps *p); +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); + + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc* functions can return the following exit codes: +SRes: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater in props + SZ_ERROR_WRITE - ISeqOutStream write callback error + SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output + SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) +*/ + +typedef void * CLzmaEncHandle; + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); +SRes 
LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); + +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + + +/* ---------- One Call Interface ---------- */ + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +EXTERN_C_END + /* ---------- Streaming Interface ---------- */ SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ISzAllocPtr alloc, ISzAllocPtr allocBig); @@ -80,4 +80,4 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac BoolInt LzmaEnc_IsFinished(CLzmaEncHandle pp); void LzmaEnc_Finish(CLzmaEncHandle pp); -#endif +#endif diff --git a/contrib/libs/lzmasdk/LzmaLib.c b/contrib/libs/lzmasdk/LzmaLib.c index 9403aedee6d..706e9e58cd6 100644 --- a/contrib/libs/lzmasdk/LzmaLib.c +++ b/contrib/libs/lzmasdk/LzmaLib.c @@ -1,40 +1,40 @@ -/* LzmaLib.c -- LZMA library wrapper -2015-06-13 : Igor Pavlov : Public domain */ - -#include "Alloc.h" -#include "LzmaDec.h" -#include "LzmaEnc.h" -#include "LzmaLib.h" - -MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, - unsigned char *outProps, size_t *outPropsSize, - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* use (1 << N) or (3 << N). 
4 KB < dictSize <= 128 MB */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ -) -{ - CLzmaEncProps props; - LzmaEncProps_Init(&props); - props.level = level; - props.dictSize = dictSize; - props.lc = lc; - props.lp = lp; - props.pb = pb; - props.fb = fb; - props.numThreads = numThreads; - - return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, - NULL, &g_Alloc, &g_Alloc); -} - - -MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, - const unsigned char *props, size_t propsSize) -{ - ELzmaStatus status; - return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); -} +/* LzmaLib.c -- LZMA library wrapper +2015-06-13 : Igor Pavlov : Public domain */ + +#include "Alloc.h" +#include "LzmaDec.h" +#include "LzmaEnc.h" +#include "LzmaLib.h" + +MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* use (1 << N) or (3 << N). 
4 KB < dictSize <= 128 MB */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ +) +{ + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = level; + props.dictSize = dictSize; + props.lc = lc; + props.lp = lp; + props.pb = pb; + props.fb = fb; + props.numThreads = numThreads; + + return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, + NULL, &g_Alloc, &g_Alloc); +} + + +MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, + const unsigned char *props, size_t propsSize) +{ + ELzmaStatus status; + return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); +} diff --git a/contrib/libs/lzmasdk/LzmaLib.h b/contrib/libs/lzmasdk/LzmaLib.h index d4afea8393f..88fa87d350c 100644 --- a/contrib/libs/lzmasdk/LzmaLib.h +++ b/contrib/libs/lzmasdk/LzmaLib.h @@ -1,131 +1,131 @@ -/* LzmaLib.h -- LZMA library interface -2013-01-18 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_LIB_H -#define __LZMA_LIB_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -#define MY_STDAPI int MY_STD_CALL - -#define LZMA_PROPS_SIZE 5 - -/* -RAM requirements for LZMA: - for compression: (dictSize * 11.5 + 6 MB) + state_size - for decompression: dictSize + state_size - state_size = (4 + (1.5 << (lc + lp))) KB - by default (lc=3, lp=0), state_size = 16 KB. - -LZMA properties (5 bytes) format - Offset Size Description - 0 1 lc, lp and pb in encoded form. - 1 4 dictSize (little endian). -*/ - -/* -LzmaCompress ------------- - -outPropsSize - - In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. - Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. 
- - LZMA Encoder will use defult values for any parameter, if it is - -1 for any from: level, loc, lp, pb, fb, numThreads - 0 for dictSize - -level - compression level: 0 <= level <= 9; - - level dictSize algo fb - 0: 16 KB 0 32 - 1: 64 KB 0 32 - 2: 256 KB 0 32 - 3: 1 MB 0 32 - 4: 4 MB 0 32 - 5: 16 MB 1 32 - 6: 32 MB 1 32 - 7+: 64 MB 1 64 +/* LzmaLib.h -- LZMA library interface +2013-01-18 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_LIB_H +#define __LZMA_LIB_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define MY_STDAPI int MY_STD_CALL + +#define LZMA_PROPS_SIZE 5 + +/* +RAM requirements for LZMA: + for compression: (dictSize * 11.5 + 6 MB) + state_size + for decompression: dictSize + state_size + state_size = (4 + (1.5 << (lc + lp))) KB + by default (lc=3, lp=0), state_size = 16 KB. + +LZMA properties (5 bytes) format + Offset Size Description + 0 1 lc, lp and pb in encoded form. + 1 4 dictSize (little endian). +*/ + +/* +LzmaCompress +------------ + +outPropsSize - + In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + + LZMA Encoder will use defult values for any parameter, if it is + -1 for any from: level, loc, lp, pb, fb, numThreads + 0 for dictSize - The default value for "level" is 5. - - algo = 0 means fast method - algo = 1 means normal method - -dictSize - The dictionary size in bytes. The maximum value is - 128 MB = (1 << 27) bytes for 32-bit version - 1 GB = (1 << 30) bytes for 64-bit version - The default value is 16 MB = (1 << 24) bytes. - It's recommended to use the dictionary that is larger than 4 KB and - that can be calculated as (1 << N) or (3 << N) sizes. - -lc - The number of literal context bits (high bits of previous literal). - It can be in the range from 0 to 8. The default value is 3. - Sometimes lc=4 gives the gain for big files. 
- -lp - The number of literal pos bits (low bits of current position for literals). - It can be in the range from 0 to 4. The default value is 0. - The lp switch is intended for periodical data when the period is equal to 2^lp. - For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's - better to set lc=0, if you change lp switch. - -pb - The number of pos bits (low bits of current position). - It can be in the range from 0 to 4. The default value is 2. - The pb switch is intended for periodical data when the period is equal 2^pb. - -fb - Word size (the number of fast bytes). - It can be in the range from 5 to 273. The default value is 32. - Usually, a big number gives a little bit better compression ratio and - slower compression process. - -numThreads - The number of thereads. 1 or 2. The default value is 2. - Fast mode (algo = 0) can use only 1 thread. - -Out: - destLen - processed output size -Returns: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater - SZ_ERROR_OUTPUT_EOF - output buffer overflow - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ - -MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, - unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* default = (1 << 24) */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ - ); - -/* -LzmaUncompress --------------- -In: - dest - output data - destLen - output data size - src - input data - srcLen - input data size -Out: - destLen - processed output size - srcLen - processed input size -Returns: - SZ_OK - OK - SZ_ERROR_DATA - Data error - SZ_ERROR_MEM - Memory allocation arror - SZ_ERROR_UNSUPPORTED - 
Unsupported properties - SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) -*/ - -MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, - const unsigned char *props, size_t propsSize); - -EXTERN_C_END - -#endif +level - compression level: 0 <= level <= 9; + + level dictSize algo fb + 0: 16 KB 0 32 + 1: 64 KB 0 32 + 2: 256 KB 0 32 + 3: 1 MB 0 32 + 4: 4 MB 0 32 + 5: 16 MB 1 32 + 6: 32 MB 1 32 + 7+: 64 MB 1 64 + + The default value for "level" is 5. + + algo = 0 means fast method + algo = 1 means normal method + +dictSize - The dictionary size in bytes. The maximum value is + 128 MB = (1 << 27) bytes for 32-bit version + 1 GB = (1 << 30) bytes for 64-bit version + The default value is 16 MB = (1 << 24) bytes. + It's recommended to use the dictionary that is larger than 4 KB and + that can be calculated as (1 << N) or (3 << N) sizes. + +lc - The number of literal context bits (high bits of previous literal). + It can be in the range from 0 to 8. The default value is 3. + Sometimes lc=4 gives the gain for big files. + +lp - The number of literal pos bits (low bits of current position for literals). + It can be in the range from 0 to 4. The default value is 0. + The lp switch is intended for periodical data when the period is equal to 2^lp. + For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's + better to set lc=0, if you change lp switch. + +pb - The number of pos bits (low bits of current position). + It can be in the range from 0 to 4. The default value is 2. + The pb switch is intended for periodical data when the period is equal 2^pb. + +fb - Word size (the number of fast bytes). + It can be in the range from 5 to 273. The default value is 32. + Usually, a big number gives a little bit better compression ratio and + slower compression process. + +numThreads - The number of thereads. 1 or 2. The default value is 2. + Fast mode (algo = 0) can use only 1 thread. 
+ +Out: + destLen - processed output size +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); + +/* +LzmaUncompress +-------------- +In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size +Out: + destLen - processed output size + srcLen - processed input size +Returns: + SZ_OK - OK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation arror + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) +*/ + +MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, + const unsigned char *props, size_t propsSize); + +EXTERN_C_END + +#endif diff --git a/contrib/libs/lzmasdk/Precomp.h b/contrib/libs/lzmasdk/Precomp.h index ab1ee910a5b..e8ff8b40e81 100644 --- a/contrib/libs/lzmasdk/Precomp.h +++ b/contrib/libs/lzmasdk/Precomp.h @@ -1,10 +1,10 @@ -/* Precomp.h -- StdAfx -2013-11-12 : Igor Pavlov : Public domain */ - -#ifndef __7Z_PRECOMP_H -#define __7Z_PRECOMP_H - -#include "Compiler.h" -/* #include "7zTypes.h" */ - -#endif +/* Precomp.h -- StdAfx +2013-11-12 : Igor Pavlov : Public domain */ + +#ifndef __7Z_PRECOMP_H +#define __7Z_PRECOMP_H + +#include "Compiler.h" +/* #include "7zTypes.h" */ + +#endif diff --git a/contrib/libs/lzmasdk/ya.make 
b/contrib/libs/lzmasdk/ya.make index 68f6605e7de..db0a55788d7 100644 --- a/contrib/libs/lzmasdk/ya.make +++ b/contrib/libs/lzmasdk/ya.make @@ -1,26 +1,26 @@ -LIBRARY() +LIBRARY() LICENSE(Public-Domain) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( g:contrib g:cpp-contrib ) - + # https://www.7-zip.org/sdk.html VERSION(19.00) -CFLAGS(-D_7ZIP_ST=1) - +CFLAGS(-D_7ZIP_ST=1) + NO_UTIL() - -SRCS( - 7zStream.c + +SRCS( + 7zStream.c Aes.c AesOpt.c - Alloc.c + Alloc.c Bra.c Bra86.c BraIA64.c @@ -28,10 +28,10 @@ SRCS( LzFind.c Lzma2Dec.c Lzma2Enc.c - LzmaDec.c - LzmaEnc.c - LzmaLib.c + LzmaDec.c + LzmaEnc.c + LzmaLib.c Sha256.c -) - -END() +) + +END() diff --git a/contrib/libs/nayuki_md5/md5-fast-x8664.S b/contrib/libs/nayuki_md5/md5-fast-x8664.S index ac8fa4cdaae..a48f499385e 100644 --- a/contrib/libs/nayuki_md5/md5-fast-x8664.S +++ b/contrib/libs/nayuki_md5/md5-fast-x8664.S @@ -1,171 +1,171 @@ -/* - * MD5 hash in x86-64 assembly - * - * Copyright (c) 2016 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. 
In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - - -/* void md5_compress(uint32_t state[4], const uint8_t block[64]) */ -.globl md5_compress -md5_compress: - /* - * Storage usage: - * Bytes Location Description - * 4 eax MD5 state variable A - * 4 ebx MD5 state variable B - * 4 ecx MD5 state variable C - * 4 edx MD5 state variable D - * 4 esi Temporary for calculation per round - * 4 edi Temporary for calculation per round - * 8 rbp Base address of block array argument (read-only) - * 8 r8 Base address of state array argument (read-only) - * 16 xmm0 Caller's value of rbx (only low 64 bits are used) - * 16 xmm1 Caller's value of rbp (only low 64 bits are used) - */ - - #define ROUND0(a, b, c, d, k, s, t) \ - movl %c, %esi; \ - addl (k*4)(%rbp), %a; \ - xorl %d, %esi; \ - andl %b, %esi; \ - xorl %d, %esi; \ - leal t(%esi,%a), %a; \ - roll $s, %a; \ - addl %b, %a; - - #define ROUND1(a, b, c, d, k, s, t) \ - movl %d, %esi; \ - movl %d, %edi; \ - addl (k*4)(%rbp), %a; \ - notl %esi; \ - andl %b, %edi; \ - andl %c, %esi; \ - orl %edi, %esi; \ - leal t(%esi,%a), %a; \ - roll $s, %a; \ - addl %b, %a; - - #define ROUND2(a, b, c, d, k, s, t) \ - movl %c, %esi; \ - addl (k*4)(%rbp), %a; \ - xorl %d, %esi; \ - xorl %b, %esi; \ - leal t(%esi,%a), %a; \ - roll $s, %a; \ - addl %b, %a; - - #define ROUND3(a, b, c, d, k, s, t) \ - movl %d, %esi; \ - not %esi; \ - addl (k*4)(%rbp), %a; \ - orl %b, %esi; \ - xorl %c, %esi; \ - leal t(%esi,%a), %a; \ - roll $s, %a; \ - addl %b, %a; - - /* Save registers */ - movq %rbx, %xmm0 - movq %rbp, %xmm1 - - /* Load arguments */ - movq %rsi, %rbp - movl 0(%rdi), %eax /* a */ - movl 4(%rdi), %ebx /* b */ - movl 8(%rdi), %ecx /* c */ - movl 12(%rdi), %edx /* d */ - movq %rdi, %r8 - - /* 64 rounds of hashing */ - 
ROUND0(eax, ebx, ecx, edx, 0, 7, -0x28955B88) - ROUND0(edx, eax, ebx, ecx, 1, 12, -0x173848AA) - ROUND0(ecx, edx, eax, ebx, 2, 17, 0x242070DB) - ROUND0(ebx, ecx, edx, eax, 3, 22, -0x3E423112) - ROUND0(eax, ebx, ecx, edx, 4, 7, -0x0A83F051) - ROUND0(edx, eax, ebx, ecx, 5, 12, 0x4787C62A) - ROUND0(ecx, edx, eax, ebx, 6, 17, -0x57CFB9ED) - ROUND0(ebx, ecx, edx, eax, 7, 22, -0x02B96AFF) - ROUND0(eax, ebx, ecx, edx, 8, 7, 0x698098D8) - ROUND0(edx, eax, ebx, ecx, 9, 12, -0x74BB0851) - ROUND0(ecx, edx, eax, ebx, 10, 17, -0x0000A44F) - ROUND0(ebx, ecx, edx, eax, 11, 22, -0x76A32842) - ROUND0(eax, ebx, ecx, edx, 12, 7, 0x6B901122) - ROUND0(edx, eax, ebx, ecx, 13, 12, -0x02678E6D) - ROUND0(ecx, edx, eax, ebx, 14, 17, -0x5986BC72) - ROUND0(ebx, ecx, edx, eax, 15, 22, 0x49B40821) - ROUND1(eax, ebx, ecx, edx, 1, 5, -0x09E1DA9E) - ROUND1(edx, eax, ebx, ecx, 6, 9, -0x3FBF4CC0) - ROUND1(ecx, edx, eax, ebx, 11, 14, 0x265E5A51) - ROUND1(ebx, ecx, edx, eax, 0, 20, -0x16493856) - ROUND1(eax, ebx, ecx, edx, 5, 5, -0x29D0EFA3) - ROUND1(edx, eax, ebx, ecx, 10, 9, 0x02441453) - ROUND1(ecx, edx, eax, ebx, 15, 14, -0x275E197F) - ROUND1(ebx, ecx, edx, eax, 4, 20, -0x182C0438) - ROUND1(eax, ebx, ecx, edx, 9, 5, 0x21E1CDE6) - ROUND1(edx, eax, ebx, ecx, 14, 9, -0x3CC8F82A) - ROUND1(ecx, edx, eax, ebx, 3, 14, -0x0B2AF279) - ROUND1(ebx, ecx, edx, eax, 8, 20, 0x455A14ED) - ROUND1(eax, ebx, ecx, edx, 13, 5, -0x561C16FB) - ROUND1(edx, eax, ebx, ecx, 2, 9, -0x03105C08) - ROUND1(ecx, edx, eax, ebx, 7, 14, 0x676F02D9) - ROUND1(ebx, ecx, edx, eax, 12, 20, -0x72D5B376) - ROUND2(eax, ebx, ecx, edx, 5, 4, -0x0005C6BE) - ROUND2(edx, eax, ebx, ecx, 8, 11, -0x788E097F) - ROUND2(ecx, edx, eax, ebx, 11, 16, 0x6D9D6122) - ROUND2(ebx, ecx, edx, eax, 14, 23, -0x021AC7F4) - ROUND2(eax, ebx, ecx, edx, 1, 4, -0x5B4115BC) - ROUND2(edx, eax, ebx, ecx, 4, 11, 0x4BDECFA9) - ROUND2(ecx, edx, eax, ebx, 7, 16, -0x0944B4A0) - ROUND2(ebx, ecx, edx, eax, 10, 23, -0x41404390) - ROUND2(eax, ebx, ecx, edx, 13, 4, 0x289B7EC6) - 
ROUND2(edx, eax, ebx, ecx, 0, 11, -0x155ED806) - ROUND2(ecx, edx, eax, ebx, 3, 16, -0x2B10CF7B) - ROUND2(ebx, ecx, edx, eax, 6, 23, 0x04881D05) - ROUND2(eax, ebx, ecx, edx, 9, 4, -0x262B2FC7) - ROUND2(edx, eax, ebx, ecx, 12, 11, -0x1924661B) - ROUND2(ecx, edx, eax, ebx, 15, 16, 0x1FA27CF8) - ROUND2(ebx, ecx, edx, eax, 2, 23, -0x3B53A99B) - ROUND3(eax, ebx, ecx, edx, 0, 6, -0x0BD6DDBC) - ROUND3(edx, eax, ebx, ecx, 7, 10, 0x432AFF97) - ROUND3(ecx, edx, eax, ebx, 14, 15, -0x546BDC59) - ROUND3(ebx, ecx, edx, eax, 5, 21, -0x036C5FC7) - ROUND3(eax, ebx, ecx, edx, 12, 6, 0x655B59C3) - ROUND3(edx, eax, ebx, ecx, 3, 10, -0x70F3336E) - ROUND3(ecx, edx, eax, ebx, 10, 15, -0x00100B83) - ROUND3(ebx, ecx, edx, eax, 1, 21, -0x7A7BA22F) - ROUND3(eax, ebx, ecx, edx, 8, 6, 0x6FA87E4F) - ROUND3(edx, eax, ebx, ecx, 15, 10, -0x01D31920) - ROUND3(ecx, edx, eax, ebx, 6, 15, -0x5CFEBCEC) - ROUND3(ebx, ecx, edx, eax, 13, 21, 0x4E0811A1) - ROUND3(eax, ebx, ecx, edx, 4, 6, -0x08AC817E) - ROUND3(edx, eax, ebx, ecx, 11, 10, -0x42C50DCB) - ROUND3(ecx, edx, eax, ebx, 2, 15, 0x2AD7D2BB) - ROUND3(ebx, ecx, edx, eax, 9, 21, -0x14792C6F) - - /* Save updated state */ - addl %eax, 0(%r8) - addl %ebx, 4(%r8) - addl %ecx, 8(%r8) - addl %edx, 12(%r8) - - /* Restore registers */ - movq %xmm0, %rbx - movq %xmm1, %rbp - retq +/* + * MD5 hash in x86-64 assembly + * + * Copyright (c) 2016 Project Nayuki. 
(MIT License) + * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. 
+ */ + + +/* void md5_compress(uint32_t state[4], const uint8_t block[64]) */ +.globl md5_compress +md5_compress: + /* + * Storage usage: + * Bytes Location Description + * 4 eax MD5 state variable A + * 4 ebx MD5 state variable B + * 4 ecx MD5 state variable C + * 4 edx MD5 state variable D + * 4 esi Temporary for calculation per round + * 4 edi Temporary for calculation per round + * 8 rbp Base address of block array argument (read-only) + * 8 r8 Base address of state array argument (read-only) + * 16 xmm0 Caller's value of rbx (only low 64 bits are used) + * 16 xmm1 Caller's value of rbp (only low 64 bits are used) + */ + + #define ROUND0(a, b, c, d, k, s, t) \ + movl %c, %esi; \ + addl (k*4)(%rbp), %a; \ + xorl %d, %esi; \ + andl %b, %esi; \ + xorl %d, %esi; \ + leal t(%esi,%a), %a; \ + roll $s, %a; \ + addl %b, %a; + + #define ROUND1(a, b, c, d, k, s, t) \ + movl %d, %esi; \ + movl %d, %edi; \ + addl (k*4)(%rbp), %a; \ + notl %esi; \ + andl %b, %edi; \ + andl %c, %esi; \ + orl %edi, %esi; \ + leal t(%esi,%a), %a; \ + roll $s, %a; \ + addl %b, %a; + + #define ROUND2(a, b, c, d, k, s, t) \ + movl %c, %esi; \ + addl (k*4)(%rbp), %a; \ + xorl %d, %esi; \ + xorl %b, %esi; \ + leal t(%esi,%a), %a; \ + roll $s, %a; \ + addl %b, %a; + + #define ROUND3(a, b, c, d, k, s, t) \ + movl %d, %esi; \ + not %esi; \ + addl (k*4)(%rbp), %a; \ + orl %b, %esi; \ + xorl %c, %esi; \ + leal t(%esi,%a), %a; \ + roll $s, %a; \ + addl %b, %a; + + /* Save registers */ + movq %rbx, %xmm0 + movq %rbp, %xmm1 + + /* Load arguments */ + movq %rsi, %rbp + movl 0(%rdi), %eax /* a */ + movl 4(%rdi), %ebx /* b */ + movl 8(%rdi), %ecx /* c */ + movl 12(%rdi), %edx /* d */ + movq %rdi, %r8 + + /* 64 rounds of hashing */ + ROUND0(eax, ebx, ecx, edx, 0, 7, -0x28955B88) + ROUND0(edx, eax, ebx, ecx, 1, 12, -0x173848AA) + ROUND0(ecx, edx, eax, ebx, 2, 17, 0x242070DB) + ROUND0(ebx, ecx, edx, eax, 3, 22, -0x3E423112) + ROUND0(eax, ebx, ecx, edx, 4, 7, -0x0A83F051) + ROUND0(edx, eax, ebx, ecx, 5, 12, 
0x4787C62A) + ROUND0(ecx, edx, eax, ebx, 6, 17, -0x57CFB9ED) + ROUND0(ebx, ecx, edx, eax, 7, 22, -0x02B96AFF) + ROUND0(eax, ebx, ecx, edx, 8, 7, 0x698098D8) + ROUND0(edx, eax, ebx, ecx, 9, 12, -0x74BB0851) + ROUND0(ecx, edx, eax, ebx, 10, 17, -0x0000A44F) + ROUND0(ebx, ecx, edx, eax, 11, 22, -0x76A32842) + ROUND0(eax, ebx, ecx, edx, 12, 7, 0x6B901122) + ROUND0(edx, eax, ebx, ecx, 13, 12, -0x02678E6D) + ROUND0(ecx, edx, eax, ebx, 14, 17, -0x5986BC72) + ROUND0(ebx, ecx, edx, eax, 15, 22, 0x49B40821) + ROUND1(eax, ebx, ecx, edx, 1, 5, -0x09E1DA9E) + ROUND1(edx, eax, ebx, ecx, 6, 9, -0x3FBF4CC0) + ROUND1(ecx, edx, eax, ebx, 11, 14, 0x265E5A51) + ROUND1(ebx, ecx, edx, eax, 0, 20, -0x16493856) + ROUND1(eax, ebx, ecx, edx, 5, 5, -0x29D0EFA3) + ROUND1(edx, eax, ebx, ecx, 10, 9, 0x02441453) + ROUND1(ecx, edx, eax, ebx, 15, 14, -0x275E197F) + ROUND1(ebx, ecx, edx, eax, 4, 20, -0x182C0438) + ROUND1(eax, ebx, ecx, edx, 9, 5, 0x21E1CDE6) + ROUND1(edx, eax, ebx, ecx, 14, 9, -0x3CC8F82A) + ROUND1(ecx, edx, eax, ebx, 3, 14, -0x0B2AF279) + ROUND1(ebx, ecx, edx, eax, 8, 20, 0x455A14ED) + ROUND1(eax, ebx, ecx, edx, 13, 5, -0x561C16FB) + ROUND1(edx, eax, ebx, ecx, 2, 9, -0x03105C08) + ROUND1(ecx, edx, eax, ebx, 7, 14, 0x676F02D9) + ROUND1(ebx, ecx, edx, eax, 12, 20, -0x72D5B376) + ROUND2(eax, ebx, ecx, edx, 5, 4, -0x0005C6BE) + ROUND2(edx, eax, ebx, ecx, 8, 11, -0x788E097F) + ROUND2(ecx, edx, eax, ebx, 11, 16, 0x6D9D6122) + ROUND2(ebx, ecx, edx, eax, 14, 23, -0x021AC7F4) + ROUND2(eax, ebx, ecx, edx, 1, 4, -0x5B4115BC) + ROUND2(edx, eax, ebx, ecx, 4, 11, 0x4BDECFA9) + ROUND2(ecx, edx, eax, ebx, 7, 16, -0x0944B4A0) + ROUND2(ebx, ecx, edx, eax, 10, 23, -0x41404390) + ROUND2(eax, ebx, ecx, edx, 13, 4, 0x289B7EC6) + ROUND2(edx, eax, ebx, ecx, 0, 11, -0x155ED806) + ROUND2(ecx, edx, eax, ebx, 3, 16, -0x2B10CF7B) + ROUND2(ebx, ecx, edx, eax, 6, 23, 0x04881D05) + ROUND2(eax, ebx, ecx, edx, 9, 4, -0x262B2FC7) + ROUND2(edx, eax, ebx, ecx, 12, 11, -0x1924661B) + ROUND2(ecx, edx, eax, ebx, 15, 16, 
0x1FA27CF8) + ROUND2(ebx, ecx, edx, eax, 2, 23, -0x3B53A99B) + ROUND3(eax, ebx, ecx, edx, 0, 6, -0x0BD6DDBC) + ROUND3(edx, eax, ebx, ecx, 7, 10, 0x432AFF97) + ROUND3(ecx, edx, eax, ebx, 14, 15, -0x546BDC59) + ROUND3(ebx, ecx, edx, eax, 5, 21, -0x036C5FC7) + ROUND3(eax, ebx, ecx, edx, 12, 6, 0x655B59C3) + ROUND3(edx, eax, ebx, ecx, 3, 10, -0x70F3336E) + ROUND3(ecx, edx, eax, ebx, 10, 15, -0x00100B83) + ROUND3(ebx, ecx, edx, eax, 1, 21, -0x7A7BA22F) + ROUND3(eax, ebx, ecx, edx, 8, 6, 0x6FA87E4F) + ROUND3(edx, eax, ebx, ecx, 15, 10, -0x01D31920) + ROUND3(ecx, edx, eax, ebx, 6, 15, -0x5CFEBCEC) + ROUND3(ebx, ecx, edx, eax, 13, 21, 0x4E0811A1) + ROUND3(eax, ebx, ecx, edx, 4, 6, -0x08AC817E) + ROUND3(edx, eax, ebx, ecx, 11, 10, -0x42C50DCB) + ROUND3(ecx, edx, eax, ebx, 2, 15, 0x2AD7D2BB) + ROUND3(ebx, ecx, edx, eax, 9, 21, -0x14792C6F) + + /* Save updated state */ + addl %eax, 0(%r8) + addl %ebx, 4(%r8) + addl %ecx, 8(%r8) + addl %edx, 12(%r8) + + /* Restore registers */ + movq %xmm0, %rbx + movq %xmm1, %rbp + retq diff --git a/contrib/libs/nayuki_md5/md5.c b/contrib/libs/nayuki_md5/md5.c index 08973459ff8..6fce57700fb 100644 --- a/contrib/libs/nayuki_md5/md5.c +++ b/contrib/libs/nayuki_md5/md5.c @@ -1,134 +1,134 @@ -/* - * MD5 hash in C - * - * Copyright (c) 2016 Project Nayuki. (MIT License) - * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - The Software is provided "as is", without warranty of any kind, express or - * implied, including but not limited to the warranties of merchantability, - * fitness for a particular purpose and noninfringement. In no event shall the - * authors or copyright holders be liable for any claim, damages or other - * liability, whether in an action of contract, tort or otherwise, arising from, - * out of or in connection with the Software or the use or other dealings in the - * Software. - */ - -#include "md5.h" - -void md5_compress(uint32_t state[4], const uint8_t block[64]) { - #define LOADSCHEDULE(i) \ - schedule[i] = (uint32_t)block[i * 4 + 0] << 0 \ - | (uint32_t)block[i * 4 + 1] << 8 \ - | (uint32_t)block[i * 4 + 2] << 16 \ - | (uint32_t)block[i * 4 + 3] << 24; - - uint32_t schedule[16]; - LOADSCHEDULE( 0) - LOADSCHEDULE( 1) - LOADSCHEDULE( 2) - LOADSCHEDULE( 3) - LOADSCHEDULE( 4) - LOADSCHEDULE( 5) - LOADSCHEDULE( 6) - LOADSCHEDULE( 7) - LOADSCHEDULE( 8) - LOADSCHEDULE( 9) - LOADSCHEDULE(10) - LOADSCHEDULE(11) - LOADSCHEDULE(12) - LOADSCHEDULE(13) - LOADSCHEDULE(14) - LOADSCHEDULE(15) - - #define ROTL32(x, n) (((0U + (x)) << (n)) | ((x) >> (32 - (n)))) // Assumes that x is uint32_t and 0 < n < 32 - #define ROUND0(a, b, c, d, k, s, t) ROUND_TAIL(a, b, d ^ (b & (c ^ d)), k, s, t) - #define ROUND1(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (d & (b ^ c)), k, s, t) - #define ROUND2(a, b, c, d, k, s, t) ROUND_TAIL(a, b, b ^ c ^ d , k, s, t) - #define ROUND3(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (b | ~d) , k, s, t) - #define ROUND_TAIL(a, b, expr, k, s, t) \ - a = 0U + a + (expr) + UINT32_C(t) + schedule[k]; \ - a = 0U + b + ROTL32(a, s); - - uint32_t a = state[0]; - uint32_t b = state[1]; - uint32_t c = state[2]; - uint32_t d = state[3]; - - ROUND0(a, b, c, d, 0, 7, 0xD76AA478) - ROUND0(d, a, b, c, 1, 12, 0xE8C7B756) - ROUND0(c, d, a, b, 2, 17, 0x242070DB) - ROUND0(b, c, d, a, 3, 22, 0xC1BDCEEE) - ROUND0(a, b, c, d, 4, 7, 0xF57C0FAF) - ROUND0(d, a, b, c, 5, 12, 
0x4787C62A) - ROUND0(c, d, a, b, 6, 17, 0xA8304613) - ROUND0(b, c, d, a, 7, 22, 0xFD469501) - ROUND0(a, b, c, d, 8, 7, 0x698098D8) - ROUND0(d, a, b, c, 9, 12, 0x8B44F7AF) - ROUND0(c, d, a, b, 10, 17, 0xFFFF5BB1) - ROUND0(b, c, d, a, 11, 22, 0x895CD7BE) - ROUND0(a, b, c, d, 12, 7, 0x6B901122) - ROUND0(d, a, b, c, 13, 12, 0xFD987193) - ROUND0(c, d, a, b, 14, 17, 0xA679438E) - ROUND0(b, c, d, a, 15, 22, 0x49B40821) - ROUND1(a, b, c, d, 1, 5, 0xF61E2562) - ROUND1(d, a, b, c, 6, 9, 0xC040B340) - ROUND1(c, d, a, b, 11, 14, 0x265E5A51) - ROUND1(b, c, d, a, 0, 20, 0xE9B6C7AA) - ROUND1(a, b, c, d, 5, 5, 0xD62F105D) - ROUND1(d, a, b, c, 10, 9, 0x02441453) - ROUND1(c, d, a, b, 15, 14, 0xD8A1E681) - ROUND1(b, c, d, a, 4, 20, 0xE7D3FBC8) - ROUND1(a, b, c, d, 9, 5, 0x21E1CDE6) - ROUND1(d, a, b, c, 14, 9, 0xC33707D6) - ROUND1(c, d, a, b, 3, 14, 0xF4D50D87) - ROUND1(b, c, d, a, 8, 20, 0x455A14ED) - ROUND1(a, b, c, d, 13, 5, 0xA9E3E905) - ROUND1(d, a, b, c, 2, 9, 0xFCEFA3F8) - ROUND1(c, d, a, b, 7, 14, 0x676F02D9) - ROUND1(b, c, d, a, 12, 20, 0x8D2A4C8A) - ROUND2(a, b, c, d, 5, 4, 0xFFFA3942) - ROUND2(d, a, b, c, 8, 11, 0x8771F681) - ROUND2(c, d, a, b, 11, 16, 0x6D9D6122) - ROUND2(b, c, d, a, 14, 23, 0xFDE5380C) - ROUND2(a, b, c, d, 1, 4, 0xA4BEEA44) - ROUND2(d, a, b, c, 4, 11, 0x4BDECFA9) - ROUND2(c, d, a, b, 7, 16, 0xF6BB4B60) - ROUND2(b, c, d, a, 10, 23, 0xBEBFBC70) - ROUND2(a, b, c, d, 13, 4, 0x289B7EC6) - ROUND2(d, a, b, c, 0, 11, 0xEAA127FA) - ROUND2(c, d, a, b, 3, 16, 0xD4EF3085) - ROUND2(b, c, d, a, 6, 23, 0x04881D05) - ROUND2(a, b, c, d, 9, 4, 0xD9D4D039) - ROUND2(d, a, b, c, 12, 11, 0xE6DB99E5) - ROUND2(c, d, a, b, 15, 16, 0x1FA27CF8) - ROUND2(b, c, d, a, 2, 23, 0xC4AC5665) - ROUND3(a, b, c, d, 0, 6, 0xF4292244) - ROUND3(d, a, b, c, 7, 10, 0x432AFF97) - ROUND3(c, d, a, b, 14, 15, 0xAB9423A7) - ROUND3(b, c, d, a, 5, 21, 0xFC93A039) - ROUND3(a, b, c, d, 12, 6, 0x655B59C3) - ROUND3(d, a, b, c, 3, 10, 0x8F0CCC92) - ROUND3(c, d, a, b, 10, 15, 0xFFEFF47D) - ROUND3(b, c, d, a, 
1, 21, 0x85845DD1) - ROUND3(a, b, c, d, 8, 6, 0x6FA87E4F) - ROUND3(d, a, b, c, 15, 10, 0xFE2CE6E0) - ROUND3(c, d, a, b, 6, 15, 0xA3014314) - ROUND3(b, c, d, a, 13, 21, 0x4E0811A1) - ROUND3(a, b, c, d, 4, 6, 0xF7537E82) - ROUND3(d, a, b, c, 11, 10, 0xBD3AF235) - ROUND3(c, d, a, b, 2, 15, 0x2AD7D2BB) - ROUND3(b, c, d, a, 9, 21, 0xEB86D391) - - state[0] = 0U + state[0] + a; - state[1] = 0U + state[1] + b; - state[2] = 0U + state[2] + c; - state[3] = 0U + state[3] + d; -} +/* + * MD5 hash in C + * + * Copyright (c) 2016 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. 
+ */ + +#include "md5.h" + +void md5_compress(uint32_t state[4], const uint8_t block[64]) { + #define LOADSCHEDULE(i) \ + schedule[i] = (uint32_t)block[i * 4 + 0] << 0 \ + | (uint32_t)block[i * 4 + 1] << 8 \ + | (uint32_t)block[i * 4 + 2] << 16 \ + | (uint32_t)block[i * 4 + 3] << 24; + + uint32_t schedule[16]; + LOADSCHEDULE( 0) + LOADSCHEDULE( 1) + LOADSCHEDULE( 2) + LOADSCHEDULE( 3) + LOADSCHEDULE( 4) + LOADSCHEDULE( 5) + LOADSCHEDULE( 6) + LOADSCHEDULE( 7) + LOADSCHEDULE( 8) + LOADSCHEDULE( 9) + LOADSCHEDULE(10) + LOADSCHEDULE(11) + LOADSCHEDULE(12) + LOADSCHEDULE(13) + LOADSCHEDULE(14) + LOADSCHEDULE(15) + + #define ROTL32(x, n) (((0U + (x)) << (n)) | ((x) >> (32 - (n)))) // Assumes that x is uint32_t and 0 < n < 32 + #define ROUND0(a, b, c, d, k, s, t) ROUND_TAIL(a, b, d ^ (b & (c ^ d)), k, s, t) + #define ROUND1(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (d & (b ^ c)), k, s, t) + #define ROUND2(a, b, c, d, k, s, t) ROUND_TAIL(a, b, b ^ c ^ d , k, s, t) + #define ROUND3(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (b | ~d) , k, s, t) + #define ROUND_TAIL(a, b, expr, k, s, t) \ + a = 0U + a + (expr) + UINT32_C(t) + schedule[k]; \ + a = 0U + b + ROTL32(a, s); + + uint32_t a = state[0]; + uint32_t b = state[1]; + uint32_t c = state[2]; + uint32_t d = state[3]; + + ROUND0(a, b, c, d, 0, 7, 0xD76AA478) + ROUND0(d, a, b, c, 1, 12, 0xE8C7B756) + ROUND0(c, d, a, b, 2, 17, 0x242070DB) + ROUND0(b, c, d, a, 3, 22, 0xC1BDCEEE) + ROUND0(a, b, c, d, 4, 7, 0xF57C0FAF) + ROUND0(d, a, b, c, 5, 12, 0x4787C62A) + ROUND0(c, d, a, b, 6, 17, 0xA8304613) + ROUND0(b, c, d, a, 7, 22, 0xFD469501) + ROUND0(a, b, c, d, 8, 7, 0x698098D8) + ROUND0(d, a, b, c, 9, 12, 0x8B44F7AF) + ROUND0(c, d, a, b, 10, 17, 0xFFFF5BB1) + ROUND0(b, c, d, a, 11, 22, 0x895CD7BE) + ROUND0(a, b, c, d, 12, 7, 0x6B901122) + ROUND0(d, a, b, c, 13, 12, 0xFD987193) + ROUND0(c, d, a, b, 14, 17, 0xA679438E) + ROUND0(b, c, d, a, 15, 22, 0x49B40821) + ROUND1(a, b, c, d, 1, 5, 0xF61E2562) + ROUND1(d, a, b, c, 6, 9, 
0xC040B340) + ROUND1(c, d, a, b, 11, 14, 0x265E5A51) + ROUND1(b, c, d, a, 0, 20, 0xE9B6C7AA) + ROUND1(a, b, c, d, 5, 5, 0xD62F105D) + ROUND1(d, a, b, c, 10, 9, 0x02441453) + ROUND1(c, d, a, b, 15, 14, 0xD8A1E681) + ROUND1(b, c, d, a, 4, 20, 0xE7D3FBC8) + ROUND1(a, b, c, d, 9, 5, 0x21E1CDE6) + ROUND1(d, a, b, c, 14, 9, 0xC33707D6) + ROUND1(c, d, a, b, 3, 14, 0xF4D50D87) + ROUND1(b, c, d, a, 8, 20, 0x455A14ED) + ROUND1(a, b, c, d, 13, 5, 0xA9E3E905) + ROUND1(d, a, b, c, 2, 9, 0xFCEFA3F8) + ROUND1(c, d, a, b, 7, 14, 0x676F02D9) + ROUND1(b, c, d, a, 12, 20, 0x8D2A4C8A) + ROUND2(a, b, c, d, 5, 4, 0xFFFA3942) + ROUND2(d, a, b, c, 8, 11, 0x8771F681) + ROUND2(c, d, a, b, 11, 16, 0x6D9D6122) + ROUND2(b, c, d, a, 14, 23, 0xFDE5380C) + ROUND2(a, b, c, d, 1, 4, 0xA4BEEA44) + ROUND2(d, a, b, c, 4, 11, 0x4BDECFA9) + ROUND2(c, d, a, b, 7, 16, 0xF6BB4B60) + ROUND2(b, c, d, a, 10, 23, 0xBEBFBC70) + ROUND2(a, b, c, d, 13, 4, 0x289B7EC6) + ROUND2(d, a, b, c, 0, 11, 0xEAA127FA) + ROUND2(c, d, a, b, 3, 16, 0xD4EF3085) + ROUND2(b, c, d, a, 6, 23, 0x04881D05) + ROUND2(a, b, c, d, 9, 4, 0xD9D4D039) + ROUND2(d, a, b, c, 12, 11, 0xE6DB99E5) + ROUND2(c, d, a, b, 15, 16, 0x1FA27CF8) + ROUND2(b, c, d, a, 2, 23, 0xC4AC5665) + ROUND3(a, b, c, d, 0, 6, 0xF4292244) + ROUND3(d, a, b, c, 7, 10, 0x432AFF97) + ROUND3(c, d, a, b, 14, 15, 0xAB9423A7) + ROUND3(b, c, d, a, 5, 21, 0xFC93A039) + ROUND3(a, b, c, d, 12, 6, 0x655B59C3) + ROUND3(d, a, b, c, 3, 10, 0x8F0CCC92) + ROUND3(c, d, a, b, 10, 15, 0xFFEFF47D) + ROUND3(b, c, d, a, 1, 21, 0x85845DD1) + ROUND3(a, b, c, d, 8, 6, 0x6FA87E4F) + ROUND3(d, a, b, c, 15, 10, 0xFE2CE6E0) + ROUND3(c, d, a, b, 6, 15, 0xA3014314) + ROUND3(b, c, d, a, 13, 21, 0x4E0811A1) + ROUND3(a, b, c, d, 4, 6, 0xF7537E82) + ROUND3(d, a, b, c, 11, 10, 0xBD3AF235) + ROUND3(c, d, a, b, 2, 15, 0x2AD7D2BB) + ROUND3(b, c, d, a, 9, 21, 0xEB86D391) + + state[0] = 0U + state[0] + a; + state[1] = 0U + state[1] + b; + state[2] = 0U + state[2] + c; + state[3] = 0U + state[3] + d; +} diff --git 
a/contrib/libs/nayuki_md5/md5.h b/contrib/libs/nayuki_md5/md5.h index cef3110d4eb..aa1188092ef 100644 --- a/contrib/libs/nayuki_md5/md5.h +++ b/contrib/libs/nayuki_md5/md5.h @@ -1,9 +1,9 @@ -#pragma once - -#include - -#if defined(__cplusplus) -extern "C" -#endif - -void md5_compress(uint32_t state[4], const uint8_t block[64]); +#pragma once + +#include + +#if defined(__cplusplus) +extern "C" +#endif + +void md5_compress(uint32_t state[4], const uint8_t block[64]); diff --git a/contrib/libs/nayuki_md5/ya.make b/contrib/libs/nayuki_md5/ya.make index e8c03cecac5..15a6141c7a5 100644 --- a/contrib/libs/nayuki_md5/ya.make +++ b/contrib/libs/nayuki_md5/ya.make @@ -1,7 +1,7 @@ -LIBRARY() - -LICENSE(MIT) - +LIBRARY() + +LICENSE(MIT) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) VERSION(2016) @@ -13,8 +13,8 @@ OWNER( g:contrib g:cpp-contrib ) - -IF (OS_LINUX AND ARCH_X86_64) + +IF (OS_LINUX AND ARCH_X86_64) SRCS( md5-fast-x8664.S ) @@ -23,5 +23,5 @@ ELSE() md5.c ) ENDIF() - -END() + +END() diff --git a/contrib/libs/nghttp2/ya.make b/contrib/libs/nghttp2/ya.make index 69f6a544811..325cc6ae307 100644 --- a/contrib/libs/nghttp2/ya.make +++ b/contrib/libs/nghttp2/ya.make @@ -15,7 +15,7 @@ LICENSE( FSFAP AND MIT ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) ADDINCL( diff --git a/contrib/libs/openssl/crypto/ya.make b/contrib/libs/openssl/crypto/ya.make index ca01b4776b9..3acfb0cac7d 100644 --- a/contrib/libs/openssl/crypto/ya.make +++ b/contrib/libs/openssl/crypto/ya.make @@ -10,7 +10,7 @@ LICENSE( Public-Domain AND Snprintf ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/openssl/ya.make b/contrib/libs/openssl/ya.make index 80f8e8d3a73..060d0697855 100644 --- a/contrib/libs/openssl/ya.make +++ b/contrib/libs/openssl/ya.make @@ -9,7 +9,7 @@ LICENSE( OpenSSL AND Public-Domain ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/pcre/pcre.h b/contrib/libs/pcre/pcre.h index 133d828216c..86e3956c212 100644 
--- a/contrib/libs/pcre/pcre.h +++ b/contrib/libs/pcre/pcre.h @@ -1,51 +1,51 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* This is the public header file for the PCRE library, to be #included by -applications that call the PCRE functions. - +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* This is the public header file for the PCRE library, to be #included by +applications that call the PCRE functions. + Copyright (c) 1997-2014 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -#ifndef _PCRE_H -#define _PCRE_H - -/* The current PCRE version information. */ - + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef _PCRE_H +#define _PCRE_H + +/* The current PCRE version information. */ + #define PCRE_MAJOR 8 #define PCRE_MINOR 44 #define PCRE_PRERELEASE #define PCRE_DATE 2020-02-12 - + /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE, the appropriate export setting is defined in pcre_internal.h, which includes this file. So we @@ -65,16 +65,16 @@ don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */ # endif #endif -/* By default, we use the standard "extern" declarations. */ - -#ifndef PCRE_EXP_DECL +/* By default, we use the standard "extern" declarations. */ + +#ifndef PCRE_EXP_DECL # ifdef __cplusplus # define PCRE_EXP_DECL extern "C" # else # define PCRE_EXP_DECL extern # endif -#endif - +#endif + #ifdef __cplusplus # ifndef PCRECPP_EXP_DECL # define PCRECPP_EXP_DECL extern @@ -82,19 +82,19 @@ don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */ # ifndef PCRECPP_EXP_DEFN # define PCRECPP_EXP_DEFN # endif -#endif - -/* Have to include stdlib.h in order to ensure that size_t is defined; -it is needed here for malloc. */ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - +#endif + +/* Have to include stdlib.h in order to ensure that size_t is defined; +it is needed here for malloc. 
*/ + +#include + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + /* Public options. Some are compile-time only, some are run-time only, and some are both. Most of the compile-time options are saved with the compiled regex so that they can be inspected during studying (and therefore JIT compiling). Note @@ -104,12 +104,12 @@ are now used, so in order to conserve them, option bits that were previously only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may also be used for compile-time options that affect only compiling and are not relevant for studying or JIT compiling. - + Some options for pcre_compile() change its behaviour but do not affect the behaviour of the execution functions. Other options are passed through to the execution functions and affect their behaviour, with or without affecting the behaviour of pcre_compile(). - + Options that can be passed to pcre_compile() are tagged Cx below, with these variants: @@ -170,8 +170,8 @@ with J. */ #define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */ #define PCRE_UCP 0x20000000 /* C3 */ -/* Exec-time and get/set-time error codes */ - +/* Exec-time and get/set-time error codes */ + #define PCRE_ERROR_NOMATCH (-1) #define PCRE_ERROR_NULL (-2) #define PCRE_ERROR_BADOPTION (-3) @@ -210,7 +210,7 @@ with J. */ #define PCRE_ERROR_JIT_BADOPTION (-31) #define PCRE_ERROR_BADLENGTH (-32) #define PCRE_ERROR_UNSET (-33) - + /* Specific error codes for UTF-8 validity checks */ #define PCRE_UTF8_ERR0 0 @@ -252,24 +252,24 @@ with J. 
*/ #define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */ #define PCRE_UTF32_ERR3 3 -/* Request types for pcre_fullinfo() */ - -#define PCRE_INFO_OPTIONS 0 -#define PCRE_INFO_SIZE 1 -#define PCRE_INFO_CAPTURECOUNT 2 -#define PCRE_INFO_BACKREFMAX 3 -#define PCRE_INFO_FIRSTBYTE 4 -#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */ -#define PCRE_INFO_FIRSTTABLE 5 -#define PCRE_INFO_LASTLITERAL 6 -#define PCRE_INFO_NAMEENTRYSIZE 7 -#define PCRE_INFO_NAMECOUNT 8 -#define PCRE_INFO_NAMETABLE 9 -#define PCRE_INFO_STUDYSIZE 10 -#define PCRE_INFO_DEFAULT_TABLES 11 -#define PCRE_INFO_OKPARTIAL 12 -#define PCRE_INFO_JCHANGED 13 -#define PCRE_INFO_HASCRORLF 14 +/* Request types for pcre_fullinfo() */ + +#define PCRE_INFO_OPTIONS 0 +#define PCRE_INFO_SIZE 1 +#define PCRE_INFO_CAPTURECOUNT 2 +#define PCRE_INFO_BACKREFMAX 3 +#define PCRE_INFO_FIRSTBYTE 4 +#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */ +#define PCRE_INFO_FIRSTTABLE 5 +#define PCRE_INFO_LASTLITERAL 6 +#define PCRE_INFO_NAMEENTRYSIZE 7 +#define PCRE_INFO_NAMECOUNT 8 +#define PCRE_INFO_NAMETABLE 9 +#define PCRE_INFO_STUDYSIZE 10 +#define PCRE_INFO_DEFAULT_TABLES 11 +#define PCRE_INFO_OKPARTIAL 12 +#define PCRE_INFO_JCHANGED 13 +#define PCRE_INFO_HASCRORLF 14 #define PCRE_INFO_MINLENGTH 15 #define PCRE_INFO_JIT 16 #define PCRE_INFO_JITSIZE 17 @@ -281,25 +281,25 @@ with J. */ #define PCRE_INFO_MATCHLIMIT 23 #define PCRE_INFO_RECURSIONLIMIT 24 #define PCRE_INFO_MATCH_EMPTY 25 - -/* Request types for pcre_config(). Do not re-arrange, in order to remain -compatible. */ - -#define PCRE_CONFIG_UTF8 0 -#define PCRE_CONFIG_NEWLINE 1 -#define PCRE_CONFIG_LINK_SIZE 2 -#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 -#define PCRE_CONFIG_MATCH_LIMIT 4 -#define PCRE_CONFIG_STACKRECURSE 5 -#define PCRE_CONFIG_UNICODE_PROPERTIES 6 -#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7 -#define PCRE_CONFIG_BSR 8 + +/* Request types for pcre_config(). Do not re-arrange, in order to remain +compatible. 
*/ + +#define PCRE_CONFIG_UTF8 0 +#define PCRE_CONFIG_NEWLINE 1 +#define PCRE_CONFIG_LINK_SIZE 2 +#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 +#define PCRE_CONFIG_MATCH_LIMIT 4 +#define PCRE_CONFIG_STACKRECURSE 5 +#define PCRE_CONFIG_UNICODE_PROPERTIES 6 +#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7 +#define PCRE_CONFIG_BSR 8 #define PCRE_CONFIG_JIT 9 #define PCRE_CONFIG_UTF16 10 #define PCRE_CONFIG_JITTARGET 11 #define PCRE_CONFIG_UTF32 12 #define PCRE_CONFIG_PARENS_LIMIT 13 - + /* Request types for pcre_study(). Do not re-arrange, in order to remain compatible. */ @@ -309,21 +309,21 @@ compatible. */ #define PCRE_STUDY_EXTRA_NEEDED 0x0008 /* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine -these bits, just add new ones on the end, in order to remain compatible. */ - -#define PCRE_EXTRA_STUDY_DATA 0x0001 -#define PCRE_EXTRA_MATCH_LIMIT 0x0002 -#define PCRE_EXTRA_CALLOUT_DATA 0x0004 -#define PCRE_EXTRA_TABLES 0x0008 -#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010 +these bits, just add new ones on the end, in order to remain compatible. */ + +#define PCRE_EXTRA_STUDY_DATA 0x0001 +#define PCRE_EXTRA_MATCH_LIMIT 0x0002 +#define PCRE_EXTRA_CALLOUT_DATA 0x0004 +#define PCRE_EXTRA_TABLES 0x0008 +#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010 #define PCRE_EXTRA_MARK 0x0020 #define PCRE_EXTRA_EXECUTABLE_JIT 0x0040 - -/* Types */ - + +/* Types */ + struct real_pcre8_or_16; /* declaration; the definition is private */ typedef struct real_pcre8_or_16 pcre; - + struct real_pcre8_or_16; /* declaration; the definition is private */ typedef struct real_pcre8_or_16 pcre16; @@ -361,29 +361,29 @@ pcre32 functions are not implemented. There is a check for this in pcre_internal #define PCRE_SPTR32 const PCRE_UCHAR32 * #endif -/* When PCRE is compiled as a C++ library, the subject pointer type can be -replaced with a custom type. For conventional use, the public interface is a -const char *. 
*/ - -#ifndef PCRE_SPTR -#define PCRE_SPTR const char * -#endif - -/* The structure for passing additional data to pcre_exec(). This is defined in -such as way as to be extensible. Always add new fields at the end, in order to -remain compatible. */ - -typedef struct pcre_extra { - unsigned long int flags; /* Bits for which fields are set */ - void *study_data; /* Opaque data from pcre_study() */ - unsigned long int match_limit; /* Maximum number of calls to match() */ - void *callout_data; /* Data passed back in callouts */ - const unsigned char *tables; /* Pointer to character tables */ - unsigned long int match_limit_recursion; /* Max recursive calls to match() */ +/* When PCRE is compiled as a C++ library, the subject pointer type can be +replaced with a custom type. For conventional use, the public interface is a +const char *. */ + +#ifndef PCRE_SPTR +#define PCRE_SPTR const char * +#endif + +/* The structure for passing additional data to pcre_exec(). This is defined in +such as way as to be extensible. Always add new fields at the end, in order to +remain compatible. */ + +typedef struct pcre_extra { + unsigned long int flags; /* Bits for which fields are set */ + void *study_data; /* Opaque data from pcre_study() */ + unsigned long int match_limit; /* Maximum number of calls to match() */ + void *callout_data; /* Data passed back in callouts */ + const unsigned char *tables; /* Pointer to character tables */ + unsigned long int match_limit_recursion; /* Max recursive calls to match() */ unsigned char **mark; /* For passing back a mark pointer */ void *executable_jit; /* Contains a pointer to a compiled jit code */ -} pcre_extra; - +} pcre_extra; + /* Same structure as above, but with 16 bit char pointers. */ typedef struct pcre16_extra { @@ -410,31 +410,31 @@ typedef struct pcre32_extra { void *executable_jit; /* Contains a pointer to a compiled jit code */ } pcre32_extra; -/* The structure for passing out data via the pcre_callout_function. 
We use a -structure so that new fields can be added on the end in future versions, -without changing the API of the function, thereby allowing old clients to work -without modification. */ - -typedef struct pcre_callout_block { - int version; /* Identifies version of block */ - /* ------------------------ Version 0 ------------------------------- */ - int callout_number; /* Number compiled into pattern */ - int *offset_vector; /* The offset vector */ - PCRE_SPTR subject; /* The subject being matched */ - int subject_length; /* The length of the subject */ - int start_match; /* Offset to start of this match attempt */ - int current_position; /* Where we currently are in the subject */ - int capture_top; /* Max current capture */ - int capture_last; /* Most recently closed capture */ - void *callout_data; /* Data passed in with the call */ - /* ------------------- Added for Version 1 -------------------------- */ - int pattern_position; /* Offset to next item in the pattern */ - int next_item_length; /* Length of next item in the pattern */ +/* The structure for passing out data via the pcre_callout_function. We use a +structure so that new fields can be added on the end in future versions, +without changing the API of the function, thereby allowing old clients to work +without modification. 
*/ + +typedef struct pcre_callout_block { + int version; /* Identifies version of block */ + /* ------------------------ Version 0 ------------------------------- */ + int callout_number; /* Number compiled into pattern */ + int *offset_vector; /* The offset vector */ + PCRE_SPTR subject; /* The subject being matched */ + int subject_length; /* The length of the subject */ + int start_match; /* Offset to start of this match attempt */ + int current_position; /* Where we currently are in the subject */ + int capture_top; /* Max current capture */ + int capture_last; /* Most recently closed capture */ + void *callout_data; /* Data passed in with the call */ + /* ------------------- Added for Version 1 -------------------------- */ + int pattern_position; /* Offset to next item in the pattern */ + int next_item_length; /* Length of next item in the pattern */ /* ------------------- Added for Version 2 -------------------------- */ const unsigned char *mark; /* Pointer to current mark or NULL */ - /* ------------------------------------------------------------------ */ -} pcre_callout_block; - + /* ------------------------------------------------------------------ */ +} pcre_callout_block; + /* Same structure as above, but with 16 bit char pointers. */ typedef struct pcre16_callout_block { @@ -479,18 +479,18 @@ typedef struct pcre32_callout_block { /* ------------------------------------------------------------------ */ } pcre32_callout_block; -/* Indirection for store get and free functions. These can be set to -alternative malloc/free functions if required. Special ones are used in the -non-recursive case for "frames". There is also an optional callout function -that is triggered by the (?) regex item. For Virtual Pascal, these definitions -have to take another form. 
*/ - -#ifndef VPCOMPAT -PCRE_EXP_DECL void *(*pcre_malloc)(size_t); -PCRE_EXP_DECL void (*pcre_free)(void *); -PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t); -PCRE_EXP_DECL void (*pcre_stack_free)(void *); -PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *); +/* Indirection for store get and free functions. These can be set to +alternative malloc/free functions if required. Special ones are used in the +non-recursive case for "frames". There is also an optional callout function +that is triggered by the (?) regex item. For Virtual Pascal, these definitions +have to take another form. */ + +#ifndef VPCOMPAT +PCRE_EXP_DECL void *(*pcre_malloc)(size_t); +PCRE_EXP_DECL void (*pcre_free)(void *); +PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t); +PCRE_EXP_DECL void (*pcre_stack_free)(void *); +PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *); PCRE_EXP_DECL int (*pcre_stack_guard)(void); PCRE_EXP_DECL void *(*pcre16_malloc)(size_t); @@ -506,12 +506,12 @@ PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t); PCRE_EXP_DECL void (*pcre32_stack_free)(void *); PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *); PCRE_EXP_DECL int (*pcre32_stack_guard)(void); -#else /* VPCOMPAT */ -PCRE_EXP_DECL void *pcre_malloc(size_t); -PCRE_EXP_DECL void pcre_free(void *); -PCRE_EXP_DECL void *pcre_stack_malloc(size_t); -PCRE_EXP_DECL void pcre_stack_free(void *); -PCRE_EXP_DECL int pcre_callout(pcre_callout_block *); +#else /* VPCOMPAT */ +PCRE_EXP_DECL void *pcre_malloc(size_t); +PCRE_EXP_DECL void pcre_free(void *); +PCRE_EXP_DECL void *pcre_stack_malloc(size_t); +PCRE_EXP_DECL void pcre_stack_free(void *); +PCRE_EXP_DECL int pcre_callout(pcre_callout_block *); PCRE_EXP_DECL int pcre_stack_guard(void); PCRE_EXP_DECL void *pcre16_malloc(size_t); @@ -527,33 +527,33 @@ PCRE_EXP_DECL void *pcre32_stack_malloc(size_t); PCRE_EXP_DECL void pcre32_stack_free(void *); PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *); PCRE_EXP_DECL int pcre32_stack_guard(void); 
-#endif /* VPCOMPAT */ - +#endif /* VPCOMPAT */ + /* User defined callback which provides a stack just before the match starts. */ typedef pcre_jit_stack *(*pcre_jit_callback)(void *); typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *); typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *); -/* Exported PCRE functions */ - -PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *, - const unsigned char *); +/* Exported PCRE functions */ + +PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *, + const unsigned char *); PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *, const unsigned char *); PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *, const unsigned char *); -PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, - int *, const unsigned char *); +PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, + int *, const unsigned char *); PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **, int *, const unsigned char *); PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **, int *, const unsigned char *); -PCRE_EXP_DECL int pcre_config(int, void *); +PCRE_EXP_DECL int pcre_config(int, void *); PCRE_EXP_DECL int pcre16_config(int, void *); PCRE_EXP_DECL int pcre32_config(int, void *); -PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *, - int *, int, const char *, char *, int); +PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *, + int *, int, const char *, char *, int); PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16, int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int); PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32, @@ -564,14 +564,14 @@ PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int, PCRE_UCHAR16 *, int); PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int, 
PCRE_UCHAR32 *, int); -PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *, - const char *, int, int, int, int *, int , int *, int); +PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *, + const char *, int, int, int, int *, int , int *, int); PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *, PCRE_SPTR16, int, int, int, int *, int , int *, int); PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *, PCRE_SPTR32, int, int, int, int *, int , int *, int); -PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, - int, int, int, int *, int); +PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, + int, int, int, int *, int); PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *, PCRE_SPTR16, int, int, int, int *, int); PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *, @@ -585,61 +585,61 @@ PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *, PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *, PCRE_SPTR32, int, int, int, int *, int, pcre32_jit_stack *); -PCRE_EXP_DECL void pcre_free_substring(const char *); +PCRE_EXP_DECL void pcre_free_substring(const char *); PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16); PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32); -PCRE_EXP_DECL void pcre_free_substring_list(const char **); +PCRE_EXP_DECL void pcre_free_substring_list(const char **); PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *); PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *); -PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, - void *); +PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, + void *); PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int, void *); PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int, void *); -PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, 
const char *, - int *, int, const char *, const char **); +PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *, + int *, int, const char *, const char **); PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16, int *, int, PCRE_SPTR16, PCRE_SPTR16 *); PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32, int *, int, PCRE_SPTR32, PCRE_SPTR32 *); -PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *); +PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *); PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16); PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32); -PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *, - char **, char **); +PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *, + char **, char **); PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16, PCRE_UCHAR16 **, PCRE_UCHAR16 **); PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32, PCRE_UCHAR32 **, PCRE_UCHAR32 **); -PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int, - const char **); +PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int, + const char **); PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int, PCRE_SPTR16 *); PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int, PCRE_SPTR32 *); -PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int, - const char ***); +PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int, + const char ***); PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int, PCRE_SPTR16 **); PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int, PCRE_SPTR32 **); -PCRE_EXP_DECL const unsigned char *pcre_maketables(void); +PCRE_EXP_DECL const unsigned char *pcre_maketables(void); PCRE_EXP_DECL const unsigned char *pcre16_maketables(void); PCRE_EXP_DECL const 
unsigned char *pcre32_maketables(void); -PCRE_EXP_DECL int pcre_refcount(pcre *, int); +PCRE_EXP_DECL int pcre_refcount(pcre *, int); PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int); PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int); -PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); +PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **); PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **); PCRE_EXP_DECL void pcre_free_study(pcre_extra *); PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *); PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *); -PCRE_EXP_DECL const char *pcre_version(void); +PCRE_EXP_DECL const char *pcre_version(void); PCRE_EXP_DECL const char *pcre16_version(void); PCRE_EXP_DECL const char *pcre32_version(void); - + /* Utility functions for byte order swaps. */ PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *, const unsigned char *); @@ -670,8 +670,8 @@ PCRE_EXP_DECL void pcre_jit_free_unused_memory(void); PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void); PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void); -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcre.h */ +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* End of pcre.h */ diff --git a/contrib/libs/pcre/pcre_chartables.c b/contrib/libs/pcre/pcre_chartables.c index 87ecc05e91d..f22172b8355 100644 --- a/contrib/libs/pcre/pcre_chartables.c +++ b/contrib/libs/pcre/pcre_chartables.c @@ -1,198 +1,198 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* This file contains character tables that are used when no external tables -are passed to PCRE by the application that calls it. The tables are used only -for characters whose code values are less than 256. 
- -This is a default version of the tables that assumes ASCII encoding. A program -called dftables (which is distributed with PCRE) can be used to build -alternative versions of this file. This is necessary if you are running in an -EBCDIC environment, or if you want to default to a different encoding, for -example ISO-8859-1. When dftables is run, it creates these tables in the -current locale. If PCRE is configured with --enable-rebuild-chartables, this -happens automatically. - +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* This file contains character tables that are used when no external tables +are passed to PCRE by the application that calls it. The tables are used only +for characters whose code values are less than 256. + +This is a default version of the tables that assumes ASCII encoding. A program +called dftables (which is distributed with PCRE) can be used to build +alternative versions of this file. This is necessary if you are running in an +EBCDIC environment, or if you want to default to a different encoding, for +example ISO-8859-1. When dftables is run, it creates these tables in the +current locale. If PCRE is configured with --enable-rebuild-chartables, this +happens automatically. + The following #includes are present because without them gcc 4.x may remove the -array definition from the final binary if PCRE is built into a static library -and dead code stripping is activated. This leads to link errors. Pulling in the -header ensures that the array gets flagged as "someone outside this compilation -unit might reference this" and so it will always be supplied to the linker. */ - -#ifdef HAVE_CONFIG_H +array definition from the final binary if PCRE is built into a static library +and dead code stripping is activated. This leads to link errors. 
Pulling in the +header ensures that the array gets flagged as "someone outside this compilation +unit might reference this" and so it will always be supplied to the linker. */ + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - +#endif + +#include "pcre_internal.h" + const pcre_uint8 PRIV(default_tables)[] = { - -/* This table is a lower casing table. */ - - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122, 91, 92, 93, 94, 95, - 96, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122,123,124,125,126,127, - 128,129,130,131,132,133,134,135, - 136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151, - 152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167, - 168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183, - 184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199, - 200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231, - 232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247, - 248,249,250,251,252,253,254,255, - -/* This table is a case flipping table. 
*/ - - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122, 91, 92, 93, 94, 95, - 96, 65, 66, 67, 68, 69, 70, 71, - 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90,123,124,125,126,127, - 128,129,130,131,132,133,134,135, - 136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151, - 152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167, - 168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183, - 184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199, - 200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231, - 232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247, - 248,249,250,251,252,253,254,255, - -/* This table contains bit maps for various character classes. Each map is 32 -bytes long and the bits run from the least significant end of each byte. The -classes that have their own maps are: space, xdigit, digit, upper, lower, word, -graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ - - 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, - 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - -/* This table identifies various classes of character by individual bits: - 0x01 white space character - 0x02 letter - 0x04 decimal digit - 0x08 hexadecimal digit - 0x10 alphanumeric or '_' - 0x80 regular expression metacharacter or binary zero -*/ - - 
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ + +/* This table is a lower casing table. */ + + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135, + 136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151, + 152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167, + 168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183, + 184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199, + 200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231, + 232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247, + 248,249,250,251,252,253,254,255, + +/* This table is a case flipping table. 
*/ + + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135, + 136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151, + 152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167, + 168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183, + 184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199, + 200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231, + 232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247, + 248,249,250,251,252,253,254,255, + +/* This table contains bit maps for various character classes. Each map is 32 +bytes long and the bits run from the least significant end of each byte. The +classes that have their own maps are: space, xdigit, digit, upper, lower, word, +graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ + + 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, + 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, + 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, + 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + +/* This table identifies various classes of character by individual bits: + 0x01 white space character + 0x02 letter + 0x04 decimal digit + 0x08 hexadecimal digit + 0x10 alphanumeric or '_' + 0x80 regular expression metacharacter or binary zero +*/ + + 
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ - 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ - 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ - 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ - 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ - 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ - -/* End of pcre_chartables.c */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ + 
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ + 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ + 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ + 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ + 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ + 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ + 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ + +/* End of pcre_chartables.c */ diff --git a/contrib/libs/pcre/pcre_compile.c b/contrib/libs/pcre/pcre_compile.c index 4501a7e47b7..8051988093e 100644 --- a/contrib/libs/pcre/pcre_compile.c +++ b/contrib/libs/pcre/pcre_compile.c @@ -1,84 +1,84 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and 
semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2020 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_compile(), along with -supporting internal functions that are not used by other modules. */ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_compile(), along with +supporting internal functions that are not used by other modules. */ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#define NLBLOCK cd /* Block containing newline information */ +#endif + +#define NLBLOCK cd /* Block containing newline information */ #define PSSTART start_pattern /* Field containing pattern start */ #define PSEND end_pattern /* Field containing pattern end */ - -#include "pcre_internal.h" - - + +#include "pcre_internal.h" + + /* When PCRE_DEBUG is defined, we need the pcre(16|32)_printint() function, which is also used by pcretest. PCRE_DEBUG is not defined when building a production library. We do not need to select pcre16_printint.c specially, because the COMPILE_PCREx macro will already be appropriately set. */ - + #ifdef PCRE_DEBUG /* pcre_printint.c should not include any headers */ #define PCRE_INCLUDED #include "pcre_printint.c" #undef PCRE_INCLUDED -#endif - - -/* Macro for setting individual bits in class bitmaps. */ - +#endif + + +/* Macro for setting individual bits in class bitmaps. */ + #define SETBIT(a,b) a[(b)/8] |= (1U << ((b)&7)) - -/* Maximum length value to check against when making sure that the integer that -holds the compiled pattern length does not overflow. 
We make it a bit less than -INT_MAX to allow for adding in group terminating bytes, so that we don't have -to check them every time. */ - -#define OFLOW_MAX (INT_MAX - 20) - + +/* Maximum length value to check against when making sure that the integer that +holds the compiled pattern length does not overflow. We make it a bit less than +INT_MAX to allow for adding in group terminating bytes, so that we don't have +to check them every time. */ + +#define OFLOW_MAX (INT_MAX - 20) + /* Definitions to allow mutual recursion */ - + static int add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *, const pcre_uint32 *, unsigned int); @@ -90,20 +90,20 @@ static BOOL -/************************************************* -* Code parameters and static tables * -*************************************************/ - -/* This value specifies the size of stack workspace that is used during the -first pre-compile phase that determines how much memory is required. The regex -is partly compiled into this space, but the compiled parts are discarded as -soon as they can be, so that hopefully there will never be an overrun. The code -does, however, check for an overrun. The largest amount I've seen used is 218, -so this number is very generous. - -The same workspace is used during the second, actual compile phase for -remembering forward references to groups so that they can be filled in at the -end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE +/************************************************* +* Code parameters and static tables * +*************************************************/ + +/* This value specifies the size of stack workspace that is used during the +first pre-compile phase that determines how much memory is required. The regex +is partly compiled into this space, but the compiled parts are discarded as +soon as they can be, so that hopefully there will never be an overrun. The code +does, however, check for an overrun. 
The largest amount I've seen used is 218, +so this number is very generous. + +The same workspace is used during the second, actual compile phase for +remembering forward references to groups so that they can be filled in at the +end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE is 4 there is plenty of room for most patterns. However, the memory can get filled up by repetitions of forward references, for example patterns like /(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so @@ -111,15 +111,15 @@ that the workspace is expanded using malloc() in this situation. The value below is therefore a minimum, and we put a maximum on it for safety. The minimum is now also defined in terms of LINK_SIZE so that the use of malloc() kicks in at the same number of forward references in all cases. */ - + #define COMPILE_WORK_SIZE (2048*LINK_SIZE) #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE) - + /* This value determines the size of the initial vector that is used for remembering named groups during the pre-compile. It is allocated on the stack, but if it is too small, it is expanded using malloc(), in a similar way to the workspace. The value is the number of slots in the list. */ - + #define NAMED_GROUP_LIST_SIZE 20 /* The overrun tests check for a slightly smaller size so that they detect the @@ -139,17 +139,17 @@ overrun before it actually does run off the end of the data block. */ #define UTF_LENGTH 0x10000000l /* The char contains its length. */ -/* Table for handling escaped characters in the range '0'-'z'. Positive returns -are simple data values; negative values are for special things like \d and so -on. Zero means further processing is needed (for things like \x), or the escape -is invalid. */ - +/* Table for handling escaped characters in the range '0'-'z'. Positive returns +are simple data values; negative values are for special things like \d and so +on. 
Zero means further processing is needed (for things like \x), or the escape +is invalid. */ + #ifndef EBCDIC /* This is the "normal" table for ASCII systems or for EBCDIC systems running in UTF-8 mode. */ -static const short int escapes[] = { +static const short int escapes[] = { 0, 0, 0, 0, 0, 0, @@ -188,59 +188,59 @@ static const short int escapes[] = { -ESC_v, -ESC_w, 0, 0, -ESC_z -}; - +}; + #else /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */ -static const short int escapes[] = { -/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|', -/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0, -/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~', -/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0, -/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?', -/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, -/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"', +static const short int escapes[] = { +/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|', +/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0, +/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~', +/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0, +/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?', +/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"', /* 80 */ 0, ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, -/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, +/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, /* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p, -/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, -/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0, -/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, -/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, -/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', -/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, -/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, +/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, +/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0, +/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, +/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', +/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, +/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, /* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P, 
-/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, -/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, -/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, -/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, -/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0 -}; +/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, +/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, +/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, +/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0 +}; /* We also need a table of characters that may follow \c in an EBCDIC environment for characters 0-31. */ static unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; -#endif - - -/* Table of special "verbs" like (*PRUNE). This is a short table, so it is -searched linearly. Put all the names into a single string, in order to reduce +#endif + + +/* Table of special "verbs" like (*PRUNE). This is a short table, so it is +searched linearly. Put all the names into a single string, in order to reduce the number of relocations when a shared library is dynamically linked. The string is built from string macros so that it works in UTF-8 mode on EBCDIC platforms. */ - -typedef struct verbitem { + +typedef struct verbitem { int len; /* Length of verb name */ int op; /* Op when no arg, or -1 if arg mandatory */ int op_arg; /* Op when arg present, or -1 if not allowed */ -} verbitem; - -static const char verbnames[] = +} verbitem; + +static const char verbnames[] = "\0" /* Empty name is a shorthand for MARK */ STRING_MARK0 STRING_ACCEPT0 @@ -250,7 +250,7 @@ static const char verbnames[] = STRING_PRUNE0 STRING_SKIP0 STRING_THEN; - + static const verbitem verbs[] = { { 0, -1, OP_MARK }, { 4, -1, OP_MARK }, @@ -261,11 +261,11 @@ static const verbitem verbs[] = { { 5, OP_PRUNE, OP_PRUNE_ARG }, { 4, OP_SKIP, OP_SKIP_ARG }, { 4, OP_THEN, OP_THEN_ARG } -}; - +}; + static const int verbcount = sizeof(verbs)/sizeof(verbitem); - - + + /* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in another regex library. 
*/ @@ -279,57 +279,57 @@ static const pcre_uchar sub_end_of_word[] = { CHAR_RIGHT_PARENTHESIS, '\0' }; -/* Tables of names of POSIX character classes and their lengths. The names are -now all in a single string, to reduce the number of relocations when a shared -library is dynamically loaded. The list of lengths is terminated by a zero -length entry. The first three must be alpha, lower, upper, as this is assumed +/* Tables of names of POSIX character classes and their lengths. The names are +now all in a single string, to reduce the number of relocations when a shared +library is dynamically loaded. The list of lengths is terminated by a zero +length entry. The first three must be alpha, lower, upper, as this is assumed for handling case independence. The indices for graph, print, and punct are needed, so identify them. */ - -static const char posix_names[] = + +static const char posix_names[] = STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0 STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 STRING_word0 STRING_xdigit; - + static const pcre_uint8 posix_name_lengths[] = { - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; - + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; + #define PC_GRAPH 8 #define PC_PRINT 9 #define PC_PUNCT 10 -/* Table of class bit maps for each POSIX class. Each class is formed from a -base map, with an optional addition or removal of another map. Then, for some -classes, there is some additional tweaking: for [:blank:] the vertical space -characters are removed, and for [:alpha:] and [:alnum:] the underscore -character is removed. The triples in the table consist of the base map offset, -second map offset or -1 if no second map, and a non-negative value for map -addition or a negative value for map subtraction (if there are two maps). The -absolute value of the third field has these meanings: 0 => no tweaking, 1 => -remove vertical space characters, 2 => remove underscore. 
*/ - -static const int posix_class_maps[] = { - cbit_word, cbit_digit, -2, /* alpha */ - cbit_lower, -1, 0, /* lower */ - cbit_upper, -1, 0, /* upper */ - cbit_word, -1, 2, /* alnum - word without underscore */ - cbit_print, cbit_cntrl, 0, /* ascii */ - cbit_space, -1, 1, /* blank - a GNU extension */ - cbit_cntrl, -1, 0, /* cntrl */ - cbit_digit, -1, 0, /* digit */ - cbit_graph, -1, 0, /* graph */ - cbit_print, -1, 0, /* print */ - cbit_punct, -1, 0, /* punct */ - cbit_space, -1, 0, /* space */ - cbit_word, -1, 0, /* word - a Perl extension */ - cbit_xdigit,-1, 0 /* xdigit */ -}; - +/* Table of class bit maps for each POSIX class. Each class is formed from a +base map, with an optional addition or removal of another map. Then, for some +classes, there is some additional tweaking: for [:blank:] the vertical space +characters are removed, and for [:alpha:] and [:alnum:] the underscore +character is removed. The triples in the table consist of the base map offset, +second map offset or -1 if no second map, and a non-negative value for map +addition or a negative value for map subtraction (if there are two maps). The +absolute value of the third field has these meanings: 0 => no tweaking, 1 => +remove vertical space characters, 2 => remove underscore. */ + +static const int posix_class_maps[] = { + cbit_word, cbit_digit, -2, /* alpha */ + cbit_lower, -1, 0, /* lower */ + cbit_upper, -1, 0, /* upper */ + cbit_word, -1, 2, /* alnum - word without underscore */ + cbit_print, cbit_cntrl, 0, /* ascii */ + cbit_space, -1, 1, /* blank - a GNU extension */ + cbit_cntrl, -1, 0, /* cntrl */ + cbit_digit, -1, 0, /* digit */ + cbit_graph, -1, 0, /* graph */ + cbit_print, -1, 0, /* print */ + cbit_punct, -1, 0, /* punct */ + cbit_space, -1, 0, /* space */ + cbit_word, -1, 0, /* word - a Perl extension */ + cbit_xdigit,-1, 0 /* xdigit */ +}; + /* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by Unicode property escapes. 
*/ - + #ifdef SUPPORT_UCP static const pcre_uchar string_PNd[] = { CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, @@ -434,99 +434,99 @@ static const pcre_uchar *posix_substitutes[] = { #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *)) #endif -#define STRING(a) # a -#define XSTRING(s) STRING(s) - -/* The texts of compile-time error messages. These are "char *" because they -are passed to the outside world. Do not ever re-use any error number, because -they are documented. Always add a new error instead. Messages marked DEAD below -are no longer used. This used to be a table of strings, but in order to reduce -the number of relocations needed when a shared library is loaded dynamically, -it is now one long string. We cannot use a table of offsets, because the -lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we -simply count through to the one we want - this isn't a performance issue +#define STRING(a) # a +#define XSTRING(s) STRING(s) + +/* The texts of compile-time error messages. These are "char *" because they +are passed to the outside world. Do not ever re-use any error number, because +they are documented. Always add a new error instead. Messages marked DEAD below +are no longer used. This used to be a table of strings, but in order to reduce +the number of relocations needed when a shared library is loaded dynamically, +it is now one long string. We cannot use a table of offsets, because the +lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we +simply count through to the one we want - this isn't a performance issue because these strings are used only when there is a compilation error. - + Each substring ends with \0 to insert a null character. This includes the final substring, so that the whole string ends with \0\0, which can be detected when counting through. 
*/ -static const char error_texts[] = - "no error\0" - "\\ at end of pattern\0" - "\\c at end of pattern\0" - "unrecognized character follows \\\0" - "numbers out of order in {} quantifier\0" - /* 5 */ - "number too big in {} quantifier\0" - "missing terminating ] for character class\0" - "invalid escape sequence in character class\0" - "range out of order in character class\0" - "nothing to repeat\0" - /* 10 */ +static const char error_texts[] = + "no error\0" + "\\ at end of pattern\0" + "\\c at end of pattern\0" + "unrecognized character follows \\\0" + "numbers out of order in {} quantifier\0" + /* 5 */ + "number too big in {} quantifier\0" + "missing terminating ] for character class\0" + "invalid escape sequence in character class\0" + "range out of order in character class\0" + "nothing to repeat\0" + /* 10 */ "internal error: invalid forward reference offset\0" - "internal error: unexpected repeat\0" - "unrecognized character after (? or (?-\0" - "POSIX named classes are supported only within a class\0" - "missing )\0" - /* 15 */ - "reference to non-existent subpattern\0" - "erroffset passed as NULL\0" - "unknown option bit(s) set\0" - "missing ) after comment\0" - "parentheses nested too deeply\0" /** DEAD **/ - /* 20 */ - "regular expression is too large\0" - "failed to get memory\0" - "unmatched parentheses\0" - "internal error: code overflow\0" - "unrecognized character after (?<\0" - /* 25 */ - "lookbehind assertion is not fixed length\0" - "malformed number or name after (?(\0" - "conditional group contains more than two branches\0" + "internal error: unexpected repeat\0" + "unrecognized character after (? 
or (?-\0" + "POSIX named classes are supported only within a class\0" + "missing )\0" + /* 15 */ + "reference to non-existent subpattern\0" + "erroffset passed as NULL\0" + "unknown option bit(s) set\0" + "missing ) after comment\0" + "parentheses nested too deeply\0" /** DEAD **/ + /* 20 */ + "regular expression is too large\0" + "failed to get memory\0" + "unmatched parentheses\0" + "internal error: code overflow\0" + "unrecognized character after (?<\0" + /* 25 */ + "lookbehind assertion is not fixed length\0" + "malformed number or name after (?(\0" + "conditional group contains more than two branches\0" "assertion expected after (?( or (?(?C)\0" - "(?R or (?[+-]digits must be followed by )\0" - /* 30 */ - "unknown POSIX class name\0" - "POSIX collating elements are not supported\0" + "(?R or (?[+-]digits must be followed by )\0" + /* 30 */ + "unknown POSIX class name\0" + "POSIX collating elements are not supported\0" "this version of PCRE is compiled without UTF support\0" - "spare error\0" /** DEAD **/ + "spare error\0" /** DEAD **/ "character value in \\x{} or \\o{} is too large\0" - /* 35 */ - "invalid condition (?(0)\0" - "\\C not allowed in lookbehind assertion\0" + /* 35 */ + "invalid condition (?(0)\0" + "\\C not allowed in lookbehind assertion\0" "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0" - "number after (?C is > 255\0" - "closing ) for (?C expected\0" - /* 40 */ - "recursive call could loop indefinitely\0" - "unrecognized character after (?P\0" - "syntax error in subpattern name (missing terminator)\0" - "two named subpatterns have the same name\0" - "invalid UTF-8 string\0" - /* 45 */ - "support for \\P, \\p, and \\X has not been compiled\0" - "malformed \\P or \\p sequence\0" - "unknown property name after \\P or \\p\0" - "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0" - "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" - /* 50 */ - "repeated subpattern is too long\0" /** DEAD **/ + 
"number after (?C is > 255\0" + "closing ) for (?C expected\0" + /* 40 */ + "recursive call could loop indefinitely\0" + "unrecognized character after (?P\0" + "syntax error in subpattern name (missing terminator)\0" + "two named subpatterns have the same name\0" + "invalid UTF-8 string\0" + /* 45 */ + "support for \\P, \\p, and \\X has not been compiled\0" + "malformed \\P or \\p sequence\0" + "unknown property name after \\P or \\p\0" + "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0" + "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" + /* 50 */ + "repeated subpattern is too long\0" /** DEAD **/ "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" - "internal error: overran compiling workspace\0" - "internal error: previously-checked referenced subpattern not found\0" - "DEFINE group contains more than one branch\0" - /* 55 */ + "internal error: overran compiling workspace\0" + "internal error: previously-checked referenced subpattern not found\0" + "DEFINE group contains more than one branch\0" + /* 55 */ "repeating a DEFINE group is not allowed\0" /** DEAD **/ - "inconsistent NEWLINE options\0" + "inconsistent NEWLINE options\0" "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" "a numbered reference must not be zero\0" "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" - /* 60 */ + /* 60 */ "(*VERB) not recognized or malformed\0" - "number is too big\0" - "subpattern name expected\0" + "number is too big\0" + "subpattern name expected\0" "digit expected after (?+\0" "] is an invalid data character in JavaScript compatibility mode\0" /* 65 */ @@ -562,23 +562,23 @@ static const char error_texts[] = "digits missing in \\x{} or \\o{}\0" "regular expression is too complicated\0" ; - -/* Table to identify digits and hex digits. This is used when compiling -patterns. 
Note that the tables in chartables are dependent on the locale, and -may mark arbitrary characters as digits - but the PCRE compiling code expects -to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have -a private table here. It costs 256 bytes, but it is a lot faster than doing -character value tests (at least in some simple cases I timed), and in some -applications one wants PCRE to compile efficiently as well as match -efficiently. - -For convenience, we use the same bit definitions as in chartables: - - 0x04 decimal digit - 0x08 hexadecimal digit - -Then we can use ctype_digit and ctype_xdigit in the code. */ - + +/* Table to identify digits and hex digits. This is used when compiling +patterns. Note that the tables in chartables are dependent on the locale, and +may mark arbitrary characters as digits - but the PCRE compiling code expects +to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have +a private table here. It costs 256 bytes, but it is a lot faster than doing +character value tests (at least in some simple cases I timed), and in some +applications one wants PCRE to compile efficiently as well as match +efficiently. + +For convenience, we use the same bit definitions as in chartables: + + 0x04 decimal digit + 0x08 hexadecimal digit + +Then we can use ctype_digit and ctype_xdigit in the code. */ + /* Using a simple comparison for decimal numbers rather than a memory read is much faster, and the resulting code is simpler (the compiler turns it into a subtraction and unsigned comparison). */ @@ -591,131 +591,131 @@ into a subtraction and unsigned comparison). */ UTF-8 mode. 
*/ static const pcre_uint8 digitab[] = - { - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ - 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */ - 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ - + { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 
23 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ + 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */ + 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ + #else /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. 
*/ static const pcre_uint8 digitab[] = - { - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ - 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ - 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ - + { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ + 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ + 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ + static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */ - 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */ - 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ - 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ - 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */ - 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */ - 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */ - 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ - 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ - 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ - 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */ - 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ - 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ - 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ - 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ - 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ -#endif - - + 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */ + 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ + 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ + 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */ + 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */ + 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */ + 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ + 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ + 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ + 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ + 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */ + 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ + 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ + 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ + 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ + 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ +#endif + + /* This table is used to check whether auto-possessification is possible between adjacent character-type opcodes. The left-hand (repeated) opcode is used to select the row, and the right-hand opcode is use to select the column. A value of 1 means that auto-possessification is OK. For example, the second value in the first row means that \D+\d can be turned into \D++\d. 
- + The Unicode property types (\P and \p) have to be present to fill out the table because of what their opcode values are, but the table values should always be zero because property types are handled separately in the code. The last four columns apply to items that cannot be repeated, so there is no need to have rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ - + #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1) #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1) - + static const pcre_uint8 autoposstab[APTROWS][APTCOLS] = { /* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */ { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */ @@ -736,7 +736,7 @@ static const pcre_uint8 autoposstab[APTROWS][APTCOLS] = { { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */ }; - + /* This table is used to check whether auto-possessification is possible between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The @@ -878,34 +878,34 @@ static const pcre_uint8 opcode_possessify[] = { -/************************************************* -* Find an error text * -*************************************************/ - -/* The error texts are now all in one long string, to save on relocations. As -some of the text is of unknown length, we can't use a table of offsets. -Instead, just count through the strings. This is not a performance issue -because it happens only when there has been a compilation error. 
- -Argument: the error number -Returns: pointer to the error string -*/ - -static const char * -find_error_text(int n) -{ -const char *s = error_texts; +/************************************************* +* Find an error text * +*************************************************/ + +/* The error texts are now all in one long string, to save on relocations. As +some of the text is of unknown length, we can't use a table of offsets. +Instead, just count through the strings. This is not a performance issue +because it happens only when there has been a compilation error. + +Argument: the error number +Returns: pointer to the error string +*/ + +static const char * +find_error_text(int n) +{ +const char *s = error_texts; for (; n > 0; n--) { while (*s++ != CHAR_NULL) {}; if (*s == CHAR_NULL) return "Error text not found (please report)"; } -return s; -} - - +return s; +} -/************************************************* + + +/************************************************* * Expand the workspace * *************************************************/ @@ -978,82 +978,82 @@ return (*p == CHAR_RIGHT_CURLY_BRACKET); /************************************************* -* Handle escapes * -*************************************************/ - -/* This function is called when a \ has been encountered. It either returns a +* Handle escapes * +*************************************************/ + +/* This function is called when a \ has been encountered. It either returns a positive value for a simple escape such as \n, or 0 for a data character which will be placed in chptr. A backreference to group n is returned as negative n. When UTF-8 is enabled, a positive value greater than 255 may be returned in chptr. On entry, ptr is pointing at the \. On exit, it is on the final character of the escape sequence. 
- -Arguments: - ptrptr points to the pattern position pointer + +Arguments: + ptrptr points to the pattern position pointer chptr points to a returned data character - errorcodeptr points to the errorcode variable - bracount number of previous extracting brackets - options the options bits - isclass TRUE if inside a character class - + errorcodeptr points to the errorcode variable + bracount number of previous extracting brackets + options the options bits + isclass TRUE if inside a character class + Returns: zero => a data character positive => a special escape sequence negative => a back reference - on error, errorcodeptr is set -*/ - -static int + on error, errorcodeptr is set +*/ + +static int check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr, int bracount, int options, BOOL isclass) -{ +{ /* PCRE_UTF16 has the same value as PCRE_UTF8. */ BOOL utf = (options & PCRE_UTF8) != 0; const pcre_uchar *ptr = *ptrptr + 1; pcre_uint32 c; int escape = 0; int i; - -GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ -ptr--; /* Set pointer back to the last byte */ - -/* If backslash is at the end of the pattern, it's an error. */ - + +GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ +ptr--; /* Set pointer back to the last byte */ + +/* If backslash is at the end of the pattern, it's an error. */ + if (c == CHAR_NULL) *errorcodeptr = ERR1; - -/* Non-alphanumerics are literals. For digits or letters, do an initial lookup -in a table. A non-zero result is something that can be returned immediately. -Otherwise further processing may be required. */ - + +/* Non-alphanumerics are literals. For digits or letters, do an initial lookup +in a table. A non-zero result is something that can be returned immediately. +Otherwise further processing may be required. 
*/ + #ifndef EBCDIC /* ASCII/UTF-8 coding */ /* Not alphanumeric */ else if (c < CHAR_0 || c > CHAR_z) {} else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } - -#else /* EBCDIC coding */ + +#else /* EBCDIC coding */ /* Not alphanumeric */ else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {} else if ((i = escapes[c - 0x48]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } -#endif - -/* Escapes that need further processing, or are illegal. */ - -else - { +#endif + +/* Escapes that need further processing, or are illegal. */ + +else + { const pcre_uchar *oldptr; BOOL braced, negated, overflow; int s; - - switch (c) - { - /* A number of Perl escapes are not handled by PCRE. We give an explicit - error. */ - + + switch (c) + { + /* A number of Perl escapes are not handled by PCRE. We give an explicit + error. */ + case CHAR_l: case CHAR_L: - *errorcodeptr = ERR37; - break; - + *errorcodeptr = ERR37; + break; + case CHAR_u: if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) { @@ -1076,7 +1076,7 @@ else c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); #endif } - + #if defined COMPILE_PCRE8 if (c > (utf ? 0x10ffffU : 0xffU)) #elif defined COMPILE_PCRE16 @@ -1119,7 +1119,7 @@ else case CHAR_g: if (isclass) break; if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE) - { + { escape = ESC_g; break; } @@ -1132,27 +1132,27 @@ else for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++) if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET) - { + { escape = ESC_k; - break; - } - braced = TRUE; - ptr++; - } - else braced = FALSE; - + break; + } + braced = TRUE; + ptr++; + } + else braced = FALSE; + if (ptr[1] == CHAR_MINUS) - { - negated = TRUE; - ptr++; - } - else negated = FALSE; - + { + negated = TRUE; + ptr++; + } + else negated = FALSE; + /* The integer range is limited by the machine's int representation. 
*/ s = 0; overflow = FALSE; while (IS_DIGIT(ptr[1])) - { + { if (s > INT_MAX / 10 - 1) /* Integer overflow */ { overflow = TRUE; @@ -1164,62 +1164,62 @@ else { while (IS_DIGIT(ptr[1])) ptr++; - *errorcodeptr = ERR61; - break; - } - + *errorcodeptr = ERR61; + break; + } + if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET) - { - *errorcodeptr = ERR57; - break; - } - + { + *errorcodeptr = ERR57; + break; + } + if (s == 0) { *errorcodeptr = ERR58; break; } - if (negated) - { + if (negated) + { if (s > bracount) - { - *errorcodeptr = ERR15; - break; - } + { + *errorcodeptr = ERR15; + break; + } s = bracount - (s - 1); - } - + } + escape = -s; - break; - - /* The handling of escape sequences consisting of a string of digits + break; + + /* The handling of escape sequences consisting of a string of digits starting with one that is not zero is not straightforward. Perl has changed over the years. Nowadays \g{} for backreferences and \o{} for octal are recommended to avoid the ambiguities in the old syntax. - - Outside a character class, the digits are read as a decimal number. If the + + Outside a character class, the digits are read as a decimal number. If the number is less than 8 (used to be 10), or if there are that many previous extracting left brackets, then it is a back reference. Otherwise, up to three octal digits are read to form an escaped byte. Thus \123 is likely to be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal value is greater than 377, the least significant 8 bits are taken. \8 and \9 are treated as the literal characters 8 and 9. - + Inside a character class, \ followed by a digit is always either a literal 8 or 9 or an octal number. */ - + case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - if (!isclass) - { - oldptr = ptr; + if (!isclass) + { + oldptr = ptr; /* The integer range is limited by the machine's int representation. 
*/ s = (int)(c -CHAR_0); overflow = FALSE; while (IS_DIGIT(ptr[1])) - { + { if (s > INT_MAX / 10 - 1) /* Integer overflow */ { overflow = TRUE; @@ -1231,32 +1231,32 @@ else { while (IS_DIGIT(ptr[1])) ptr++; - *errorcodeptr = ERR61; - break; - } + *errorcodeptr = ERR61; + break; + } if (s < 8 || s <= bracount) /* Check for back reference */ - { + { escape = -s; - break; - } - ptr = oldptr; /* Put the pointer back and fall through */ - } - + break; + } + ptr = oldptr; /* Put the pointer back and fall through */ + } + /* Handle a digit following \ when the number is not a back reference. If the first digit is 8 or 9, Perl used to generate a binary zero byte and then treat the digit as a following literal. At least by Perl 5.18 this changed so as not to insert the binary zero. */ - + if ((c = *ptr) >= CHAR_8) break; - + /* Fall through with a digit less than 8 */ - /* \0 always starts an octal number, but we may drop through to here with a - larger first octal digit. The original code used just to take the least - significant 8 bits of octal numbers (I think this is what early Perls used + /* \0 always starts an octal number, but we may drop through to here with a + larger first octal digit. The original code used just to take the least + significant 8 bits of octal numbers (I think this is what early Perls used to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, but no more than 3 octal digits. */ - + case CHAR_0: c -= CHAR_0; while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) @@ -1264,20 +1264,20 @@ else #ifdef COMPILE_PCRE8 if (!utf && c > 0xff) *errorcodeptr = ERR51; #endif - break; - + break; + /* \o is a relatively new Perl feature, supporting a more general way of specifying character codes in octal. The only supported form is \o{ddd}. 
*/ - + case CHAR_o: if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else - { + { ptr += 2; - c = 0; + c = 0; overflow = FALSE; while (*ptr >= CHAR_0 && *ptr <= CHAR_7) - { + { register pcre_uint32 cc = *ptr++; if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ #ifdef COMPILE_PCRE32 @@ -1304,7 +1304,7 @@ else else *errorcodeptr = ERR80; } break; - + /* \x is complicated. In JavaScript, \x must be followed by two hexadecimal numbers. Otherwise it is a lowercase x letter. */ @@ -1321,14 +1321,14 @@ else #ifndef EBCDIC /* ASCII/UTF-8 coding */ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); -#else /* EBCDIC coding */ +#else /* EBCDIC coding */ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); -#endif +#endif } - } + } } /* End JavaScript handling */ - + /* Handle \x in Perl's style. \x{ddd} is a character number which can be greater than 0xff in utf or non-8bit mode, but only if the ddd are hex digits. If not, { used to be treated as a data character. However, Perl @@ -1339,7 +1339,7 @@ else else { if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) - { + { ptr += 2; if (*ptr == CHAR_RIGHT_CURLY_BRACKET) { @@ -1352,11 +1352,11 @@ else { register pcre_uint32 cc = *ptr++; if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ - + #ifdef COMPILE_PCRE32 if (c >= 0x10000000l) { overflow = TRUE; break; } #endif - + #ifndef EBCDIC /* ASCII/UTF-8 coding */ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); @@ -1364,7 +1364,7 @@ else if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); #endif - + #if defined COMPILE_PCRE8 if (c > (utf ? 
0x10ffffU : 0xffU)) { overflow = TRUE; break; } #elif defined COMPILE_PCRE16 @@ -1405,27 +1405,27 @@ else #ifndef EBCDIC /* ASCII/UTF-8 coding */ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); -#else /* EBCDIC coding */ +#else /* EBCDIC coding */ if (cc <= CHAR_z) cc += 64; /* Convert to upper case */ c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); -#endif +#endif } } /* End of \xdd handling */ } /* End of Perl-style \x handling */ - break; - - /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. + break; + + /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. An error is given if the byte following \c is not an ASCII character. This coding is ASCII-specific, but then the whole concept of \cx is - ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ - + ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ + case CHAR_c: - c = *(++ptr); + c = *(++ptr); if (c == CHAR_NULL) - { - *errorcodeptr = ERR2; - break; - } + { + *errorcodeptr = ERR2; + break; + } #ifndef EBCDIC /* ASCII/UTF-8 coding */ if (c > 127) /* Excludes all non-ASCII in either mode */ { @@ -1433,7 +1433,7 @@ else break; } if (c >= CHAR_a && c <= CHAR_z) c -= 32; - c ^= 0x40; + c ^= 0x40; #else /* EBCDIC coding */ if (c >= CHAR_a && c <= CHAR_z) c += 64; if (c == CHAR_QUESTION_MARK) @@ -1446,26 +1446,26 @@ else } if (i < 32) c = i; else *errorcodeptr = ERR68; } -#endif - break; - - /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any - other alphanumeric following \ is an error if PCRE_EXTRA was set; - otherwise, for Perl compatibility, it is a literal. This code looks a bit - odd, but there used to be some cases other than the default, and there may - be again in future, so I haven't "optimized" it. 
*/ - - default: - if ((options & PCRE_EXTRA) != 0) switch(c) - { - default: - *errorcodeptr = ERR3; - break; - } - break; - } - } - +#endif + break; + + /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any + other alphanumeric following \ is an error if PCRE_EXTRA was set; + otherwise, for Perl compatibility, it is a literal. This code looks a bit + odd, but there used to be some cases other than the default, and there may + be again in future, so I haven't "optimized" it. */ + + default: + if ((options & PCRE_EXTRA) != 0) switch(c) + { + default: + *errorcodeptr = ERR3; + break; + } + break; + } + } + /* Perl supports \N{name} for character names, as well as plain \N for "not newline". PCRE does not support \N{name}. However, it does support quantification such as \N{2,3}. */ @@ -1481,314 +1481,314 @@ if ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w) /* Set the pointer to the final character before returning. */ -*ptrptr = ptr; +*ptrptr = ptr; *chptr = c; return escape; -} - - - -#ifdef SUPPORT_UCP -/************************************************* -* Handle \P and \p * -*************************************************/ - -/* This function is called after \P or \p has been encountered, provided that -PCRE is compiled with support for Unicode properties. On entry, ptrptr is -pointing at the P or p. On exit, it is pointing at the final character of the -escape sequence. - -Argument: - ptrptr points to the pattern position pointer - negptr points to a boolean that is set TRUE for negation else FALSE +} + + + +#ifdef SUPPORT_UCP +/************************************************* +* Handle \P and \p * +*************************************************/ + +/* This function is called after \P or \p has been encountered, provided that +PCRE is compiled with support for Unicode properties. On entry, ptrptr is +pointing at the P or p. On exit, it is pointing at the final character of the +escape sequence. 
+ +Argument: + ptrptr points to the pattern position pointer + negptr points to a boolean that is set TRUE for negation else FALSE ptypeptr points to an unsigned int that is set to the type value pdataptr points to an unsigned int that is set to the detailed property value - errorcodeptr points to the error code variable - + errorcodeptr points to the error code variable + Returns: TRUE if the type value was found, or FALSE for an invalid type -*/ - +*/ + static BOOL get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr, unsigned int *pdataptr, int *errorcodeptr) -{ +{ pcre_uchar c; int i, bot, top; const pcre_uchar *ptr = *ptrptr; pcre_uchar name[32]; - -c = *(++ptr); + +c = *(++ptr); if (c == CHAR_NULL) goto ERROR_RETURN; - -*negptr = FALSE; - -/* \P or \p can be followed by a name in {}, optionally preceded by ^ for -negation. */ - + +*negptr = FALSE; + +/* \P or \p can be followed by a name in {}, optionally preceded by ^ for +negation. */ + if (c == CHAR_LEFT_CURLY_BRACKET) - { + { if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT) - { - *negptr = TRUE; - ptr++; - } + { + *negptr = TRUE; + ptr++; + } for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++) - { - c = *(++ptr); + { + c = *(++ptr); if (c == CHAR_NULL) goto ERROR_RETURN; if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = c; - } + name[i] = c; + } if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; - name[i] = 0; - } - -/* Otherwise there is just one following character */ - -else - { - name[0] = c; - name[1] = 0; - } - -*ptrptr = ptr; - -/* Search for a recognized property name using binary chop */ - -bot = 0; + name[i] = 0; + } + +/* Otherwise there is just one following character */ + +else + { + name[0] = c; + name[1] = 0; + } + +*ptrptr = ptr; + +/* Search for a recognized property name using binary chop */ + +bot = 0; top = PRIV(utt_size); - -while (bot < top) - { + +while (bot < top) + { int r; - i = (bot + top) >> 1; + i = (bot + top) >> 1; r = STRCMP_UC_C8(name, 
PRIV(utt_names) + PRIV(utt)[i].name_offset); if (r == 0) - { + { *ptypeptr = PRIV(utt)[i].type; *pdataptr = PRIV(utt)[i].value; return TRUE; - } + } if (r > 0) bot = i + 1; else top = i; - } - -*errorcodeptr = ERR47; -*ptrptr = ptr; + } + +*errorcodeptr = ERR47; +*ptrptr = ptr; return FALSE; - -ERROR_RETURN: -*errorcodeptr = ERR46; -*ptrptr = ptr; + +ERROR_RETURN: +*errorcodeptr = ERR46; +*ptrptr = ptr; return FALSE; -} -#endif - - - -/************************************************* -* Read repeat counts * -*************************************************/ - -/* Read an item of the form {n,m} and return the values. This is called only -after is_counted_repeat() has confirmed that a repeat-count quantifier exists, -so the syntax is guaranteed to be correct, but we need to check the values. - -Arguments: - p pointer to first char after '{' - minp pointer to int for min - maxp pointer to int for max - returned as -1 if no max - errorcodeptr points to error code variable - -Returns: pointer to '}' on success; - current ptr on error, with errorcodeptr set non-zero -*/ - +} +#endif + + + +/************************************************* +* Read repeat counts * +*************************************************/ + +/* Read an item of the form {n,m} and return the values. This is called only +after is_counted_repeat() has confirmed that a repeat-count quantifier exists, +so the syntax is guaranteed to be correct, but we need to check the values. 
+ +Arguments: + p pointer to first char after '{' + minp pointer to int for min + maxp pointer to int for max + returned as -1 if no max + errorcodeptr points to error code variable + +Returns: pointer to '}' on success; + current ptr on error, with errorcodeptr set non-zero +*/ + static const pcre_uchar * read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr) -{ -int min = 0; -int max = -1; - +{ +int min = 0; +int max = -1; + while (IS_DIGIT(*p)) - { + { min = min * 10 + (int)(*p++ - CHAR_0); if (min > 65535) { *errorcodeptr = ERR5; return p; } - } - + } + if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else - { + { if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) - { - max = 0; + { + max = 0; while(IS_DIGIT(*p)) - { + { max = max * 10 + (int)(*p++ - CHAR_0); if (max > 65535) { *errorcodeptr = ERR5; return p; } - } - if (max < min) - { - *errorcodeptr = ERR4; - return p; - } - } - } - -*minp = min; -*maxp = max; -return p; -} - - - -/************************************************* -* Find first significant op code * -*************************************************/ - -/* This is called by several functions that scan a compiled expression looking -for a fixed first character, or an anchoring op code etc. It skips over things + } + if (max < min) + { + *errorcodeptr = ERR4; + return p; + } + } + } + +*minp = min; +*maxp = max; +return p; +} + + + +/************************************************* +* Find first significant op code * +*************************************************/ + +/* This is called by several functions that scan a compiled expression looking +for a fixed first character, or an anchoring op code etc. It skips over things that do not influence this. For some calls, it makes sense to skip negative forward and all backward assertions, and also the \b assertion; for others it does not. 
- -Arguments: - code pointer to the start of the group - skipassert TRUE if certain assertions are to be skipped - -Returns: pointer to the first significant opcode -*/ - -static const pcre_uchar* -first_significant_code(const pcre_uchar *code, BOOL skipassert) -{ -for (;;) - { - switch ((int)*code) - { - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - if (!skipassert) return code; - do code += GET(code, 1); while (*code == OP_ALT); - code += PRIV(OP_lengths)[*code]; - break; - - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - if (!skipassert) return code; - /* Fall through */ - - case OP_CALLOUT: - case OP_CREF: + +Arguments: + code pointer to the start of the group + skipassert TRUE if certain assertions are to be skipped + +Returns: pointer to the first significant opcode +*/ + +static const pcre_uchar* +first_significant_code(const pcre_uchar *code, BOOL skipassert) +{ +for (;;) + { + switch ((int)*code) + { + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + if (!skipassert) return code; + do code += GET(code, 1); while (*code == OP_ALT); + code += PRIV(OP_lengths)[*code]; + break; + + case OP_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: + if (!skipassert) return code; + /* Fall through */ + + case OP_CALLOUT: + case OP_CREF: case OP_DNCREF: - case OP_RREF: + case OP_RREF: case OP_DNRREF: - case OP_DEF: + case OP_DEF: code += PRIV(OP_lengths)[*code]; - break; - - default: - return code; - } - } -/* Control never reaches here */ -} - - - -/************************************************* + break; + + default: + return code; + } + } +/* Control never reaches here */ +} + + + +/************************************************* * Find the fixed length of a branch * -*************************************************/ - +*************************************************/ + /* Scan a branch and compute the fixed length of subject that will match it, -if the length is fixed. 
This is needed for dealing with backward assertions. +if the length is fixed. This is needed for dealing with backward assertions. In UTF8 mode, the result is in characters rather than bytes. The branch is temporarily terminated with OP_END when this function is called. - + This function is called when a backward assertion is encountered, so that if it fails, the error message can point to the correct place in the pattern. However, we cannot do this when the assertion contains subroutine calls, because they can be forward references. We solve this by remembering this case and doing the check at the end; a flag specifies which mode we are running in. -Arguments: - code points to the start of the pattern (the bracket) +Arguments: + code points to the start of the pattern (the bracket) utf TRUE in UTF-8 / UTF-16 / UTF-32 mode atend TRUE if called when the pattern is complete cd the "compile data" structure recurses chain of recurse_check to catch mutual recursion - + Returns: the fixed length, or -1 if there is no fixed length, or -2 if \C was encountered (in UTF-8 mode only) or -3 if an OP_RECURSE item was encountered and atend is FALSE or -4 if an unknown opcode was encountered (internal error) -*/ - -static int +*/ + +static int find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd, recurse_check *recurses) -{ -int length = -1; +{ +int length = -1; recurse_check this_recurse; -register int branchlength = 0; +register int branchlength = 0; register pcre_uchar *cc = code + 1 + LINK_SIZE; - -/* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ - -for (;;) - { - int d; + +/* Scan along the opcodes for this branch. If we get to the end of the +branch, check the length against that of the other branches. 
*/ + +for (;;) + { + int d; pcre_uchar *ce, *cs; register pcre_uchar op = *cc; - switch (op) - { + switch (op) + { /* We only need to continue for OP_CBRA (normal capturing bracket) and OP_BRA (normal non-capturing bracket) because the other variants of these opcodes are all concerned with unlimited repeated groups, which of course are not of fixed length. */ - case OP_CBRA: - case OP_BRA: - case OP_ONCE: + case OP_CBRA: + case OP_BRA: + case OP_ONCE: case OP_ONCE_NC: - case OP_COND: + case OP_COND: d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd, recurses); - if (d < 0) return d; - branchlength += d; - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += 1 + LINK_SIZE; - break; - + if (d < 0) return d; + branchlength += d; + do cc += GET(cc, 1); while (*cc == OP_ALT); + cc += 1 + LINK_SIZE; + break; + /* Reached end of a branch; if it's a ket it is the end of a nested call. If it's ALT it is an alternation in a nested call. An ACCEPT is effectively an ALT. If it is END it's the end of the outer call. All can be handled by the same code. Note that we must not include the OP_KETRxxx opcodes here, because they all imply an unlimited repeat. */ - - case OP_ALT: - case OP_KET: - case OP_END: + + case OP_ALT: + case OP_KET: + case OP_END: case OP_ACCEPT: case OP_ASSERT_ACCEPT: - if (length < 0) length = branchlength; - else if (length != branchlength) return -1; - if (*cc != OP_ALT) return length; - cc += 1 + LINK_SIZE; - branchlength = 0; - break; - + if (length < 0) length = branchlength; + else if (length != branchlength) return -1; + if (*cc != OP_ALT) return length; + cc += 1 + LINK_SIZE; + branchlength = 0; + break; + /* A true recursion implies not fixed length, but a subroutine call may be OK. If the subroutine is a forward reference, we can't deal with it until the end of the pattern, so return -3. 
*/ @@ -1812,18 +1812,18 @@ for (;;) cc += 1 + LINK_SIZE; break; - /* Skip over assertive subpatterns */ - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do cc += GET(cc, 1); while (*cc == OP_ALT); + /* Skip over assertive subpatterns */ + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + do cc += GET(cc, 1); while (*cc == OP_ALT); cc += 1 + LINK_SIZE; break; - - /* Skip over things that don't match chars */ - + + /* Skip over things that don't match chars */ + case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: @@ -1836,16 +1836,16 @@ for (;;) case OP_CIRCM: case OP_CLOSE: case OP_COMMIT: - case OP_CREF: - case OP_DEF: + case OP_CREF: + case OP_DEF: case OP_DNCREF: case OP_DNRREF: case OP_DOLL: case OP_DOLLM: - case OP_EOD: - case OP_EODN: + case OP_EOD: + case OP_EODN: case OP_FAIL: - case OP_NOT_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: case OP_PRUNE: case OP_REVERSE: case OP_RREF: @@ -1854,27 +1854,27 @@ for (;;) case OP_SOD: case OP_SOM: case OP_THEN: - case OP_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: cc += PRIV(OP_lengths)[*cc]; - break; - - /* Handle literal characters */ - - case OP_CHAR: + break; + + /* Handle literal characters */ + + case OP_CHAR: case OP_CHARI: - case OP_NOT: + case OP_NOT: case OP_NOTI: - branchlength++; - cc += 2; + branchlength++; + cc += 2; #ifdef SUPPORT_UTF if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - /* Handle exact repetitions. The count is already in characters, but we - need to skip over a multibyte character in UTF8 mode. */ - - case OP_EXACT: +#endif + break; + + /* Handle exact repetitions. The count is already in characters, but we + need to skip over a multibyte character in UTF8 mode. 
*/ + + case OP_EXACT: case OP_EXACTI: case OP_NOTEXACT: case OP_NOTEXACTI: @@ -1882,51 +1882,51 @@ for (;;) cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UTF if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - case OP_TYPEEXACT: - branchlength += GET2(cc,1); +#endif + break; + + case OP_TYPEEXACT: + branchlength += GET2(cc,1); if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; cc += 1 + IMM2_SIZE + 1; - break; - - /* Handle single-char matchers */ - - case OP_PROP: - case OP_NOTPROP: - cc += 2; - /* Fall through */ - + break; + + /* Handle single-char matchers */ + + case OP_PROP: + case OP_NOTPROP: + cc += 2; + /* Fall through */ + case OP_HSPACE: case OP_VSPACE: case OP_NOT_HSPACE: case OP_NOT_VSPACE: - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: case OP_ALLANY: - branchlength++; - cc++; - break; - + branchlength++; + cc++; + break; + /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode; otherwise \C is coded as OP_ALLANY. */ - - case OP_ANYBYTE: - return -2; - - /* Check a class for variable quantification */ - + + case OP_ANYBYTE: + return -2; + + /* Check a class for variable quantification */ + case OP_CLASS: case OP_NCLASS: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - case OP_XCLASS: + case OP_XCLASS: /* The original code caused an unsigned overflow in 64 bit systems, so now we use a conditional statement. 
*/ if (op == OP_XCLASS) @@ -1935,36 +1935,36 @@ for (;;) cc += PRIV(OP_lengths)[OP_CLASS]; #else cc += PRIV(OP_lengths)[OP_CLASS]; -#endif - - switch (*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: +#endif + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: case OP_CRPLUS: case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: + case OP_CRQUERY: + case OP_CRMINQUERY: case OP_CRPOSSTAR: case OP_CRPOSPLUS: case OP_CRPOSQUERY: - return -1; - - case OP_CRRANGE: - case OP_CRMINRANGE: + return -1; + + case OP_CRRANGE: + case OP_CRMINRANGE: case OP_CRPOSRANGE: if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; branchlength += (int)GET2(cc,1); cc += 1 + 2 * IMM2_SIZE; - break; - - default: - branchlength++; - } - break; - - /* Anything else is variable length */ - + break; + + default: + branchlength++; + } + break; + + /* Anything else is variable length */ + case OP_ANYNL: case OP_BRAMINZERO: case OP_BRAPOS: @@ -2050,48 +2050,48 @@ for (;;) /* Catch unrecognized opcodes so that when new ones are added they are not forgotten, as has happened in the past. */ - default: + default: return -4; - } - } -/* Control never gets here */ -} - - - -/************************************************* + } + } +/* Control never gets here */ +} + + + +/************************************************* * Scan compiled regex for specific bracket * -*************************************************/ - -/* This little function scans through a compiled pattern until it finds a +*************************************************/ + +/* This little function scans through a compiled pattern until it finds a capturing bracket with the given number, or, if the number is negative, an instance of OP_REVERSE for a lookbehind. The function is global in the C sense so that it can be called from pcre_study() when finding the minimum matching length. 
- -Arguments: - code points to start of expression + +Arguments: + code points to start of expression utf TRUE in UTF-8 / UTF-16 / UTF-32 mode number the required bracket number or negative to find a lookbehind - -Returns: pointer to the opcode for the bracket, or NULL if not found -*/ - + +Returns: pointer to the opcode for the bracket, or NULL if not found +*/ + const pcre_uchar * PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number) -{ -for (;;) - { +{ +for (;;) + { register pcre_uchar c = *code; - if (c == OP_END) return NULL; - - /* XCLASS is used for classes that cannot be represented just by a bit - map. This includes negated single high-valued characters. The length in - the table is zero; the actual length is stored in the compiled code. */ - - if (c == OP_XCLASS) code += GET(code, 1); - + if (c == OP_END) return NULL; + + /* XCLASS is used for classes that cannot be represented just by a bit + map. This includes negated single high-valued characters. The length in + the table is zero; the actual length is stored in the compiled code. */ + + if (c == OP_XCLASS) code += GET(code, 1); + /* Handle recursion */ else if (c == OP_REVERSE) @@ -2100,44 +2100,44 @@ for (;;) code += PRIV(OP_lengths)[c]; } - /* Handle capturing bracket */ - + /* Handle capturing bracket */ + else if (c == OP_CBRA || c == OP_SCBRA || c == OP_CBRAPOS || c == OP_SCBRAPOS) - { + { int n = (int)GET2(code, 1+LINK_SIZE); if (n == number) return (pcre_uchar *)code; code += PRIV(OP_lengths)[c]; - } - - /* Otherwise, we can get the item's length from the table, except that for - repeated character types, we have to test for \p and \P, which have an extra + } + + /* Otherwise, we can get the item's length from the table, except that for + repeated character types, we have to test for \p and \P, which have an extra two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we must add in its length. 
*/ - - else - { - switch(c) - { - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; - break; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: - case OP_TYPEPOSUPTO: + + else + { + switch(c) + { + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; + break; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSUPTO: if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; - break; + break; case OP_MARK: case OP_PRUNE_ARG: @@ -2145,144 +2145,144 @@ for (;;) case OP_THEN_ARG: code += code[1]; break; - } - - /* Add in the fixed length from the table */ - + } + + /* Add in the fixed length from the table */ + code += PRIV(OP_lengths)[c]; - - /* In UTF-8 mode, opcodes that are followed by a character may be followed by - a multi-byte character. The length in the table is a minimum, so we have to - arrange to skip the extra bytes. */ - + + /* In UTF-8 mode, opcodes that are followed by a character may be followed by + a multi-byte character. The length in the table is a minimum, so we have to + arrange to skip the extra bytes. 
*/ + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf) switch(c) - { - case OP_CHAR: + { + case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI: - case OP_EXACT: + case OP_EXACT: case OP_EXACTI: case OP_NOTEXACT: case OP_NOTEXACTI: - case OP_UPTO: + case OP_UPTO: case OP_UPTOI: case OP_NOTUPTO: case OP_NOTUPTOI: - case OP_MINUPTO: + case OP_MINUPTO: case OP_MINUPTOI: case OP_NOTMINUPTO: case OP_NOTMINUPTOI: - case OP_POSUPTO: + case OP_POSUPTO: case OP_POSUPTOI: case OP_NOTPOSUPTO: case OP_NOTPOSUPTOI: - case OP_STAR: + case OP_STAR: case OP_STARI: case OP_NOTSTAR: case OP_NOTSTARI: - case OP_MINSTAR: + case OP_MINSTAR: case OP_MINSTARI: case OP_NOTMINSTAR: case OP_NOTMINSTARI: - case OP_POSSTAR: + case OP_POSSTAR: case OP_POSSTARI: case OP_NOTPOSSTAR: case OP_NOTPOSSTARI: - case OP_PLUS: + case OP_PLUS: case OP_PLUSI: case OP_NOTPLUS: case OP_NOTPLUSI: - case OP_MINPLUS: + case OP_MINPLUS: case OP_MINPLUSI: case OP_NOTMINPLUS: case OP_NOTMINPLUSI: - case OP_POSPLUS: + case OP_POSPLUS: case OP_POSPLUSI: case OP_NOTPOSPLUS: case OP_NOTPOSPLUSI: - case OP_QUERY: + case OP_QUERY: case OP_QUERYI: case OP_NOTQUERY: case OP_NOTQUERYI: - case OP_MINQUERY: + case OP_MINQUERY: case OP_MINQUERYI: case OP_NOTMINQUERY: case OP_NOTMINQUERYI: - case OP_POSQUERY: + case OP_POSQUERY: case OP_POSQUERYI: case OP_NOTPOSQUERY: case OP_NOTPOSQUERYI: if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); - break; - } + break; + } #else (void)(utf); /* Keep compiler happy by referencing function argument */ -#endif - } - } -} - - - -/************************************************* -* Scan compiled regex for recursion reference * -*************************************************/ - -/* This little function scans through a compiled pattern until it finds an -instance of OP_RECURSE. 
- -Arguments: - code points to start of expression +#endif + } + } +} + + + +/************************************************* +* Scan compiled regex for recursion reference * +*************************************************/ + +/* This little function scans through a compiled pattern until it finds an +instance of OP_RECURSE. + +Arguments: + code points to start of expression utf TRUE in UTF-8 / UTF-16 / UTF-32 mode - -Returns: pointer to the opcode for OP_RECURSE, or NULL if not found -*/ - + +Returns: pointer to the opcode for OP_RECURSE, or NULL if not found +*/ + static const pcre_uchar * find_recurse(const pcre_uchar *code, BOOL utf) -{ -for (;;) - { +{ +for (;;) + { register pcre_uchar c = *code; - if (c == OP_END) return NULL; - if (c == OP_RECURSE) return code; - - /* XCLASS is used for classes that cannot be represented just by a bit - map. This includes negated single high-valued characters. The length in - the table is zero; the actual length is stored in the compiled code. */ - - if (c == OP_XCLASS) code += GET(code, 1); - - /* Otherwise, we can get the item's length from the table, except that for - repeated character types, we have to test for \p and \P, which have an extra + if (c == OP_END) return NULL; + if (c == OP_RECURSE) return code; + + /* XCLASS is used for classes that cannot be represented just by a bit + map. This includes negated single high-valued characters. The length in + the table is zero; the actual length is stored in the compiled code. */ + + if (c == OP_XCLASS) code += GET(code, 1); + + /* Otherwise, we can get the item's length from the table, except that for + repeated character types, we have to test for \p and \P, which have an extra two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we must add in its length. 
*/ - - else - { - switch(c) - { - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; - break; - - case OP_TYPEPOSUPTO: - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: + + else + { + switch(c) + { + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; + break; + + case OP_TYPEPOSUPTO: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; - break; + break; case OP_MARK: case OP_PRUNE_ARG: @@ -2290,134 +2290,134 @@ for (;;) case OP_THEN_ARG: code += code[1]; break; - } - - /* Add in the fixed length from the table */ - + } + + /* Add in the fixed length from the table */ + code += PRIV(OP_lengths)[c]; - - /* In UTF-8 mode, opcodes that are followed by a character may be followed - by a multi-byte character. The length in the table is a minimum, so we have - to arrange to skip the extra bytes. */ - + + /* In UTF-8 mode, opcodes that are followed by a character may be followed + by a multi-byte character. The length in the table is a minimum, so we have + to arrange to skip the extra bytes. 
*/ + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf) switch(c) - { - case OP_CHAR: + { + case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI: - case OP_EXACT: + case OP_EXACT: case OP_EXACTI: case OP_NOTEXACT: case OP_NOTEXACTI: - case OP_UPTO: + case OP_UPTO: case OP_UPTOI: case OP_NOTUPTO: case OP_NOTUPTOI: - case OP_MINUPTO: + case OP_MINUPTO: case OP_MINUPTOI: case OP_NOTMINUPTO: case OP_NOTMINUPTOI: - case OP_POSUPTO: + case OP_POSUPTO: case OP_POSUPTOI: case OP_NOTPOSUPTO: case OP_NOTPOSUPTOI: - case OP_STAR: + case OP_STAR: case OP_STARI: case OP_NOTSTAR: case OP_NOTSTARI: - case OP_MINSTAR: + case OP_MINSTAR: case OP_MINSTARI: case OP_NOTMINSTAR: case OP_NOTMINSTARI: - case OP_POSSTAR: + case OP_POSSTAR: case OP_POSSTARI: case OP_NOTPOSSTAR: case OP_NOTPOSSTARI: - case OP_PLUS: + case OP_PLUS: case OP_PLUSI: case OP_NOTPLUS: case OP_NOTPLUSI: - case OP_MINPLUS: + case OP_MINPLUS: case OP_MINPLUSI: case OP_NOTMINPLUS: case OP_NOTMINPLUSI: - case OP_POSPLUS: + case OP_POSPLUS: case OP_POSPLUSI: case OP_NOTPOSPLUS: case OP_NOTPOSPLUSI: - case OP_QUERY: + case OP_QUERY: case OP_QUERYI: case OP_NOTQUERY: case OP_NOTQUERYI: - case OP_MINQUERY: + case OP_MINQUERY: case OP_MINQUERYI: case OP_NOTMINQUERY: case OP_NOTMINQUERYI: - case OP_POSQUERY: + case OP_POSQUERY: case OP_POSQUERYI: case OP_NOTPOSQUERY: case OP_NOTPOSQUERYI: if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); - break; - } + break; + } #else (void)(utf); /* Keep compiler happy by referencing function argument */ -#endif - } - } -} - - - -/************************************************* -* Scan compiled branch for non-emptiness * -*************************************************/ - -/* This function scans through a branch of a compiled pattern to see whether it -can match the empty string or not. It is called from could_be_empty() -below and from compile_branch() when checking for an unlimited repeat of a -group that can match nothing. 
Note that first_significant_code() skips over -backward and negative forward assertions when its final argument is TRUE. If we -hit an unclosed bracket, we return "empty" - this means we've struck an inner -bracket whose current branch will already have been scanned. - -Arguments: - code points to start of search - endcode points to where to stop +#endif + } + } +} + + + +/************************************************* +* Scan compiled branch for non-emptiness * +*************************************************/ + +/* This function scans through a branch of a compiled pattern to see whether it +can match the empty string or not. It is called from could_be_empty() +below and from compile_branch() when checking for an unlimited repeat of a +group that can match nothing. Note that first_significant_code() skips over +backward and negative forward assertions when its final argument is TRUE. If we +hit an unclosed bracket, we return "empty" - this means we've struck an inner +bracket whose current branch will already have been scanned. + +Arguments: + code points to start of search + endcode points to where to stop utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode cd contains pointers to tables etc. recurses chain of recurse_check to catch mutual recursion - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL + +Returns: TRUE if what is matched could be empty +*/ + +static BOOL could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, BOOL utf, compile_data *cd, recurse_check *recurses) -{ +{ register pcre_uchar c; recurse_check this_recurse; for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); - code < endcode; + code < endcode; code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) - { + { const pcre_uchar *ccode; - - c = *code; - - /* Skip over forward assertions; the other assertions are skipped by - first_significant_code() with a TRUE final argument. 
*/ - - if (c == OP_ASSERT) - { - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - + + c = *code; + + /* Skip over forward assertions; the other assertions are skipped by + first_significant_code() with a TRUE final argument. */ + + if (c == OP_ASSERT) + { + do code += GET(code, 1); while (*code == OP_ALT); + c = *code; + continue; + } + /* For a recursion/subroutine call, if its end has been reached, which implies a backward reference subroutine call, we can scan it. If it's a forward reference subroutine call, we can't. To detect forward reference @@ -2480,17 +2480,17 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); continue; } - /* Groups with zero repeats can of course be empty; skip them. */ - + /* Groups with zero repeats can of course be empty; skip them. */ + if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO || c == OP_BRAPOSZERO) - { + { code += PRIV(OP_lengths)[c]; - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - + do code += GET(code, 1); while (*code == OP_ALT); + c = *code; + continue; + } + /* A nested group that is already marked as "could be empty" can just be skipped. */ @@ -2502,24 +2502,24 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); continue; } - /* For other groups, scan the branches. */ - + /* For other groups, scan the branches. */ + if (c == OP_BRA || c == OP_BRAPOS || c == OP_CBRA || c == OP_CBRAPOS || c == OP_ONCE || c == OP_ONCE_NC || c == OP_COND || c == OP_SCOND) - { - BOOL empty_branch; - if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ - + { + BOOL empty_branch; + if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ + /* If a conditional group has only one branch, there is a second, implied, empty branch, so just skip over the conditional, because it could be empty. Otherwise, scan the individual branches of the group. 
*/ - + if (c == OP_COND && code[GET(code, 1)] != OP_ALT) code += GET(code, 1); else - { + { empty_branch = FALSE; do { @@ -2529,176 +2529,176 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); } while (*code == OP_ALT); if (!empty_branch) return FALSE; /* All branches are non-empty */ - } - - c = *code; - continue; - } - - /* Handle the other opcodes */ - - switch (c) - { - /* Check for quantifiers after a class. XCLASS is used for classes that - cannot be represented just by a bit map. This includes negated single + } + + c = *code; + continue; + } + + /* Handle the other opcodes */ + + switch (c) + { + /* Check for quantifiers after a class. XCLASS is used for classes that + cannot be represented just by a bit map. This includes negated single high-valued characters. The length in PRIV(OP_lengths)[] is zero; the - actual length is stored in the compiled code, so we must update "code" - here. */ - + actual length is stored in the compiled code, so we must update "code" + here. 
*/ + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - ccode = code += GET(code, 1); - goto CHECK_CLASS_REPEAT; -#endif - - case OP_CLASS: - case OP_NCLASS: + case OP_XCLASS: + ccode = code += GET(code, 1); + goto CHECK_CLASS_REPEAT; +#endif + + case OP_CLASS: + case OP_NCLASS: ccode = code + PRIV(OP_lengths)[OP_CLASS]; - + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - CHECK_CLASS_REPEAT: -#endif - - switch (*ccode) - { - case OP_CRSTAR: /* These could be empty; continue */ - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: + CHECK_CLASS_REPEAT: +#endif + + switch (*ccode) + { + case OP_CRSTAR: /* These could be empty; continue */ + case OP_CRMINSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: case OP_CRPOSSTAR: case OP_CRPOSQUERY: - break; - - default: /* Non-repeat => class must match */ - case OP_CRPLUS: /* These repeats aren't empty */ - case OP_CRMINPLUS: + break; + + default: /* Non-repeat => class must match */ + case OP_CRPLUS: /* These repeats aren't empty */ + case OP_CRMINPLUS: case OP_CRPOSPLUS: - return FALSE; - - case OP_CRRANGE: - case OP_CRMINRANGE: + return FALSE; + + case OP_CRRANGE: + case OP_CRMINRANGE: case OP_CRPOSRANGE: - if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ - break; - } - break; - - /* Opcodes that must match a character */ - + if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ + break; + } + break; + + /* Opcodes that must match a character */ + case OP_ANY: case OP_ALLANY: case OP_ANYBYTE: - case OP_PROP: - case OP_NOTPROP: + case OP_PROP: + case OP_NOTPROP: case OP_ANYNL: case OP_NOT_HSPACE: case OP_HSPACE: case OP_NOT_VSPACE: case OP_VSPACE: - case OP_EXTUNI: + case OP_EXTUNI: - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: - case OP_CHAR: + case OP_CHAR: case OP_CHARI: - 
case OP_NOT: + case OP_NOT: case OP_NOTI: - case OP_PLUS: + case OP_PLUS: case OP_PLUSI: - case OP_MINPLUS: + case OP_MINPLUS: case OP_MINPLUSI: - case OP_NOTPLUS: + case OP_NOTPLUS: case OP_NOTPLUSI: - case OP_NOTMINPLUS: + case OP_NOTMINPLUS: case OP_NOTMINPLUSI: case OP_POSPLUS: case OP_POSPLUSI: - case OP_NOTPOSPLUS: + case OP_NOTPOSPLUS: case OP_NOTPOSPLUSI: case OP_EXACT: case OP_EXACTI: - case OP_NOTEXACT: + case OP_NOTEXACT: case OP_NOTEXACTI: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSPLUS: - case OP_TYPEEXACT: - - return FALSE; - - /* These are going to continue, as they may be empty, but we have to - fudge the length for the \p and \P cases. */ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; - break; - - /* Same for these */ - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSPLUS: + case OP_TYPEEXACT: + + return FALSE; + + /* These are going to continue, as they may be empty, but we have to + fudge the length for the \p and \P cases. 
*/ + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPOSSTAR: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; + break; + + /* Same for these */ + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; - break; - - /* End of branch */ - - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: + break; + + /* End of branch */ + + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: case OP_KETRPOS: - case OP_ALT: - return TRUE; - - /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, + case OP_ALT: + return TRUE; + + /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, MINUPTO, and POSUPTO and their caseless and negative versions may be followed by a multibyte character. */ - + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - case OP_STAR: + case OP_STAR: case OP_STARI: case OP_NOTSTAR: case OP_NOTSTARI: - case OP_MINSTAR: + case OP_MINSTAR: case OP_MINSTARI: case OP_NOTMINSTAR: case OP_NOTMINSTARI: - case OP_POSSTAR: + case OP_POSSTAR: case OP_POSSTARI: case OP_NOTPOSSTAR: case OP_NOTPOSSTARI: - case OP_QUERY: + case OP_QUERY: case OP_QUERYI: case OP_NOTQUERY: case OP_NOTQUERYI: - case OP_MINQUERY: + case OP_MINQUERY: case OP_MINQUERYI: case OP_NOTMINQUERY: case OP_NOTMINQUERYI: - case OP_POSQUERY: + case OP_POSQUERY: case OP_POSQUERYI: case OP_NOTPOSQUERY: case OP_NOTPOSQUERYI: @@ -2706,24 +2706,24 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); break; - case OP_UPTO: + case OP_UPTO: case OP_UPTOI: case OP_NOTUPTO: case OP_NOTUPTOI: - case OP_MINUPTO: + case OP_MINUPTO: case OP_MINUPTOI: case OP_NOTMINUPTO: case OP_NOTMINUPTOI: - case OP_POSUPTO: + case OP_POSUPTO: case OP_POSUPTOI: case OP_NOTPOSUPTO: case OP_NOTPOSUPTOI: if (utf && 
HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); - break; -#endif + break; +#endif /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument string. */ @@ -2739,51 +2739,51 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); default: break; - } - } - -return TRUE; -} - - - -/************************************************* -* Scan compiled regex for non-emptiness * -*************************************************/ - -/* This function is called to check for left recursive calls. We want to check -the current branch of the current pattern to see if it could match the empty -string. If it could, we must look outwards for branches at other levels, -stopping when we pass beyond the bracket which is the subject of the recursion. + } + } + +return TRUE; +} + + + +/************************************************* +* Scan compiled regex for non-emptiness * +*************************************************/ + +/* This function is called to check for left recursive calls. We want to check +the current branch of the current pattern to see if it could match the empty +string. If it could, we must look outwards for branches at other levels, +stopping when we pass beyond the bracket which is the subject of the recursion. This function is called only during the real compile, not during the pre-compile. 
- -Arguments: - code points to start of the recursion - endcode points to where to stop (current RECURSE item) - bcptr points to the chain of current (unclosed) branch starts + +Arguments: + code points to start of the recursion + endcode points to where to stop (current RECURSE item) + bcptr points to the chain of current (unclosed) branch starts utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode cd pointers to tables etc - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL + +Returns: TRUE if what is matched could be empty +*/ + +static BOOL could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode, branch_chain *bcptr, BOOL utf, compile_data *cd) -{ +{ while (bcptr != NULL && bcptr->current_branch >= code) - { + { if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL)) return FALSE; - bcptr = bcptr->outer; - } -return TRUE; -} - - - -/************************************************* + bcptr = bcptr->outer; + } +return TRUE; +} + + + +/************************************************* * Base opcode of repeated opcodes * *************************************************/ @@ -3889,29 +3889,29 @@ for (;;) /************************************************* -* Check for POSIX class syntax * -*************************************************/ - -/* This function is called when the sequence "[:" or "[." or "[=" is -encountered in a character class. It checks whether this is followed by a -sequence of characters terminated by a matching ":]" or ".]" or "=]". If we -reach an unescaped ']' without the special preceding character, return FALSE. - -Originally, this function only recognized a sequence of letters between the -terminators, but it seems that Perl recognizes any sequence of characters, -though of course unknown POSIX names are subsequently rejected. Perl gives an -"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE -didn't consider this to be a POSIX class. Likewise for [:1234:]. 
- -The problem in trying to be exactly like Perl is in the handling of escapes. We -have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX -class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code +* Check for POSIX class syntax * +*************************************************/ + +/* This function is called when the sequence "[:" or "[." or "[=" is +encountered in a character class. It checks whether this is followed by a +sequence of characters terminated by a matching ":]" or ".]" or "=]". If we +reach an unescaped ']' without the special preceding character, return FALSE. + +Originally, this function only recognized a sequence of letters between the +terminators, but it seems that Perl recognizes any sequence of characters, +though of course unknown POSIX names are subsequently rejected. Perl gives an +"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE +didn't consider this to be a POSIX class. Likewise for [:1234:]. + +The problem in trying to be exactly like Perl is in the handling of escapes. We +have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX +class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code below handles the special cases \\ and \], but does not try to do any other escape processing. This makes it different from Perl for cases such as [:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does, I think. - + A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. It seems that the appearance of a nested POSIX class supersedes an apparent external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or @@ -3923,20 +3923,20 @@ example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for seem right at all. PCRE does not allow closing square brackets in POSIX class names. 
-Arguments: - ptr pointer to the initial [ - endptr where to return the end pointer - -Returns: TRUE or FALSE -*/ - -static BOOL +Arguments: + ptr pointer to the initial [ + endptr where to return the end pointer + +Returns: TRUE or FALSE +*/ + +static BOOL check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr) -{ +{ pcre_uchar terminator; /* Don't combine these lines; the Solaris cc */ -terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ +terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ for (++ptr; *ptr != CHAR_NULL; ptr++) - { + { if (*ptr == CHAR_BACKSLASH && (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH)) @@ -3944,107 +3944,107 @@ for (++ptr; *ptr != CHAR_NULL; ptr++) else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) || *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) - { + { *endptr = ptr; return TRUE; - } - } -return FALSE; -} - - - - -/************************************************* -* Check POSIX class name * -*************************************************/ - -/* This function is called to check the name given in a POSIX-style class entry -such as [:alnum:]. - -Arguments: - ptr points to the first letter - len the length of the name - -Returns: a value representing the name, or -1 if unknown -*/ - -static int + } + } +return FALSE; +} + + + + +/************************************************* +* Check POSIX class name * +*************************************************/ + +/* This function is called to check the name given in a POSIX-style class entry +such as [:alnum:]. 
+ +Arguments: + ptr points to the first letter + len the length of the name + +Returns: a value representing the name, or -1 if unknown +*/ + +static int check_posix_name(const pcre_uchar *ptr, int len) -{ -const char *pn = posix_names; -register int yield = 0; -while (posix_name_lengths[yield] != 0) - { - if (len == posix_name_lengths[yield] && +{ +const char *pn = posix_names; +register int yield = 0; +while (posix_name_lengths[yield] != 0) + { + if (len == posix_name_lengths[yield] && STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield; - pn += posix_name_lengths[yield] + 1; - yield++; - } -return -1; -} - - -/************************************************* -* Adjust OP_RECURSE items in repeated group * -*************************************************/ - -/* OP_RECURSE items contain an offset from the start of the regex to the group -that is referenced. This means that groups can be replicated for fixed -repetition simply by copying (because the recursion is allowed to refer to -earlier groups that are outside the current group). However, when a group is + pn += posix_name_lengths[yield] + 1; + yield++; + } +return -1; +} + + +/************************************************* +* Adjust OP_RECURSE items in repeated group * +*************************************************/ + +/* OP_RECURSE items contain an offset from the start of the regex to the group +that is referenced. This means that groups can be replicated for fixed +repetition simply by copying (because the recursion is allowed to refer to +earlier groups that are outside the current group). However, when a group is optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is inserted before it, after it has been compiled. This means that any OP_RECURSE items within it that refer to the group itself or any contained groups have to have their offsets adjusted. That one of the jobs of this function. 
Before it is called, the partially compiled regex must be temporarily terminated with OP_END. - + This function has been extended to cope with forward references for recursions and subroutine calls. It must check the list of such references for the group we are dealing with. If it finds that one of the recursions in the current group is on this list, it does not adjust the value in the reference (which is a group number). After the group has been scanned, all the offsets in the forward reference list for the group are adjusted. - -Arguments: - group points to the start of the group - adjust the amount by which the group is to be moved + +Arguments: + group points to the start of the group + adjust the amount by which the group is to be moved utf TRUE in UTF-8 / UTF-16 / UTF-32 mode - cd contains pointers to tables etc. + cd contains pointers to tables etc. save_hwm_offset the hwm forward reference offset at the start of the group - -Returns: nothing -*/ - -static void + +Returns: nothing +*/ + +static void adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd, size_t save_hwm_offset) -{ +{ int offset; pcre_uchar *hc; pcre_uchar *ptr = group; - + while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL) - { + { for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; hc += LINK_SIZE) - { + { offset = (int)GET(hc, 0); if (cd->start_code + offset == ptr + 1) break; - } - + } + /* If we have not found this recursion on the forward reference list, adjust the recursion's offset if it's after the start of this group. */ - - if (hc >= cd->hwm) - { + + if (hc >= cd->hwm) + { offset = (int)GET(ptr, 1); - if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); - } - - ptr += 1 + LINK_SIZE; - } + if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); + } + + ptr += 1 + LINK_SIZE; + } /* Now adjust all forward reference offsets for the group. 
*/ @@ -4054,96 +4054,96 @@ for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; offset = (int)GET(hc, 0); PUT(hc, 0, offset + adjust); } -} - - - -/************************************************* -* Insert an automatic callout point * -*************************************************/ - -/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert -callout points before each pattern item. - -Arguments: - code current code pointer - ptr current pattern pointer - cd pointers to tables etc - -Returns: new code pointer -*/ - +} + + + +/************************************************* +* Insert an automatic callout point * +*************************************************/ + +/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert +callout points before each pattern item. + +Arguments: + code current code pointer + ptr current pattern pointer + cd pointers to tables etc + +Returns: new code pointer +*/ + static pcre_uchar * auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd) -{ -*code++ = OP_CALLOUT; -*code++ = 255; +{ +*code++ = OP_CALLOUT; +*code++ = 255; PUT(code, 0, (int)(ptr - cd->start_pattern)); /* Pattern offset */ PUT(code, LINK_SIZE, 0); /* Default length */ return code + 2 * LINK_SIZE; -} - - - -/************************************************* -* Complete a callout item * -*************************************************/ - -/* A callout item contains the length of the next item in the pattern, which -we can't fill in till after we have reached the relevant point. This is used -for both automatic and manual callouts. 
- -Arguments: - previous_callout points to previous callout item - ptr current pattern pointer - cd pointers to tables etc - -Returns: nothing -*/ - -static void +} + + + +/************************************************* +* Complete a callout item * +*************************************************/ + +/* A callout item contains the length of the next item in the pattern, which +we can't fill in till after we have reached the relevant point. This is used +for both automatic and manual callouts. + +Arguments: + previous_callout points to previous callout item + ptr current pattern pointer + cd pointers to tables etc + +Returns: nothing +*/ + +static void complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd) -{ +{ int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2)); -PUT(previous_callout, 2 + LINK_SIZE, length); -} - - - -#ifdef SUPPORT_UCP -/************************************************* -* Get othercase range * -*************************************************/ - -/* This function is passed the start and end of a class range, in UTF-8 mode +PUT(previous_callout, 2 + LINK_SIZE, length); +} + + + +#ifdef SUPPORT_UCP +/************************************************* +* Get othercase range * +*************************************************/ + +/* This function is passed the start and end of a class range, in UTF-8 mode with UCP support. It searches up the characters, looking for ranges of -characters in the "other" case. Each call returns the next one, updating the +characters in the "other" case. Each call returns the next one, updating the start address. A character with multiple other cases is returned on its own with a special return value. 
- -Arguments: - cptr points to starting character value; updated - d end value - ocptr where to put start of othercase range - odptr where to put end of othercase range - + +Arguments: + cptr points to starting character value; updated + d end value + ocptr where to put start of othercase range + odptr where to put end of othercase range + Yield: -1 when no more 0 when a range is returned >0 the CASESET offset for char with multiple other cases in this case, ocptr contains the original -*/ - +*/ + static int get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr, pcre_uint32 *odptr) -{ +{ pcre_uint32 c, othercase, next; unsigned int co; - + /* Find the first character that has an other case. If it has multiple other cases, return its case offset value. */ -for (c = *cptr; c <= d; c++) +for (c = *cptr; c <= d; c++) { if ((co = UCD_CASESET(c)) != 0) { @@ -4153,69 +4153,69 @@ for (c = *cptr; c <= d; c++) } if ((othercase = UCD_OTHERCASE(c)) != c) break; } - + if (c > d) return -1; /* Reached end of range */ - + /* Found a character that has a single other case. Search for the end of the range, which is either the end of the input range, or a character that has zero or more than one other cases. */ -*ocptr = othercase; -next = othercase + 1; - -for (++c; c <= d; c++) - { +*ocptr = othercase; +next = othercase + 1; + +for (++c; c <= d; c++) + { if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; - next++; - } - + next++; + } + *odptr = next - 1; /* End of othercase range */ *cptr = c; /* Rest of input range */ return 0; -} -#endif /* SUPPORT_UCP */ - - - -/************************************************* +} +#endif /* SUPPORT_UCP */ + + + +/************************************************* * Add a character or range to a class * -*************************************************/ - +*************************************************/ + /* This function packages up the logic of adding a character or range of characters to a class. 
The character values in the arguments will be within the valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is mutually recursive with the function immediately below. - -Arguments: + +Arguments: classbits the bit map for characters < 256 uchardptr points to the pointer for extra data options the options word - cd contains pointers to tables etc. + cd contains pointers to tables etc. start start of range character end end of range character - + Returns: the number of < 256 characters added the pointer to extra data is updated -*/ - +*/ + static int add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, compile_data *cd, pcre_uint32 start, pcre_uint32 end) -{ +{ pcre_uint32 c; pcre_uint32 classbits_end = (end <= 0xff ? end : 0xff); int n8 = 0; - + /* If caseless matching is required, scan the range and process alternate cases. In Unicode, there are 8-bit characters that have alternate cases that are greater than 255 and vice-versa. Sometimes we can just extend the original range. */ - + if ((options & PCRE_CASELESS) != 0) - { + { #ifdef SUPPORT_UCP if ((options & PCRE_UTF8) != 0) - { + { int rc; pcre_uint32 oc, od; @@ -4223,20 +4223,20 @@ if ((options & PCRE_CASELESS) != 0) c = start; while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) - { + { /* Handle a single character that has more than one other case. */ - + if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd, PRIV(ucd_caseless_sets) + rc, oc); - + /* Do nothing if the other case range is within the original range. */ - + else if (oc >= start && od <= end) continue; - + /* Extend the original range if there is overlap, noting that if oc < c, we can't have od > end because a subrange is always shorter than the basic range. Otherwise, use a recursive call to add the additional range. 
*/ - + else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ else if (od > end && oc <= end + 1) { @@ -4248,52 +4248,52 @@ if ((options & PCRE_CASELESS) != 0) } else #endif /* SUPPORT_UCP */ - + /* Not UTF-mode, or no UCP */ for (c = start; c <= classbits_end; c++) - { + { SETBIT(classbits, cd->fcc[c]); n8++; - } - } - + } + } + /* Now handle the original range. Adjust the final value according to the bit length - this means that the same lists of (e.g.) horizontal spaces can be used in all cases. */ - + #if defined COMPILE_PCRE8 #ifdef SUPPORT_UTF if ((options & PCRE_UTF8) == 0) #endif if (end > 0xff) end = 0xff; - + #elif defined COMPILE_PCRE16 #ifdef SUPPORT_UTF if ((options & PCRE_UTF16) == 0) #endif if (end > 0xffff) end = 0xffff; - + #endif /* COMPILE_PCRE[8|16] */ - + /* Use the bitmap for characters < 256. Otherwise use extra data.*/ - + for (c = start; c <= classbits_end; c++) - { + { /* Regardless of start, c will always be <= 255. */ SETBIT(classbits, c); n8++; } - + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (start <= 0xff) start = 0xff + 1; - + if (end >= start) { pcre_uchar *uchardata = *uchardptr; #ifdef SUPPORT_UTF if ((options & PCRE_UTF8) != 0) /* All UTFs use the same flag bit */ - { + { if (start < end) { *uchardata++ = XCL_RANGE; @@ -4305,49 +4305,49 @@ if (end >= start) *uchardata++ = XCL_SINGLE; uchardata += PRIV(ord2utf)(start, uchardata); } - } - else + } + else #endif /* SUPPORT_UTF */ - + /* Without UTF support, character values are constrained by the bit length, and can only be > 256 for 16-bit and 32-bit libraries. 
*/ - + #ifdef COMPILE_PCRE8 {} -#else +#else if (start < end) - { + { *uchardata++ = XCL_RANGE; *uchardata++ = start; *uchardata++ = end; - } + } else if (start == end) - { + { *uchardata++ = XCL_SINGLE; *uchardata++ = start; - } + } #endif - + *uchardptr = uchardata; /* Updata extra data pointer */ - } + } #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ - + return n8; /* Number of 8-bit characters */ } - - - - + + + + /************************************************* * Add a list of characters to a class * *************************************************/ - + /* This function is used for adding a list of case-equivalent characters to a class, and also for adding a list of horizontal or vertical whitespace. If the list is in order (which it should be), ranges of characters are detected and handled appropriately. This function is mutually recursive with the function above. - + Arguments: classbits the bit map for characters < 256 uchardptr points to the pointer for extra data @@ -4357,11 +4357,11 @@ Arguments: except character to omit; this is used when adding lists of case-equivalent characters to avoid including the one we already know about - + Returns: the number of < 256 characters added the pointer to extra data is updated */ - + static int add_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, compile_data *cd, const pcre_uint32 *p, unsigned int except) @@ -4374,32 +4374,32 @@ while (p[0] < NOTACHAR) { while(p[n+1] == p[0] + n + 1) n++; n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]); - } + } p += n + 1; } return n8; } - - - + + + /************************************************* * Add characters not in a list to a class * *************************************************/ - + /* This function is used for adding the complement of a list of horizontal or vertical whitespace to a class. The list must be in order. 
- + Arguments: classbits the bit map for characters < 256 uchardptr points to the pointer for extra data options the options word cd contains pointers to tables etc. p points to row of 32-bit values, terminated by NOTACHAR - + Returns: the number of < 256 characters added the pointer to extra data is updated */ - + static int add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, compile_data *cd, const pcre_uint32 *p) @@ -4414,23 +4414,23 @@ while (p[0] < NOTACHAR) n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1, (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1); p++; - } + } return n8; -} - - - -/************************************************* -* Compile one branch * -*************************************************/ - -/* Scan the pattern, compiling it into the a vector. If the options are -changed during the branch, the pointer is used to change the external options -bits. This function is used during the pre-compile phase when we are trying -to find out the amount of memory needed, as well as during the real compile -phase. The value of lengthptr distinguishes the two phases. - -Arguments: +} + + + +/************************************************* +* Compile one branch * +*************************************************/ + +/* Scan the pattern, compiling it into the a vector. If the options are +changed during the branch, the pointer is used to change the external options +bits. This function is used during the pre-compile phase when we are trying +to find out the amount of memory needed, as well as during the real compile +phase. The value of lengthptr distinguishes the two phases. + +Arguments: optionsptr pointer to the option bits codeptr points to the pointer to the current code point ptrptr points to the current pattern pointer @@ -4444,38 +4444,38 @@ Arguments: cd contains pointers to tables etc. 
lengthptr NULL during the real compile phase points to length accumulator during pre-compile phase - + Returns: TRUE on success FALSE, with *errorcodeptr set non-zero on error -*/ - -static BOOL +*/ + +static BOOL compile_branch(int *optionsptr, pcre_uchar **codeptr, const pcre_uchar **ptrptr, int *errorcodeptr, pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, branch_chain *bcptr, int cond_depth, - compile_data *cd, int *lengthptr) -{ -int repeat_type, op_type; -int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ -int bravalue = 0; -int greedy_default, greedy_non_default; + compile_data *cd, int *lengthptr) +{ +int repeat_type, op_type; +int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ +int bravalue = 0; +int greedy_default, greedy_non_default; pcre_uint32 firstchar, reqchar; pcre_int32 firstcharflags, reqcharflags; pcre_uint32 zeroreqchar, zerofirstchar; pcre_int32 zeroreqcharflags, zerofirstcharflags; pcre_int32 req_caseopt, reqvary, tempreqvary; int options = *optionsptr; /* May change dynamically */ -int after_manual_callout = 0; -int length_prevgroup = 0; +int after_manual_callout = 0; +int length_prevgroup = 0; register pcre_uint32 c; int escape; register pcre_uchar *code = *codeptr; pcre_uchar *last_code = code; pcre_uchar *orig_code = code; pcre_uchar *tempcode; -BOOL inescq = FALSE; +BOOL inescq = FALSE; BOOL groupsetfirstchar = FALSE; const pcre_uchar *ptr = *ptrptr; const pcre_uchar *tempptr; @@ -4484,7 +4484,7 @@ pcre_uchar *previous = NULL; pcre_uchar *previous_callout = NULL; size_t item_hwm_offset = 0; pcre_uint8 classbits[32]; - + /* We can fish out the UTF-8 setting once and for all into a BOOL, but we must not do this for other options (e.g. PCRE_EXTENDED) because they may change dynamically as we process the pattern. 
*/ @@ -4495,11 +4495,11 @@ BOOL utf = (options & PCRE_UTF8) != 0; #ifndef COMPILE_PCRE32 pcre_uchar utf_chars[6]; #endif -#else +#else BOOL utf = FALSE; -#endif - -/* Helper variables for OP_XCLASS opcode (for characters > 255). We define +#endif + +/* Helper variables for OP_XCLASS opcode (for characters > 255). We define class_uchardata always so that it can be passed to add_to_class() always, though it will not be used in non-UTF 8-bit cases. This avoids having to supply alternative calls for the different cases. */ @@ -4511,70 +4511,70 @@ pcre_uchar *class_uchardata_base; #endif #ifdef PCRE_DEBUG -if (lengthptr != NULL) DPRINTF((">> start branch\n")); -#endif - -/* Set up the default and non-default settings for greediness */ - -greedy_default = ((options & PCRE_UNGREEDY) != 0); -greedy_non_default = greedy_default ^ 1; - -/* Initialize no first byte, no required byte. REQ_UNSET means "no char -matching encountered yet". It gets changed to REQ_NONE if we hit something that +if (lengthptr != NULL) DPRINTF((">> start branch\n")); +#endif + +/* Set up the default and non-default settings for greediness */ + +greedy_default = ((options & PCRE_UNGREEDY) != 0); +greedy_non_default = greedy_default ^ 1; + +/* Initialize no first byte, no required byte. REQ_UNSET means "no char +matching encountered yet". It gets changed to REQ_NONE if we hit something that matches a non-fixed char first char; reqchar just remains unset if we never -find one. - -When we hit a repeat whose minimum is zero, we may have to adjust these values -to take the zero repeat into account. This is implemented by setting them to +find one. + +When we hit a repeat whose minimum is zero, we may have to adjust these values +to take the zero repeat into account. This is implemented by setting them to zerofirstbyte and zeroreqchar when such a repeat is encountered. The individual -item types that can be repeated set these backoff variables appropriately. 
*/ - +item types that can be repeated set these backoff variables appropriately. */ + firstchar = reqchar = zerofirstchar = zeroreqchar = 0; firstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET; - + /* The variable req_caseopt contains either the REQ_CASELESS value or zero, according to the current setting of the caseless flag. The REQ_CASELESS leaves the lower 28 bit empty. It is added into the firstchar or reqchar variables to record the case status of the value. This is used only for ASCII characters. */ - + req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0; - -/* Switch on next character until the end of the branch */ - -for (;; ptr++) - { - BOOL negate_class; - BOOL should_flip_negation; - BOOL possessive_quantifier; - BOOL is_quantifier; - BOOL is_recurse; - BOOL reset_bracount; + +/* Switch on next character until the end of the branch */ + +for (;; ptr++) + { + BOOL negate_class; + BOOL should_flip_negation; + BOOL possessive_quantifier; + BOOL is_quantifier; + BOOL is_recurse; + BOOL reset_bracount; int class_has_8bitchar; int class_one_char; #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 BOOL xclass_has_prop; #endif - int newoptions; - int recno; - int refsign; - int skipbytes; + int newoptions; + int recno; + int refsign; + int skipbytes; pcre_uint32 subreqchar, subfirstchar; pcre_int32 subreqcharflags, subfirstcharflags; - int terminator; + int terminator; unsigned int mclength; unsigned int tempbracount; pcre_uint32 ec; pcre_uchar mcbuffer[8]; - + /* Come here to restart the loop without advancing the pointer. */ - + REDO_LOOP: /* Get next character in the pattern */ - c = *ptr; - + c = *ptr; + /* If we are at the end of a nested substitution, revert to the outer level string. Nesting only happens one level deep. */ @@ -4585,122 +4585,122 @@ for (;; ptr++) c = *ptr; } - /* If we are in the pre-compile phase, accumulate the length used for the - previous cycle of this loop. 
*/ - - if (lengthptr != NULL) - { + /* If we are in the pre-compile phase, accumulate the length used for the + previous cycle of this loop. */ + + if (lengthptr != NULL) + { #ifdef PCRE_DEBUG - if (code > cd->hwm) cd->hwm = code; /* High water info */ -#endif + if (code > cd->hwm) cd->hwm = code; /* High water info */ +#endif if (code > cd->start_workspace + cd->workspace_size - WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ - { + { *errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)? ERR52 : ERR87; - goto FAILED; - } - - /* There is at least one situation where code goes backwards: this is the - case of a zero quantifier after a class (e.g. [ab]{0}). At compile time, - the class is simply eliminated. However, it is created first, so we have to - allow memory for it. Therefore, don't ever reduce the length at this point. - */ - - if (code < last_code) code = last_code; - - /* Paranoid check for integer overflow */ - - if (OFLOW_MAX - *lengthptr < code - last_code) - { - *errorcodeptr = ERR20; - goto FAILED; - } - + goto FAILED; + } + + /* There is at least one situation where code goes backwards: this is the + case of a zero quantifier after a class (e.g. [ab]{0}). At compile time, + the class is simply eliminated. However, it is created first, so we have to + allow memory for it. Therefore, don't ever reduce the length at this point. + */ + + if (code < last_code) code = last_code; + + /* Paranoid check for integer overflow */ + + if (OFLOW_MAX - *lengthptr < code - last_code) + { + *errorcodeptr = ERR20; + goto FAILED; + } + *lengthptr += (int)(code - last_code); DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr, (int)(code - last_code), c, c)); - - /* If "previous" is set and it is not at the start of the work space, move - it back to there, in order to avoid filling up the work space. Otherwise, - if "previous" is NULL, reset the current code pointer to the start. 
*/ - - if (previous != NULL) - { - if (previous > orig_code) - { + + /* If "previous" is set and it is not at the start of the work space, move + it back to there, in order to avoid filling up the work space. Otherwise, + if "previous" is NULL, reset the current code pointer to the start. */ + + if (previous != NULL) + { + if (previous > orig_code) + { memmove(orig_code, previous, IN_UCHARS(code - previous)); - code -= previous - orig_code; - previous = orig_code; - } - } - else code = orig_code; - - /* Remember where this code item starts so we can pick up the length - next time round. */ - - last_code = code; - } - - /* In the real compile phase, just check the workspace used by the forward - reference list. */ - + code -= previous - orig_code; + previous = orig_code; + } + } + else code = orig_code; + + /* Remember where this code item starts so we can pick up the length + next time round. */ + + last_code = code; + } + + /* In the real compile phase, just check the workspace used by the forward + reference list. */ + else if (cd->hwm > cd->start_workspace + cd->workspace_size) - { - *errorcodeptr = ERR52; - goto FAILED; - } - + { + *errorcodeptr = ERR52; + goto FAILED; + } + /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an isolated \E is ignored. 
*/ - + if (c != CHAR_NULL) - { + { if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) - { - inescq = FALSE; - ptr++; - continue; - } + { + inescq = FALSE; + ptr++; + continue; + } else if (inescq) - { - if (previous_callout != NULL) - { - if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ - complete_callout(previous_callout, ptr, cd); - previous_callout = NULL; - } - if ((options & PCRE_AUTO_CALLOUT) != 0) - { - previous_callout = code; - code = auto_callout(code, ptr, cd); - } - goto NORMAL_CHAR; - } - + { + if (previous_callout != NULL) + { + if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ + complete_callout(previous_callout, ptr, cd); + previous_callout = NULL; + } + if ((options & PCRE_AUTO_CALLOUT) != 0) + { + previous_callout = code; + code = auto_callout(code, ptr, cd); + } + goto NORMAL_CHAR; + } + /* Check for the start of a \Q...\E sequence. We must do this here rather than later in case it is immediately followed by \E, which turns it into a "do nothing" sequence. */ - + if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q) { inescq = TRUE; ptr++; continue; } - } - + } + /* In extended mode, skip white space and comments. */ - - if ((options & PCRE_EXTENDED) != 0) - { + + if ((options & PCRE_EXTENDED) != 0) + { const pcre_uchar *wscptr = ptr; while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr); if (c == CHAR_NUMBER_SIGN) - { + { ptr++; while (*ptr != CHAR_NULL) - { + { if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ { /* IS_NEWLINE sets cd->nllen. */ ptr += cd->nllen; @@ -4710,9 +4710,9 @@ for (;; ptr++) #ifdef SUPPORT_UTF if (utf) FORWARDCHAR(ptr); #endif - } + } } - + /* If we skipped any characters, restart the loop. Otherwise, we didn't see a comment. */ @@ -4732,12 +4732,12 @@ for (;; ptr++) { *errorcodeptr = ERR18; goto FAILED; - } + } continue; - } - + } + /* See if the next thing is a quantifier. 
*/ - + is_quantifier = c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); @@ -4747,7 +4747,7 @@ for (;; ptr++) if (!is_quantifier && previous_callout != NULL && nestptr == NULL && after_manual_callout-- <= 0) - { + { if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ complete_callout(previous_callout, ptr, cd); previous_callout = NULL; @@ -4758,15 +4758,15 @@ for (;; ptr++) if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL) { - previous_callout = code; - code = auto_callout(code, ptr, cd); - } - + previous_callout = code; + code = auto_callout(code, ptr, cd); + } + /* Process the next pattern item. */ - switch(c) - { - /* ===================================================================*/ + switch(c) + { + /* ===================================================================*/ case CHAR_NULL: /* The branch terminates at string end */ case CHAR_VERTICAL_LINE: /* or | or ) */ case CHAR_RIGHT_PARENTHESIS: @@ -4774,68 +4774,68 @@ for (;; ptr++) *firstcharflagsptr = firstcharflags; *reqcharptr = reqchar; *reqcharflagsptr = reqcharflags; - *codeptr = code; - *ptrptr = ptr; - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < code - last_code) - { - *errorcodeptr = ERR20; - goto FAILED; - } + *codeptr = code; + *ptrptr = ptr; + if (lengthptr != NULL) + { + if (OFLOW_MAX - *lengthptr < code - last_code) + { + *errorcodeptr = ERR20; + goto FAILED; + } *lengthptr += (int)(code - last_code); /* To include callout length */ - DPRINTF((">> end branch\n")); - } - return TRUE; - - - /* ===================================================================*/ - /* Handle single-character metacharacters. In multiline mode, ^ disables - the setting of any following char as a first character. 
*/ - + DPRINTF((">> end branch\n")); + } + return TRUE; + + + /* ===================================================================*/ + /* Handle single-character metacharacters. In multiline mode, ^ disables + the setting of any following char as a first character. */ + case CHAR_CIRCUMFLEX_ACCENT: previous = NULL; - if ((options & PCRE_MULTILINE) != 0) - { + if ((options & PCRE_MULTILINE) != 0) + { if (firstcharflags == REQ_UNSET) zerofirstcharflags = firstcharflags = REQ_NONE; *code++ = OP_CIRCM; - } + } else *code++ = OP_CIRC; - break; - + break; + case CHAR_DOLLAR_SIGN: - previous = NULL; + previous = NULL; *code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; - break; - - /* There can never be a first char if '.' is first, whatever happens about + break; + + /* There can never be a first char if '.' is first, whatever happens about repeats. The value of reqchar doesn't change either. */ - + case CHAR_DOT: if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; zerofirstchar = firstchar; zerofirstcharflags = firstcharflags; zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; - previous = code; + previous = code; item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY; - break; - - - /* ===================================================================*/ - /* Character classes. If the included characters are all < 256, we build a - 32-byte bitmap of the permitted characters, except in the special case - where there is only one such character. For negated classes, we build the - map as usual, then invert it at the end. However, we use a different opcode - so that data characters > 255 can be handled correctly. - - If the class contains characters outside the 0-255 range, a different - opcode is compiled. It may optionally have a bit map for characters < 256, - but those above are are explicitly listed afterwards. 
A flag byte tells - whether the bitmap is present, and whether this is a negated class or not. - + break; + + + /* ===================================================================*/ + /* Character classes. If the included characters are all < 256, we build a + 32-byte bitmap of the permitted characters, except in the special case + where there is only one such character. For negated classes, we build the + map as usual, then invert it at the end. However, we use a different opcode + so that data characters > 255 can be handled correctly. + + If the class contains characters outside the 0-255 range, a different + opcode is compiled. It may optionally have a bit map for characters < 256, + but those above are are explicitly listed afterwards. A flag byte tells + whether the bitmap is present, and whether this is a negated class or not. + In JavaScript compatibility mode, an isolated ']' causes an error. In default (Perl) mode, it is treated as a data character. */ @@ -4870,42 +4870,42 @@ for (;; ptr++) /* Handle a real character class. */ - previous = code; + previous = code; item_hwm_offset = cd->hwm - cd->start_workspace; - - /* PCRE supports POSIX class stuff inside a class. Perl gives an error if - they are encountered at the top level, so we'll do that too. */ - + + /* PCRE supports POSIX class stuff inside a class. Perl gives an error if + they are encountered at the top level, so we'll do that too. */ + if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || ptr[1] == CHAR_EQUALS_SIGN) && - check_posix_syntax(ptr, &tempptr)) - { + check_posix_syntax(ptr, &tempptr)) + { *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31; - goto FAILED; - } - - /* If the first character is '^', set the negation flag and skip it. Also, - if the first few characters (either before or after ^) are \Q\E or \E we - skip them too. This makes for compatibility with Perl. 
*/ - - negate_class = FALSE; - for (;;) - { - c = *(++ptr); + goto FAILED; + } + + /* If the first character is '^', set the negation flag and skip it. Also, + if the first few characters (either before or after ^) are \Q\E or \E we + skip them too. This makes for compatibility with Perl. */ + + negate_class = FALSE; + for (;;) + { + c = *(++ptr); if (c == CHAR_BACKSLASH) - { + { if (ptr[1] == CHAR_E) ptr++; else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) ptr += 3; else break; - } + } else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) - negate_class = TRUE; - else break; - } - + negate_class = TRUE; + else break; + } + /* Empty classes are allowed in JavaScript compatibility mode. Otherwise, an initial ']' is taken as a data character -- the code below handles that. In JS mode, [] must always fail, so generate OP_FAIL, whereas @@ -4921,21 +4921,21 @@ for (;; ptr++) break; } - /* If a class contains a negative special such as \S, we need to flip the - negation flag at the end, so that support for characters > 255 works - correctly (they are all included in the class). */ - - should_flip_negation = FALSE; - + /* If a class contains a negative special such as \S, we need to flip the + negation flag at the end, so that support for characters > 255 works + correctly (they are all included in the class). */ + + should_flip_negation = FALSE; + /* Extended class (xclass) will be used when characters > 255 might match. */ - + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 xclass = FALSE; class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ class_uchardata_base = class_uchardata; /* Save the start */ #endif - + /* For optimization purposes, we track some properties of the class: class_has_8bitchar will be non-zero if the class contains at least one < 256 character; class_one_char will be 1 if the class contains just one @@ -4948,28 +4948,28 @@ for (;; ptr++) xclass_has_prop = FALSE; #endif - /* Initialize the 32-char bit map to all zeros. 
We build the map in a + /* Initialize the 32-char bit map to all zeros. We build the map in a temporary bit of memory, in case the class contains fewer than two 8-bit characters because in that case the compiled code doesn't use the bit map. */ - + memset(classbits, 0, 32 * sizeof(pcre_uint8)); - - /* Process characters until ] is reached. By writing this as a "do" it - means that an initial ] is taken as a data character. At the start of the - loop, c contains the first byte of the character. */ - + + /* Process characters until ] is reached. By writing this as a "do" it + means that an initial ] is taken as a data character. At the start of the + loop, c contains the first byte of the character. */ + if (c != CHAR_NULL) do - { + { const pcre_uchar *oldptr; - + #ifdef SUPPORT_UTF if (utf && HAS_EXTRALEN(c)) - { /* Braces are required because the */ - GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ - } -#endif - + { /* Braces are required because the */ + GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ + } +#endif + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 /* In the pre-compile phase, accumulate the length of any extra data and reset the pointer. This is so that very large classes that @@ -4986,67 +4986,67 @@ for (;; ptr++) } #endif - /* Inside \Q...\E everything is literal except \E */ - - if (inescq) - { + /* Inside \Q...\E everything is literal except \E */ + + if (inescq) + { if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */ - { - inescq = FALSE; /* Reset literal state */ - ptr++; /* Skip the 'E' */ - continue; /* Carry on with next */ - } - goto CHECK_RANGE; /* Could be range if \E follows */ - } - - /* Handle POSIX class names. Perl allows a negation extension of the - form [:^name:]. A square bracket that doesn't match the syntax is - treated as a literal. We also recognize the POSIX constructions - [.ch.] and [=ch=] ("collating elements") and fault them, as Perl - 5.6 and 5.8 do. 
*/ - + { + inescq = FALSE; /* Reset literal state */ + ptr++; /* Skip the 'E' */ + continue; /* Carry on with next */ + } + goto CHECK_RANGE; /* Could be range if \E follows */ + } + + /* Handle POSIX class names. Perl allows a negation extension of the + form [:^name:]. A square bracket that doesn't match the syntax is + treated as a literal. We also recognize the POSIX constructions + [.ch.] and [=ch=] ("collating elements") and fault them, as Perl + 5.6 and 5.8 do. */ + if (c == CHAR_LEFT_SQUARE_BRACKET && (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) - { - BOOL local_negate = FALSE; - int posix_class, taboffset, tabopt; + { + BOOL local_negate = FALSE; + int posix_class, taboffset, tabopt; register const pcre_uint8 *cbits = cd->cbits; pcre_uint8 pbits[32]; - + if (ptr[1] != CHAR_COLON) - { - *errorcodeptr = ERR31; - goto FAILED; - } - - ptr += 2; + { + *errorcodeptr = ERR31; + goto FAILED; + } + + ptr += 2; if (*ptr == CHAR_CIRCUMFLEX_ACCENT) - { - local_negate = TRUE; - should_flip_negation = TRUE; /* Note negative special */ - ptr++; - } - + { + local_negate = TRUE; + should_flip_negation = TRUE; /* Note negative special */ + ptr++; + } + posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); - if (posix_class < 0) - { - *errorcodeptr = ERR30; - goto FAILED; - } - - /* If matching is caseless, upper and lower are converted to - alpha. This relies on the fact that the class table starts with - alpha, lower, upper as the first 3 entries. */ - - if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) - posix_class = 0; - + if (posix_class < 0) + { + *errorcodeptr = ERR30; + goto FAILED; + } + + /* If matching is caseless, upper and lower are converted to + alpha. This relies on the fact that the class table starts with + alpha, lower, upper as the first 3 entries. 
*/ + + if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) + posix_class = 0; + /* When PCRE_UCP is set, some of the POSIX classes are converted to different escape sequences that use Unicode properties \p or \P. Others that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP directly. */ - + #ifdef SUPPORT_UCP if ((options & PCRE_UCP) != 0) { @@ -5115,91 +5115,91 @@ for (;; ptr++) may be in the main map already. At the end we or the result into the bit map that is being built. */ - posix_class *= 3; - - /* Copy in the first table (always present) */ - - memcpy(pbits, cbits + posix_class_maps[posix_class], + posix_class *= 3; + + /* Copy in the first table (always present) */ + + memcpy(pbits, cbits + posix_class_maps[posix_class], 32 * sizeof(pcre_uint8)); - - /* If there is a second table, add or remove it as required. */ - - taboffset = posix_class_maps[posix_class + 1]; - tabopt = posix_class_maps[posix_class + 2]; - - if (taboffset >= 0) - { - if (tabopt >= 0) - for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset]; - else - for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset]; - } - + + /* If there is a second table, add or remove it as required. */ + + taboffset = posix_class_maps[posix_class + 1]; + tabopt = posix_class_maps[posix_class + 2]; + + if (taboffset >= 0) + { + if (tabopt >= 0) + for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset]; + else + for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset]; + } + /* Now see if we need to remove any special characters. An option - value of 1 removes vertical space and 2 removes underscore. */ - - if (tabopt < 0) tabopt = -tabopt; - if (tabopt == 1) pbits[1] &= ~0x3c; - else if (tabopt == 2) pbits[11] &= 0x7f; - - /* Add the POSIX table or its complement into the main table that is - being built and we are done. 
*/ - - if (local_negate) - for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c]; - else - for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; - - ptr = tempptr + 1; + value of 1 removes vertical space and 2 removes underscore. */ + + if (tabopt < 0) tabopt = -tabopt; + if (tabopt == 1) pbits[1] &= ~0x3c; + else if (tabopt == 2) pbits[11] &= 0x7f; + + /* Add the POSIX table or its complement into the main table that is + being built and we are done. */ + + if (local_negate) + for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c]; + else + for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; + + ptr = tempptr + 1; /* Every class contains at least one < 256 character. */ class_has_8bitchar = 1; /* Every class contains at least two characters. */ class_one_char = 2; - continue; /* End of POSIX syntax handling */ - } - - /* Backslash may introduce a single character, or it may introduce one - of the specials, which just set a flag. The sequence \b is a special + continue; /* End of POSIX syntax handling */ + } + + /* Backslash may introduce a single character, or it may introduce one + of the specials, which just set a flag. The sequence \b is a special case. Inside a class (and only there) it is treated as backspace. We assume that other escapes have more than one character in them, so speculatively set both class_has_8bitchar and class_one_char bigger than one. Unrecognized escapes fall through and are either treated as literal characters (by default), or are faulted if PCRE_EXTRA is set. 
*/ - + if (c == CHAR_BACKSLASH) - { + { escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE); - if (*errorcodeptr != 0) goto FAILED; + if (*errorcodeptr != 0) goto FAILED; if (escape == 0) c = ec; else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ else if (escape == ESC_N) /* \N is not supported in a class */ - { + { *errorcodeptr = ERR71; goto FAILED; } else if (escape == ESC_Q) /* Handle start of quoted string */ { if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - { - ptr += 2; /* avoid empty string */ - } - else inescq = TRUE; - continue; - } + { + ptr += 2; /* avoid empty string */ + } + else inescq = TRUE; + continue; + } else if (escape == ESC_E) continue; /* Ignore orphan \E */ - + else - { + { register const pcre_uint8 *cbits = cd->cbits; /* Every class contains at least two < 256 characters. */ class_has_8bitchar++; /* Every class contains at least two characters. */ class_one_char += 2; - + switch (escape) - { + { #ifdef SUPPORT_UCP case ESC_du: /* These are the values given for \d etc */ case ESC_DU: /* when PCRE_UCP is set. We replace the */ @@ -5212,24 +5212,24 @@ for (;; ptr++) class_has_8bitchar--; /* Undo! 
*/ continue; #endif - case ESC_d: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; - continue; - - case ESC_D: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; - continue; - - case ESC_w: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word]; - continue; - - case ESC_W: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; - continue; - + case ESC_d: + for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; + continue; + + case ESC_D: + should_flip_negation = TRUE; + for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; + continue; + + case ESC_w: + for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word]; + continue; + + case ESC_W: + should_flip_negation = TRUE; + for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; + continue; + /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was previously set by something earlier in the character class. @@ -5237,41 +5237,41 @@ for (;; ptr++) we could just adjust the appropriate bit. From PCRE 8.34 we no longer treat \s and \S specially. */ - case ESC_s: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space]; - continue; - - case ESC_S: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; - continue; - + case ESC_s: + for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space]; + continue; + + case ESC_S: + should_flip_negation = TRUE; + for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; + continue; + /* The rest apply in both UCP and non-UCP cases. 
*/ - + case ESC_h: (void)add_list_to_class(classbits, &class_uchardata, options, cd, PRIV(hspace_list), NOTACHAR); continue; - + case ESC_H: (void)add_not_list_to_class(classbits, &class_uchardata, options, cd, PRIV(hspace_list)); continue; - + case ESC_v: (void)add_list_to_class(classbits, &class_uchardata, options, cd, PRIV(vspace_list), NOTACHAR); - continue; - + continue; + case ESC_V: (void)add_not_list_to_class(classbits, &class_uchardata, options, cd, PRIV(vspace_list)); - continue; - + continue; + case ESC_p: case ESC_P: #ifdef SUPPORT_UCP - { + { BOOL negated; unsigned int ptype = 0, pdata = 0; if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) @@ -5283,118 +5283,118 @@ for (;; ptr++) xclass_has_prop = TRUE; class_has_8bitchar--; /* Undo! */ continue; - } + } #else *errorcodeptr = ERR45; goto FAILED; -#endif +#endif /* Unrecognized escapes are faulted if PCRE is running in its strict mode. By default, for compatibility with Perl, they are treated as literals. */ - + default: if ((options & PCRE_EXTRA) != 0) - { + { *errorcodeptr = ERR7; goto FAILED; - } + } class_has_8bitchar--; /* Undo the speculative increase. */ class_one_char -= 2; /* Undo the speculative increase. */ c = *ptr; /* Get the final character and fall through */ break; - } + } } - + /* Fall through if the escape just defined a single character (c >= 0). This may be greater than 256. */ - + escape = 0; - - } /* End of backslash handling */ - + + } /* End of backslash handling */ + /* A character may be followed by '-' to form a range. However, Perl does not permit ']' to be the end of the range. A '-' character at the end is treated as a literal. Perl ignores orphaned \E sequences entirely. The code for handling \Q and \E is messy. 
*/ - - CHECK_RANGE: + + CHECK_RANGE: while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - { - inescq = FALSE; - ptr += 2; - } - oldptr = ptr; - + { + inescq = FALSE; + ptr += 2; + } + oldptr = ptr; + /* Remember if \r or \n were explicitly used */ - + if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; - - /* Check for range */ - + + /* Check for range */ + if (!inescq && ptr[1] == CHAR_MINUS) - { + { pcre_uint32 d; - ptr += 2; + ptr += 2; while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2; - - /* If we hit \Q (not followed by \E) at this point, go into escaped - mode. */ - + + /* If we hit \Q (not followed by \E) at this point, go into escaped + mode. */ + while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q) - { - ptr += 2; + { + ptr += 2; if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) { ptr += 2; continue; } - inescq = TRUE; - break; - } - + inescq = TRUE; + break; + } + /* Minus (hyphen) at the end of a class is treated as a literal, so put back the pointer and jump to handle the character that preceded it. */ if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET)) - { - ptr = oldptr; + { + ptr = oldptr; goto CLASS_SINGLE_CHARACTER; - } - + } + /* Otherwise, we have a potential range; pick up the next character */ #ifdef SUPPORT_UTF if (utf) - { /* Braces are required because the */ - GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ - } - else -#endif - d = *ptr; /* Not UTF-8 mode */ - + { /* Braces are required because the */ + GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ + } + else +#endif + d = *ptr; /* Not UTF-8 mode */ + /* The second part of a range can be a single-character escape sequence, but not any of the other escapes. Perl treats a hyphen as a literal in such circumstances. However, in Perl's warning mode, a warning is given, so PCRE now faults it as it is almost certainly a mistake on the user's part. 
*/ - + if (!inescq) - { + { if (d == CHAR_BACKSLASH) { int descape; descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE); if (*errorcodeptr != 0) goto FAILED; - + /* 0 means a character was put into d; \b is backspace; any other special causes an error. */ - + if (descape != 0) - { + { if (descape == ESC_b) d = CHAR_BS; else { *errorcodeptr = ERR83; goto FAILED; } - } - } + } + } /* A hyphen followed by a POSIX class is treated in the same way. */ @@ -5406,43 +5406,43 @@ for (;; ptr++) *errorcodeptr = ERR83; goto FAILED; } - } - - /* Check that the two values are in the correct order. Optimize + } + + /* Check that the two values are in the correct order. Optimize one-character ranges. */ - - if (d < c) - { - *errorcodeptr = ERR8; - goto FAILED; - } + + if (d < c) + { + *errorcodeptr = ERR8; + goto FAILED; + } if (d == c) goto CLASS_SINGLE_CHARACTER; /* A few lines below */ - + /* We have found a character range, so single character optimizations cannot be done anymore. Any value greater than 1 indicates that there is more than one character. */ - + class_one_char = 2; - + /* Remember an explicit \r or \n, and add the range to the class. */ - + if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; - + class_has_8bitchar += add_to_class(classbits, &class_uchardata, options, cd, c, d); - + continue; /* Go get the next char in the class */ } - + /* Handle a single character - we can get here for a normal non-escape char, or after \ that introduces a single character or for an apparent range that isn't. Only the value 1 matters for class_one_char, so don't increase it if it is already 2 or more ... just in case there's a class with a zillion characters in it. */ - + CLASS_SINGLE_CHARACTER: if (class_one_char < 2) class_one_char++; - + /* If xclass_has_prop is false and class_one_char is 1, we have the first single character in the class, and there have been no prior ranges, or XCLASS items generated by escapes. 
If this is the final character in the @@ -5451,7 +5451,7 @@ for (;; ptr++) can cause firstchar to be set. Otherwise, there can be no first char if this item is first, whatever repeat count may follow. In the case of reqchar, save the previous value for reinstating. */ - + if (!inescq && #ifdef SUPPORT_UCP !xclass_has_prop && @@ -5461,7 +5461,7 @@ for (;; ptr++) ptr++; zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; - + if (negate_class) { #ifdef SUPPORT_UCP @@ -5470,12 +5470,12 @@ for (;; ptr++) if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; zerofirstchar = firstchar; zerofirstcharflags = firstcharflags; - + /* For caseless UTF-8 mode when UCP support is available, check whether this character has more than one other case. If so, generate a special OP_NOTPROP item instead of OP_NOTI. */ - -#ifdef SUPPORT_UCP + +#ifdef SUPPORT_UCP if (utf && (options & PCRE_CASELESS) != 0 && (d = UCD_CASESET(c)) != 0) { @@ -5486,8 +5486,8 @@ for (;; ptr++) else #endif /* Char has only one other case, or UCP not available */ - - { + + { *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) @@ -5495,52 +5495,52 @@ for (;; ptr++) else #endif *code++ = c; - } - + } + /* We are finished with this character class */ - + goto END_CLASS; } - + /* For a single, positive character, get the value into mcbuffer, and then we can handle this with the normal one-character code. */ - + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) mclength = PRIV(ord2utf)(c, mcbuffer); else #endif - { + { mcbuffer[0] = c; mclength = 1; - } + } goto ONE_CHAR; } /* End of 1-char optimization */ - + /* There is more than one character in the class, or an XCLASS item has been generated. Add this character to the class. */ - + class_has_8bitchar += add_to_class(classbits, &class_uchardata, options, cd, c, c); - } - + } + /* Loop until ']' reached. 
This "while" is the end of the "do" far above. If we are at the end of an internal nested string, revert to the outer string. */ - + while (((c = *(++ptr)) != CHAR_NULL || (nestptr != NULL && (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq)); - + /* Check for missing terminating ']' */ if (c == CHAR_NULL) - { - *errorcodeptr = ERR6; - goto FAILED; - } - + { + *errorcodeptr = ERR6; + goto FAILED; + } + /* We will need an XCLASS if data has been placed in class_uchardata. In the second phase this is a sufficient test. However, in the pre-compile phase, class_uchardata gets emptied to prevent workspace overflow, so it @@ -5548,21 +5548,21 @@ for (;; ptr++) anything at this point. For this reason, xclass gets set TRUE above when uchar_classdata is emptied, and that's why this code is the way it is here instead of just doing a test on class_uchardata below. */ - + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (class_uchardata > class_uchardata_base) xclass = TRUE; -#endif - +#endif + /* If this is the first thing in the branch, there can be no first char setting, whatever the repeat count. Any reqchar setting must remain unchanged after any kind of repeat. */ - + if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; zerofirstchar = firstchar; zerofirstcharflags = firstcharflags; zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; - + /* If there are characters with values > 255, we have to compile an extended class, with its own opcode, unless there was a negated special such as \S in the class, and PCRE_UCP is not set, because in that case all @@ -5570,25 +5570,25 @@ for (;; ptr++) well can be ignored. If (when there are explicit characters > 255 that must be listed) there are no characters < 256, we can omit the bitmap in the actual compiled code. 
*/ - + #ifdef SUPPORT_UTF if (xclass && (xclass_has_prop || !should_flip_negation || (options & PCRE_UCP) != 0)) #elif !defined COMPILE_PCRE8 if (xclass && (xclass_has_prop || !should_flip_negation)) -#endif +#endif #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - { + { /* For non-UCP wide characters, in a non-negative class containing \S or similar (should_flip_negation is set), all characters greater than 255 must be in the class. */ - + if ( #if defined COMPILE_PCRE8 utf && #endif should_flip_negation && !negate_class && (options & PCRE_UCP) == 0) - { + { *class_uchardata++ = XCL_RANGE; if (utf) /* Will always be utf in the 8-bit library */ { @@ -5603,114 +5603,114 @@ for (;; ptr++) #elif defined COMPILE_PCRE32 *class_uchardata++ = 0x100; *class_uchardata++ = 0xffffffffu; -#endif +#endif } - } - + } + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ - *code++ = OP_XCLASS; - code += LINK_SIZE; + *code++ = OP_XCLASS; + code += LINK_SIZE; *code = negate_class? XCL_NOT:0; if (xclass_has_prop) *code |= XCL_HASPROP; - - /* If the map is required, move up the extra data to make room for it; - otherwise just move the code pointer to the end of the extra data. */ - + + /* If the map is required, move up the extra data to make room for it; + otherwise just move the code pointer to the end of the extra data. 
*/ + if (class_has_8bitchar > 0) - { - *code++ |= XCL_MAP; + { + *code++ |= XCL_MAP; memmove(code + (32 / sizeof(pcre_uchar)), code, IN_UCHARS(class_uchardata - code)); if (negate_class && !xclass_has_prop) for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; - memcpy(code, classbits, 32); + memcpy(code, classbits, 32); code = class_uchardata + (32 / sizeof(pcre_uchar)); - } + } else code = class_uchardata; - - /* Now fill in the complete length of the item */ - + + /* Now fill in the complete length of the item */ + PUT(previous, 1, (int)(code - previous)); - break; /* End of class handling */ - } + break; /* End of class handling */ + } /* Even though any XCLASS list is now discarded, we must allow for its memory. */ if (lengthptr != NULL) *lengthptr += (int)(class_uchardata - class_uchardata_base); -#endif - +#endif + /* If there are no characters > 255, or they are all to be included or excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the whole class was negated and whether there were negative specials such as \S (non-UCP) in the class. Then copy the 32-byte map into the code vector, negating it if necessary. */ - - *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; + + *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; if (lengthptr == NULL) /* Save time in the pre-compile phase */ - { + { if (negate_class) for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; - memcpy(code, classbits, 32); - } + memcpy(code, classbits, 32); + } code += 32 / sizeof(pcre_uchar); END_CLASS: - break; - - - /* ===================================================================*/ - /* Various kinds of repeat; '{' is not necessarily a quantifier, but this - has been tested above. */ - + break; + + + /* ===================================================================*/ + /* Various kinds of repeat; '{' is not necessarily a quantifier, but this + has been tested above. 
*/ + case CHAR_LEFT_CURLY_BRACKET: - if (!is_quantifier) goto NORMAL_CHAR; - ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr); - if (*errorcodeptr != 0) goto FAILED; - goto REPEAT; - + if (!is_quantifier) goto NORMAL_CHAR; + ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr); + if (*errorcodeptr != 0) goto FAILED; + goto REPEAT; + case CHAR_ASTERISK: - repeat_min = 0; - repeat_max = -1; - goto REPEAT; - + repeat_min = 0; + repeat_max = -1; + goto REPEAT; + case CHAR_PLUS: - repeat_min = 1; - repeat_max = -1; - goto REPEAT; - + repeat_min = 1; + repeat_max = -1; + goto REPEAT; + case CHAR_QUESTION_MARK: - repeat_min = 0; - repeat_max = 1; - - REPEAT: - if (previous == NULL) - { - *errorcodeptr = ERR9; - goto FAILED; - } - - if (repeat_min == 0) - { + repeat_min = 0; + repeat_max = 1; + + REPEAT: + if (previous == NULL) + { + *errorcodeptr = ERR9; + goto FAILED; + } + + if (repeat_min == 0) + { firstchar = zerofirstchar; /* Adjust for zero repeat */ firstcharflags = zerofirstcharflags; reqchar = zeroreqchar; /* Ditto */ reqcharflags = zeroreqcharflags; - } - - /* Remember whether this is a variable length repeat */ - - reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; - - op_type = 0; /* Default single-char op codes */ - possessive_quantifier = FALSE; /* Default not possessive quantifier */ - + } + + /* Remember whether this is a variable length repeat */ + + reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; + + op_type = 0; /* Default single-char op codes */ + possessive_quantifier = FALSE; /* Default not possessive quantifier */ + /* Save start of previous item, in case we have to move it up in order to insert something before it. */ - - tempcode = previous; - + + tempcode = previous; + /* Before checking for a possessive quantifier, we must skip over whitespace and comments in extended mode because Perl allows white space at this point. 
*/ @@ -5754,33 +5754,33 @@ for (;; ptr++) } } - /* If the next character is '+', we have a possessive quantifier. This - implies greediness, whatever the setting of the PCRE_UNGREEDY option. - If the next character is '?' this is a minimizing repeat, by default, - but if PCRE_UNGREEDY is set, it works the other way round. We change the - repeat type to the non-default. */ - + /* If the next character is '+', we have a possessive quantifier. This + implies greediness, whatever the setting of the PCRE_UNGREEDY option. + If the next character is '?' this is a minimizing repeat, by default, + but if PCRE_UNGREEDY is set, it works the other way round. We change the + repeat type to the non-default. */ + if (ptr[1] == CHAR_PLUS) - { - repeat_type = 0; /* Force greedy */ - possessive_quantifier = TRUE; - ptr++; - } + { + repeat_type = 0; /* Force greedy */ + possessive_quantifier = TRUE; + ptr++; + } else if (ptr[1] == CHAR_QUESTION_MARK) - { - repeat_type = greedy_non_default; - ptr++; - } - else repeat_type = greedy_default; - + { + repeat_type = greedy_non_default; + ptr++; + } + else repeat_type = greedy_default; + /* If previous was a recursion call, wrap it in atomic brackets so that previous becomes the atomic group. All recursions were so wrapped in the past, but it no longer happens for non-repeated recursions. In fact, the repeated ones could be re-implemented independently so as not to need this, but for the moment we rely on the code for repeating groups. */ - + if (*previous == OP_RECURSE) - { + { memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE)); *previous = OP_ONCE; PUT(previous, 1, 2 + 2*LINK_SIZE); @@ -5788,20 +5788,20 @@ for (;; ptr++) PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); code += 2 + 2 * LINK_SIZE; length_prevgroup = 3 + 3*LINK_SIZE; - + /* When actually compiling, we need to check whether this was a forward reference, and if so, adjust the offset. 
*/ if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE) - { + { int offset = GET(cd->hwm, -LINK_SIZE); if (offset == previous + 1 - cd->start_code) PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE); - } + } } - + /* Now handle repetition for the different types of item. */ - + /* If previous was a character or negated character match, abolish the item and generate a repeat item instead. If a char item has a minimum of more than one, ensure that it is set in reqchar - it might not be if a sequence @@ -5812,236 +5812,236 @@ for (;; ptr++) || *previous == OP_NOT || *previous == OP_NOTI) { switch (*previous) - { + { default: /* Make compiler happy. */ case OP_CHAR: op_type = OP_STAR - OP_STAR; break; case OP_CHARI: op_type = OP_STARI - OP_STAR; break; case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break; case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break; - } - + } + /* Deal with UTF characters that take up more than one character. It's easier to write this out separately than try to macrify it. Use c to hold the length of the character in bytes, plus UTF_LENGTH to flag that it's a length rather than a small character. */ - + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf && NOT_FIRSTCHAR(code[-1])) - { + { pcre_uchar *lastchar = code - 1; BACKCHAR(lastchar); c = (int)(code - lastchar); /* Length of UTF-8 character */ memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */ c |= UTF_LENGTH; /* Flag c as a length */ - } + } else #endif /* SUPPORT_UTF */ - + /* Handle the case of a single charater - either with no UTF support, or with UTF disabled, or for a single character UTF character. */ - { + { c = code[-1]; if (*previous <= OP_CHARI && repeat_min > 1) { reqchar = c; reqcharflags = req_caseopt | cd->req_varyopt; } - } + } goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ - } - - /* If previous was a character type match (\d or similar), abolish it and - create a suitable repeat item. 
The code is shared with single-character - repeats by setting op_type to add a suitable offset into repeat_type. Note - the the Unicode property types will be present only when SUPPORT_UCP is - defined, but we don't wrap the little bits of code here because it just - makes it horribly messy. */ - - else if (*previous < OP_EODN) - { + } + + /* If previous was a character type match (\d or similar), abolish it and + create a suitable repeat item. The code is shared with single-character + repeats by setting op_type to add a suitable offset into repeat_type. Note + the the Unicode property types will be present only when SUPPORT_UCP is + defined, but we don't wrap the little bits of code here because it just + makes it horribly messy. */ + + else if (*previous < OP_EODN) + { pcre_uchar *oldcode; - int prop_type, prop_value; - op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ - c = *previous; - - OUTPUT_SINGLE_REPEAT: - if (*previous == OP_PROP || *previous == OP_NOTPROP) - { - prop_type = previous[1]; - prop_value = previous[2]; - } - else prop_type = prop_value = -1; - - oldcode = code; - code = previous; /* Usually overwrite previous item */ - - /* If the maximum is zero then the minimum must also be zero; Perl allows - this case, so we do too - by simply omitting the item altogether. */ - - if (repeat_max == 0) goto END_REPEAT; - - /* Combine the op_type with the repeat_type */ - - repeat_type += op_type; - - /* A minimum of zero is handled either as the special case * or ?, or as - an UPTO, with the maximum given. */ - - if (repeat_min == 0) - { - if (repeat_max == -1) *code++ = OP_STAR + repeat_type; - else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; - else - { - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - - /* A repeat minimum of 1 is optimized into some special cases. If the - maximum is unlimited, we use OP_PLUS. 
Otherwise, the original item is - left in place and, if the maximum is greater than 1, we use OP_UPTO with - one less than the maximum. */ - - else if (repeat_min == 1) - { - if (repeat_max == -1) - *code++ = OP_PLUS + repeat_type; - else - { - code = oldcode; /* leave previous item in place */ - if (repeat_max == 1) goto END_REPEAT; - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max - 1); - } - } - - /* The case {n,n} is just an EXACT, while the general case {n,m} is - handled as an EXACT followed by an UPTO. */ - - else - { - *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ - PUT2INC(code, 0, repeat_min); - - /* If the maximum is unlimited, insert an OP_STAR. Before doing so, - we have to insert the character for the previous code. For a repeated - Unicode property match, there are two extra bytes that define the - required property. In UTF-8 mode, long characters have their length in + int prop_type, prop_value; + op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ + c = *previous; + + OUTPUT_SINGLE_REPEAT: + if (*previous == OP_PROP || *previous == OP_NOTPROP) + { + prop_type = previous[1]; + prop_value = previous[2]; + } + else prop_type = prop_value = -1; + + oldcode = code; + code = previous; /* Usually overwrite previous item */ + + /* If the maximum is zero then the minimum must also be zero; Perl allows + this case, so we do too - by simply omitting the item altogether. */ + + if (repeat_max == 0) goto END_REPEAT; + + /* Combine the op_type with the repeat_type */ + + repeat_type += op_type; + + /* A minimum of zero is handled either as the special case * or ?, or as + an UPTO, with the maximum given. */ + + if (repeat_min == 0) + { + if (repeat_max == -1) *code++ = OP_STAR + repeat_type; + else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; + else + { + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + + /* A repeat minimum of 1 is optimized into some special cases. 
If the + maximum is unlimited, we use OP_PLUS. Otherwise, the original item is + left in place and, if the maximum is greater than 1, we use OP_UPTO with + one less than the maximum. */ + + else if (repeat_min == 1) + { + if (repeat_max == -1) + *code++ = OP_PLUS + repeat_type; + else + { + code = oldcode; /* leave previous item in place */ + if (repeat_max == 1) goto END_REPEAT; + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max - 1); + } + } + + /* The case {n,n} is just an EXACT, while the general case {n,m} is + handled as an EXACT followed by an UPTO. */ + + else + { + *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ + PUT2INC(code, 0, repeat_min); + + /* If the maximum is unlimited, insert an OP_STAR. Before doing so, + we have to insert the character for the previous code. For a repeated + Unicode property match, there are two extra bytes that define the + required property. In UTF-8 mode, long characters have their length in c, with the UTF_LENGTH bit as a flag. */ - - if (repeat_max < 0) - { + + if (repeat_max < 0) + { #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf && (c & UTF_LENGTH) != 0) - { + { memcpy(code, utf_chars, IN_UCHARS(c & 7)); - code += c & 7; - } - else -#endif - { - *code++ = c; - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } - } - *code++ = OP_STAR + repeat_type; - } - - /* Else insert an UPTO if the max is greater than the min, again - preceded by the character, for the previously inserted code. If the - UPTO is just for 1 instance, we can use QUERY instead. */ - - else if (repeat_max != repeat_min) - { + code += c & 7; + } + else +#endif + { + *code++ = c; + if (prop_type >= 0) + { + *code++ = prop_type; + *code++ = prop_value; + } + } + *code++ = OP_STAR + repeat_type; + } + + /* Else insert an UPTO if the max is greater than the min, again + preceded by the character, for the previously inserted code. 
If the + UPTO is just for 1 instance, we can use QUERY instead. */ + + else if (repeat_max != repeat_min) + { #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf && (c & UTF_LENGTH) != 0) - { + { memcpy(code, utf_chars, IN_UCHARS(c & 7)); - code += c & 7; - } - else -#endif - *code++ = c; - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } - repeat_max -= repeat_min; - - if (repeat_max == 1) - { - *code++ = OP_QUERY + repeat_type; - } - else - { - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - } - - /* The character or character type itself comes last in all cases. */ - + code += c & 7; + } + else +#endif + *code++ = c; + if (prop_type >= 0) + { + *code++ = prop_type; + *code++ = prop_value; + } + repeat_max -= repeat_min; + + if (repeat_max == 1) + { + *code++ = OP_QUERY + repeat_type; + } + else + { + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + } + + /* The character or character type itself comes last in all cases. */ + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf && (c & UTF_LENGTH) != 0) - { + { memcpy(code, utf_chars, IN_UCHARS(c & 7)); - code += c & 7; - } - else -#endif - *code++ = c; - - /* For a repeated Unicode property match, there are two extra bytes that - define the required property. */ - -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } -#endif - } - - /* If previous was a character class or a back reference, we put the repeat - stuff after it, but just skip the item if the repeat was {0,0}. */ - + code += c & 7; + } + else +#endif + *code++ = c; + + /* For a repeated Unicode property match, there are two extra bytes that + define the required property. 
*/ + +#ifdef SUPPORT_UCP + if (prop_type >= 0) + { + *code++ = prop_type; + *code++ = prop_value; + } +#endif + } + + /* If previous was a character class or a back reference, we put the repeat + stuff after it, but just skip the item if the repeat was {0,0}. */ + else if (*previous == OP_CLASS || *previous == OP_NCLASS || #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - *previous == OP_XCLASS || -#endif + *previous == OP_XCLASS || +#endif *previous == OP_REF || *previous == OP_REFI || *previous == OP_DNREF || *previous == OP_DNREFI) - { - if (repeat_max == 0) - { - code = previous; - goto END_REPEAT; - } - - if (repeat_min == 0 && repeat_max == -1) - *code++ = OP_CRSTAR + repeat_type; - else if (repeat_min == 1 && repeat_max == -1) - *code++ = OP_CRPLUS + repeat_type; - else if (repeat_min == 0 && repeat_max == 1) - *code++ = OP_CRQUERY + repeat_type; - else - { - *code++ = OP_CRRANGE + repeat_type; - PUT2INC(code, 0, repeat_min); - if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ - PUT2INC(code, 0, repeat_max); - } - } - - /* If previous was a bracket group, we may have to replicate it in certain + { + if (repeat_max == 0) + { + code = previous; + goto END_REPEAT; + } + + if (repeat_min == 0 && repeat_max == -1) + *code++ = OP_CRSTAR + repeat_type; + else if (repeat_min == 1 && repeat_max == -1) + *code++ = OP_CRPLUS + repeat_type; + else if (repeat_min == 0 && repeat_max == 1) + *code++ = OP_CRQUERY + repeat_type; + else + { + *code++ = OP_CRRANGE + repeat_type; + PUT2INC(code, 0, repeat_min); + if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ + PUT2INC(code, 0, repeat_max); + } + } + + /* If previous was a bracket group, we may have to replicate it in certain cases. Note that at this point we can encounter only the "basic" bracket opcodes such as BRA and CBRA, as this is the place where they get converted into the more special varieties such as BRAPOS and SBRA. 
A test for >= @@ -6049,56 +6049,56 @@ for (;; ptr++) ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND. Originally, PCRE did not allow repetition of assertions, but now it does, for Perl compatibility. */ - + else if (*previous >= OP_ASSERT && *previous <= OP_COND) - { + { register int i; int len = (int)(code - previous); size_t base_hwm_offset = item_hwm_offset; pcre_uchar *bralink = NULL; pcre_uchar *brazeroptr = NULL; - + /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so we just ignore the repeat. */ - - if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF) + + if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF) goto END_REPEAT; - + /* There is no sense in actually repeating assertions. The only potential use of repetition is in cases when the assertion is optional. Therefore, if the minimum is greater than zero, just ignore the repeat. If the maximum is not zero or one, set it to 1. */ - + if (*previous < OP_ONCE) /* Assertion */ - { + { if (repeat_min > 0) goto END_REPEAT; if (repeat_max < 0 || repeat_max > 1) repeat_max = 1; - } - - /* The case of a zero minimum is special because of the need to stick - OP_BRAZERO in front of it, and because the group appears once in the - data, whereas in other cases it appears the minimum number of times. For - this reason, it is simplest to treat this case separately, as otherwise - the code gets far too messy. There are several special subcases when the - minimum is zero. */ - - if (repeat_min == 0) - { + } + + /* The case of a zero minimum is special because of the need to stick + OP_BRAZERO in front of it, and because the group appears once in the + data, whereas in other cases it appears the minimum number of times. For + this reason, it is simplest to treat this case separately, as otherwise + the code gets far too messy. There are several special subcases when the + minimum is zero. 
*/ + + if (repeat_min == 0) + { /* If the maximum is also zero, we used to just omit the group from the output altogether, like this: - + ** if (repeat_max == 0) ** { ** code = previous; ** goto END_REPEAT; ** } - + However, that fails when a group or a subgroup within it is referenced as a subroutine from elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it so that it is skipped on execution. As we don't have a list of which groups are referenced, we cannot do this selectively. - + If the maximum is 1 or unlimited, we just have to stick in the BRAZERO and do no more at this point. However, we do need to adjust any OP_RECURSE calls inside the group that refer to the group itself or any @@ -6107,94 +6107,94 @@ for (;; ptr++) this. */ if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ - { - *code = OP_END; + { + *code = OP_END; adjust_recurse(previous, 1, utf, cd, item_hwm_offset); memmove(previous + 1, previous, IN_UCHARS(len)); - code++; + code++; if (repeat_max == 0) { *previous++ = OP_SKIPZERO; goto END_REPEAT; } brazeroptr = previous; /* Save for possessive optimizing */ - *previous++ = OP_BRAZERO + repeat_type; - } - - /* If the maximum is greater than 1 and limited, we have to replicate - in a nested fashion, sticking OP_BRAZERO before each set of brackets. - The first one has to be handled carefully because it's the original - copy, which has to be moved up. The remainder can be handled by code - that is common with the non-zero minimum case below. We have to - adjust the value or repeat_max, since one less copy is required. Once - again, we may have to adjust any OP_RECURSE calls inside the group. */ - - else - { - int offset; - *code = OP_END; + *previous++ = OP_BRAZERO + repeat_type; + } + + /* If the maximum is greater than 1 and limited, we have to replicate + in a nested fashion, sticking OP_BRAZERO before each set of brackets. + The first one has to be handled carefully because it's the original + copy, which has to be moved up. 
The remainder can be handled by code + that is common with the non-zero minimum case below. We have to + adjust the value or repeat_max, since one less copy is required. Once + again, we may have to adjust any OP_RECURSE calls inside the group. */ + + else + { + int offset; + *code = OP_END; adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset); memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len)); - code += 2 + LINK_SIZE; - *previous++ = OP_BRAZERO + repeat_type; - *previous++ = OP_BRA; - - /* We chain together the bracket offset fields that have to be - filled in later when the ends of the brackets are reached. */ - + code += 2 + LINK_SIZE; + *previous++ = OP_BRAZERO + repeat_type; + *previous++ = OP_BRA; + + /* We chain together the bracket offset fields that have to be + filled in later when the ends of the brackets are reached. */ + offset = (bralink == NULL)? 0 : (int)(previous - bralink); - bralink = previous; - PUTINC(previous, 0, offset); - } - - repeat_max--; - } - - /* If the minimum is greater than zero, replicate the group as many - times as necessary, and adjust the maximum to the number of subsequent - copies that we need. If we set a first char from the group, and didn't - set a required char, copy the latter from the former. If there are any - forward reference subroutine calls in the group, there will be entries on - the workspace list; replicate these with an appropriate increment. */ - - else - { - if (repeat_min > 1) - { - /* In the pre-compile phase, we don't actually do the replication. We - just adjust the length as if we had. Do some paranoid checks for + bralink = previous; + PUTINC(previous, 0, offset); + } + + repeat_max--; + } + + /* If the minimum is greater than zero, replicate the group as many + times as necessary, and adjust the maximum to the number of subsequent + copies that we need. If we set a first char from the group, and didn't + set a required char, copy the latter from the former. 
If there are any + forward reference subroutine calls in the group, there will be entries on + the workspace list; replicate these with an appropriate increment. */ + + else + { + if (repeat_min > 1) + { + /* In the pre-compile phase, we don't actually do the replication. We + just adjust the length as if we had. Do some paranoid checks for potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit integer type when available, otherwise double. */ - - if (lengthptr != NULL) - { - int delta = (repeat_min - 1)*length_prevgroup; + + if (lengthptr != NULL) + { + int delta = (repeat_min - 1)*length_prevgroup; if ((INT64_OR_DOUBLE)(repeat_min - 1)* (INT64_OR_DOUBLE)length_prevgroup > (INT64_OR_DOUBLE)INT_MAX || - OFLOW_MAX - *lengthptr < delta) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += delta; - } - + OFLOW_MAX - *lengthptr < delta) + { + *errorcodeptr = ERR20; + goto FAILED; + } + *lengthptr += delta; + } + /* This is compiling for real. If there is a set first byte for the group, and we have not yet set a "required byte", set it. Make sure there is enough workspace for copying forward references before doing the copy. 
*/ - - else - { + + else + { if (groupsetfirstchar && reqcharflags < 0) { reqchar = firstchar; reqcharflags = firstcharflags; } - for (i = 1; i < repeat_min; i++) - { + for (i = 1; i < repeat_min; i++) + { pcre_uchar *hc; size_t this_hwm_offset = cd->hwm - cd->start_workspace; memcpy(code, previous, IN_UCHARS(len)); @@ -6202,7 +6202,7 @@ for (;; ptr++) while (cd->hwm > cd->start_workspace + cd->workspace_size - WORK_SIZE_SAFETY_MARGIN - (this_hwm_offset - base_hwm_offset)) - { + { *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; } @@ -6211,70 +6211,70 @@ for (;; ptr++) hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; hc += LINK_SIZE) { - PUT(cd->hwm, 0, GET(hc, 0) + len); - cd->hwm += LINK_SIZE; - } + PUT(cd->hwm, 0, GET(hc, 0) + len); + cd->hwm += LINK_SIZE; + } base_hwm_offset = this_hwm_offset; - code += len; - } - } - } - - if (repeat_max > 0) repeat_max -= repeat_min; - } - - /* This code is common to both the zero and non-zero minimum cases. If - the maximum is limited, it replicates the group in a nested fashion, - remembering the bracket starts on a stack. In the case of a zero minimum, - the first one was set up above. In all cases the repeat_max now specifies - the number of additional copies needed. Again, we must remember to - replicate entries on the forward reference list. */ - - if (repeat_max >= 0) - { - /* In the pre-compile phase, we don't actually do the replication. We - just adjust the length as if we had. For each repetition we must add 1 - to the length for BRAZERO and for all but the last repetition we must - add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some + code += len; + } + } + } + + if (repeat_max > 0) repeat_max -= repeat_min; + } + + /* This code is common to both the zero and non-zero minimum cases. If + the maximum is limited, it replicates the group in a nested fashion, + remembering the bracket starts on a stack. In the case of a zero minimum, + the first one was set up above. 
In all cases the repeat_max now specifies + the number of additional copies needed. Again, we must remember to + replicate entries on the forward reference list. */ + + if (repeat_max >= 0) + { + /* In the pre-compile phase, we don't actually do the replication. We + just adjust the length as if we had. For each repetition we must add 1 + to the length for BRAZERO and for all but the last repetition we must + add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is a 64-bit integer type when available, otherwise double. */ - - if (lengthptr != NULL && repeat_max > 0) - { - int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) - - 2 - 2*LINK_SIZE; /* Last one doesn't nest */ + + if (lengthptr != NULL && repeat_max > 0) + { + int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) - + 2 - 2*LINK_SIZE; /* Last one doesn't nest */ if ((INT64_OR_DOUBLE)repeat_max * (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) > (INT64_OR_DOUBLE)INT_MAX || - OFLOW_MAX - *lengthptr < delta) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += delta; - } - - /* This is compiling for real */ - - else for (i = repeat_max - 1; i >= 0; i--) - { + OFLOW_MAX - *lengthptr < delta) + { + *errorcodeptr = ERR20; + goto FAILED; + } + *lengthptr += delta; + } + + /* This is compiling for real */ + + else for (i = repeat_max - 1; i >= 0; i--) + { pcre_uchar *hc; size_t this_hwm_offset = cd->hwm - cd->start_workspace; - - *code++ = OP_BRAZERO + repeat_type; - - /* All but the final copy start a new nesting, maintaining the - chain of brackets outstanding. */ - - if (i != 0) - { - int offset; - *code++ = OP_BRA; + + *code++ = OP_BRAZERO + repeat_type; + + /* All but the final copy start a new nesting, maintaining the + chain of brackets outstanding. */ + + if (i != 0) + { + int offset; + *code++ = OP_BRA; offset = (bralink == NULL)? 
0 : (int)(code - bralink); - bralink = code; - PUTINC(code, 0, offset); - } - + bralink = code; + PUTINC(code, 0, offset); + } + memcpy(code, previous, IN_UCHARS(len)); /* Ensure there is enough workspace for forward references before @@ -6283,7 +6283,7 @@ for (;; ptr++) while (cd->hwm > cd->start_workspace + cd->workspace_size - WORK_SIZE_SAFETY_MARGIN - (this_hwm_offset - base_hwm_offset)) - { + { *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; } @@ -6292,41 +6292,41 @@ for (;; ptr++) hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; hc += LINK_SIZE) { - PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); - cd->hwm += LINK_SIZE; - } + PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); + cd->hwm += LINK_SIZE; + } base_hwm_offset = this_hwm_offset; - code += len; - } - - /* Now chain through the pending brackets, and fill in their length - fields (which are holding the chain links pro tem). */ - - while (bralink != NULL) - { - int oldlinkoffset; + code += len; + } + + /* Now chain through the pending brackets, and fill in their length + fields (which are holding the chain links pro tem). */ + + while (bralink != NULL) + { + int oldlinkoffset; int offset = (int)(code - bralink + 1); pcre_uchar *bra = code - offset; - oldlinkoffset = GET(bra, 1); - bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; - *code++ = OP_KET; - PUTINC(code, 0, offset); - PUT(bra, 1, offset); - } - } - + oldlinkoffset = GET(bra, 1); + bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; + *code++ = OP_KET; + PUTINC(code, 0, offset); + PUT(bra, 1, offset); + } + } + /* If the maximum is unlimited, set a repeater in the final copy. For ONCE brackets, that's all we need to do. However, possessively repeated ONCE brackets can be converted into non-capturing brackets, as the behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to deal with possessive ONCEs specially. 
- + Otherwise, when we are doing the actual compile phase, check to see whether this group is one that could match an empty string. If so, - convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so + convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime checking can be done. [This check is also applied to ONCE groups at runtime, but in a different way.] - + Then, if the quantifier was possessive and the bracket is not a conditional, we convert the BRA code to the POS form, and the KET code to KETRPOS. (It turns out to be convenient at runtime to detect this kind of @@ -6340,8 +6340,8 @@ for (;; ptr++) there will be earlier copies of the group, and so we still have to wrap the whole thing. */ - else - { + else + { pcre_uchar *ketcode = code - 1 - LINK_SIZE; pcre_uchar *bracode = ketcode - GET(ketcode, 1); @@ -6360,23 +6360,23 @@ for (;; ptr++) converted to non-capturing above). */ else - { + { /* In the compile phase, check for empty string matching. */ if (lengthptr == NULL) - { + { pcre_uchar *scode = bracode; do - { + { if (could_be_empty_branch(scode, ketcode, utf, cd, NULL)) { *bracode += OP_SBRA - OP_BRA; break; } scode += GET(scode, 1); - } + } while (*scode == OP_ALT); - } + } /* A conditional group with only one branch has an implicit empty alternative branch. */ @@ -6425,10 +6425,10 @@ for (;; ptr++) /* Non-possessive quantifier */ else *ketcode = OP_KETRMAX + repeat_type; - } - } - } - + } + } + } + /* If previous is OP_FAIL, it was generated by an empty class [] in JavaScript mode. The other ways in which OP_FAIL can be generated, that is by (*FAIL) or (?!) 
set previous to NULL, which gives a "nothing to repeat" @@ -6436,14 +6436,14 @@ for (;; ptr++) else if (*previous == OP_FAIL) goto END_REPEAT; - /* Else there's some kind of shambles */ - - else - { - *errorcodeptr = ERR11; - goto FAILED; - } - + /* Else there's some kind of shambles */ + + else + { + *errorcodeptr = ERR11; + goto FAILED; + } + /* If the character following a repeat is '+', possessive_quantifier is TRUE. For some opcodes, there are special alternative opcodes for this case. For anything else, we wrap the entire repeated item inside OP_ONCE @@ -6453,12 +6453,12 @@ for (;; ptr++) Some (but not all) possessively repeated subpatterns have already been completely handled in the code just above. For them, possessive_quantifier is always FALSE at this stage. Note that the repeated item starts at - tempcode, not at previous, which might be the first part of a string whose + tempcode, not at previous, which might be the first part of a string whose (former) last char we repeated. */ - - if (possessive_quantifier) - { - int len; + + if (possessive_quantifier) + { + int len; /* Possessifying an EXACT quantifier has no effect, so we can ignore it. 
However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6}, @@ -6543,23 +6543,23 @@ for (;; ptr++) } #ifdef NEVER - if (len > 0) switch (*tempcode) - { - case OP_STAR: *tempcode = OP_POSSTAR; break; - case OP_PLUS: *tempcode = OP_POSPLUS; break; - case OP_QUERY: *tempcode = OP_POSQUERY; break; - case OP_UPTO: *tempcode = OP_POSUPTO; break; - + if (len > 0) switch (*tempcode) + { + case OP_STAR: *tempcode = OP_POSSTAR; break; + case OP_PLUS: *tempcode = OP_POSPLUS; break; + case OP_QUERY: *tempcode = OP_POSQUERY; break; + case OP_UPTO: *tempcode = OP_POSUPTO; break; + case OP_STARI: *tempcode = OP_POSSTARI; break; case OP_PLUSI: *tempcode = OP_POSPLUSI; break; case OP_QUERYI: *tempcode = OP_POSQUERYI; break; case OP_UPTOI: *tempcode = OP_POSUPTOI; break; - - case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; - case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; - case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; - case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; - + + case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; + case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; + case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; + case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; + case OP_NOTSTARI: *tempcode = OP_NOTPOSSTARI; break; case OP_NOTPLUSI: *tempcode = OP_NOTPOSPLUSI; break; case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break; @@ -6578,50 +6578,50 @@ for (;; ptr++) /* Because we are moving code along, we must ensure that any pending recursive references are updated. 
*/ - default: + default: *code = OP_END; adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset); memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); - code += 1 + LINK_SIZE; - len += 1 + LINK_SIZE; - tempcode[0] = OP_ONCE; - *code++ = OP_KET; - PUTINC(code, 0, len); - PUT(tempcode, 1, len); - break; - } + code += 1 + LINK_SIZE; + len += 1 + LINK_SIZE; + tempcode[0] = OP_ONCE; + *code++ = OP_KET; + PUTINC(code, 0, len); + PUT(tempcode, 1, len); + break; + } #endif - } - - /* In all case we no longer have a previous item. We also set the + } + + /* In all case we no longer have a previous item. We also set the "follows varying string" flag for subsequently encountered reqchars if - it isn't already set and we have just passed a varying length item. */ - - END_REPEAT: - previous = NULL; - cd->req_varyopt |= reqvary; - break; - - - /* ===================================================================*/ - /* Start of nested parenthesized sub-expression, or comment or lookahead or - lookbehind or option setting or condition or all the other extended - parenthesis forms. */ - + it isn't already set and we have just passed a varying length item. */ + + END_REPEAT: + previous = NULL; + cd->req_varyopt |= reqvary; + break; + + + /* ===================================================================*/ + /* Start of nested parenthesized sub-expression, or comment or lookahead or + lookbehind or option setting or condition or all the other extended + parenthesis forms. */ + case CHAR_LEFT_PARENTHESIS: ptr++; - + /* Now deal with various "verbs" that can be introduced by '*'. 
*/ - + if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0)))) - { - int i, namelen; + { + int i, namelen; int arglen = 0; - const char *vn = verbnames; + const char *vn = verbnames; const pcre_uchar *name = ptr + 1; const pcre_uchar *arg = NULL; - previous = NULL; + previous = NULL; ptr++; while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; namelen = (int)(ptr - name); @@ -6631,7 +6631,7 @@ for (;; ptr++) letters, digits, and underscores. */ if (*ptr == CHAR_COLON) - { + { arg = ++ptr; while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; arglen = (int)(ptr - arg); @@ -6640,21 +6640,21 @@ for (;; ptr++) *errorcodeptr = ERR75; goto FAILED; } - } + } if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR60; - goto FAILED; - } + { + *errorcodeptr = ERR60; + goto FAILED; + } /* Scan the table of verb names */ - for (i = 0; i < verbcount; i++) - { - if (namelen == verbs[i].len && + for (i = 0; i < verbcount; i++) + { + if (namelen == verbs[i].len && STRNCMP_UC_C8(name, vn, namelen) == 0) - { + { int setverb; /* Check for open captures before ACCEPT and convert it to @@ -6744,16 +6744,16 @@ for (;; ptr++) } break; /* Found verb, exit loop */ - } + } - vn += verbs[i].len + 1; - } + vn += verbs[i].len + 1; + } if (i < verbcount) continue; /* Successfully handled a verb */ *errorcodeptr = ERR60; /* Verb not recognized */ - goto FAILED; - } - + goto FAILED; + } + /* Initialize for "real" parentheses */ newoptions = options; @@ -6762,48 +6762,48 @@ for (;; ptr++) item_hwm_offset = cd->hwm - cd->start_workspace; reset_bracount = FALSE; - /* Deal with the extended parentheses; all are introduced by '?', and the - appearance of any of them means that this is not a capturing group. */ - + /* Deal with the extended parentheses; all are introduced by '?', and the + appearance of any of them means that this is not a capturing group. 
*/ + if (*ptr == CHAR_QUESTION_MARK) - { - int i, set, unset, namelen; - int *optset; + { + int i, set, unset, namelen; + int *optset; const pcre_uchar *name; pcre_uchar *slot; - - switch (*(++ptr)) - { - /* ------------------------------------------------------------ */ + + switch (*(++ptr)) + { + /* ------------------------------------------------------------ */ case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ - reset_bracount = TRUE; + reset_bracount = TRUE; cd->dupgroups = TRUE; /* Record (?| encountered */ - /* Fall through */ - - /* ------------------------------------------------------------ */ + /* Fall through */ + + /* ------------------------------------------------------------ */ case CHAR_COLON: /* Non-capturing bracket */ - bravalue = OP_BRA; - ptr++; - break; - - - /* ------------------------------------------------------------ */ + bravalue = OP_BRA; + ptr++; + break; + + + /* ------------------------------------------------------------ */ case CHAR_LEFT_PARENTHESIS: - bravalue = OP_COND; /* Conditional group */ + bravalue = OP_COND; /* Conditional group */ tempptr = ptr; - - /* A condition can be an assertion, a number (referring to a numbered + + /* A condition can be an assertion, a number (referring to a numbered group's having been set), a name (referring to a named group), or 'R', referring to recursion. R and R&name are also permitted for recursion tests. - + There are ways of testing a named group: (?(name)) is used by Python; Perl 5.10 onwards uses (?() or (?('name')). - + There is one unfortunate ambiguity, caused by history. 'R' can be the recursive thing or the name 'R' (and similarly for 'R' followed by digits). We look for a name first; if not found, we try the other case. - + For compatibility with auto-callouts, we allow a callout to be specified before a condition that is an assertion. 
First, check for the syntax of a callout; if found, adjust the temporary pointer that is @@ -6825,10 +6825,10 @@ for (;; ptr++) } } - /* For conditions that are assertions, check the syntax, and then exit - the switch. This will take control down to where bracketed groups, - including assertions, are processed. */ - + /* For conditions that are assertions, check the syntax, and then exit + the switch. This will take control down to where bracketed groups, + including assertions, are processed. */ + if (tempptr[1] == CHAR_QUESTION_MARK && (tempptr[2] == CHAR_EQUALS_SIGN || tempptr[2] == CHAR_EXCLAMATION_MARK || @@ -6837,54 +6837,54 @@ for (;; ptr++) tempptr[3] == CHAR_EXCLAMATION_MARK)))) { cd->iscondassert = TRUE; - break; + break; } - + /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all need to skip at least 1+IMM2_SIZE bytes at the start of the group. */ - - code[1+LINK_SIZE] = OP_CREF; + + code[1+LINK_SIZE] = OP_CREF; skipbytes = 1+IMM2_SIZE; refsign = -1; /* => not a number */ namelen = -1; /* => not a name; must set to avoid warning */ name = NULL; /* Always set to avoid warning */ recno = 0; /* Always set to avoid warning */ - - /* Check for a test for recursion in a named group. */ - + + /* Check for a test for recursion in a named group. */ + ptr++; if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND) - { - terminator = -1; - ptr += 2; - code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */ - } - - /* Check for a test for a named group's having been set, using the Perl + { + terminator = -1; + ptr += 2; + code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */ + } + + /* Check for a test for a named group's having been set, using the Perl syntax (?() or (?('name'), and also allow for the original PCRE syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). 
*/ - + else if (*ptr == CHAR_LESS_THAN_SIGN) - { + { terminator = CHAR_GREATER_THAN_SIGN; - ptr++; - } + ptr++; + } else if (*ptr == CHAR_APOSTROPHE) - { + { terminator = CHAR_APOSTROPHE; - ptr++; - } - else - { + ptr++; + } + else + { terminator = CHAR_NULL; if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++; else if (IS_DIGIT(*ptr)) refsign = 0; - } - + } + /* Handle a number */ - + if (refsign >= 0) - { + { while (IS_DIGIT(*ptr)) { if (recno > INT_MAX / 10 - 1) /* Integer overflow */ @@ -6896,15 +6896,15 @@ for (;; ptr++) recno = recno * 10 + (int)(*ptr - CHAR_0); ptr++; } - } - + } + /* Otherwise we expect to read a name; anything else is an error. When a name is one of a number of duplicates, a different opcode is used and it needs more memory. Unfortunately we cannot tell whether a name is a duplicate in the first pass, so we have to allow for more memory. */ - + else - { + { if (IS_DIGIT(*ptr)) { *errorcodeptr = ERR84; @@ -6922,62 +6922,62 @@ for (;; ptr++) } namelen = (int)(ptr - name); if (lengthptr != NULL) skipbytes += IMM2_SIZE; - } - + } + /* Check the terminator */ if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) || *ptr++ != CHAR_RIGHT_PARENTHESIS) - { + { ptr--; /* Error offset */ *errorcodeptr = ERR26; /* Malformed number or name */ - goto FAILED; - } - - /* Do no further checking in the pre-compile phase. */ - - if (lengthptr != NULL) break; - - /* In the real compile we do the work of looking for the actual + goto FAILED; + } + + /* Do no further checking in the pre-compile phase. */ + + if (lengthptr != NULL) break; + + /* In the real compile we do the work of looking for the actual reference. If refsign is not negative, it means we have a number in recno. */ - + if (refsign >= 0) - { - if (recno <= 0) - { + { + if (recno <= 0) + { *errorcodeptr = ERR35; - goto FAILED; - } + goto FAILED; + } if (refsign != 0) recno = (refsign == CHAR_MINUS)? 
cd->bracount - recno + 1 : recno + cd->bracount; - if (recno <= 0 || recno > cd->final_bracount) - { - *errorcodeptr = ERR15; - goto FAILED; - } - PUT2(code, 2+LINK_SIZE, recno); + if (recno <= 0 || recno > cd->final_bracount) + { + *errorcodeptr = ERR15; + goto FAILED; + } + PUT2(code, 2+LINK_SIZE, recno); if (recno > cd->top_backref) cd->top_backref = recno; - break; - } - + break; + } + /* Otherwise look for the name. */ - - slot = cd->name_table; - for (i = 0; i < cd->names_found; i++) - { + + slot = cd->name_table; + for (i = 0; i < cd->names_found; i++) + { if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 && slot[IMM2_SIZE+namelen] == 0) break; - slot += cd->name_entry_size; - } - + slot += cd->name_entry_size; + } + /* Found the named subpattern. If the name is duplicated, add one to the opcode to change CREF/RREF into DNCREF/DNRREF and insert appropriate data values. Otherwise, just insert the unique subpattern number. */ - - if (i < cd->names_found) - { + + if (i < cd->names_found) + { int offset = i++; int count = 1; recno = GET2(slot, 0); /* Number from first found */ @@ -6989,7 +6989,7 @@ for (;; ptr++) (slot+IMM2_SIZE)[namelen] != 0) break; count++; } - + if (count > 1) { PUT2(code, 2+LINK_SIZE, offset); @@ -7001,133 +7001,133 @@ for (;; ptr++) { PUT2(code, 2+LINK_SIZE, recno); } - } - + } + /* If terminator == CHAR_NULL it means that the name followed directly after the opening parenthesis [e.g. (?(abc)...] and in this case there are some further alternatives to try. For the cases where terminator != CHAR_NULL [things like (?(... or (?('name')... or (?(R&name)... ] we have now checked all the possibilities, so give an error. */ - + else if (terminator != CHAR_NULL) - { - *errorcodeptr = ERR15; - goto FAILED; - } - - /* Check for (?(R) for recursion. Allow digits after R to specify a - specific group number. */ - + { + *errorcodeptr = ERR15; + goto FAILED; + } + + /* Check for (?(R) for recursion. 
Allow digits after R to specify a + specific group number. */ + else if (*name == CHAR_R) - { - recno = 0; - for (i = 1; i < namelen; i++) - { + { + recno = 0; + for (i = 1; i < namelen; i++) + { if (!IS_DIGIT(name[i])) - { - *errorcodeptr = ERR15; - goto FAILED; - } + { + *errorcodeptr = ERR15; + goto FAILED; + } if (recno > INT_MAX / 10 - 1) /* Integer overflow */ { *errorcodeptr = ERR61; goto FAILED; } recno = recno * 10 + name[i] - CHAR_0; - } - if (recno == 0) recno = RREF_ANY; - code[1+LINK_SIZE] = OP_RREF; /* Change test type */ - PUT2(code, 2+LINK_SIZE, recno); - } - - /* Similarly, check for the (?(DEFINE) "condition", which is always - false. */ - + } + if (recno == 0) recno = RREF_ANY; + code[1+LINK_SIZE] = OP_RREF; /* Change test type */ + PUT2(code, 2+LINK_SIZE, recno); + } + + /* Similarly, check for the (?(DEFINE) "condition", which is always + false. */ + else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0) - { - code[1+LINK_SIZE] = OP_DEF; - skipbytes = 1; - } - + { + code[1+LINK_SIZE] = OP_DEF; + skipbytes = 1; + } + /* Reference to an unidentified subpattern. */ - - else - { + + else + { *errorcodeptr = ERR15; - goto FAILED; - } - break; - - - /* ------------------------------------------------------------ */ + goto FAILED; + } + break; + + + /* ------------------------------------------------------------ */ case CHAR_EQUALS_SIGN: /* Positive lookahead */ - bravalue = OP_ASSERT; + bravalue = OP_ASSERT; cd->assert_depth += 1; - ptr++; - break; - + ptr++; + break; + /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird thing to do, but Perl allows all assertions to be quantified, and when they contain capturing parentheses there may be a potential use for this feature. Not that that applies to a quantified (?!) but we allow it for uniformity. 
*/ - - /* ------------------------------------------------------------ */ + + /* ------------------------------------------------------------ */ case CHAR_EXCLAMATION_MARK: /* Negative lookahead */ - ptr++; + ptr++; if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK && ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK && (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2))) - { - *code++ = OP_FAIL; - previous = NULL; - continue; - } - bravalue = OP_ASSERT_NOT; + { + *code++ = OP_FAIL; + previous = NULL; + continue; + } + bravalue = OP_ASSERT_NOT; cd->assert_depth += 1; - break; - - - /* ------------------------------------------------------------ */ + break; + + + /* ------------------------------------------------------------ */ case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */ - switch (ptr[1]) - { + switch (ptr[1]) + { case CHAR_EQUALS_SIGN: /* Positive lookbehind */ - bravalue = OP_ASSERTBACK; + bravalue = OP_ASSERTBACK; cd->assert_depth += 1; - ptr += 2; - break; - + ptr += 2; + break; + case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */ - bravalue = OP_ASSERTBACK_NOT; + bravalue = OP_ASSERTBACK_NOT; cd->assert_depth += 1; - ptr += 2; - break; - - default: /* Could be name define, else bad */ + ptr += 2; + break; + + default: /* Could be name define, else bad */ if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0) goto DEFINE_NAME; - ptr++; /* Correct offset for error */ - *errorcodeptr = ERR24; - goto FAILED; - } - break; - - - /* ------------------------------------------------------------ */ + ptr++; /* Correct offset for error */ + *errorcodeptr = ERR24; + goto FAILED; + } + break; + + + /* ------------------------------------------------------------ */ case CHAR_GREATER_THAN_SIGN: /* One-time brackets */ - bravalue = OP_ONCE; - ptr++; - break; - - - /* ------------------------------------------------------------ */ + bravalue = OP_ONCE; + ptr++; + break; + + + /* 
------------------------------------------------------------ */ case CHAR_C: /* Callout - may be followed by digits; */ previous_callout = code; /* Save for later completion */ after_manual_callout = 1; /* Skip one item before completing */ - *code++ = OP_CALLOUT; - { - int n = 0; + *code++ = OP_CALLOUT; + { + int n = 0; ptr++; while(IS_DIGIT(*ptr)) { @@ -7139,63 +7139,63 @@ for (;; ptr++) } } if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR39; - goto FAILED; - } - *code++ = n; + { + *errorcodeptr = ERR39; + goto FAILED; + } + *code++ = n; PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */ PUT(code, LINK_SIZE, 0); /* Default length */ - code += 2 * LINK_SIZE; - } - previous = NULL; - continue; - - - /* ------------------------------------------------------------ */ + code += 2 * LINK_SIZE; + } + previous = NULL; + continue; + + + /* ------------------------------------------------------------ */ case CHAR_P: /* Python-style named subpattern handling */ if (*(++ptr) == CHAR_EQUALS_SIGN || *ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */ - { + { is_recurse = *ptr == CHAR_GREATER_THAN_SIGN; terminator = CHAR_RIGHT_PARENTHESIS; - goto NAMED_REF_OR_RECURSE; - } + goto NAMED_REF_OR_RECURSE; + } else if (*ptr != CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */ - { - *errorcodeptr = ERR41; - goto FAILED; - } - /* Fall through to handle (?P< as (?< is handled */ - - - /* ------------------------------------------------------------ */ - DEFINE_NAME: /* Come here from (?< handling */ + { + *errorcodeptr = ERR41; + goto FAILED; + } + /* Fall through to handle (?P< as (?< is handled */ + + + /* ------------------------------------------------------------ */ + DEFINE_NAME: /* Come here from (?< handling */ case CHAR_APOSTROPHE: terminator = (*ptr == CHAR_LESS_THAN_SIGN)? 
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; name = ++ptr; if (IS_DIGIT(*ptr)) - { + { *errorcodeptr = ERR84; /* Group name must start with non-digit */ goto FAILED; } while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; namelen = (int)(ptr - name); - + /* In the pre-compile phase, do a syntax check, remember the longest name, and then remember the group in a vector, expanding it if necessary. Duplicates for the same number are skipped; other duplicates are checked for validity. In the actual compile, there is nothing to do. */ - + if (lengthptr != NULL) { named_group *ng; pcre_uint32 number = cd->bracount + 1; - + if (*ptr != (pcre_uchar)terminator) - { + { *errorcodeptr = ERR42; goto FAILED; } @@ -7210,10 +7210,10 @@ for (;; ptr++) { cd->name_entry_size = namelen + IMM2_SIZE + 1; if (namelen > MAX_NAME_SIZE) - { + { *errorcodeptr = ERR48; - goto FAILED; - } + goto FAILED; + } } /* Scan the list to check for duplicates. For duplicate names, if the @@ -7228,37 +7228,37 @@ for (;; ptr++) { if (namelen == ng->length && STRNCMP_UC_UC(name, ng->name, namelen) == 0) - { + { if (ng->number == number) break; if ((options & PCRE_DUPNAMES) == 0) - { + { *errorcodeptr = ERR43; - goto FAILED; - } + goto FAILED; + } cd->dupnames = TRUE; /* Duplicate names exist */ - } + } else if (ng->number == number) { *errorcodeptr = ERR65; goto FAILED; } - } - + } + if (i >= cd->names_found) /* Not a duplicate with same number */ { /* Increase the list size if necessary */ - + if (cd->names_found >= cd->named_group_list_size) - { + { int newsize = cd->named_group_list_size * 2; named_group *newspace = (PUBL(malloc)) (newsize * sizeof(named_group)); if (newspace == NULL) - { + { *errorcodeptr = ERR21; goto FAILED; - } + } memcpy(newspace, cd->named_groups, cd->named_group_list_size * sizeof(named_group)); @@ -7266,33 +7266,33 @@ for (;; ptr++) (PUBL(free))((void *)cd->named_groups); cd->named_groups = newspace; cd->named_group_list_size = newsize; - } - + } + 
cd->named_groups[cd->names_found].name = name; cd->named_groups[cd->names_found].length = namelen; cd->named_groups[cd->names_found].number = number; cd->names_found++; - } - } - + } + } + ptr++; /* Move past > or ' in both passes. */ - goto NUMBERED_GROUP; - - - /* ------------------------------------------------------------ */ + goto NUMBERED_GROUP; + + + /* ------------------------------------------------------------ */ case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */ terminator = CHAR_RIGHT_PARENTHESIS; - is_recurse = TRUE; - /* Fall through */ - - /* We come here from the Python syntax above that handles both - references (?P=name) and recursion (?P>name), as well as falling - through from the Perl recursion syntax (?&name). We also come here from - the Perl \k or \k'name' back reference syntax and the \k{name} + is_recurse = TRUE; + /* Fall through */ + + /* We come here from the Python syntax above that handles both + references (?P=name) and recursion (?P>name), as well as falling + through from the Perl recursion syntax (?&name). We also come here from + the Perl \k or \k'name' back reference syntax and the \k{name} .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */ - - NAMED_REF_OR_RECURSE: - name = ++ptr; + + NAMED_REF_OR_RECURSE: + name = ++ptr; if (IS_DIGIT(*ptr)) { *errorcodeptr = ERR84; /* Group name must start with non-digit */ @@ -7300,34 +7300,34 @@ for (;; ptr++) } while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; namelen = (int)(ptr - name); - + /* In the pre-compile phase, do a syntax check. We used to just set a dummy reference number, because it was not used in the first pass. However, with the change of recursive back references to be atomic, we have to look for the number so that this state can be identified, as otherwise the incorrect length is computed. If it's not a backwards reference, the dummy number will do. 
*/ - - if (lengthptr != NULL) - { + + if (lengthptr != NULL) + { named_group *ng; recno = 0; - if (namelen == 0) - { - *errorcodeptr = ERR62; - goto FAILED; - } + if (namelen == 0) + { + *errorcodeptr = ERR62; + goto FAILED; + } if (*ptr != (pcre_uchar)terminator) - { - *errorcodeptr = ERR42; - goto FAILED; - } - if (namelen > MAX_NAME_SIZE) - { - *errorcodeptr = ERR48; - goto FAILED; - } + { + *errorcodeptr = ERR42; + goto FAILED; + } + if (namelen > MAX_NAME_SIZE) + { + *errorcodeptr = ERR48; + goto FAILED; + } /* Count named back references. */ @@ -7393,43 +7393,43 @@ for (;; ptr++) } } } - } - + } + /* In the real compile, search the name table. We check the name - first, and then check that we have reached the end of the name in the + first, and then check that we have reached the end of the name in the table. That way, if the name is longer than any in the table, the comparison will fail without reading beyond the table entry. */ - - else - { - slot = cd->name_table; - for (i = 0; i < cd->names_found; i++) - { + + else + { + slot = cd->name_table; + for (i = 0; i < cd->names_found; i++) + { if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 && slot[IMM2_SIZE+namelen] == 0) - break; - slot += cd->name_entry_size; - } - + break; + slot += cd->name_entry_size; + } + if (i < cd->names_found) - { - recno = GET2(slot, 0); - } + { + recno = GET2(slot, 0); + } else - { - *errorcodeptr = ERR15; - goto FAILED; - } - } - + { + *errorcodeptr = ERR15; + goto FAILED; + } + } + /* In both phases, for recursions, we can now go to the code than handles numerical recursion. */ - - if (is_recurse) goto HANDLE_RECURSION; - + + if (is_recurse) goto HANDLE_RECURSION; + /* In the second pass we must see if the name is duplicated. If so, we generate a different opcode. 
*/ - + if (lengthptr == NULL && cd->dupnames) { int count = 1; @@ -7484,7 +7484,7 @@ for (;; ptr++) goto HANDLE_REFERENCE; - /* ------------------------------------------------------------ */ + /* ------------------------------------------------------------ */ case CHAR_R: /* Recursion, same as (?0) */ recno = 0; if (*(++ptr) != CHAR_RIGHT_PARENTHESIS) @@ -7493,16 +7493,16 @@ for (;; ptr++) goto FAILED; } goto HANDLE_RECURSION; - - - /* ------------------------------------------------------------ */ + + + /* ------------------------------------------------------------ */ case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */ case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - { + { const pcre_uchar *called; terminator = CHAR_RIGHT_PARENTHESIS; - + /* Come here from the \g<...> and \g'...' code (Oniguruma compatibility). However, the syntax has been checked to ensure that the ... are a (signed) number, so that neither ERR63 nor ERR29 will @@ -7512,22 +7512,22 @@ for (;; ptr++) HANDLE_NUMERICAL_RECURSION: if ((refsign = *ptr) == CHAR_PLUS) - { - ptr++; + { + ptr++; if (!IS_DIGIT(*ptr)) - { - *errorcodeptr = ERR63; - goto FAILED; - } - } + { + *errorcodeptr = ERR63; + goto FAILED; + } + } else if (refsign == CHAR_MINUS) - { + { if (!IS_DIGIT(ptr[1])) - goto OTHER_CHAR_AFTER_QUERY; - ptr++; - } - - recno = 0; + goto OTHER_CHAR_AFTER_QUERY; + ptr++; + } + + recno = 0; while(IS_DIGIT(*ptr)) { if (recno > INT_MAX / 10 - 1) /* Integer overflow */ @@ -7538,73 +7538,73 @@ for (;; ptr++) } recno = recno * 10 + *ptr++ - CHAR_0; } - + if (*ptr != (pcre_uchar)terminator) - { - *errorcodeptr = ERR29; - goto FAILED; - } - + { + *errorcodeptr = ERR29; + goto FAILED; + } + if (refsign == CHAR_MINUS) - { - if (recno == 0) - { - *errorcodeptr = ERR58; - goto FAILED; - } - recno = cd->bracount - recno + 1; - if (recno <= 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } - } + { + if (recno == 0) + 
{ + *errorcodeptr = ERR58; + goto FAILED; + } + recno = cd->bracount - recno + 1; + if (recno <= 0) + { + *errorcodeptr = ERR15; + goto FAILED; + } + } else if (refsign == CHAR_PLUS) - { - if (recno == 0) - { - *errorcodeptr = ERR58; - goto FAILED; - } - recno += cd->bracount; - } - - /* Come here from code above that handles a named recursion */ - - HANDLE_RECURSION: - - previous = code; + { + if (recno == 0) + { + *errorcodeptr = ERR58; + goto FAILED; + } + recno += cd->bracount; + } + + /* Come here from code above that handles a named recursion */ + + HANDLE_RECURSION: + + previous = code; item_hwm_offset = cd->hwm - cd->start_workspace; - called = cd->start_code; - - /* When we are actually compiling, find the bracket that is being - referenced. Temporarily end the regex in case it doesn't exist before - this point. If we end up with a forward reference, first check that - the bracket does occur later so we can give the error (and position) - now. Then remember this forward reference in the workspace so it can - be filled in at the end. */ - - if (lengthptr == NULL) - { - *code = OP_END; + called = cd->start_code; + + /* When we are actually compiling, find the bracket that is being + referenced. Temporarily end the regex in case it doesn't exist before + this point. If we end up with a forward reference, first check that + the bracket does occur later so we can give the error (and position) + now. Then remember this forward reference in the workspace so it can + be filled in at the end. 
*/ + + if (lengthptr == NULL) + { + *code = OP_END; if (recno != 0) called = PRIV(find_bracket)(cd->start_code, utf, recno); - - /* Forward reference */ - - if (called == NULL) - { + + /* Forward reference */ + + if (called == NULL) + { if (recno > cd->final_bracount) - { - *errorcodeptr = ERR15; - goto FAILED; - } + { + *errorcodeptr = ERR15; + goto FAILED; + } /* Fudge the value of "called" so that when it is inserted as an offset below, what it actually inserted is the reference number of the group. Then remember the forward reference. */ - called = cd->start_code + recno; + called = cd->start_code + recno; if (cd->hwm >= cd->start_workspace + cd->workspace_size - WORK_SIZE_SAFETY_MARGIN) { @@ -7612,128 +7612,128 @@ for (;; ptr++) if (*errorcodeptr != 0) goto FAILED; } PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code)); - } - - /* If not a forward reference, and the subpattern is still open, - this is a recursive call. We check to see if this is a left + } + + /* If not a forward reference, and the subpattern is still open, + this is a recursive call. We check to see if this is a left recursion that could loop for ever, and diagnose that case. We must not, however, do this check if we are in a conditional subpattern because the condition might be testing for recursion in a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid. Forever loops are also detected at runtime, so those that occur in conditional subpatterns will be picked up then. */ - + else if (GET(called, 1) == 0 && cond_depth <= 0 && could_be_empty(called, code, bcptr, utf, cd)) - { - *errorcodeptr = ERR40; - goto FAILED; - } - } - + { + *errorcodeptr = ERR40; + goto FAILED; + } + } + /* Insert the recursion/subroutine item. It does not have a set first character (relevant if it is repeated, because it will then be wrapped with ONCE brackets). 
*/ - - *code = OP_RECURSE; + + *code = OP_RECURSE; PUT(code, 1, (int)(called - cd->start_code)); - code += 1 + LINK_SIZE; + code += 1 + LINK_SIZE; groupsetfirstchar = FALSE; - } - - /* Can't determine a first byte now */ - + } + + /* Can't determine a first byte now */ + if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; zerofirstchar = firstchar; zerofirstcharflags = firstcharflags; - continue; - - - /* ------------------------------------------------------------ */ - default: /* Other characters: check option setting */ - OTHER_CHAR_AFTER_QUERY: - set = unset = 0; - optset = &set; - + continue; + + + /* ------------------------------------------------------------ */ + default: /* Other characters: check option setting */ + OTHER_CHAR_AFTER_QUERY: + set = unset = 0; + optset = &set; + while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) - { - switch (*ptr++) - { + { + switch (*ptr++) + { case CHAR_MINUS: optset = &unset; break; - + case CHAR_J: /* Record that it changed in the external options */ - *optset |= PCRE_DUPNAMES; - cd->external_flags |= PCRE_JCHANGED; - break; - + *optset |= PCRE_DUPNAMES; + cd->external_flags |= PCRE_JCHANGED; + break; + case CHAR_i: *optset |= PCRE_CASELESS; break; case CHAR_m: *optset |= PCRE_MULTILINE; break; case CHAR_s: *optset |= PCRE_DOTALL; break; case CHAR_x: *optset |= PCRE_EXTENDED; break; case CHAR_U: *optset |= PCRE_UNGREEDY; break; case CHAR_X: *optset |= PCRE_EXTRA; break; - - default: *errorcodeptr = ERR12; - ptr--; /* Correct the offset */ - goto FAILED; - } - } - - /* Set up the changed option bits, but don't change anything yet. */ - - newoptions = (options | set) & (~unset); - - /* If the options ended with ')' this is not the start of a nested + + default: *errorcodeptr = ERR12; + ptr--; /* Correct the offset */ + goto FAILED; + } + } + + /* Set up the changed option bits, but don't change anything yet. 
*/ + + newoptions = (options | set) & (~unset); + + /* If the options ended with ')' this is not the start of a nested group with option changes, so the options change at this level. If we are not at the pattern start, reset the greedy defaults and the case value for firstchar and reqchar. */ - + if (*ptr == CHAR_RIGHT_PARENTHESIS) - { + { greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); greedy_non_default = greedy_default ^ 1; req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0; - + /* Change options at this level, and pass them back for use in subsequent branches. */ - + *optionsptr = options = newoptions; - previous = NULL; /* This item can't be repeated */ - continue; /* It is complete */ - } - - /* If the options ended with ':' we are heading into a nested group - with possible change of options. Such groups are non-capturing and are - not assertions of any kind. All we need to do is skip over the ':'; - the newoptions value is handled below. */ - - bravalue = OP_BRA; - ptr++; - } /* End of switch for character following (? */ - } /* End of (? handling */ - + previous = NULL; /* This item can't be repeated */ + continue; /* It is complete */ + } + + /* If the options ended with ':' we are heading into a nested group + with possible change of options. Such groups are non-capturing and are + not assertions of any kind. All we need to do is skip over the ':'; + the newoptions value is handled below. */ + + bravalue = OP_BRA; + ptr++; + } /* End of switch for character following (? */ + } /* End of (? handling */ + /* Opening parenthesis not followed by '*' or '?'. If PCRE_NO_AUTO_CAPTURE is set, all unadorned brackets become non-capturing and behave like (?:...) - brackets. */ - - else if ((options & PCRE_NO_AUTO_CAPTURE) != 0) - { - bravalue = OP_BRA; - } - - /* Else we have a capturing group. */ - - else - { - NUMBERED_GROUP: - cd->bracount += 1; - PUT2(code, 1+LINK_SIZE, cd->bracount); + brackets. 
*/ + + else if ((options & PCRE_NO_AUTO_CAPTURE) != 0) + { + bravalue = OP_BRA; + } + + /* Else we have a capturing group. */ + + else + { + NUMBERED_GROUP: + cd->bracount += 1; + PUT2(code, 1+LINK_SIZE, cd->bracount); skipbytes = IMM2_SIZE; - } - + } + /* Process nested bracketed regex. First check for parentheses nested too deeply. */ - + if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT) { *errorcodeptr = ERR82; @@ -7760,19 +7760,19 @@ for (;; ptr++) item_hwm_offset = cd->hwm - cd->start_workspace; } - *code = bravalue; - tempcode = code; + *code = bravalue; + tempcode = code; tempreqvary = cd->req_varyopt; /* Save value before bracket */ tempbracount = cd->bracount; /* Save value before bracket */ length_prevgroup = 0; /* Initialize for pre-compile phase */ - - if (!compile_regex( + + if (!compile_regex( newoptions, /* The complete new option state */ &tempcode, /* Where to put code (updated) */ &ptr, /* Input pointer (updated) */ errorcodeptr, /* Where to put an error message */ - (bravalue == OP_ASSERTBACK || - bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ + (bravalue == OP_ASSERTBACK || + bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ reset_bracount, /* True if (?| group */ skipbytes, /* Skip over bracket number */ cond_depth + @@ -7785,9 +7785,9 @@ for (;; ptr++) cd, /* Tables block */ (lengthptr == NULL)? NULL : /* Actual compile phase */ &length_prevgroup /* Pre-compile phase */ - )) - goto FAILED; - + )) + goto FAILED; + cd->parens_depth -= 1; /* If this was an atomic group and there are no capturing groups within it, @@ -7799,144 +7799,144 @@ for (;; ptr++) if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) cd->assert_depth -= 1; - /* At the end of compiling, code is still pointing to the start of the + /* At the end of compiling, code is still pointing to the start of the group, while tempcode has been updated to point past the end of the group. The pattern pointer (ptr) is on the bracket. 
- + If this is a conditional bracket, check that there are no more than - two branches in the group, or just one if it's a DEFINE group. We do this - in the real compile phase, not in the pre-pass, where the whole group may - not be available. */ - - if (bravalue == OP_COND && lengthptr == NULL) - { + two branches in the group, or just one if it's a DEFINE group. We do this + in the real compile phase, not in the pre-pass, where the whole group may + not be available. */ + + if (bravalue == OP_COND && lengthptr == NULL) + { pcre_uchar *tc = code; - int condcount = 0; - - do { - condcount++; - tc += GET(tc,1); - } - while (*tc != OP_KET); - - /* A DEFINE group is never obeyed inline (the "condition" is always - false). It must have only one branch. */ - - if (code[LINK_SIZE+1] == OP_DEF) - { - if (condcount > 1) - { - *errorcodeptr = ERR54; - goto FAILED; - } - bravalue = OP_DEF; /* Just a flag to suppress char handling below */ - } - - /* A "normal" conditional group. If there is just one branch, we must not + int condcount = 0; + + do { + condcount++; + tc += GET(tc,1); + } + while (*tc != OP_KET); + + /* A DEFINE group is never obeyed inline (the "condition" is always + false). It must have only one branch. */ + + if (code[LINK_SIZE+1] == OP_DEF) + { + if (condcount > 1) + { + *errorcodeptr = ERR54; + goto FAILED; + } + bravalue = OP_DEF; /* Just a flag to suppress char handling below */ + } + + /* A "normal" conditional group. If there is just one branch, we must not make use of its firstchar or reqchar, because this is equivalent to an - empty second branch. */ - - else - { - if (condcount > 2) - { - *errorcodeptr = ERR27; - goto FAILED; - } + empty second branch. 
*/ + + else + { + if (condcount > 2) + { + *errorcodeptr = ERR27; + goto FAILED; + } if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE; - } - } - - /* Error if hit end of pattern */ - + } + } + + /* Error if hit end of pattern */ + if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR14; - goto FAILED; - } - - /* In the pre-compile phase, update the length by the length of the group, - less the brackets at either end. Then reduce the compiled code to just a - set of non-capturing brackets so that it doesn't use much memory if it is - duplicated by a quantifier.*/ - - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; + { + *errorcodeptr = ERR14; + goto FAILED; + } + + /* In the pre-compile phase, update the length by the length of the group, + less the brackets at either end. Then reduce the compiled code to just a + set of non-capturing brackets so that it doesn't use much memory if it is + duplicated by a quantifier.*/ + + if (lengthptr != NULL) + { + if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) + { + *errorcodeptr = ERR20; + goto FAILED; + } + *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; code++; /* This already contains bravalue */ - PUTINC(code, 0, 1 + LINK_SIZE); - *code++ = OP_KET; - PUTINC(code, 0, 1 + LINK_SIZE); - break; /* No need to waste time with special character handling */ - } - - /* Otherwise update the main code pointer to the end of the group. */ - - code = tempcode; - - /* For a DEFINE group, required and first character settings are not - relevant. */ - - if (bravalue == OP_DEF) break; - - /* Handle updating of the required and first characters for other types of - group. Update for normal brackets of all kinds, and conditions with two - branches (see code above). 
If the bracket is followed by a quantifier with + PUTINC(code, 0, 1 + LINK_SIZE); + *code++ = OP_KET; + PUTINC(code, 0, 1 + LINK_SIZE); + break; /* No need to waste time with special character handling */ + } + + /* Otherwise update the main code pointer to the end of the group. */ + + code = tempcode; + + /* For a DEFINE group, required and first character settings are not + relevant. */ + + if (bravalue == OP_DEF) break; + + /* Handle updating of the required and first characters for other types of + group. Update for normal brackets of all kinds, and conditions with two + branches (see code above). If the bracket is followed by a quantifier with zero repeat, we have to back off. Hence the definition of zeroreqchar and zerofirstchar outside the main loop so that they can be accessed for the - back off. */ - + back off. */ + zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; zerofirstchar = firstchar; zerofirstcharflags = firstcharflags; groupsetfirstchar = FALSE; - - if (bravalue >= OP_ONCE) - { + + if (bravalue >= OP_ONCE) + { /* If we have not yet set a firstchar in this branch, take it from the - subpattern, remembering that it was set here so that a repeat of more + subpattern, remembering that it was set here so that a repeat of more than one can replicate it as reqchar if necessary. If the subpattern has no firstchar, set "none" for the whole branch. In both cases, a zero repeat forces firstchar to "none". */ - + if (firstcharflags == REQ_UNSET) - { + { if (subfirstcharflags >= 0) - { + { firstchar = subfirstchar; firstcharflags = subfirstcharflags; groupsetfirstchar = TRUE; - } + } else firstcharflags = REQ_NONE; zerofirstcharflags = REQ_NONE; - } - + } + /* If firstchar was previously set, convert the subpattern's firstchar into reqchar if there wasn't one, using the vary flag that was in - existence beforehand. */ - + existence beforehand. 
*/ + else if (subfirstcharflags >= 0 && subreqcharflags < 0) { subreqchar = subfirstchar; subreqcharflags = subfirstcharflags | tempreqvary; } - - /* If the subpattern set a required byte (or set a first byte that isn't - really the first byte - see above), set it. */ - + + /* If the subpattern set a required byte (or set a first byte that isn't + really the first byte - see above), set it. */ + if (subreqcharflags >= 0) { reqchar = subreqchar; reqcharflags = subreqcharflags; } - } - + } + /* For a forward assertion, we take the reqchar, if set, provided that the group has also set a first char. This can be helpful if the pattern that follows the assertion doesn't set a different char. For example, it's @@ -7945,47 +7945,47 @@ for (;; ptr++) the "real" "a" would then become a reqchar instead of a firstchar. This is overcome by a scan at the end if there's no firstchar, looking for an asserted first char. */ - + else if (bravalue == OP_ASSERT && subreqcharflags >= 0 && subfirstcharflags >= 0) { reqchar = subreqchar; reqcharflags = subreqcharflags; } - break; /* End of processing '(' */ - - - /* ===================================================================*/ - /* Handle metasequences introduced by \. For ones like \d, the ESC_ values + break; /* End of processing '(' */ + + + /* ===================================================================*/ + /* Handle metasequences introduced by \. For ones like \d, the ESC_ values are arranged to be the negation of the corresponding OP_values in the default case when PCRE_UCP is not set. For the back references, the values are negative the reference number. Only back references and those types that consume a character may be repeated. We can test for values between ESC_b and ESC_Z for the latter; this may have to change if any new ones are ever created. 
*/ - + case CHAR_BACKSLASH: - tempptr = ptr; + tempptr = ptr; escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE); - if (*errorcodeptr != 0) goto FAILED; - + if (*errorcodeptr != 0) goto FAILED; + if (escape == 0) /* The escape coded a single character */ c = ec; else - { - /* For metasequences that actually match a character, we disable the - setting of a first character if it hasn't already been set. */ - + { + /* For metasequences that actually match a character, we disable the + setting of a first character if it hasn't already been set. */ + if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z) firstcharflags = REQ_NONE; - - /* Set values to reset to if this is followed by a zero repeat. */ - + + /* Set values to reset to if this is followed by a zero repeat. */ + zerofirstchar = firstchar; zerofirstcharflags = firstcharflags; zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; - + /* \g or \g'name' is a subroutine call by name and \g or \g'n' is a subroutine call by number (Oniguruma syntax). In fact, the value ESC_g is returned only for these cases. So we don't need to check for < @@ -8033,44 +8033,44 @@ for (;; ptr++) goto HANDLE_NUMERICAL_RECURSION; } - /* \k or \k'name' is a back reference by name (Perl syntax). + /* \k or \k'name' is a back reference by name (Perl syntax). We also support \k{name} (.NET syntax). */ - + if (escape == ESC_k) - { + { if ((ptr[1] != CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET)) { *errorcodeptr = ERR69; goto FAILED; } - is_recurse = FALSE; + is_recurse = FALSE; terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; - goto NAMED_REF_OR_RECURSE; - } - + goto NAMED_REF_OR_RECURSE; + } + /* Back references are handled specially; must disable firstchar if - not set to cope with cases like (?=(\w+))\1: which would otherwise set - ':' later. 
*/ - + not set to cope with cases like (?=(\w+))\1: which would otherwise set + ':' later. */ + if (escape < 0) - { + { open_capitem *oc; recno = -escape; - + /* Come here from named backref handling when the reference is to a single group (i.e. not to a duplicated name. */ HANDLE_REFERENCE: if (firstcharflags == REQ_UNSET) zerofirstcharflags = firstcharflags = REQ_NONE; - previous = code; + previous = code; item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; - PUT2INC(code, 0, recno); + PUT2INC(code, 0, recno); cd->backref_map |= (recno < 32)? (1U << recno) : 1; - if (recno > cd->top_backref) cd->top_backref = recno; + if (recno > cd->top_backref) cd->top_backref = recno; /* Check to see if this back reference is recursive, that it, it is inside the group that it references. A flag is set so that the @@ -8084,43 +8084,43 @@ for (;; ptr++) break; } } - } - - /* So are Unicode property matches, if supported. */ - -#ifdef SUPPORT_UCP + } + + /* So are Unicode property matches, if supported. */ + +#ifdef SUPPORT_UCP else if (escape == ESC_P || escape == ESC_p) - { - BOOL negated; + { + BOOL negated; unsigned int ptype = 0, pdata = 0; if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) goto FAILED; - previous = code; + previous = code; item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP; - *code++ = ptype; - *code++ = pdata; - } -#else - - /* If Unicode properties are not supported, \X, \P, and \p are not - allowed. */ - + *code++ = ptype; + *code++ = pdata; + } +#else + + /* If Unicode properties are not supported, \X, \P, and \p are not + allowed. 
*/ + else if (escape == ESC_X || escape == ESC_P || escape == ESC_p) - { - *errorcodeptr = ERR45; - goto FAILED; - } -#endif - - /* For the rest (including \X when Unicode properties are supported), we + { + *errorcodeptr = ERR45; + goto FAILED; + } +#endif + + /* For the rest (including \X when Unicode properties are supported), we can obtain the OP value by negating the escape value in the default situation when PCRE_UCP is not set. When it *is* set, we substitute Unicode property tests. Note that \b and \B do a one-character lookbehind, and \A also behaves as if it does. */ - - else - { + + else + { if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && cd->max_lookbehind == 0) cd->max_lookbehind = 1; @@ -8140,47 +8140,47 @@ for (;; ptr++) item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape; } - } - continue; - } - - /* We have a data character whose value is in c. In UTF-8 mode it may have - a value > 127. We set its representation in the length/buffer, and then - handle it as a data character. */ - + } + continue; + } + + /* We have a data character whose value is in c. In UTF-8 mode it may have + a value > 127. We set its representation in the length/buffer, and then + handle it as a data character. */ + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) mclength = PRIV(ord2utf)(c, mcbuffer); - else -#endif - - { - mcbuffer[0] = c; - mclength = 1; - } - goto ONE_CHAR; - - - /* ===================================================================*/ - /* Handle a literal character. It is guaranteed not to be whitespace or # + else +#endif + + { + mcbuffer[0] = c; + mclength = 1; + } + goto ONE_CHAR; + + + /* ===================================================================*/ + /* Handle a literal character. It is guaranteed not to be whitespace or # when the extended flag is set. If we are in a UTF mode, it may be a multi-unit literal character. 
*/ - - default: - NORMAL_CHAR: - mclength = 1; - mcbuffer[0] = c; - + + default: + NORMAL_CHAR: + mclength = 1; + mcbuffer[0] = c; + #ifdef SUPPORT_UTF if (utf && HAS_EXTRALEN(c)) ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); -#endif - - /* At this point we have the character's bytes in mcbuffer, and the length - in mclength. When not in UTF-8 mode, the length is always 1. */ - - ONE_CHAR: - previous = code; +#endif + + /* At this point we have the character's bytes in mcbuffer, and the length + in mclength. When not in UTF-8 mode, the length is always 1. */ + + ONE_CHAR: + previous = code; item_hwm_offset = cd->hwm - cd->start_workspace; /* For caseless UTF-8 mode when UCP support is available, check whether @@ -8206,29 +8206,29 @@ for (;; ptr++) /* Caseful matches, or not one of the multicase characters. */ *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR; - for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; - - /* Remember if \r or \n were seen */ - + for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; + + /* Remember if \r or \n were seen */ + if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL) - cd->external_flags |= PCRE_HASCRORLF; - - /* Set the first and required bytes appropriately. If no previous first - byte, set it from this character, but revert to none on a zero repeat. + cd->external_flags |= PCRE_HASCRORLF; + + /* Set the first and required bytes appropriately. If no previous first + byte, set it from this character, but revert to none on a zero repeat. Otherwise, leave the firstchar value alone, and don't change it on a zero - repeat. */ - + repeat. */ + if (firstcharflags == REQ_UNSET) - { + { zerofirstcharflags = REQ_NONE; zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; - + /* If the character is more than one byte long, we can set firstchar - only if it is not to be matched caselessly. */ - - if (mclength == 1 || req_caseopt == 0) - { + only if it is not to be matched caselessly. 
*/ + + if (mclength == 1 || req_caseopt == 0) + { firstchar = mcbuffer[0]; firstcharflags = req_caseopt; @@ -8237,54 +8237,54 @@ for (;; ptr++) reqchar = code[-1]; reqcharflags = cd->req_varyopt; } - } + } else firstcharflags = reqcharflags = REQ_NONE; - } - + } + /* firstchar was previously set; we can set reqchar only if the length is - 1 or the matching is caseful. */ - - else - { + 1 or the matching is caseful. */ + + else + { zerofirstchar = firstchar; zerofirstcharflags = firstcharflags; zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; - if (mclength == 1 || req_caseopt == 0) + if (mclength == 1 || req_caseopt == 0) { reqchar = code[-1]; reqcharflags = req_caseopt | cd->req_varyopt; } - } - - break; /* End of literal character handling */ - } - } /* end of big loop */ - - -/* Control never reaches here by falling through, only by a goto for all the -error states. Pass back the position in the pattern so that it can be displayed -to the user for diagnosing the error. */ - -FAILED: -*ptrptr = ptr; -return FALSE; -} - - - -/************************************************* -* Compile sequence of alternatives * -*************************************************/ - -/* On entry, ptr is pointing past the bracket character, but on return it -points to the closing bracket, or vertical bar, or end of string. The code -variable is pointing at the byte into which the BRA operator has been stored. -This function is used during the pre-compile phase when we are trying to find -out the amount of memory needed, as well as during the real compile phase. The -value of lengthptr distinguishes the two phases. - -Arguments: + } + + break; /* End of literal character handling */ + } + } /* end of big loop */ + + +/* Control never reaches here by falling through, only by a goto for all the +error states. Pass back the position in the pattern so that it can be displayed +to the user for diagnosing the error. 
*/ + +FAILED: +*ptrptr = ptr; +return FALSE; +} + + + +/************************************************* +* Compile sequence of alternatives * +*************************************************/ + +/* On entry, ptr is pointing past the bracket character, but on return it +points to the closing bracket, or vertical bar, or end of string. The code +variable is pointing at the byte into which the BRA operator has been stored. +This function is used during the pre-compile phase when we are trying to find +out the amount of memory needed, as well as during the real compile phase. The +value of lengthptr distinguishes the two phases. + +Arguments: options option bits, including any changes for this subpattern codeptr -> the address of the current code pointer ptrptr -> the address of the current pattern pointer @@ -8301,18 +8301,18 @@ Arguments: cd points to the data block with tables pointers etc. lengthptr NULL during the real compile phase points to length accumulator during pre-compile phase - + Returns: TRUE on success -*/ - -static BOOL +*/ + +static BOOL compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr, - int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, + int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, int cond_depth, pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, branch_chain *bcptr, compile_data *cd, int *lengthptr) -{ +{ const pcre_uchar *ptr = *ptrptr; pcre_uchar *code = *codeptr; pcre_uchar *last_branch = code; @@ -8324,12 +8324,12 @@ pcre_uint32 firstchar, reqchar; pcre_int32 firstcharflags, reqcharflags; pcre_uint32 branchfirstchar, branchreqchar; pcre_int32 branchfirstcharflags, branchreqcharflags; -int length; +int length; unsigned int orig_bracount; unsigned int max_bracount; -branch_chain bc; +branch_chain bc; size_t save_hwm_offset; - + /* If set, call the external function that checks for stack availability. 
*/ if (PUBL(stack_guard) != NULL && PUBL(stack_guard)()) @@ -8340,28 +8340,28 @@ if (PUBL(stack_guard) != NULL && PUBL(stack_guard)()) /* Miscellaneous initialization */ -bc.outer = bcptr; +bc.outer = bcptr; bc.current_branch = code; - + firstchar = reqchar = 0; firstcharflags = reqcharflags = REQ_UNSET; - + save_hwm_offset = cd->hwm - cd->start_workspace; -/* Accumulate the length for use in the pre-compile phase. Start with the -length of the BRA and KET and any extra bytes that are required at the -beginning. We accumulate in a local variable to save frequent testing of -lenthptr for NULL. We cannot do this by looking at the value of code at the -start and end of each alternative, because compiled items are discarded during -the pre-compile phase so that the work space is not exceeded. */ - -length = 2 + 2*LINK_SIZE + skipbytes; - -/* WARNING: If the above line is changed for any reason, you must also change -the code that abstracts option settings at the start of the pattern and makes -them global. It tests the value of length for (2 + 2*LINK_SIZE) in the -pre-compile phase to find out whether anything has yet been compiled or not. */ - +/* Accumulate the length for use in the pre-compile phase. Start with the +length of the BRA and KET and any extra bytes that are required at the +beginning. We accumulate in a local variable to save frequent testing of +lenthptr for NULL. We cannot do this by looking at the value of code at the +start and end of each alternative, because compiled items are discarded during +the pre-compile phase so that the work space is not exceeded. */ + +length = 2 + 2*LINK_SIZE + skipbytes; + +/* WARNING: If the above line is changed for any reason, you must also change +the code that abstracts option settings at the start of the pattern and makes +them global. It tests the value of length for (2 + 2*LINK_SIZE) in the +pre-compile phase to find out whether anything has yet been compiled or not. 
*/ + /* If this is a capturing subpattern, add to the chain of open capturing items so that we can detect them if (*ACCEPT) is encountered. This is also used to detect groups that contain recursive back references to themselves. Note that @@ -8377,95 +8377,95 @@ if (*code == OP_CBRA) cd->open_caps = &capitem; } -/* Offset is set zero to mark that this bracket is still open */ - -PUT(code, 1, 0); -code += 1 + LINK_SIZE + skipbytes; - -/* Loop for each alternative branch */ - -orig_bracount = max_bracount = cd->bracount; -for (;;) - { - /* For a (?| group, reset the capturing bracket count so that each branch - uses the same numbers. */ - - if (reset_bracount) cd->bracount = orig_bracount; - - /* Set up dummy OP_REVERSE if lookbehind assertion */ - - if (lookbehind) - { - *code++ = OP_REVERSE; - reverse_count = code; - PUTINC(code, 0, 0); - length += 1 + LINK_SIZE; - } - - /* Now compile the branch; in the pre-compile phase its length gets added - into the length. */ - +/* Offset is set zero to mark that this bracket is still open */ + +PUT(code, 1, 0); +code += 1 + LINK_SIZE + skipbytes; + +/* Loop for each alternative branch */ + +orig_bracount = max_bracount = cd->bracount; +for (;;) + { + /* For a (?| group, reset the capturing bracket count so that each branch + uses the same numbers. */ + + if (reset_bracount) cd->bracount = orig_bracount; + + /* Set up dummy OP_REVERSE if lookbehind assertion */ + + if (lookbehind) + { + *code++ = OP_REVERSE; + reverse_count = code; + PUTINC(code, 0, 0); + length += 1 + LINK_SIZE; + } + + /* Now compile the branch; in the pre-compile phase its length gets added + into the length. */ + if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar, &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc, cond_depth, cd, (lengthptr == NULL)? NULL : &length)) - { - *ptrptr = ptr; - return FALSE; - } - - /* Keep the highest bracket count in case (?| was used and some branch - has fewer than the rest. 
*/ - - if (cd->bracount > max_bracount) max_bracount = cd->bracount; - - /* In the real compile phase, there is some post-processing to be done. */ - - if (lengthptr == NULL) - { + { + *ptrptr = ptr; + return FALSE; + } + + /* Keep the highest bracket count in case (?| was used and some branch + has fewer than the rest. */ + + if (cd->bracount > max_bracount) max_bracount = cd->bracount; + + /* In the real compile phase, there is some post-processing to be done. */ + + if (lengthptr == NULL) + { /* If this is the first branch, the firstchar and reqchar values for the - branch become the values for the regex. */ - - if (*last_branch != OP_ALT) - { + branch become the values for the regex. */ + + if (*last_branch != OP_ALT) + { firstchar = branchfirstchar; firstcharflags = branchfirstcharflags; reqchar = branchreqchar; reqcharflags = branchreqcharflags; - } - + } + /* If this is not the first branch, the first char and reqchar have to - match the values from all the previous branches, except that if the + match the values from all the previous branches, except that if the previous value for reqchar didn't have REQ_VARY set, it can still match, - and we set REQ_VARY for the regex. */ - - else - { + and we set REQ_VARY for the regex. */ + + else + { /* If we previously had a firstchar, but it doesn't match the new branch, we have to abandon the firstchar for the regex, but if there was previously no reqchar, it takes on the value of the old firstchar. */ - + if (firstcharflags >= 0 && (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar)) - { + { if (reqcharflags < 0) { reqchar = firstchar; reqcharflags = firstcharflags; } firstcharflags = REQ_NONE; - } - + } + /* If we (now or from before) have no firstchar, a firstchar from the branch becomes a reqchar if there isn't a branch reqchar. 
*/ - + if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0) { branchreqchar = branchfirstchar; branchreqcharflags = branchfirstcharflags; } - + /* Now ensure that the reqchars match */ - + if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) || reqchar != branchreqchar) reqcharflags = REQ_NONE; @@ -8474,78 +8474,78 @@ for (;;) reqchar = branchreqchar; reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */ } - } - - /* If lookbehind, check that this branch matches a fixed-length string, and - put the length into the OP_REVERSE item. Temporarily mark the end of the + } + + /* If lookbehind, check that this branch matches a fixed-length string, and + put the length into the OP_REVERSE item. Temporarily mark the end of the branch with OP_END. If the branch contains OP_RECURSE, the result is -3 because there may be forward references that we can't check here. Set a flag to cause another lookbehind check at the end. Why not do it all at the end? Because common, erroneous checks are picked up here and the offset of the problem can be shown. */ - - if (lookbehind) - { - int fixed_length; - *code = OP_END; + + if (lookbehind) + { + int fixed_length; + *code = OP_END; fixed_length = find_fixedlength(last_branch, (options & PCRE_UTF8) != 0, FALSE, cd, NULL); - DPRINTF(("fixed length = %d\n", fixed_length)); + DPRINTF(("fixed length = %d\n", fixed_length)); if (fixed_length == -3) - { + { cd->check_lookbehind = TRUE; } else if (fixed_length < 0) { *errorcodeptr = (fixed_length == -2)? ERR36 : (fixed_length == -4)? ERR70: ERR25; - *ptrptr = ptr; - return FALSE; - } + *ptrptr = ptr; + return FALSE; + } else { if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length; PUT(reverse_count, 0, fixed_length); } - } - } - - /* Reached end of expression, either ')' or end of pattern. 
In the real - compile phase, go back through the alternative branches and reverse the chain - of offsets, with the field in the BRA item now becoming an offset to the - first alternative. If there are no alternatives, it points to the end of the - group. The length in the terminating ket is always the length of the whole + } + } + + /* Reached end of expression, either ')' or end of pattern. In the real + compile phase, go back through the alternative branches and reverse the chain + of offsets, with the field in the BRA item now becoming an offset to the + first alternative. If there are no alternatives, it points to the end of the + group. The length in the terminating ket is always the length of the whole bracketed item. Return leaving the pointer at the terminating char. */ - + if (*ptr != CHAR_VERTICAL_LINE) - { - if (lengthptr == NULL) - { + { + if (lengthptr == NULL) + { int branch_length = (int)(code - last_branch); - do - { - int prev_length = GET(last_branch, 1); - PUT(last_branch, 1, branch_length); - branch_length = prev_length; - last_branch -= branch_length; - } - while (branch_length > 0); - } - - /* Fill in the ket */ - - *code = OP_KET; + do + { + int prev_length = GET(last_branch, 1); + PUT(last_branch, 1, branch_length); + branch_length = prev_length; + last_branch -= branch_length; + } + while (branch_length > 0); + } + + /* Fill in the ket */ + + *code = OP_KET; PUT(code, 1, (int)(code - start_bracket)); - code += 1 + LINK_SIZE; - + code += 1 + LINK_SIZE; + /* If it was a capturing subpattern, check to see if it contained any recursive back references. If so, we must wrap it in atomic brackets. Because we are moving code along, we must ensure that any pending recursive references are updated. In any event, remove the block from the chain. 
*/ - + if (capnumber > 0) - { + { if (cd->open_caps->flag) { *code = OP_END; @@ -8562,139 +8562,139 @@ for (;;) length += 2 + 2*LINK_SIZE; } cd->open_caps = cd->open_caps->next; - } - - /* Retain the highest bracket number, in case resetting was used. */ - - cd->bracount = max_bracount; - - /* Set values to pass back */ - - *codeptr = code; - *ptrptr = ptr; + } + + /* Retain the highest bracket number, in case resetting was used. */ + + cd->bracount = max_bracount; + + /* Set values to pass back */ + + *codeptr = code; + *ptrptr = ptr; *firstcharptr = firstchar; *firstcharflagsptr = firstcharflags; *reqcharptr = reqchar; *reqcharflagsptr = reqcharflags; - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < length) - { - *errorcodeptr = ERR20; - return FALSE; - } - *lengthptr += length; - } - return TRUE; - } - - /* Another branch follows. In the pre-compile phase, we can move the code - pointer back to where it was for the start of the first branch. (That is, - pretend that each branch is the only one.) - - In the real compile phase, insert an ALT node. Its length field points back - to the previous branch while the bracket remains open. At the end the chain - is reversed. It's done like this so that the start of the bracket has a - zero offset until it is closed, making it possible to detect recursion. */ - - if (lengthptr != NULL) - { - code = *codeptr + 1 + LINK_SIZE + skipbytes; - length += 1 + LINK_SIZE; - } - else - { - *code = OP_ALT; + if (lengthptr != NULL) + { + if (OFLOW_MAX - *lengthptr < length) + { + *errorcodeptr = ERR20; + return FALSE; + } + *lengthptr += length; + } + return TRUE; + } + + /* Another branch follows. In the pre-compile phase, we can move the code + pointer back to where it was for the start of the first branch. (That is, + pretend that each branch is the only one.) + + In the real compile phase, insert an ALT node. Its length field points back + to the previous branch while the bracket remains open. 
At the end the chain + is reversed. It's done like this so that the start of the bracket has a + zero offset until it is closed, making it possible to detect recursion. */ + + if (lengthptr != NULL) + { + code = *codeptr + 1 + LINK_SIZE + skipbytes; + length += 1 + LINK_SIZE; + } + else + { + *code = OP_ALT; PUT(code, 1, (int)(code - last_branch)); bc.current_branch = last_branch = code; - code += 1 + LINK_SIZE; - } - - ptr++; - } -/* Control never reaches here */ -} - - - - -/************************************************* -* Check for anchored expression * -*************************************************/ - -/* Try to find out if this is an anchored regular expression. Consider each -alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket -all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then + code += 1 + LINK_SIZE; + } + + ptr++; + } +/* Control never reaches here */ +} + + + + +/************************************************* +* Check for anchored expression * +*************************************************/ + +/* Try to find out if this is an anchored regular expression. Consider each +alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket +all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then it's anchored. However, if this is a multiline pattern, then only OP_SOD will be found, because ^ generates OP_CIRCM in that mode. - -We can also consider a regex to be anchored if OP_SOM starts all its branches. -This is the code for \G, which means "match at start of match position, taking -into account the match offset". - -A branch is also implicitly anchored if it starts with .* and DOTALL is set, -because that will try the rest of the pattern at all possible matching points, -so there is no point trying again.... er .... - -.... except when the .* appears inside capturing parentheses, and there is a -subsequent back reference to those parentheses. 
We haven't enough information -to catch that case precisely. - -At first, the best we could do was to detect when .* was in capturing brackets -and the highest back reference was greater than or equal to that level. -However, by keeping a bitmap of the first 31 back references, we can catch some -of the more common cases more precisely. - + +We can also consider a regex to be anchored if OP_SOM starts all its branches. +This is the code for \G, which means "match at start of match position, taking +into account the match offset". + +A branch is also implicitly anchored if it starts with .* and DOTALL is set, +because that will try the rest of the pattern at all possible matching points, +so there is no point trying again.... er .... + +.... except when the .* appears inside capturing parentheses, and there is a +subsequent back reference to those parentheses. We haven't enough information +to catch that case precisely. + +At first, the best we could do was to detect when .* was in capturing brackets +and the highest back reference was greater than or equal to that level. +However, by keeping a bitmap of the first 31 back references, we can catch some +of the more common cases more precisely. + ... A second exception is when the .* appears inside an atomic group, because this prevents the number of characters it matches from being adjusted. 
-Arguments: - code points to start of expression (the bracket) - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach +Arguments: + code points to start of expression (the bracket) + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach cd points to the compile data block atomcount atomic group level - -Returns: TRUE or FALSE -*/ - -static BOOL + +Returns: TRUE or FALSE +*/ + +static BOOL is_anchored(register const pcre_uchar *code, unsigned int bracket_map, compile_data *cd, int atomcount) -{ -do { +{ +do { const pcre_uchar *scode = first_significant_code( code + PRIV(OP_lengths)[*code], FALSE); register int op = *scode; - - /* Non-capturing brackets */ - + + /* Non-capturing brackets */ + if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) - { + { if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; - } - - /* Capturing brackets */ - + } + + /* Capturing brackets */ + else if (op == OP_CBRA || op == OP_CBRAPOS || op == OP_SCBRA || op == OP_SCBRAPOS) - { - int n = GET2(scode, 1+LINK_SIZE); + { + int n = GET2(scode, 1+LINK_SIZE); int new_map = bracket_map | ((n < 32)? (1U << n) : 1); if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE; - } - + } + /* Positive forward assertion */ - + else if (op == OP_ASSERT) - { + { if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; - } - + } + /* Condition; not anchored if no second branch */ - + else if (op == OP_COND) { if (scode[GET(scode,1)] != OP_ALT) return FALSE; @@ -8713,60 +8713,60 @@ do { it isn't in brackets that are or may be referenced or inside an atomic group. 
*/ - else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || + else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)) - { + { if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 || atomcount > 0 || cd->had_pruneorskip) return FALSE; - } - - /* Check for explicit anchoring */ - + } + + /* Check for explicit anchoring */ + else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - -/************************************************* -* Check for starting with ^ or .* * -*************************************************/ - -/* This is called to find out if every branch starts with ^ or .* so that -"first char" processing can be done to speed things up in multiline -matching and for non-DOTALL patterns that start with .* (which must start at -the beginning or after \n). As in the case of is_anchored() (see above), we -have to take account of back references to capturing brackets that contain .* + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; +} + + + +/************************************************* +* Check for starting with ^ or .* * +*************************************************/ + +/* This is called to find out if every branch starts with ^ or .* so that +"first char" processing can be done to speed things up in multiline +matching and for non-DOTALL patterns that start with .* (which must start at +the beginning or after \n). As in the case of is_anchored() (see above), we +have to take account of back references to capturing brackets that contain .* because in that case we can't make the assumption. Also, the appearance of .* inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE or *SKIP does not count, because once again the assumption no longer holds. 
- -Arguments: - code points to start of expression (the bracket) - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach + +Arguments: + code points to start of expression (the bracket) + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach cd points to the compile data atomcount atomic group level inassert TRUE if in an assertion - -Returns: TRUE or FALSE -*/ - -static BOOL + +Returns: TRUE or FALSE +*/ + +static BOOL is_startline(const pcre_uchar *code, unsigned int bracket_map, compile_data *cd, int atomcount, BOOL inassert) -{ -do { +{ +do { const pcre_uchar *scode = first_significant_code( code + PRIV(OP_lengths)[*code], FALSE); register int op = *scode; - + /* If we are at the start of a conditional assertion group, *both* the conditional assertion *and* what follows the condition must satisfy the test for start of line. Other kinds of condition fail. Note that there may be an @@ -8796,33 +8796,33 @@ do { op = *scode; } - /* Non-capturing brackets */ - + /* Non-capturing brackets */ + if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) - { + { if (!is_startline(scode, bracket_map, cd, atomcount, inassert)) return FALSE; - } - - /* Capturing brackets */ - + } + + /* Capturing brackets */ + else if (op == OP_CBRA || op == OP_CBRAPOS || op == OP_SCBRA || op == OP_SCBRAPOS) - { - int n = GET2(scode, 1+LINK_SIZE); + { + int n = GET2(scode, 1+LINK_SIZE); int new_map = bracket_map | ((n < 32)? 
(1U << n) : 1); if (!is_startline(scode, new_map, cd, atomcount, inassert)) return FALSE; - } - + } + /* Positive forward assertions */ - + else if (op == OP_ASSERT) { if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE; } - + /* Atomic brackets */ - + else if (op == OP_ONCE || op == OP_ONCE_NC) { if (!is_startline(scode, bracket_map, cd, atomcount + 1, inassert)) return FALSE; @@ -8834,60 +8834,60 @@ do { example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the start of a line. */ - else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) - { + else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) + { if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 || atomcount > 0 || cd->had_pruneorskip || inassert) return FALSE; - } - + } + /* Check for explicit circumflex; anything else gives a FALSE result. Note in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC because the number of characters matched by .* cannot be adjusted inside them. */ - + else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; - - /* Move on to the next alternative */ - - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - -/************************************************* -* Check for asserted fixed first char * -*************************************************/ - -/* During compilation, the "first char" settings from forward assertions are -discarded, because they can cause conflicts with actual literals that follow. 
-However, if we end up without a first char setting for an unanchored pattern, -it is worth scanning the regex to see if there is an initial asserted first + + /* Move on to the next alternative */ + + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; +} + + + +/************************************************* +* Check for asserted fixed first char * +*************************************************/ + +/* During compilation, the "first char" settings from forward assertions are +discarded, because they can cause conflicts with actual literals that follow. +However, if we end up without a first char setting for an unanchored pattern, +it is worth scanning the regex to see if there is an initial asserted first char. If all branches start with the same asserted char, or with a non-conditional bracket all of whose alternatives start with the same asserted char (recurse ad lib), then we return that char, with the flags set to zero or REQ_CASELESS; otherwise return zero with REQ_NONE in the flags. 
- -Arguments: - code points to start of expression (the bracket) + +Arguments: + code points to start of expression (the bracket) flags points to the first char flags, or to REQ_NONE - inassert TRUE if in an assertion - + inassert TRUE if in an assertion + Returns: the fixed first char, or 0 with REQ_NONE in flags -*/ - +*/ + static pcre_uint32 find_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags, BOOL inassert) -{ +{ register pcre_uint32 c = 0; int cflags = REQ_NONE; *flags = REQ_NONE; -do { +do { pcre_uint32 d; int dflags; int xl = (*code == OP_CBRA || *code == OP_SCBRA || @@ -8895,39 +8895,39 @@ do { const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); register pcre_uchar op = *scode; - - switch(op) - { - default: + + switch(op) + { + default: return 0; - - case OP_BRA: + + case OP_BRA: case OP_BRAPOS: - case OP_CBRA: + case OP_CBRA: case OP_SCBRA: case OP_CBRAPOS: case OP_SCBRAPOS: - case OP_ASSERT: - case OP_ONCE: + case OP_ASSERT: + case OP_ONCE: case OP_ONCE_NC: d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT); if (dflags < 0) return 0; if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0; - break; - + break; + case OP_EXACT: scode += IMM2_SIZE; /* Fall through */ - - case OP_CHAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: + + case OP_CHAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: if (!inassert) return 0; if (cflags < 0) { c = scode[1]; cflags = 0; } else if (c != scode[1]) return 0; - break; + break; case OP_EXACTI: scode += IMM2_SIZE; @@ -8941,19 +8941,19 @@ do { if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } else if (c != scode[1]) return 0; break; - } - - code += GET(code, 1); - } -while (*code == OP_ALT); + } + + code += GET(code, 1); + } +while (*code == OP_ALT); *flags = cflags; -return c; -} - - - -/************************************************* +return c; +} + + + +/************************************************* * Add an 
entry to the name/number table * *************************************************/ @@ -9009,31 +9009,31 @@ cd->names_found++; /************************************************* -* Compile a Regular Expression * -*************************************************/ - -/* This function takes a string and returns a pointer to a block of store -holding a compiled version of the expression. The original API for this -function had no error code return variable; it is retained for backwards -compatibility. The new function is given a new name. - -Arguments: - pattern the regular expression - options various option bits - errorcodeptr pointer to error code variable (pcre_compile2() only) - can be NULL if you don't want a code value - errorptr pointer to pointer to error text - erroroffset ptr offset in pattern where error was detected - tables pointer to character tables or NULL - -Returns: pointer to compiled data block, or NULL on error, - with errorptr and erroroffset set -*/ - +* Compile a Regular Expression * +*************************************************/ + +/* This function takes a string and returns a pointer to a block of store +holding a compiled version of the expression. The original API for this +function had no error code return variable; it is retained for backwards +compatibility. The new function is given a new name. 
+ +Arguments: + pattern the regular expression + options various option bits + errorcodeptr pointer to error code variable (pcre_compile2() only) + can be NULL if you don't want a code value + errorptr pointer to pointer to error text + erroroffset ptr offset in pattern where error was detected + tables pointer to character tables or NULL + +Returns: pointer to compiled data block, or NULL on error, + with errorptr and erroroffset set +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION -pcre_compile(const char *pattern, int options, const char **errorptr, - int *erroroffset, const unsigned char *tables) +pcre_compile(const char *pattern, int options, const char **errorptr, + int *erroroffset, const unsigned char *tables) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr, @@ -9043,21 +9043,21 @@ PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION pcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr, int *erroroffset, const unsigned char *tables) #endif -{ +{ #if defined COMPILE_PCRE8 -return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); +return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); #elif defined COMPILE_PCRE16 return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables); #elif defined COMPILE_PCRE32 return pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables); #endif -} - - +} + + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION -pcre_compile2(const char *pattern, int options, int *errorcodeptr, - const char **errorptr, int *erroroffset, const unsigned char *tables) +pcre_compile2(const char *pattern, int options, int *errorcodeptr, + const char **errorptr, int *erroroffset, const unsigned char *tables) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION pcre16_compile2(PCRE_SPTR16 pattern, int options, int 
*errorcodeptr, @@ -9067,97 +9067,97 @@ PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr, const char **errorptr, int *erroroffset, const unsigned char *tables) #endif -{ +{ REAL_PCRE *re; -int length = 1; /* For final END opcode */ +int length = 1; /* For final END opcode */ pcre_int32 firstcharflags, reqcharflags; pcre_uint32 firstchar, reqchar; pcre_uint32 limit_match = PCRE_UINT32_MAX; pcre_uint32 limit_recursion = PCRE_UINT32_MAX; int newline; -int errorcode = 0; -int skipatstart = 0; +int errorcode = 0; +int skipatstart = 0; BOOL utf; BOOL never_utf = FALSE; -size_t size; +size_t size; pcre_uchar *code; const pcre_uchar *codestart; const pcre_uchar *ptr; -compile_data compile_block; -compile_data *cd = &compile_block; - -/* This space is used for "compiling" into during the first phase, when we are -computing the amount of memory that is needed. Compiled items are thrown away -as soon as possible, so that a fairly large buffer should be sufficient for -this purpose. The same space is used in the second phase for remembering where +compile_data compile_block; +compile_data *cd = &compile_block; + +/* This space is used for "compiling" into during the first phase, when we are +computing the amount of memory that is needed. Compiled items are thrown away +as soon as possible, so that a fairly large buffer should be sufficient for +this purpose. The same space is used in the second phase for remembering where to fill in forward references to subpatterns. That may overflow, in which case new memory is obtained from malloc(). */ - + pcre_uchar cworkspace[COMPILE_WORK_SIZE]; - + /* This vector is used for remembering name groups during the pre-compile. In a similar way to cworkspace, it can be expanded using malloc() if necessary. */ - + named_group named_groups[NAMED_GROUP_LIST_SIZE]; -/* Set this early so that early errors get offset 0. */ - +/* Set this early so that early errors get offset 0. 
*/ + ptr = (const pcre_uchar *)pattern; - -/* We can't pass back an error message if errorptr is NULL; I guess the best we -can do is just return NULL, but we can set a code value if there is a code -pointer. */ - -if (errorptr == NULL) - { - if (errorcodeptr != NULL) *errorcodeptr = 99; - return NULL; - } - -*errorptr = NULL; -if (errorcodeptr != NULL) *errorcodeptr = ERR0; - -/* However, we can give a message for this error */ - -if (erroroffset == NULL) - { - errorcode = ERR16; - goto PCRE_EARLY_ERROR_RETURN2; - } - -*erroroffset = 0; - + +/* We can't pass back an error message if errorptr is NULL; I guess the best we +can do is just return NULL, but we can set a code value if there is a code +pointer. */ + +if (errorptr == NULL) + { + if (errorcodeptr != NULL) *errorcodeptr = 99; + return NULL; + } + +*errorptr = NULL; +if (errorcodeptr != NULL) *errorcodeptr = ERR0; + +/* However, we can give a message for this error */ + +if (erroroffset == NULL) + { + errorcode = ERR16; + goto PCRE_EARLY_ERROR_RETURN2; + } + +*erroroffset = 0; + /* Set up pointers to the individual character tables */ - + if (tables == NULL) tables = PRIV(default_tables); cd->lcc = tables + lcc_offset; cd->fcc = tables + fcc_offset; cd->cbits = tables + cbits_offset; cd->ctypes = tables + ctypes_offset; - + /* Check that all undefined public option bits are zero */ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) - { - errorcode = ERR17; - goto PCRE_EARLY_ERROR_RETURN; - } - + { + errorcode = ERR17; + goto PCRE_EARLY_ERROR_RETURN; + } + /* If PCRE_NEVER_UTF is set, remember it. */ - + if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE; - -/* Check for global one-time settings at the start of the pattern, and remember -the offset for later. */ - + +/* Check for global one-time settings at the start of the pattern, and remember +the offset for later. 
*/ + cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && ptr[skipatstart+1] == CHAR_ASTERISK) - { - int newnl = 0; - int newbsr = 0; - + { + int newnl = 0; + int newbsr = 0; + /* For completeness and backward compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is generic and always supported. Note that PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */ @@ -9223,28 +9223,28 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */ } if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0) - { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } + { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0) - { skipatstart += 5; newnl = PCRE_NEWLINE_LF; } + { skipatstart += 5; newnl = PCRE_NEWLINE_LF; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5) == 0) - { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } + { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0) - { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; } + { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0) - { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; } - + { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; } + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0) - { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; } + { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0) - { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; } - - if (newnl != 0) - options = (options & ~PCRE_NEWLINE_BITS) | newnl; - else if (newbsr != 0) - options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr; - else break; - } - + { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; } + + if (newnl != 0) + options = 
(options & ~PCRE_NEWLINE_BITS) | newnl; + else if (newbsr != 0) + options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr; + else break; + } + /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ utf = (options & PCRE_UTF8) != 0; if (utf && never_utf) @@ -9289,86 +9289,86 @@ if ((options & PCRE_UCP) != 0) } #endif -/* Check validity of \R options. */ - +/* Check validity of \R options. */ + if ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) - { + { errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN; - } - -/* Handle different types of newline. The three bits give seven cases. The -current code allows for fixed one- or two-byte sequences, plus "any" and -"anycrlf". */ - -switch (options & PCRE_NEWLINE_BITS) - { - case 0: newline = NEWLINE; break; /* Build-time default */ + } + +/* Handle different types of newline. The three bits give seven cases. The +current code allows for fixed one- or two-byte sequences, plus "any" and +"anycrlf". */ + +switch (options & PCRE_NEWLINE_BITS) + { + case 0: newline = NEWLINE; break; /* Build-time default */ case PCRE_NEWLINE_CR: newline = CHAR_CR; break; case PCRE_NEWLINE_LF: newline = CHAR_NL; break; - case PCRE_NEWLINE_CR+ + case PCRE_NEWLINE_CR+ PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break; - case PCRE_NEWLINE_ANY: newline = -1; break; - case PCRE_NEWLINE_ANYCRLF: newline = -2; break; - default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN; - } - -if (newline == -2) - { - cd->nltype = NLTYPE_ANYCRLF; - } -else if (newline < 0) - { - cd->nltype = NLTYPE_ANY; - } -else - { - cd->nltype = NLTYPE_FIXED; - if (newline > 255) - { - cd->nllen = 2; - cd->nl[0] = (newline >> 8) & 255; - cd->nl[1] = newline & 255; - } - else - { - cd->nllen = 1; - cd->nl[0] = newline; - } - } - -/* Maximum back reference and backref bitmap. The bitmap records up to 31 back -references to help in deciding whether (.*) can be treated as anchored or not. 
-*/ - -cd->top_backref = 0; -cd->backref_map = 0; - -/* Reflect pattern for debugging output */ - -DPRINTF(("------------------------------------------------------------------\n")); + case PCRE_NEWLINE_ANY: newline = -1; break; + case PCRE_NEWLINE_ANYCRLF: newline = -2; break; + default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN; + } + +if (newline == -2) + { + cd->nltype = NLTYPE_ANYCRLF; + } +else if (newline < 0) + { + cd->nltype = NLTYPE_ANY; + } +else + { + cd->nltype = NLTYPE_FIXED; + if (newline > 255) + { + cd->nllen = 2; + cd->nl[0] = (newline >> 8) & 255; + cd->nl[1] = newline & 255; + } + else + { + cd->nllen = 1; + cd->nl[0] = newline; + } + } + +/* Maximum back reference and backref bitmap. The bitmap records up to 31 back +references to help in deciding whether (.*) can be treated as anchored or not. +*/ + +cd->top_backref = 0; +cd->backref_map = 0; + +/* Reflect pattern for debugging output */ + +DPRINTF(("------------------------------------------------------------------\n")); #ifdef PCRE_DEBUG print_puchar(stdout, (PCRE_PUCHAR)pattern); #endif DPRINTF(("\n")); - -/* Pretend to compile the pattern while actually just accumulating the length -of memory required. This behaviour is triggered by passing a non-NULL final -argument to compile_regex(). We pass a block of workspace (cworkspace) for it -to compile parts of the pattern into; the compiled code is discarded when it is -no longer needed, so hopefully this workspace will never overflow, though there -is a test for its doing so. */ - -cd->bracount = cd->final_bracount = 0; -cd->names_found = 0; -cd->name_entry_size = 0; -cd->name_table = NULL; + +/* Pretend to compile the pattern while actually just accumulating the length +of memory required. This behaviour is triggered by passing a non-NULL final +argument to compile_regex(). 
We pass a block of workspace (cworkspace) for it +to compile parts of the pattern into; the compiled code is discarded when it is +no longer needed, so hopefully this workspace will never overflow, though there +is a test for its doing so. */ + +cd->bracount = cd->final_bracount = 0; +cd->names_found = 0; +cd->name_entry_size = 0; +cd->name_table = NULL; cd->dupnames = FALSE; cd->dupgroups = FALSE; cd->namedrefcount = 0; -cd->start_code = cworkspace; -cd->hwm = cworkspace; +cd->start_code = cworkspace; +cd->hwm = cworkspace; cd->iscondassert = FALSE; cd->start_workspace = cworkspace; cd->workspace_size = COMPILE_WORK_SIZE; @@ -9376,102 +9376,102 @@ cd->named_groups = named_groups; cd->named_group_list_size = NAMED_GROUP_LIST_SIZE; cd->start_pattern = (const pcre_uchar *)pattern; cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); -cd->req_varyopt = 0; +cd->req_varyopt = 0; cd->parens_depth = 0; cd->assert_depth = 0; cd->max_lookbehind = 0; -cd->external_options = options; +cd->external_options = options; cd->open_caps = NULL; - -/* Now do the pre-compile. On error, errorcode will be set non-zero, so we -don't need to look at the result of the function here. The initial options have -been put into the cd block so that they can be changed if an option setting is -found within the regex right at the beginning. Bringing initial option settings -outside can help speed up starting point checks. */ - -ptr += skipatstart; -code = cworkspace; -*code = OP_BRA; + +/* Now do the pre-compile. On error, errorcode will be set non-zero, so we +don't need to look at the result of the function here. The initial options have +been put into the cd block so that they can be changed if an option setting is +found within the regex right at the beginning. Bringing initial option settings +outside can help speed up starting point checks. 
*/ + +ptr += skipatstart; +code = cworkspace; +*code = OP_BRA; (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, &length); -if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; - -DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, +if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; + +DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, (int)(cd->hwm - cworkspace))); - -if (length > MAX_PATTERN_SIZE) - { - errorcode = ERR20; - goto PCRE_EARLY_ERROR_RETURN; - } - + +if (length > MAX_PATTERN_SIZE) + { + errorcode = ERR20; + goto PCRE_EARLY_ERROR_RETURN; + } + /* Compute the size of the data block for storing the compiled pattern. Integer overflow should no longer be possible because nowadays we limit the maximum value of cd->names_found and cd->name_entry_size. */ - + size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar); - + /* Get the memory. */ re = (REAL_PCRE *)(PUBL(malloc))(size); -if (re == NULL) - { - errorcode = ERR21; - goto PCRE_EARLY_ERROR_RETURN; - } - -/* Put in the magic number, and save the sizes, initial options, internal -flags, and character table pointer. NULL is used for the default character -tables. The nullpad field is at the end; it's there to help in the case when a -regex compiled on a system with 4-byte pointers is run on another with 8-byte -pointers. */ - -re->magic_number = MAGIC_NUMBER; +if (re == NULL) + { + errorcode = ERR21; + goto PCRE_EARLY_ERROR_RETURN; + } + +/* Put in the magic number, and save the sizes, initial options, internal +flags, and character table pointer. NULL is used for the default character +tables. The nullpad field is at the end; it's there to help in the case when a +regex compiled on a system with 4-byte pointers is run on another with 8-byte +pointers. 
*/ + +re->magic_number = MAGIC_NUMBER; re->size = (int)size; -re->options = cd->external_options; -re->flags = cd->external_flags; +re->options = cd->external_options; +re->flags = cd->external_flags; re->limit_match = limit_match; re->limit_recursion = limit_recursion; re->first_char = 0; re->req_char = 0; re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); -re->name_entry_size = cd->name_entry_size; -re->name_count = cd->names_found; -re->ref_count = 0; +re->name_entry_size = cd->name_entry_size; +re->name_count = cd->names_found; +re->ref_count = 0; re->tables = (tables == PRIV(default_tables))? NULL : tables; -re->nullpad = NULL; +re->nullpad = NULL; #ifdef COMPILE_PCRE32 re->dummy = 0; #else re->dummy1 = re->dummy2 = re->dummy3 = 0; #endif - -/* The starting points of the name/number translation table and of the code are -passed around in the compile data block. The start/end pattern and initial -options are already set from the pre-compile phase, as is the name_entry_size -field. Reset the bracket count and the names_found field. Also reset the hwm -field; this time it's used for remembering forward references to subpatterns. -*/ - -cd->final_bracount = cd->bracount; /* Save for checking forward references */ + +/* The starting points of the name/number translation table and of the code are +passed around in the compile data block. The start/end pattern and initial +options are already set from the pre-compile phase, as is the name_entry_size +field. Reset the bracket count and the names_found field. Also reset the hwm +field; this time it's used for remembering forward references to subpatterns. 
+*/ + +cd->final_bracount = cd->bracount; /* Save for checking forward references */ cd->parens_depth = 0; cd->assert_depth = 0; -cd->bracount = 0; +cd->bracount = 0; cd->max_lookbehind = 0; cd->name_table = (pcre_uchar *)re + re->name_table_offset; -codestart = cd->name_table + re->name_entry_size * re->name_count; -cd->start_code = codestart; +codestart = cd->name_table + re->name_entry_size * re->name_count; +cd->start_code = codestart; cd->hwm = (pcre_uchar *)(cd->start_workspace); cd->iscondassert = FALSE; -cd->req_varyopt = 0; -cd->had_accept = FALSE; +cd->req_varyopt = 0; +cd->had_accept = FALSE; cd->had_pruneorskip = FALSE; cd->check_lookbehind = FALSE; cd->open_caps = NULL; - + /* If any named groups were found, create the name/number table from the list created in the first pass. */ @@ -9486,51 +9486,51 @@ if (cd->names_found > 0) (PUBL(free))((void *)cd->named_groups); } -/* Set up a starting, non-extracting bracket, then compile the expression. On -error, errorcode will be set non-zero, so we don't need to look at the result -of the function here. */ - +/* Set up a starting, non-extracting bracket, then compile the expression. On +error, errorcode will be set non-zero, so we don't need to look at the result +of the function here. */ + ptr = (const pcre_uchar *)pattern + skipatstart; code = (pcre_uchar *)codestart; -*code = OP_BRA; +*code = OP_BRA; (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL); -re->top_bracket = cd->bracount; -re->top_backref = cd->top_backref; +re->top_bracket = cd->bracount; +re->top_backref = cd->top_backref; re->max_lookbehind = cd->max_lookbehind; re->flags = cd->external_flags | PCRE_MODE; - + if (cd->had_accept) { reqchar = 0; /* Must disable after (*ACCEPT) */ reqcharflags = REQ_NONE; } - -/* If not reached end of pattern on success, there's an excess bracket. 
*/ - + +/* If not reached end of pattern on success, there's an excess bracket. */ + if (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22; - -/* Fill in the terminating state and check for disastrous overflow, but -if debugging, leave the test till after things are printed out. */ - -*code++ = OP_END; - + +/* Fill in the terminating state and check for disastrous overflow, but +if debugging, leave the test till after things are printed out. */ + +*code++ = OP_END; + #ifndef PCRE_DEBUG -if (code - codestart > length) errorcode = ERR23; -#endif - +if (code - codestart > length) errorcode = ERR23; +#endif + #ifdef SUPPORT_VALGRIND /* If the estimated length exceeds the really used length, mark the extra allocated memory as unaddressable, so that any out-of-bound reads can be detected. */ VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar)); #endif - + /* Fill in any forward references that are required. There may be repeated references; optimize for them, as searching a large regex takes time. */ if (cd->hwm > cd->start_workspace) - { + { int prev_recno = -1; const pcre_uchar *groupptr = NULL; while (errorcode == 0 && cd->hwm > cd->start_workspace) @@ -9557,8 +9557,8 @@ if (cd->hwm > cd->start_workspace) if (groupptr == NULL) errorcode = ERR53; else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart)); } - } - + } + /* If the workspace had to be expanded, free the new memory. Set the pointer to NULL to indicate that forward references have been filled in. */ @@ -9566,11 +9566,11 @@ if (cd->workspace_size > COMPILE_WORK_SIZE) (PUBL(free))((void *)cd->start_workspace); cd->start_workspace = NULL; -/* Give an error if there's back reference to a non-existent capturing -subpattern. */ - -if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; - +/* Give an error if there's back reference to a non-existent capturing +subpattern. 
*/ + +if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; + /* Unless disabled, check whether any single character iterators can be auto-possessified. The function overwrites the appropriate opcode values, so the type of the pointer must be cast. NOTE: the intermediate variable "temp" is @@ -9628,39 +9628,39 @@ if (errorcode == 0 && cd->check_lookbehind) } } -/* Failed to compile, or error while post-processing */ - -if (errorcode != 0) - { +/* Failed to compile, or error while post-processing */ + +if (errorcode != 0) + { (PUBL(free))(re); - PCRE_EARLY_ERROR_RETURN: + PCRE_EARLY_ERROR_RETURN: *erroroffset = (int)(ptr - (const pcre_uchar *)pattern); - PCRE_EARLY_ERROR_RETURN2: - *errorptr = find_error_text(errorcode); - if (errorcodeptr != NULL) *errorcodeptr = errorcode; - return NULL; - } - -/* If the anchored option was not passed, set the flag if we can determine that + PCRE_EARLY_ERROR_RETURN2: + *errorptr = find_error_text(errorcode); + if (errorcodeptr != NULL) *errorcodeptr = errorcode; + return NULL; + } + +/* If the anchored option was not passed, set the flag if we can determine that the pattern is anchored by virtue of ^ characters or \A or anything else, such as starting with non-atomic .* when DOTALL is set and there are no occurrences of *PRUNE or *SKIP. - -Otherwise, if we know what the first byte has to be, save it, because that -speeds up unanchored matches no end. If not, see if we can set the -PCRE_STARTLINE flag. This is helpful for multiline matches when all branches + +Otherwise, if we know what the first byte has to be, save it, because that +speeds up unanchored matches no end. If not, see if we can set the +PCRE_STARTLINE flag. This is helpful for multiline matches when all branches start with ^. and also when all branches start with non-atomic .* for non-DOTALL matches when *PRUNE and SKIP are not present. 
*/ - -if ((re->options & PCRE_ANCHORED) == 0) - { + +if ((re->options & PCRE_ANCHORED) == 0) + { if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED; - else - { + else + { if (firstcharflags < 0) firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE); if (firstcharflags >= 0) /* Remove caseless flag for non-caseable chars */ - { + { #if defined COMPILE_PCRE8 re->first_char = firstchar & 0xff; #elif defined COMPILE_PCRE16 @@ -9689,20 +9689,20 @@ if ((re->options & PCRE_ANCHORED) == 0) re->flags |= PCRE_FCH_CASELESS; } - re->flags |= PCRE_FIRSTSET; - } + re->flags |= PCRE_FIRSTSET; + } else if (is_startline(codestart, 0, cd, 0, FALSE)) re->flags |= PCRE_STARTLINE; - } - } - -/* For an anchored pattern, we use the "required byte" only if it follows a -variable length item in the regex. Remove the caseless flag for non-caseable -bytes. */ - + } + } + +/* For an anchored pattern, we use the "required byte" only if it follows a +variable length item in the regex. Remove the caseless flag for non-caseable +bytes. */ + if (reqcharflags >= 0 && ((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0)) - { + { #if defined COMPILE_PCRE8 re->req_char = reqchar & 0xff; #elif defined COMPILE_PCRE16 @@ -9730,36 +9730,36 @@ if (reqcharflags >= 0 && re->flags |= PCRE_RCH_CASELESS; } - re->flags |= PCRE_REQCHSET; - } - -/* Print out the compiled data if debugging is enabled. This is never the -case when building a production library. */ - + re->flags |= PCRE_REQCHSET; + } + +/* Print out the compiled data if debugging is enabled. This is never the +case when building a production library. 
*/ + #ifdef PCRE_DEBUG -printf("Length = %d top_bracket = %d top_backref = %d\n", - length, re->top_bracket, re->top_backref); - -printf("Options=%08x\n", re->options); - -if ((re->flags & PCRE_FIRSTSET) != 0) - { +printf("Length = %d top_bracket = %d top_backref = %d\n", + length, re->top_bracket, re->top_backref); + +printf("Options=%08x\n", re->options); + +if ((re->flags & PCRE_FIRSTSET) != 0) + { pcre_uchar ch = re->first_char; const char *caseless = ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)"; if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless); - else printf("First char = \\x%02x%s\n", ch, caseless); - } - -if ((re->flags & PCRE_REQCHSET) != 0) - { + else printf("First char = \\x%02x%s\n", ch, caseless); + } + +if ((re->flags & PCRE_REQCHSET) != 0) + { pcre_uchar ch = re->req_char; const char *caseless = ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)"; if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless); - else printf("Req char = \\x%02x%s\n", ch, caseless); - } - + else printf("Req char = \\x%02x%s\n", ch, caseless); + } + #if defined COMPILE_PCRE8 pcre_printint((pcre *)re, stdout, TRUE); #elif defined COMPILE_PCRE16 @@ -9767,20 +9767,20 @@ pcre16_printint((pcre *)re, stdout, TRUE); #elif defined COMPILE_PCRE32 pcre32_printint((pcre *)re, stdout, TRUE); #endif - -/* This check is done here in the debugging case so that the code that -was compiled can be seen. */ - -if (code - codestart > length) - { + +/* This check is done here in the debugging case so that the code that +was compiled can be seen. 
*/ + +if (code - codestart > length) + { (PUBL(free))(re); - *errorptr = find_error_text(ERR23); + *errorptr = find_error_text(ERR23); *erroroffset = ptr - (pcre_uchar *)pattern; - if (errorcodeptr != NULL) *errorcodeptr = ERR23; - return NULL; - } + if (errorcodeptr != NULL) *errorcodeptr = ERR23; + return NULL; + } #endif /* PCRE_DEBUG */ - + /* Check for a pattern than can match an empty string, so that this information can be provided to applications. */ @@ -9796,12 +9796,12 @@ do while (*codestart == OP_ALT); #if defined COMPILE_PCRE8 -return (pcre *)re; +return (pcre *)re; #elif defined COMPILE_PCRE16 return (pcre16 *)re; #elif defined COMPILE_PCRE32 return (pcre32 *)re; #endif -} - -/* End of pcre_compile.c */ +} + +/* End of pcre_compile.c */ diff --git a/contrib/libs/pcre/pcre_config.c b/contrib/libs/pcre/pcre_config.c index 6c303244fc0..3c5364e2f85 100644 --- a/contrib/libs/pcre/pcre_config.c +++ b/contrib/libs/pcre/pcre_config.c @@ -1,73 +1,73 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_config(). 
*/ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_config(). */ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - +#endif + /* Keep the original link size. 
*/ static int real_link_size = LINK_SIZE; -#include "pcre_internal.h" - - -/************************************************* -* Return info about what features are configured * -*************************************************/ - -/* This function has an extensible interface so that additional items can be -added compatibly. - -Arguments: - what what information is required - where where to put the information - -Returns: 0 if data returned, negative on error -*/ - +#include "pcre_internal.h" + + +/************************************************* +* Return info about what features are configured * +*************************************************/ + +/* This function has an extensible interface so that additional items can be +added compatibly. + +Arguments: + what what information is required + where where to put the information + +Returns: 0 if data returned, negative on error +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_config(int what, void *where) +pcre_config(int what, void *where) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_config(int what, void *where) @@ -75,22 +75,22 @@ pcre16_config(int what, void *where) PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_config(int what, void *where) #endif -{ -switch (what) - { - case PCRE_CONFIG_UTF8: +{ +switch (what) + { + case PCRE_CONFIG_UTF8: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 *((int *)where) = 0; return PCRE_ERROR_BADOPTION; #else #if defined SUPPORT_UTF - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; + *((int *)where) = 1; +#else + *((int *)where) = 0; +#endif + break; #endif - + case PCRE_CONFIG_UTF16: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE32 *((int *)where) = 0; @@ -117,14 +117,14 @@ switch (what) break; #endif - case PCRE_CONFIG_UNICODE_PROPERTIES: -#ifdef SUPPORT_UCP - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; - + case PCRE_CONFIG_UNICODE_PROPERTIES: +#ifdef SUPPORT_UCP 
+ *((int *)where) = 1; +#else + *((int *)where) = 0; +#endif + break; + case PCRE_CONFIG_JIT: #ifdef SUPPORT_JIT *((int *)where) = 1; @@ -141,50 +141,50 @@ switch (what) #endif break; - case PCRE_CONFIG_NEWLINE: - *((int *)where) = NEWLINE; - break; - - case PCRE_CONFIG_BSR: -#ifdef BSR_ANYCRLF - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; - - case PCRE_CONFIG_LINK_SIZE: + case PCRE_CONFIG_NEWLINE: + *((int *)where) = NEWLINE; + break; + + case PCRE_CONFIG_BSR: +#ifdef BSR_ANYCRLF + *((int *)where) = 1; +#else + *((int *)where) = 0; +#endif + break; + + case PCRE_CONFIG_LINK_SIZE: *((int *)where) = real_link_size; - break; - - case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD: - *((int *)where) = POSIX_MALLOC_THRESHOLD; - break; - + break; + + case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD: + *((int *)where) = POSIX_MALLOC_THRESHOLD; + break; + case PCRE_CONFIG_PARENS_LIMIT: *((unsigned long int *)where) = PARENS_NEST_LIMIT; break; - case PCRE_CONFIG_MATCH_LIMIT: + case PCRE_CONFIG_MATCH_LIMIT: *((unsigned long int *)where) = MATCH_LIMIT; - break; - - case PCRE_CONFIG_MATCH_LIMIT_RECURSION: + break; + + case PCRE_CONFIG_MATCH_LIMIT_RECURSION: *((unsigned long int *)where) = MATCH_LIMIT_RECURSION; - break; - - case PCRE_CONFIG_STACKRECURSE: -#ifdef NO_RECURSE - *((int *)where) = 0; -#else - *((int *)where) = 1; -#endif - break; - - default: return PCRE_ERROR_BADOPTION; - } - -return 0; -} - -/* End of pcre_config.c */ + break; + + case PCRE_CONFIG_STACKRECURSE: +#ifdef NO_RECURSE + *((int *)where) = 0; +#else + *((int *)where) = 1; +#endif + break; + + default: return PCRE_ERROR_BADOPTION; + } + +return 0; +} + +/* End of pcre_config.c */ diff --git a/contrib/libs/pcre/pcre_config.h b/contrib/libs/pcre/pcre_config.h index ebc9c01fc30..622b2ec59ba 100644 --- a/contrib/libs/pcre/pcre_config.h +++ b/contrib/libs/pcre/pcre_config.h @@ -53,8 +53,8 @@ sure both macros are undefined; an emulation function will then be used. 
*/ /* #undef EBCDIC_NL25 */ /* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - +#define HAVE_BCOPY 1 + /* Define to 1 if you have the header file. */ /* #undef HAVE_BITS_TYPE_TRAITS_H */ @@ -76,15 +76,15 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if the system has the type `long long'. */ +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if the system has the type `long long'. */ #define HAVE_LONG_LONG 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE 1 + /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 @@ -100,27 +100,27 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_READLINE_READLINE_H */ -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strerror' function. */ +#define HAVE_STRERROR 1 + /* Define to 1 if you have the header file. */ #define HAVE_STRING 1 -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 - + /* Define to 1 if you have `strtoimax'. */ /* #undef HAVE_STRTOIMAX */ - + /* Define to 1 if you have `strtoll'. 
*/ /* #undef HAVE_STRTOLL */ @@ -139,9 +139,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 -/* Define to 1 if the system has the type `unsigned long long'. */ +/* Define to 1 if the system has the type `unsigned long long'. */ #define HAVE_UNSIGNED_LONG_LONG 1 - + /* Define to 1 if the compiler supports simple visibility declarations. */ #define HAVE_VISIBILITY 1 @@ -154,44 +154,44 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have `_strtoi64'. */ /* #undef HAVE__STRTOI64 */ -/* The value of LINK_SIZE determines the number of bytes used to store links - as offsets within the compiled regex. The default is 2, which allows for - compiled patterns up to 64K long. This covers the vast majority of cases. - However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows +/* The value of LINK_SIZE determines the number of bytes used to store links + as offsets within the compiled regex. The default is 2, which allows for + compiled patterns up to 64K long. This covers the vast majority of cases. + However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for longer patterns in extreme cases. */ -#define LINK_SIZE 2 - +#define LINK_SIZE 2 + /* Define to the sub-directory where libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" -/* The value of MATCH_LIMIT determines the default number of times the - internal match() function can be called during a single execution of - pcre_exec(). There is a runtime interface for setting a different limit. - The limit exists in order to catch runaway regular expressions that take - for ever to determine that they do not match. The default is set very large +/* The value of MATCH_LIMIT determines the default number of times the + internal match() function can be called during a single execution of + pcre_exec(). 
There is a runtime interface for setting a different limit. + The limit exists in order to catch runaway regular expressions that take + for ever to determine that they do not match. The default is set very large so that it does not accidentally catch legitimate cases. */ -#define MATCH_LIMIT 10000000 - -/* The above limit applies to all calls of match(), whether or not they - increase the recursion depth. In some environments it is desirable to limit - the depth of recursive calls of match() more strictly, in order to restrict - the maximum amount of stack (or heap, if NO_RECURSE is defined) that is - used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of - match(). To have any useful effect, it must be less than the value of - MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is +#define MATCH_LIMIT 10000000 + +/* The above limit applies to all calls of match(), whether or not they + increase the recursion depth. In some environments it is desirable to limit + the depth of recursive calls of match() more strictly, in order to restrict + the maximum amount of stack (or heap, if NO_RECURSE is defined) that is + used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of + match(). To have any useful effect, it must be less than the value of + MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is a runtime method for setting a different limit. */ -#define MATCH_LIMIT_RECURSION MATCH_LIMIT - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_COUNT 10000 - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. 
*/ -#define MAX_NAME_SIZE 32 - +#define MATCH_LIMIT_RECURSION MATCH_LIMIT + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#define MAX_NAME_COUNT 10000 + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#define MAX_NAME_SIZE 32 + /* The value of NEWLINE determines the default newline character sequence. PCRE client programs can override this by selecting other values at run time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 @@ -200,38 +200,38 @@ sure both macros are undefined; an emulation function will then be used. */ 0x25) that are used as the NL line terminator that is equivalent to ASCII LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY), or -2 (ANYCRLF). */ -#define NEWLINE 10 - -/* PCRE uses recursive function calls to handle backtracking while matching. - This can sometimes be a problem on systems that have stacks of limited +#define NEWLINE 10 + +/* PCRE uses recursive function calls to handle backtracking while matching. + This can sometimes be a problem on systems that have stacks of limited size. Define NO_RECURSE to any value to get a version that doesn't use recursion in the match() function; instead it creates its own stack by steam using pcre_recurse_malloc() to obtain memory from the heap. For more detail, see the comments and other stuff just above the match() function. */ -/* #undef NO_RECURSE */ - -/* Name of package */ -#define PACKAGE "pcre" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "PCRE" - -/* Define to the full name and version of this package. 
*/ +/* #undef NO_RECURSE */ + +/* Name of package */ +#define PACKAGE "pcre" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "PCRE" + +/* Define to the full name and version of this package. */ #define PACKAGE_STRING "PCRE 8.44" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "pcre" - + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "pcre" + /* Define to the home page for this package. */ #define PACKAGE_URL "" -/* Define to the version of this package. */ +/* Define to the version of this package. */ #define PACKAGE_VERSION "8.44" - + /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system stack that is used while compiling a pattern. */ @@ -277,27 +277,27 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value if linking statically (TODO: make nice with Libtool) */ /* #undef PCRE_STATIC */ -/* When calling PCRE via the POSIX interface, additional working storage is - required for holding the pointers to capturing substrings because PCRE - requires three integers per substring, whereas the POSIX interface provides - only two. If the number of expected substrings is small, the wrapper - function uses space on the stack, because this is faster than using - malloc() for each call. The threshold above which the stack is no longer +/* When calling PCRE via the POSIX interface, additional working storage is + required for holding the pointers to capturing substrings because PCRE + requires three integers per substring, whereas the POSIX interface provides + only two. If the number of expected substrings is small, the wrapper + function uses space on the stack, because this is faster than using + malloc() for each call. 
The threshold above which the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD. */ -#define POSIX_MALLOC_THRESHOLD 10 - +#define POSIX_MALLOC_THRESHOLD 10 + /* Define to necessary symbol if this constant uses a non-standard name on your system. */ /* #undef PTHREAD_CREATE_JOINABLE */ -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + #ifdef ARCADIA_PCRE_ENABLE_JIT /* Define to any value to enable support for Just-In-Time compiling. */ #define SUPPORT_JIT /**/ #endif - + /* Define to any value to allow pcregrep to be linked with libbz2, so that it is able to handle .bz2 files. */ /* #undef SUPPORT_LIBBZ2 */ @@ -338,7 +338,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value for valgrind support to find invalid memory reads. */ /* #undef SUPPORT_VALGRIND */ -/* Version number of package */ +/* Version number of package */ #define VERSION "8.44" /* Define to empty if `const' does not conform to ANSI C. */ diff --git a/contrib/libs/pcre/pcre_dfa_exec.c b/contrib/libs/pcre/pcre_dfa_exec.c index 649d1b19d9e..81eec053564 100644 --- a/contrib/libs/pcre/pcre_dfa_exec.c +++ b/contrib/libs/pcre/pcre_dfa_exec.c @@ -1,49 +1,49 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language (but see below for why this module is different). 
- - Written by Philip Hazel + + Written by Philip Hazel Copyright (c) 1997-2017 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - -/* This module contains the external function pcre_dfa_exec(), which is an -alternative matching function that uses a sort of DFA algorithm (not a true + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +*/ + +/* This module contains the external function pcre_dfa_exec(), which is an +alternative matching function that uses a sort of DFA algorithm (not a true FSM). This is NOT Perl-compatible, but it has advantages in certain -applications. */ - - +applications. */ + + /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved the performance of his patterns greatly. I could not use it as it stood, as it was not thread safe, and made assumptions about pattern sizes. Also, it caused @@ -72,61 +72,61 @@ in others, so I abandoned this code. */ -#ifdef HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#define NLBLOCK md /* Block containing newline information */ -#define PSSTART start_subject /* Field containing processed string start */ -#define PSEND end_subject /* Field containing processed string end */ - -#include "pcre_internal.h" - - -/* For use to indent debugging output */ - -#define SP " " - - -/************************************************* -* Code parameters and static tables * -*************************************************/ - -/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes -into others, under special conditions. A gap of 20 between the blocks should be -enough. The resulting opcodes don't have to be less than 256 because they are -never stored, so we push them well clear of the normal opcodes. 
*/ - -#define OP_PROP_EXTRA 300 -#define OP_EXTUNI_EXTRA 320 -#define OP_ANYNL_EXTRA 340 -#define OP_HSPACE_EXTRA 360 -#define OP_VSPACE_EXTRA 380 - - -/* This table identifies those opcodes that are followed immediately by a +#endif + +#define NLBLOCK md /* Block containing newline information */ +#define PSSTART start_subject /* Field containing processed string start */ +#define PSEND end_subject /* Field containing processed string end */ + +#include "pcre_internal.h" + + +/* For use to indent debugging output */ + +#define SP " " + + +/************************************************* +* Code parameters and static tables * +*************************************************/ + +/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes +into others, under special conditions. A gap of 20 between the blocks should be +enough. The resulting opcodes don't have to be less than 256 because they are +never stored, so we push them well clear of the normal opcodes. */ + +#define OP_PROP_EXTRA 300 +#define OP_EXTUNI_EXTRA 320 +#define OP_ANYNL_EXTRA 340 +#define OP_HSPACE_EXTRA 360 +#define OP_VSPACE_EXTRA 380 + + +/* This table identifies those opcodes that are followed immediately by a character that is to be tested in some way. This makes it possible to -centralize the loading of these characters. In the case of Type * etc, the -"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a +centralize the loading of these characters. In the case of Type * etc, the +"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a small value. Non-zero values in the table are the offsets from the opcode where the character is to be found. ***NOTE*** If the start of this table is modified, the three tables that follow must also be modified. 
*/ - + static const pcre_uint8 coptable[] = { - 0, /* End */ - 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ - 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ + 0, /* End */ + 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ + 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ 0, 0, 0, /* Any, AllAny, Anybyte */ 0, 0, /* \P, \p */ - 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ + 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ 0, /* \X */ 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */ - 1, /* Char */ + 1, /* Char */ 1, /* Chari */ - 1, /* not */ + 1, /* not */ 1, /* noti */ - /* Positive single-char repeats */ - 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ + /* Positive single-char repeats */ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */ 1+IMM2_SIZE, /* exact */ 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */ @@ -134,8 +134,8 @@ static const pcre_uint8 coptable[] = { 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */ 1+IMM2_SIZE, /* exact I */ 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ - /* Negative single-char repeats - only for chars < 256 */ - 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ + /* Negative single-char repeats - only for chars < 256 */ + 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */ 1+IMM2_SIZE, /* NOT exact */ 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */ @@ -143,34 +143,34 @@ static const pcre_uint8 coptable[] = { 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */ 1+IMM2_SIZE, /* NOT exact I */ 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */ - /* Positive type repeats */ - 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ + /* Positive type repeats */ + 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */ 1+IMM2_SIZE, /* Type exact */ 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */ - /* Character class & ref repeats */ - 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? 
*/ - 0, 0, /* CRRANGE, CRMINRANGE */ + /* Character class & ref repeats */ + 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */ + 0, 0, /* CRRANGE, CRMINRANGE */ 0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */ - 0, /* CLASS */ - 0, /* NCLASS */ - 0, /* XCLASS - variable length */ - 0, /* REF */ + 0, /* CLASS */ + 0, /* NCLASS */ + 0, /* XCLASS - variable length */ + 0, /* REF */ 0, /* REFI */ 0, /* DNREF */ 0, /* DNREFI */ - 0, /* RECURSE */ - 0, /* CALLOUT */ - 0, /* Alt */ - 0, /* Ket */ - 0, /* KetRmax */ - 0, /* KetRmin */ + 0, /* RECURSE */ + 0, /* CALLOUT */ + 0, /* Alt */ + 0, /* Ket */ + 0, /* KetRmax */ + 0, /* KetRmin */ 0, /* KetRpos */ 0, /* Reverse */ - 0, /* Assert */ - 0, /* Assert not */ - 0, /* Assert behind */ - 0, /* Assert behind not */ + 0, /* Assert */ + 0, /* Assert not */ + 0, /* Assert behind */ + 0, /* Assert behind not */ 0, 0, /* ONCE, ONCE_NC */ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ @@ -238,7 +238,7 @@ static const pcre_uint8 poptable[] = { 0, /* KetRmax */ 0, /* KetRmin */ 0, /* KetRpos */ - 0, /* Reverse */ + 0, /* Reverse */ 0, /* Assert */ 0, /* Assert not */ 0, /* Assert behind */ @@ -248,516 +248,516 @@ static const pcre_uint8 poptable[] = { 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ 0, 0, /* CREF, DNCREF */ 0, 0, /* RREF, DNRREF */ - 0, /* DEF */ + 0, /* DEF */ 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0 /* CLOSE, SKIPZERO */ -}; - -/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, -and \w */ - +}; + +/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, +and \w */ + static const pcre_uint8 toptable1[] = { - 0, 0, 0, 0, 0, 0, - ctype_digit, ctype_digit, - ctype_space, ctype_space, - ctype_word, ctype_word, + 0, 0, 0, 0, 0, 0, + ctype_digit, 
ctype_digit, + ctype_space, ctype_space, + ctype_word, ctype_word, 0, 0 /* OP_ANY, OP_ALLANY */ -}; - +}; + static const pcre_uint8 toptable2[] = { - 0, 0, 0, 0, 0, 0, - ctype_digit, 0, - ctype_space, 0, - ctype_word, 0, + 0, 0, 0, 0, 0, 0, + ctype_digit, 0, + ctype_space, 0, + ctype_word, 0, 1, 1 /* OP_ANY, OP_ALLANY */ -}; - - -/* Structure for holding data about a particular state, which is in effect the -current data for an active path through the match tree. It must consist -entirely of ints because the working vector we are passed, and which we put -these structures in, is a vector of ints. */ - -typedef struct stateblock { - int offset; /* Offset to opcode */ - int count; /* Count for repeats */ - int data; /* Some use extra data */ -} stateblock; - +}; + + +/* Structure for holding data about a particular state, which is in effect the +current data for an active path through the match tree. It must consist +entirely of ints because the working vector we are passed, and which we put +these structures in, is a vector of ints. */ + +typedef struct stateblock { + int offset; /* Offset to opcode */ + int count; /* Count for repeats */ + int data; /* Some use extra data */ +} stateblock; + #define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) - - + + #ifdef PCRE_DEBUG -/************************************************* -* Print character string * -*************************************************/ - -/* Character string printing function for debugging. - -Arguments: - p points to string - length number of bytes - f where to print - -Returns: nothing -*/ - -static void +/************************************************* +* Print character string * +*************************************************/ + +/* Character string printing function for debugging. 
+ +Arguments: + p points to string + length number of bytes + f where to print + +Returns: nothing +*/ + +static void pchars(const pcre_uchar *p, int length, FILE *f) -{ +{ pcre_uint32 c; -while (length-- > 0) - { - if (isprint(c = *(p++))) - fprintf(f, "%c", c); - else +while (length-- > 0) + { + if (isprint(c = *(p++))) + fprintf(f, "%c", c); + else fprintf(f, "\\x{%02x}", c); - } -} -#endif - - - -/************************************************* -* Execute a Regular Expression - DFA engine * -*************************************************/ - -/* This internal function applies a compiled pattern to a subject string, -starting at a given point, using a DFA engine. This function is called from the -external one, possibly multiple times if the pattern is not anchored. The -function calls itself recursively for some kinds of subpattern. - -Arguments: - md the match_data block with fixed information - this_start_code the opening bracket of this subexpression's code - current_subject where we currently are in the subject string - start_offset start offset in the subject string - offsets vector to contain the matching string offsets - offsetcount size of same - workspace vector of workspace - wscount size of same - rlevel function call recursion level - + } +} +#endif + + + +/************************************************* +* Execute a Regular Expression - DFA engine * +*************************************************/ + +/* This internal function applies a compiled pattern to a subject string, +starting at a given point, using a DFA engine. This function is called from the +external one, possibly multiple times if the pattern is not anchored. The +function calls itself recursively for some kinds of subpattern. 
+ +Arguments: + md the match_data block with fixed information + this_start_code the opening bracket of this subexpression's code + current_subject where we currently are in the subject string + start_offset start offset in the subject string + offsets vector to contain the matching string offsets + offsetcount size of same + workspace vector of workspace + wscount size of same + rlevel function call recursion level + Returns: > 0 => number of match offset pairs placed in offsets = 0 => offsets overflowed; longest matches are present - -1 => failed to match - < -1 => some kind of unexpected problem - -The following macros are used for adding states to the two state vectors (one -for the current character, one for the following character). */ - -#define ADD_ACTIVE(x,y) \ - if (active_count++ < wscount) \ - { \ - next_active_state->offset = (x); \ - next_active_state->count = (y); \ - next_active_state++; \ - DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -#define ADD_ACTIVE_DATA(x,y,z) \ - if (active_count++ < wscount) \ - { \ - next_active_state->offset = (x); \ - next_active_state->count = (y); \ - next_active_state->data = (z); \ - next_active_state++; \ - DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -#define ADD_NEW(x,y) \ - if (new_count++ < wscount) \ - { \ - next_new_state->offset = (x); \ - next_new_state->count = (y); \ - next_new_state++; \ - DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -#define ADD_NEW_DATA(x,y,z) \ - if (new_count++ < wscount) \ - { \ - next_new_state->offset = (x); \ - next_new_state->count = (y); \ - next_new_state->data = (z); \ - next_new_state++; \ + -1 => failed to match + < -1 => some kind of unexpected problem + +The following macros are used for adding states to the two state vectors (one +for the current character, one for the 
following character). */ + +#define ADD_ACTIVE(x,y) \ + if (active_count++ < wscount) \ + { \ + next_active_state->offset = (x); \ + next_active_state->count = (y); \ + next_active_state++; \ + DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ + } \ + else return PCRE_ERROR_DFA_WSSIZE + +#define ADD_ACTIVE_DATA(x,y,z) \ + if (active_count++ < wscount) \ + { \ + next_active_state->offset = (x); \ + next_active_state->count = (y); \ + next_active_state->data = (z); \ + next_active_state++; \ + DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ + } \ + else return PCRE_ERROR_DFA_WSSIZE + +#define ADD_NEW(x,y) \ + if (new_count++ < wscount) \ + { \ + next_new_state->offset = (x); \ + next_new_state->count = (y); \ + next_new_state++; \ + DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ + } \ + else return PCRE_ERROR_DFA_WSSIZE + +#define ADD_NEW_DATA(x,y,z) \ + if (new_count++ < wscount) \ + { \ + next_new_state->offset = (x); \ + next_new_state->count = (y); \ + next_new_state->data = (z); \ + next_new_state++; \ DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \ (x), (y), (z), __LINE__)); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -/* And now, here is the code */ - -static int -internal_dfa_exec( - dfa_match_data *md, + } \ + else return PCRE_ERROR_DFA_WSSIZE + +/* And now, here is the code */ + +static int +internal_dfa_exec( + dfa_match_data *md, const pcre_uchar *this_start_code, const pcre_uchar *current_subject, - int start_offset, - int *offsets, - int offsetcount, - int *workspace, - int wscount, + int start_offset, + int *offsets, + int offsetcount, + int *workspace, + int wscount, int rlevel) -{ -stateblock *active_states, *new_states, *temp_states; -stateblock *next_active_state, *next_new_state; - +{ +stateblock *active_states, *new_states, *temp_states; +stateblock *next_active_state, *next_new_state; + const pcre_uint8 *ctypes, *lcc, *fcc; const pcre_uchar *ptr; const pcre_uchar 
*end_code, *first_op; - + dfa_recursion_info new_recursive; -int active_count, new_count, match_count; - -/* Some fields in the md block are frequently referenced, so we load them into -independent variables in the hope that this will perform better. */ - +int active_count, new_count, match_count; + +/* Some fields in the md block are frequently referenced, so we load them into +independent variables in the hope that this will perform better. */ + const pcre_uchar *start_subject = md->start_subject; const pcre_uchar *end_subject = md->end_subject; const pcre_uchar *start_code = md->start_code; - + #ifdef SUPPORT_UTF BOOL utf = (md->poptions & PCRE_UTF8) != 0; -#else +#else BOOL utf = FALSE; -#endif - +#endif + BOOL reset_could_continue = FALSE; -rlevel++; -offsetcount &= (-2); - -wscount -= 2; -wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) / - (2 * INTS_PER_STATEBLOCK); - -DPRINTF(("\n%.*s---------------------\n" +rlevel++; +offsetcount &= (-2); + +wscount -= 2; +wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) / + (2 * INTS_PER_STATEBLOCK); + +DPRINTF(("\n%.*s---------------------\n" "%.*sCall to internal_dfa_exec f=%d\n", rlevel*2-2, SP, rlevel*2-2, SP, rlevel)); - -ctypes = md->tables + ctypes_offset; -lcc = md->tables + lcc_offset; -fcc = md->tables + fcc_offset; - -match_count = PCRE_ERROR_NOMATCH; /* A negative number */ - -active_states = (stateblock *)(workspace + 2); -next_new_state = new_states = active_states + wscount; -new_count = 0; - -first_op = this_start_code + 1 + LINK_SIZE + + +ctypes = md->tables + ctypes_offset; +lcc = md->tables + lcc_offset; +fcc = md->tables + fcc_offset; + +match_count = PCRE_ERROR_NOMATCH; /* A negative number */ + +active_states = (stateblock *)(workspace + 2); +next_new_state = new_states = active_states + wscount; +new_count = 0; + +first_op = this_start_code + 1 + LINK_SIZE + ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || *this_start_code == OP_CBRAPOS || *this_start_code == 
OP_SCBRAPOS) ? IMM2_SIZE:0); - -/* The first thing in any (sub) pattern is a bracket of some sort. Push all -the alternative states onto the list, and find out where the end is. This -makes is possible to use this function recursively, when we want to stop at a -matching internal ket rather than at the end. - -If the first opcode in the first alternative is OP_REVERSE, we are dealing with -a backward assertion. In that case, we have to find out the maximum amount to -move back, and set up each alternative appropriately. */ - -if (*first_op == OP_REVERSE) - { - int max_back = 0; - int gone_back; - - end_code = this_start_code; - do - { - int back = GET(end_code, 2+LINK_SIZE); - if (back > max_back) max_back = back; - end_code += GET(end_code, 1); - } - while (*end_code == OP_ALT); - - /* If we can't go back the amount required for the longest lookbehind - pattern, go back as far as we can; some alternatives may still be viable. */ - + +/* The first thing in any (sub) pattern is a bracket of some sort. Push all +the alternative states onto the list, and find out where the end is. This +makes is possible to use this function recursively, when we want to stop at a +matching internal ket rather than at the end. + +If the first opcode in the first alternative is OP_REVERSE, we are dealing with +a backward assertion. In that case, we have to find out the maximum amount to +move back, and set up each alternative appropriately. */ + +if (*first_op == OP_REVERSE) + { + int max_back = 0; + int gone_back; + + end_code = this_start_code; + do + { + int back = GET(end_code, 2+LINK_SIZE); + if (back > max_back) max_back = back; + end_code += GET(end_code, 1); + } + while (*end_code == OP_ALT); + + /* If we can't go back the amount required for the longest lookbehind + pattern, go back as far as we can; some alternatives may still be viable. 
*/ + #ifdef SUPPORT_UTF - /* In character mode we have to step back character by character */ - + /* In character mode we have to step back character by character */ + if (utf) - { - for (gone_back = 0; gone_back < max_back; gone_back++) - { - if (current_subject <= start_subject) break; - current_subject--; + { + for (gone_back = 0; gone_back < max_back; gone_back++) + { + if (current_subject <= start_subject) break; + current_subject--; ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--); - } - } - else -#endif - - /* In byte-mode we can do this quickly. */ - - { - gone_back = (current_subject - max_back < start_subject)? + } + } + else +#endif + + /* In byte-mode we can do this quickly. */ + + { + gone_back = (current_subject - max_back < start_subject)? (int)(current_subject - start_subject) : max_back; - current_subject -= gone_back; - } - + current_subject -= gone_back; + } + /* Save the earliest consulted character */ if (current_subject < md->start_used_ptr) md->start_used_ptr = current_subject; - /* Now we can process the individual branches. */ - - end_code = this_start_code; - do - { - int back = GET(end_code, 2+LINK_SIZE); - if (back <= gone_back) - { + /* Now we can process the individual branches. */ + + end_code = this_start_code; + do + { + int back = GET(end_code, 2+LINK_SIZE); + if (back <= gone_back) + { int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE); - ADD_NEW_DATA(-bstate, 0, gone_back - back); - } - end_code += GET(end_code, 1); - } - while (*end_code == OP_ALT); - } - -/* This is the code for a "normal" subpattern (not a backward assertion). The -start of a whole pattern is always one of these. If we are at the top level, -we may be asked to restart matching from the same point that we reached for a -previous partial match. We still have to scan through the top-level branches to -find the end state. 
*/ - -else - { - end_code = this_start_code; - - /* Restarting */ - - if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0) - { - do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT); - new_count = workspace[1]; - if (!workspace[0]) - memcpy(new_states, active_states, new_count * sizeof(stateblock)); - } - - /* Not restarting */ - - else - { - int length = 1 + LINK_SIZE + + ADD_NEW_DATA(-bstate, 0, gone_back - back); + } + end_code += GET(end_code, 1); + } + while (*end_code == OP_ALT); + } + +/* This is the code for a "normal" subpattern (not a backward assertion). The +start of a whole pattern is always one of these. If we are at the top level, +we may be asked to restart matching from the same point that we reached for a +previous partial match. We still have to scan through the top-level branches to +find the end state. */ + +else + { + end_code = this_start_code; + + /* Restarting */ + + if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0) + { + do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT); + new_count = workspace[1]; + if (!workspace[0]) + memcpy(new_states, active_states, new_count * sizeof(stateblock)); + } + + /* Not restarting */ + + else + { + int length = 1 + LINK_SIZE + ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) ? 
IMM2_SIZE:0); - do - { + do + { ADD_NEW((int)(end_code - start_code + length), 0); - end_code += GET(end_code, 1); - length = 1 + LINK_SIZE; - } - while (*end_code == OP_ALT); - } - } - -workspace[0] = 0; /* Bit indicating which vector is current */ - + end_code += GET(end_code, 1); + length = 1 + LINK_SIZE; + } + while (*end_code == OP_ALT); + } + } + +workspace[0] = 0; /* Bit indicating which vector is current */ + DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code))); - -/* Loop for scanning the subject */ - -ptr = current_subject; -for (;;) - { - int i, j; - int clen, dlen; + +/* Loop for scanning the subject */ + +ptr = current_subject; +for (;;) + { + int i, j; + int clen, dlen; pcre_uint32 c, d; int forced_fail = 0; BOOL partial_newline = FALSE; BOOL could_continue = reset_could_continue; reset_could_continue = FALSE; - - /* Make the new state list into the active state list and empty the - new state list. */ - - temp_states = active_states; - active_states = new_states; - new_states = temp_states; - active_count = new_count; - new_count = 0; - - workspace[0] ^= 1; /* Remember for the restarting feature */ - workspace[1] = active_count; - + + /* Make the new state list into the active state list and empty the + new state list. 
*/ + + temp_states = active_states; + active_states = new_states; + new_states = temp_states; + active_count = new_count; + new_count = 0; + + workspace[0] ^= 1; /* Remember for the restarting feature */ + workspace[1] = active_count; + #ifdef PCRE_DEBUG - printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); + printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); pchars(ptr, STRLEN_UC(ptr), stdout); - printf("\"\n"); - - printf("%.*sActive states: ", rlevel*2-2, SP); - for (i = 0; i < active_count; i++) - printf("%d/%d ", active_states[i].offset, active_states[i].count); - printf("\n"); -#endif - - /* Set the pointers for adding new states */ - - next_active_state = active_states + active_count; - next_new_state = new_states; - - /* Load the current character from the subject outside the loop, as many - different states may want to look at it, and we assume that at least one - will. */ - - if (ptr < end_subject) - { + printf("\"\n"); + + printf("%.*sActive states: ", rlevel*2-2, SP); + for (i = 0; i < active_count; i++) + printf("%d/%d ", active_states[i].offset, active_states[i].count); + printf("\n"); +#endif + + /* Set the pointers for adding new states */ + + next_active_state = active_states + active_count; + next_new_state = new_states; + + /* Load the current character from the subject outside the loop, as many + different states may want to look at it, and we assume that at least one + will. */ + + if (ptr < end_subject) + { clen = 1; /* Number of data items in the character */ #ifdef SUPPORT_UTF GETCHARLENTEST(c, ptr, clen); #else - c = *ptr; + c = *ptr; #endif /* SUPPORT_UTF */ - } - else - { - clen = 0; /* This indicates the end of the subject */ - c = NOTACHAR; /* This value should never actually be used */ - } - - /* Scan up the active states and act on each one. The result of an action - may be to add more states to the currently active list (e.g. 
on hitting a - parenthesis) or it may be to put states on the new list, for considering - when we move the character pointer on. */ - - for (i = 0; i < active_count; i++) - { - stateblock *current_state = active_states + i; + } + else + { + clen = 0; /* This indicates the end of the subject */ + c = NOTACHAR; /* This value should never actually be used */ + } + + /* Scan up the active states and act on each one. The result of an action + may be to add more states to the currently active list (e.g. on hitting a + parenthesis) or it may be to put states on the new list, for considering + when we move the character pointer on. */ + + for (i = 0; i < active_count; i++) + { + stateblock *current_state = active_states + i; BOOL caseless = FALSE; const pcre_uchar *code; - int state_offset = current_state->offset; + int state_offset = current_state->offset; int codevalue, rrc; int count; - + #ifdef PCRE_DEBUG - printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); - if (clen == 0) printf("EOL\n"); - else if (c > 32 && c < 127) printf("'%c'\n", c); - else printf("0x%02x\n", c); -#endif - - /* A negative offset is a special case meaning "hold off going to this - (negated) state until the number of characters in the data field have + printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); + if (clen == 0) printf("EOL\n"); + else if (c > 32 && c < 127) printf("'%c'\n", c); + else printf("0x%02x\n", c); +#endif + + /* A negative offset is a special case meaning "hold off going to this + (negated) state until the number of characters in the data field have been skipped". If the could_continue flag was passed over from a previous state, arrange for it to passed on. 
*/ - - if (state_offset < 0) - { - if (current_state->data > 0) - { - DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); - ADD_NEW_DATA(state_offset, current_state->count, - current_state->data - 1); + + if (state_offset < 0) + { + if (current_state->data > 0) + { + DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); + ADD_NEW_DATA(state_offset, current_state->count, + current_state->data - 1); if (could_continue) reset_could_continue = TRUE; - continue; - } - else - { - current_state->offset = state_offset = -state_offset; - } - } - + continue; + } + else + { + current_state->offset = state_offset = -state_offset; + } + } + /* Check for a duplicate state with the same count, and skip if found. See the note at the head of this module about the possibility of improving performance here. */ - - for (j = 0; j < i; j++) - { - if (active_states[j].offset == state_offset && - active_states[j].count == current_state->count) - { - DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP)); - goto NEXT_ACTIVE_STATE; - } - } - - /* The state offset is the offset to the opcode */ - - code = start_code + state_offset; - codevalue = *code; - + + for (j = 0; j < i; j++) + { + if (active_states[j].offset == state_offset && + active_states[j].count == current_state->count) + { + DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP)); + goto NEXT_ACTIVE_STATE; + } + } + + /* The state offset is the offset to the opcode */ + + code = start_code + state_offset; + codevalue = *code; + /* If this opcode inspects a character, but we are at the end of the subject, remember the fact for use when testing for a partial match. */ if (clen == 0 && poptable[codevalue] != 0) could_continue = TRUE; - /* If this opcode is followed by an inline character, load it. It is - tempting to test for the presence of a subject character here, but that - is wrong, because sometimes zero repetitions of the subject are - permitted. 
- - We also use this mechanism for opcodes such as OP_TYPEPLUS that take an + /* If this opcode is followed by an inline character, load it. It is + tempting to test for the presence of a subject character here, but that + is wrong, because sometimes zero repetitions of the subject are + permitted. + + We also use this mechanism for opcodes such as OP_TYPEPLUS that take an argument that is not a data character - but is always one byte long because the values are small. We have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert these ones to new opcodes. */ - - if (coptable[codevalue] > 0) - { - dlen = 1; + + if (coptable[codevalue] > 0) + { + dlen = 1; #ifdef SUPPORT_UTF if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else #endif /* SUPPORT_UTF */ - d = code[coptable[codevalue]]; - if (codevalue >= OP_TYPESTAR) - { - switch(d) - { - case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM; - case OP_NOTPROP: - case OP_PROP: codevalue += OP_PROP_EXTRA; break; - case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break; - case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break; - case OP_NOT_HSPACE: - case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break; - case OP_NOT_VSPACE: - case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break; - default: break; - } - } - } - else - { - dlen = 0; /* Not strictly necessary, but compilers moan */ - d = NOTACHAR; /* if these variables are not set. 
*/ - } - - - /* Now process the individual opcodes */ - - switch (codevalue) - { + d = code[coptable[codevalue]]; + if (codevalue >= OP_TYPESTAR) + { + switch(d) + { + case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM; + case OP_NOTPROP: + case OP_PROP: codevalue += OP_PROP_EXTRA; break; + case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break; + case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break; + case OP_NOT_HSPACE: + case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break; + case OP_NOT_VSPACE: + case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break; + default: break; + } + } + } + else + { + dlen = 0; /* Not strictly necessary, but compilers moan */ + d = NOTACHAR; /* if these variables are not set. */ + } + + + /* Now process the individual opcodes */ + + switch (codevalue) + { /* ========================================================================== */ /* These cases are never obeyed. This is a fudge that causes a compile- time error if the vectors coptable or poptable, which are indexed by opcode, are not the correct length. It seems to be the only way to do such a check at compile time, as the sizeof() operator does not work in the C preprocessor. */ - + case OP_TABLE_LENGTH: case OP_TABLE_LENGTH + ((sizeof(coptable) == OP_TABLE_LENGTH) && (sizeof(poptable) == OP_TABLE_LENGTH)): break; -/* ========================================================================== */ - /* Reached a closing bracket. If not at the end of the pattern, carry +/* ========================================================================== */ + /* Reached a closing bracket. If not at the end of the pattern, carry on with the next opcode. For repeating opcodes, also add the repeat state. 
Note that KETRPOS will always be encountered at the end of the subpattern, because the possessive subpattern repeats are always handled @@ -766,27 +766,27 @@ for (;;) At the end of the (sub)pattern, unless we have an empty string and PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the start of the subject, save the match data, shifting up all previous - matches so we always have the longest first. */ - - case OP_KET: - case OP_KETRMIN: - case OP_KETRMAX: + matches so we always have the longest first. */ + + case OP_KET: + case OP_KETRMIN: + case OP_KETRMAX: case OP_KETRPOS: - if (code != end_code) - { - ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); - if (codevalue != OP_KET) - { - ADD_ACTIVE(state_offset - GET(code, 1), 0); - } - } + if (code != end_code) + { + ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); + if (codevalue != OP_KET) + { + ADD_ACTIVE(state_offset - GET(code, 1), 0); + } + } else - { + { if (ptr > current_subject || ((md->moptions & PCRE_NOTEMPTY) == 0 && ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 || current_subject > start_subject + md->start_offset))) - { + { if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0; else if (match_count > 0 && ++match_count * 2 > offsetcount) match_count = 0; @@ -806,53 +806,53 @@ for (;;) match_count, rlevel*2-2, SP)); return match_count; } - } - } - break; - -/* ========================================================================== */ - /* These opcodes add to the current list of states without looking - at the current character. */ - - /*-----------------------------------------------------------------*/ - case OP_ALT: - do { code += GET(code, 1); } while (*code == OP_ALT); + } + } + break; + +/* ========================================================================== */ + /* These opcodes add to the current list of states without looking + at the current character. 
*/ + + /*-----------------------------------------------------------------*/ + case OP_ALT: + do { code += GET(code, 1); } while (*code == OP_ALT); ADD_ACTIVE((int)(code - start_code), 0); - break; - - /*-----------------------------------------------------------------*/ - case OP_BRA: - case OP_SBRA: - do - { + break; + + /*-----------------------------------------------------------------*/ + case OP_BRA: + case OP_SBRA: + do + { ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); - code += GET(code, 1); - } - while (*code == OP_ALT); - break; - - /*-----------------------------------------------------------------*/ - case OP_CBRA: - case OP_SCBRA: + code += GET(code, 1); + } + while (*code == OP_ALT); + break; + + /*-----------------------------------------------------------------*/ + case OP_CBRA: + case OP_SCBRA: ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0); - code += GET(code, 1); - while (*code == OP_ALT) - { + code += GET(code, 1); + while (*code == OP_ALT) + { ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); - code += GET(code, 1); - } - break; - - /*-----------------------------------------------------------------*/ - case OP_BRAZERO: - case OP_BRAMINZERO: - ADD_ACTIVE(state_offset + 1, 0); - code += 1 + GET(code, 2); - while (*code == OP_ALT) code += GET(code, 1); + code += GET(code, 1); + } + break; + + /*-----------------------------------------------------------------*/ + case OP_BRAZERO: + case OP_BRAMINZERO: + ADD_ACTIVE(state_offset + 1, 0); + code += 1 + GET(code, 2); + while (*code == OP_ALT) code += GET(code, 1); ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); - break; - - /*-----------------------------------------------------------------*/ + break; + + /*-----------------------------------------------------------------*/ case OP_SKIPZERO: code += 1 + GET(code, 2); while (*code == OP_ALT) code += GET(code, 1); @@ -860,19 +860,19 @@ for (;;) break; 
/*-----------------------------------------------------------------*/ - case OP_CIRC: + case OP_CIRC: if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) - { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ case OP_CIRCM: if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) || (ptr != end_subject && WAS_NEWLINE(ptr))) { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ + break; + + /*-----------------------------------------------------------------*/ case OP_EOD: if (ptr >= end_subject) { @@ -880,27 +880,27 @@ for (;;) could_continue = TRUE; else { ADD_ACTIVE(state_offset + 1, 0); } } - break; - - /*-----------------------------------------------------------------*/ - case OP_SOD: - if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_SOM: - if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); } - break; - - -/* ========================================================================== */ - /* These opcodes inspect the next subject character, and sometimes - the previous one as well, but do not have an argument. The variable - clen contains the length of the current character and is zero if we are - at the end of the subject. 
*/ - - /*-----------------------------------------------------------------*/ - case OP_ANY: + break; + + /*-----------------------------------------------------------------*/ + case OP_SOD: + if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_SOM: + if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); } + break; + + +/* ========================================================================== */ + /* These opcodes inspect the next subject character, and sometimes + the previous one as well, but do not have an argument. The variable + clen contains the length of the current character and is zero if we are + at the end of the subject. */ + + /*-----------------------------------------------------------------*/ + case OP_ANY: if (clen > 0 && !IS_NEWLINE(ptr)) { if (ptr + 1 >= md->end_subject && @@ -921,28 +921,28 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_ALLANY: if (clen > 0) - { ADD_NEW(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_EODN: + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_EODN: if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0) could_continue = TRUE; else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen)) - { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_DOLL: - if ((md->moptions & PCRE_NOTEOL) == 0) - { + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_DOLL: + if ((md->moptions & PCRE_NOTEOL) == 0) + { if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0) could_continue = TRUE; else if (clen == 0 || ((md->poptions & PCRE_DOLLAR_ENDONLY) == 
0 && IS_NEWLINE(ptr) && (ptr == end_subject - md->nllen) - )) - { ADD_ACTIVE(state_offset + 1, 0); } + )) + { ADD_ACTIVE(state_offset + 1, 0); } else if (ptr + 1 >= md->end_subject && (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 && NLBLOCK->nltype == NLTYPE_FIXED && @@ -956,7 +956,7 @@ for (;;) } else could_continue = partial_newline = TRUE; } - } + } break; /*-----------------------------------------------------------------*/ @@ -983,42 +983,42 @@ for (;;) } } else if (IS_NEWLINE(ptr)) - { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - - case OP_DIGIT: - case OP_WHITESPACE: - case OP_WORDCHAR: - if (clen > 0 && c < 256 && - ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0) - { ADD_NEW(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_NOT_DIGIT: - case OP_NOT_WHITESPACE: - case OP_NOT_WORDCHAR: - if (clen > 0 && (c >= 256 || - ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)) - { ADD_NEW(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - { - int left_word, right_word; - - if (ptr > start_subject) - { + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + + case OP_DIGIT: + case OP_WHITESPACE: + case OP_WORDCHAR: + if (clen > 0 && c < 256 && + ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0) + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_NOT_DIGIT: + case OP_NOT_WHITESPACE: + case OP_NOT_WORDCHAR: + if (clen > 0 && (c >= 256 || + ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)) + { ADD_NEW(state_offset + 1, 0); } + break; + + 
/*-----------------------------------------------------------------*/ + case OP_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: + { + int left_word, right_word; + + if (ptr > start_subject) + { const pcre_uchar *temp = ptr - 1; if (temp < md->start_used_ptr) md->start_used_ptr = temp; #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf) { BACKCHAR(temp); } -#endif - GETCHARTEST(d, temp); +#endif + GETCHARTEST(d, temp); #ifdef SUPPORT_UCP if ((md->poptions & PCRE_UCP) != 0) { @@ -1030,10 +1030,10 @@ for (;;) } else #endif - left_word = d < 256 && (ctypes[d] & ctype_word) != 0; - } + left_word = d < 256 && (ctypes[d] & ctype_word) != 0; + } else left_word = FALSE; - + if (clen > 0) { #ifdef SUPPORT_UCP @@ -1050,49 +1050,49 @@ for (;;) right_word = c < 256 && (ctypes[c] & ctype_word) != 0; } else right_word = FALSE; - - if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY)) - { ADD_ACTIVE(state_offset + 1, 0); } - } - break; - - - /*-----------------------------------------------------------------*/ - /* Check the next character by Unicode property. We will get here only - if the support is in the binary; otherwise a compile-time error occurs. - */ - -#ifdef SUPPORT_UCP - case OP_PROP: - case OP_NOTPROP: - if (clen > 0) - { - BOOL OK; + + if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY)) + { ADD_ACTIVE(state_offset + 1, 0); } + } + break; + + + /*-----------------------------------------------------------------*/ + /* Check the next character by Unicode property. We will get here only + if the support is in the binary; otherwise a compile-time error occurs. 
+ */ + +#ifdef SUPPORT_UCP + case OP_PROP: + case OP_NOTPROP: + if (clen > 0) + { + BOOL OK; const pcre_uint32 *cp; const ucd_record * prop = GET_UCD(c); - switch(code[1]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: + switch(code[1]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt; - break; - - case PT_GC: + break; + + case PT_GC: OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; - break; - - case PT_PC: + break; + + case PT_PC: OK = prop->chartype == code[2]; - break; - - case PT_SC: + break; + + case PT_SC: OK = prop->script == code[2]; - break; - + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1140,33 +1140,33 @@ for (;;) c >= 0xe000; break; - /* Should never occur, but keep compilers from grumbling. */ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); } - } - break; -#endif - - - -/* ========================================================================== */ - /* These opcodes likewise inspect the subject character, but have an - argument that is not a data character. It is one of these opcodes: + /* Should never occur, but keep compilers from grumbling. */ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); } + } + break; +#endif + + + +/* ========================================================================== */ + /* These opcodes likewise inspect the subject character, but have an + argument that is not a data character. It is one of these opcodes: OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. 
*/ - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) - { + + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { if (d == OP_ANY && ptr + 1 >= md->end_subject && (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && NLBLOCK->nltype == NLTYPE_FIXED && @@ -1176,28 +1176,28 @@ for (;;) could_continue = partial_newline = TRUE; } else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && + (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (count > 0 && codevalue == OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW(state_offset, count); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (count > 0 && codevalue == OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW(state_offset, count); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { if (d == OP_ANY && ptr + 1 >= md->end_subject && (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && NLBLOCK->nltype == NLTYPE_FIXED && @@ -1207,27 +1207,27 @@ for (;;) could_continue = partial_newline = TRUE; } else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && + (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && - 
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (codevalue == OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset + 2, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (codevalue == OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset + 2, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPOSSTAR: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { if (d == OP_ANY && ptr + 1 >= md->end_subject && (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && NLBLOCK->nltype == NLTYPE_FIXED && @@ -1237,25 +1237,25 @@ for (;;) could_continue = partial_newline = TRUE; } else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && + (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (codevalue == OP_TYPEPOSSTAR) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPEEXACT: - count = current_state->count; /* Number already matched */ - if (clen > 0) - { + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (codevalue == OP_TYPEPOSSTAR) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEEXACT: + count = current_state->count; /* Number already matched */ + if (clen > 0) + { if (d == 
OP_ANY && ptr + 1 >= md->end_subject && (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && NLBLOCK->nltype == NLTYPE_FIXED && @@ -1265,26 +1265,26 @@ for (;;) could_continue = partial_newline = TRUE; } else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && + (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { if (++count >= (int)GET2(code, 1)) { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: + else + { ADD_NEW(state_offset, count); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); - count = current_state->count; /* Number already matched */ - if (clen > 0) - { + count = current_state->count; /* Number already matched */ + if (clen > 0) + { if (d == OP_ANY && ptr + 1 >= md->end_subject && (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && NLBLOCK->nltype == NLTYPE_FIXED && @@ -1294,63 +1294,63 @@ for (;;) could_continue = partial_newline = TRUE; } else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && + (c < 256 && (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (codevalue == OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (codevalue == OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } if (++count >= (int)GET2(code, 1)) { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - -/* 
========================================================================== */ - /* These are virtual opcodes that are used when something like - OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its - argument. It keeps the code above fast for the other cases. The argument - is in the d variable. */ - -#ifdef SUPPORT_UCP - case OP_PROP_EXTRA + OP_TYPEPLUS: - case OP_PROP_EXTRA + OP_TYPEMINPLUS: - case OP_PROP_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); } - if (clen > 0) - { - BOOL OK; + else + { ADD_NEW(state_offset, count); } + } + } + break; + +/* ========================================================================== */ + /* These are virtual opcodes that are used when something like + OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its + argument. It keeps the code above fast for the other cases. The argument + is in the d variable. */ + +#ifdef SUPPORT_UCP + case OP_PROP_EXTRA + OP_TYPEPLUS: + case OP_PROP_EXTRA + OP_TYPEMINPLUS: + case OP_PROP_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); } + if (clen > 0) + { + BOOL OK; const pcre_uint32 *cp; const ucd_record * prop = GET_UCD(c); - switch(code[2]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: + switch(code[2]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt; - break; - - case PT_GC: + break; + + case PT_GC: OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; - break; - - case PT_PC: + break; + + case PT_PC: OK = prop->chartype == code[3]; - break; - - case PT_SC: + break; + + case PT_SC: OK = prop->script == code[3]; - break; - + break; + /* These are specials for combination cases. 
*/ case PT_ALNUM: @@ -1398,211 +1398,211 @@ for (;;) c >= 0xe000; break; - /* Should never occur, but keep compilers from grumbling. */ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (d == OP_PROP)) - { - if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW(state_offset, count); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_EXTUNI_EXTRA + OP_TYPEPLUS: - case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS: - case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + /* Should never occur, but keep compilers from grumbling. */ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (d == OP_PROP)) + { + if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW(state_offset, count); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEPLUS: + case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS: + case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } if (clen > 0) - { + { int lgb, rgb; const pcre_uchar *nptr = ptr + clen; - int ncount = 0; - if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + int ncount = 0; + if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { + while (nptr < end_subject) + { dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } rgb = UCD_GRAPHBREAK(d); if 
((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; + ncount++; lgb = rgb; nptr += dlen; - } - count++; - ADD_NEW_DATA(-state_offset, count, ncount); - } - break; -#endif - - /*-----------------------------------------------------------------*/ - case OP_ANYNL_EXTRA + OP_TYPEPLUS: - case OP_ANYNL_EXTRA + OP_TYPEMINPLUS: - case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) - { - int ncount = 0; - switch (c) - { + } + count++; + ADD_NEW_DATA(-state_offset, count, ncount); + } + break; +#endif + + /*-----------------------------------------------------------------*/ + case OP_ANYNL_EXTRA + OP_TYPEPLUS: + case OP_ANYNL_EXTRA + OP_TYPEMINPLUS: + case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + int ncount = 0; + switch (c) + { case CHAR_VT: case CHAR_FF: case CHAR_NEL: #ifndef EBCDIC - case 0x2028: - case 0x2029: + case 0x2028: + case 0x2029: #endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - goto ANYNL01; - + if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; + goto ANYNL01; + case CHAR_CR: if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; - /* Fall through */ - - ANYNL01: + /* Fall through */ + + ANYNL01: case CHAR_LF: - if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW_DATA(-state_offset, count, ncount); - break; - - default: - break; - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE_EXTRA + OP_TYPEPLUS: - case OP_VSPACE_EXTRA + OP_TYPEMINPLUS: - case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) 
- { - BOOL OK; - switch (c) - { + if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW_DATA(-state_offset, count, ncount); + break; + + default: + break; + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_VSPACE_EXTRA + OP_TYPEPLUS: + case OP_VSPACE_EXTRA + OP_TYPEMINPLUS: + case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + BOOL OK; + switch (c) + { VSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_VSPACE)) - { - if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW_DATA(-state_offset, count, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_HSPACE_EXTRA + OP_TYPEPLUS: - case OP_HSPACE_EXTRA + OP_TYPEMINPLUS: - case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) - { - BOOL OK; - switch (c) - { + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_VSPACE)) + { + if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW_DATA(-state_offset, count, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE_EXTRA + OP_TYPEPLUS: + case OP_HSPACE_EXTRA + OP_TYPEMINPLUS: + case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + BOOL OK; + switch (c) + { HSPACE_CASES: - 
OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_HSPACE)) - { - if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW_DATA(-state_offset, count, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ -#ifdef SUPPORT_UCP - case OP_PROP_EXTRA + OP_TYPEQUERY: - case OP_PROP_EXTRA + OP_TYPEMINQUERY: - case OP_PROP_EXTRA + OP_TYPEPOSQUERY: - count = 4; - goto QS1; - - case OP_PROP_EXTRA + OP_TYPESTAR: - case OP_PROP_EXTRA + OP_TYPEMINSTAR: - case OP_PROP_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS1: - - ADD_ACTIVE(state_offset + 4, 0); - if (clen > 0) - { - BOOL OK; + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_HSPACE)) + { + if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW_DATA(-state_offset, count, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ +#ifdef SUPPORT_UCP + case OP_PROP_EXTRA + OP_TYPEQUERY: + case OP_PROP_EXTRA + OP_TYPEMINQUERY: + case OP_PROP_EXTRA + OP_TYPEPOSQUERY: + count = 4; + goto QS1; + + case OP_PROP_EXTRA + OP_TYPESTAR: + case OP_PROP_EXTRA + OP_TYPEMINSTAR: + case OP_PROP_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS1: + + ADD_ACTIVE(state_offset + 4, 0); + if (clen > 0) + { + BOOL OK; const pcre_uint32 *cp; const ucd_record * prop = GET_UCD(c); - switch(code[2]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: + switch(code[2]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt; - break; - - case PT_GC: + break; + + case PT_GC: OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; - break; - - case PT_PC: + break; + + case PT_PC: OK = prop->chartype == 
code[3]; - break; - - case PT_SC: + break; + + case PT_SC: OK = prop->script == code[3]; - break; - + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1650,250 +1650,250 @@ for (;;) c >= 0xe000; break; - /* Should never occur, but keep compilers from grumbling. */ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (d == OP_PROP)) - { - if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset + count, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_EXTUNI_EXTRA + OP_TYPEQUERY: - case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY: - case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS2; - - case OP_EXTUNI_EXTRA + OP_TYPESTAR: - case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR: - case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS2: - - ADD_ACTIVE(state_offset + 2, 0); + /* Should never occur, but keep compilers from grumbling. 
*/ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (d == OP_PROP)) + { + if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset + count, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEQUERY: + case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY: + case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS2; + + case OP_EXTUNI_EXTRA + OP_TYPESTAR: + case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR: + case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS2: + + ADD_ACTIVE(state_offset + 2, 0); if (clen > 0) - { + { int lgb, rgb; const pcre_uchar *nptr = ptr + clen; - int ncount = 0; - if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + int ncount = 0; + if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { + while (nptr < end_subject) + { dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } rgb = UCD_GRAPHBREAK(d); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; + ncount++; lgb = rgb; nptr += dlen; - } - ADD_NEW_DATA(-(state_offset + count), 0, ncount); - } - break; -#endif - - /*-----------------------------------------------------------------*/ - case OP_ANYNL_EXTRA + OP_TYPEQUERY: - case OP_ANYNL_EXTRA + OP_TYPEMINQUERY: - case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS3; - - case OP_ANYNL_EXTRA + OP_TYPESTAR: - case OP_ANYNL_EXTRA + OP_TYPEMINSTAR: - case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS3: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { 
- int ncount = 0; - switch (c) - { + } + ADD_NEW_DATA(-(state_offset + count), 0, ncount); + } + break; +#endif + + /*-----------------------------------------------------------------*/ + case OP_ANYNL_EXTRA + OP_TYPEQUERY: + case OP_ANYNL_EXTRA + OP_TYPEMINQUERY: + case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS3; + + case OP_ANYNL_EXTRA + OP_TYPESTAR: + case OP_ANYNL_EXTRA + OP_TYPEMINSTAR: + case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS3: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + int ncount = 0; + switch (c) + { case CHAR_VT: case CHAR_FF: case CHAR_NEL: #ifndef EBCDIC - case 0x2028: - case 0x2029: + case 0x2028: + case 0x2029: #endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - goto ANYNL02; - + if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; + goto ANYNL02; + case CHAR_CR: if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; - /* Fall through */ - - ANYNL02: + /* Fall through */ + + ANYNL02: case CHAR_LF: - if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount); - break; - - default: - break; - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE_EXTRA + OP_TYPEQUERY: - case OP_VSPACE_EXTRA + OP_TYPEMINQUERY: - case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS4; - - case OP_VSPACE_EXTRA + OP_TYPESTAR: - case OP_VSPACE_EXTRA + OP_TYPEMINSTAR: - case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS4: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - BOOL OK; - switch (c) - { - VSPACE_CASES: - OK = TRUE; - break; - - default: - OK = 
FALSE; - break; - } - if (OK == (d == OP_VSPACE)) - { - if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW_DATA(-(state_offset + (int)count), 0, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_HSPACE_EXTRA + OP_TYPEQUERY: - case OP_HSPACE_EXTRA + OP_TYPEMINQUERY: - case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS5; - - case OP_HSPACE_EXTRA + OP_TYPESTAR: - case OP_HSPACE_EXTRA + OP_TYPEMINSTAR: - case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS5: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - BOOL OK; - switch (c) - { - HSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_HSPACE)) - { - if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW_DATA(-(state_offset + (int)count), 0, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ -#ifdef SUPPORT_UCP - case OP_PROP_EXTRA + OP_TYPEEXACT: - case OP_PROP_EXTRA + OP_TYPEUPTO: - case OP_PROP_EXTRA + OP_TYPEMINUPTO: - case OP_PROP_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - BOOL OK; - const pcre_uint32 *cp; - const ucd_record * prop = GET_UCD(c); - switch(code[1 + IMM2_SIZE + 1]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; - break; - - case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; - break; - - case PT_PC: - OK = prop->chartype == code[1 + IMM2_SIZE + 2]; - 
break; - - case PT_SC: - OK = prop->script == code[1 + IMM2_SIZE + 2]; - break; - - /* These are specials for combination cases. */ + break; - case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + default: break; + } + } + break; - /* Perl space used to exclude VT, but from Perl 5.18 it is included, - which means that Perl space and POSIX space are now identical. PCRE - was changed at release 8.34. */ + /*-----------------------------------------------------------------*/ + case OP_VSPACE_EXTRA + OP_TYPEQUERY: + case OP_VSPACE_EXTRA + OP_TYPEMINQUERY: + case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS4; + + case OP_VSPACE_EXTRA + OP_TYPESTAR: + case OP_VSPACE_EXTRA + OP_TYPEMINSTAR: + case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS4: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + BOOL OK; + switch (c) + { + VSPACE_CASES: + OK = TRUE; + break; - case PT_SPACE: /* Perl space */ - case PT_PXSPACE: /* POSIX space */ - switch(c) + default: + OK = FALSE; + break; + } + if (OK == (d == OP_VSPACE)) + { + if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW_DATA(-(state_offset + (int)count), 0, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE_EXTRA + OP_TYPEQUERY: + case OP_HSPACE_EXTRA + OP_TYPEMINQUERY: + case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS5; + + case OP_HSPACE_EXTRA + OP_TYPESTAR: + case OP_HSPACE_EXTRA + OP_TYPEMINSTAR: + case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS5: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + BOOL OK; + switch (c) + { + HSPACE_CASES: + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_HSPACE)) + { + if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR || + codevalue == 
OP_HSPACE_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW_DATA(-(state_offset + (int)count), 0, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ +#ifdef SUPPORT_UCP + case OP_PROP_EXTRA + OP_TYPEEXACT: + case OP_PROP_EXTRA + OP_TYPEUPTO: + case OP_PROP_EXTRA + OP_TYPEMINUPTO: + case OP_PROP_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + BOOL OK; + const pcre_uint32 *cp; + const ucd_record * prop = GET_UCD(c); + switch(code[1 + IMM2_SIZE + 1]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: + OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || + prop->chartype == ucp_Lt; + break; + + case PT_GC: + OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; + break; + + case PT_PC: + OK = prop->chartype == code[1 + IMM2_SIZE + 2]; + break; + + case PT_SC: + OK = prop->script == code[1 + IMM2_SIZE + 2]; + break; + + /* These are specials for combination cases. */ + + case PT_ALNUM: + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N; + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. */ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) { HSPACE_CASES: VSPACE_CASES: @@ -1927,357 +1927,357 @@ for (;;) c >= 0xe000; break; - /* Should never occur, but keep compilers from grumbling. 
*/ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (d == OP_PROP)) - { - if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + /* Should never occur, but keep compilers from grumbling. */ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (d == OP_PROP)) + { + if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } if (++count >= (int)GET2(code, 1)) { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_EXTUNI_EXTRA + OP_TYPEEXACT: - case OP_EXTUNI_EXTRA + OP_TYPEUPTO: - case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: - case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) + else + { ADD_NEW(state_offset, count); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEEXACT: + case OP_EXTUNI_EXTRA + OP_TYPEUPTO: + case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: + case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ + count = current_state->count; /* Number already matched */ if (clen > 0) - { + { int lgb, rgb; const pcre_uchar *nptr = ptr + clen; - int ncount = 0; - if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + int ncount = 0; + if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { + while (nptr < end_subject) + { dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } 
rgb = UCD_GRAPHBREAK(d); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; + ncount++; lgb = rgb; nptr += dlen; - } + } if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) reset_could_continue = TRUE; if (++count >= (int)GET2(code, 1)) { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } - else - { ADD_NEW_DATA(-state_offset, count, ncount); } - } - break; -#endif - - /*-----------------------------------------------------------------*/ - case OP_ANYNL_EXTRA + OP_TYPEEXACT: - case OP_ANYNL_EXTRA + OP_TYPEUPTO: - case OP_ANYNL_EXTRA + OP_TYPEMINUPTO: - case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) + else + { ADD_NEW_DATA(-state_offset, count, ncount); } + } + break; +#endif + + /*-----------------------------------------------------------------*/ + case OP_ANYNL_EXTRA + OP_TYPEEXACT: + case OP_ANYNL_EXTRA + OP_TYPEUPTO: + case OP_ANYNL_EXTRA + OP_TYPEMINUPTO: + case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - int ncount = 0; - switch (c) - { + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + int ncount = 0; + switch (c) + { case CHAR_VT: case CHAR_FF: case CHAR_NEL: #ifndef EBCDIC - case 0x2028: - case 0x2029: + case 0x2028: + case 0x2029: #endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - goto ANYNL03; - + if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; + goto ANYNL03; + case CHAR_CR: if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; - /* Fall through */ - - ANYNL03: + /* Fall through */ + + ANYNL03: case CHAR_LF: - if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove 
non-match possibility */ + next_active_state--; + } if (++count >= (int)GET2(code, 1)) { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } - else - { ADD_NEW_DATA(-state_offset, count, ncount); } - break; - - default: - break; - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE_EXTRA + OP_TYPEEXACT: - case OP_VSPACE_EXTRA + OP_TYPEUPTO: - case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: - case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) + else + { ADD_NEW_DATA(-state_offset, count, ncount); } + break; + + default: + break; + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_VSPACE_EXTRA + OP_TYPEEXACT: + case OP_VSPACE_EXTRA + OP_TYPEUPTO: + case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: + case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - BOOL OK; - switch (c) - { + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + BOOL OK; + switch (c) + { VSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - } - - if (OK == (d == OP_VSPACE)) - { - if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + OK = TRUE; + break; + + default: + OK = FALSE; + } + + if (OK == (d == OP_VSPACE)) + { + if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } if (++count >= (int)GET2(code, 1)) { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } - else - { ADD_NEW_DATA(-state_offset, count, 0); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_HSPACE_EXTRA + OP_TYPEEXACT: - case OP_HSPACE_EXTRA + OP_TYPEUPTO: - case OP_HSPACE_EXTRA + 
OP_TYPEMINUPTO: - case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) + else + { ADD_NEW_DATA(-state_offset, count, 0); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE_EXTRA + OP_TYPEEXACT: + case OP_HSPACE_EXTRA + OP_TYPEUPTO: + case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: + case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - BOOL OK; - switch (c) - { + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + BOOL OK; + switch (c) + { HSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_HSPACE)) - { - if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_HSPACE)) + { + if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } if (++count >= (int)GET2(code, 1)) { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } - else - { ADD_NEW_DATA(-state_offset, count, 0); } - } - } - break; - -/* ========================================================================== */ - /* These opcodes are followed by a character that is usually compared - to the current subject character; it is loaded into d. We still get - here even if there is no subject character, because in some cases zero - repetitions are permitted. 
*/ - - /*-----------------------------------------------------------------*/ - case OP_CHAR: - if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ + else + { ADD_NEW_DATA(-state_offset, count, 0); } + } + } + break; + +/* ========================================================================== */ + /* These opcodes are followed by a character that is usually compared + to the current subject character; it is loaded into d. We still get + here even if there is no subject character, because in some cases zero + repetitions are permitted. */ + + /*-----------------------------------------------------------------*/ + case OP_CHAR: + if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ case OP_CHARI: - if (clen == 0) break; - + if (clen == 0) break; + #ifdef SUPPORT_UTF if (utf) - { - if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else - { - unsigned int othercase; + { + if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else + { + unsigned int othercase; if (c < 128) othercase = fcc[c]; else /* If we have Unicode property support, we can use it to test the other case of the character. */ -#ifdef SUPPORT_UCP +#ifdef SUPPORT_UCP othercase = UCD_OTHERCASE(c); -#else +#else othercase = NOTACHAR; -#endif - - if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } - } - } - else +#endif + + if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } + } + } + else #endif /* SUPPORT_UTF */ /* Not UTF mode */ - { + { if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d)) { ADD_NEW(state_offset + 2, 0); } - } - break; - - -#ifdef SUPPORT_UCP - /*-----------------------------------------------------------------*/ - /* This is a tricky one because it can match more than one character. 
- Find out how many characters to skip, and then set up a negative state - to wait for them to pass before continuing. */ - - case OP_EXTUNI: + } + break; + + +#ifdef SUPPORT_UCP + /*-----------------------------------------------------------------*/ + /* This is a tricky one because it can match more than one character. + Find out how many characters to skip, and then set up a negative state + to wait for them to pass before continuing. */ + + case OP_EXTUNI: if (clen > 0) - { + { int lgb, rgb; const pcre_uchar *nptr = ptr + clen; - int ncount = 0; + int ncount = 0; lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { + while (nptr < end_subject) + { dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } rgb = UCD_GRAPHBREAK(d); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; + ncount++; lgb = rgb; nptr += dlen; - } + } if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) reset_could_continue = TRUE; - ADD_NEW_DATA(-(state_offset + 1), 0, ncount); - } - break; -#endif - - /*-----------------------------------------------------------------*/ - /* This is a tricky like EXTUNI because it too can match more than one - character (when CR is followed by LF). In this case, set up a negative - state to wait for one character to pass before continuing. */ - - case OP_ANYNL: - if (clen > 0) switch(c) - { + ADD_NEW_DATA(-(state_offset + 1), 0, ncount); + } + break; +#endif + + /*-----------------------------------------------------------------*/ + /* This is a tricky like EXTUNI because it too can match more than one + character (when CR is followed by LF). In this case, set up a negative + state to wait for one character to pass before continuing. 
*/ + + case OP_ANYNL: + if (clen > 0) switch(c) + { case CHAR_VT: case CHAR_FF: case CHAR_NEL: #ifndef EBCDIC - case 0x2028: - case 0x2029: + case 0x2028: + case 0x2029: #endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - + if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; + case CHAR_LF: - ADD_NEW(state_offset + 1, 0); - break; - + ADD_NEW(state_offset + 1, 0); + break; + case CHAR_CR: if (ptr + 1 >= end_subject) - { + { ADD_NEW(state_offset + 1, 0); if ((md->moptions & PCRE_PARTIAL_HARD) != 0) reset_could_continue = TRUE; } else if (UCHAR21TEST(ptr + 1) == CHAR_LF) { - ADD_NEW_DATA(-(state_offset + 1), 0, 1); - } - else - { - ADD_NEW(state_offset + 1, 0); - } - break; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_NOT_VSPACE: - if (clen > 0) switch(c) - { + ADD_NEW_DATA(-(state_offset + 1), 0, 1); + } + else + { + ADD_NEW(state_offset + 1, 0); + } + break; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_NOT_VSPACE: + if (clen > 0) switch(c) + { VSPACE_CASES: - break; - - default: - ADD_NEW(state_offset + 1, 0); - break; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE: - if (clen > 0) switch(c) - { + break; + + default: + ADD_NEW(state_offset + 1, 0); + break; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_VSPACE: + if (clen > 0) switch(c) + { VSPACE_CASES: - ADD_NEW(state_offset + 1, 0); - break; - + ADD_NEW(state_offset + 1, 0); + break; + default: break; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_NOT_HSPACE: - if (clen > 0) switch(c) - { + } + break; + + /*-----------------------------------------------------------------*/ + case OP_NOT_HSPACE: + if (clen > 0) switch(c) + { HSPACE_CASES: - break; - - default: - ADD_NEW(state_offset + 1, 0); - break; - } - break; - - 
/*-----------------------------------------------------------------*/ - case OP_HSPACE: - if (clen > 0) switch(c) - { + break; + + default: + ADD_NEW(state_offset + 1, 0); + break; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE: + if (clen > 0) switch(c) + { HSPACE_CASES: - ADD_NEW(state_offset + 1, 0); - break; + ADD_NEW(state_offset + 1, 0); + break; default: break; - } - break; - - /*-----------------------------------------------------------------*/ + } + break; + + /*-----------------------------------------------------------------*/ /* Match a negated single character casefully. */ - - case OP_NOT: + + case OP_NOT: if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } break; @@ -2285,8 +2285,8 @@ for (;;) /* Match a negated single character caselessly. */ case OP_NOTI: - if (clen > 0) - { + if (clen > 0) + { pcre_uint32 otherd; #ifdef SUPPORT_UTF if (utf && d >= 128) @@ -2302,10 +2302,10 @@ for (;;) otherd = TABLE_GET(d, fcc, d); if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); } - } - break; - - /*-----------------------------------------------------------------*/ + } + break; + + /*-----------------------------------------------------------------*/ case OP_PLUSI: case OP_MINPLUSI: case OP_POSPLUSI: @@ -2316,45 +2316,45 @@ for (;;) codevalue -= OP_STARI - OP_STAR; /* Fall through */ - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); } - if (clen > 0) - { + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); } + if (clen > 0) + { pcre_uint32 otherd = NOTACHAR; if (caseless) - { + { #ifdef SUPPORT_UTF if (utf && d >= 128) - { 
-#ifdef SUPPORT_UCP + { +#ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else +#endif /* SUPPORT_UCP */ + } + else #endif /* SUPPORT_UTF */ otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (count > 0 && - (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS)) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW(state_offset, count); - } - } - break; - - /*-----------------------------------------------------------------*/ + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (count > 0 && + (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS)) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW(state_offset, count); + } + } + break; + + /*-----------------------------------------------------------------*/ case OP_QUERYI: case OP_MINQUERYI: case OP_POSQUERYI: @@ -2364,42 +2364,42 @@ for (;;) caseless = TRUE; codevalue -= OP_STARI - OP_STAR; /* Fall through */ - case OP_QUERY: - case OP_MINQUERY: - case OP_POSQUERY: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - case OP_NOTPOSQUERY: - ADD_ACTIVE(state_offset + dlen + 1, 0); - if (clen > 0) - { + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + ADD_ACTIVE(state_offset + dlen + 1, 0); + if (clen > 0) + { pcre_uint32 otherd = NOTACHAR; if (caseless) - { + { #ifdef SUPPORT_UTF if (utf && d >= 128) - { -#ifdef SUPPORT_UCP + { +#ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else +#endif /* SUPPORT_UCP */ + } + else #endif /* SUPPORT_UTF */ otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset + dlen + 1, 
0); - } - } - break; - - /*-----------------------------------------------------------------*/ + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset + dlen + 1, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ case OP_STARI: case OP_MINSTARI: case OP_POSSTARI: @@ -2409,77 +2409,77 @@ for (;;) caseless = TRUE; codevalue -= OP_STARI - OP_STAR; /* Fall through */ - case OP_STAR: - case OP_MINSTAR: - case OP_POSSTAR: - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPOSSTAR: - ADD_ACTIVE(state_offset + dlen + 1, 0); - if (clen > 0) - { + case OP_STAR: + case OP_MINSTAR: + case OP_POSSTAR: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPOSSTAR: + ADD_ACTIVE(state_offset + dlen + 1, 0); + if (clen > 0) + { pcre_uint32 otherd = NOTACHAR; if (caseless) - { + { #ifdef SUPPORT_UTF if (utf && d >= 128) - { -#ifdef SUPPORT_UCP + { +#ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else +#endif /* SUPPORT_UCP */ + } + else #endif /* SUPPORT_UTF */ otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ case OP_EXACTI: case OP_NOTEXACTI: caseless = TRUE; codevalue -= OP_STARI - OP_STAR; /* 
Fall through */ - case OP_EXACT: - case OP_NOTEXACT: - count = current_state->count; /* Number already matched */ - if (clen > 0) - { + case OP_EXACT: + case OP_NOTEXACT: + count = current_state->count; /* Number already matched */ + if (clen > 0) + { pcre_uint32 otherd = NOTACHAR; if (caseless) - { + { #ifdef SUPPORT_UTF if (utf && d >= 128) - { -#ifdef SUPPORT_UCP + { +#ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else +#endif /* SUPPORT_UCP */ + } + else #endif /* SUPPORT_UTF */ otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { if (++count >= (int)GET2(code, 1)) { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - /*-----------------------------------------------------------------*/ + else + { ADD_NEW(state_offset, count); } + } + } + break; + + /*-----------------------------------------------------------------*/ case OP_UPTOI: case OP_MINUPTOI: case OP_POSUPTOI: @@ -2489,92 +2489,92 @@ for (;;) caseless = TRUE; codevalue -= OP_STARI - OP_STAR; /* Fall through */ - case OP_UPTO: - case OP_MINUPTO: - case OP_POSUPTO: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTPOSUPTO: + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTPOSUPTO: ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0); - count = current_state->count; /* Number already matched */ - if (clen > 0) - { + count = current_state->count; /* Number already matched */ + if (clen > 0) + { pcre_uint32 otherd = NOTACHAR; if (caseless) - { + { #ifdef SUPPORT_UTF if (utf && d >= 128) - { -#ifdef SUPPORT_UCP + { +#ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else +#endif /* SUPPORT_UCP */ + } + else #endif /* SUPPORT_UTF */ otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < 
OP_NOTSTAR)) - { - if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } if (++count >= (int)GET2(code, 1)) { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - -/* ========================================================================== */ - /* These are the class-handling opcodes */ - - case OP_CLASS: - case OP_NCLASS: - case OP_XCLASS: - { - BOOL isinclass = FALSE; - int next_state_offset; + else + { ADD_NEW(state_offset, count); } + } + } + break; + + +/* ========================================================================== */ + /* These are the class-handling opcodes */ + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + { + BOOL isinclass = FALSE; + int next_state_offset; const pcre_uchar *ecode; - - /* For a simple class, there is always just a 32-byte table, and we - can set isinclass from it. */ - - if (codevalue != OP_XCLASS) - { + + /* For a simple class, there is always just a 32-byte table, and we + can set isinclass from it. */ + + if (codevalue != OP_XCLASS) + { ecode = code + 1 + (32 / sizeof(pcre_uchar)); - if (clen > 0) - { - isinclass = (c > 255)? (codevalue == OP_NCLASS) : + if (clen > 0) + { + isinclass = (c > 255)? (codevalue == OP_NCLASS) : ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0); - } - } - - /* An extended class may have a table or a list of single characters, - ranges, or both, and it may be positive or negative. There's a - function that sorts all this out. */ - - else - { - ecode = code + GET(code, 1); + } + } + + /* An extended class may have a table or a list of single characters, + ranges, or both, and it may be positive or negative. 
There's a + function that sorts all this out. */ + + else + { + ecode = code + GET(code, 1); if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf); - } - - /* At this point, isinclass is set for all kinds of class, and ecode - points to the byte after the end of the class. If there is a - quantifier, this is where it will be. */ - + } + + /* At this point, isinclass is set for all kinds of class, and ecode + points to the byte after the end of the class. If there is a + quantifier, this is where it will be. */ + next_state_offset = (int)(ecode - start_code); - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: case OP_CRPOSSTAR: - ADD_ACTIVE(next_state_offset + 1, 0); + ADD_ACTIVE(next_state_offset + 1, 0); if (isinclass) { if (*ecode == OP_CRPOSSTAR) @@ -2584,13 +2584,13 @@ for (;;) } ADD_NEW(state_offset, 0); } - break; - - case OP_CRPLUS: - case OP_CRMINPLUS: + break; + + case OP_CRPLUS: + case OP_CRMINPLUS: case OP_CRPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); } + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); } if (isinclass) { if (count > 0 && *ecode == OP_CRPOSPLUS) @@ -2601,12 +2601,12 @@ for (;;) count++; ADD_NEW(state_offset, count); } - break; - - case OP_CRQUERY: - case OP_CRMINQUERY: + break; + + case OP_CRQUERY: + case OP_CRMINQUERY: case OP_CRPOSQUERY: - ADD_ACTIVE(next_state_offset + 1, 0); + ADD_ACTIVE(next_state_offset + 1, 0); if (isinclass) { if (*ecode == OP_CRPOSQUERY) @@ -2616,90 +2616,90 @@ for (;;) } ADD_NEW(next_state_offset + 1, 0); } - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: case OP_CRPOSRANGE: - count = current_state->count; /* Already matched */ + count = current_state->count; /* Already matched */ if (count >= (int)GET2(ecode, 1)) { 
ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } - if (isinclass) - { + if (isinclass) + { int max = (int)GET2(ecode, 1 + IMM2_SIZE); if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1)) { active_count--; /* Remove non-match possibility */ next_active_state--; } - if (++count >= max && max != 0) /* Max 0 => no limit */ + if (++count >= max && max != 0) /* Max 0 => no limit */ { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - break; - - default: - if (isinclass) { ADD_NEW(next_state_offset, 0); } - break; - } - } - break; - -/* ========================================================================== */ - /* These are the opcodes for fancy brackets of various kinds. We have + else + { ADD_NEW(state_offset, count); } + } + break; + + default: + if (isinclass) { ADD_NEW(next_state_offset, 0); } + break; + } + } + break; + +/* ========================================================================== */ + /* These are the opcodes for fancy brackets of various kinds. We have to use recursion in order to handle them. The "always failing" assertion (?!) is optimised to OP_FAIL when compiling, so we have to support that, though the other "backtracking verbs" are not supported. 
*/ - + case OP_FAIL: forced_fail++; /* Count FAILs for multiple states */ break; - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - { - int rc; - int local_offsets[2]; - int local_workspace[1000]; + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + { + int rc; + int local_offsets[2]; + int local_workspace[1000]; const pcre_uchar *endasscode = code + GET(code, 1); - - while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); - - rc = internal_dfa_exec( - md, /* static match data */ - code, /* this subexpression's code */ - ptr, /* where we currently are */ + + while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); + + rc = internal_dfa_exec( + md, /* static match data */ + code, /* this subexpression's code */ + ptr, /* where we currently are */ (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ rlevel); /* function recursion level */ - + if (rc == PCRE_ERROR_DFA_UITEM) return rc; - if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) + if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_COND: - case OP_SCOND: - { - int local_offsets[1000]; - int local_workspace[1000]; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_COND: + case OP_SCOND: + { + int local_offsets[1000]; + int local_workspace[1000]; int codelink = GET(code, 1); int condcode; - + /* 
Because of the way auto-callout works during compile, a callout item is inserted between OP_COND and an assertion condition. This does not happen for the other conditions. */ - + if (code[LINK_SIZE+1] == OP_CALLOUT) - { + { rrc = 0; if (PUBL(callout) != NULL) { @@ -2727,8 +2727,8 @@ for (;;) } if (rrc > 0) break; /* Fail this thread */ code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */ - } - + } + condcode = code[LINK_SIZE+1]; /* Back reference conditions and duplicate named recursion conditions @@ -2744,63 +2744,63 @@ for (;;) if (condcode == OP_DEF || condcode == OP_FAIL) { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } - /* The only supported version of OP_RREF is for the value RREF_ANY, - which means "test if in any recursion". We can't test for specifically - recursed groups. */ - - else if (condcode == OP_RREF) - { + /* The only supported version of OP_RREF is for the value RREF_ANY, + which means "test if in any recursion". We can't test for specifically + recursed groups. 
*/ + + else if (condcode == OP_RREF) + { int value = GET2(code, LINK_SIZE + 2); - if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; + if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; if (md->recursive != NULL) { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); } else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } - } - - /* Otherwise, the condition is an assertion */ - - else - { - int rc; + } + + /* Otherwise, the condition is an assertion */ + + else + { + int rc; const pcre_uchar *asscode = code + LINK_SIZE + 1; const pcre_uchar *endasscode = asscode + GET(asscode, 1); - - while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); - - rc = internal_dfa_exec( - md, /* fixed match data */ - asscode, /* this subexpression's code */ - ptr, /* where we currently are */ + + while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); + + rc = internal_dfa_exec( + md, /* fixed match data */ + asscode, /* this subexpression's code */ + ptr, /* where we currently are */ (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ rlevel); /* function recursion level */ - + if (rc == PCRE_ERROR_DFA_UITEM) return rc; - if ((rc >= 0) == - (condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) + if ((rc >= 0) == + (condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } - else + else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_RECURSE: - { + } + } + break; + + 
/*-----------------------------------------------------------------*/ + case OP_RECURSE: + { dfa_recursion_info *ri; - int local_offsets[1000]; - int local_workspace[1000]; + int local_offsets[1000]; + int local_workspace[1000]; const pcre_uchar *callpat = start_code + GET(code, 1); int recno = (callpat == md->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE); - int rc; - + int rc; + DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP)); - + /* Check for repeating a recursion without advancing the subject pointer. This should catch convoluted mutual recursions. (Some simple cases are caught at compile time.) */ @@ -2817,35 +2817,35 @@ for (;;) new_recursive.prevrec = md->recursive; md->recursive = &new_recursive; - rc = internal_dfa_exec( - md, /* fixed match data */ + rc = internal_dfa_exec( + md, /* fixed match data */ callpat, /* this subexpression's code */ - ptr, /* where we currently are */ + ptr, /* where we currently are */ (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ rlevel); /* function recursion level */ - + md->recursive = new_recursive.prevrec; /* Done this recursion */ - + DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP, rc)); - /* Ran out of internal offsets */ - - if (rc == 0) return PCRE_ERROR_DFA_RECURSE; - - /* For each successful matched substring, set up the next state with a - count of characters to skip before trying it. Note that the count is in - characters, not bytes. 
*/ - - if (rc > 0) - { - for (rc = rc*2 - 2; rc >= 0; rc -= 2) - { - int charcount = local_offsets[rc+1] - local_offsets[rc]; + /* Ran out of internal offsets */ + + if (rc == 0) return PCRE_ERROR_DFA_RECURSE; + + /* For each successful matched substring, set up the next state with a + count of characters to skip before trying it. Note that the count is in + characters, not bytes. */ + + if (rc > 0) + { + for (rc = rc*2 - 2; rc >= 0; rc -= 2) + { + int charcount = local_offsets[rc+1] - local_offsets[rc]; #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf) { @@ -2854,21 +2854,21 @@ for (;;) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; } #endif - if (charcount > 0) - { - ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); - } - else - { - ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0); - } - } - } - else if (rc != PCRE_ERROR_NOMATCH) return rc; - } - break; - - /*-----------------------------------------------------------------*/ + if (charcount > 0) + { + ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); + } + else + { + ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0); + } + } + } + else if (rc != PCRE_ERROR_NOMATCH) return rc; + } + break; + + /*-----------------------------------------------------------------*/ case OP_BRAPOS: case OP_SBRAPOS: case OP_CBRAPOS: @@ -2960,78 +2960,78 @@ for (;;) break; /*-----------------------------------------------------------------*/ - case OP_ONCE: + case OP_ONCE: case OP_ONCE_NC: - { - int local_offsets[2]; - int local_workspace[1000]; - - int rc = internal_dfa_exec( - md, /* fixed match data */ - code, /* this subexpression's code */ - ptr, /* where we currently are */ + { + int local_offsets[2]; + int local_workspace[1000]; + + int rc = internal_dfa_exec( + md, /* fixed match data */ + code, /* this subexpression's code */ + ptr, /* where we currently are */ (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size 
of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ rlevel); /* function recursion level */ - - if (rc >= 0) - { + + if (rc >= 0) + { const pcre_uchar *end_subpattern = code; - int charcount = local_offsets[1] - local_offsets[0]; - int next_state_offset, repeat_state_offset; - - do { end_subpattern += GET(end_subpattern, 1); } - while (*end_subpattern == OP_ALT); + int charcount = local_offsets[1] - local_offsets[0]; + int next_state_offset, repeat_state_offset; + + do { end_subpattern += GET(end_subpattern, 1); } + while (*end_subpattern == OP_ALT); next_state_offset = (int)(end_subpattern - start_code + LINK_SIZE + 1); - - /* If the end of this subpattern is KETRMAX or KETRMIN, we must - arrange for the repeat state also to be added to the relevant list. - Calculate the offset, or set -1 for no repeat. */ - - repeat_state_offset = (*end_subpattern == OP_KETRMAX || - *end_subpattern == OP_KETRMIN)? + + /* If the end of this subpattern is KETRMAX or KETRMIN, we must + arrange for the repeat state also to be added to the relevant list. + Calculate the offset, or set -1 for no repeat. */ + + repeat_state_offset = (*end_subpattern == OP_KETRMAX || + *end_subpattern == OP_KETRMIN)? (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1; - - /* If we have matched an empty string, add the next state at the - current character pointer. This is important so that the duplicate - checking kicks in, which is what breaks infinite loops that match an - empty string. */ - - if (charcount == 0) - { - ADD_ACTIVE(next_state_offset, 0); - } - - /* Optimization: if there are no more active states, and there - are no new states yet set up, then skip over the subject string - right here, to save looping. 
Otherwise, set up the new state to swing + + /* If we have matched an empty string, add the next state at the + current character pointer. This is important so that the duplicate + checking kicks in, which is what breaks infinite loops that match an + empty string. */ + + if (charcount == 0) + { + ADD_ACTIVE(next_state_offset, 0); + } + + /* Optimization: if there are no more active states, and there + are no new states yet set up, then skip over the subject string + right here, to save looping. Otherwise, set up the new state to swing into action when the end of the matched substring is reached. */ - - else if (i + 1 >= active_count && new_count == 0) - { - ptr += charcount; - clen = 0; - ADD_NEW(next_state_offset, 0); - - /* If we are adding a repeat state at the new character position, - we must fudge things so that it is the only current state. - Otherwise, it might be a duplicate of one we processed before, and - that would cause it to be skipped. */ - - if (repeat_state_offset >= 0) - { - next_active_state = active_states; - active_count = 0; - i = -1; - ADD_ACTIVE(repeat_state_offset, 0); - } - } - else - { + + else if (i + 1 >= active_count && new_count == 0) + { + ptr += charcount; + clen = 0; + ADD_NEW(next_state_offset, 0); + + /* If we are adding a repeat state at the new character position, + we must fudge things so that it is the only current state. + Otherwise, it might be a duplicate of one we processed before, and + that would cause it to be skipped. 
*/ + + if (repeat_state_offset >= 0) + { + next_active_state = active_states; + active_count = 0; + i = -1; + ADD_ACTIVE(repeat_state_offset, 0); + } + } + else + { #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (utf) { @@ -3040,29 +3040,29 @@ for (;;) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; } #endif - ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); - if (repeat_state_offset >= 0) - { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } - } - } - else if (rc != PCRE_ERROR_NOMATCH) return rc; - } - break; - - -/* ========================================================================== */ - /* Handle callouts */ - - case OP_CALLOUT: + ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); + if (repeat_state_offset >= 0) + { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } + } + } + else if (rc != PCRE_ERROR_NOMATCH) return rc; + } + break; + + +/* ========================================================================== */ + /* Handle callouts */ + + case OP_CALLOUT: rrc = 0; if (PUBL(callout) != NULL) - { + { PUBL(callout_block) cb; - cb.version = 1; /* Version 1 of the callout block */ - cb.callout_number = code[1]; - cb.offset_vector = offsets; + cb.version = 1; /* Version 1 of the callout block */ + cb.callout_number = code[1]; + cb.offset_vector = offsets; #if defined COMPILE_PCRE8 - cb.subject = (PCRE_SPTR)start_subject; + cb.subject = (PCRE_SPTR)start_subject; #elif defined COMPILE_PCRE16 cb.subject = (PCRE_SPTR16)start_subject; #elif defined COMPILE_PCRE32 @@ -3071,33 +3071,33 @@ for (;;) cb.subject_length = (int)(end_subject - start_subject); cb.start_match = (int)(current_subject - start_subject); cb.current_position = (int)(ptr - start_subject); - cb.pattern_position = GET(code, 2); - cb.next_item_length = GET(code, 2 + LINK_SIZE); - cb.capture_top = 1; - cb.capture_last = -1; - cb.callout_data = md->callout_data; + cb.pattern_position = GET(code, 2); + cb.next_item_length = GET(code, 2 + LINK_SIZE); + 
cb.capture_top = 1; + cb.capture_last = -1; + cb.callout_data = md->callout_data; cb.mark = NULL; /* No (*MARK) support */ if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ - } + } if (rrc == 0) { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); } - break; - - -/* ========================================================================== */ - default: /* Unsupported opcode */ - return PCRE_ERROR_DFA_UITEM; - } - - NEXT_ACTIVE_STATE: continue; - - } /* End of loop scanning active states */ - - /* We have finished the processing at the current subject character. If no - new states have been set for the next character, we have found all the - matches that we are going to find. If we are at the top level and partial + break; + + +/* ========================================================================== */ + default: /* Unsupported opcode */ + return PCRE_ERROR_DFA_UITEM; + } + + NEXT_ACTIVE_STATE: continue; + + } /* End of loop scanning active states */ + + /* We have finished the processing at the current subject character. If no + new states have been set for the next character, we have found all the + matches that we are going to find. If we are at the top level and partial matching has been requested, check for appropriate conditions. - + The "forced_ fail" variable counts the number of (*F) encountered for the character. If it is equal to the original active_count (saved in workspace[1]) it means that (*F) was found on every active state. In this @@ -3106,8 +3106,8 @@ for (;;) The "could_continue" variable is true if a state could have continued but for the fact that the end of the subject was reached. 
*/ - if (new_count <= 0) - { + if (new_count <= 0) + { if (rlevel == 1 && /* Top level, and */ could_continue && /* Some could go on, and */ forced_fail != workspace[1] && /* Not all forced fail & */ @@ -3124,61 +3124,61 @@ for (;;) ptr > md->start_used_ptr) /* Inspected non-empty string */ ) ) - match_count = PCRE_ERROR_PARTIAL; - DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n" - "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count, - rlevel*2-2, SP)); - break; /* In effect, "return", but see the comment below */ - } - - /* One or more states are active for the next character. */ - - ptr += clen; /* Advance to next subject character */ - } /* Loop to move along the subject string */ - -/* Control gets here from "break" a few lines above. We do it this way because -if we use "return" above, we have compiler trouble. Some compilers warn if -there's nothing here because they think the function doesn't return a value. On -the other hand, if we put a dummy statement here, some more clever compilers -complain that it can't be reached. Sigh. */ - -return match_count; -} - - - - -/************************************************* -* Execute a Regular Expression - DFA engine * -*************************************************/ - -/* This external function applies a compiled re to a subject string using a DFA -engine. This function calls the internal function multiple times if the pattern -is not anchored. 
- -Arguments: - argument_re points to the compiled expression - extra_data points to extra data or is NULL - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - offsets vector of match offsets - offsetcount size of same - workspace workspace vector - wscount size of same - -Returns: > 0 => number of match offset pairs placed in offsets - = 0 => offsets overflowed; longest matches are present - -1 => failed to match - < -1 => some kind of unexpected problem -*/ - + match_count = PCRE_ERROR_PARTIAL; + DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n" + "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count, + rlevel*2-2, SP)); + break; /* In effect, "return", but see the comment below */ + } + + /* One or more states are active for the next character. */ + + ptr += clen; /* Advance to next subject character */ + } /* Loop to move along the subject string */ + +/* Control gets here from "break" a few lines above. We do it this way because +if we use "return" above, we have compiler trouble. Some compilers warn if +there's nothing here because they think the function doesn't return a value. On +the other hand, if we put a dummy statement here, some more clever compilers +complain that it can't be reached. Sigh. */ + +return match_count; +} + + + + +/************************************************* +* Execute a Regular Expression - DFA engine * +*************************************************/ + +/* This external function applies a compiled re to a subject string using a DFA +engine. This function calls the internal function multiple times if the pattern +is not anchored. 
+ +Arguments: + argument_re points to the compiled expression + extra_data points to extra data or is NULL + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + offsets vector of match offsets + offsetcount size of same + workspace workspace vector + wscount size of same + +Returns: > 0 => number of match offset pairs placed in offsets + = 0 => offsets overflowed; longest matches are present + -1 => failed to match + < -1 => some kind of unexpected problem +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, - const char *subject, int length, int start_offset, int options, int *offsets, - int offsetcount, int *workspace, int wscount) +pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, + const char *subject, int length, int start_offset, int options, int *offsets, + int offsetcount, int *workspace, int wscount) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, @@ -3190,14 +3190,14 @@ pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets, int offsetcount, int *workspace, int wscount) #endif -{ +{ REAL_PCRE *re = (REAL_PCRE *)argument_re; -dfa_match_data match_block; -dfa_match_data *md = &match_block; +dfa_match_data match_block; +dfa_match_data *md = &match_block; BOOL utf, anchored, startline, firstline; const pcre_uchar *current_subject, *end_subject; -const pcre_study_data *study = NULL; - +const pcre_study_data *study = NULL; + const pcre_uchar *req_char_ptr; const pcre_uint8 *start_bits = NULL; BOOL has_first_char = FALSE; @@ -3206,23 +3206,23 @@ pcre_uchar first_char = 0; pcre_uchar first_char2 = 0; pcre_uchar req_char = 0; pcre_uchar req_char2 = 0; 
-int newline; - -/* Plausibility checks */ - -if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; -if (re == NULL || subject == NULL || workspace == NULL || - (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; -if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; -if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; +int newline; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; +if (re == NULL || subject == NULL || workspace == NULL || + (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; +if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; +if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; if (length < 0) return PCRE_ERROR_BADLENGTH; if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; - + /* Check that the first field in the block is the magic number. If it is not, return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which means that the pattern is likely compiled with different endianness. */ - + if (re->magic_number != MAGIC_NUMBER) return re->magic_number == REVERSED_MAGIC_NUMBER? 
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; @@ -3240,116 +3240,116 @@ if ((options & PCRE_DFA_RESTART) != 0) /* Set up study, callout, and table data */ -md->tables = re->tables; -md->callout_data = NULL; - -if (extra_data != NULL) - { +md->tables = re->tables; +md->callout_data = NULL; + +if (extra_data != NULL) + { unsigned long int flags = extra_data->flags; - if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = (const pcre_study_data *)extra_data->study_data; - if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT; - if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0) - return PCRE_ERROR_DFA_UMLIMIT; - if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) - md->callout_data = extra_data->callout_data; - if ((flags & PCRE_EXTRA_TABLES) != 0) - md->tables = extra_data->tables; - } - -/* Set some local values */ - + if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) + study = (const pcre_study_data *)extra_data->study_data; + if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT; + if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0) + return PCRE_ERROR_DFA_UMLIMIT; + if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) + md->callout_data = extra_data->callout_data; + if ((flags & PCRE_EXTRA_TABLES) != 0) + md->tables = extra_data->tables; + } + +/* Set some local values */ + current_subject = (const pcre_uchar *)subject + start_offset; end_subject = (const pcre_uchar *)subject + length; req_char_ptr = current_subject - 1; - + #ifdef SUPPORT_UTF /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ utf = (re->options & PCRE_UTF8) != 0; -#else +#else utf = FALSE; -#endif - -anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || - (re->options & PCRE_ANCHORED) != 0; - -/* The remaining fixed data for passing around. */ - +#endif + +anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || + (re->options & PCRE_ANCHORED) != 0; + +/* The remaining fixed data for passing around. 
*/ + md->start_code = (const pcre_uchar *)argument_re + - re->name_table_offset + re->name_count * re->name_entry_size; + re->name_table_offset + re->name_count * re->name_entry_size; md->start_subject = (const pcre_uchar *)subject; -md->end_subject = end_subject; +md->end_subject = end_subject; md->start_offset = start_offset; -md->moptions = options; -md->poptions = re->options; - -/* If the BSR option is not set at match time, copy what was set -at compile time. */ - -if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0) - { - if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) - md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE); -#ifdef BSR_ANYCRLF - else md->moptions |= PCRE_BSR_ANYCRLF; -#endif - } - -/* Handle different types of newline. The three bits give eight cases. If -nothing is set at run time, whatever was used at compile time applies. */ - -switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) & - PCRE_NEWLINE_BITS) - { - case 0: newline = NEWLINE; break; /* Compile-time default */ +md->moptions = options; +md->poptions = re->options; + +/* If the BSR option is not set at match time, copy what was set +at compile time. */ + +if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0) + { + if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) + md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE); +#ifdef BSR_ANYCRLF + else md->moptions |= PCRE_BSR_ANYCRLF; +#endif + } + +/* Handle different types of newline. The three bits give eight cases. If +nothing is set at run time, whatever was used at compile time applies. */ + +switch ((((options & PCRE_NEWLINE_BITS) == 0)? 
re->options : (pcre_uint32)options) & + PCRE_NEWLINE_BITS) + { + case 0: newline = NEWLINE; break; /* Compile-time default */ case PCRE_NEWLINE_CR: newline = CHAR_CR; break; case PCRE_NEWLINE_LF: newline = CHAR_NL; break; - case PCRE_NEWLINE_CR+ + case PCRE_NEWLINE_CR+ PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break; - case PCRE_NEWLINE_ANY: newline = -1; break; - case PCRE_NEWLINE_ANYCRLF: newline = -2; break; - default: return PCRE_ERROR_BADNEWLINE; - } - -if (newline == -2) - { - md->nltype = NLTYPE_ANYCRLF; - } -else if (newline < 0) - { - md->nltype = NLTYPE_ANY; - } -else - { - md->nltype = NLTYPE_FIXED; - if (newline > 255) - { - md->nllen = 2; - md->nl[0] = (newline >> 8) & 255; - md->nl[1] = newline & 255; - } - else - { - md->nllen = 1; - md->nl[0] = newline; - } - } - -/* Check a UTF-8 string if required. Unfortunately there's no way of passing -back the character offset. */ - + case PCRE_NEWLINE_ANY: newline = -1; break; + case PCRE_NEWLINE_ANYCRLF: newline = -2; break; + default: return PCRE_ERROR_BADNEWLINE; + } + +if (newline == -2) + { + md->nltype = NLTYPE_ANYCRLF; + } +else if (newline < 0) + { + md->nltype = NLTYPE_ANY; + } +else + { + md->nltype = NLTYPE_FIXED; + if (newline > 255) + { + md->nllen = 2; + md->nl[0] = (newline >> 8) & 255; + md->nl[1] = newline & 255; + } + else + { + md->nllen = 1; + md->nl[0] = newline; + } + } + +/* Check a UTF-8 string if required. Unfortunately there's no way of passing +back the character offset. */ + #ifdef SUPPORT_UTF if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) - { + { int erroroffset; int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset); if (errorcode != 0) - { + { if (offsetcount >= 2) - { + { offsets[0] = erroroffset; offsets[1] = errorcode; - } + } #if defined COMPILE_PCRE8 return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ? 
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; @@ -3359,37 +3359,37 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) #elif defined COMPILE_PCRE32 return PCRE_ERROR_BADUTF32; #endif - } + } #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 if (start_offset > 0 && start_offset < length && NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) return PCRE_ERROR_BADUTF8_OFFSET; #endif - } -#endif - -/* If the exec call supplied NULL for tables, use the inbuilt ones. This -is a feature that makes it possible to save compiled regex and re-use them -in other programs later. */ - + } +#endif + +/* If the exec call supplied NULL for tables, use the inbuilt ones. This +is a feature that makes it possible to save compiled regex and re-use them +in other programs later. */ + if (md->tables == NULL) md->tables = PRIV(default_tables); - + /* The "must be at the start of a line" flags are used in a loop when finding where to start. */ - -startline = (re->flags & PCRE_STARTLINE) != 0; -firstline = (re->options & PCRE_FIRSTLINE) != 0; - -/* Set up the first character to match, if available. The first_byte value is -never set for an anchored regular expression, but the anchoring may be forced -at run time, so we have to test for anchoring. The first char may be unset for -an unanchored pattern, of course. If there's no first char and the pattern was -studied, there may be a bitmap of possible first characters. */ - -if (!anchored) - { - if ((re->flags & PCRE_FIRSTSET) != 0) - { + +startline = (re->flags & PCRE_STARTLINE) != 0; +firstline = (re->options & PCRE_FIRSTLINE) != 0; + +/* Set up the first character to match, if available. The first_byte value is +never set for an anchored regular expression, but the anchoring may be forced +at run time, so we have to test for anchoring. The first char may be unset for +an unanchored pattern, of course. If there's no first char and the pattern was +studied, there may be a bitmap of possible first characters. 
*/ + +if (!anchored) + { + if ((re->flags & PCRE_FIRSTSET) != 0) + { has_first_char = TRUE; first_char = first_char2 = (pcre_uchar)(re->first_char); if ((re->flags & PCRE_FCH_CASELESS) != 0) @@ -3400,20 +3400,20 @@ if (!anchored) first_char2 = UCD_OTHERCASE(first_char); #endif } - } - else - { + } + else + { if (!startline && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) - start_bits = study->start_bits; - } - } - -/* For anchored or unanchored matches, there may be a "last known required -character" set. */ - -if ((re->flags & PCRE_REQCHSET) != 0) - { + start_bits = study->start_bits; + } + } + +/* For anchored or unanchored matches, there may be a "last known required +character" set. */ + +if ((re->flags & PCRE_REQCHSET) != 0) + { has_req_char = TRUE; req_char = req_char2 = (pcre_uchar)(re->req_char); if ((re->flags & PCRE_RCH_CASELESS) != 0) @@ -3424,27 +3424,27 @@ if ((re->flags & PCRE_REQCHSET) != 0) req_char2 = UCD_OTHERCASE(req_char); #endif } - } - -/* Call the main matching function, looping for a non-anchored regex after a + } + +/* Call the main matching function, looping for a non-anchored regex after a failed match. If not restarting, perform certain optimizations at the start of a match. */ - -for (;;) - { - int rc; - - if ((options & PCRE_DFA_RESTART) == 0) - { + +for (;;) + { + int rc; + + if ((options & PCRE_DFA_RESTART) == 0) + { const pcre_uchar *save_end_subject = end_subject; - + /* If firstline is TRUE, the start of the match is constrained to the first line of a multiline string. Implement this by temporarily adjusting end_subject so that we stop scanning at a newline. If the match fails at the newline, later code breaks this loop. 
*/ - - if (firstline) - { + + if (firstline) + { PCRE_PUCHAR t = current_subject; #ifdef SUPPORT_UTF if (utf) @@ -3457,10 +3457,10 @@ for (;;) } else #endif - while (t < md->end_subject && !IS_NEWLINE(t)) t++; - end_subject = t; - } - + while (t < md->end_subject && !IS_NEWLINE(t)) t++; + end_subject = t; + } + /* There are some optimizations that avoid running the match if a known starting point is not found. However, there is an option that disables these, for testing and for ensuring that all callouts do actually occur. @@ -3468,9 +3468,9 @@ for (;;) match-time options. */ if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) - { + { /* Advance to a known first pcre_uchar (i.e. data item) */ - + if (has_first_char) { if (first_char != first_char2) @@ -3485,11 +3485,11 @@ for (;;) UCHAR21TEST(current_subject) != first_char) current_subject++; } - + /* Or to just after a linebreak for a multiline match if possible */ else if (startline) - { + { if (current_subject > md->start_subject + start_offset) { #ifdef SUPPORT_UTF @@ -3507,23 +3507,23 @@ for (;;) #endif while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) current_subject++; - + /* If we have just passed a CR and the newline option is ANY or ANYCRLF, and we are now at a LF, advance the match position by one more character. 
*/ - + if (UCHAR21TEST(current_subject - 1) == CHAR_CR && (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && current_subject < end_subject && UCHAR21TEST(current_subject) == CHAR_NL) current_subject++; } - } - + } + /* Advance to a non-unique first pcre_uchar after study */ - + else if (start_bits != NULL) - { + { while (current_subject < end_subject) { register pcre_uint32 c = UCHAR21TEST(current_subject); @@ -3533,17 +3533,17 @@ for (;;) if ((start_bits[c/8] & (1 << (c&7))) != 0) break; current_subject++; } - } - } - - /* Restore fudged end_subject */ - - end_subject = save_end_subject; - + } + } + + /* Restore fudged end_subject */ + + end_subject = save_end_subject; + /* The following two optimizations are disabled for partial matching or if disabling is explicitly requested (and of course, by the test above, this code is not obeyed when restarting after a partial match). */ - + if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0) { @@ -3552,11 +3552,11 @@ for (;;) pattern. Although the value is, strictly, in characters, we treat it as in pcre_uchar units to avoid spending too much time in this optimization. */ - + if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && (pcre_uint32)(end_subject - current_subject) < study->minlength) return PCRE_ERROR_NOMATCH; - + /* If req_char is set, we know that that pcre_uchar must appear in the subject for the match to succeed. If the first pcre_uchar is set, req_char must be later in the subject; otherwise the test starts at the @@ -3564,21 +3564,21 @@ for (;;) with nested unlimited repeats that aren't going to match. Writing separate code for cased/caseless versions makes it go faster, as does using an autoincrement and backing off on a match. - + HOWEVER: when the subject string is very, very long, searching to its end can take a long time, and give bad performance on quite ordinary patterns. 
This showed up when somebody was matching /^C/ on a 32-megabyte string... so we don't do this when the string is sufficiently long. */ - + if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX) - { + { register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0); /* We don't need to repeat the search if we haven't yet reached the place we found it at last time. */ if (p > req_char_ptr) - { + { if (req_char != req_char2) { while (p < end_subject) @@ -3594,41 +3594,41 @@ for (;;) if (UCHAR21INCTEST(p) == req_char) { p--; break; } } } - + /* If we can't find the required pcre_uchar, break the matching loop, which will cause a return or PCRE_ERROR_NOMATCH. */ - + if (p >= end_subject) break; - + /* If we have found the required pcre_uchar, save the point where we found it, so that we don't search again next time round the loop if the start hasn't passed this point yet. */ - + req_char_ptr = p; } } - } + } } /* End of optimizations that are done when not restarting */ - - /* OK, now we can do the business */ - + + /* OK, now we can do the business */ + md->start_used_ptr = current_subject; md->recursive = NULL; - rc = internal_dfa_exec( - md, /* fixed match data */ - md->start_code, /* this subexpression's code */ - current_subject, /* where we currently are */ - start_offset, /* start offset in subject */ - offsets, /* offset vector */ - offsetcount, /* size of same */ - workspace, /* workspace vector */ - wscount, /* size of same */ + rc = internal_dfa_exec( + md, /* fixed match data */ + md->start_code, /* this subexpression's code */ + current_subject, /* where we currently are */ + start_offset, /* start offset in subject */ + offsets, /* offset vector */ + offsetcount, /* size of same */ + workspace, /* workspace vector */ + wscount, /* size of same */ 0); /* function recurse level */ - - /* Anything other than "no match" means we are done, always; otherwise, carry - on only if not anchored. 
*/ - + + /* Anything other than "no match" means we are done, always; otherwise, carry + on only if not anchored. */ + if (rc != PCRE_ERROR_NOMATCH || anchored) { if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2) @@ -3640,37 +3640,37 @@ for (;;) } return rc; } - - /* Advance to the next subject character unless we are at the end of a line - and firstline is set. */ - - if (firstline && IS_NEWLINE(current_subject)) break; - current_subject++; + + /* Advance to the next subject character unless we are at the end of a line + and firstline is set. */ + + if (firstline && IS_NEWLINE(current_subject)) break; + current_subject++; #ifdef SUPPORT_UTF if (utf) - { + { ACROSSCHAR(current_subject < end_subject, *current_subject, current_subject++); - } + } #endif - if (current_subject > end_subject) break; - - /* If we have just passed a CR and we are now at a LF, and the pattern does - not contain any explicit matches for \r or \n, and the newline option is CRLF - or ANY or ANYCRLF, advance the match position by one more character. */ - + if (current_subject > end_subject) break; + + /* If we have just passed a CR and we are now at a LF, and the pattern does + not contain any explicit matches for \r or \n, and the newline option is CRLF + or ANY or ANYCRLF, advance the match position by one more character. 
*/ + if (UCHAR21TEST(current_subject - 1) == CHAR_CR && - current_subject < end_subject && + current_subject < end_subject && UCHAR21TEST(current_subject) == CHAR_NL && - (re->flags & PCRE_HASCRORLF) == 0 && - (md->nltype == NLTYPE_ANY || - md->nltype == NLTYPE_ANYCRLF || - md->nllen == 2)) - current_subject++; - - } /* "Bumpalong" loop */ - -return PCRE_ERROR_NOMATCH; -} - -/* End of pcre_dfa_exec.c */ + (re->flags & PCRE_HASCRORLF) == 0 && + (md->nltype == NLTYPE_ANY || + md->nltype == NLTYPE_ANYCRLF || + md->nllen == 2)) + current_subject++; + + } /* "Bumpalong" loop */ + +return PCRE_ERROR_NOMATCH; +} + +/* End of pcre_dfa_exec.c */ diff --git a/contrib/libs/pcre/pcre_exec.c b/contrib/libs/pcre/pcre_exec.c index 9d023d74e94..4b5cb73fea5 100644 --- a/contrib/libs/pcre/pcre_exec.c +++ b/contrib/libs/pcre/pcre_exec.c @@ -1,61 +1,61 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2018 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This module contains pcre_exec(), the externally visible function that does -pattern matching using an NFA algorithm, trying to mimic Perl as closely as -possible. There are also some static supporting functions. */ - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains pcre_exec(), the externally visible function that does +pattern matching using an NFA algorithm, trying to mimic Perl as closely as +possible. There are also some static supporting functions. 
*/ + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#define NLBLOCK md /* Block containing newline information */ -#define PSSTART start_subject /* Field containing processed string start */ -#define PSEND end_subject /* Field containing processed string end */ - -#include "pcre_internal.h" - -/* Undefine some potentially clashing cpp symbols */ - -#undef min -#undef max - +#endif + +#define NLBLOCK md /* Block containing newline information */ +#define PSSTART start_subject /* Field containing processed string start */ +#define PSEND end_subject /* Field containing processed string end */ + +#include "pcre_internal.h" + +/* Undefine some potentially clashing cpp symbols */ + +#undef min +#undef max + /* The md->capture_last field uses the lower 16 bits for the last captured substring (which can never be greater than 65535) and a bit in the top half to mean "capture vector overflowed". This odd way of doing things was @@ -65,11 +65,11 @@ interface, and doing it this way saved on (a) another variable, which would have increased the stack frame size (a big NO-NO in PCRE) and (b) another separate set of save/restore instructions. The following defines are used in implementing this. */ - + #define CAPLMASK 0x0000ffff /* The bits used for last_capture */ #define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */ #define OVFLBIT 0x00010000 /* The bit that is set for overflow */ - + /* Values for setting in md->match_function_type to indicate two special types of call to match(). We do it this way to save on using another stack variable, as stack usage is to be discouraged. */ @@ -77,15 +77,15 @@ as stack usage is to be discouraged. */ #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */ #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */ -/* Non-error returns from the match() function. Error returns are externally -defined PCRE_ERROR_xxx codes, which are all negative. 
*/ - -#define MATCH_MATCH 1 -#define MATCH_NOMATCH 0 - -/* Special internal returns from the match() function. Make them sufficiently -negative to avoid the external error codes. */ - +/* Non-error returns from the match() function. Error returns are externally +defined PCRE_ERROR_xxx codes, which are all negative. */ + +#define MATCH_MATCH 1 +#define MATCH_NOMATCH 0 + +/* Special internal returns from the match() function. Make them sufficiently +negative to avoid the external error codes. */ + #define MATCH_ACCEPT (-999) #define MATCH_KETRPOS (-998) #define MATCH_ONCE (-997) @@ -98,103 +98,103 @@ for any one of them can use a range. */ #define MATCH_THEN (-992) #define MATCH_BACKTRACK_MAX MATCH_THEN #define MATCH_BACKTRACK_MIN MATCH_COMMIT - -/* Maximum number of ints of offset to save on the stack for recursive calls. -If the offset vector is bigger, malloc is used. This should be a multiple of 3, -because the offset vector is always a multiple of 3 long. */ - -#define REC_STACK_SAVE_MAX 30 - -/* Min and max values for the common repeats; for the maxima, 0 => infinity */ - + +/* Maximum number of ints of offset to save on the stack for recursive calls. +If the offset vector is bigger, malloc is used. This should be a multiple of 3, +because the offset vector is always a multiple of 3 long. */ + +#define REC_STACK_SAVE_MAX 30 + +/* Min and max values for the common repeats; for the maxima, 0 => infinity */ + static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, }; static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, }; - + #ifdef PCRE_DEBUG -/************************************************* -* Debugging function to print chars * -*************************************************/ - -/* Print a sequence of chars in printable format, stopping at the end of the -subject if the requested. 
- -Arguments: - p points to characters - length number to print - is_subject TRUE if printing from within md->start_subject - md pointer to matching data block, if is_subject is TRUE - -Returns: nothing -*/ - -static void +/************************************************* +* Debugging function to print chars * +*************************************************/ + +/* Print a sequence of chars in printable format, stopping at the end of the +subject if the requested. + +Arguments: + p points to characters + length number to print + is_subject TRUE if printing from within md->start_subject + md pointer to matching data block, if is_subject is TRUE + +Returns: nothing +*/ + +static void pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) -{ +{ pcre_uint32 c; BOOL utf = md->utf; -if (is_subject && length > md->end_subject - p) length = md->end_subject - p; -while (length-- > 0) +if (is_subject && length > md->end_subject - p) length = md->end_subject - p; +while (length-- > 0) if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c); -} -#endif - - - -/************************************************* -* Match a back-reference * -*************************************************/ - +} +#endif + + + +/************************************************* +* Match a back-reference * +*************************************************/ + /* Normally, if a back reference hasn't been set, the length that is passed is negative, so the match always fails. However, in JavaScript compatibility mode, the length passed is zero. Note that in caseless UTF-8 mode, the number of subject bytes matched may be different to the number of reference bytes. 
- -Arguments: - offset index into the offset vector + +Arguments: + offset index into the offset vector eptr pointer into the subject length length of reference to be matched (number of bytes) - md points to match data block + md points to match data block caseless TRUE if caseless - + Returns: >= 0 the number of subject bytes matched -1 no match -2 partial match; always given if at end subject -*/ - +*/ + static int match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md, BOOL caseless) -{ +{ PCRE_PUCHAR eptr_start = eptr; register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; #if defined SUPPORT_UTF && defined SUPPORT_UCP BOOL utf = md->utf; #endif - + #ifdef PCRE_DEBUG -if (eptr >= md->end_subject) - printf("matching subject "); -else - { - printf("matching subject "); - pchars(eptr, length, TRUE, md); - } -printf(" against backref "); -pchars(p, length, FALSE, md); -printf("\n"); -#endif - +if (eptr >= md->end_subject) + printf("matching subject "); +else + { + printf("matching subject "); + pchars(eptr, length, TRUE, md); + } +printf(" against backref "); +pchars(p, length, FALSE, md); +printf("\n"); +#endif + /* Always fail if reference not set (and not JavaScript compatible - in that case the length is passed as zero). */ - + if (length < 0) return -1; - + /* Separate the caseless case for speed. In UTF-8 mode we can only do this properly if Unicode properties are supported. Otherwise, we can check only ASCII characters. */ - + if (caseless) - { + { #if defined SUPPORT_UTF && defined SUPPORT_UCP if (utf) { @@ -251,103 +251,103 @@ are in UTF-8 mode. 
*/ else { - while (length-- > 0) + while (length-- > 0) { if (eptr >= md->end_subject) return -2; /* Partial match */ if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; } - } - + } + return (int)(eptr - eptr_start); -} - - - -/*************************************************************************** -**************************************************************************** - RECURSION IN THE match() FUNCTION - -The match() function is highly recursive, though not every recursive call -increases the recursive depth. Nevertheless, some regular expressions can cause -it to recurse to a great depth. I was writing for Unix, so I just let it call -itself recursively. This uses the stack for saving everything that has to be -saved for a recursive call. On Unix, the stack can be large, and this works -fine. - -It turns out that on some non-Unix-like systems there are problems with -programs that use a lot of stack. (This despite the fact that every last chip -has oodles of memory these days, and techniques for extending the stack have -been known for decades.) So.... - -There is a fudge, triggered by defining NO_RECURSE, which avoids recursive -calls by keeping local variables that need to be preserved in blocks of memory -obtained from malloc() instead instead of on the stack. Macros are used to -achieve this so that the actual code doesn't look very different to what it -always used to. - -The original heap-recursive code used longjmp(). However, it seems that this -can be very slow on some operating systems. Following a suggestion from Stan -Switzer, the use of longjmp() has been abolished, at the cost of having to -provide a unique number for each call to RMATCH. There is no way of generating -a sequence of numbers at compile time in C. I have given them names, to make -them stand out more clearly. - -Crude tests on x86 Linux show a small speedup of around 5-8%. 
However, on -FreeBSD, avoiding longjmp() more than halves the time taken to run the standard -tests. Furthermore, not using longjmp() means that local dynamic variables -don't have indeterminate values; this has meant that the frame size can be -reduced because the result can be "passed back" by straight setting of the -variable instead of being passed in the frame. -**************************************************************************** -***************************************************************************/ - -/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN -below must be updated in sync. */ - -enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, - RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, - RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, - RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, - RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, +} + + + +/*************************************************************************** +**************************************************************************** + RECURSION IN THE match() FUNCTION + +The match() function is highly recursive, though not every recursive call +increases the recursive depth. Nevertheless, some regular expressions can cause +it to recurse to a great depth. I was writing for Unix, so I just let it call +itself recursively. This uses the stack for saving everything that has to be +saved for a recursive call. On Unix, the stack can be large, and this works +fine. + +It turns out that on some non-Unix-like systems there are problems with +programs that use a lot of stack. (This despite the fact that every last chip +has oodles of memory these days, and techniques for extending the stack have +been known for decades.) So.... 
+ +There is a fudge, triggered by defining NO_RECURSE, which avoids recursive +calls by keeping local variables that need to be preserved in blocks of memory +obtained from malloc() instead instead of on the stack. Macros are used to +achieve this so that the actual code doesn't look very different to what it +always used to. + +The original heap-recursive code used longjmp(). However, it seems that this +can be very slow on some operating systems. Following a suggestion from Stan +Switzer, the use of longjmp() has been abolished, at the cost of having to +provide a unique number for each call to RMATCH. There is no way of generating +a sequence of numbers at compile time in C. I have given them names, to make +them stand out more clearly. + +Crude tests on x86 Linux show a small speedup of around 5-8%. However, on +FreeBSD, avoiding longjmp() more than halves the time taken to run the standard +tests. Furthermore, not using longjmp() means that local dynamic variables +don't have indeterminate values; this has meant that the frame size can be +reduced because the result can be "passed back" by straight setting of the +variable instead of being passed in the frame. +**************************************************************************** +***************************************************************************/ + +/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN +below must be updated in sync. */ + +enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, + RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, + RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, + RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, + RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, RM61, RM62, RM63, RM64, RM65, RM66, RM67 }; - -/* These versions of the macros use the stack, as normal. There are debugging -versions and production versions. 
Note that the "rw" argument of RMATCH isn't + +/* These versions of the macros use the stack, as normal. There are debugging +versions and production versions. Note that the "rw" argument of RMATCH isn't actually used in this definition. */ - -#ifndef NO_RECURSE -#define REGISTER register - + +#ifndef NO_RECURSE +#define REGISTER register + #ifdef PCRE_DEBUG #define RMATCH(ra,rb,rc,rd,re,rw) \ - { \ - printf("match() called in line %d\n", __LINE__); \ + { \ + printf("match() called in line %d\n", __LINE__); \ rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \ - printf("to line %d\n", __LINE__); \ - } -#define RRETURN(ra) \ - { \ + printf("to line %d\n", __LINE__); \ + } +#define RRETURN(ra) \ + { \ printf("match() returned %d from line %d\n", ra, __LINE__); \ - return ra; \ - } -#else + return ra; \ + } +#else #define RMATCH(ra,rb,rc,rd,re,rw) \ rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) -#define RRETURN(ra) return ra -#endif - -#else - - -/* These versions of the macros manage a private stack on the heap. Note that -the "rd" argument of RMATCH isn't actually used in this definition. It's the md -argument of match(), which never changes. */ - -#define REGISTER - +#define RRETURN(ra) return ra +#endif + +#else + + +/* These versions of the macros manage a private stack on the heap. Note that +the "rd" argument of RMATCH isn't actually used in this definition. It's the md +argument of match(), which never changes. */ + +#define REGISTER + #define RMATCH(ra,rb,rc,rd,re,rw)\ - {\ + {\ heapframe *newframe = frame->Xnextframe;\ if (newframe == NULL)\ {\ @@ -357,50 +357,50 @@ argument of match(), which never changes. 
*/ frame->Xnextframe = newframe;\ }\ frame->Xwhere = rw;\ - newframe->Xeptr = ra;\ - newframe->Xecode = rb;\ - newframe->Xmstart = mstart;\ - newframe->Xoffset_top = rc;\ + newframe->Xeptr = ra;\ + newframe->Xecode = rb;\ + newframe->Xmstart = mstart;\ + newframe->Xoffset_top = rc;\ newframe->Xeptrb = re;\ - newframe->Xrdepth = frame->Xrdepth + 1;\ - newframe->Xprevframe = frame;\ - frame = newframe;\ - DPRINTF(("restarting from line %d\n", __LINE__));\ - goto HEAP_RECURSE;\ - L_##rw:\ - DPRINTF(("jumped back to line %d\n", __LINE__));\ - } - -#define RRETURN(ra)\ - {\ + newframe->Xrdepth = frame->Xrdepth + 1;\ + newframe->Xprevframe = frame;\ + frame = newframe;\ + DPRINTF(("restarting from line %d\n", __LINE__));\ + goto HEAP_RECURSE;\ + L_##rw:\ + DPRINTF(("jumped back to line %d\n", __LINE__));\ + } + +#define RRETURN(ra)\ + {\ heapframe *oldframe = frame;\ frame = oldframe->Xprevframe;\ - if (frame != NULL)\ - {\ - rrc = ra;\ - goto HEAP_RETURN;\ - }\ - return ra;\ - } - - -/* Structure for remembering the local variables in a private frame */ - -typedef struct heapframe { - struct heapframe *Xprevframe; + if (frame != NULL)\ + {\ + rrc = ra;\ + goto HEAP_RETURN;\ + }\ + return ra;\ + } + + +/* Structure for remembering the local variables in a private frame */ + +typedef struct heapframe { + struct heapframe *Xprevframe; struct heapframe *Xnextframe; - - /* Function arguments that may change */ - + + /* Function arguments that may change */ + PCRE_PUCHAR Xeptr; const pcre_uchar *Xecode; PCRE_PUCHAR Xmstart; - int Xoffset_top; - eptrblock *Xeptrb; - unsigned int Xrdepth; - - /* Function local variables */ - + int Xoffset_top; + eptrblock *Xeptrb; + unsigned int Xrdepth; + + /* Function local variables */ + PCRE_PUCHAR Xcallpat; #ifdef SUPPORT_UTF PCRE_PUCHAR Xcharptr; @@ -410,59 +410,59 @@ typedef struct heapframe { PCRE_PUCHAR Xpp; PCRE_PUCHAR Xprev; PCRE_PUCHAR Xsaved_eptr; - - recursion_info Xnew_recursive; - - BOOL Xcur_is_word; - BOOL Xcondition; - BOOL 
Xprev_is_word; - -#ifdef SUPPORT_UCP - int Xprop_type; + + recursion_info Xnew_recursive; + + BOOL Xcur_is_word; + BOOL Xcondition; + BOOL Xprev_is_word; + +#ifdef SUPPORT_UCP + int Xprop_type; unsigned int Xprop_value; - int Xprop_fail_result; - int Xoclength; + int Xprop_fail_result; + int Xoclength; pcre_uchar Xocchars[6]; -#endif - +#endif + int Xcodelink; - int Xctype; - unsigned int Xfc; - int Xfi; - int Xlength; - int Xmax; - int Xmin; + int Xctype; + unsigned int Xfc; + int Xfi; + int Xlength; + int Xmax; + int Xmin; unsigned int Xnumber; - int Xoffset; + int Xoffset; unsigned int Xop; pcre_int32 Xsave_capture_last; - int Xsave_offset1, Xsave_offset2, Xsave_offset3; - int Xstacksave[REC_STACK_SAVE_MAX]; - - eptrblock Xnewptrb; - - /* Where to jump back to */ - - int Xwhere; - -} heapframe; - -#endif - - -/*************************************************************************** -***************************************************************************/ - - - -/************************************************* -* Match from current position * -*************************************************/ - -/* This function is called recursively in many circumstances. Whenever it -returns a negative (error) response, the outer incarnation must also return the + int Xsave_offset1, Xsave_offset2, Xsave_offset3; + int Xstacksave[REC_STACK_SAVE_MAX]; + + eptrblock Xnewptrb; + + /* Where to jump back to */ + + int Xwhere; + +} heapframe; + +#endif + + +/*************************************************************************** +***************************************************************************/ + + + +/************************************************* +* Match from current position * +*************************************************/ + +/* This function is called recursively in many circumstances. Whenever it +returns a negative (error) response, the outer incarnation must also return the same response. 
*/ - + /* These macros pack up tests that are used for partial matching, and which appear several times in the code. We set the "hit end" flag if the pointer is at the end of the subject and also past the start of the subject (i.e. @@ -488,29 +488,29 @@ the subject. */ /* Performance note: It might be tempting to extract commonly used fields from the md structure (e.g. utf, end_subject) into individual variables to improve -performance. Tests using gcc on a SPARC disproved this; in the first case, it -made performance worse. - -Arguments: - eptr pointer to current character in subject - ecode pointer to current position in compiled code - mstart pointer to the current match start position (can be modified - by encountering \K) - offset_top current top pointer - md pointer to "static" info for the match - eptrb pointer to chain of blocks containing eptr at start of - brackets - for testing for empty matches - rdepth the recursion depth - -Returns: MATCH_MATCH if matched ) these values are >= 0 - MATCH_NOMATCH if failed to match ) +performance. Tests using gcc on a SPARC disproved this; in the first case, it +made performance worse. + +Arguments: + eptr pointer to current character in subject + ecode pointer to current position in compiled code + mstart pointer to the current match start position (can be modified + by encountering \K) + offset_top current top pointer + md pointer to "static" info for the match + eptrb pointer to chain of blocks containing eptr at start of + brackets - for testing for empty matches + rdepth the recursion depth + +Returns: MATCH_MATCH if matched ) these values are >= 0 + MATCH_NOMATCH if failed to match ) a negative MATCH_xxx value for PRUNE, SKIP, etc - a negative PCRE_ERROR_xxx value if aborted by an error condition - (e.g. stopped by repeated call or recursion limit) -*/ - + a negative PCRE_ERROR_xxx value if aborted by an error condition + (e.g. 
stopped by repeated call or recursion limit) +*/ + #ifdef __GNUC__ -static int +static int match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode, PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth) __attribute__((noinline,noclone)); @@ -519,104 +519,104 @@ static int match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode, PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth) -{ -/* These variables do not need to be preserved over recursion in this function, -so they can be ordinary variables in all cases. Mark some of them with -"register" because they are used a lot in loops. */ - -register int rrc; /* Returns from recursive calls */ -register int i; /* Used for loops not involving calls to RMATCH() */ +{ +/* These variables do not need to be preserved over recursion in this function, +so they can be ordinary variables in all cases. Mark some of them with +"register" because they are used a lot in loops. */ + +register int rrc; /* Returns from recursive calls */ +register int i; /* Used for loops not involving calls to RMATCH() */ register pcre_uint32 c; /* Character values not kept over RMATCH() calls */ register BOOL utf; /* Local copy of UTF flag for speed */ - -BOOL minimize, possessive; /* Quantifier options */ + +BOOL minimize, possessive; /* Quantifier options */ BOOL caseless; int condcode; - -/* When recursion is not being used, all "local" variables that have to be + +/* When recursion is not being used, all "local" variables that have to be preserved over calls to RMATCH() are part of a "frame". We set up the top-level frame on the stack here; subsequent instantiations are obtained from the heap whenever RMATCH() does a "recursion". See the macro definitions above. Putting the top-level on the stack rather than malloc-ing them all gives a performance boost in many cases where there is not much "recursion". 
*/ - -#ifdef NO_RECURSE + +#ifdef NO_RECURSE heapframe *frame = (heapframe *)md->match_frames_base; - -/* Copy in the original argument variables */ - -frame->Xeptr = eptr; -frame->Xecode = ecode; -frame->Xmstart = mstart; -frame->Xoffset_top = offset_top; -frame->Xeptrb = eptrb; -frame->Xrdepth = rdepth; - -/* This is where control jumps back to to effect "recursion" */ - -HEAP_RECURSE: - -/* Macros make the argument variables come from the current frame */ - -#define eptr frame->Xeptr -#define ecode frame->Xecode -#define mstart frame->Xmstart -#define offset_top frame->Xoffset_top -#define eptrb frame->Xeptrb -#define rdepth frame->Xrdepth - -/* Ditto for the local variables */ - + +/* Copy in the original argument variables */ + +frame->Xeptr = eptr; +frame->Xecode = ecode; +frame->Xmstart = mstart; +frame->Xoffset_top = offset_top; +frame->Xeptrb = eptrb; +frame->Xrdepth = rdepth; + +/* This is where control jumps back to to effect "recursion" */ + +HEAP_RECURSE: + +/* Macros make the argument variables come from the current frame */ + +#define eptr frame->Xeptr +#define ecode frame->Xecode +#define mstart frame->Xmstart +#define offset_top frame->Xoffset_top +#define eptrb frame->Xeptrb +#define rdepth frame->Xrdepth + +/* Ditto for the local variables */ + #ifdef SUPPORT_UTF -#define charptr frame->Xcharptr -#endif -#define callpat frame->Xcallpat +#define charptr frame->Xcharptr +#endif +#define callpat frame->Xcallpat #define codelink frame->Xcodelink -#define data frame->Xdata -#define next frame->Xnext -#define pp frame->Xpp -#define prev frame->Xprev -#define saved_eptr frame->Xsaved_eptr - -#define new_recursive frame->Xnew_recursive - -#define cur_is_word frame->Xcur_is_word -#define condition frame->Xcondition -#define prev_is_word frame->Xprev_is_word - -#ifdef SUPPORT_UCP -#define prop_type frame->Xprop_type -#define prop_value frame->Xprop_value -#define prop_fail_result frame->Xprop_fail_result -#define oclength frame->Xoclength -#define occhars 
frame->Xocchars -#endif - -#define ctype frame->Xctype -#define fc frame->Xfc -#define fi frame->Xfi -#define length frame->Xlength -#define max frame->Xmax -#define min frame->Xmin -#define number frame->Xnumber -#define offset frame->Xoffset -#define op frame->Xop -#define save_capture_last frame->Xsave_capture_last -#define save_offset1 frame->Xsave_offset1 -#define save_offset2 frame->Xsave_offset2 -#define save_offset3 frame->Xsave_offset3 -#define stacksave frame->Xstacksave - -#define newptrb frame->Xnewptrb - -/* When recursion is being used, local variables are allocated on the stack and -get preserved during recursion in the normal way. In this environment, fi and -i, and fc and c, can be the same variables. */ - -#else /* NO_RECURSE not defined */ -#define fi i -#define fc c - +#define data frame->Xdata +#define next frame->Xnext +#define pp frame->Xpp +#define prev frame->Xprev +#define saved_eptr frame->Xsaved_eptr + +#define new_recursive frame->Xnew_recursive + +#define cur_is_word frame->Xcur_is_word +#define condition frame->Xcondition +#define prev_is_word frame->Xprev_is_word + +#ifdef SUPPORT_UCP +#define prop_type frame->Xprop_type +#define prop_value frame->Xprop_value +#define prop_fail_result frame->Xprop_fail_result +#define oclength frame->Xoclength +#define occhars frame->Xocchars +#endif + +#define ctype frame->Xctype +#define fc frame->Xfc +#define fi frame->Xfi +#define length frame->Xlength +#define max frame->Xmax +#define min frame->Xmin +#define number frame->Xnumber +#define offset frame->Xoffset +#define op frame->Xop +#define save_capture_last frame->Xsave_capture_last +#define save_offset1 frame->Xsave_offset1 +#define save_offset2 frame->Xsave_offset2 +#define save_offset3 frame->Xsave_offset3 +#define stacksave frame->Xstacksave + +#define newptrb frame->Xnewptrb + +/* When recursion is being used, local variables are allocated on the stack and +get preserved during recursion in the normal way. 
In this environment, fi and +i, and fc and c, can be the same variables. */ + +#else /* NO_RECURSE not defined */ +#define fi i +#define fc c + /* Many of the following variables are used only in small blocks of the code. My normal style of coding would have declared them within each of those blocks. However, in order to accommodate the version of this code that uses an external @@ -624,7 +624,7 @@ However, in order to accommodate the version of this code that uses an external declarations can be cut out in a block. The only declarations within blocks below are for variables that do not have to be preserved over a recursive call to RMATCH(). */ - + #ifdef SUPPORT_UTF const pcre_uchar *charptr; #endif @@ -638,30 +638,30 @@ PCRE_PUCHAR saved_eptr; recursion_info new_recursive; BOOL cur_is_word; -BOOL condition; -BOOL prev_is_word; - -#ifdef SUPPORT_UCP -int prop_type; +BOOL condition; +BOOL prev_is_word; + +#ifdef SUPPORT_UCP +int prop_type; unsigned int prop_value; -int prop_fail_result; -int oclength; +int prop_fail_result; +int oclength; pcre_uchar occhars[6]; -#endif - +#endif + int codelink; -int ctype; -int length; -int max; -int min; +int ctype; +int length; +int max; +int min; unsigned int number; -int offset; +int offset; unsigned int op; pcre_int32 save_capture_last; -int save_offset1, save_offset2, save_offset3; -int stacksave[REC_STACK_SAVE_MAX]; - -eptrblock newptrb; +int save_offset1, save_offset2, save_offset3; +int stacksave[REC_STACK_SAVE_MAX]; + +eptrblock newptrb; /* There is a special fudge for calling match() in a way that causes it to measure the size of its basic stack frame when the stack is being used for @@ -679,8 +679,8 @@ if (ecode == NULL) return (len > 0)? -len : len; } } -#endif /* NO_RECURSE */ - +#endif /* NO_RECURSE */ + /* To save space on the stack and in the heap frame, I have doubled up on some of the local variables that are used only in localised parts of the code, but still need to be preserved over recursive calls of match(). 
These macros define @@ -694,47 +694,47 @@ the alternative names that are used. */ #define foc number #define save_mark data -/* These statements are here to stop the compiler complaining about unitialized -variables. */ - -#ifdef SUPPORT_UCP -prop_value = 0; -prop_fail_result = 0; -#endif - - -/* This label is used for tail recursion, which is used in a few cases even -when NO_RECURSE is not defined, in order to reduce the amount of stack that is -used. Thanks to Ian Taylor for noticing this possibility and sending the -original patch. */ - -TAIL_RECURSE: - -/* OK, now we can get on with the real code of the function. Recursive calls -are specified by the macro RMATCH and RRETURN is used to return. When -NO_RECURSE is *not* defined, these just turn into a recursive call to match() +/* These statements are here to stop the compiler complaining about unitialized +variables. */ + +#ifdef SUPPORT_UCP +prop_value = 0; +prop_fail_result = 0; +#endif + + +/* This label is used for tail recursion, which is used in a few cases even +when NO_RECURSE is not defined, in order to reduce the amount of stack that is +used. Thanks to Ian Taylor for noticing this possibility and sending the +original patch. */ + +TAIL_RECURSE: + +/* OK, now we can get on with the real code of the function. Recursive calls +are specified by the macro RMATCH and RRETURN is used to return. When +NO_RECURSE is *not* defined, these just turn into a recursive call to match() and a "return", respectively (possibly with some debugging if PCRE_DEBUG is -defined). However, RMATCH isn't like a function call because it's quite a -complicated macro. It has to be used in one particular way. This shouldn't, -however, impact performance when true recursion is being used. */ - +defined). However, RMATCH isn't like a function call because it's quite a +complicated macro. It has to be used in one particular way. This shouldn't, +however, impact performance when true recursion is being used. 
*/ + #ifdef SUPPORT_UTF utf = md->utf; /* Local copy of the flag */ -#else +#else utf = FALSE; -#endif - -/* First check that we haven't called match() too many times, or that we -haven't exceeded the recursive call limit. */ - -if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT); -if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT); - -/* At the start of a group with an unlimited repeat that may match an empty +#endif + +/* First check that we haven't called match() too many times, or that we +haven't exceeded the recursive call limit. */ + +if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT); +if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT); + +/* At the start of a group with an unlimited repeat that may match an empty string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is done this way to save having to use another function argument, which would take up space on the stack. See also MATCH_CONDASSERT below. - + When MATCH_CBEGROUP is set, add the current subject pointer to the chain of such remembered pointers, to be checked when we hit the closing ket, in order to break infinite loops that match no characters. When match() is called in @@ -743,20 +743,20 @@ NOT be used with tail recursion, because the memory block that is used is on the stack, so a new one may be required for each match(). */ if (md->match_function_type == MATCH_CBEGROUP) - { - newptrb.epb_saved_eptr = eptr; - newptrb.epb_prev = eptrb; - eptrb = &newptrb; + { + newptrb.epb_saved_eptr = eptr; + newptrb.epb_prev = eptrb; + eptrb = &newptrb; md->match_function_type = 0; - } - -/* Now start processing the opcodes. */ - -for (;;) - { - minimize = possessive = FALSE; - op = *ecode; - + } + +/* Now start processing the opcodes. 
*/ + +for (;;) + { + minimize = possessive = FALSE; + op = *ecode; + switch(op) { case OP_MARK: @@ -766,14 +766,14 @@ for (;;) eptrb, RM55); if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && md->mark == NULL) md->mark = ecode + 2; - + /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an argument, and we must check whether that argument matches this MARK's argument. It is passed back in md->start_match_ptr (an overloading of that variable). If it does match, we reset that variable to the current subject position and return MATCH_SKIP. Otherwise, pass back the return code unaltered. */ - + else if (rrc == MATCH_SKIP_ARG && STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0) { @@ -782,21 +782,21 @@ for (;;) } RRETURN(rrc); - case OP_FAIL: - RRETURN(MATCH_NOMATCH); - + case OP_FAIL: + RRETURN(MATCH_NOMATCH); + case OP_COMMIT: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM52); if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_COMMIT); - case OP_PRUNE: + case OP_PRUNE: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM51); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - RRETURN(MATCH_PRUNE); - + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + RRETURN(MATCH_PRUNE); + case OP_PRUNE_ARG: md->nomatch_mark = ecode + 2; md->mark = NULL; /* In case previously set by assertion */ @@ -804,16 +804,16 @@ for (;;) eptrb, RM56); if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && md->mark == NULL) md->mark = ecode + 2; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_PRUNE); - - case OP_SKIP: + + case OP_SKIP: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM53); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->start_match_ptr = eptr; /* Pass back current position */ - RRETURN(MATCH_SKIP); - + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + md->start_match_ptr = eptr; /* Pass back current position */ + RRETURN(MATCH_SKIP); + /* Note that, for Perl 
compatibility, SKIP with an argument does NOT set nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was not a matching mark, we have to re-run the match, ignoring the SKIP_ARG @@ -845,13 +845,13 @@ for (;;) the branch in which it occurs can be determined. Overload the start of match pointer to do this. */ - case OP_THEN: + case OP_THEN: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM54); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->start_match_ptr = ecode; - RRETURN(MATCH_THEN); - + RRETURN(MATCH_THEN); + case OP_THEN_ARG: md->nomatch_mark = ecode + 2; md->mark = NULL; /* In case previously set by assertion */ @@ -862,7 +862,7 @@ for (;;) if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->start_match_ptr = ecode; RRETURN(MATCH_THEN); - + /* Handle an atomic group that does not contain any capturing parentheses. This can be handled like an assertion. Prior to 8.13, all atomic groups were handled this way. In 8.13, the code was changed as below for ONCE, so @@ -870,7 +870,7 @@ for (;;) However, this uses a lot more stack, so in 8.20, atomic groups that do not contain any captures generate OP_ONCE_NC, which can be handled in the old, less stack intensive way. - + Check the alternative branches in turn - the matching won't pass the KET for this kind of subpattern. If any one branch matches, we carry on as at the end of a normal bracket, leaving the subject pointer, but resetting @@ -955,36 +955,36 @@ for (;;) the working value and also the values of the final offsets, in case they were set by a previous iteration of the same bracket. - If there isn't enough space in the offset vector, treat this as if it were - a non-capturing bracket. Don't worry about setting the flag for the error - case here; that is handled in the code for KET. 
*/ - - case OP_CBRA: - case OP_SCBRA: - number = GET2(ecode, 1+LINK_SIZE); - offset = number << 1; - + If there isn't enough space in the offset vector, treat this as if it were + a non-capturing bracket. Don't worry about setting the flag for the error + case here; that is handled in the code for KET. */ + + case OP_CBRA: + case OP_SCBRA: + number = GET2(ecode, 1+LINK_SIZE); + offset = number << 1; + #ifdef PCRE_DEBUG - printf("start bracket %d\n", number); - printf("subject="); - pchars(eptr, 16, TRUE, md); - printf("\n"); -#endif - - if (offset < md->offset_max) - { - save_offset1 = md->offset_vector[offset]; - save_offset2 = md->offset_vector[offset+1]; - save_offset3 = md->offset_vector[md->offset_end - number]; - save_capture_last = md->capture_last; + printf("start bracket %d\n", number); + printf("subject="); + pchars(eptr, 16, TRUE, md); + printf("\n"); +#endif + + if (offset < md->offset_max) + { + save_offset1 = md->offset_vector[offset]; + save_offset2 = md->offset_vector[offset+1]; + save_offset3 = md->offset_vector[md->offset_end - number]; + save_capture_last = md->capture_last; save_mark = md->mark; - - DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); + + DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); md->offset_vector[md->offset_end - number] = (int)(eptr - md->start_subject); - + for (;;) - { + { if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM1); @@ -1012,36 +1012,36 @@ for (;;) /* Anything other than NOMATCH is passed back. 
*/ if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->capture_last = save_capture_last; - ecode += GET(ecode, 1); + md->capture_last = save_capture_last; + ecode += GET(ecode, 1); md->mark = save_mark; if (*ecode != OP_ALT) break; - } - - DPRINTF(("bracket %d failed\n", number)); - md->offset_vector[offset] = save_offset1; - md->offset_vector[offset+1] = save_offset2; - md->offset_vector[md->offset_end - number] = save_offset3; - + } + + DPRINTF(("bracket %d failed\n", number)); + md->offset_vector[offset] = save_offset1; + md->offset_vector[offset+1] = save_offset2; + md->offset_vector[md->offset_end - number] = save_offset3; + /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ RRETURN(rrc); - } - - /* FALL THROUGH ... Insufficient room for saving captured contents. Treat - as a non-capturing bracket. */ - - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - - DPRINTF(("insufficient capture room: treat as non-capturing\n")); - - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - + } + + /* FALL THROUGH ... Insufficient room for saving captured contents. Treat + as a non-capturing bracket. */ + + /* VVVVVVVVVVVVVVVVVVVVVVVVV */ + /* VVVVVVVVVVVVVVVVVVVVVVVVV */ + + DPRINTF(("insufficient capture room: treat as non-capturing\n")); + + /* VVVVVVVVVVVVVVVVVVVVVVVVV */ + /* VVVVVVVVVVVVVVVVVVVVVVVVV */ + /* Non-capturing or atomic group, except for possessive with unlimited repeat and ONCE group with no captures. Loop for all the alternatives. - + When we get to the final alternative within the brackets, we used to return the result of a recursive call to match() whatever happened so it was possible to reduce stack usage by turning this into a tail recursion, @@ -1060,12 +1060,12 @@ for (;;) previous backup points can be taken. 
*/ case OP_ONCE: - case OP_BRA: - case OP_SBRA: - DPRINTF(("start non-capturing bracket\n")); + case OP_BRA: + case OP_SBRA: + DPRINTF(("start non-capturing bracket\n")); - for (;;) - { + for (;;) + { if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; @@ -1074,7 +1074,7 @@ for (;;) above. */ else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) - { + { ecode += PRIV(OP_lengths)[*ecode]; goto TAIL_RECURSE; } @@ -1100,7 +1100,7 @@ for (;;) if (rrc != MATCH_NOMATCH) { if (rrc == MATCH_ONCE) - { + { const pcre_uchar *scode = ecode; if (*scode != OP_ONCE) /* If not at start, find it */ { @@ -1108,7 +1108,7 @@ for (;;) scode -= GET(scode, 1); } if (md->once_target == scode) rrc = MATCH_NOMATCH; - } + } RRETURN(rrc); } ecode += GET(ecode, 1); @@ -1116,9 +1116,9 @@ for (;;) if (*ecode != OP_ALT) break; md->capture_last = save_capture_last; } - + RRETURN(MATCH_NOMATCH); - + /* Handle possessive capturing brackets with an unlimited repeat. We come here from BRAZERO with allow_zero set TRUE. The offset_vector values are handled similarly to the normal case above. However, the matching is @@ -1184,11 +1184,11 @@ for (;;) } eptr = md->end_match_ptr; continue; - } - + } + /* See comment in the code for capturing groups above about handling THEN. */ - + if (rrc == MATCH_THEN) { next = ecode + GET(ecode,1); @@ -1199,9 +1199,9 @@ for (;;) if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->capture_last = save_capture_last; - ecode += GET(ecode, 1); + ecode += GET(ecode, 1); if (*ecode != OP_ALT) break; - } + } if (!matched_once) { @@ -1276,15 +1276,15 @@ for (;;) } RRETURN(MATCH_NOMATCH); - /* Control never reaches here. */ - + /* Control never reaches here. */ + /* Conditional group: compilation checked that there are no more than two branches. If the condition is false, skipping the first branch takes us past the end of the item if there is only one branch, but that's exactly what we want. 
*/ - - case OP_COND: - case OP_SCOND: + + case OP_COND: + case OP_SCOND: /* The variable codelink will be added to ecode when the condition is false, to get to the second branch. Setting it to the offset to the ALT @@ -1298,7 +1298,7 @@ for (;;) inserted between OP_COND and an assertion condition. */ if (*ecode == OP_CALLOUT) - { + { if (PUBL(callout) != NULL) { PUBL(callout_block) cb; @@ -1332,13 +1332,13 @@ for (;;) ecode += PRIV(OP_lengths)[OP_CALLOUT]; codelink -= PRIV(OP_lengths)[OP_CALLOUT]; - } - + } + /* Test the various possible conditions */ condition = FALSE; switch(condcode = *ecode) - { + { case OP_RREF: /* Numbered group recursion test */ if (md->recursive != NULL) /* Not recursing => FALSE */ { @@ -1364,9 +1364,9 @@ for (;;) case OP_CREF: /* Numbered group used test */ offset = GET2(ecode, 1) << 1; /* Doubled ref number */ - condition = offset < offset_top && md->offset_vector[offset] >= 0; + condition = offset < offset_top && md->offset_vector[offset] >= 0; break; - + case OP_DNCREF: /* Duplicate named group used test */ { int count = GET2(ecode, 1 + IMM2_SIZE); @@ -1380,11 +1380,11 @@ for (;;) } } break; - + case OP_DEF: /* DEFINE - always false */ case OP_FAIL: /* From optimized (?!) condition */ break; - + /* The condition is an assertion. Call match() to evaluate it - setting md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of an assertion. */ @@ -1392,11 +1392,11 @@ for (;;) default: md->match_function_type = MATCH_CONDASSERT; RMATCH(eptr, ecode, offset_top, md, NULL, RM3); - if (rrc == MATCH_MATCH) - { + if (rrc == MATCH_MATCH) + { if (md->end_offset_top > offset_top) offset_top = md->end_offset_top; /* Captures may have happened */ - condition = TRUE; + condition = TRUE; /* Advance ecode past the assertion to the start of the first branch, but adjust it so that the general choosing code below works. 
If the @@ -1405,23 +1405,23 @@ for (;;) if (*ecode == OP_BRAZERO) ecode++; ecode += GET(ecode, 1); - while (*ecode == OP_ALT) ecode += GET(ecode, 1); + while (*ecode == OP_ALT) ecode += GET(ecode, 1); ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode]; - } + } /* PCRE doesn't allow the effect of (*THEN) to escape beyond an assertion; it is therefore treated as NOMATCH. Any other return is an error. */ - else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) - { - RRETURN(rrc); /* Need braces because of following else */ - } + else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) + { + RRETURN(rrc); /* Need braces because of following else */ + } break; - } - + } + /* Choose branch according to the condition */ - + ecode += condition? PRIV(OP_lengths)[condcode] : codelink; /* We are now at the branch that is to be obeyed. As there is only one, we @@ -1435,28 +1435,28 @@ for (;;) of alternatives, of course). */ if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT) - { + { if (op != OP_SCOND) - { - goto TAIL_RECURSE; - } + { + goto TAIL_RECURSE; + } md->match_function_type = MATCH_CBEGROUP; RMATCH(eptr, ecode, offset_top, md, eptrb, RM49); RRETURN(rrc); - } + } /* Condition false & no alternative; continue after the group. */ else - { - } - break; - - + { + } + break; + + /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close any currently open capturing brackets. */ - + case OP_CLOSE: number = GET2(ecode, 1); /* Must be less than 65536 */ offset = number << 1; @@ -1468,7 +1468,7 @@ for (;;) md->capture_last = (md->capture_last & OVFLMASK) | number; if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else - { + { md->offset_vector[offset] = md->offset_vector[md->offset_end - number]; md->offset_vector[offset+1] = (int)(eptr - md->start_subject); @@ -1484,11 +1484,11 @@ for (;;) while (iptr < iend) *iptr++ = -1; offset_top = offset + 2; } - } + } ecode += 1 + IMM2_SIZE; break; - - + + /* End of the pattern, either real or forced. 
*/ case OP_END: @@ -1509,29 +1509,29 @@ for (;;) /* Otherwise, we have a match. */ - md->end_match_ptr = eptr; /* Record where we ended */ - md->end_offset_top = offset_top; /* and how many extracts were taken */ - md->start_match_ptr = mstart; /* and the start (\K can modify) */ - + md->end_match_ptr = eptr; /* Record where we ended */ + md->end_offset_top = offset_top; /* and how many extracts were taken */ + md->start_match_ptr = mstart; /* and the start (\K can modify) */ + /* For some reason, the macros don't work properly if an expression is given as the argument to RRETURN when the heap is in use. */ - + rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; RRETURN(rrc); - - /* Assertion brackets. Check the alternative branches in turn - the - matching won't pass the KET for an assertion. If any one branch matches, - the assertion is true. Lookbehind assertions have an OP_REVERSE item at the - start of each branch to move the current point backwards, so the code at + + /* Assertion brackets. Check the alternative branches in turn - the + matching won't pass the KET for an assertion. If any one branch matches, + the assertion is true. Lookbehind assertions have an OP_REVERSE item at the + start of each branch to move the current point backwards, so the code at this level is identical to the lookahead case. When the assertion is part of a condition, we want to return immediately afterwards. The caller of this incarnation of the match() function will have set MATCH_CONDASSERT in md->match_function type, and one of these opcodes will be the first opcode that is processed. We use a local variable that is preserved over calls to match() to remember this case. 
*/ - - case OP_ASSERT: - case OP_ASSERTBACK: + + case OP_ASSERT: + case OP_ASSERTBACK: save_mark = md->mark; if (md->match_function_type == MATCH_CONDASSERT) { @@ -1542,8 +1542,8 @@ for (;;) /* Loop for each branch */ - do - { + do + { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4); /* A match means that the assertion is true; break out of the loop @@ -1577,32 +1577,32 @@ for (;;) Perl. */ if (rrc != MATCH_NOMATCH) RRETURN(rrc); - ecode += GET(ecode, 1); - } + ecode += GET(ecode, 1); + } while (*ecode == OP_ALT); /* Continue for next alternative */ /* If we have tried all the alternative branches, the assertion has failed. If not, we broke out after a match. */ - if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); - - /* If checking an assertion for a condition, return MATCH_MATCH. */ - - if (condassert) RRETURN(MATCH_MATCH); - + if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); + + /* If checking an assertion for a condition, return MATCH_MATCH. */ + + if (condassert) RRETURN(MATCH_MATCH); + /* Continue from after a successful assertion, updating the offsets high water mark, since extracts may have been taken during the assertion. */ - - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - ecode += 1 + LINK_SIZE; - offset_top = md->end_offset_top; - continue; - + + do ecode += GET(ecode,1); while (*ecode == OP_ALT); + ecode += 1 + LINK_SIZE; + offset_top = md->end_offset_top; + continue; + /* Negative assertion: all branches must fail to match for the assertion to succeed. */ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK_NOT: + + case OP_ASSERT_NOT: + case OP_ASSERTBACK_NOT: save_mark = md->mark; if (md->match_function_type == MATCH_CONDASSERT) { @@ -1613,8 +1613,8 @@ for (;;) /* Loop for each alternative branch. 
*/ - do - { + do + { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); md->mark = save_mark; /* Always restore the mark setting */ @@ -1660,63 +1660,63 @@ for (;;) /* Continue with next branch */ - ecode += GET(ecode,1); - } - while (*ecode == OP_ALT); - + ecode += GET(ecode,1); + } + while (*ecode == OP_ALT); + /* All branches in the assertion failed to match. */ - + NEG_ASSERT_TRUE: if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */ ecode += 1 + LINK_SIZE; /* Continue with current branch */ - continue; - - /* Move the subject pointer back. This occurs only at the start of - each branch of a lookbehind assertion. If we are too close to the start to - move back, this match function fails. When working with UTF-8 we move - back a number of characters, not bytes. */ - - case OP_REVERSE: + continue; + + /* Move the subject pointer back. This occurs only at the start of + each branch of a lookbehind assertion. If we are too close to the start to + move back, this match function fails. When working with UTF-8 we move + back a number of characters, not bytes. 
*/ + + case OP_REVERSE: #ifdef SUPPORT_UTF if (utf) - { - i = GET(ecode, 1); - while (i-- > 0) - { - eptr--; - if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); - BACKCHAR(eptr); - } - } - else -#endif - - /* No UTF-8 support, or not in UTF-8 mode: count is byte count */ - - { - eptr -= GET(ecode, 1); - if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); - } - + { + i = GET(ecode, 1); + while (i-- > 0) + { + eptr--; + if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); + BACKCHAR(eptr); + } + } + else +#endif + + /* No UTF-8 support, or not in UTF-8 mode: count is byte count */ + + { + eptr -= GET(ecode, 1); + if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); + } + /* Save the earliest consulted character, then skip to next op code */ - + if (eptr < md->start_used_ptr) md->start_used_ptr = eptr; - ecode += 1 + LINK_SIZE; - break; - - /* The callout item calls an external function, if one is provided, passing - details of the match so far. This is mainly for debugging, though the - function is able to force a failure. */ - - case OP_CALLOUT: + ecode += 1 + LINK_SIZE; + break; + + /* The callout item calls an external function, if one is provided, passing + details of the match so far. This is mainly for debugging, though the + function is able to force a failure. 
*/ + + case OP_CALLOUT: if (PUBL(callout) != NULL) - { + { PUBL(callout_block) cb; cb.version = 2; /* Version 1 of the callout block */ - cb.callout_number = ecode[1]; - cb.offset_vector = md->offset_vector; + cb.callout_number = ecode[1]; + cb.offset_vector = md->offset_vector; #if defined COMPILE_PCRE8 - cb.subject = (PCRE_SPTR)md->start_subject; + cb.subject = (PCRE_SPTR)md->start_subject; #elif defined COMPILE_PCRE16 cb.subject = (PCRE_SPTR16)md->start_subject; #elif defined COMPILE_PCRE32 @@ -1725,24 +1725,24 @@ for (;;) cb.subject_length = (int)(md->end_subject - md->start_subject); cb.start_match = (int)(mstart - md->start_subject); cb.current_position = (int)(eptr - md->start_subject); - cb.pattern_position = GET(ecode, 2); - cb.next_item_length = GET(ecode, 2 + LINK_SIZE); - cb.capture_top = offset_top/2; + cb.pattern_position = GET(ecode, 2); + cb.next_item_length = GET(ecode, 2 + LINK_SIZE); + cb.capture_top = offset_top/2; cb.capture_last = md->capture_last & CAPLMASK; /* Internal change requires this for API compatibility. */ if (cb.capture_last == 0) cb.capture_last = -1; - cb.callout_data = md->callout_data; + cb.callout_data = md->callout_data; cb.mark = md->nomatch_mark; if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); - if (rrc < 0) RRETURN(rrc); - } - ecode += 2 + 2*LINK_SIZE; - break; - - /* Recursion either matches the current regex, or some subexpression. The - offset data is the offset to the starting bracket from the start of the - whole pattern. (This is so that it works from duplicated subpatterns.) - + if (rrc < 0) RRETURN(rrc); + } + ecode += 2 + 2*LINK_SIZE; + break; + + /* Recursion either matches the current regex, or some subexpression. The + offset data is the offset to the starting bracket from the start of the + whole pattern. (This is so that it works from duplicated subpatterns.) + The state of the capturing groups is preserved over recursion, and re-instated afterwards. 
We don't know how many are started and not yet finished (offset_top records the completed total) so we just have to save @@ -1750,21 +1750,21 @@ for (;;) large to put on the stack, but using malloc for small numbers seems expensive. As a compromise, the stack is used when there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc is used. - - There are also other values that have to be saved. We use a chained - sequence of blocks that actually live on the stack. Thanks to Robin Houston + + There are also other values that have to be saved. We use a chained + sequence of blocks that actually live on the stack. Thanks to Robin Houston for the original version of this logic. It has, however, been hacked around a lot, so he is not to blame for the current way it works. */ - - case OP_RECURSE: - { + + case OP_RECURSE: + { recursion_info *ri; unsigned int recno; - callpat = md->start_code + GET(ecode, 1); + callpat = md->start_code + GET(ecode, 1); recno = (callpat == md->start_code)? 0 : - GET2(callpat, 1 + LINK_SIZE); - + GET2(callpat, 1 + LINK_SIZE); + /* Check for repeating a recursion without advancing the subject pointer. This should catch convoluted mutual recursions. (Some simple cases are caught at compile time.) 
*/ @@ -1773,41 +1773,41 @@ for (;;) if (recno == ri->group_num && eptr == ri->subject_position) RRETURN(PCRE_ERROR_RECURSELOOP); - /* Add to "recursing stack" */ - + /* Add to "recursing stack" */ + new_recursive.group_num = recno; new_recursive.saved_capture_last = md->capture_last; new_recursive.subject_position = eptr; - new_recursive.prevrec = md->recursive; - md->recursive = &new_recursive; - + new_recursive.prevrec = md->recursive; + md->recursive = &new_recursive; + /* Where to continue from afterwards */ - - ecode += 1 + LINK_SIZE; - + + ecode += 1 + LINK_SIZE; + /* Now save the offset data */ - - new_recursive.saved_max = md->offset_end; - if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) - new_recursive.offset_save = stacksave; - else - { - new_recursive.offset_save = + + new_recursive.saved_max = md->offset_end; + if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) + new_recursive.offset_save = stacksave; + else + { + new_recursive.offset_save = (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int)); - if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); - } - memcpy(new_recursive.offset_save, md->offset_vector, - new_recursive.saved_max * sizeof(int)); - + if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); + } + memcpy(new_recursive.offset_save, md->offset_vector, + new_recursive.saved_max * sizeof(int)); + /* OK, now we can do the recursion. After processing each alternative, restore the offset data and the last captured value. If there were nested recursions, md->recursive might be changed, so reset it before looping. 
*/ - - DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); + + DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); cbegroup = (*callpat >= OP_SBRA); - do - { + do + { if (cbegroup) md->match_function_type = MATCH_CBEGROUP; RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, md, eptrb, RM6); @@ -1816,9 +1816,9 @@ for (;;) md->capture_last = new_recursive.saved_capture_last; md->recursive = new_recursive.prevrec; if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) - { - DPRINTF(("Recursion matched\n")); - if (new_recursive.offset_save != stacksave) + { + DPRINTF(("Recursion matched\n")); + if (new_recursive.offset_save != stacksave) (PUBL(free))(new_recursive.offset_save); /* Set where we got to in the subject, and reset the start in case @@ -1828,14 +1828,14 @@ for (;;) eptr = md->end_match_ptr; mstart = md->start_match_ptr; goto RECURSION_MATCHED; /* Exit loop; end processing */ - } + } /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a recursion; they cause a NOMATCH for the entire recursion. These codes are defined in a range that can be tested for. 
*/ if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX) - { + { if (new_recursive.offset_save != stacksave) (PUBL(free))(new_recursive.offset_save); RRETURN(MATCH_NOMATCH); @@ -1845,56 +1845,56 @@ for (;;) if (rrc != MATCH_NOMATCH) { - DPRINTF(("Recursion gave error %d\n", rrc)); + DPRINTF(("Recursion gave error %d\n", rrc)); if (new_recursive.offset_save != stacksave) (PUBL(free))(new_recursive.offset_save); - RRETURN(rrc); - } - - md->recursive = &new_recursive; - callpat += GET(callpat, 1); - } - while (*callpat == OP_ALT); - - DPRINTF(("Recursion didn't match\n")); - md->recursive = new_recursive.prevrec; - if (new_recursive.offset_save != stacksave) + RRETURN(rrc); + } + + md->recursive = &new_recursive; + callpat += GET(callpat, 1); + } + while (*callpat == OP_ALT); + + DPRINTF(("Recursion didn't match\n")); + md->recursive = new_recursive.prevrec; + if (new_recursive.offset_save != stacksave) (PUBL(free))(new_recursive.offset_save); - RRETURN(MATCH_NOMATCH); - } - + RRETURN(MATCH_NOMATCH); + } + RECURSION_MATCHED: break; - - /* An alternation is the end of a branch; scan along to find the end of the - bracketed group and go to there. */ - - case OP_ALT: - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - break; - + + /* An alternation is the end of a branch; scan along to find the end of the + bracketed group and go to there. */ + + case OP_ALT: + do ecode += GET(ecode,1); while (*ecode == OP_ALT); + break; + /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group, indicating that it may occur zero times. It may repeat infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets with fixed upper repeat limits are compiled as a number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO. 
*/ - - case OP_BRAZERO: + + case OP_BRAZERO: next = ecode + 1; RMATCH(eptr, next, offset_top, md, eptrb, RM10); if (rrc != MATCH_NOMATCH) RRETURN(rrc); do next += GET(next, 1); while (*next == OP_ALT); ecode = next + 1 + LINK_SIZE; - break; - - case OP_BRAMINZERO: + break; + + case OP_BRAMINZERO: next = ecode + 1; do next += GET(next, 1); while (*next == OP_ALT); RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11); if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode++; - break; - + break; + case OP_SKIPZERO: next = ecode+1; do next += GET(next,1); while (*next == OP_ALT); @@ -1910,72 +1910,72 @@ for (;;) if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE; goto POSSESSIVE_NON_CAPTURE; - /* End of a group, repeated or non-repeating. */ - - case OP_KET: - case OP_KETRMIN: - case OP_KETRMAX: + /* End of a group, repeated or non-repeating. */ + + case OP_KET: + case OP_KETRMIN: + case OP_KETRMAX: case OP_KETRPOS: - prev = ecode - GET(ecode, 1); - - /* If this was a group that remembered the subject start, in order to break - infinite repeats of empty string matches, retrieve the subject start from - the chain. Otherwise, set it NULL. */ - + prev = ecode - GET(ecode, 1); + + /* If this was a group that remembered the subject start, in order to break + infinite repeats of empty string matches, retrieve the subject start from + the chain. Otherwise, set it NULL. */ + if (*prev >= OP_SBRA || *prev == OP_ONCE) - { - saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */ - eptrb = eptrb->epb_prev; /* Backup to previous group */ - } - else saved_eptr = NULL; - + { + saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */ + eptrb = eptrb->epb_prev; /* Backup to previous group */ + } + else saved_eptr = NULL; + /* If we are at the end of an assertion group or a non-capturing atomic group, stop matching and return MATCH_MATCH, but record the current high water mark for use by positive assertions. 
We also need to record the match start in case it was changed by \K. */ - + if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) || *prev == OP_ONCE_NC) - { + { md->end_match_ptr = eptr; /* For ONCE_NC */ - md->end_offset_top = offset_top; + md->end_offset_top = offset_top; md->start_match_ptr = mstart; RRETURN(MATCH_MATCH); /* Sets md->mark */ - } - - /* For capturing groups we have to check the group number back at the start - and if necessary complete handling an extraction by setting the offsets and + } + + /* For capturing groups we have to check the group number back at the start + and if necessary complete handling an extraction by setting the offsets and bumping the high water mark. Whole-pattern recursion is coded as a recurse into group 0, so it won't be picked up here. Instead, we catch it when the OP_END is reached. Other recursion is handled here. We just have to record the current subject position and start match pointer and give a MATCH return. */ - + if (*prev == OP_CBRA || *prev == OP_SCBRA || *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS) - { - number = GET2(prev, 1+LINK_SIZE); - offset = number << 1; - + { + number = GET2(prev, 1+LINK_SIZE); + offset = number << 1; + #ifdef PCRE_DEBUG - printf("end bracket %d", number); - printf("\n"); -#endif - + printf("end bracket %d", number); + printf("\n"); +#endif + /* Handle a recursively called group. */ if (md->recursive != NULL && md->recursive->group_num == number) - { + { md->end_match_ptr = eptr; md->start_match_ptr = mstart; RRETURN(MATCH_MATCH); - } - + } + /* Deal with capturing */ - + md->capture_last = (md->capture_last & OVFLMASK) | number; if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else - { + { /* If offset is greater than offset_top, it means that we are "skipping" a capturing group, and that group's offsets must be marked unset. 
In earlier versions of PCRE, all the offsets were unset at the @@ -1999,14 +1999,14 @@ for (;;) md->offset_vector[md->offset_end - number]; md->offset_vector[offset+1] = (int)(eptr - md->start_subject); if (offset_top <= offset) offset_top = offset + 2; - } - } - + } + } + /* OP_KETRPOS is a possessive repeating ket. Remember the current position, and return the MATCH_KETRPOS. This makes it possible to do the repeats one at a time from the outer level, thus saving stack. This must precede the empty string test - in this case that test is done at the outer level. */ - + if (*ecode == OP_KETRPOS) { md->start_match_ptr = mstart; /* In case \K reset it */ @@ -2014,7 +2014,7 @@ for (;;) md->end_offset_top = offset_top; RRETURN(MATCH_KETRPOS); } - + /* For an ordinary non-repeating ket, just continue at this level. This also happens for a repeating ket if no characters were matched in the group. This is the forcible breaking of infinite loops as implemented in @@ -2023,9 +2023,9 @@ for (;;) level. If this results in a NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby bypassing intermediate backup points, but resetting any captures that happened along the way. */ - - if (*ecode == OP_KET || eptr == saved_eptr) - { + + if (*ecode == OP_KET || eptr == saved_eptr) + { if (*prev == OP_ONCE) { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12); @@ -2034,21 +2034,21 @@ for (;;) RRETURN(MATCH_ONCE); } ecode += 1 + LINK_SIZE; /* Carry on at this level */ - break; - } - + break; + } + /* The normal repeating kets try the rest of the pattern or restart from the preceding bracket, in the appropriate order. In the second case, we can use tail recursion to avoid using another stack frame, unless we have an an atomic group or an unlimited repeat of a group that can match an empty string. 
*/ - - if (*ecode == OP_KETRMIN) - { + + if (*ecode == OP_KETRMIN) + { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (*prev == OP_ONCE) - { + { RMATCH(eptr, prev, offset_top, md, eptrb, RM8); if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */ @@ -2057,16 +2057,16 @@ for (;;) if (*prev >= OP_SBRA) /* Could match an empty string */ { RMATCH(eptr, prev, offset_top, md, eptrb, RM50); - RRETURN(rrc); - } - ecode = prev; - goto TAIL_RECURSE; - } - else /* OP_KETRMAX */ - { + RRETURN(rrc); + } + ecode = prev; + goto TAIL_RECURSE; + } + else /* OP_KETRMAX */ + { RMATCH(eptr, prev, offset_top, md, eptrb, RM13); if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (*prev == OP_ONCE) { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9); @@ -2074,23 +2074,23 @@ for (;;) md->once_target = prev; RRETURN(MATCH_ONCE); } - ecode += 1 + LINK_SIZE; - goto TAIL_RECURSE; - } - /* Control never gets here */ - + ecode += 1 + LINK_SIZE; + goto TAIL_RECURSE; + } + /* Control never gets here */ + /* Not multiline mode: start of subject assertion, unless notbol. */ - - case OP_CIRC: - if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); - - /* Start of subject assertion */ - - case OP_SOD: - if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); - ecode++; - break; - + + case OP_CIRC: + if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); + + /* Start of subject assertion */ + + case OP_SOD: + if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); + ecode++; + break; + /* Multiline mode: start of subject unless notbol, or after any newline. 
*/ case OP_CIRCM: @@ -2101,26 +2101,26 @@ for (;;) ecode++; break; - /* Start of match assertion */ - - case OP_SOM: - if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); - ecode++; - break; - - /* Reset the start of match point */ - - case OP_SET_SOM: - mstart = eptr; - ecode++; - break; - + /* Start of match assertion */ + + case OP_SOM: + if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); + ecode++; + break; + + /* Reset the start of match point */ + + case OP_SET_SOM: + mstart = eptr; + ecode++; + break; + /* Multiline mode: assert before any newline, or before end of subject unless noteol is set. */ - + case OP_DOLLM: if (eptr < md->end_subject) - { + { if (!IS_NEWLINE(eptr)) { if (md->partial != 0 && @@ -2134,12 +2134,12 @@ for (;;) } RRETURN(MATCH_NOMATCH); } - } - else - { - if (md->noteol) RRETURN(MATCH_NOMATCH); + } + else + { + if (md->noteol) RRETURN(MATCH_NOMATCH); SCHECK_PARTIAL(); - } + } ecode++; break; @@ -2150,22 +2150,22 @@ for (;;) if (md->noteol) RRETURN(MATCH_NOMATCH); if (!md->endonly) goto ASSERT_NL_OR_EOS; - /* ... else fall through for endonly */ - - /* End of subject assertion (\z) */ - - case OP_EOD: - if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); + /* ... 
else fall through for endonly */ + + /* End of subject assertion (\z) */ + + case OP_EOD: + if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); SCHECK_PARTIAL(); - ecode++; - break; - - /* End of subject or ending \n assertion (\Z) */ - - case OP_EODN: + ecode++; + break; + + /* End of subject or ending \n assertion (\Z) */ + + case OP_EODN: ASSERT_NL_OR_EOS: if (eptr < md->end_subject && - (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) + (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) { if (md->partial != 0 && eptr + 1 >= md->end_subject && @@ -2176,37 +2176,37 @@ for (;;) md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); } - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } /* Either at end of string or \n before end. */ SCHECK_PARTIAL(); - ecode++; - break; - - /* Word boundary assertions */ - - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - { - - /* Find out if the previous and current characters are "word" characters. - It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to + ecode++; + break; + + /* Word boundary assertions */ + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + { + + /* Find out if the previous and current characters are "word" characters. + It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to be "non-word" characters. Remember the earliest consulted character for partial matching. 
*/ - + #ifdef SUPPORT_UTF if (utf) - { + { /* Get status of previous character */ - if (eptr == md->start_subject) prev_is_word = FALSE; else - { + if (eptr == md->start_subject) prev_is_word = FALSE; else + { PCRE_PUCHAR lastptr = eptr - 1; BACKCHAR(lastptr); if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; - GETCHAR(c, lastptr); + GETCHAR(c, lastptr); #ifdef SUPPORT_UCP if (md->use_ucp) { @@ -2218,19 +2218,19 @@ for (;;) } else #endif - prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; - } + prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; + } /* Get status of next character */ if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); cur_is_word = FALSE; } else { - GETCHAR(c, eptr); + GETCHAR(c, eptr); #ifdef SUPPORT_UCP if (md->use_ucp) { @@ -2242,16 +2242,16 @@ for (;;) } else #endif - cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; - } - } - else -#endif - + cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; + } + } + else +#endif + /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for consistency with the behaviour of \w we do use it in this case. */ - - { + + { /* Get status of previous character */ if (eptr == md->start_subject) prev_is_word = FALSE; else @@ -2295,30 +2295,30 @@ for (;;) #endif cur_is_word = MAX_255(*eptr) && ((md->ctypes[*eptr] & ctype_word) != 0); - } - - /* Now see if the situation is what we want */ - - if ((*ecode++ == OP_WORD_BOUNDARY)? - cur_is_word == prev_is_word : cur_is_word != prev_is_word) - RRETURN(MATCH_NOMATCH); - } - break; - + } + + /* Now see if the situation is what we want */ + + if ((*ecode++ == OP_WORD_BOUNDARY)? + cur_is_word == prev_is_word : cur_is_word != prev_is_word) + RRETURN(MATCH_NOMATCH); + } + break; + /* Match any single character type except newline; have to take care with CRLF newlines and partial matching. 
*/ - - case OP_ANY: + + case OP_ANY: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); if (md->partial != 0 && eptr == md->end_subject - 1 && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && UCHAR21TEST(eptr) == NLBLOCK->nl[0]) - { + { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } + } /* Fall through */ @@ -2334,134 +2334,134 @@ for (;;) #ifdef SUPPORT_UTF if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); #endif - ecode++; - break; - - /* Match a single byte, even in UTF-8 mode. This opcode really does match - any byte, even newline, independent of the setting of PCRE_DOTALL. */ - - case OP_ANYBYTE: + ecode++; + break; + + /* Match a single byte, even in UTF-8 mode. This opcode really does match + any byte, even newline, independent of the setting of PCRE_DOTALL. */ + + case OP_ANYBYTE: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ { /* not be updated before SCHECK_PARTIAL. */ SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } eptr++; - ecode++; - break; - - case OP_NOT_DIGIT: + ecode++; + break; + + case OP_NOT_DIGIT: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - if ( + GETCHARINCTEST(c, eptr); + if ( #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c < 256 && -#endif - (md->ctypes[c] & ctype_digit) != 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_DIGIT: + c < 256 && +#endif + (md->ctypes[c] & ctype_digit) != 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_DIGIT: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - if ( + GETCHARINCTEST(c, eptr); + if ( #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) c > 255 || -#endif - (md->ctypes[c] & ctype_digit) == 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_NOT_WHITESPACE: +#endif + (md->ctypes[c] & ctype_digit) == 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_NOT_WHITESPACE: 
if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - if ( + GETCHARINCTEST(c, eptr); + if ( #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c < 256 && -#endif - (md->ctypes[c] & ctype_space) != 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_WHITESPACE: + c < 256 && +#endif + (md->ctypes[c] & ctype_space) != 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_WHITESPACE: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - if ( + GETCHARINCTEST(c, eptr); + if ( #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) c > 255 || -#endif - (md->ctypes[c] & ctype_space) == 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_NOT_WORDCHAR: +#endif + (md->ctypes[c] & ctype_space) == 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_NOT_WORDCHAR: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - if ( + GETCHARINCTEST(c, eptr); + if ( #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c < 256 && -#endif - (md->ctypes[c] & ctype_word) != 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_WORDCHAR: + c < 256 && +#endif + (md->ctypes[c] & ctype_word) != 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_WORDCHAR: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - if ( + GETCHARINCTEST(c, eptr); + if ( #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) c > 255 || -#endif - (md->ctypes[c] & ctype_word) == 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_ANYNL: +#endif + (md->ctypes[c] & ctype_word) == 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_ANYNL: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - switch(c) - { - default: RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + switch(c) + { + default: 
RRETURN(MATCH_NOMATCH); case CHAR_CR: if (eptr >= md->end_subject) @@ -2469,128 +2469,128 @@ for (;;) SCHECK_PARTIAL(); } else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++; - break; - + break; + case CHAR_LF: - break; - + break; + case CHAR_VT: case CHAR_FF: case CHAR_NEL: #ifndef EBCDIC - case 0x2028: - case 0x2029: + case 0x2028: + case 0x2029: #endif /* Not EBCDIC */ - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - ecode++; - break; - - case OP_NOT_HSPACE: + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); + break; + } + ecode++; + break; + + case OP_NOT_HSPACE: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - switch(c) - { + GETCHARINCTEST(c, eptr); + switch(c) + { HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ - default: break; - } - ecode++; - break; - - case OP_HSPACE: + default: break; + } + ecode++; + break; + + case OP_HSPACE: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - switch(c) - { + GETCHARINCTEST(c, eptr); + switch(c) + { HSPACE_CASES: break; /* Byte and multibyte cases */ - default: RRETURN(MATCH_NOMATCH); - } - ecode++; - break; - - case OP_NOT_VSPACE: + default: RRETURN(MATCH_NOMATCH); + } + ecode++; + break; + + case OP_NOT_VSPACE: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - switch(c) - { + GETCHARINCTEST(c, eptr); + switch(c) + { VSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - ecode++; - break; - - case OP_VSPACE: + default: break; + } + ecode++; + break; + + case OP_VSPACE: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - switch(c) - { + GETCHARINCTEST(c, eptr); + switch(c) + { VSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - } - ecode++; - break; - -#ifdef SUPPORT_UCP - /* Check the next character by Unicode property. 
We will get here only - if the support is in the binary; otherwise a compile-time error occurs. */ - - case OP_PROP: - case OP_NOTPROP: + default: RRETURN(MATCH_NOMATCH); + } + ecode++; + break; + +#ifdef SUPPORT_UCP + /* Check the next character by Unicode property. We will get here only + if the support is in the binary; otherwise a compile-time error occurs. */ + + case OP_PROP: + case OP_NOTPROP: if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - { + GETCHARINCTEST(c, eptr); + { const pcre_uint32 *cp; const ucd_record *prop = GET_UCD(c); - - switch(ecode[1]) - { - case PT_ANY: - if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH); - break; - - case PT_LAMP: + + switch(ecode[1]) + { + case PT_ANY: + if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + break; + + case PT_LAMP: if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); break; - - case PT_GC: + + case PT_GC: if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_PC: + RRETURN(MATCH_NOMATCH); + break; + + case PT_PC: if ((ecode[2] != prop->chartype) == (op == OP_PROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_SC: + RRETURN(MATCH_NOMATCH); + break; + + case PT_SC: if ((ecode[2] != prop->script) == (op == OP_PROP)) - RRETURN(MATCH_NOMATCH); - break; - + RRETURN(MATCH_NOMATCH); + break; + /* These are specials */ case PT_ALNUM: @@ -2646,20 +2646,20 @@ for (;;) /* This should never occur */ - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - - ecode += 3; - } - break; - - /* Match an extended Unicode sequence. We will get here only if the support - is in the binary; otherwise a compile-time error occurs. */ - - case OP_EXTUNI: + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + + ecode += 3; + } + break; + + /* Match an extended Unicode sequence. 
We will get here only if the support + is in the binary; otherwise a compile-time error occurs. */ + + case OP_EXTUNI: if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } @@ -2668,30 +2668,30 @@ for (;;) int lgb, rgb; GETCHARINCTEST(c, eptr); lgb = UCD_GRAPHBREAK(c); - while (eptr < md->end_subject) - { - int len = 1; + while (eptr < md->end_subject) + { + int len = 1; if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } rgb = UCD_GRAPHBREAK(c); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; lgb = rgb; - eptr += len; - } - } + eptr += len; + } + } CHECK_PARTIAL(); - ecode++; - break; + ecode++; + break; #endif /* SUPPORT_UCP */ - - - /* Match a back reference, possibly repeatedly. Look past the end of the - item to see if there is repeat information following. The code is similar - to that for character classes, but repeated for efficiency. Then obey - similar code to character type repeats - written out again for speed. - However, if the referenced string is the empty string, always treat - it as matched, any number of times (otherwise there could be infinite + + + /* Match a back reference, possibly repeatedly. Look past the end of the + item to see if there is repeat information following. The code is similar + to that for character classes, but repeated for efficiency. Then obey + similar code to character type repeats - written out again for speed. + However, if the referenced string is the empty string, always treat + it as matched, any number of times (otherwise there could be infinite loops). If the reference is unset, there are two possibilities: - + (a) In the default, Perl-compatible state, set the length negative; this ensures that every attempt at a match fails. We can't just fail here, because of the possibility of quantifiers with zero minima. 
@@ -2710,19 +2710,19 @@ for (;;) case OP_DNREF: case OP_DNREFI: caseless = op == OP_DNREFI; - { + { int count = GET2(ecode, 1+IMM2_SIZE); pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size; ecode += 1 + 2*IMM2_SIZE; - + /* Setting the default length first and initializing 'offset' avoids compiler warnings in the REF_REPEAT code. */ - + length = (md->jscript_compat)? 0 : -1; offset = 0; - + while (count-- > 0) - { + { offset = GET2(slot, 0) << 1; if (offset < offset_top && md->offset_vector[offset] >= 0) { @@ -2733,7 +2733,7 @@ for (;;) } } goto REF_REPEAT; - + case OP_REF: case OP_REFI: caseless = op == OP_REFI; @@ -2743,7 +2743,7 @@ for (;;) length = (md->jscript_compat)? 0 : -1; else length = md->offset_vector[offset+1] - md->offset_vector[offset]; - + /* Set up for repetition, or handle the non-repeated case */ REF_REPEAT: @@ -2777,72 +2777,72 @@ for (;;) if (length == -2) eptr = md->end_subject; /* Partial match */ CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); - } + } eptr += length; continue; /* With the main loop */ } - + /* Handle repeated back references. If the length of the reference is zero, just continue with the main loop. If the length is negative, it means the reference is unset in non-Java-compatible mode. If the minimum is zero, we can continue at the same level without recursion. For any other minimum, carrying on will result in NOMATCH. */ - + if (length == 0) continue; if (length < 0 && min == 0) continue; - + /* First, ensure the minimum number of matches are present. We get back the length of the reference string explicitly rather than passing the address of eptr, so that eptr can be a register variable. */ - + for (i = 1; i <= min; i++) { int slength; if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) - { + { if (slength == -2) eptr = md->end_subject; /* Partial match */ CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); - } + } eptr += slength; } - + /* If min = max, continue at the same level without recursion. 
They are not both allowed to be zero. */ - + if (min == max) continue; - + /* If minimizing, keep trying and advancing the pointer */ - + if (minimize) { for (fi = min;; fi++) - { + { int slength; RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) - { + { if (slength == -2) eptr = md->end_subject; /* Partial match */ CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); - } + } eptr += slength; - } + } /* Control never gets here */ } - + /* If maximizing, find the longest string and work backwards */ - + else { pp = eptr; for (i = min; i < max; i++) - { + { int slength; if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) - { + { /* Can't use CHECK_PARTIAL because we don't want to update eptr in the soft partial matching case. */ @@ -2853,9 +2853,9 @@ for (;;) if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); } break; - } + } eptr += slength; - } + } while (eptr >= pp) { @@ -2864,97 +2864,97 @@ for (;;) eptr -= length; } RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - /* Match a bit-mapped character class, possibly repeatedly. This op code is - used when all the characters in the class have values in the range 0-255, - and either the matching is caseful, or the characters are in the range - 0-127 when UTF-8 processing is enabled. The only difference between - OP_CLASS and OP_NCLASS occurs when a data character outside the range is - encountered. - - First, look past the end of the item to see if there is repeat information - following. Then obey similar code to character type repeats - written out - again for speed. */ - - case OP_NCLASS: - case OP_CLASS: - { + } + /* Control never gets here */ + + /* Match a bit-mapped character class, possibly repeatedly. 
This op code is + used when all the characters in the class have values in the range 0-255, + and either the matching is caseful, or the characters are in the range + 0-127 when UTF-8 processing is enabled. The only difference between + OP_CLASS and OP_NCLASS occurs when a data character outside the range is + encountered. + + First, look past the end of the item to see if there is repeat information + following. Then obey similar code to character type repeats - written out + again for speed. */ + + case OP_NCLASS: + case OP_CLASS: + { /* The data variable is saved across frames, so the byte map needs to be stored there. */ #define BYTE_MAP ((pcre_uint8 *)data) - data = ecode + 1; /* Save for matching */ + data = ecode + 1; /* Save for matching */ ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: case OP_CRPOSSTAR: case OP_CRPOSPLUS: case OP_CRPOSQUERY: - c = *ecode++ - OP_CRSTAR; + c = *ecode++ - OP_CRSTAR; if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0; else possessive = TRUE; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: case OP_CRPOSRANGE: - minimize = (*ecode == OP_CRMINRANGE); + minimize = (*ecode == OP_CRMINRANGE); possessive = (*ecode == OP_CRPOSRANGE); - min = GET2(ecode, 1); + min = GET2(ecode, 1); max = GET2(ecode, 1 + IMM2_SIZE); - if (max == 0) max = INT_MAX; + if (max == 0) max = INT_MAX; ecode += 1 + 2 * IMM2_SIZE; - 
break; - - default: /* No repeat follows */ - min = max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - + break; + + default: /* No repeat follows */ + min = max = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + #ifdef SUPPORT_UTF if (utf) - { - for (i = 1; i <= min; i++) - { + { + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - if (c > 255) - { - if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); - } - else + GETCHARINC(c, eptr); + if (c > 255) + { + if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - else -#endif + } + } + else +#endif /* Not UTF mode */ - { - for (i = 1; i <= min; i++) - { + { + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - c = *eptr++; + c = *eptr++; #ifndef COMPILE_PCRE8 if (c > 255) { @@ -2963,56 +2963,56 @@ for (;;) else #endif if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == max) continue; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - - if (minimize) - { + } + } + + /* If max == min we can continue with the main loop without the + need to recurse. */ + + if (min == max) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. 
*/ + + if (minimize) + { #ifdef SUPPORT_UTF if (utf) - { - for (fi = min;; fi++) - { + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - if (c > 255) - { - if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); - } - else + GETCHARINC(c, eptr); + if (c > 255) + { + if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - else -#endif + } + } + else +#endif /* Not UTF mode */ - { - for (fi = min;; fi++) - { + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - c = *eptr++; + c = *eptr++; #ifndef COMPILE_PCRE8 if (c > 255) { @@ -3021,60 +3021,60 @@ for (;;) else #endif if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - } - - /* If maximizing, find the longest possible run, then work backwards. */ - - else - { - pp = eptr; - + } + } + /* Control never gets here */ + } + + /* If maximizing, find the longest possible run, then work backwards. 
*/ + + else + { + pp = eptr; + #ifdef SUPPORT_UTF if (utf) - { - for (i = min; i < max; i++) - { - int len = 1; + { + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - if (c > 255) - { - if (op == OP_CLASS) break; - } - else + GETCHARLEN(c, eptr, len); + if (c > 255) + { + if (op == OP_CLASS) break; + } + else if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; - eptr += len; - } + eptr += len; + } if (possessive) continue; /* No backtracking */ - for (;;) - { + for (;;) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM18); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- <= pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif + BACKCHAR(eptr); + } + } + else +#endif /* Not UTF mode */ - { - for (i = min; i < max; i++) - { + { + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - c = *eptr; + c = *eptr; #ifndef COMPILE_PCRE8 if (c > 255) { @@ -3083,76 +3083,76 @@ for (;;) else #endif if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; - eptr++; - } + eptr++; + } if (possessive) continue; /* No backtracking */ - while (eptr >= pp) - { + while (eptr >= pp) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM19); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; - } - } - - RRETURN(MATCH_NOMATCH); - } + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + eptr--; + } + } + + RRETURN(MATCH_NOMATCH); + } #undef BYTE_MAP - } - /* Control never gets here */ - - + } + /* Control never gets here */ + + /* Match an extended character class. In the 8-bit library, this opcode is encountered only when UTF-8 mode mode is supported. In the 16-bit and 32-bit libraries, codepoints greater than 255 may be encountered even when UTF is not supported. 
*/ - + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - { - data = ecode + 1 + LINK_SIZE; /* Save for matching */ - ecode += GET(ecode, 1); /* Advance past the item */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: + case OP_XCLASS: + { + data = ecode + 1 + LINK_SIZE; /* Save for matching */ + ecode += GET(ecode, 1); /* Advance past the item */ + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: case OP_CRPOSSTAR: case OP_CRPOSPLUS: case OP_CRPOSQUERY: - c = *ecode++ - OP_CRSTAR; + c = *ecode++ - OP_CRSTAR; if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0; else possessive = TRUE; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: case OP_CRPOSRANGE: - minimize = (*ecode == OP_CRMINRANGE); + minimize = (*ecode == OP_CRMINRANGE); possessive = (*ecode == OP_CRPOSRANGE); - min = GET2(ecode, 1); + min = GET2(ecode, 1); max = GET2(ecode, 1 + IMM2_SIZE); - if (max == 0) max = INT_MAX; + if (max == 0) max = INT_MAX; ecode += 1 + 2 * IMM2_SIZE; - break; - - default: /* No repeat follows */ - min = max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - - for (i = 1; i <= min; i++) - { + break; + + default: /* No repeat follows */ + min = max = 1; + break; + } + + /* First, ensure the minimum number of matches are present. 
*/ + + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -3160,22 +3160,22 @@ for (;;) } GETCHARINCTEST(c, eptr); if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == max) continue; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - - if (minimize) - { - for (fi = min;; fi++) - { + } + + /* If max == min we can continue with the main loop without the + need to recurse. */ + + if (min == max) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (minimize) + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { @@ -3184,18 +3184,18 @@ for (;;) } GETCHARINCTEST(c, eptr); if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - } - - /* If maximizing, find the longest possible run, then work backwards. */ - - else - { - pp = eptr; - for (i = min; i < max; i++) - { - int len = 1; + } + /* Control never gets here */ + } + + /* If maximizing, find the longest possible run, then work backwards. 
*/ + + else + { + pp = eptr; + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -3207,63 +3207,63 @@ for (;;) c = *eptr; #endif if (!PRIV(xclass)(c, data, utf)) break; - eptr += len; - } + eptr += len; + } if (possessive) continue; /* No backtracking */ - for(;;) - { + for(;;) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- <= pp) break; /* Stop if tried at original pos */ #ifdef SUPPORT_UTF if (utf) BACKCHAR(eptr); #endif - } - RRETURN(MATCH_NOMATCH); - } - - /* Control never gets here */ - } -#endif /* End of XCLASS */ - - /* Match a single character, casefully */ - - case OP_CHAR: + } + RRETURN(MATCH_NOMATCH); + } + + /* Control never gets here */ + } +#endif /* End of XCLASS */ + + /* Match a single character, casefully */ + + case OP_CHAR: #ifdef SUPPORT_UTF if (utf) - { - length = 1; - ecode++; - GETCHARLEN(fc, ecode, length); + { + length = 1; + ecode++; + GETCHARLEN(fc, ecode, length); if (length > md->end_subject - eptr) { CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ RRETURN(MATCH_NOMATCH); } while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH); - } - else -#endif + } + else +#endif /* Not UTF mode */ - { + { if (md->end_subject - eptr < 1) { SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ RRETURN(MATCH_NOMATCH); } - if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); - ecode += 2; - } - break; - + if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); + ecode += 2; + } + break; + /* Match a single character, caselessly. If we are at the end of the subject, give up immediately. 
*/ - + case OP_CHARI: if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } @@ -3271,128 +3271,128 @@ for (;;) #ifdef SUPPORT_UTF if (utf) { - length = 1; - ecode++; - GETCHARLEN(fc, ecode, length); - - /* If the pattern character's value is < 128, we have only one byte, and + length = 1; + ecode++; + GETCHARLEN(fc, ecode, length); + + /* If the pattern character's value is < 128, we have only one byte, and we know that its other case must also be one byte long, so we can use the fast lookup table. We know that there is at least one byte left in the subject. */ - - if (fc < 128) - { + + if (fc < 128) + { pcre_uint32 cc = UCHAR21(eptr); if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH); ecode++; eptr++; - } - + } + /* Otherwise we must pick up the subject character. Note that we cannot use the value of "length" to check for sufficient bytes left, because the other case of the character may have more or fewer bytes. */ - - else - { + + else + { pcre_uint32 dc; - GETCHARINC(dc, eptr); - ecode += length; - - /* If we have Unicode property support, we can use it to test the other - case of the character, if there is one. */ - - if (fc != dc) - { -#ifdef SUPPORT_UCP + GETCHARINC(dc, eptr); + ecode += length; + + /* If we have Unicode property support, we can use it to test the other + case of the character, if there is one. */ + + if (fc != dc) + { +#ifdef SUPPORT_UCP if (dc != UCD_OTHERCASE(fc)) -#endif - RRETURN(MATCH_NOMATCH); - } - } - } - else +#endif + RRETURN(MATCH_NOMATCH); + } + } + } + else #endif /* SUPPORT_UTF */ - + /* Not UTF mode */ - { + { if (TABLE_GET(ecode[1], md->lcc, ecode[1]) != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); eptr++; - ecode += 2; - } - break; - - /* Match a single character repeatedly. */ - - case OP_EXACT: + ecode += 2; + } + break; + + /* Match a single character repeatedly. 
*/ + + case OP_EXACT: case OP_EXACTI: - min = max = GET2(ecode, 1); + min = max = GET2(ecode, 1); ecode += 1 + IMM2_SIZE; - goto REPEATCHAR; - - case OP_POSUPTO: + goto REPEATCHAR; + + case OP_POSUPTO: case OP_POSUPTOI: - possessive = TRUE; - /* Fall through */ - - case OP_UPTO: + possessive = TRUE; + /* Fall through */ + + case OP_UPTO: case OP_UPTOI: - case OP_MINUPTO: + case OP_MINUPTO: case OP_MINUPTOI: - min = 0; - max = GET2(ecode, 1); + min = 0; + max = GET2(ecode, 1); minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; ecode += 1 + IMM2_SIZE; - goto REPEATCHAR; - - case OP_POSSTAR: + goto REPEATCHAR; + + case OP_POSSTAR: case OP_POSSTARI: - possessive = TRUE; - min = 0; - max = INT_MAX; - ecode++; - goto REPEATCHAR; - - case OP_POSPLUS: + possessive = TRUE; + min = 0; + max = INT_MAX; + ecode++; + goto REPEATCHAR; + + case OP_POSPLUS: case OP_POSPLUSI: - possessive = TRUE; - min = 1; - max = INT_MAX; - ecode++; - goto REPEATCHAR; - - case OP_POSQUERY: + possessive = TRUE; + min = 1; + max = INT_MAX; + ecode++; + goto REPEATCHAR; + + case OP_POSQUERY: case OP_POSQUERYI: - possessive = TRUE; - min = 0; - max = 1; - ecode++; - goto REPEATCHAR; - - case OP_STAR: + possessive = TRUE; + min = 0; + max = 1; + ecode++; + goto REPEATCHAR; + + case OP_STAR: case OP_STARI: - case OP_MINSTAR: + case OP_MINSTAR: case OP_MINSTARI: - case OP_PLUS: + case OP_PLUS: case OP_PLUSI: - case OP_MINPLUS: + case OP_MINPLUS: case OP_MINPLUSI: - case OP_QUERY: + case OP_QUERY: case OP_QUERYI: - case OP_MINQUERY: + case OP_MINQUERY: case OP_MINQUERYI: c = *ecode++ - ((op < OP_STARI)? 
OP_STAR : OP_STARI); - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + /* Common code for all repeated single-character matches. We first check for the minimum number of characters. If the minimum equals the maximum, we are done. Otherwise, if minimizing, check the rest of the pattern for a match; if there isn't one, advance up to the maximum, one character at a time. - + If maximizing, advance up to the maximum number of matching characters, until eptr is past the end of the maximum run. If possessive, we are then done (no backing up). Otherwise, match at this position; anything @@ -3404,128 +3404,128 @@ for (;;) The various UTF/non-UTF and caseful/caseless cases are handled separately, for speed. */ - REPEATCHAR: + REPEATCHAR: #ifdef SUPPORT_UTF if (utf) - { - length = 1; - charptr = ecode; - GETCHARLEN(fc, ecode, length); - ecode += length; - - /* Handle multibyte character matching specially here. There is - support for caseless matching if UCP support is present. */ - - if (length > 1) - { -#ifdef SUPPORT_UCP + { + length = 1; + charptr = ecode; + GETCHARLEN(fc, ecode, length); + ecode += length; + + /* Handle multibyte character matching specially here. There is + support for caseless matching if UCP support is present. 
*/ + + if (length > 1) + { +#ifdef SUPPORT_UCP pcre_uint32 othercase; if (op >= OP_STARI && /* Caseless */ (othercase = UCD_OTHERCASE(fc)) != fc) oclength = PRIV(ord2utf)(othercase, occhars); - else oclength = 0; -#endif /* SUPPORT_UCP */ - - for (i = 1; i <= min; i++) - { + else oclength = 0; +#endif /* SUPPORT_UCP */ + + for (i = 1; i <= min; i++) + { if (eptr <= md->end_subject - length && memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; -#ifdef SUPPORT_UCP +#ifdef SUPPORT_UCP else if (oclength > 0 && eptr <= md->end_subject - oclength && memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; #endif /* SUPPORT_UCP */ - else - { + else + { CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); - } - } - - if (min == max) continue; - - if (minimize) - { - for (fi = min;; fi++) - { + } + } + + if (min == max) continue; + + if (minimize) + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr <= md->end_subject - length && memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; -#ifdef SUPPORT_UCP +#ifdef SUPPORT_UCP else if (oclength > 0 && eptr <= md->end_subject - oclength && memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; #endif /* SUPPORT_UCP */ - else - { + else + { CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - } - - else /* Maximize */ - { - pp = eptr; - for (i = min; i < max; i++) - { + } + } + /* Control never gets here */ + } + + else /* Maximize */ + { + pp = eptr; + for (i = min; i < max; i++) + { if (eptr <= md->end_subject - length && memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; -#ifdef SUPPORT_UCP +#ifdef SUPPORT_UCP else if (oclength > 0 && eptr <= md->end_subject - oclength && memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; #endif /* SUPPORT_UCP */ - else - { + else + { 
CHECK_PARTIAL(); break; - } - } - + } + } + if (possessive) continue; /* No backtracking */ - for(;;) + for(;;) { if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); if (rrc != MATCH_NOMATCH) RRETURN(rrc); -#ifdef SUPPORT_UCP +#ifdef SUPPORT_UCP eptr--; BACKCHAR(eptr); -#else /* without SUPPORT_UCP */ +#else /* without SUPPORT_UCP */ eptr -= length; -#endif /* SUPPORT_UCP */ - } - } - /* Control never gets here */ - } - - /* If the length of a UTF-8 character is 1, we fall through here, and - obey the code as for non-UTF-8 characters below, though in this case the - value of fc will always be < 128. */ - } - else +#endif /* SUPPORT_UCP */ + } + } + /* Control never gets here */ + } + + /* If the length of a UTF-8 character is 1, we fall through here, and + obey the code as for non-UTF-8 characters below, though in this case the + value of fc will always be < 128. */ + } + else #endif /* SUPPORT_UTF */ /* When not in UTF-8 mode, load a single-byte character. */ - fc = *ecode++; - + fc = *ecode++; + /* The value of fc at this point is always one character, though we may or may not be in UTF mode. The code is duplicated for the caseless and - caseful cases, for speed, since matching characters is likely to be quite - common. First, ensure the minimum number of matches are present. If min = - max, continue at the same level without recursing. Otherwise, if - minimizing, keep trying the rest of the expression and advancing one - matching character if failing, up to the maximum. Alternatively, if - maximizing, find the maximum number of characters and work backwards. */ - - DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, + caseful cases, for speed, since matching characters is likely to be quite + common. First, ensure the minimum number of matches are present. If min = + max, continue at the same level without recursing. 
Otherwise, if + minimizing, keep trying the rest of the expression and advancing one + matching character if failing, up to the maximum. Alternatively, if + maximizing, find the maximum number of characters and work backwards. */ + + DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, max, (char *)eptr)); - + if (op >= OP_STARI) /* Caseless */ - { + { #ifdef COMPILE_PCRE8 /* fc must be < 128 if UTF is enabled. */ foc = md->fcc[fc]; @@ -3543,7 +3543,7 @@ for (;;) foc = TABLE_GET(fc, md->fcc, fc); #endif /* COMPILE_PCRE8 */ - for (i = 1; i <= min; i++) + for (i = 1; i <= min; i++) { pcre_uint32 cc; /* Faster than pcre_uchar */ if (eptr >= md->end_subject) @@ -3555,31 +3555,31 @@ for (;;) if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); eptr++; } - if (min == max) continue; - if (minimize) - { - for (fi = min;; fi++) - { + if (min == max) continue; + if (minimize) + { + for (fi = min;; fi++) + { pcre_uint32 cc; /* Faster than pcre_uchar */ RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } cc = UCHAR21TEST(eptr); if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); eptr++; - } - /* Control never gets here */ - } - else /* Maximize */ - { - pp = eptr; - for (i = min; i < max; i++) - { + } + /* Control never gets here */ + } + else /* Maximize */ + { + pp = eptr; + for (i = min; i < max; i++) + { pcre_uint32 cc; /* Faster than pcre_uchar */ if (eptr >= md->end_subject) { @@ -3588,24 +3588,24 @@ for (;;) } cc = UCHAR21TEST(eptr); if (fc != cc && foc != cc) break; - eptr++; - } + eptr++; + } if (possessive) continue; /* No backtracking */ for (;;) - { + { if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM25); - eptr--; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - } + eptr--; + if (rrc != MATCH_NOMATCH) 
RRETURN(rrc); + } /* Control never gets here */ - } - } - - /* Caseful comparisons (includes all multi-byte characters) */ - - else - { + } + } + + /* Caseful comparisons (includes all multi-byte characters) */ + + else + { for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) @@ -3616,60 +3616,60 @@ for (;;) if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH); } - if (min == max) continue; + if (min == max) continue; - if (minimize) - { - for (fi = min;; fi++) - { + if (minimize) + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - } - else /* Maximize */ - { - pp = eptr; - for (i = min; i < max; i++) - { + } + /* Control never gets here */ + } + else /* Maximize */ + { + pp = eptr; + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } if (fc != UCHAR21TEST(eptr)) break; - eptr++; - } + eptr++; + } if (possessive) continue; /* No backtracking */ for (;;) - { + { if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM27); - eptr--; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - } + eptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } /* Control never gets here */ - } - } - /* Control never gets here */ - - /* Match a negated single one-byte character. The character we are - checking can be multibyte. */ - - case OP_NOT: + } + } + /* Control never gets here */ + + /* Match a negated single one-byte character. The character we are + checking can be multibyte. 
*/ + + case OP_NOT: case OP_NOTI: if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); - } + } #ifdef SUPPORT_UTF if (utf) { @@ -3697,108 +3697,108 @@ for (;;) if (ch == c || och == c) RRETURN(MATCH_NOMATCH); } } - else + else #endif - { + { register pcre_uint32 ch = ecode[1]; c = *eptr++; if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c)) RRETURN(MATCH_NOMATCH); ecode += 2; - } - break; - - /* Match a negated single one-byte character repeatedly. This is almost a - repeat of the code for a repeated single character, but I haven't found a - nice way of commoning these up that doesn't require a test of the - positive/negative option for each character match. Maybe that wouldn't add - very much to the time taken, but character matching *is* what this is all - about... */ - - case OP_NOTEXACT: + } + break; + + /* Match a negated single one-byte character repeatedly. This is almost a + repeat of the code for a repeated single character, but I haven't found a + nice way of commoning these up that doesn't require a test of the + positive/negative option for each character match. Maybe that wouldn't add + very much to the time taken, but character matching *is* what this is all + about... 
*/ + + case OP_NOTEXACT: case OP_NOTEXACTI: - min = max = GET2(ecode, 1); + min = max = GET2(ecode, 1); ecode += 1 + IMM2_SIZE; - goto REPEATNOTCHAR; - - case OP_NOTUPTO: + goto REPEATNOTCHAR; + + case OP_NOTUPTO: case OP_NOTUPTOI: - case OP_NOTMINUPTO: + case OP_NOTMINUPTO: case OP_NOTMINUPTOI: - min = 0; - max = GET2(ecode, 1); + min = 0; + max = GET2(ecode, 1); minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; ecode += 1 + IMM2_SIZE; - goto REPEATNOTCHAR; - - case OP_NOTPOSSTAR: + goto REPEATNOTCHAR; + + case OP_NOTPOSSTAR: case OP_NOTPOSSTARI: - possessive = TRUE; - min = 0; - max = INT_MAX; - ecode++; - goto REPEATNOTCHAR; - - case OP_NOTPOSPLUS: + possessive = TRUE; + min = 0; + max = INT_MAX; + ecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSPLUS: case OP_NOTPOSPLUSI: - possessive = TRUE; - min = 1; - max = INT_MAX; - ecode++; - goto REPEATNOTCHAR; - - case OP_NOTPOSQUERY: + possessive = TRUE; + min = 1; + max = INT_MAX; + ecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSQUERY: case OP_NOTPOSQUERYI: - possessive = TRUE; - min = 0; - max = 1; - ecode++; - goto REPEATNOTCHAR; - - case OP_NOTPOSUPTO: + possessive = TRUE; + min = 0; + max = 1; + ecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSUPTO: case OP_NOTPOSUPTOI: - possessive = TRUE; - min = 0; - max = GET2(ecode, 1); + possessive = TRUE; + min = 0; + max = GET2(ecode, 1); ecode += 1 + IMM2_SIZE; - goto REPEATNOTCHAR; - - case OP_NOTSTAR: + goto REPEATNOTCHAR; + + case OP_NOTSTAR: case OP_NOTSTARI: - case OP_NOTMINSTAR: + case OP_NOTMINSTAR: case OP_NOTMINSTARI: - case OP_NOTPLUS: + case OP_NOTPLUS: case OP_NOTPLUSI: - case OP_NOTMINPLUS: + case OP_NOTMINPLUS: case OP_NOTMINPLUSI: - case OP_NOTQUERY: + case OP_NOTQUERY: case OP_NOTQUERYI: - case OP_NOTMINQUERY: + case OP_NOTMINQUERY: case OP_NOTMINQUERYI: c = *ecode++ - ((op >= OP_NOTSTARI)? 
OP_NOTSTARI: OP_NOTSTAR); - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + /* Common code for all repeated single-byte matches. */ - - REPEATNOTCHAR: + + REPEATNOTCHAR: GETCHARINCTEST(fc, ecode); - - /* The code is duplicated for the caseless and caseful cases, for speed, - since matching characters is likely to be quite common. First, ensure the - minimum number of matches are present. If min = max, continue at the same - level without recursing. Otherwise, if minimizing, keep trying the rest of - the expression and advancing one matching character if failing, up to the - maximum. Alternatively, if maximizing, find the maximum number of - characters and work backwards. */ - - DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, + + /* The code is duplicated for the caseless and caseful cases, for speed, + since matching characters is likely to be quite common. First, ensure the + minimum number of matches are present. If min = max, continue at the same + level without recursing. Otherwise, if minimizing, keep trying the rest of + the expression and advancing one matching character if failing, up to the + maximum. Alternatively, if maximizing, find the maximum number of + characters and work backwards. 
*/ + + DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, max, (char *)eptr)); - + if (op >= OP_NOTSTARI) /* Caseless */ - { + { #ifdef SUPPORT_UTF #ifdef SUPPORT_UCP if (utf && fc > 127) @@ -3810,27 +3810,27 @@ for (;;) else #endif /* SUPPORT_UTF */ foc = TABLE_GET(fc, md->fcc, fc); - + #ifdef SUPPORT_UTF if (utf) - { + { register pcre_uint32 d; - for (i = 1; i <= min; i++) - { + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(d, eptr); + GETCHARINC(d, eptr); if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); - } - } - else + } + } + else #endif /* SUPPORT_UTF */ /* Not UTF mode */ - { - for (i = 1; i <= min; i++) + { + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { @@ -3840,407 +3840,407 @@ for (;;) if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); eptr++; } - } - - if (min == max) continue; - - if (minimize) - { + } + + if (min == max) continue; + + if (minimize) + { #ifdef SUPPORT_UTF if (utf) - { + { register pcre_uint32 d; - for (fi = min;; fi++) - { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(d, eptr); + GETCHARINC(d, eptr); if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); - } - } - else + } + } + else #endif /*SUPPORT_UTF */ /* Not UTF mode */ - { - for (fi = min;; fi++) - { + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); eptr++; - } - } - /* Control never gets here 
*/ - } - - /* Maximize case */ - - else - { - pp = eptr; - + } + } + /* Control never gets here */ + } + + /* Maximize case */ + + else + { + pp = eptr; + #ifdef SUPPORT_UTF if (utf) - { + { register pcre_uint32 d; - for (i = min; i < max; i++) - { - int len = 1; + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(d, eptr, len); + GETCHARLEN(d, eptr, len); if (fc == d || (unsigned int)foc == d) break; - eptr += len; - } + eptr += len; + } if (possessive) continue; /* No backtracking */ for(;;) - { + { if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; - BACKCHAR(eptr); - } - } - else + BACKCHAR(eptr); + } + } + else #endif /* SUPPORT_UTF */ /* Not UTF mode */ - { - for (i = min; i < max; i++) - { + { + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } if (fc == *eptr || foc == *eptr) break; - eptr++; - } + eptr++; + } if (possessive) continue; /* No backtracking */ for (;;) - { + { if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM31); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; - } - } + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + eptr--; + } + } /* Control never gets here */ - } - } - - /* Caseful comparisons */ - - else - { + } + } + + /* Caseful comparisons */ + + else + { #ifdef SUPPORT_UTF if (utf) - { + { register pcre_uint32 d; - for (i = 1; i <= min; i++) - { + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(d, eptr); - if (fc == d) RRETURN(MATCH_NOMATCH); - } - } - else -#endif + GETCHARINC(d, eptr); + if (fc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif /* Not UTF mode */ - { - for (i = 1; i <= min; i++) + { + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } 
- if (fc == *eptr++) RRETURN(MATCH_NOMATCH); + if (fc == *eptr++) RRETURN(MATCH_NOMATCH); } - } - - if (min == max) continue; - - if (minimize) - { + } + + if (min == max) continue; + + if (minimize) + { #ifdef SUPPORT_UTF if (utf) - { + { register pcre_uint32 d; - for (fi = min;; fi++) - { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(d, eptr); + GETCHARINC(d, eptr); if (fc == d) RRETURN(MATCH_NOMATCH); - } - } - else -#endif + } + } + else +#endif /* Not UTF mode */ - { - for (fi = min;; fi++) - { + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (fc == *eptr++) RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - } - - /* Maximize case */ - - else - { - pp = eptr; - + } + } + /* Control never gets here */ + } + + /* Maximize case */ + + else + { + pp = eptr; + #ifdef SUPPORT_UTF if (utf) - { + { register pcre_uint32 d; - for (i = min; i < max; i++) - { - int len = 1; + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(d, eptr, len); - if (fc == d) break; - eptr += len; - } + GETCHARLEN(d, eptr, len); + if (fc == d) break; + eptr += len; + } if (possessive) continue; /* No backtracking */ - for(;;) - { + for(;;) + { if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM34); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; - BACKCHAR(eptr); - } - } - else -#endif + BACKCHAR(eptr); + } + } + else +#endif /* Not UTF mode */ 
- { - for (i = min; i < max; i++) - { + { + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } if (fc == *eptr) break; - eptr++; - } + eptr++; + } if (possessive) continue; /* No backtracking */ for (;;) - { + { if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM35); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; - } - } + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + eptr--; + } + } /* Control never gets here */ - } - } - /* Control never gets here */ - - /* Match a single character type repeatedly; several different opcodes - share code. This is very similar to the code for single characters, but we - repeat it in the interests of efficiency. */ - - case OP_TYPEEXACT: - min = max = GET2(ecode, 1); - minimize = TRUE; + } + } + /* Control never gets here */ + + /* Match a single character type repeatedly; several different opcodes + share code. This is very similar to the code for single characters, but we + repeat it in the interests of efficiency. 
*/ + + case OP_TYPEEXACT: + min = max = GET2(ecode, 1); + minimize = TRUE; ecode += 1 + IMM2_SIZE; - goto REPEATTYPE; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - min = 0; - max = GET2(ecode, 1); - minimize = *ecode == OP_TYPEMINUPTO; + goto REPEATTYPE; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + min = 0; + max = GET2(ecode, 1); + minimize = *ecode == OP_TYPEMINUPTO; ecode += 1 + IMM2_SIZE; - goto REPEATTYPE; - - case OP_TYPEPOSSTAR: - possessive = TRUE; - min = 0; - max = INT_MAX; - ecode++; - goto REPEATTYPE; - - case OP_TYPEPOSPLUS: - possessive = TRUE; - min = 1; - max = INT_MAX; - ecode++; - goto REPEATTYPE; - - case OP_TYPEPOSQUERY: - possessive = TRUE; - min = 0; - max = 1; - ecode++; - goto REPEATTYPE; - - case OP_TYPEPOSUPTO: - possessive = TRUE; - min = 0; - max = GET2(ecode, 1); + goto REPEATTYPE; + + case OP_TYPEPOSSTAR: + possessive = TRUE; + min = 0; + max = INT_MAX; + ecode++; + goto REPEATTYPE; + + case OP_TYPEPOSPLUS: + possessive = TRUE; + min = 1; + max = INT_MAX; + ecode++; + goto REPEATTYPE; + + case OP_TYPEPOSQUERY: + possessive = TRUE; + min = 0; + max = 1; + ecode++; + goto REPEATTYPE; + + case OP_TYPEPOSUPTO: + possessive = TRUE; + min = 0; + max = GET2(ecode, 1); ecode += 1 + IMM2_SIZE; - goto REPEATTYPE; - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - c = *ecode++ - OP_TYPESTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single character type matches. Note that - in UTF-8 mode, '.' matches a character of any length, but for the other - character types, the valid characters are all one-byte long. 
*/ - - REPEATTYPE: - ctype = *ecode++; /* Code for the character type */ - -#ifdef SUPPORT_UCP - if (ctype == OP_PROP || ctype == OP_NOTPROP) - { - prop_fail_result = ctype == OP_NOTPROP; - prop_type = *ecode++; - prop_value = *ecode++; - } - else prop_type = -1; -#endif - - /* First, ensure the minimum number of matches are present. Use inline - code for maximizing the speed, and do the type test once at the start + goto REPEATTYPE; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + c = *ecode++ - OP_TYPESTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + + /* Common code for all repeated single character type matches. Note that + in UTF-8 mode, '.' matches a character of any length, but for the other + character types, the valid characters are all one-byte long. */ + + REPEATTYPE: + ctype = *ecode++; /* Code for the character type */ + +#ifdef SUPPORT_UCP + if (ctype == OP_PROP || ctype == OP_NOTPROP) + { + prop_fail_result = ctype == OP_NOTPROP; + prop_type = *ecode++; + prop_value = *ecode++; + } + else prop_type = -1; +#endif + + /* First, ensure the minimum number of matches are present. Use inline + code for maximizing the speed, and do the type test once at the start (i.e. keep it out of the loop). Separate the UTF-8 code completely as that - is tidier. Also separate the UCP code, which can be the same for both UTF-8 - and single-bytes. */ - - if (min > 0) - { -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - switch(prop_type) - { - case PT_ANY: - if (prop_fail_result) RRETURN(MATCH_NOMATCH); - for (i = 1; i <= min; i++) - { + is tidier. Also separate the UCP code, which can be the same for both UTF-8 + and single-bytes. 
*/ + + if (min > 0) + { +#ifdef SUPPORT_UCP + if (prop_type >= 0) + { + switch(prop_type) + { + case PT_ANY: + if (prop_fail_result) RRETURN(MATCH_NOMATCH); + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); - } - break; - - case PT_LAMP: - for (i = 1; i <= min; i++) - { + GETCHARINCTEST(c, eptr); + } + break; + + case PT_LAMP: + for (i = 1; i <= min; i++) + { int chartype; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); + GETCHARINCTEST(c, eptr); chartype = UCD_CHARTYPE(c); if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_GC: - for (i = 1; i <= min; i++) - { + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_GC: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); + GETCHARINCTEST(c, eptr); if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_PC: - for (i = 1; i <= min; i++) - { + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_PC: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); + GETCHARINCTEST(c, eptr); if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_SC: - for (i = 1; i <= min; i++) - { + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_SC: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINCTEST(c, eptr); + GETCHARINCTEST(c, eptr); if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - + RRETURN(MATCH_NOMATCH); + } + break; + case PT_ALNUM: for (i = 1; i <= min; i++) { @@ -4342,20 +4342,20 @@ for (;;) /* This should not occur */ - default: - 
RRETURN(PCRE_ERROR_INTERNAL); - } - } - - /* Match extended Unicode sequences. We will get here only if the - support is in the binary; otherwise a compile-time error occurs. */ - - else if (ctype == OP_EXTUNI) - { - for (i = 1; i <= min; i++) - { + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (ctype == OP_EXTUNI) + { + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } @@ -4365,34 +4365,34 @@ for (;;) GETCHARINCTEST(c, eptr); lgb = UCD_GRAPHBREAK(c); while (eptr < md->end_subject) - { + { int len = 1; if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } rgb = UCD_GRAPHBREAK(c); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; lgb = rgb; eptr += len; - } - } + } + } CHECK_PARTIAL(); - } - } - - else -#endif /* SUPPORT_UCP */ - -/* Handle all other cases when the coding is UTF-8 */ - + } + } + + else +#endif /* SUPPORT_UCP */ + +/* Handle all other cases when the coding is UTF-8 */ + #ifdef SUPPORT_UTF if (utf) switch(ctype) - { - case OP_ANY: - for (i = 1; i <= min; i++) - { + { + case OP_ANY: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); if (md->partial != 0 && @@ -4404,11 +4404,11 @@ for (;;) md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); } - eptr++; + eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - break; - + } + break; + case OP_ALLANY: for (i = 1; i <= min; i++) { @@ -4422,231 +4422,231 @@ for (;;) } break; - case OP_ANYBYTE: + case OP_ANYBYTE: if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); - eptr += min; - break; - - case OP_ANYNL: - for (i = 1; i <= min; i++) - { + eptr += min; + break; + + case OP_ANYNL: + for (i = 1; i <= min; i++) + { if (eptr >= 
md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - switch(c) - { - default: RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); + switch(c) + { + default: RRETURN(MATCH_NOMATCH); case CHAR_CR: if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++; - break; - + break; + case CHAR_LF: - break; - + break; + case CHAR_VT: case CHAR_FF: case CHAR_NEL: #ifndef EBCDIC - case 0x2028: - case 0x2029: + case 0x2028: + case 0x2029: #endif /* Not EBCDIC */ - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - } - break; - - case OP_NOT_HSPACE: - for (i = 1; i <= min; i++) - { + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case OP_NOT_HSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - switch(c) - { + GETCHARINC(c, eptr); + switch(c) + { HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ - default: break; - } - } - break; - - case OP_HSPACE: - for (i = 1; i <= min; i++) - { + default: break; + } + } + break; + + case OP_HSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - switch(c) - { + GETCHARINC(c, eptr); + switch(c) + { HSPACE_CASES: break; /* Byte and multibyte cases */ - default: RRETURN(MATCH_NOMATCH); - } - } - break; - - case OP_NOT_VSPACE: - for (i = 1; i <= min; i++) - { + default: RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_NOT_VSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - switch(c) - { + GETCHARINC(c, eptr); + switch(c) + { VSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - } - break; - - case OP_VSPACE: - for (i = 1; i <= min; i++) - { + default: break; + } + } + break; + + case OP_VSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); 
RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - switch(c) - { + GETCHARINC(c, eptr); + switch(c) + { VSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - } - } - break; - - case OP_NOT_DIGIT: - for (i = 1; i <= min; i++) - { + default: RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - GETCHARINC(c, eptr); - if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) - RRETURN(MATCH_NOMATCH); - } - break; - - case OP_DIGIT: - for (i = 1; i <= min; i++) - { + GETCHARINC(c, eptr); + if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_DIGIT: + for (i = 1; i <= min; i++) + { pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } cc = UCHAR21(eptr); if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); eptr++; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - case OP_NOT_WHITESPACE: - for (i = 1; i <= min; i++) - { + /* No need to skip more bytes - we know it's a 1-byte character */ + } + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= min; i++) + { pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } cc = UCHAR21(eptr); if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - break; - - case OP_WHITESPACE: - for (i = 1; i <= min; i++) - { + } + break; + + case OP_WHITESPACE: + for (i = 1; i <= min; i++) + { pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } cc = UCHAR21(eptr); if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); eptr++; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - case 
OP_NOT_WORDCHAR: - for (i = 1; i <= min; i++) - { + /* No need to skip more bytes - we know it's a 1-byte character */ + } + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= min; i++) + { pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } cc = UCHAR21(eptr); if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - break; - - case OP_WORDCHAR: - for (i = 1; i <= min; i++) - { + } + break; + + case OP_WORDCHAR: + for (i = 1; i <= min; i++) + { pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } cc = UCHAR21(eptr); if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); eptr++; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } /* End switch(ctype) */ - - else + /* No need to skip more bytes - we know it's a 1-byte character */ + } + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } /* End switch(ctype) */ + + else #endif /* SUPPORT_UTF */ - - /* Code for the non-UTF-8 case for minimum matching of operators other + + /* Code for the non-UTF-8 case for minimum matching of operators other than OP_PROP and OP_NOTPROP. 
*/ - - switch(ctype) - { - case OP_ANY: + + switch(ctype) + { + case OP_ANY: for (i = 1; i <= min; i++) - { + { if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); - } + } if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); if (md->partial != 0 && eptr + 1 >= md->end_subject && @@ -4658,9 +4658,9 @@ for (;;) if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); } eptr++; - } - break; - + } + break; + case OP_ALLANY: if (eptr > md->end_subject - min) { @@ -4670,34 +4670,34 @@ for (;;) eptr += min; break; - case OP_ANYBYTE: + case OP_ANYBYTE: if (eptr > md->end_subject - min) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - eptr += min; - break; - - case OP_ANYNL: - for (i = 1; i <= min; i++) - { + eptr += min; + break; + + case OP_ANYNL: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - switch(*eptr++) - { - default: RRETURN(MATCH_NOMATCH); + switch(*eptr++) + { + default: RRETURN(MATCH_NOMATCH); case CHAR_CR: if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; - break; + break; case CHAR_LF: - break; - + break; + case CHAR_VT: case CHAR_FF: case CHAR_NEL: @@ -4705,94 +4705,94 @@ for (;;) case 0x2028: case 0x2029: #endif - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - } - break; - - case OP_NOT_HSPACE: - for (i = 1; i <= min; i++) - { + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case OP_NOT_HSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - switch(*eptr++) - { - default: break; + switch(*eptr++) + { + default: break; HSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 HSPACE_MULTIBYTE_CASES: #endif - RRETURN(MATCH_NOMATCH); - } - } - break; - - case OP_HSPACE: - for (i = 1; i <= min; i++) - { + RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_HSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); 
RRETURN(MATCH_NOMATCH); } - switch(*eptr++) - { - default: RRETURN(MATCH_NOMATCH); + switch(*eptr++) + { + default: RRETURN(MATCH_NOMATCH); HSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 HSPACE_MULTIBYTE_CASES: #endif - break; - } - } - break; - - case OP_NOT_VSPACE: - for (i = 1; i <= min; i++) - { + break; + } + } + break; + + case OP_NOT_VSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - switch(*eptr++) - { + switch(*eptr++) + { VSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 VSPACE_MULTIBYTE_CASES: #endif RRETURN(MATCH_NOMATCH); - default: break; - } - } - break; - - case OP_VSPACE: - for (i = 1; i <= min; i++) - { + default: break; + } + } + break; + + case OP_VSPACE: + for (i = 1; i <= min; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - switch(*eptr++) - { - default: RRETURN(MATCH_NOMATCH); + switch(*eptr++) + { + default: RRETURN(MATCH_NOMATCH); VSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 VSPACE_MULTIBYTE_CASES: #endif - break; - } - } - break; - - case OP_NOT_DIGIT: - for (i = 1; i <= min; i++) + break; + } + } + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { @@ -4803,10 +4803,10 @@ for (;;) RRETURN(MATCH_NOMATCH); eptr++; } - break; - - case OP_DIGIT: - for (i = 1; i <= min; i++) + break; + + case OP_DIGIT: + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { @@ -4817,10 +4817,10 @@ for (;;) RRETURN(MATCH_NOMATCH); eptr++; } - break; - - case OP_NOT_WHITESPACE: - for (i = 1; i <= min; i++) + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { @@ -4831,10 +4831,10 @@ for (;;) RRETURN(MATCH_NOMATCH); eptr++; } - break; - - case OP_WHITESPACE: - for (i = 1; i <= min; i++) + break; + + case OP_WHITESPACE: + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { @@ -4845,61 
+4845,61 @@ for (;;) RRETURN(MATCH_NOMATCH); eptr++; } - break; - - case OP_NOT_WORDCHAR: - for (i = 1; i <= min; i++) + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); eptr++; } - break; - - case OP_WORDCHAR: - for (i = 1; i <= min; i++) + break; + + case OP_WORDCHAR: + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); eptr++; } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - - /* If min = max, continue at the same level without recursing */ - - if (min == max) continue; - - /* If minimizing, we have to test the rest of the pattern before each - subsequent match. Again, separate the UTF-8 case for speed, and also - separate the UCP cases. */ - - if (minimize) - { -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - switch(prop_type) - { - case PT_ANY: - for (fi = min;; fi++) - { + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + + /* If min = max, continue at the same level without recursing */ + + if (min == max) continue; + + /* If minimizing, we have to test the rest of the pattern before each + subsequent match. Again, separate the UTF-8 case for speed, and also + separate the UCP cases. 
*/ + + if (minimize) + { +#ifdef SUPPORT_UCP + if (prop_type >= 0) + { + switch(prop_type) + { + case PT_ANY: + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { @@ -4907,21 +4907,21 @@ for (;;) RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(c, eptr); - if (prop_fail_result) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_LAMP: - for (fi = min;; fi++) - { + if (prop_fail_result) RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_LAMP: + for (fi = min;; fi++) + { int chartype; RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(c, eptr); chartype = UCD_CHARTYPE(c); @@ -4929,60 +4929,60 @@ for (;;) chartype == ucp_Ll || chartype == ucp_Lt) == prop_fail_result) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_GC: - for (fi = min;; fi++) - { + } + /* Control never gets here */ + + case PT_GC: + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(c, eptr); if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_PC: - for (fi = min;; fi++) - { + } + /* Control never gets here */ + + case PT_PC: + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) 
RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(c, eptr); if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_SC: - for (fi = min;; fi++) - { + } + /* Control never gets here */ + + case PT_SC: + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(c, eptr); if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - + } + /* Control never gets here */ + case PT_ALNUM: for (fi = min;; fi++) { @@ -5100,23 +5100,23 @@ for (;;) /* Control never gets here */ /* This should never occur */ - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - - /* Match extended Unicode sequences. We will get here only if the - support is in the binary; otherwise a compile-time error occurs. */ - - else if (ctype == OP_EXTUNI) - { - for (fi = min;; fi++) - { + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. 
*/ + + else if (ctype == OP_EXTUNI) + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM41); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } @@ -5126,39 +5126,39 @@ for (;;) GETCHARINCTEST(c, eptr); lgb = UCD_GRAPHBREAK(c); while (eptr < md->end_subject) - { + { int len = 1; if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } rgb = UCD_GRAPHBREAK(c); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; lgb = rgb; eptr += len; - } - } + } + } CHECK_PARTIAL(); - } - } - else -#endif /* SUPPORT_UCP */ - + } + } + else +#endif /* SUPPORT_UCP */ + #ifdef SUPPORT_UTF if (utf) - { - for (fi = min;; fi++) - { + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM42); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (ctype == OP_ANY && IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - GETCHARINC(c, eptr); - switch(ctype) - { + GETCHARINC(c, eptr); + switch(ctype) + { case OP_ANY: /* This is the non-NL case */ if (md->partial != 0 && /* Take care with CRLF partial */ eptr >= md->end_subject && @@ -5169,121 +5169,121 @@ for (;;) md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); } - break; - + break; + case OP_ALLANY: - case OP_ANYBYTE: - break; - - case OP_ANYNL: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); + case OP_ANYBYTE: + break; + + case OP_ANYNL: + switch(c) + { + default: RRETURN(MATCH_NOMATCH); case CHAR_CR: if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++; - break; + break; case CHAR_LF: - break; - + break; + case CHAR_VT: case CHAR_FF: case CHAR_NEL: #ifndef EBCDIC - case 0x2028: - case 0x2029: + case 0x2028: + case 0x2029: #endif /* Not EBCDIC */ - 
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - break; - - case OP_NOT_HSPACE: - switch(c) - { + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); + break; + } + break; + + case OP_NOT_HSPACE: + switch(c) + { HSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - break; - - case OP_HSPACE: - switch(c) - { + default: break; + } + break; + + case OP_HSPACE: + switch(c) + { HSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - } - break; - - case OP_NOT_VSPACE: - switch(c) - { + default: RRETURN(MATCH_NOMATCH); + } + break; + + case OP_NOT_VSPACE: + switch(c) + { VSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - break; - - case OP_VSPACE: - switch(c) - { + default: break; + } + break; + + case OP_VSPACE: + switch(c) + { VSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - } - break; - - case OP_NOT_DIGIT: - if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_DIGIT: - if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WHITESPACE: - if (c < 256 && (md->ctypes[c] & ctype_space) != 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_WHITESPACE: + default: RRETURN(MATCH_NOMATCH); + } + break; + + case OP_NOT_DIGIT: + if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + if (c < 256 && (md->ctypes[c] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WORDCHAR: - if (c < 256 && (md->ctypes[c] & ctype_word) != 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_WORDCHAR: - if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) - RRETURN(MATCH_NOMATCH); - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - } - else -#endif + RRETURN(MATCH_NOMATCH); 
+ break; + + case OP_NOT_WORDCHAR: + if (c < 256 && (md->ctypes[c] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + } + else +#endif /* Not UTF mode */ - { - for (fi = min;; fi++) - { + { + for (fi = min;; fi++) + { RMATCH(eptr, ecode, offset_top, md, eptrb, RM43); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); } if (ctype == OP_ANY && IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - c = *eptr++; - switch(ctype) - { + c = *eptr++; + switch(ctype) + { case OP_ANY: /* This is the non-NL case */ if (md->partial != 0 && /* Take care with CRLF partial */ eptr >= md->end_subject && @@ -5294,23 +5294,23 @@ for (;;) md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); } - break; - + break; + case OP_ALLANY: - case OP_ANYBYTE: - break; - - case OP_ANYNL: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); + case OP_ANYBYTE: + break; + + case OP_ANYNL: + switch(c) + { + default: RRETURN(MATCH_NOMATCH); case CHAR_CR: if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; - break; - + break; + case CHAR_LF: - break; - + break; + case CHAR_VT: case CHAR_FF: case CHAR_NEL: @@ -5318,128 +5318,128 @@ for (;;) case 0x2028: case 0x2029: #endif - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - break; - - case OP_NOT_HSPACE: - switch(c) - { - default: break; + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); + break; + } + break; + + case OP_NOT_HSPACE: + switch(c) + { + default: break; HSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 HSPACE_MULTIBYTE_CASES: #endif - RRETURN(MATCH_NOMATCH); - } - break; - - case OP_HSPACE: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); + } 
+ break; + + case OP_HSPACE: + switch(c) + { + default: RRETURN(MATCH_NOMATCH); HSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 HSPACE_MULTIBYTE_CASES: #endif - break; - } - break; - - case OP_NOT_VSPACE: - switch(c) - { - default: break; + break; + } + break; + + case OP_NOT_VSPACE: + switch(c) + { + default: break; VSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 VSPACE_MULTIBYTE_CASES: #endif - RRETURN(MATCH_NOMATCH); - } - break; - - case OP_VSPACE: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_VSPACE: + switch(c) + { + default: RRETURN(MATCH_NOMATCH); VSPACE_BYTE_CASES: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 VSPACE_MULTIBYTE_CASES: #endif - break; - } - break; - - case OP_NOT_DIGIT: + break; + } + break; + + case OP_NOT_DIGIT: if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_DIGIT: + break; + + case OP_DIGIT: if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WHITESPACE: + break; + + case OP_NOT_WHITESPACE: if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_WHITESPACE: + break; + + case OP_WHITESPACE: if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WORDCHAR: + break; + + case OP_NOT_WORDCHAR: if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_WORDCHAR: + break; + + case OP_WORDCHAR: if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - } - /* Control never gets here */ - } - - /* If maximizing, it is worth using inline code for speed, doing the type - test once at the start (i.e. keep it out of the loop). Again, keep the - UTF-8 and UCP stuff separate. 
*/ - - else - { - pp = eptr; /* Remember where we started */ - -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - switch(prop_type) - { - case PT_ANY: - for (i = min; i < max; i++) - { - int len = 1; + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + } + /* Control never gets here */ + } + + /* If maximizing, it is worth using inline code for speed, doing the type + test once at the start (i.e. keep it out of the loop). Again, keep the + UTF-8 and UCP stuff separate. */ + + else + { + pp = eptr; /* Remember where we started */ + +#ifdef SUPPORT_UCP + if (prop_type >= 0) + { + switch(prop_type) + { + case PT_ANY: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } GETCHARLENTEST(c, eptr, len); - if (prop_fail_result) break; - eptr+= len; - } - break; - - case PT_LAMP: - for (i = min; i < max; i++) - { + if (prop_fail_result) break; + eptr+= len; + } + break; + + case PT_LAMP: + for (i = min; i < max; i++) + { int chartype; - int len = 1; + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } GETCHARLENTEST(c, eptr, len); chartype = UCD_CHARTYPE(c); @@ -5447,54 +5447,54 @@ for (;;) chartype == ucp_Ll || chartype == ucp_Lt) == prop_fail_result) break; - eptr+= len; - } - break; - - case PT_GC: - for (i = min; i < max; i++) - { - int len = 1; + eptr+= len; + } + break; + + case PT_GC: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } GETCHARLENTEST(c, eptr, len); if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break; - eptr+= len; - } - break; - - case PT_PC: - for (i = min; i < max; i++) - { - int len = 1; + eptr+= len; + } + break; + + case PT_PC: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } GETCHARLENTEST(c, eptr, len); if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break; - eptr+= len; - } - break; - - case PT_SC: - 
for (i = min; i < max; i++) - { - int len = 1; + eptr+= len; + } + break; + + case PT_SC: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } GETCHARLENTEST(c, eptr, len); if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break; - eptr+= len; - } - break; + eptr+= len; + } + break; case PT_ALNUM: for (i = min; i < max; i++) @@ -5609,30 +5609,30 @@ for (;;) default: RRETURN(PCRE_ERROR_INTERNAL); - } - - /* eptr is now past the end of the maximum run */ - + } + + /* eptr is now past the end of the maximum run */ + if (possessive) continue; /* No backtracking */ - for(;;) - { + for(;;) + { if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; if (utf) BACKCHAR(eptr); - } - } - + } + } + /* Match extended Unicode grapheme clusters. We will get here only if the - support is in the binary; otherwise a compile-time error occurs. */ - - else if (ctype == OP_EXTUNI) - { - for (i = min; i < max; i++) - { + support is in the binary; otherwise a compile-time error occurs. */ + + else if (ctype == OP_EXTUNI) + { + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); break; } @@ -5642,20 +5642,20 @@ for (;;) GETCHARINCTEST(c, eptr); lgb = UCD_GRAPHBREAK(c); while (eptr < md->end_subject) - { + { int len = 1; if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } rgb = UCD_GRAPHBREAK(c); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; lgb = rgb; eptr += len; - } - } + } + } CHECK_PARTIAL(); - } - - /* eptr is now past the end of the maximum run */ - + } + + /* eptr is now past the end of the maximum run */ + if (possessive) continue; /* No backtracking */ /* We use <= pp rather than == pp to detect the start of the run while @@ -5663,14 +5663,14 @@ for (;;) move back past pp. 
This is just palliative; the use of \C in UTF mode is fraught with danger. */ - for(;;) - { + for(;;) + { int lgb, rgb; PCRE_PUCHAR fptr; if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */ RMATCH(eptr, ecode, offset_top, md, eptrb, RM45); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); /* Backtracking over an extended grapheme cluster involves inspecting the previous two characters (if present) to see if a break is @@ -5678,7 +5678,7 @@ for (;;) eptr--; if (!utf) c = *eptr; else - { + { BACKCHAR(eptr); GETCHAR(c, eptr); } @@ -5689,276 +5689,276 @@ for (;;) if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */ fptr = eptr - 1; if (!utf) c = *fptr; else - { + { BACKCHAR(fptr); GETCHAR(c, fptr); - } + } lgb = UCD_GRAPHBREAK(c); if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; eptr = fptr; rgb = lgb; - } - } - } - - else -#endif /* SUPPORT_UCP */ - + } + } + } + + else +#endif /* SUPPORT_UCP */ + #ifdef SUPPORT_UTF if (utf) - { - switch(ctype) - { - case OP_ANY: + { + switch(ctype) + { + case OP_ANY: for (i = min; i < max; i++) - { + { if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); break; - } + } if (IS_NEWLINE(eptr)) break; if (md->partial != 0 && /* Take care with CRLF partial */ eptr + 1 >= md->end_subject && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && UCHAR21(eptr) == NLBLOCK->nl[0]) - { + { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } + } eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } + } break; - + case OP_ALLANY: if (max < INT_MAX) - { + { for (i = min; i < max; i++) - { + { if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); break; - } + } eptr++; ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - } + } + } else { eptr = md->end_subject; /* Unlimited UTF-8 repeat */ SCHECK_PARTIAL(); } - break; - - /* The byte case is the same as non-UTF8 */ - - case OP_ANYBYTE: - c = max - min; - if (c > (unsigned int)(md->end_subject - eptr)) 
+ break; + + /* The byte case is the same as non-UTF8 */ + + case OP_ANYBYTE: + c = max - min; + if (c > (unsigned int)(md->end_subject - eptr)) { eptr = md->end_subject; SCHECK_PARTIAL(); } else eptr += c; - break; - - case OP_ANYNL: - for (i = min; i < max; i++) - { - int len = 1; + break; + + case OP_ANYNL: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); + GETCHARLEN(c, eptr, len); if (c == CHAR_CR) - { - if (++eptr >= md->end_subject) break; + { + if (++eptr >= md->end_subject) break; if (UCHAR21(eptr) == CHAR_LF) eptr++; - } - else - { + } + else + { if (c != CHAR_LF && - (md->bsr_anycrlf || + (md->bsr_anycrlf || (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL #ifndef EBCDIC && c != 0x2028 && c != 0x2029 #endif /* Not EBCDIC */ ))) - break; - eptr += len; - } - } - break; - - case OP_NOT_HSPACE: - case OP_HSPACE: - for (i = min; i < max; i++) - { - BOOL gotspace; - int len = 1; + break; + eptr += len; + } + } + break; + + case OP_NOT_HSPACE: + case OP_HSPACE: + for (i = min; i < max; i++) + { + BOOL gotspace; + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - switch(c) - { + GETCHARLEN(c, eptr, len); + switch(c) + { HSPACE_CASES: gotspace = TRUE; break; - default: gotspace = FALSE; break; - } - if (gotspace == (ctype == OP_NOT_HSPACE)) break; - eptr += len; - } - break; - - case OP_NOT_VSPACE: - case OP_VSPACE: - for (i = min; i < max; i++) - { - BOOL gotspace; - int len = 1; + default: gotspace = FALSE; break; + } + if (gotspace == (ctype == OP_NOT_HSPACE)) break; + eptr += len; + } + break; + + case OP_NOT_VSPACE: + case OP_VSPACE: + for (i = min; i < max; i++) + { + BOOL gotspace; + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - switch(c) - { + GETCHARLEN(c, eptr, len); + switch(c) + { VSPACE_CASES: gotspace = TRUE; break; - default: gotspace = FALSE; break; - } - if 
(gotspace == (ctype == OP_NOT_VSPACE)) break; - eptr += len; - } - break; - - case OP_NOT_DIGIT: - for (i = min; i < max; i++) - { - int len = 1; + default: gotspace = FALSE; break; + } + if (gotspace == (ctype == OP_NOT_VSPACE)) break; + eptr += len; + } + break; + + case OP_NOT_DIGIT: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; - eptr+= len; - } - break; - - case OP_DIGIT: - for (i = min; i < max; i++) - { - int len = 1; + GETCHARLEN(c, eptr, len); + if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; + eptr+= len; + } + break; + + case OP_DIGIT: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; - eptr+= len; - } - break; - - case OP_NOT_WHITESPACE: - for (i = min; i < max; i++) - { - int len = 1; + GETCHARLEN(c, eptr, len); + if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; + eptr+= len; + } + break; + + case OP_NOT_WHITESPACE: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; - eptr+= len; - } - break; - - case OP_WHITESPACE: - for (i = min; i < max; i++) - { - int len = 1; + GETCHARLEN(c, eptr, len); + if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; + eptr+= len; + } + break; + + case OP_WHITESPACE: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; - eptr+= len; - } - break; - - case OP_NOT_WORDCHAR: - for (i = min; i < max; i++) - { - int len = 1; + GETCHARLEN(c, eptr, len); + if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; + eptr+= len; + } + break; 
+ + case OP_NOT_WORDCHAR: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; - eptr+= len; - } - break; - - case OP_WORDCHAR: - for (i = min; i < max; i++) - { - int len = 1; + GETCHARLEN(c, eptr, len); + if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; + eptr+= len; + } + break; + + case OP_WORDCHAR: + for (i = min; i < max; i++) + { + int len = 1; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - GETCHARLEN(c, eptr, len); - if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; - eptr+= len; - } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - + GETCHARLEN(c, eptr, len); + if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; + eptr+= len; + } + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + if (possessive) continue; /* No backtracking */ - for(;;) - { + for(;;) + { if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM46); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; - BACKCHAR(eptr); + BACKCHAR(eptr); if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL && UCHAR21(eptr - 1) == CHAR_CR) eptr--; - } - } - else + } + } + else #endif /* SUPPORT_UTF */ /* Not UTF mode */ - { - switch(ctype) - { - case OP_ANY: + { + switch(ctype) + { + case OP_ANY: for (i = min; i < max; i++) - { + { if (eptr >= md->end_subject) - { + { SCHECK_PARTIAL(); break; - } + } if (IS_NEWLINE(eptr)) break; if (md->partial != 0 && /* Take care with CRLF partial */ eptr + 1 >= md->end_subject && @@ -5970,50 +5970,50 @@ for (;;) if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); } eptr++; - } + } break; - + case OP_ALLANY: - case OP_ANYBYTE: - c = max - min; - if (c > (unsigned int)(md->end_subject - eptr)) + case OP_ANYBYTE: + c = max - min; + if (c > (unsigned int)(md->end_subject - eptr)) { eptr = md->end_subject; 
SCHECK_PARTIAL(); } else eptr += c; - break; - - case OP_ANYNL: - for (i = min; i < max; i++) - { + break; + + case OP_ANYNL: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); break; } - c = *eptr; + c = *eptr; if (c == CHAR_CR) - { - if (++eptr >= md->end_subject) break; + { + if (++eptr >= md->end_subject) break; if (*eptr == CHAR_LF) eptr++; - } - else - { + } + else + { if (c != CHAR_LF && (md->bsr_anycrlf || (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 && c != 0x2028 && c != 0x2029 #endif ))) break; - eptr++; - } - } - break; - - case OP_NOT_HSPACE: - for (i = min; i < max; i++) - { + eptr++; + } + } + break; + + case OP_NOT_HSPACE: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -6028,13 +6028,13 @@ for (;;) #endif goto ENDLOOP00; } - } + } ENDLOOP00: - break; - - case OP_HSPACE: - for (i = min; i < max; i++) - { + break; + + case OP_HSPACE: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -6049,17 +6049,17 @@ for (;;) #endif eptr++; break; } - } + } ENDLOOP01: - break; - - case OP_NOT_VSPACE: - for (i = min; i < max; i++) - { + break; + + case OP_NOT_VSPACE: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } switch(*eptr) { @@ -6070,17 +6070,17 @@ for (;;) #endif goto ENDLOOP02; } - } + } ENDLOOP02: - break; - - case OP_VSPACE: - for (i = min; i < max; i++) - { + break; + + case OP_VSPACE: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } switch(*eptr) { @@ -6091,136 +6091,136 @@ for (;;) #endif eptr++; break; } - } + } ENDLOOP03: - break; - - case OP_NOT_DIGIT: - for (i = min; i < max; i++) - { + break; + + case OP_NOT_DIGIT: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break; 
- eptr++; - } - break; - - case OP_DIGIT: - for (i = min; i < max; i++) - { + eptr++; + } + break; + + case OP_DIGIT: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break; - eptr++; - } - break; - - case OP_NOT_WHITESPACE: - for (i = min; i < max; i++) - { + eptr++; + } + break; + + case OP_NOT_WHITESPACE: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break; - eptr++; - } - break; - - case OP_WHITESPACE: - for (i = min; i < max; i++) - { + eptr++; + } + break; + + case OP_WHITESPACE: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break; - eptr++; - } - break; - - case OP_NOT_WORDCHAR: - for (i = min; i < max; i++) - { + eptr++; + } + break; + + case OP_NOT_WORDCHAR: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break; - eptr++; - } - break; - - case OP_WORDCHAR: - for (i = min; i < max; i++) - { + eptr++; + } + break; + + case OP_WORDCHAR: + for (i = min; i < max; i++) + { if (eptr >= md->end_subject) { SCHECK_PARTIAL(); - break; + break; } if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break; - eptr++; - } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - + eptr++; + } + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + if (possessive) continue; /* No backtracking */ for (;;) - { + { if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM47); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; + eptr--; if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF && eptr[-1] == CHAR_CR) eptr--; - } - } - + } + } + /* Control never gets here */ - } - 
- /* There's been some horrible disaster. Arrival here can only mean there is - something seriously wrong in the code above or the OP_xxx definitions. */ - - default: - DPRINTF(("Unknown opcode %d\n", *ecode)); - RRETURN(PCRE_ERROR_UNKNOWN_OPCODE); - } - - /* Do not stick any code in here without much thought; it is assumed - that "continue" in the code above comes out to here to repeat the main - loop. */ - - } /* End of main loop */ -/* Control never reaches here */ - - -/* When compiling to use the heap rather than the stack for recursive calls to -match(), the RRETURN() macro jumps here. The number that is saved in -frame->Xwhere indicates which label we actually want to return to. */ - -#ifdef NO_RECURSE -#define LBL(val) case val: goto L_RM##val; -HEAP_RETURN: -switch (frame->Xwhere) - { - LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8) - LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) - LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) - LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) + } + + /* There's been some horrible disaster. Arrival here can only mean there is + something seriously wrong in the code above or the OP_xxx definitions. */ + + default: + DPRINTF(("Unknown opcode %d\n", *ecode)); + RRETURN(PCRE_ERROR_UNKNOWN_OPCODE); + } + + /* Do not stick any code in here without much thought; it is assumed + that "continue" in the code above comes out to here to repeat the main + loop. */ + + } /* End of main loop */ +/* Control never reaches here */ + + +/* When compiling to use the heap rather than the stack for recursive calls to +match(), the RRETURN() macro jumps here. The number that is saved in +frame->Xwhere indicates which label we actually want to return to. 
*/ + +#ifdef NO_RECURSE +#define LBL(val) case val: goto L_RM##val; +HEAP_RETURN: +switch (frame->Xwhere) + { + LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8) + LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) + LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) + LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64) LBL(65) LBL(66) #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 @@ -6229,80 +6229,80 @@ switch (frame->Xwhere) #ifdef SUPPORT_UTF LBL(16) LBL(18) LBL(22) LBL(23) LBL(28) LBL(30) - LBL(32) LBL(34) LBL(42) LBL(46) -#ifdef SUPPORT_UCP - LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) + LBL(32) LBL(34) LBL(42) LBL(46) +#ifdef SUPPORT_UCP + LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) -#endif /* SUPPORT_UCP */ +#endif /* SUPPORT_UCP */ #endif /* SUPPORT_UTF */ - default: - DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); - return PCRE_ERROR_INTERNAL; - } -#undef LBL -#endif /* NO_RECURSE */ -} - - -/*************************************************************************** -**************************************************************************** - RECURSION IN THE match() FUNCTION - -Undefine all the macros that were defined above to handle this. 
*/ - -#ifdef NO_RECURSE -#undef eptr -#undef ecode -#undef mstart -#undef offset_top -#undef eptrb -#undef flags - -#undef callpat -#undef charptr -#undef data -#undef next -#undef pp -#undef prev -#undef saved_eptr - -#undef new_recursive - -#undef cur_is_word -#undef condition -#undef prev_is_word - -#undef ctype -#undef length -#undef max -#undef min -#undef number -#undef offset -#undef op -#undef save_capture_last -#undef save_offset1 -#undef save_offset2 -#undef save_offset3 -#undef stacksave - -#undef newptrb - -#endif - -/* These two are defined as macros in both cases */ - -#undef fc -#undef fi - -/*************************************************************************** -***************************************************************************/ - - + default: + DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); + return PCRE_ERROR_INTERNAL; + } +#undef LBL +#endif /* NO_RECURSE */ +} + + +/*************************************************************************** +**************************************************************************** + RECURSION IN THE match() FUNCTION + +Undefine all the macros that were defined above to handle this. 
*/ + +#ifdef NO_RECURSE +#undef eptr +#undef ecode +#undef mstart +#undef offset_top +#undef eptrb +#undef flags + +#undef callpat +#undef charptr +#undef data +#undef next +#undef pp +#undef prev +#undef saved_eptr + +#undef new_recursive + +#undef cur_is_word +#undef condition +#undef prev_is_word + +#undef ctype +#undef length +#undef max +#undef min +#undef number +#undef offset +#undef op +#undef save_capture_last +#undef save_offset1 +#undef save_offset2 +#undef save_offset3 +#undef stacksave + +#undef newptrb + +#endif + +/* These two are defined as macros in both cases */ + +#undef fc +#undef fi + +/*************************************************************************** +***************************************************************************/ + + #ifdef NO_RECURSE /************************************************* * Release allocated heap frames * *************************************************/ - + /* This function releases all the allocated frames. The base frame is on the machine stack, and so must not be freed. @@ -6324,35 +6324,35 @@ while (nextframe != NULL) #endif -/************************************************* -* Execute a Regular Expression * -*************************************************/ - -/* This function applies a compiled re to a subject string and picks out -portions of the string if it matches. Two elements in the vector are set for -each substring: the offsets to the start and end of the substring. 
- -Arguments: - argument_re points to the compiled expression - extra_data points to extra data or is NULL - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - offsets points to a vector of ints to be filled in with offsets - offsetcount the number of elements in the vector - -Returns: > 0 => success; value is the number of elements filled in - = 0 => success, but offsets is not big enough - -1 => failed to match - < -1 => some kind of unexpected problem -*/ - +/************************************************* +* Execute a Regular Expression * +*************************************************/ + +/* This function applies a compiled re to a subject string and picks out +portions of the string if it matches. Two elements in the vector are set for +each substring: the offsets to the start and end of the substring. + +Arguments: + argument_re points to the compiled expression + extra_data points to extra data or is NULL + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + offsets points to a vector of ints to be filled in with offsets + offsetcount the number of elements in the vector + +Returns: > 0 => success; value is the number of elements filled in + = 0 => success, but offsets is not big enough + -1 => failed to match + < -1 => some kind of unexpected problem +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, - PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, - int offsetcount) +pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, + PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, + int offsetcount) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, @@ -6364,13 +6364,13 @@ pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets, int offsetcount) #endif -{ +{ int rc, ocount, arg_offset_max; -int newline; -BOOL using_temporary_offsets = FALSE; -BOOL anchored; -BOOL startline; -BOOL firstline; +int newline; +BOOL using_temporary_offsets = FALSE; +BOOL anchored; +BOOL startline; +BOOL firstline; BOOL utf; BOOL has_first_char = FALSE; BOOL has_req_char = FALSE; @@ -6378,8 +6378,8 @@ pcre_uchar first_char = 0; pcre_uchar first_char2 = 0; pcre_uchar req_char = 0; pcre_uchar req_char2 = 0; -match_data match_block; -match_data *md = &match_block; +match_data match_block; +match_data *md = &match_block; const pcre_uint8 *tables; const pcre_uint8 *start_bits = NULL; PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset; @@ -6387,17 +6387,17 @@ PCRE_PUCHAR end_subject; PCRE_PUCHAR start_partial = NULL; PCRE_PUCHAR match_partial = NULL; PCRE_PUCHAR req_char_ptr = start_match - 1; - -const pcre_study_data *study; + +const pcre_study_data *study; const REAL_PCRE *re = (const REAL_PCRE *)argument_re; - + #ifdef NO_RECURSE heapframe frame_zero; frame_zero.Xprevframe = NULL; /* Marks the top level */ frame_zero.Xnextframe = NULL; /* None are allocated yet */ md->match_frames_base = &frame_zero; #endif - + /* Check for the special magic call that measures the size of the stack used per recursive call of match(). 
Without the funny casting for sizeof, a Windows compiler gave this error: "unary minus operator applied to unsigned type, @@ -6411,15 +6411,15 @@ if (re == NULL && extra_data == NULL && subject == NULL && length == -999 && return match(NULL, NULL, NULL, 0, NULL, NULL, 0); #endif -/* Plausibility checks */ - -if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; +/* Plausibility checks */ + +if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; -if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; +if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; if (length < 0) return PCRE_ERROR_BADLENGTH; if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; - + /* Check that the first field in the block is the magic number. If it is not, return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which @@ -6503,34 +6503,34 @@ md->name_table = (pcre_uchar *)re + re->name_table_offset; md->name_count = re->name_count; md->name_entry_size = re->name_entry_size; -/* Fish out the optional data from the extra_data structure, first setting -the default values. */ - -study = NULL; -md->match_limit = MATCH_LIMIT; -md->match_limit_recursion = MATCH_LIMIT_RECURSION; -md->callout_data = NULL; - -/* The table pointer is always in native byte order. */ - +/* Fish out the optional data from the extra_data structure, first setting +the default values. */ + +study = NULL; +md->match_limit = MATCH_LIMIT; +md->match_limit_recursion = MATCH_LIMIT_RECURSION; +md->callout_data = NULL; + +/* The table pointer is always in native byte order. */ + tables = re->tables; - + /* The two limit values override the defaults, whatever their value. 
*/ -if (extra_data != NULL) - { +if (extra_data != NULL) + { unsigned long int flags = extra_data->flags; - if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = (const pcre_study_data *)extra_data->study_data; - if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) - md->match_limit = extra_data->match_limit; - if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0) - md->match_limit_recursion = extra_data->match_limit_recursion; - if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) - md->callout_data = extra_data->callout_data; - if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables; - } - + if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) + study = (const pcre_study_data *)extra_data->study_data; + if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) + md->match_limit = extra_data->match_limit; + if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0) + md->match_limit_recursion = extra_data->match_limit_recursion; + if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) + md->callout_data = extra_data->callout_data; + if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables; + } + /* Limits in the regex override only if they are smaller. */ if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit) @@ -6540,171 +6540,171 @@ if ((re->flags & PCRE_RLSET) != 0 && re->limit_recursion < md->match_limit_recursion) md->match_limit_recursion = re->limit_recursion; -/* If the exec call supplied NULL for tables, use the inbuilt ones. This -is a feature that makes it possible to save compiled regex and re-use them -in other programs later. */ - +/* If the exec call supplied NULL for tables, use the inbuilt ones. This +is a feature that makes it possible to save compiled regex and re-use them +in other programs later. 
*/ + if (tables == NULL) tables = PRIV(default_tables); - -/* Set up other data */ - -anchored = ((re->options | options) & PCRE_ANCHORED) != 0; -startline = (re->flags & PCRE_STARTLINE) != 0; -firstline = (re->options & PCRE_FIRSTLINE) != 0; - -/* The code starts after the real_pcre block and the capture name table. */ - + +/* Set up other data */ + +anchored = ((re->options | options) & PCRE_ANCHORED) != 0; +startline = (re->flags & PCRE_STARTLINE) != 0; +firstline = (re->options & PCRE_FIRSTLINE) != 0; + +/* The code starts after the real_pcre block and the capture name table. */ + md->start_code = (const pcre_uchar *)re + re->name_table_offset + - re->name_count * re->name_entry_size; - + re->name_count * re->name_entry_size; + md->start_subject = (PCRE_PUCHAR)subject; -md->start_offset = start_offset; -md->end_subject = md->start_subject + length; -end_subject = md->end_subject; - -md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; +md->start_offset = start_offset; +md->end_subject = md->start_subject + length; +end_subject = md->end_subject; + +md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; md->use_ucp = (re->options & PCRE_UCP) != 0; md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; md->ignore_skip_arg = 0; - + /* Some options are unpacked into BOOL variables in the hope that testing them will be faster than individual option bits. 
*/ -md->notbol = (options & PCRE_NOTBOL) != 0; -md->noteol = (options & PCRE_NOTEOL) != 0; -md->notempty = (options & PCRE_NOTEMPTY) != 0; +md->notbol = (options & PCRE_NOTBOL) != 0; +md->noteol = (options & PCRE_NOTEOL) != 0; +md->notempty = (options & PCRE_NOTEMPTY) != 0; md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; -md->hitend = FALSE; +md->hitend = FALSE; md->mark = md->nomatch_mark = NULL; /* In case never set */ - -md->recursive = NULL; /* No recursion at top level */ + +md->recursive = NULL; /* No recursion at top level */ md->hasthen = (re->flags & PCRE_HASTHEN) != 0; - -md->lcc = tables + lcc_offset; + +md->lcc = tables + lcc_offset; md->fcc = tables + fcc_offset; -md->ctypes = tables + ctypes_offset; - -/* Handle different \R options. */ - -switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) - { - case 0: - if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) - md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0; - else -#ifdef BSR_ANYCRLF - md->bsr_anycrlf = TRUE; -#else - md->bsr_anycrlf = FALSE; -#endif - break; - - case PCRE_BSR_ANYCRLF: - md->bsr_anycrlf = TRUE; - break; - - case PCRE_BSR_UNICODE: - md->bsr_anycrlf = FALSE; - break; - - default: return PCRE_ERROR_BADNEWLINE; - } - -/* Handle different types of newline. The three bits give eight cases. If -nothing is set at run time, whatever was used at compile time applies. */ - -switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : - (pcre_uint32)options) & PCRE_NEWLINE_BITS) - { - case 0: newline = NEWLINE; break; /* Compile-time default */ +md->ctypes = tables + ctypes_offset; + +/* Handle different \R options. 
*/ + +switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) + { + case 0: + if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) + md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0; + else +#ifdef BSR_ANYCRLF + md->bsr_anycrlf = TRUE; +#else + md->bsr_anycrlf = FALSE; +#endif + break; + + case PCRE_BSR_ANYCRLF: + md->bsr_anycrlf = TRUE; + break; + + case PCRE_BSR_UNICODE: + md->bsr_anycrlf = FALSE; + break; + + default: return PCRE_ERROR_BADNEWLINE; + } + +/* Handle different types of newline. The three bits give eight cases. If +nothing is set at run time, whatever was used at compile time applies. */ + +switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : + (pcre_uint32)options) & PCRE_NEWLINE_BITS) + { + case 0: newline = NEWLINE; break; /* Compile-time default */ case PCRE_NEWLINE_CR: newline = CHAR_CR; break; case PCRE_NEWLINE_LF: newline = CHAR_NL; break; - case PCRE_NEWLINE_CR+ + case PCRE_NEWLINE_CR+ PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break; - case PCRE_NEWLINE_ANY: newline = -1; break; - case PCRE_NEWLINE_ANYCRLF: newline = -2; break; - default: return PCRE_ERROR_BADNEWLINE; - } - -if (newline == -2) - { - md->nltype = NLTYPE_ANYCRLF; - } -else if (newline < 0) - { - md->nltype = NLTYPE_ANY; - } -else - { - md->nltype = NLTYPE_FIXED; - if (newline > 255) - { - md->nllen = 2; - md->nl[0] = (newline >> 8) & 255; - md->nl[1] = newline & 255; - } - else - { - md->nllen = 1; - md->nl[0] = newline; - } - } - + case PCRE_NEWLINE_ANY: newline = -1; break; + case PCRE_NEWLINE_ANYCRLF: newline = -2; break; + default: return PCRE_ERROR_BADNEWLINE; + } + +if (newline == -2) + { + md->nltype = NLTYPE_ANYCRLF; + } +else if (newline < 0) + { + md->nltype = NLTYPE_ANY; + } +else + { + md->nltype = NLTYPE_FIXED; + if (newline > 255) + { + md->nllen = 2; + md->nl[0] = (newline >> 8) & 255; + md->nl[1] = newline & 255; + } + else + { + md->nllen = 1; + md->nl[0] = newline; + } + } + /* Partial matching was originally supported only 
for a restricted set of regexes; from release 8.00 there are no restrictions, but the bits are still defined (though never set). So there's no harm in leaving this code. */ - -if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) - return PCRE_ERROR_BADPARTIAL; - -/* If the expression has got more back references than the offsets supplied can -hold, we get a temporary chunk of working store to use during the matching. -Otherwise, we can use the vector supplied, rounding down its size to a multiple -of 3. */ - -ocount = offsetcount - (offsetcount % 3); + +if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) + return PCRE_ERROR_BADPARTIAL; + +/* If the expression has got more back references than the offsets supplied can +hold, we get a temporary chunk of working store to use during the matching. +Otherwise, we can use the vector supplied, rounding down its size to a multiple +of 3. */ + +ocount = offsetcount - (offsetcount % 3); arg_offset_max = (2*ocount)/3; - -if (re->top_backref > 0 && re->top_backref >= ocount/3) - { - ocount = re->top_backref * 3 + 3; + +if (re->top_backref > 0 && re->top_backref >= ocount/3) + { + ocount = re->top_backref * 3 + 3; md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int)); - if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY; - using_temporary_offsets = TRUE; - DPRINTF(("Got memory to hold back references\n")); - } -else md->offset_vector = offsets; -md->offset_end = ocount; -md->offset_max = (2*ocount)/3; + if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY; + using_temporary_offsets = TRUE; + DPRINTF(("Got memory to hold back references\n")); + } +else md->offset_vector = offsets; +md->offset_end = ocount; +md->offset_max = (2*ocount)/3; md->capture_last = 0; - -/* Reset the working variable associated with each extraction. These should -never be used unless previously set, but they get saved and restored, and so we + +/* Reset the working variable associated with each extraction. 
These should +never be used unless previously set, but they get saved and restored, and so we initialize them to avoid reading uninitialized locations. Also, unset the offsets for the matched string. This is really just for tidiness with callouts, in case they inspect these fields. */ - -if (md->offset_vector != NULL) - { + +if (md->offset_vector != NULL) + { register int *iptr = md->offset_vector + ocount; register int *iend = iptr - re->top_bracket; if (iend < md->offset_vector + 2) iend = md->offset_vector + 2; - while (--iptr >= iend) *iptr = -1; + while (--iptr >= iend) *iptr = -1; if (offsetcount > 0) md->offset_vector[0] = -1; if (offsetcount > 1) md->offset_vector[1] = -1; - } - + } + /* Set up the first character to match, if available. The first_char value is -never set for an anchored regular expression, but the anchoring may be forced -at run time, so we have to test for anchoring. The first char may be unset for -an unanchored pattern, of course. If there's no first char and the pattern was -studied, there may be a bitmap of possible first characters. */ - -if (!anchored) - { - if ((re->flags & PCRE_FIRSTSET) != 0) - { +never set for an anchored regular expression, but the anchoring may be forced +at run time, so we have to test for anchoring. The first char may be unset for +an unanchored pattern, of course. If there's no first char and the pattern was +studied, there may be a bitmap of possible first characters. 
*/ + +if (!anchored) + { + if ((re->flags & PCRE_FIRSTSET) != 0) + { has_first_char = TRUE; first_char = first_char2 = (pcre_uchar)(re->first_char); if ((re->flags & PCRE_FCH_CASELESS) != 0) @@ -6715,18 +6715,18 @@ if (!anchored) first_char2 = UCD_OTHERCASE(first_char); #endif } - } - else - if (!startline && study != NULL && + } + else + if (!startline && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) - start_bits = study->start_bits; - } - -/* For anchored or unanchored matches, there may be a "last known required -character" set. */ - -if ((re->flags & PCRE_REQCHSET) != 0) - { + start_bits = study->start_bits; + } + +/* For anchored or unanchored matches, there may be a "last known required +character" set. */ + +if ((re->flags & PCRE_REQCHSET) != 0) + { has_req_char = TRUE; req_char = req_char2 = (pcre_uchar)(re->req_char); if ((re->flags & PCRE_RCH_CASELESS) != 0) @@ -6737,27 +6737,27 @@ if ((re->flags & PCRE_REQCHSET) != 0) req_char2 = UCD_OTHERCASE(req_char); #endif } - } - - -/* ==========================================================================*/ - -/* Loop for handling unanchored repeated matching attempts; for anchored regexs -the loop runs just once. */ - -for(;;) - { + } + + +/* ==========================================================================*/ + +/* Loop for handling unanchored repeated matching attempts; for anchored regexs +the loop runs just once. */ + +for(;;) + { PCRE_PUCHAR save_end_subject = end_subject; PCRE_PUCHAR new_start_match; - + /* If firstline is TRUE, the start of the match is constrained to the first line of a multiline string. That is, the match must be before or at the first newline. Implement this by temporarily adjusting end_subject so that we stop scanning at a newline. If the match fails at the newline, later code breaks this loop. 
*/ - - if (firstline) - { + + if (firstline) + { PCRE_PUCHAR t = start_match; #ifdef SUPPORT_UTF if (utf) @@ -6770,24 +6770,24 @@ for(;;) } else #endif - while (t < md->end_subject && !IS_NEWLINE(t)) t++; - end_subject = t; - } - + while (t < md->end_subject && !IS_NEWLINE(t)) t++; + end_subject = t; + } + /* There are some optimizations that avoid running the match if a known starting point is not found, or if a known later character is not present. However, there is an option that disables these, for testing and for ensuring that all callouts do actually occur. The option can be set in the regex by (*NO_START_OPT) or passed in match-time options. */ - + if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) - { + { /* Advance to a unique first char if there is one. */ - + if (has_first_char) { pcre_uchar smc; - + if (first_char != first_char2) while (start_match < end_subject && (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2) @@ -6800,7 +6800,7 @@ for(;;) /* Or to just after a linebreak for a multiline match */ else if (startline) - { + { if (start_match > md->start_subject + start_offset) { #ifdef SUPPORT_UTF @@ -6817,7 +6817,7 @@ for(;;) #endif while (start_match < end_subject && !WAS_NEWLINE(start_match)) start_match++; - + /* If we have just passed a CR and the newline option is ANY or ANYCRLF, and we are now at a LF, advance the match position by one more character. 
*/ @@ -6828,12 +6828,12 @@ for(;;) UCHAR21TEST(start_match) == CHAR_NL) start_match++; } - } - + } + /* Or to a non-unique first byte after study */ - + else if (start_bits != NULL) - { + { while (start_match < end_subject) { register pcre_uint32 c = UCHAR21TEST(start_match); @@ -6843,30 +6843,30 @@ for(;;) if ((start_bits[c/8] & (1 << (c&7))) != 0) break; start_match++; } - } + } } /* Starting optimizations */ - - /* Restore fudged end_subject */ - - end_subject = save_end_subject; - + + /* Restore fudged end_subject */ + + end_subject = save_end_subject; + /* The following two optimizations are disabled for partial matching or if disabling is explicitly requested. */ - + if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial) { /* If the pattern was studied, a minimum subject length may be set. This is a lower bound; no actual string of that length may actually match the pattern. Although the value is, strictly, in characters, we treat it as bytes to avoid spending too much time in this optimization. */ - + if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && (pcre_uint32)(end_subject - start_match) < study->minlength) { rc = MATCH_NOMATCH; break; } - + /* If req_char is set, we know that that character must appear in the subject for the match to succeed. If the first character is set, req_char must be later in the subject; otherwise the test starts at the match point. @@ -6874,56 +6874,56 @@ for(;;) nested unlimited repeats that aren't going to match. Writing separate code for cased/caseless versions makes it go faster, as does using an autoincrement and backing off on a match. - + HOWEVER: when the subject string is very, very long, searching to its end can take a long time, and give bad performance on quite ordinary patterns. This showed up when somebody was matching something like /^\d+C/ on a 32-megabyte string... so we don't do this when the string is sufficiently long. 
*/ - + if (has_req_char && end_subject - start_match < REQ_BYTE_MAX) { register PCRE_PUCHAR p = start_match + (has_first_char? 1:0); - + /* We don't need to repeat the search if we haven't yet reached the place we found it at last time. */ if (p > req_char_ptr) - { + { if (req_char != req_char2) - { + { while (p < end_subject) { register pcre_uint32 pp = UCHAR21INCTEST(p); if (pp == req_char || pp == req_char2) { p--; break; } } - } + } else - { + { while (p < end_subject) { if (UCHAR21INCTEST(p) == req_char) { p--; break; } } - } - + } + /* If we can't find the required character, break the matching loop, forcing a match failure. */ - + if (p >= end_subject) { rc = MATCH_NOMATCH; break; } - + /* If we have found the required character, save the point where we found it, so that we don't search again next time round the loop if the start hasn't passed this character yet. */ - + req_char_ptr = p; } - } - } - + } + } + #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */ printf(">>>> Match against: "); pchars(start_match, end_subject - start_match, TRUE, md); @@ -6933,9 +6933,9 @@ for(;;) /* OK, we can now run the match. If "hitend" is set afterwards, remember the first starting point for which a partial match was found. */ - md->start_match_ptr = start_match; + md->start_match_ptr = start_match; md->start_used_ptr = start_match; - md->match_call_count = 0; + md->match_call_count = 0; md->match_function_type = 0; md->end_offset_top = 0; md->skip_arg_count = 0; @@ -6945,9 +6945,9 @@ for(;;) start_partial = md->start_used_ptr; match_partial = start_match; } - - switch(rc) - { + + switch(rc) + { /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP entirely. The only way we can do that is to re-do the match at the same @@ -6971,117 +6971,117 @@ for(;;) } /* Fall through */ - /* NOMATCH and PRUNE advance by one character. 
THEN at this level acts + /* NOMATCH and PRUNE advance by one character. THEN at this level acts exactly like PRUNE. Unset ignore SKIP-with-argument. */ - - case MATCH_NOMATCH: - case MATCH_PRUNE: - case MATCH_THEN: + + case MATCH_NOMATCH: + case MATCH_PRUNE: + case MATCH_THEN: md->ignore_skip_arg = 0; - new_start_match = start_match + 1; + new_start_match = start_match + 1; #ifdef SUPPORT_UTF if (utf) ACROSSCHAR(new_start_match < end_subject, *new_start_match, new_start_match++); -#endif - break; - - /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */ - - case MATCH_COMMIT: - rc = MATCH_NOMATCH; - goto ENDLOOP; - +#endif + break; + + /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */ + + case MATCH_COMMIT: + rc = MATCH_NOMATCH; + goto ENDLOOP; + /* Any other return is either a match, or some kind of error. */ - - default: - goto ENDLOOP; - } - - /* Control reaches here for the various types of "no match at this point" - result. Reset the code to MATCH_NOMATCH for subsequent checking. */ - - rc = MATCH_NOMATCH; - - /* If PCRE_FIRSTLINE is set, the match must happen before or at the first - newline in the subject (though it may continue over the newline). Therefore, - if we have just failed to match, starting at a newline, do not continue. */ - - if (firstline && IS_NEWLINE(start_match)) break; - - /* Advance to new matching position */ - - start_match = new_start_match; - - /* Break the loop if the pattern is anchored or if we have passed the end of - the subject. */ - - if (anchored || start_match > end_subject) break; - - /* If we have just passed a CR and we are now at a LF, and the pattern does - not contain any explicit matches for \r or \n, and the newline option is CRLF + + default: + goto ENDLOOP; + } + + /* Control reaches here for the various types of "no match at this point" + result. Reset the code to MATCH_NOMATCH for subsequent checking. 
*/ + + rc = MATCH_NOMATCH; + + /* If PCRE_FIRSTLINE is set, the match must happen before or at the first + newline in the subject (though it may continue over the newline). Therefore, + if we have just failed to match, starting at a newline, do not continue. */ + + if (firstline && IS_NEWLINE(start_match)) break; + + /* Advance to new matching position */ + + start_match = new_start_match; + + /* Break the loop if the pattern is anchored or if we have passed the end of + the subject. */ + + if (anchored || start_match > end_subject) break; + + /* If we have just passed a CR and we are now at a LF, and the pattern does + not contain any explicit matches for \r or \n, and the newline option is CRLF or ANY or ANYCRLF, advance the match position by one more character. In normal matching start_match will aways be greater than the first position at this stage, but a failed *SKIP can cause a return at the same point, which is why the first test exists. */ - + if (start_match > (PCRE_PUCHAR)subject + start_offset && start_match[-1] == CHAR_CR && - start_match < end_subject && + start_match < end_subject && *start_match == CHAR_NL && - (re->flags & PCRE_HASCRORLF) == 0 && - (md->nltype == NLTYPE_ANY || - md->nltype == NLTYPE_ANYCRLF || - md->nllen == 2)) - start_match++; - + (re->flags & PCRE_HASCRORLF) == 0 && + (md->nltype == NLTYPE_ANY || + md->nltype == NLTYPE_ANYCRLF || + md->nllen == 2)) + start_match++; + md->mark = NULL; /* Reset for start of next match attempt */ } /* End of for(;;) "bumpalong" loop */ - -/* ==========================================================================*/ - -/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping -conditions is true: - -(1) The pattern is anchored or the match was failed by (*COMMIT); - -(2) We are past the end of the subject; - -(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because - this option requests that a match occur at or before the first newline in - the subject. 
- -When we have a match and the offset vector is big enough to deal with any -backreferences, captured substring offsets will already be set up. In the case -where we had to get some local store to hold offsets for backreference -processing, copy those that we can. In this case there need not be overflow if -certain parts of the pattern were not used, even though there are more -capturing parentheses than vector slots. */ - -ENDLOOP: - + +/* ==========================================================================*/ + +/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping +conditions is true: + +(1) The pattern is anchored or the match was failed by (*COMMIT); + +(2) We are past the end of the subject; + +(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because + this option requests that a match occur at or before the first newline in + the subject. + +When we have a match and the offset vector is big enough to deal with any +backreferences, captured substring offsets will already be set up. In the case +where we had to get some local store to hold offsets for backreference +processing, copy those that we can. In this case there need not be overflow if +certain parts of the pattern were not used, even though there are more +capturing parentheses than vector slots. 
*/ + +ENDLOOP: + if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) - { - if (using_temporary_offsets) - { + { + if (using_temporary_offsets) + { if (arg_offset_max >= 4) - { - memcpy(offsets + 2, md->offset_vector + 2, + { + memcpy(offsets + 2, md->offset_vector + 2, (arg_offset_max - 2) * sizeof(int)); - DPRINTF(("Copied offsets from temporary memory\n")); - } + DPRINTF(("Copied offsets from temporary memory\n")); + } if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT; - DPRINTF(("Freeing temporary memory\n")); + DPRINTF(("Freeing temporary memory\n")); (PUBL(free))(md->offset_vector); - } - + } + /* Set the return code to the number of captured strings, or 0 if there were - too many to fit into the vector. */ - + too many to fit into the vector. */ + rc = ((md->capture_last & OVFLBIT) != 0 && md->end_offset_top >= arg_offset_max)? 0 : md->end_offset_top/2; - + /* If there is space in the offset vector, set any unused pairs at the end of the pattern to -1 for backwards compatibility. It is documented that this happens. In earlier versions, the whole set of potential capturing offsets @@ -7101,52 +7101,52 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) while (iptr < iend) *iptr++ = -1; } - /* If there is space, set up the whole thing as substring 0. The value of - md->start_match_ptr might be modified if \K was encountered on the success - matching path. */ - - if (offsetcount < 2) rc = 0; else - { + /* If there is space, set up the whole thing as substring 0. The value of + md->start_match_ptr might be modified if \K was encountered on the success + matching path. 
*/ + + if (offsetcount < 2) rc = 0; else + { offsets[0] = (int)(md->start_match_ptr - md->start_subject); offsets[1] = (int)(md->end_match_ptr - md->start_subject); - } - + } + /* Return MARK data if requested */ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) *(extra_data->mark) = (pcre_uchar *)md->mark; - DPRINTF((">>>> returning %d\n", rc)); + DPRINTF((">>>> returning %d\n", rc)); #ifdef NO_RECURSE release_match_heapframes(&frame_zero); #endif - return rc; - } - -/* Control gets here if there has been an error, or if the overall match -attempt has failed at all permitted starting positions. */ - -if (using_temporary_offsets) - { - DPRINTF(("Freeing temporary memory\n")); + return rc; + } + +/* Control gets here if there has been an error, or if the overall match +attempt has failed at all permitted starting positions. */ + +if (using_temporary_offsets) + { + DPRINTF(("Freeing temporary memory\n")); (PUBL(free))(md->offset_vector); - } - + } + /* For anything other than nomatch or partial match, just return the code. */ if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL) - { - DPRINTF((">>>> error: returning %d\n", rc)); + { + DPRINTF((">>>> error: returning %d\n", rc)); #ifdef NO_RECURSE release_match_heapframes(&frame_zero); #endif - return rc; - } + return rc; + } /* Handle partial matches - disable any mark data */ if (match_partial != NULL) - { - DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n")); + { + DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n")); md->mark = NULL; if (offsetcount > 1) { @@ -7156,15 +7156,15 @@ if (match_partial != NULL) offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject); } rc = PCRE_ERROR_PARTIAL; - } + } /* This is the classic nomatch case */ -else - { - DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); +else + { + DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); rc = PCRE_ERROR_NOMATCH; - } + } /* Return the MARK data if it has been requested. 
*/ @@ -7174,6 +7174,6 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) release_match_heapframes(&frame_zero); #endif return rc; -} - -/* End of pcre_exec.c */ +} + +/* End of pcre_exec.c */ diff --git a/contrib/libs/pcre/pcre_fullinfo.c b/contrib/libs/pcre/pcre_fullinfo.c index ac066ecf20f..bfccc02598e 100644 --- a/contrib/libs/pcre/pcre_fullinfo.c +++ b/contrib/libs/pcre/pcre_fullinfo.c @@ -1,70 +1,70 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + /* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2013 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_fullinfo(), which returns -information about a compiled pattern. */ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_fullinfo(), which returns +information about a compiled pattern. */ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Return info about compiled pattern * -*************************************************/ - -/* This is a newer "info" function which has an extensible interface so -that additional items can be added compatibly. - -Arguments: - argument_re points to compiled code - extra_data points extra data, or NULL - what what information is required - where where to put the information - -Returns: 0 if data returned, negative on error -*/ - +#endif + +#include "pcre_internal.h" + + +/************************************************* +* Return info about compiled pattern * +*************************************************/ + +/* This is a newer "info" function which has an extensible interface so +that additional items can be added compatibly. 
+ +Arguments: + argument_re points to compiled code + extra_data points extra data, or NULL + what what information is required + where where to put the information + +Returns: 0 if data returned, negative on error +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, @@ -78,42 +78,42 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data, int what, void *where) #endif -{ +{ const REAL_PCRE *re = (const REAL_PCRE *)argument_re; -const pcre_study_data *study = NULL; - -if (re == NULL || where == NULL) return PCRE_ERROR_NULL; - -if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = (const pcre_study_data *)extra_data->study_data; - +const pcre_study_data *study = NULL; + +if (re == NULL || where == NULL) return PCRE_ERROR_NULL; + +if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) + study = (const pcre_study_data *)extra_data->study_data; + /* Check that the first field in the block is the magic number. If it is not, return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which means that the pattern is likely compiled with different endianness. */ -if (re->magic_number != MAGIC_NUMBER) +if (re->magic_number != MAGIC_NUMBER) return re->magic_number == REVERSED_MAGIC_NUMBER? PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; - + /* Check that this pattern was compiled in the correct bit mode */ if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; -switch (what) - { - case PCRE_INFO_OPTIONS: +switch (what) + { + case PCRE_INFO_OPTIONS: *((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS; - break; - - case PCRE_INFO_SIZE: - *((size_t *)where) = re->size; - break; - - case PCRE_INFO_STUDYSIZE: - *((size_t *)where) = (study == NULL)? 
0 : study->size; - break; - + break; + + case PCRE_INFO_SIZE: + *((size_t *)where) = re->size; + break; + + case PCRE_INFO_STUDYSIZE: + *((size_t *)where) = (study == NULL)? 0 : study->size; + break; + case PCRE_INFO_JITSIZE: #ifdef SUPPORT_JIT *((size_t *)where) = @@ -126,20 +126,20 @@ switch (what) #endif break; - case PCRE_INFO_CAPTURECOUNT: - *((int *)where) = re->top_bracket; - break; - - case PCRE_INFO_BACKREFMAX: - *((int *)where) = re->top_backref; - break; - - case PCRE_INFO_FIRSTBYTE: - *((int *)where) = + case PCRE_INFO_CAPTURECOUNT: + *((int *)where) = re->top_bracket; + break; + + case PCRE_INFO_BACKREFMAX: + *((int *)where) = re->top_backref; + break; + + case PCRE_INFO_FIRSTBYTE: + *((int *)where) = ((re->flags & PCRE_FIRSTSET) != 0)? (int)re->first_char : - ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2; - break; - + ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2; + break; + case PCRE_INFO_FIRSTCHARACTER: *((pcre_uint32 *)where) = (re->flags & PCRE_FIRSTSET) != 0 ? re->first_char : 0; @@ -151,15 +151,15 @@ switch (what) ((re->flags & PCRE_STARTLINE) != 0) ? 2 : 0; break; - /* Make sure we pass back the pointer to the bit vector in the external - block, not the internal copy (with flipped integer fields). */ - - case PCRE_INFO_FIRSTTABLE: + /* Make sure we pass back the pointer to the bit vector in the external + block, not the internal copy (with flipped integer fields). */ + + case PCRE_INFO_FIRSTTABLE: *((const pcre_uint8 **)where) = (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)? - ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL; - break; - + ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL; + break; + case PCRE_INFO_MINLENGTH: *((int *)where) = (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)? 
@@ -172,11 +172,11 @@ switch (what) extra_data->executable_jit != NULL; break; - case PCRE_INFO_LASTLITERAL: - *((int *)where) = + case PCRE_INFO_LASTLITERAL: + *((int *)where) = ((re->flags & PCRE_REQCHSET) != 0)? (int)re->req_char : -1; - break; - + break; + case PCRE_INFO_REQUIREDCHAR: *((pcre_uint32 *)where) = ((re->flags & PCRE_REQCHSET) != 0) ? re->req_char : 0; @@ -187,37 +187,37 @@ switch (what) ((re->flags & PCRE_REQCHSET) != 0); break; - case PCRE_INFO_NAMEENTRYSIZE: - *((int *)where) = re->name_entry_size; - break; - - case PCRE_INFO_NAMECOUNT: - *((int *)where) = re->name_count; - break; - - case PCRE_INFO_NAMETABLE: + case PCRE_INFO_NAMEENTRYSIZE: + *((int *)where) = re->name_entry_size; + break; + + case PCRE_INFO_NAMECOUNT: + *((int *)where) = re->name_count; + break; + + case PCRE_INFO_NAMETABLE: *((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset; - break; - - case PCRE_INFO_DEFAULT_TABLES: + break; + + case PCRE_INFO_DEFAULT_TABLES: *((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables)); - break; - + break; + /* From release 8.00 this will always return TRUE because NOPARTIAL is no longer ever set (the restrictions have been removed). 
*/ - case PCRE_INFO_OKPARTIAL: - *((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0; - break; - - case PCRE_INFO_JCHANGED: - *((int *)where) = (re->flags & PCRE_JCHANGED) != 0; - break; - - case PCRE_INFO_HASCRORLF: - *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0; - break; - + case PCRE_INFO_OKPARTIAL: + *((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0; + break; + + case PCRE_INFO_JCHANGED: + *((int *)where) = (re->flags & PCRE_JCHANGED) != 0; + break; + + case PCRE_INFO_HASCRORLF: + *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0; + break; + case PCRE_INFO_MAXLOOKBEHIND: *((int *)where) = re->max_lookbehind; break; @@ -236,10 +236,10 @@ switch (what) *((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0; break; - default: return PCRE_ERROR_BADOPTION; - } - -return 0; -} - -/* End of pcre_fullinfo.c */ + default: return PCRE_ERROR_BADOPTION; + } + +return 0; +} + +/* End of pcre_fullinfo.c */ diff --git a/contrib/libs/pcre/pcre_get.c b/contrib/libs/pcre/pcre_get.c index c6b1e97536b..11392db08ed 100644 --- a/contrib/libs/pcre/pcre_get.c +++ b/contrib/libs/pcre/pcre_get.c @@ -1,73 +1,73 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains some convenience functions for extracting substrings -from the subject string after a regex match has succeeded. The original idea -for these functions came from Scott Wimer. 
*/ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains some convenience functions for extracting substrings +from the subject string after a regex match has succeeded. The original idea +for these functions came from Scott Wimer. 
*/ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Find number for named string * -*************************************************/ - -/* This function is used by the get_first_set() function below, as well -as being generally available. It assumes that names are unique. - -Arguments: - code the compiled regex - stringname the name whose number is required - -Returns: the number of the named parentheses, or a negative number - (PCRE_ERROR_NOSUBSTRING) if not found -*/ - +#endif + +#include "pcre_internal.h" + + +/************************************************* +* Find number for named string * +*************************************************/ + +/* This function is used by the get_first_set() function below, as well +as being generally available. It assumes that names are unique. + +Arguments: + code the compiled regex + stringname the name whose number is required + +Returns: the number of the named parentheses, or a negative number + (PCRE_ERROR_NOSUBSTRING) if not found +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_stringnumber(const pcre *code, const char *stringname) +pcre_get_stringnumber(const pcre *code, const char *stringname) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname) @@ -75,27 +75,27 @@ pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname) PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname) #endif -{ -int rc; -int entrysize; -int top, bot; +{ +int rc; +int entrysize; +int top, bot; pcre_uchar *nametable; - + #ifdef COMPILE_PCRE8 -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) - return rc; -if 
((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) - return rc; +if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) + return rc; +if (top <= 0) return PCRE_ERROR_NOSUBSTRING; + +if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) + return rc; +if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) + return rc; #endif #ifdef COMPILE_PCRE16 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - + if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) @@ -112,43 +112,43 @@ if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; #endif -bot = 0; -while (top > bot) - { - int mid = (top + bot) / 2; +bot = 0; +while (top > bot) + { + int mid = (top + bot) / 2; pcre_uchar *entry = nametable + entrysize*mid; int c = STRCMP_UC_UC((pcre_uchar *)stringname, (pcre_uchar *)(entry + IMM2_SIZE)); if (c == 0) return GET2(entry, 0); - if (c > 0) bot = mid + 1; else top = mid; - } - -return PCRE_ERROR_NOSUBSTRING; -} - - - -/************************************************* -* Find (multiple) entries for named string * -*************************************************/ - -/* This is used by the get_first_set() function below, as well as being -generally available. It is used when duplicated names are permitted. 
- -Arguments: - code the compiled regex - stringname the name whose entries required - firstptr where to put the pointer to the first entry - lastptr where to put the pointer to the last entry - -Returns: the length of each entry, or a negative number - (PCRE_ERROR_NOSUBSTRING) if not found -*/ - + if (c > 0) bot = mid + 1; else top = mid; + } + +return PCRE_ERROR_NOSUBSTRING; +} + + + +/************************************************* +* Find (multiple) entries for named string * +*************************************************/ + +/* This is used by the get_first_set() function below, as well as being +generally available. It is used when duplicated names are permitted. + +Arguments: + code the compiled regex + stringname the name whose entries required + firstptr where to put the pointer to the first entry + lastptr where to put the pointer to the last entry + +Returns: the length of each entry, or a negative number + (PCRE_ERROR_NOSUBSTRING) if not found +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_stringtable_entries(const pcre *code, const char *stringname, - char **firstptr, char **lastptr) +pcre_get_stringtable_entries(const pcre *code, const char *stringname, + char **firstptr, char **lastptr) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname, @@ -158,27 +158,27 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname, PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr) #endif -{ -int rc; -int entrysize; -int top, bot; +{ +int rc; +int entrysize; +int top, bot; pcre_uchar *nametable, *lastentry; - + #ifdef COMPILE_PCRE8 -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) - return rc; -if ((rc = pcre_fullinfo(code, 
NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) - return rc; +if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) + return rc; +if (top <= 0) return PCRE_ERROR_NOSUBSTRING; + +if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) + return rc; +if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) + return rc; #endif #ifdef COMPILE_PCRE16 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - + if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) @@ -195,33 +195,33 @@ if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; #endif -lastentry = nametable + entrysize * (top - 1); -bot = 0; -while (top > bot) - { - int mid = (top + bot) / 2; +lastentry = nametable + entrysize * (top - 1); +bot = 0; +while (top > bot) + { + int mid = (top + bot) / 2; pcre_uchar *entry = nametable + entrysize*mid; int c = STRCMP_UC_UC((pcre_uchar *)stringname, (pcre_uchar *)(entry + IMM2_SIZE)); - if (c == 0) - { + if (c == 0) + { pcre_uchar *first = entry; pcre_uchar *last = entry; - while (first > nametable) - { + while (first > nametable) + { if (STRCMP_UC_UC((pcre_uchar *)stringname, (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break; - first -= entrysize; - } - while (last < lastentry) - { + first -= entrysize; + } + while (last < lastentry) + { if (STRCMP_UC_UC((pcre_uchar *)stringname, (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break; - last += entrysize; - } + last += entrysize; + } #if defined COMPILE_PCRE8 - *firstptr = (char *)first; - *lastptr = (char *)last; + *firstptr = (char *)first; + *lastptr = (char *)last; #elif defined COMPILE_PCRE16 *firstptr = (PCRE_UCHAR16 *)first; *lastptr = (PCRE_UCHAR16 *)last; @@ -229,36 +229,36 @@ while (top > bot) *firstptr = (PCRE_UCHAR32 
*)first; *lastptr = (PCRE_UCHAR32 *)last; #endif - return entrysize; - } - if (c > 0) bot = mid + 1; else top = mid; - } - -return PCRE_ERROR_NOSUBSTRING; -} - - - -/************************************************* -* Find first set of multiple named strings * -*************************************************/ - -/* This function allows for duplicate names in the table of named substrings. -It returns the number of the first one that was set in a pattern match. - -Arguments: - code the compiled regex - stringname the name of the capturing substring - ovector the vector of matched substrings + return entrysize; + } + if (c > 0) bot = mid + 1; else top = mid; + } + +return PCRE_ERROR_NOSUBSTRING; +} + + + +/************************************************* +* Find first set of multiple named strings * +*************************************************/ + +/* This function allows for duplicate names in the table of named substrings. +It returns the number of the first one that was set in a pattern match. 
+ +Arguments: + code the compiled regex + stringname the name of the capturing substring + ovector the vector of matched substrings stringcount number of captured substrings - -Returns: the number of the first that is set, - or the number of the last one if none are set, - or a negative number on error -*/ - + +Returns: the number of the first that is set, + or the number of the last one if none are set, + or a negative number on error +*/ + #if defined COMPILE_PCRE8 -static int +static int get_first_set(const pcre *code, const char *stringname, int *ovector, int stringcount) #elif defined COMPILE_PCRE16 @@ -270,12 +270,12 @@ static int get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector, int stringcount) #endif -{ +{ const REAL_PCRE *re = (const REAL_PCRE *)code; -int entrysize; +int entrysize; pcre_uchar *entry; #if defined COMPILE_PCRE8 -char *first, *last; +char *first, *last; #elif defined COMPILE_PCRE16 PCRE_UCHAR16 *first, *last; #elif defined COMPILE_PCRE32 @@ -283,9 +283,9 @@ PCRE_UCHAR32 *first, *last; #endif #if defined COMPILE_PCRE8 -if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) - return pcre_get_stringnumber(code, stringname); -entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); +if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) + return pcre_get_stringnumber(code, stringname); +entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); #elif defined COMPILE_PCRE16 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) return pcre16_get_stringnumber(code, stringname); @@ -295,49 +295,49 @@ if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) return pcre32_get_stringnumber(code, stringname); entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last); #endif -if (entrysize <= 0) return entrysize; +if (entrysize <= 0) return entrysize; for (entry = (pcre_uchar *)first; entry <= 
(pcre_uchar *)last; entry += entrysize) - { + { int n = GET2(entry, 0); if (n < stringcount && ovector[n*2] >= 0) return n; - } + } return GET2(entry, 0); -} - - - - -/************************************************* -* Copy captured string to given buffer * -*************************************************/ - -/* This function copies a single captured substring into a given buffer. -Note that we use memcpy() rather than strncpy() in case there are binary zeros -in the string. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - buffer where to put the substring - size the size of the buffer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) buffer too small - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - +} + + + + +/************************************************* +* Copy captured string to given buffer * +*************************************************/ + +/* This function copies a single captured substring into a given buffer. +Note that we use memcpy() rather than strncpy() in case there are binary zeros +in the string. + +Arguments: + subject the subject string that was matched + ovector pointer to the offsets table + stringcount the number of substrings that were captured + (i.e. 
the yield of the pcre_exec call, unless + that was zero, in which case it should be 1/3 + of the offset table size) + stringnumber the number of the required substring + buffer where to put the substring + size the size of the buffer + +Returns: if successful: + the length of the copied string, not including the zero + that is put on the end; can be zero + if not successful: + PCRE_ERROR_NOMEMORY (-6) buffer too small + PCRE_ERROR_NOSUBSTRING (-7) no such captured substring +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_copy_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, char *buffer, int size) +pcre_copy_substring(const char *subject, int *ovector, int stringcount, + int stringnumber, char *buffer, int size) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, @@ -347,48 +347,48 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount, int stringnumber, PCRE_UCHAR32 *buffer, int size) #endif -{ -int yield; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; -if (size < yield + 1) return PCRE_ERROR_NOMEMORY; +{ +int yield; +if (stringnumber < 0 || stringnumber >= stringcount) + return PCRE_ERROR_NOSUBSTRING; +stringnumber *= 2; +yield = ovector[stringnumber+1] - ovector[stringnumber]; +if (size < yield + 1) return PCRE_ERROR_NOMEMORY; memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield)); -buffer[yield] = 0; -return yield; -} - - - -/************************************************* -* Copy named captured string to given buffer * -*************************************************/ - -/* This function copies a single captured substring into a given buffer, -identifying it by name. 
If the regex permits duplicate names, the first -substring that is set is chosen. - -Arguments: - code the compiled regex - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringname the name of the required substring - buffer where to put the substring - size the size of the buffer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) buffer too small - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - +buffer[yield] = 0; +return yield; +} + + + +/************************************************* +* Copy named captured string to given buffer * +*************************************************/ + +/* This function copies a single captured substring into a given buffer, +identifying it by name. If the regex permits duplicate names, the first +substring that is set is chosen. + +Arguments: + code the compiled regex + subject the subject string that was matched + ovector pointer to the offsets table + stringcount the number of substrings that were captured + (i.e. 
the yield of the pcre_exec call, unless + that was zero, in which case it should be 1/3 + of the offset table size) + stringname the name of the required substring + buffer where to put the substring + size the size of the buffer + +Returns: if successful: + the length of the copied string, not including the zero + that is put on the end; can be zero + if not successful: + PCRE_ERROR_NOMEMORY (-6) buffer too small + PCRE_ERROR_NOSUBSTRING (-7) no such captured substring +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_copy_named_substring(const pcre *code, const char *subject, @@ -405,45 +405,45 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject, int *ovector, int stringcount, PCRE_SPTR32 stringname, PCRE_UCHAR32 *buffer, int size) #endif -{ +{ int n = get_first_set(code, stringname, ovector, stringcount); -if (n <= 0) return n; +if (n <= 0) return n; #if defined COMPILE_PCRE8 -return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); +return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); #elif defined COMPILE_PCRE16 return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size); #elif defined COMPILE_PCRE32 return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size); #endif -} - - - -/************************************************* -* Copy all captured strings to new store * -*************************************************/ - -/* This function gets one chunk of store and builds a list of pointers and all -of the captured substrings in it. A NULL pointer is put on the end of the list. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - listptr set to point to the list of pointers - -Returns: if successful: 0 - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store -*/ - +} + + + +/************************************************* +* Copy all captured strings to new store * +*************************************************/ + +/* This function gets one chunk of store and builds a list of pointers and all +of the captured substrings in it. A NULL pointer is put on the end of the list. + +Arguments: + subject the subject string that was matched + ovector pointer to the offsets table + stringcount the number of substrings that were captured + (i.e. the yield of the pcre_exec call, unless + that was zero, in which case it should be 1/3 + of the offset table size) + listptr set to point to the list of pointers + +Returns: if successful: 0 + if not successful: + PCRE_ERROR_NOMEMORY (-6) failed to get store +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_substring_list(const char *subject, int *ovector, int stringcount, - const char ***listptr) +pcre_get_substring_list(const char *subject, int *ovector, int stringcount, + const char ***listptr) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount, @@ -453,61 +453,61 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount, PCRE_SPTR32 **listptr) #endif -{ -int i; +{ +int i; int size = sizeof(pcre_uchar *); -int double_count = stringcount * 2; +int double_count = stringcount * 2; pcre_uchar **stringlist; pcre_uchar *p; - -for (i = 0; i < double_count; i += 2) + +for (i = 0; i < double_count; i += 2) { size += sizeof(pcre_uchar *) + IN_UCHARS(1); if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]); } - + stringlist = 
(pcre_uchar **)(PUBL(malloc))(size); -if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; - +if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; + #if defined COMPILE_PCRE8 -*listptr = (const char **)stringlist; +*listptr = (const char **)stringlist; #elif defined COMPILE_PCRE16 *listptr = (PCRE_SPTR16 *)stringlist; #elif defined COMPILE_PCRE32 *listptr = (PCRE_SPTR32 *)stringlist; #endif p = (pcre_uchar *)(stringlist + stringcount + 1); - -for (i = 0; i < double_count; i += 2) - { + +for (i = 0; i < double_count; i += 2) + { int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0; memcpy(p, subject + ovector[i], IN_UCHARS(len)); - *stringlist++ = p; - p += len; - *p++ = 0; - } - -*stringlist = NULL; -return 0; -} - - - -/************************************************* -* Free store obtained by get_substring_list * -*************************************************/ - -/* This function exists for the benefit of people calling PCRE from non-C + *stringlist++ = p; + p += len; + *p++ = 0; + } + +*stringlist = NULL; +return 0; +} + + + +/************************************************* +* Free store obtained by get_substring_list * +*************************************************/ + +/* This function exists for the benefit of people calling PCRE from non-C programs that can call its functions, but not free() or (PUBL(free))() directly. 
- -Argument: the result of a previous pcre_get_substring_list() -Returns: nothing -*/ - + +Argument: the result of a previous pcre_get_substring_list() +Returns: nothing +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre_free_substring_list(const char **pointer) +pcre_free_substring_list(const char **pointer) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre16_free_substring_list(PCRE_SPTR16 *pointer) @@ -515,41 +515,41 @@ pcre16_free_substring_list(PCRE_SPTR16 *pointer) PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre32_free_substring_list(PCRE_SPTR32 *pointer) #endif -{ +{ (PUBL(free))((void *)pointer); -} - - - -/************************************************* -* Copy captured string to new store * -*************************************************/ - -/* This function copies a single captured substring into a piece of new -store - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - stringptr where to put a pointer to the substring - -Returns: if successful: - the length of the string, not including the zero that - is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store - PCRE_ERROR_NOSUBSTRING (-7) substring not present -*/ - +} + + + +/************************************************* +* Copy captured string to new store * +*************************************************/ + +/* This function copies a single captured substring into a piece of new +store + +Arguments: + subject the subject string that was matched + ovector pointer to the offsets table + stringcount the number of substrings that were captured + (i.e. 
the yield of the pcre_exec call, unless + that was zero, in which case it should be 1/3 + of the offset table size) + stringnumber the number of the required substring + stringptr where to put a pointer to the substring + +Returns: if successful: + the length of the string, not including the zero that + is put on the end; can be zero + if not successful: + PCRE_ERROR_NOMEMORY (-6) failed to get store + PCRE_ERROR_NOSUBSTRING (-7) substring not present +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, const char **stringptr) +pcre_get_substring(const char *subject, int *ovector, int stringcount, + int stringnumber, const char **stringptr) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, @@ -559,17 +559,17 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount, int stringnumber, PCRE_SPTR32 *stringptr) #endif -{ -int yield; +{ +int yield; pcre_uchar *substring; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; +if (stringnumber < 0 || stringnumber >= stringcount) + return PCRE_ERROR_NOSUBSTRING; +stringnumber *= 2; +yield = ovector[stringnumber+1] - ovector[stringnumber]; substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1)); -if (substring == NULL) return PCRE_ERROR_NOMEMORY; +if (substring == NULL) return PCRE_ERROR_NOMEMORY; memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield)); -substring[yield] = 0; +substring[yield] = 0; #if defined COMPILE_PCRE8 *stringptr = (const char *)substring; #elif defined COMPILE_PCRE16 @@ -577,38 +577,38 @@ substring[yield] = 0; #elif defined COMPILE_PCRE32 *stringptr = (PCRE_SPTR32)substring; #endif -return yield; -} - - - 
-/************************************************* -* Copy named captured string to new store * -*************************************************/ - -/* This function copies a single captured substring, identified by name, into -new store. If the regex permits duplicate names, the first substring that is -set is chosen. - -Arguments: - code the compiled regex - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringname the name of the required substring - stringptr where to put the pointer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) couldn't get memory - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - +return yield; +} + + + +/************************************************* +* Copy named captured string to new store * +*************************************************/ + +/* This function copies a single captured substring, identified by name, into +new store. If the regex permits duplicate names, the first substring that is +set is chosen. + +Arguments: + code the compiled regex + subject the subject string that was matched + ovector pointer to the offsets table + stringcount the number of substrings that were captured + (i.e. 
the yield of the pcre_exec call, unless + that was zero, in which case it should be 1/3 + of the offset table size) + stringname the name of the required substring + stringptr where to put the pointer + +Returns: if successful: + the length of the copied string, not including the zero + that is put on the end; can be zero + if not successful: + PCRE_ERROR_NOMEMORY (-6) couldn't get memory + PCRE_ERROR_NOSUBSTRING (-7) no such captured substring +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_get_named_substring(const pcre *code, const char *subject, @@ -625,36 +625,36 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject, int *ovector, int stringcount, PCRE_SPTR32 stringname, PCRE_SPTR32 *stringptr) #endif -{ +{ int n = get_first_set(code, stringname, ovector, stringcount); -if (n <= 0) return n; +if (n <= 0) return n; #if defined COMPILE_PCRE8 -return pcre_get_substring(subject, ovector, stringcount, n, stringptr); +return pcre_get_substring(subject, ovector, stringcount, n, stringptr); #elif defined COMPILE_PCRE16 return pcre16_get_substring(subject, ovector, stringcount, n, stringptr); #elif defined COMPILE_PCRE32 return pcre32_get_substring(subject, ovector, stringcount, n, stringptr); #endif -} - - - - -/************************************************* -* Free store obtained by get_substring * -*************************************************/ - -/* This function exists for the benefit of people calling PCRE from non-C +} + + + + +/************************************************* +* Free store obtained by get_substring * +*************************************************/ + +/* This function exists for the benefit of people calling PCRE from non-C programs that can call its functions, but not free() or (PUBL(free))() directly. 
- -Argument: the result of a previous pcre_get_substring() -Returns: nothing -*/ - + +Argument: the result of a previous pcre_get_substring() +Returns: nothing +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre_free_substring(const char *pointer) +pcre_free_substring(const char *pointer) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre16_free_substring(PCRE_SPTR16 pointer) @@ -662,8 +662,8 @@ pcre16_free_substring(PCRE_SPTR16 pointer) PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre32_free_substring(PCRE_SPTR32 pointer) #endif -{ +{ (PUBL(free))((void *)pointer); -} - -/* End of pcre_get.c */ +} + +/* End of pcre_get.c */ diff --git a/contrib/libs/pcre/pcre_globals.c b/contrib/libs/pcre/pcre_globals.c index 21a2fe9de93..b0418be5976 100644 --- a/contrib/libs/pcre/pcre_globals.c +++ b/contrib/libs/pcre/pcre_globals.c @@ -1,50 +1,50 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2014 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains global variables that are exported by the PCRE library. -PCRE is thread-clean and doesn't use any global variables in the normal sense. -However, it calls memory allocation and freeing functions via the four -indirections below, and it can optionally do callouts, using the fifth -indirection. 
These values can be changed by the caller, but are shared between + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains global variables that are exported by the PCRE library. +PCRE is thread-clean and doesn't use any global variables in the normal sense. 
+However, it calls memory allocation and freeing functions via the four +indirections below, and it can optionally do callouts, using the fifth +indirection. These values can be changed by the caller, but are shared between all threads. - + For MS Visual Studio and Symbian OS, there are problems in initializing these variables to non-local functions. In these cases, therefore, an indirection via a local function is used. @@ -52,12 +52,12 @@ a local function is used. Also, when compiling for Virtual Pascal, things are done differently, and global variables are not used. */ -#ifdef HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - +#endif + +#include "pcre_internal.h" + #if defined _MSC_VER || defined __SYMBIAN32__ static void* LocalPcreMalloc(size_t aSize) { @@ -81,6 +81,6 @@ PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc; PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free; PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL; PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL; -#endif - -/* End of pcre_globals.c */ +#endif + +/* End of pcre_globals.c */ diff --git a/contrib/libs/pcre/pcre_internal.h b/contrib/libs/pcre/pcre_internal.h index 7d7d2c4bac9..97ff55d03b3 100644 --- a/contrib/libs/pcre/pcre_internal.h +++ b/contrib/libs/pcre/pcre_internal.h @@ -1,57 +1,57 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2016 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - -/* This header contains definitions that are shared between the different -modules, but which are not relevant to the exported API. This includes some + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This header contains definitions that are shared between the different +modules, but which are not relevant to the exported API. 
This includes some functions whose names all begin with "_pcre_", "_pcre16_" or "_pcre32_" depending on the PRIV macro. */ - -#ifndef PCRE_INTERNAL_H -#define PCRE_INTERNAL_H - + +#ifndef PCRE_INTERNAL_H +#define PCRE_INTERNAL_H + /* Define PCRE_DEBUG to get debugging output on stdout. */ - -#if 0 + +#if 0 #define PCRE_DEBUG -#endif - +#endif + /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */ #if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32 @@ -86,66 +86,66 @@ script prevents both being selected, but not everybody uses "configure". */ #error The use of both EBCDIC and SUPPORT_UTF is not supported. #endif -/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef -inline, and there are *still* stupid compilers about that don't like indented -pre-processor statements, or at least there were when I first wrote this. After -all, it had only been about 10 years then... - -It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so -be absolutely sure we get our version. */ - -#undef DPRINTF +/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef +inline, and there are *still* stupid compilers about that don't like indented +pre-processor statements, or at least there were when I first wrote this. After +all, it had only been about 10 years then... + +It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so +be absolutely sure we get our version. */ + +#undef DPRINTF #ifdef PCRE_DEBUG -#define DPRINTF(p) printf p -#else -#define DPRINTF(p) /* Nothing */ -#endif - - -/* Standard C headers plus the external interface definition. The only time -setjmp and stdarg are used is when NO_RECURSE is set. */ - -#include -#include -#include -#include -#include -#include - +#define DPRINTF(p) printf p +#else +#define DPRINTF(p) /* Nothing */ +#endif + + +/* Standard C headers plus the external interface definition. 
The only time +setjmp and stdarg are used is when NO_RECURSE is set. */ + +#include +#include +#include +#include +#include +#include + /* Valgrind (memcheck) support */ #ifdef SUPPORT_VALGRIND #include #endif -/* When compiling a DLL for Windows, the exported symbols have to be declared -using some MS magic. I found some useful information on this web page: -http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the -information there, using __declspec(dllexport) without "extern" we have a -definition; with "extern" we have a declaration. The settings here override the -setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL, -which is all that is needed for applications (they just import the symbols). We -use: - - PCRE_EXP_DECL for declarations - PCRE_EXP_DEFN for definitions of exported functions - PCRE_EXP_DATA_DEFN for definitions of exported variables - -The reason for the two DEFN macros is that in non-Windows environments, one -does not want to have "extern" before variable definitions because it leads to -compiler warnings. So we distinguish between functions and variables. In -Windows, the two should always be the same. - -The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest, -which is an application, but needs to import this file in order to "peek" at -internals, can #include pcre.h first to get an application's-eye view. - -In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, -special-purpose environments) might want to stick other stuff in front of -exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and -PCRE_EXP_DATA_DEFN only if they are not already set. */ - -#ifndef PCRE_EXP_DECL +/* When compiling a DLL for Windows, the exported symbols have to be declared +using some MS magic. I found some useful information on this web page: +http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. 
According to the +information there, using __declspec(dllexport) without "extern" we have a +definition; with "extern" we have a declaration. The settings here override the +setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL, +which is all that is needed for applications (they just import the symbols). We +use: + + PCRE_EXP_DECL for declarations + PCRE_EXP_DEFN for definitions of exported functions + PCRE_EXP_DATA_DEFN for definitions of exported variables + +The reason for the two DEFN macros is that in non-Windows environments, one +does not want to have "extern" before variable definitions because it leads to +compiler warnings. So we distinguish between functions and variables. In +Windows, the two should always be the same. + +The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest, +which is an application, but needs to import this file in order to "peek" at +internals, can #include pcre.h first to get an application's-eye view. + +In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, +special-purpose environments) might want to stick other stuff in front of +exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and +PCRE_EXP_DATA_DEFN only if they are not already set. */ + +#ifndef PCRE_EXP_DECL # ifdef _WIN32 # ifndef PCRE_STATIC # define PCRE_EXP_DECL extern __declspec(dllexport) @@ -169,8 +169,8 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */ # define PCRE_EXP_DATA_DEFN # endif # endif -#endif - +#endif + /* When compiling with the MSVC compiler, it is sometimes necessary to include a "calling convention" before exported function names. (This is secondhand information; I know nothing about MSVC myself). For example, something like @@ -186,27 +186,27 @@ set, we ensure here that it has no effect. */ #endif /* We need to have types that specify unsigned 8, 16 and 32-bit integers. We -cannot determine these outside the compilation (e.g. 
by running a program as -part of "configure") because PCRE is often cross-compiled for use on other -systems. Instead we make use of the maximum sizes that are available at -preprocessor time in standard C environments. */ - +cannot determine these outside the compilation (e.g. by running a program as +part of "configure") because PCRE is often cross-compiled for use on other +systems. Instead we make use of the maximum sizes that are available at +preprocessor time in standard C environments. */ + typedef unsigned char pcre_uint8; -#if USHRT_MAX == 65535 +#if USHRT_MAX == 65535 typedef unsigned short pcre_uint16; typedef short pcre_int16; #define PCRE_UINT16_MAX USHRT_MAX #define PCRE_INT16_MAX SHRT_MAX -#elif UINT_MAX == 65535 +#elif UINT_MAX == 65535 typedef unsigned int pcre_uint16; typedef int pcre_int16; #define PCRE_UINT16_MAX UINT_MAX #define PCRE_INT16_MAX INT_MAX -#else +#else #error Cannot determine a type for 16-bit integers -#endif - +#endif + #if UINT_MAX == 4294967295U typedef unsigned int pcre_uint32; typedef int pcre_int32; @@ -217,10 +217,10 @@ typedef unsigned long int pcre_uint32; typedef long int pcre_int32; #define PCRE_UINT32_MAX ULONG_MAX #define PCRE_INT32_MAX LONG_MAX -#else +#else #error Cannot determine a type for 32-bit integers -#endif - +#endif + /* When checking for integer overflow in pcre_compile(), we need to handle large integers. If a 64-bit integer type is available, we can use that. Otherwise we have to cast to double, which of course requires floating point @@ -241,16 +241,16 @@ by "configure". */ #define INT64_OR_DOUBLE double #endif -/* All character handling must be done as unsigned characters. Otherwise there -are problems with top-bit-set characters and functions such as isspace(). +/* All character handling must be done as unsigned characters. Otherwise there +are problems with top-bit-set characters and functions such as isspace(). 
However, we leave the interface to the outside world as char * or short *, because that should make things easier for callers. This character type is called pcre_uchar. - + The IN_UCHARS macro multiply its argument with the byte size of the current pcre_uchar type. Useful for memcpy and such operations, whose require the byte size of their input/output buffers. - + The MAX_255 macro checks whether its pcre_uchar input is less than 256. The TABLE_GET macro is designed for accessing elements of tables whose contain @@ -291,182 +291,182 @@ typedef pcre_uint32 pcre_uchar; #error Unsupported compiling mode #endif /* COMPILE_PCRE[8|16|32] */ -/* This is an unsigned int value that no character can ever have. UTF-8 -characters only go up to 0x7fffffff (though Unicode doesn't go beyond -0x0010ffff). */ - -#define NOTACHAR 0xffffffff - -/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, -"any" and "anycrlf" at present). The following macros are used to package up -testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various -modules to indicate in which datablock the parameters exist, and what the -start/end of string field names are. */ - -#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ -#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ -#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ - -/* This macro checks for a newline at the given position */ - -#define IS_NEWLINE(p) \ - ((NLBLOCK->nltype != NLTYPE_FIXED)? \ - ((p) < NLBLOCK->PSEND && \ +/* This is an unsigned int value that no character can ever have. UTF-8 +characters only go up to 0x7fffffff (though Unicode doesn't go beyond +0x0010ffff). */ + +#define NOTACHAR 0xffffffff + +/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, +"any" and "anycrlf" at present). The following macros are used to package up +testing for newlines. 
NLBLOCK, PSSTART, and PSEND are defined in the various +modules to indicate in which datablock the parameters exist, and what the +start/end of string field names are. */ + +#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ +#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ +#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ + +/* This macro checks for a newline at the given position */ + +#define IS_NEWLINE(p) \ + ((NLBLOCK->nltype != NLTYPE_FIXED)? \ + ((p) < NLBLOCK->PSEND && \ PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \ &(NLBLOCK->nllen), utf)) \ - : \ - ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ + : \ + ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ UCHAR21TEST(p) == NLBLOCK->nl[0] && \ (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \ - ) \ - ) - -/* This macro checks for a newline immediately preceding the given position */ - -#define WAS_NEWLINE(p) \ - ((NLBLOCK->nltype != NLTYPE_FIXED)? \ - ((p) > NLBLOCK->PSSTART && \ + ) \ + ) + +/* This macro checks for a newline immediately preceding the given position */ + +#define WAS_NEWLINE(p) \ + ((NLBLOCK->nltype != NLTYPE_FIXED)? \ + ((p) > NLBLOCK->PSSTART && \ PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ &(NLBLOCK->nllen), utf)) \ - : \ - ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ + : \ + ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \ (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \ - ) \ - ) - -/* When PCRE is compiled as a C++ library, the subject pointer can be replaced -with a custom type. This makes it possible, for example, to allow pcre_exec() -to process subject strings that are discontinuous by using a smart pointer -class. It must always be possible to inspect all of the subject string in -pcre_exec() because of the way it backtracks. Two macros are required in the -normal case, for sign-unspecified and unsigned char pointers. 
The former is -used for the external interface and appears in pcre.h, which is why its name -must begin with PCRE_. */ - -#ifdef CUSTOM_SUBJECT_PTR + ) \ + ) + +/* When PCRE is compiled as a C++ library, the subject pointer can be replaced +with a custom type. This makes it possible, for example, to allow pcre_exec() +to process subject strings that are discontinuous by using a smart pointer +class. It must always be possible to inspect all of the subject string in +pcre_exec() because of the way it backtracks. Two macros are required in the +normal case, for sign-unspecified and unsigned char pointers. The former is +used for the external interface and appears in pcre.h, which is why its name +must begin with PCRE_. */ + +#ifdef CUSTOM_SUBJECT_PTR #define PCRE_PUCHAR CUSTOM_SUBJECT_PTR -#else +#else #define PCRE_PUCHAR const pcre_uchar * -#endif - -/* Include the public PCRE header and the definitions of UCP character property -values. */ - -#include "pcre.h" -#include "ucp.h" - +#endif + +/* Include the public PCRE header and the definitions of UCP character property +values. */ + +#include "pcre.h" +#include "ucp.h" + #ifdef COMPILE_PCRE32 /* Assert that the public PCRE_UCHAR32 is a 32-bit type */ typedef int __assert_pcre_uchar32_size[sizeof(PCRE_UCHAR32) == 4 ? 1 : -1]; #endif -/* When compiling for use with the Virtual Pascal compiler, these functions -need to have their names changed. PCRE must be compiled with the -DVPCOMPAT -option on the command line. */ - -#ifdef VPCOMPAT -#define strlen(s) _strlen(s) -#define strncmp(s1,s2,m) _strncmp(s1,s2,m) -#define memcmp(s,c,n) _memcmp(s,c,n) -#define memcpy(d,s,n) _memcpy(d,s,n) -#define memmove(d,s,n) _memmove(d,s,n) -#define memset(s,c,n) _memset(s,c,n) -#else /* VPCOMPAT */ - -/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), -define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY -is set. 
Otherwise, include an emulating function for those systems that have -neither (there some non-Unix environments where this is the case). */ - -#ifndef HAVE_MEMMOVE -#undef memmove /* some systems may have a macro */ -#ifdef HAVE_BCOPY -#define memmove(a, b, c) bcopy(b, a, c) -#else /* HAVE_BCOPY */ -static void * -pcre_memmove(void *d, const void *s, size_t n) -{ -size_t i; -unsigned char *dest = (unsigned char *)d; -const unsigned char *src = (const unsigned char *)s; -if (dest > src) - { - dest += n; - src += n; - for (i = 0; i < n; ++i) *(--dest) = *(--src); - return (void *)dest; - } -else - { - for (i = 0; i < n; ++i) *dest++ = *src++; - return (void *)(dest - n); - } -} -#define memmove(a, b, c) pcre_memmove(a, b, c) -#endif /* not HAVE_BCOPY */ -#endif /* not HAVE_MEMMOVE */ -#endif /* not VPCOMPAT */ - - -/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored -in big-endian order) by default. These are used, for example, to link from the -start of a subpattern to its alternatives and its end. The use of 2 bytes per -offset limits the size of the compiled regex to around 64K, which is big enough -for almost everybody. However, I received a request for an even bigger limit. -For this reason, and also to make the code easier to maintain, the storing and -loading of offsets from the byte string is now handled by the macros that are -defined here. - -The macros are controlled by the value of LINK_SIZE. This defaults to 2 in -the config.h file, but can be overridden by using -D on the command line. This -is automated on Unix systems via the "configure" command. */ - +/* When compiling for use with the Virtual Pascal compiler, these functions +need to have their names changed. PCRE must be compiled with the -DVPCOMPAT +option on the command line. 
*/ + +#ifdef VPCOMPAT +#define strlen(s) _strlen(s) +#define strncmp(s1,s2,m) _strncmp(s1,s2,m) +#define memcmp(s,c,n) _memcmp(s,c,n) +#define memcpy(d,s,n) _memcpy(d,s,n) +#define memmove(d,s,n) _memmove(d,s,n) +#define memset(s,c,n) _memset(s,c,n) +#else /* VPCOMPAT */ + +/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), +define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY +is set. Otherwise, include an emulating function for those systems that have +neither (there some non-Unix environments where this is the case). */ + +#ifndef HAVE_MEMMOVE +#undef memmove /* some systems may have a macro */ +#ifdef HAVE_BCOPY +#define memmove(a, b, c) bcopy(b, a, c) +#else /* HAVE_BCOPY */ +static void * +pcre_memmove(void *d, const void *s, size_t n) +{ +size_t i; +unsigned char *dest = (unsigned char *)d; +const unsigned char *src = (const unsigned char *)s; +if (dest > src) + { + dest += n; + src += n; + for (i = 0; i < n; ++i) *(--dest) = *(--src); + return (void *)dest; + } +else + { + for (i = 0; i < n; ++i) *dest++ = *src++; + return (void *)(dest - n); + } +} +#define memmove(a, b, c) pcre_memmove(a, b, c) +#endif /* not HAVE_BCOPY */ +#endif /* not HAVE_MEMMOVE */ +#endif /* not VPCOMPAT */ + + +/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored +in big-endian order) by default. These are used, for example, to link from the +start of a subpattern to its alternatives and its end. The use of 2 bytes per +offset limits the size of the compiled regex to around 64K, which is big enough +for almost everybody. However, I received a request for an even bigger limit. +For this reason, and also to make the code easier to maintain, the storing and +loading of offsets from the byte string is now handled by the macros that are +defined here. + +The macros are controlled by the value of LINK_SIZE. This defaults to 2 in +the config.h file, but can be overridden by using -D on the command line. 
This +is automated on Unix systems via the "configure" command. */ + #if defined COMPILE_PCRE8 -#if LINK_SIZE == 2 - -#define PUT(a,n,d) \ - (a[n] = (d) >> 8), \ - (a[(n)+1] = (d) & 255) - -#define GET(a,n) \ - (((a)[n] << 8) | (a)[(n)+1]) - -#define MAX_PATTERN_SIZE (1 << 16) - - -#elif LINK_SIZE == 3 - -#define PUT(a,n,d) \ - (a[n] = (d) >> 16), \ - (a[(n)+1] = (d) >> 8), \ - (a[(n)+2] = (d) & 255) - -#define GET(a,n) \ - (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) - -#define MAX_PATTERN_SIZE (1 << 24) - - -#elif LINK_SIZE == 4 - -#define PUT(a,n,d) \ - (a[n] = (d) >> 24), \ - (a[(n)+1] = (d) >> 16), \ - (a[(n)+2] = (d) >> 8), \ - (a[(n)+3] = (d) & 255) - -#define GET(a,n) \ - (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) - +#if LINK_SIZE == 2 + +#define PUT(a,n,d) \ + (a[n] = (d) >> 8), \ + (a[(n)+1] = (d) & 255) + +#define GET(a,n) \ + (((a)[n] << 8) | (a)[(n)+1]) + +#define MAX_PATTERN_SIZE (1 << 16) + + +#elif LINK_SIZE == 3 + +#define PUT(a,n,d) \ + (a[n] = (d) >> 16), \ + (a[(n)+1] = (d) >> 8), \ + (a[(n)+2] = (d) & 255) + +#define GET(a,n) \ + (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) + +#define MAX_PATTERN_SIZE (1 << 24) + + +#elif LINK_SIZE == 4 + +#define PUT(a,n,d) \ + (a[n] = (d) >> 24), \ + (a[(n)+1] = (d) >> 16), \ + (a[(n)+2] = (d) >> 8), \ + (a[(n)+3] = (d) & 255) + +#define GET(a,n) \ + (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) + /* Keep it positive */ #define MAX_PATTERN_SIZE (1 << 30) - + #else #error LINK_SIZE must be either 2, 3, or 4 #endif - + #elif defined COMPILE_PCRE16 #if LINK_SIZE == 2 @@ -499,12 +499,12 @@ is automated on Unix systems via the "configure" command. 
*/ /* Keep it positive */ #define MAX_PATTERN_SIZE (1 << 30) -#else -#error LINK_SIZE must be either 2, 3, or 4 -#endif - +#else +#error LINK_SIZE must be either 2, 3, or 4 +#endif + #elif defined COMPILE_PCRE32 - + /* Only supported LINK_SIZE is 4 */ /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */ #undef LINK_SIZE @@ -523,37 +523,37 @@ is automated on Unix systems via the "configure" command. */ #error Unsupported compiling mode #endif /* COMPILE_PCRE[8|16|32] */ -/* Convenience macro defined in terms of the others */ - -#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE - - -/* PCRE uses some other 2-byte quantities that do not change when the size of -offsets changes. There are used for repeat counts and for other things such as -capturing parenthesis numbers in back references. */ - +/* Convenience macro defined in terms of the others */ + +#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE + + +/* PCRE uses some other 2-byte quantities that do not change when the size of +offsets changes. There are used for repeat counts and for other things such as +capturing parenthesis numbers in back references. */ + #if defined COMPILE_PCRE8 #define IMM2_SIZE 2 -#define PUT2(a,n,d) \ - a[n] = (d) >> 8; \ - a[(n)+1] = (d) & 255 - +#define PUT2(a,n,d) \ + a[n] = (d) >> 8; \ + a[(n)+1] = (d) & 255 + /* For reasons that I do not understand, the expression in this GET2 macro is treated by gcc as a signed expression, even when a is declared as unsigned. It seems that any kind of arithmetic results in a signed value. */ -#define GET2(a,n) \ +#define GET2(a,n) \ (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) - + #elif defined COMPILE_PCRE16 - + #define IMM2_SIZE 1 - + #define PUT2(a,n,d) \ a[n] = d - + #define GET2(a,n) \ a[n] @@ -610,27 +610,27 @@ UTF support is omitted, we don't even define them. 
*/ /* #define HAS_EXTRALEN(c) */ /* #define GET_EXTRALEN(c) */ /* #define NOT_FIRSTCHAR(c) */ -#define GETCHAR(c, eptr) c = *eptr; -#define GETCHARTEST(c, eptr) c = *eptr; -#define GETCHARINC(c, eptr) c = *eptr++; -#define GETCHARINCTEST(c, eptr) c = *eptr++; -#define GETCHARLEN(c, eptr, len) c = *eptr; +#define GETCHAR(c, eptr) c = *eptr; +#define GETCHARTEST(c, eptr) c = *eptr; +#define GETCHARINC(c, eptr) c = *eptr++; +#define GETCHARINCTEST(c, eptr) c = *eptr++; +#define GETCHARLEN(c, eptr, len) c = *eptr; /* #define GETCHARLENTEST(c, eptr, len) */ -/* #define BACKCHAR(eptr) */ +/* #define BACKCHAR(eptr) */ /* #define FORWARDCHAR(eptr) */ /* #define ACROSSCHAR(condition, eptr, action) */ - + #else /* SUPPORT_UTF */ - + /* Tests whether the code point needs extra characters to decode. */ - + #define HASUTF8EXTRALEN(c) ((c) >= 0xc0) - + /* Base macro to pick up the remaining bytes of a UTF-8 character, not advancing the pointer. */ - + #define GETUTF8(c, eptr) \ - { \ + { \ if ((c & 0x20) == 0) \ c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \ else if ((c & 0x10) == 0) \ @@ -656,10 +656,10 @@ the pointer. */ if ((c & 0x20) == 0) \ c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \ else if ((c & 0x10) == 0) \ - { \ + { \ c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \ eptr += 2; \ - } \ + } \ else if ((c & 0x08) == 0) \ { \ c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \ @@ -680,8 +680,8 @@ the pointer. */ ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \ eptr += 5; \ } \ - } - + } + #if defined COMPILE_PCRE8 /* These macros were originally written in the form of loops that used data @@ -714,37 +714,37 @@ we know we are in UTF-8 mode. */ c = *eptr; \ if (c >= 0xc0) GETUTF8(c, eptr); -/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the -pointer. */ - -#define GETCHARTEST(c, eptr) \ - c = *eptr; \ +/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the +pointer. 
*/ + +#define GETCHARTEST(c, eptr) \ + c = *eptr; \ if (utf && c >= 0xc0) GETUTF8(c, eptr); - -/* Get the next UTF-8 character, advancing the pointer. This is called when we -know we are in UTF-8 mode. */ - -#define GETCHARINC(c, eptr) \ - c = *eptr++; \ + +/* Get the next UTF-8 character, advancing the pointer. This is called when we +know we are in UTF-8 mode. */ + +#define GETCHARINC(c, eptr) \ + c = *eptr++; \ if (c >= 0xc0) GETUTF8INC(c, eptr); - + /* Get the next character, testing for UTF-8 mode, and advancing the pointer. This is called when we don't know if we are in UTF-8 mode. */ - -#define GETCHARINCTEST(c, eptr) \ - c = *eptr++; \ + +#define GETCHARINCTEST(c, eptr) \ + c = *eptr++; \ if (utf && c >= 0xc0) GETUTF8INC(c, eptr); /* Base macro to pick up the remaining bytes of a UTF-8 character, not advancing the pointer, incrementing the length. */ #define GETUTF8LEN(c, eptr, len) \ - { \ + { \ if ((c & 0x20) == 0) \ - { \ + { \ c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \ len++; \ - } \ + } \ else if ((c & 0x10) == 0) \ { \ c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ @@ -770,15 +770,15 @@ advancing the pointer, incrementing the length. */ ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \ len += 5; \ } \ - } - -/* Get the next UTF-8 character, not advancing the pointer, incrementing length -if there are extra bytes. This is called when we know we are in UTF-8 mode. */ - -#define GETCHARLEN(c, eptr, len) \ - c = *eptr; \ + } + +/* Get the next UTF-8 character, not advancing the pointer, incrementing length +if there are extra bytes. This is called when we know we are in UTF-8 mode. */ + +#define GETCHARLEN(c, eptr, len) \ + c = *eptr; \ if (c >= 0xc0) GETUTF8LEN(c, eptr, len); - + /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the pointer, incrementing length if there are extra bytes. This is called when we do not know if we are in UTF-8 mode. */ @@ -787,21 +787,21 @@ do not know if we are in UTF-8 mode. 
*/ c = *eptr; \ if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len); -/* If the pointer is not at the start of a character, move it back until -it is. This is called only in UTF-8 mode - we don't put a test within the macro -because almost all calls are already within a block of UTF-8 only code. */ - -#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- - +/* If the pointer is not at the start of a character, move it back until +it is. This is called only in UTF-8 mode - we don't put a test within the macro +because almost all calls are already within a block of UTF-8 only code. */ + +#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- + /* Same as above, just in the other direction. */ #define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++ - + /* Same as above, but it allows a fully customizable form. */ #define ACROSSCHAR(condition, eptr, action) \ while((condition) && ((eptr) & 0xc0) == 0x80) action - + #elif defined COMPILE_PCRE16 - + /* Tells the biggest code point which can be encoded as a single character. */ #define MAX_VALUE_FOR_SINGLE_CHAR 65535 @@ -1052,29 +1052,29 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */ #else #define VSPACE_LIST \ CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR -#endif - +#endif + #define VSPACE_BYTE_CASES \ case CHAR_LF: \ case CHAR_VT: \ case CHAR_FF: \ case CHAR_CR: \ case CHAR_NEL - + #define VSPACE_CASES VSPACE_BYTE_CASES #endif /* EBCDIC */ - + /* ------ End of whitespace macros ------ */ - -/* Private flags containing information about the compiled regex. They used to + +/* Private flags containing information about the compiled regex. They used to live at the top end of the options word, but that got almost full, so they were moved to a 16-bit flags word - which got almost full, so now they are in a 32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the restrictions on partial matching have been lifted. It remains for backwards compatibility. 
*/ - + #define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */ #define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */ #define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */ @@ -1090,7 +1090,7 @@ compatibility. */ #define PCRE_MLSET 0x00002000 /* match limit set by regex */ #define PCRE_RLSET 0x00004000 /* recursion limit set by regex */ #define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */ - + #if defined COMPILE_PCRE8 #define PCRE_MODE PCRE_MODE8 #elif defined COMPILE_PCRE16 @@ -1099,70 +1099,70 @@ compatibility. */ #define PCRE_MODE PCRE_MODE32 #endif #define PCRE_MODE_MASK (PCRE_MODE8 | PCRE_MODE16 | PCRE_MODE32) - + /* Flags for the "extra" block produced by pcre_study(). */ - + #define PCRE_STUDY_MAPPED 0x0001 /* a map of starting chars exists */ #define PCRE_STUDY_MINLEN 0x0002 /* a minimum length field exists */ -/* Masks for identifying the public options that are permitted at compile -time, run time, or study time, respectively. */ - -#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ - PCRE_NEWLINE_ANYCRLF) - +/* Masks for identifying the public options that are permitted at compile +time, run time, or study time, respectively. 
*/ + +#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ + PCRE_NEWLINE_ANYCRLF) + #define PUBLIC_COMPILE_OPTIONS \ - (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ - PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ + (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ + PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESS| \ PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF) - -#define PUBLIC_EXEC_OPTIONS \ + +#define PUBLIC_EXEC_OPTIONS \ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \ PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE) - -#define PUBLIC_DFA_EXEC_OPTIONS \ + +#define PUBLIC_DFA_EXEC_OPTIONS \ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \ PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ PCRE_NO_START_OPTIMIZE) - + #define PUBLIC_STUDY_OPTIONS \ (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED) - + #define PUBLIC_JIT_EXEC_OPTIONS \ (PCRE_NO_UTF8_CHECK|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|\ PCRE_NOTEMPTY_ATSTART|PCRE_PARTIAL_SOFT|PCRE_PARTIAL_HARD) - + /* Magic number to provide a small check against being handed junk. */ -#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ - +#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ + /* This variable is used to detect a loaded regular expression in different endianness. */ - + #define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */ - -/* The maximum remaining length of subject we are prepared to search for a -req_byte match. 
*/ - -#define REQ_BYTE_MAX 1000 - + +/* The maximum remaining length of subject we are prepared to search for a +req_byte match. */ + +#define REQ_BYTE_MAX 1000 + /* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in environments where these macros are defined elsewhere. Unfortunately, there is no way to do the same for the typedef. */ - -typedef int BOOL; - + +typedef int BOOL; + #ifndef FALSE -#define FALSE 0 -#define TRUE 1 +#define FALSE 0 +#define TRUE 1 #endif - + /* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal character constants like '*' because the compiler would emit their EBCDIC code, which is different from their ASCII/UTF-8 code. Instead we define macros for @@ -1764,42 +1764,42 @@ only. */ #endif /* SUPPORT_UTF */ -/* Escape items that are just an encoding of a particular data value. */ - +/* Escape items that are just an encoding of a particular data value. */ + #ifndef ESC_a #define ESC_a CHAR_BEL #endif -#ifndef ESC_e +#ifndef ESC_e #define ESC_e CHAR_ESC -#endif - -#ifndef ESC_f +#endif + +#ifndef ESC_f #define ESC_f CHAR_FF -#endif - -#ifndef ESC_n +#endif + +#ifndef ESC_n #define ESC_n CHAR_LF -#endif - -#ifndef ESC_r +#endif + +#ifndef ESC_r #define ESC_r CHAR_CR -#endif - -/* We can't officially use ESC_t because it is a POSIX reserved identifier -(presumably because of all the others like size_t). */ - -#ifndef ESC_tee +#endif + +/* We can't officially use ESC_t because it is a POSIX reserved identifier +(presumably because of all the others like size_t). 
*/ + +#ifndef ESC_tee #define ESC_tee CHAR_HT -#endif - -/* Codes for different types of Unicode property */ - -#define PT_ANY 0 /* Any property - matches all chars */ -#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ +#endif + +/* Codes for different types of Unicode property */ + +#define PT_ANY 0 /* Any property - matches all chars */ +#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ #define PT_GC 2 /* Specified general characteristic (e.g. L) */ #define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ -#define PT_SC 4 /* Script (e.g. Han) */ +#define PT_SC 4 /* Script (e.g. Han) */ #define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ #define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */ #define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ @@ -1807,7 +1807,7 @@ only. */ #define PT_CLIST 9 /* Pseudo-property: match character list */ #define PT_UCNC 10 /* Universal Character nameable character */ #define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ - + /* The following special properties are used only in XCLASS items, when POSIX classes are specified and PCRE_UCP is set - in other words, for Unicode handling of these classes. They are not available via the \p or \P escapes like @@ -1818,27 +1818,27 @@ table. */ #define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */ #define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */ -/* Flag bits and data types for the extended class (OP_XCLASS) for classes that +/* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ - + #define XCL_NOT 0x01 /* Flag: this is a negative class */ #define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ #define XCL_HASPROP 0x04 /* Flag: property checks are present. 
*/ - -#define XCL_END 0 /* Marks end of individual items */ -#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ -#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ -#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ - -/* These are escaped items that aren't just an encoding of a particular data + +#define XCL_END 0 /* Marks end of individual items */ +#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ +#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ +#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ +#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ + +/* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 for a data character. Also, they must appear in the same order as in the opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL mode rather than an escape sequence. It is also used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves like \N. - + The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. when PCRE_UCP is set and replacement of \d etc by \p sequences is required. They must be contiguous, and remain in order so that the replacements can be @@ -1851,17 +1851,17 @@ repeated. These are the types that consume characters. If any new escapes are put in between that don't consume a character, that code will have to change. 
*/ -enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, +enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k, ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu }; - - + + /********************** Opcode definitions ******************/ - + /****** NOTE NOTE NOTE ****** - + Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in order to the list of escapes immediately above. Furthermore, values up to OP_DOLLM must not be changed without adjusting the table called autoposstab in @@ -1883,23 +1883,23 @@ auto-possessified. */ #define LAST_AUTOTAB_LEFT_OP OP_EXTUNI #define LAST_AUTOTAB_RIGHT_OP OP_DOLLM -enum { - OP_END, /* 0 End of pattern */ - - /* Values corresponding to backslashed metacharacters */ - - OP_SOD, /* 1 Start of data: \A */ - OP_SOM, /* 2 Start of match (subject + offset): \G */ - OP_SET_SOM, /* 3 Set start of match (\K) */ - OP_NOT_WORD_BOUNDARY, /* 4 \B */ - OP_WORD_BOUNDARY, /* 5 \b */ - OP_NOT_DIGIT, /* 6 \D */ - OP_DIGIT, /* 7 \d */ - OP_NOT_WHITESPACE, /* 8 \S */ - OP_WHITESPACE, /* 9 \s */ - OP_NOT_WORDCHAR, /* 10 \W */ - OP_WORDCHAR, /* 11 \w */ - +enum { + OP_END, /* 0 End of pattern */ + + /* Values corresponding to backslashed metacharacters */ + + OP_SOD, /* 1 Start of data: \A */ + OP_SOM, /* 2 Start of match (subject + offset): \G */ + OP_SET_SOM, /* 3 Set start of match (\K) */ + OP_NOT_WORD_BOUNDARY, /* 4 \B */ + OP_WORD_BOUNDARY, /* 5 \b */ + OP_NOT_DIGIT, /* 6 \D */ + OP_DIGIT, /* 7 \d */ + OP_NOT_WHITESPACE, /* 8 \S */ + OP_WHITESPACE, /* 9 \s */ + OP_NOT_WORDCHAR, /* 10 \W */ + OP_WORDCHAR, /* 11 \w */ + OP_ANY, /* 12 Match any character except newline (\N) */ OP_ALLANY, /* 13 Match any character */ OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */ @@ -1913,81 +1913,81 @@ enum { OP_EXTUNI, /* 22 \X (extended Unicode 
sequence */ OP_EODN, /* 23 End of data or \n at end of data (\Z) */ OP_EOD, /* 24 End of data (\z) */ - + /* Line end assertions */ - + OP_DOLL, /* 25 End of line - not multiline */ OP_DOLLM, /* 26 End of line - multiline */ OP_CIRC, /* 27 Start of line - not multiline */ OP_CIRCM, /* 28 Start of line - multiline */ - + /* Single characters; caseful must precede the caseless ones */ - + OP_CHAR, /* 29 Match one character, casefully */ OP_CHARI, /* 30 Match one character, caselessly */ OP_NOT, /* 31 Match one character, not the given one, casefully */ OP_NOTI, /* 32 Match one character, not the given one, caselessly */ - + /* The following sets of 13 opcodes must always be kept in step because the offset from the first one is used to generate the others. */ - + /* Repeated characters; caseful must precede the caseless ones */ - + OP_STAR, /* 33 The maximizing and minimizing versions of */ OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ OP_PLUS, /* 35 the minimizing one second. 
*/ OP_MINPLUS, /* 36 */ OP_QUERY, /* 37 */ OP_MINQUERY, /* 38 */ - + OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/ OP_MINUPTO, /* 40 */ OP_EXACT, /* 41 Exactly n matches */ - + OP_POSSTAR, /* 42 Possessified star, caseful */ OP_POSPLUS, /* 43 Possessified plus, caseful */ OP_POSQUERY, /* 44 Posesssified query, caseful */ OP_POSUPTO, /* 45 Possessified upto, caseful */ - + /* Repeated characters; caseless must follow the caseful ones */ - + OP_STARI, /* 46 */ OP_MINSTARI, /* 47 */ OP_PLUSI, /* 48 */ OP_MINPLUSI, /* 49 */ OP_QUERYI, /* 50 */ OP_MINQUERYI, /* 51 */ - + OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */ OP_MINUPTOI, /* 53 */ OP_EXACTI, /* 54 */ - + OP_POSSTARI, /* 55 Possessified star, caseless */ OP_POSPLUSI, /* 56 Possessified plus, caseless */ OP_POSQUERYI, /* 57 Posesssified query, caseless */ OP_POSUPTOI, /* 58 Possessified upto, caseless */ - + /* The negated ones must follow the non-negated ones, and match them */ /* Negated repeated character, caseful; must precede the caseless ones */ - + OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ OP_NOTPLUS, /* 61 the minimizing one second. They must be in */ OP_NOTMINPLUS, /* 62 exactly the same order as those above. */ OP_NOTQUERY, /* 63 */ OP_NOTMINQUERY, /* 64 */ - + OP_NOTUPTO, /* 65 From 0 to n matches, caseful */ OP_NOTMINUPTO, /* 66 */ OP_NOTEXACT, /* 67 Exactly n matches */ - + OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */ OP_NOTPOSPLUS, /* 69 */ OP_NOTPOSQUERY, /* 70 */ OP_NOTPOSUPTO, /* 71 */ - + /* Negated repeated character, caseless; must follow the caseful ones */ - + OP_NOTSTARI, /* 72 */ OP_NOTMINSTARI, /* 73 */ OP_NOTPLUSI, /* 74 */ @@ -2084,28 +2084,28 @@ enum { OP_COND, /* 135 Conditional group */ /* These five must follow the previous five, in the same order. There's a - check for >= SBRA to distinguish the two sets. 
*/ - + check for >= SBRA to distinguish the two sets. */ + OP_SBRA, /* 136 Start of non-capturing bracket, check empty */ OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */ OP_SCBRA, /* 138 Start of capturing bracket, check empty */ OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */ OP_SCOND, /* 140 Conditional group, check empty */ - + /* The next two pairs must (respectively) be kept together. */ - + OP_CREF, /* 141 Used to hold a capture number as condition */ OP_DNCREF, /* 142 Used to point to duplicate names as a condition */ OP_RREF, /* 143 Used to hold a recursion number as condition */ OP_DNRREF, /* 144 Used to point to duplicate names as a condition */ OP_DEF, /* 145 The DEFINE condition */ - + OP_BRAZERO, /* 146 These two must remain together and in this */ OP_BRAMINZERO, /* 147 order. */ OP_BRAPOSZERO, /* 148 */ - /* These are backtracking control verbs */ - + /* These are backtracking control verbs */ + OP_MARK, /* 149 always has an argument */ OP_PRUNE, /* 150 */ OP_PRUNE_ARG, /* 151 same, but with argument */ @@ -2114,9 +2114,9 @@ enum { OP_THEN, /* 154 */ OP_THEN_ARG, /* 155 same, but with argument */ OP_COMMIT, /* 156 */ - - /* These are forced failure and success verbs */ - + + /* These are forced failure and success verbs */ + OP_FAIL, /* 157 */ OP_ACCEPT, /* 158 */ OP_ASSERT_ACCEPT, /* 159 Used inside assertions */ @@ -2131,40 +2131,40 @@ enum { some in the past. */ OP_TABLE_LENGTH -}; - +}; + /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro definitions that follow must also be updated to match. There are also tables called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ - -/* This macro defines textual names for all the opcodes. These are used only + +/* This macro defines textual names for all the opcodes. These are used only for debugging, and some of them are only partial names. 
The macro is referenced only in pcre_printint.c, which fills out the full names in many cases (and in some cases doesn't actually use these names at all). */ - -#define OP_NAME_LIST \ - "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \ + +#define OP_NAME_LIST \ + "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \ "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ - "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ - "extuni", "\\Z", "\\z", \ + "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ + "extuni", "\\Z", "\\z", \ "$", "$", "^", "^", "char", "chari", "not", "noti", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ - "*+","++", "?+", "{", \ + "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ - "*+","++", "?+", "{", \ + "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ "*+","++", "?+", "{", \ - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ - "*+","++", "?+", "{", \ - "*", "*?", "+", "+?", "?", "??", "{", "{", \ + "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ + "*+","++", "?+", "{", \ + "*", "*?", "+", "+?", "?", "??", "{", "{", \ "*+","++", "?+", "{", \ "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ "Recurse", "Callout", \ @@ -2181,29 +2181,29 @@ some cases doesn't actually use these names at all). */ "*THEN", "*THEN", "*COMMIT", "*FAIL", \ "*ACCEPT", "*ASSERT_ACCEPT", \ "Close", "Skip zero" - - -/* This macro defines the length of fixed length operations in the compiled -regex. The lengths are used when searching for specific things, and also in the -debugging printing of a compiled regex. We use a macro so that it can be -defined close to the definitions of the opcodes themselves. - -As things have been extended, some of these are no longer fixed lenths, but are -minima instead. For example, the length of a single-character repeat may vary -in UTF-8 mode. 
The code that uses this table must know about such things. */ - -#define OP_LENGTHS \ - 1, /* End */ \ - 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \ - 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \ + + +/* This macro defines the length of fixed length operations in the compiled +regex. The lengths are used when searching for specific things, and also in the +debugging printing of a compiled regex. We use a macro so that it can be +defined close to the definitions of the opcodes themselves. + +As things have been extended, some of these are no longer fixed lenths, but are +minima instead. For example, the length of a single-character repeat may vary +in UTF-8 mode. The code that uses this table must know about such things. */ + +#define OP_LENGTHS \ + 1, /* End */ \ + 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \ + 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \ 1, 1, 1, /* Any, AllAny, Anybyte */ \ 3, 3, /* \P, \p */ \ - 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ + 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ 1, /* \X */ \ 1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \ - 2, /* Char - the minimum length */ \ + 2, /* Char - the minimum length */ \ 2, /* Chari - the minimum length */ \ - 2, /* not */ \ + 2, /* not */ \ 2, /* noti */ \ /* Positive single-char repeats ** These are */ \ 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ @@ -2214,8 +2214,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ 2+IMM2_SIZE, /* exact I */ \ 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ - /* Negative single-char repeats - only for chars < 256 */ \ - 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ + /* Negative single-char repeats - only for chars < 256 */ \ + 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ 2+IMM2_SIZE, /* NOT exact */ \ 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ @@ -2223,85 +2223,85 @@ in UTF-8 mode. 
The code that uses this table must know about such things. */ 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ 2+IMM2_SIZE, /* NOT exact I */ \ 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ - /* Positive type repeats */ \ - 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ + /* Positive type repeats */ \ + 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ 2+IMM2_SIZE, /* Type exact */ \ 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ - /* Character class & ref repeats */ \ - 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ + /* Character class & ref repeats */ \ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ 1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \ 1+(32/sizeof(pcre_uchar)), /* CLASS */ \ 1+(32/sizeof(pcre_uchar)), /* NCLASS */ \ - 0, /* XCLASS - variable length */ \ + 0, /* XCLASS - variable length */ \ 1+IMM2_SIZE, /* REF */ \ 1+IMM2_SIZE, /* REFI */ \ 1+2*IMM2_SIZE, /* DNREF */ \ 1+2*IMM2_SIZE, /* DNREFI */ \ - 1+LINK_SIZE, /* RECURSE */ \ - 2+2*LINK_SIZE, /* CALLOUT */ \ - 1+LINK_SIZE, /* Alt */ \ - 1+LINK_SIZE, /* Ket */ \ - 1+LINK_SIZE, /* KetRmax */ \ - 1+LINK_SIZE, /* KetRmin */ \ + 1+LINK_SIZE, /* RECURSE */ \ + 2+2*LINK_SIZE, /* CALLOUT */ \ + 1+LINK_SIZE, /* Alt */ \ + 1+LINK_SIZE, /* Ket */ \ + 1+LINK_SIZE, /* KetRmax */ \ + 1+LINK_SIZE, /* KetRmin */ \ 1+LINK_SIZE, /* KetRpos */ \ 1+LINK_SIZE, /* Reverse */ \ - 1+LINK_SIZE, /* Assert */ \ - 1+LINK_SIZE, /* Assert not */ \ - 1+LINK_SIZE, /* Assert behind */ \ - 1+LINK_SIZE, /* Assert behind not */ \ - 1+LINK_SIZE, /* ONCE */ \ + 1+LINK_SIZE, /* Assert */ \ + 1+LINK_SIZE, /* Assert not */ \ + 1+LINK_SIZE, /* Assert behind */ \ + 1+LINK_SIZE, /* Assert behind not */ \ + 1+LINK_SIZE, /* ONCE */ \ 1+LINK_SIZE, /* ONCE_NC */ \ - 1+LINK_SIZE, /* BRA */ \ + 1+LINK_SIZE, /* BRA */ \ 1+LINK_SIZE, /* BRAPOS */ \ 1+LINK_SIZE+IMM2_SIZE, /* CBRA 
*/ \ 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ - 1+LINK_SIZE, /* COND */ \ - 1+LINK_SIZE, /* SBRA */ \ + 1+LINK_SIZE, /* COND */ \ + 1+LINK_SIZE, /* SBRA */ \ 1+LINK_SIZE, /* SBRAPOS */ \ 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ - 1+LINK_SIZE, /* SCOND */ \ + 1+LINK_SIZE, /* SCOND */ \ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \ - 1, /* DEF */ \ + 1, /* DEF */ \ 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 1, 3, /* SKIP, SKIP_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \ 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ - -/* A magic value for OP_RREF to indicate the "any recursion" condition. */ - -#define RREF_ANY 0xffff - + +/* A magic value for OP_RREF to indicate the "any recursion" condition. */ + +#define RREF_ANY 0xffff + /* Compile time error code numbers. They are given names so that they can more easily be tracked. When a new number is added, the table called eint in pcreposix.c must be updated. 
*/ - -enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, - ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, - ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, - ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, - ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, - ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, + +enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, + ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, + ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, + ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, + ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, + ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT }; - + /* JIT compiling modes. The function list is indexed by them. */ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, JIT_NUMBER_OF_COMPILE_MODES }; -/* The real format of the start of the pcre block; the index of names and the -code vector run on as long as necessary after the end. We store an explicit -offset to the name table so that if a regex is compiled on one host, saved, and -then run on another where the size of pointers is different, all might still +/* The real format of the start of the pcre block; the index of names and the +code vector run on as long as necessary after the end. We store an explicit +offset to the name table so that if a regex is compiled on one host, saved, and +then run on another where the size of pointers is different, all might still be well. - + The size of the structure must be a multiple of 8 bytes. 
For the case of compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so that there are an even number of pointers which therefore are a multiple of 8 @@ -2320,12 +2320,12 @@ when a compiled regex is reloaded on a host with different endianness. There is also similar byte-flipping code in pcretest.c, which is used for testing the byte-flipping features. It must also be kept in step. *** WARNING *** -*/ - +*/ + typedef struct real_pcre8_or_16 { - pcre_uint32 magic_number; - pcre_uint32 size; /* Total that was malloced */ - pcre_uint32 options; /* Public options */ + pcre_uint32 magic_number; + pcre_uint32 size; /* Total that was malloced */ + pcre_uint32 options; /* Public options */ pcre_uint32 flags; /* Private flags */ pcre_uint32 limit_match; /* Limit set from regex */ pcre_uint32 limit_recursion; /* Limit set from regex */ @@ -2334,20 +2334,20 @@ typedef struct real_pcre8_or_16 { pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ pcre_uint16 top_bracket; /* Highest numbered group */ pcre_uint16 top_backref; /* Highest numbered back reference */ - pcre_uint16 name_table_offset; /* Offset to name table that follows */ - pcre_uint16 name_entry_size; /* Size of any name items */ - pcre_uint16 name_count; /* Number of name items */ - pcre_uint16 ref_count; /* Reference count */ + pcre_uint16 name_table_offset; /* Offset to name table that follows */ + pcre_uint16 name_entry_size; /* Size of any name items */ + pcre_uint16 name_count; /* Number of name items */ + pcre_uint16 ref_count; /* Reference count */ pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */ pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */ pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */ const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ void *nullpad; /* NULL padding */ } real_pcre8_or_16; - + typedef struct real_pcre8_or_16 real_pcre; typedef struct real_pcre8_or_16 real_pcre16; - + typedef struct real_pcre32 { 
pcre_uint32 magic_number; pcre_uint32 size; /* Total that was malloced */ @@ -2389,16 +2389,16 @@ typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 #define REAL_PCRE_OPTIONS(re) (((REAL_PCRE*)re)->options) #define REAL_PCRE_FLAGS(re) (((REAL_PCRE*)re)->flags) -/* The format of the block used to store data from pcre_study(). The same -remark (see NOTE above) about extending this structure applies. */ - -typedef struct pcre_study_data { - pcre_uint32 size; /* Total that was malloced */ +/* The format of the block used to store data from pcre_study(). The same +remark (see NOTE above) about extending this structure applies. */ + +typedef struct pcre_study_data { + pcre_uint32 size; /* Total that was malloced */ pcre_uint32 flags; /* Private flags */ pcre_uint8 start_bits[32]; /* Starting char bits */ pcre_uint32 minlength; /* Minimum subject length */ -} pcre_study_data; - +} pcre_study_data; + /* Structure for building a chain of open capturing subpatterns during compiling, so that instructions to close them can be compiled when (*ACCEPT) is encountered. This is also used to identify subpatterns that contain recursive @@ -2419,10 +2419,10 @@ typedef struct named_group { pcre_uint32 number; /* Group number */ } named_group; -/* Structure for passing "static" information around between the functions -doing the compiling, so that they are thread-safe. */ - -typedef struct compile_data { +/* Structure for passing "static" information around between the functions +doing the compiling, so that they are thread-safe. 
*/ + +typedef struct compile_data { const pcre_uint8 *lcc; /* Points to lower casing table */ const pcre_uint8 *fcc; /* Points to case-flipping table */ const pcre_uint8 *cbits; /* Points to character type table */ @@ -2459,16 +2459,16 @@ typedef struct compile_data { int nltype; /* Newline type */ int nllen; /* Newline string length */ pcre_uchar nl[4]; /* Newline string when fixed length */ -} compile_data; - -/* Structure for maintaining a chain of pointers to the currently incomplete +} compile_data; + +/* Structure for maintaining a chain of pointers to the currently incomplete branches, for testing for left recursion while compiling. */ - -typedef struct branch_chain { - struct branch_chain *outer; + +typedef struct branch_chain { + struct branch_chain *outer; pcre_uchar *current_branch; -} branch_chain; - +} branch_chain; + /* Structure for mutual recursion detection. */ typedef struct recurse_check { @@ -2476,18 +2476,18 @@ typedef struct recurse_check { const pcre_uchar *group; } recurse_check; -/* Structure for items in a linked list that represents an explicit recursive +/* Structure for items in a linked list that represents an explicit recursive call within the pattern; used by pcre_exec(). */ - -typedef struct recursion_info { - struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ + +typedef struct recursion_info { + struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ unsigned int group_num; /* Number of group that was called */ int *offset_save; /* Pointer to start of saved offsets */ int saved_max; /* Number of saved offsets */ int saved_capture_last; /* Last capture number */ PCRE_PUCHAR subject_position; /* Position at start of recursion */ -} recursion_info; - +} recursion_info; + /* A similar structure for pcre_dfa_exec(). 
*/ typedef struct dfa_recursion_info { @@ -2496,24 +2496,24 @@ typedef struct dfa_recursion_info { PCRE_PUCHAR subject_position; } dfa_recursion_info; -/* Structure for building a chain of data for holding the values of the subject -pointer at the start of each subpattern, so as to detect when an empty string +/* Structure for building a chain of data for holding the values of the subject +pointer at the start of each subpattern, so as to detect when an empty string has been matched by a subpattern - to break infinite loops; used by pcre_exec(). */ - -typedef struct eptrblock { - struct eptrblock *epb_prev; + +typedef struct eptrblock { + struct eptrblock *epb_prev; PCRE_PUCHAR epb_saved_eptr; -} eptrblock; - - -/* Structure for passing "static" information around between the functions -doing traditional NFA matching, so that they are thread-safe. */ - -typedef struct match_data { - unsigned long int match_call_count; /* As it says */ - unsigned long int match_limit; /* As it says */ - unsigned long int match_limit_recursion; /* As it says */ +} eptrblock; + + +/* Structure for passing "static" information around between the functions +doing traditional NFA matching, so that they are thread-safe. */ + +typedef struct match_data { + unsigned long int match_call_count; /* As it says */ + unsigned long int match_limit; /* As it says */ + unsigned long int match_limit_recursion; /* As it says */ int *offset_vector; /* Offset vector */ int offset_end; /* One past the end */ int offset_max; /* The maximum usable for return data */ @@ -2560,12 +2560,12 @@ typedef struct match_data { #ifdef NO_RECURSE void *match_frames_base; /* For remembering malloc'd frames */ #endif -} match_data; - -/* A similar structure is used for the same purpose by the DFA matching -functions. */ - -typedef struct dfa_match_data { +} match_data; + +/* A similar structure is used for the same purpose by the DFA matching +functions. 
*/ + +typedef struct dfa_match_data { const pcre_uchar *start_code; /* Start of the compiled pattern */ const pcre_uchar *start_subject ; /* Start of the subject string */ const pcre_uchar *end_subject; /* End of subject string */ @@ -2579,41 +2579,41 @@ typedef struct dfa_match_data { pcre_uchar nl[4]; /* Newline string when fixed */ void *callout_data; /* To pass back to callouts */ dfa_recursion_info *recursive; /* Linked list of recursion data */ -} dfa_match_data; - -/* Bit definitions for entries in the pcre_ctypes table. */ - -#define ctype_space 0x01 -#define ctype_letter 0x02 -#define ctype_digit 0x04 -#define ctype_xdigit 0x08 -#define ctype_word 0x10 /* alphanumeric or '_' */ -#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ - -/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set -of bits for a class map. Some classes are built by combining these tables. */ - -#define cbit_space 0 /* [:space:] or \s */ -#define cbit_xdigit 32 /* [:xdigit:] */ -#define cbit_digit 64 /* [:digit:] or \d */ -#define cbit_upper 96 /* [:upper:] */ -#define cbit_lower 128 /* [:lower:] */ -#define cbit_word 160 /* [:word:] or \w */ -#define cbit_graph 192 /* [:graph:] */ -#define cbit_print 224 /* [:print:] */ -#define cbit_punct 256 /* [:punct:] */ -#define cbit_cntrl 288 /* [:cntrl:] */ -#define cbit_length 320 /* Length of the cbits table */ - -/* Offsets of the various tables from the base tables pointer, and -total length. */ - -#define lcc_offset 0 -#define fcc_offset 256 -#define cbits_offset 512 -#define ctypes_offset (cbits_offset + cbit_length) -#define tables_length (ctypes_offset + 256) - +} dfa_match_data; + +/* Bit definitions for entries in the pcre_ctypes table. 
*/ + +#define ctype_space 0x01 +#define ctype_letter 0x02 +#define ctype_digit 0x04 +#define ctype_xdigit 0x08 +#define ctype_word 0x10 /* alphanumeric or '_' */ +#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ + +/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set +of bits for a class map. Some classes are built by combining these tables. */ + +#define cbit_space 0 /* [:space:] or \s */ +#define cbit_xdigit 32 /* [:xdigit:] */ +#define cbit_digit 64 /* [:digit:] or \d */ +#define cbit_upper 96 /* [:upper:] */ +#define cbit_lower 128 /* [:lower:] */ +#define cbit_word 160 /* [:word:] or \w */ +#define cbit_graph 192 /* [:graph:] */ +#define cbit_print 224 /* [:print:] */ +#define cbit_punct 256 /* [:punct:] */ +#define cbit_cntrl 288 /* [:cntrl:] */ +#define cbit_length 320 /* Length of the cbits table */ + +/* Offsets of the various tables from the base tables pointer, and +total length. */ + +#define lcc_offset 0 +#define fcc_offset 256 +#define cbits_offset 512 +#define ctypes_offset (cbits_offset + cbit_length) +#define tables_length (ctypes_offset + 256) + /* Internal function and data prefixes. */ #if defined COMPILE_PCRE8 @@ -2641,23 +2641,23 @@ total length. */ #error Unsupported compiling mode #endif /* COMPILE_PCRE[8|16|32] */ -/* Layout of the UCP type table that translates property names into types and -codes. Each entry used to point directly to a name, but to reduce the number of -relocations in shared libraries, it now has an offset into a single string -instead. */ - -typedef struct { - pcre_uint16 name_offset; - pcre_uint16 type; - pcre_uint16 value; -} ucp_type_table; - - -/* Internal shared data tables. These are tables that are used by more than one -of the exported public functions. They have to be "external" in the C sense, -but are not part of the PCRE public API. The data for these tables is in the -pcre_tables.c module. 
*/ - +/* Layout of the UCP type table that translates property names into types and +codes. Each entry used to point directly to a name, but to reduce the number of +relocations in shared libraries, it now has an offset into a single string +instead. */ + +typedef struct { + pcre_uint16 name_offset; + pcre_uint16 type; + pcre_uint16 value; +} ucp_type_table; + + +/* Internal shared data tables. These are tables that are used by more than one +of the exported public functions. They have to be "external" in the C sense, +but are not part of the PCRE public API. The data for these tables is in the +pcre_tables.c module. */ + #ifdef COMPILE_PCRE8 extern const int PRIV(utf8_table1)[]; extern const int PRIV(utf8_table1_size); @@ -2665,25 +2665,25 @@ extern const int PRIV(utf8_table2)[]; extern const int PRIV(utf8_table3)[]; extern const pcre_uint8 PRIV(utf8_table4)[]; #endif /* COMPILE_PCRE8 */ - + extern const char PRIV(utt_names)[]; extern const ucp_type_table PRIV(utt)[]; extern const int PRIV(utt_size); - + extern const pcre_uint8 PRIV(OP_lengths)[]; extern const pcre_uint8 PRIV(default_tables)[]; - + extern const pcre_uint32 PRIV(hspace_list)[]; extern const pcre_uint32 PRIV(vspace_list)[]; - - -/* Internal shared functions. These are functions that are used by more than -one of the exported public functions. They have to be "external" in the C -sense, but are not part of the PCRE public API. */ - + + +/* Internal shared functions. These are functions that are used by more than +one of the exported public functions. They have to be "external" in the C +sense, but are not part of the PCRE public API. */ + /* String comparison functions. 
*/ #if defined COMPILE_PCRE8 - + #define STRCMP_UC_UC(str1, str2) \ strcmp((char *)(str1), (char *)(str2)) #define STRCMP_UC_C8(str1, str2) \ @@ -2754,8 +2754,8 @@ extern int PRIV(jit_exec)(const PUBL(extra) *, extern void PRIV(jit_free)(void *); extern int PRIV(jit_get_size)(void *); extern const char* PRIV(jit_get_target)(void); -#endif - +#endif + /* Unicode character database (UCD) */ typedef struct { @@ -2804,4 +2804,4 @@ extern const int PRIV(ucp_typerange)[]; #endif -/* End of pcre_internal.h */ +/* End of pcre_internal.h */ diff --git a/contrib/libs/pcre/pcre_maketables.c b/contrib/libs/pcre/pcre_maketables.c index 5328e966733..873b46aa914 100644 --- a/contrib/libs/pcre/pcre_maketables.c +++ b/contrib/libs/pcre/pcre_maketables.c @@ -1,74 +1,74 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_maketables(), which builds -character tables for PCRE in the current locale. The file is compiled on its -own as part of the PCRE library. However, it is also included in the -compilation of dftables.c, in which case the macro DFTABLES is defined. */ - - -#ifndef DFTABLES -# ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_maketables(), which builds +character tables for PCRE in the current locale. The file is compiled on its +own as part of the PCRE library. However, it is also included in the +compilation of dftables.c, in which case the macro DFTABLES is defined. */ + + +#ifndef DFTABLES +# ifdef HAVE_CONFIG_H # include "pcre_config.h" -# endif -# include "pcre_internal.h" -#endif - - -/************************************************* -* Create PCRE character tables * -*************************************************/ - -/* This function builds a set of character tables for use by PCRE and returns -a pointer to them. 
They are build using the ctype functions, and consequently -their contents will depend upon the current locale setting. When compiled as +# endif +# include "pcre_internal.h" +#endif + + +/************************************************* +* Create PCRE character tables * +*************************************************/ + +/* This function builds a set of character tables for use by PCRE and returns +a pointer to them. They are build using the ctype functions, and consequently +their contents will depend upon the current locale setting. When compiled as part of the library, the store is obtained via PUBL(malloc)(), but when compiled inside dftables, use malloc(). - -Arguments: none -Returns: pointer to the contiguous block of data -*/ - + +Arguments: none +Returns: pointer to the contiguous block of data +*/ + #if defined COMPILE_PCRE8 -const unsigned char * -pcre_maketables(void) +const unsigned char * +pcre_maketables(void) #elif defined COMPILE_PCRE16 const unsigned char * pcre16_maketables(void) @@ -76,30 +76,30 @@ pcre16_maketables(void) const unsigned char * pcre32_maketables(void) #endif -{ -unsigned char *yield, *p; -int i; - -#ifndef DFTABLES +{ +unsigned char *yield, *p; +int i; + +#ifndef DFTABLES yield = (unsigned char*)(PUBL(malloc))(tables_length); -#else -yield = (unsigned char*)malloc(tables_length); -#endif - -if (yield == NULL) return NULL; -p = yield; - -/* First comes the lower casing table */ - -for (i = 0; i < 256; i++) *p++ = tolower(i); - -/* Next the case-flipping table */ - -for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); - -/* Then the character class tables. Don't try to be clever and save effort on +#else +yield = (unsigned char*)malloc(tables_length); +#endif + +if (yield == NULL) return NULL; +p = yield; + +/* First comes the lower casing table */ + +for (i = 0; i < 256; i++) *p++ = tolower(i); + +/* Next the case-flipping table */ + +for (i = 0; i < 256; i++) *p++ = islower(i)? 
toupper(i) : tolower(i); + +/* Then the character class tables. Don't try to be clever and save effort on exclusive ones - in some locales things may be different. - + Note that the table for "space" includes everything "isspace" gives, including VT in the default locale. This makes it work for the POSIX class [:space:]. From release 8.34 is is also correct for Perl space, because Perl added VT at @@ -110,47 +110,47 @@ being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must test for alnum specially. */ -memset(p, 0, cbit_length); -for (i = 0; i < 256; i++) - { - if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); - if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7); - if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7); - if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7); - if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); - if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); - if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); - if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); - if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); - if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); - if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); - } -p += cbit_length; - +memset(p, 0, cbit_length); +for (i = 0; i < 256; i++) + { + if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); + if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7); + if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7); + if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7); + if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); + if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); + if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); + if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); + if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); + if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); + if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); + } +p += cbit_length; + /* Finally, the character type table. 
In this, we used to exclude VT from the white space chars, because Perl didn't recognize it as such for \s and for comments within regexes. However, Perl changed at release 5.18, so PCRE changed at release 8.34. */ - -for (i = 0; i < 256; i++) - { - int x = 0; + +for (i = 0; i < 256; i++) + { + int x = 0; if (isspace(i)) x += ctype_space; - if (isalpha(i)) x += ctype_letter; - if (isdigit(i)) x += ctype_digit; - if (isxdigit(i)) x += ctype_xdigit; - if (isalnum(i) || i == '_') x += ctype_word; - - /* Note: strchr includes the terminating zero in the characters it considers. - In this instance, that is ok because we want binary zero to be flagged as a - meta-character, which in this sense is any character that terminates a run - of data characters. */ - - if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; - *p++ = x; - } - -return yield; -} - -/* End of pcre_maketables.c */ + if (isalpha(i)) x += ctype_letter; + if (isdigit(i)) x += ctype_digit; + if (isxdigit(i)) x += ctype_xdigit; + if (isalnum(i) || i == '_') x += ctype_word; + + /* Note: strchr includes the terminating zero in the characters it considers. + In this instance, that is ok because we want binary zero to be flagged as a + meta-character, which in this sense is any character that terminates a run + of data characters. */ + + if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; + *p++ = x; + } + +return yield; +} + +/* End of pcre_maketables.c */ diff --git a/contrib/libs/pcre/pcre_newline.c b/contrib/libs/pcre/pcre_newline.c index c8b5e374ae1..252cad9c9e3 100644 --- a/contrib/libs/pcre/pcre_newline.c +++ b/contrib/libs/pcre/pcre_newline.c @@ -1,81 +1,81 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - - -/* This module contains internal functions for testing newlines when more than -one kind of newline is to be recognized. When a newline is found, its length is -returned. In principle, we could implement several newline "types", each -referring to a different set of newline characters. At present, PCRE supports -only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, -and NLTYPE_ANY. The full list of Unicode newline characters is taken from -http://unicode.org/unicode/reports/tr18/. */ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains internal functions for testing newlines when more than +one kind of newline is to be recognized. When a newline is found, its length is +returned. In principle, we could implement several newline "types", each +referring to a different set of newline characters. At present, PCRE supports +only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, +and NLTYPE_ANY. The full list of Unicode newline characters is taken from +http://unicode.org/unicode/reports/tr18/. */ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - - - -/************************************************* -* Check for newline at given position * -*************************************************/ - -/* It is guaranteed that the initial value of ptr is less than the end of the -string that is being processed. - -Arguments: - ptr pointer to possible newline - type the newline type - endptr pointer to the end of the string - lenptr where to return the length +#endif + +#include "pcre_internal.h" + + + +/************************************************* +* Check for newline at given position * +*************************************************/ + +/* It is guaranteed that the initial value of ptr is less than the end of the +string that is being processed. 
+ +Arguments: + ptr pointer to possible newline + type the newline type + endptr pointer to the end of the string + lenptr where to return the length utf TRUE if in utf mode - -Returns: TRUE or FALSE -*/ - -BOOL + +Returns: TRUE or FALSE +*/ + +BOOL PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr, BOOL utf) -{ +{ pcre_uint32 c; (void)utf; #ifdef SUPPORT_UTF @@ -86,21 +86,21 @@ if (utf) else #endif /* SUPPORT_UTF */ c = *ptr; - + /* Note that this function is called only for ANY or ANYCRLF. */ -if (type == NLTYPE_ANYCRLF) switch(c) - { +if (type == NLTYPE_ANYCRLF) switch(c) + { case CHAR_LF: *lenptr = 1; return TRUE; case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; return TRUE; - default: return FALSE; - } - -/* NLTYPE_ANY */ - -else switch(c) - { + default: return FALSE; + } + +/* NLTYPE_ANY */ + +else switch(c) + { #ifdef EBCDIC case CHAR_NEL: #endif @@ -115,8 +115,8 @@ else switch(c) #ifndef EBCDIC #ifdef COMPILE_PCRE8 case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; - case 0x2028: /* LS */ - case 0x2029: *lenptr = 3; return TRUE; /* PS */ + case 0x2028: /* LS */ + case 0x2029: *lenptr = 3; return TRUE; /* PS */ #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ case CHAR_NEL: case 0x2028: /* LS */ @@ -124,62 +124,62 @@ else switch(c) #endif /* COMPILE_PCRE8 */ #endif /* Not EBCDIC */ - default: return FALSE; - } -} - - - -/************************************************* -* Check for newline at previous position * -*************************************************/ - -/* It is guaranteed that the initial value of ptr is greater than the start of -the string that is being processed. 
- -Arguments: - ptr pointer to possible newline - type the newline type - startptr pointer to the start of the string - lenptr where to return the length + default: return FALSE; + } +} + + + +/************************************************* +* Check for newline at previous position * +*************************************************/ + +/* It is guaranteed that the initial value of ptr is greater than the start of +the string that is being processed. + +Arguments: + ptr pointer to possible newline + type the newline type + startptr pointer to the start of the string + lenptr where to return the length utf TRUE if in utf mode - -Returns: TRUE or FALSE -*/ - -BOOL + +Returns: TRUE or FALSE +*/ + +BOOL PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr, BOOL utf) -{ +{ pcre_uint32 c; (void)utf; -ptr--; +ptr--; #ifdef SUPPORT_UTF if (utf) - { - BACKCHAR(ptr); - GETCHAR(c, ptr); - } + { + BACKCHAR(ptr); + GETCHAR(c, ptr); + } else #endif /* SUPPORT_UTF */ c = *ptr; - + /* Note that this function is called only for ANY or ANYCRLF. */ -if (type == NLTYPE_ANYCRLF) switch(c) - { +if (type == NLTYPE_ANYCRLF) switch(c) + { case CHAR_LF: *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; return TRUE; case CHAR_CR: *lenptr = 1; return TRUE; - default: return FALSE; - } - + default: return FALSE; + } + /* NLTYPE_ANY */ -else switch(c) - { +else switch(c) + { case CHAR_LF: *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 
2 : 1; return TRUE; @@ -203,8 +203,8 @@ else switch(c) #endif /* COMPILE_PCRE8 */ #endif /* NotEBCDIC */ - default: return FALSE; - } -} - -/* End of pcre_newline.c */ + default: return FALSE; + } +} + +/* End of pcre_newline.c */ diff --git a/contrib/libs/pcre/pcre_ord2utf8.c b/contrib/libs/pcre/pcre_ord2utf8.c index 827a9fa22af..e608a29a30e 100644 --- a/contrib/libs/pcre/pcre_ord2utf8.c +++ b/contrib/libs/pcre/pcre_ord2utf8.c @@ -1,94 +1,94 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This file contains a private PCRE function that converts an ordinal -character value into a UTF8 string. */ - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This file contains a private PCRE function that converts an ordinal +character value into a UTF8 string. */ + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - +#endif + #define COMPILE_PCRE8 -#include "pcre_internal.h" - -/************************************************* -* Convert character value to UTF-8 * -*************************************************/ - +#include "pcre_internal.h" + +/************************************************* +* Convert character value to UTF-8 * +*************************************************/ + /* This function takes an integer value in the range 0 - 0x10ffff and encodes it as a UTF-8 character in 1 to 4 pcre_uchars. 
- -Arguments: - cvalue the character value + +Arguments: + cvalue the character value buffer pointer to buffer for result - at least 6 pcre_uchars long - -Returns: number of characters placed in the buffer -*/ - + +Returns: number of characters placed in the buffer +*/ + unsigned -int +int PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer) -{ +{ #ifdef SUPPORT_UTF -register int i, j; +register int i, j; for (i = 0; i < PRIV(utf8_table1_size); i++) if ((int)cvalue <= PRIV(utf8_table1)[i]) break; -buffer += i; -for (j = i; j > 0; j--) - { - *buffer-- = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } +buffer += i; +for (j = i; j > 0; j--) + { + *buffer-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } *buffer = PRIV(utf8_table2)[i] | cvalue; -return i + 1; +return i + 1; -#else +#else (void)(cvalue); /* Keep compiler happy; this function won't ever be */ (void)(buffer); /* called when SUPPORT_UTF is not defined. */ return 0; #endif -} - -/* End of pcre_ord2utf8.c */ +} + +/* End of pcre_ord2utf8.c */ diff --git a/contrib/libs/pcre/pcre_refcount.c b/contrib/libs/pcre/pcre_refcount.c index 5de3422a14b..65a3c23a8f1 100644 --- a/contrib/libs/pcre/pcre_refcount.c +++ b/contrib/libs/pcre/pcre_refcount.c @@ -1,76 +1,76 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_refcount(), which is an -auxiliary function that can be used to maintain a reference count in a compiled -pattern data block. This might be helpful in applications where the block is -shared by different users. 
*/ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_refcount(), which is an +auxiliary function that can be used to maintain a reference count in a compiled +pattern data block. This might be helpful in applications where the block is +shared by different users. 
*/ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Maintain reference count * -*************************************************/ - -/* The reference count is a 16-bit field, initialized to zero. It is not -possible to transfer a non-zero count from one host to a different host that -has a different byte order - though I can't see why anyone in their right mind -would ever want to do that! - -Arguments: - argument_re points to compiled code - adjust value to add to the count - -Returns: the (possibly updated) count value (a non-negative number), or - a negative error number -*/ - +#endif + +#include "pcre_internal.h" + + +/************************************************* +* Maintain reference count * +*************************************************/ + +/* The reference count is a 16-bit field, initialized to zero. It is not +possible to transfer a non-zero count from one host to a different host that +has a different byte order - though I can't see why anyone in their right mind +would ever want to do that! 
+ +Arguments: + argument_re points to compiled code + adjust value to add to the count + +Returns: the (possibly updated) count value (a non-negative number), or + a negative error number +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_refcount(pcre *argument_re, int adjust) +pcre_refcount(pcre *argument_re, int adjust) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_refcount(pcre16 *argument_re, int adjust) @@ -78,15 +78,15 @@ pcre16_refcount(pcre16 *argument_re, int adjust) PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_refcount(pcre32 *argument_re, int adjust) #endif -{ +{ REAL_PCRE *re = (REAL_PCRE *)argument_re; -if (re == NULL) return PCRE_ERROR_NULL; +if (re == NULL) return PCRE_ERROR_NULL; if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; -re->ref_count = (-adjust > re->ref_count)? 0 : - (adjust + re->ref_count > 65535)? 65535 : - re->ref_count + adjust; -return re->ref_count; -} - -/* End of pcre_refcount.c */ +re->ref_count = (-adjust > re->ref_count)? 0 : + (adjust + re->ref_count > 65535)? 65535 : + re->ref_count + adjust; +return re->ref_count; +} + +/* End of pcre_refcount.c */ diff --git a/contrib/libs/pcre/pcre_study.c b/contrib/libs/pcre/pcre_study.c index d4ee0295d4d..b6088fe8827 100644 --- a/contrib/libs/pcre/pcre_study.c +++ b/contrib/libs/pcre/pcre_study.c @@ -1,62 +1,62 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_study(), along with local -supporting functions. */ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_study(), along with local +supporting functions. 
*/ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - +#endif + +#include "pcre_internal.h" + #define SET_BIT(c) start_bits[c/8] |= (1 << (c&7)) - -/* Returns from set_start_bits() */ - + +/* Returns from set_start_bits() */ + enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; - - -/************************************************* + + +/************************************************* * Find the minimum subject length for a group * *************************************************/ @@ -613,24 +613,24 @@ for (;;) /************************************************* -* Set a bit and maybe its alternate case * -*************************************************/ - +* Set a bit and maybe its alternate case * +*************************************************/ + /* Given a character, set its first byte's bit in the table, and also the corresponding bit for the other version of a letter if we are caseless. In UTF-8 mode, for characters greater than 127, we can only do the caseless thing when Unicode property support is available. 
- -Arguments: - start_bits points to the bit map + +Arguments: + start_bits points to the bit map p points to the character - caseless the caseless flag - cd the block with char table pointers + caseless the caseless flag + cd the block with char table pointers utf TRUE for UTF-8 / UTF-16 / UTF-32 mode - + Returns: pointer after the character -*/ - +*/ + static const pcre_uchar * set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, compile_data *cd, BOOL utf) @@ -719,10 +719,10 @@ Arguments: Returns: nothing */ -static void +static void set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit, compile_data *cd) -{ +{ register pcre_uint32 c; for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; #if defined SUPPORT_UTF && defined COMPILE_PCRE8 @@ -737,13 +737,13 @@ for (c = 128; c < 256; c++) } } #endif -} - - +} + + /************************************************* * Set bits for a negative character type * *************************************************/ - + /* This function sets starting bits for a negative character type such as \D. In UTF-8 mode, we can only do a direct setting for bytes less than 128, as otherwise there can be confusion with bytes in the middle of UTF-8 characters. @@ -774,78 +774,78 @@ if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; -/************************************************* -* Create bitmap of starting bytes * -*************************************************/ - -/* This function scans a compiled unanchored expression recursively and -attempts to build a bitmap of the set of possible starting bytes. As time goes -by, we may be able to get more clever at doing this. The SSB_CONTINUE return is -useful for parenthesized groups in patterns such as (a*)b where the group -provides some optional starting bytes but scanning must continue at the outer -level to find at least one mandatory byte. 
At the outermost level, this -function fails unless the result is SSB_DONE. - -Arguments: - code points to an expression - start_bits points to a 32-byte table, initialized to 0 +/************************************************* +* Create bitmap of starting bytes * +*************************************************/ + +/* This function scans a compiled unanchored expression recursively and +attempts to build a bitmap of the set of possible starting bytes. As time goes +by, we may be able to get more clever at doing this. The SSB_CONTINUE return is +useful for parenthesized groups in patterns such as (a*)b where the group +provides some optional starting bytes but scanning must continue at the outer +level to find at least one mandatory byte. At the outermost level, this +function fails unless the result is SSB_DONE. + +Arguments: + code points to an expression + start_bits points to a 32-byte table, initialized to 0 utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode - cd the block with char table pointers - -Returns: SSB_FAIL => Failed to find any starting bytes - SSB_DONE => Found mandatory starting bytes - SSB_CONTINUE => Found optional starting bytes + cd the block with char table pointers + +Returns: SSB_FAIL => Failed to find any starting bytes + SSB_DONE => Found mandatory starting bytes + SSB_CONTINUE => Found optional starting bytes SSB_UNKNOWN => Hit an unrecognized opcode -*/ - -static int +*/ + +static int set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, compile_data *cd) -{ +{ register pcre_uint32 c; -int yield = SSB_DONE; +int yield = SSB_DONE; #if defined SUPPORT_UTF && defined COMPILE_PCRE8 int table_limit = utf? 16:32; #else int table_limit = 32; #endif - -#if 0 -/* ========================================================================= */ -/* The following comment and code was inserted in January 1999. In May 2006, -when it was observed to cause compiler warnings about unused values, I took it -out again. 
If anybody is still using OS/2, they will have to put it back -manually. */ - -/* This next statement and the later reference to dummy are here in order to -trick the optimizer of the IBM C compiler for OS/2 into generating correct -code. Apparently IBM isn't going to fix the problem, and we would rather not -disable optimization (in this module it actually makes a big difference, and -the pcre module can use all the optimization it can get). */ - -volatile int dummy; -/* ========================================================================= */ -#endif - -do - { - BOOL try_next = TRUE; + +#if 0 +/* ========================================================================= */ +/* The following comment and code was inserted in January 1999. In May 2006, +when it was observed to cause compiler warnings about unused values, I took it +out again. If anybody is still using OS/2, they will have to put it back +manually. */ + +/* This next statement and the later reference to dummy are here in order to +trick the optimizer of the IBM C compiler for OS/2 into generating correct +code. Apparently IBM isn't going to fix the problem, and we would rather not +disable optimization (in this module it actually makes a big difference, and +the pcre module can use all the optimization it can get). */ + +volatile int dummy; +/* ========================================================================= */ +#endif + +do + { + BOOL try_next = TRUE; const pcre_uchar *tcode = code + 1 + LINK_SIZE; - + if (*code == OP_CBRA || *code == OP_SCBRA || *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE; - while (try_next) /* Loop for items in this branch */ - { - int rc; + while (try_next) /* Loop for items in this branch */ + { + int rc; - switch(*tcode) - { + switch(*tcode) + { /* If we reach something we don't understand, it means a new opcode has been created that hasn't been added to this code. Hopefully this problem will be discovered during testing. 
*/ - - default: + + default: return SSB_UNKNOWN; /* Fail for a valid opcode that implies no starting bits. */ @@ -920,8 +920,8 @@ do case OP_SOM: case OP_THEN: case OP_THEN_ARG: - return SSB_FAIL; - + return SSB_FAIL; + /* A "real" property test implies no starting bits, but the fake property PT_CLIST identifies a list of characters. These lists are short, as they are used for characters with more than one "other case", so there is no @@ -954,80 +954,80 @@ do tcode++; break; - /* If we hit a bracket or a positive lookahead assertion, recurse to set - bits from within the subpattern. If it can't find anything, we have to - give up. If it finds some mandatory character(s), we are done for this - branch. Otherwise, carry on scanning after the subpattern. */ - - case OP_BRA: - case OP_SBRA: - case OP_CBRA: - case OP_SCBRA: + /* If we hit a bracket or a positive lookahead assertion, recurse to set + bits from within the subpattern. If it can't find anything, we have to + give up. If it finds some mandatory character(s), we are done for this + branch. Otherwise, carry on scanning after the subpattern. */ + + case OP_BRA: + case OP_SBRA: + case OP_CBRA: + case OP_SCBRA: case OP_BRAPOS: case OP_SBRAPOS: case OP_CBRAPOS: case OP_SCBRAPOS: - case OP_ONCE: + case OP_ONCE: case OP_ONCE_NC: - case OP_ASSERT: + case OP_ASSERT: rc = set_start_bits(tcode, start_bits, utf, cd); if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; - if (rc == SSB_DONE) try_next = FALSE; else - { - do tcode += GET(tcode, 1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - } - break; - - /* If we hit ALT or KET, it means we haven't found anything mandatory in - this branch, though we might have found something optional. For ALT, we - continue with the next alternative, but we have to arrange that the final - result from subpattern is SSB_CONTINUE rather than SSB_DONE. 
For KET, - return SSB_CONTINUE: if this is the top level, that indicates failure, - but after a nested subpattern, it causes scanning to continue. */ - - case OP_ALT: - yield = SSB_CONTINUE; - try_next = FALSE; - break; - - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: + if (rc == SSB_DONE) try_next = FALSE; else + { + do tcode += GET(tcode, 1); while (*tcode == OP_ALT); + tcode += 1 + LINK_SIZE; + } + break; + + /* If we hit ALT or KET, it means we haven't found anything mandatory in + this branch, though we might have found something optional. For ALT, we + continue with the next alternative, but we have to arrange that the final + result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET, + return SSB_CONTINUE: if this is the top level, that indicates failure, + but after a nested subpattern, it causes scanning to continue. */ + + case OP_ALT: + yield = SSB_CONTINUE; + try_next = FALSE; + break; + + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: case OP_KETRPOS: - return SSB_CONTINUE; - - /* Skip over callout */ - - case OP_CALLOUT: - tcode += 2 + 2*LINK_SIZE; - break; - - /* Skip over lookbehind and negative lookahead assertions */ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do tcode += GET(tcode, 1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - break; - - /* BRAZERO does the bracket, but carries on. */ - - case OP_BRAZERO: - case OP_BRAMINZERO: + return SSB_CONTINUE; + + /* Skip over callout */ + + case OP_CALLOUT: + tcode += 2 + 2*LINK_SIZE; + break; + + /* Skip over lookbehind and negative lookahead assertions */ + + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + do tcode += GET(tcode, 1); while (*tcode == OP_ALT); + tcode += 1 + LINK_SIZE; + break; + + /* BRAZERO does the bracket, but carries on. 
*/ + + case OP_BRAZERO: + case OP_BRAMINZERO: case OP_BRAPOSZERO: rc = set_start_bits(++tcode, start_bits, utf, cd); if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; -/* ========================================================================= - See the comment at the head of this function concerning the next line, - which was an old fudge for the benefit of OS/2. - dummy = 1; - ========================================================================= */ - do tcode += GET(tcode,1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - break; - +/* ========================================================================= + See the comment at the head of this function concerning the next line, + which was an old fudge for the benefit of OS/2. + dummy = 1; + ========================================================================= */ + do tcode += GET(tcode,1); while (*tcode == OP_ALT); + tcode += 1 + LINK_SIZE; + break; + /* SKIPZERO skips the bracket. */ case OP_SKIPZERO: @@ -1036,17 +1036,17 @@ do tcode += 1 + LINK_SIZE; break; - /* Single-char * or ? sets the bit and tries the next item */ - - case OP_STAR: - case OP_MINSTAR: - case OP_POSSTAR: - case OP_QUERY: - case OP_MINQUERY: - case OP_POSQUERY: + /* Single-char * or ? 
sets the bit and tries the next item */ + + case OP_STAR: + case OP_MINSTAR: + case OP_POSSTAR: + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); - break; - + break; + case OP_STARI: case OP_MINSTARI: case OP_POSSTARI: @@ -1056,33 +1056,33 @@ do tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); break; - /* Single-char upto sets the bit and tries the next */ - - case OP_UPTO: - case OP_MINUPTO: - case OP_POSUPTO: + /* Single-char upto sets the bit and tries the next */ + + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf); - break; - + break; + case OP_UPTOI: case OP_MINUPTOI: case OP_POSUPTOI: tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf); break; - /* At least one single char sets the bit and stops */ - + /* At least one single char sets the bit and stops */ + case OP_EXACT: tcode += IMM2_SIZE; /* Fall through */ - case OP_CHAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: + case OP_CHAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); - try_next = FALSE; - break; - + try_next = FALSE; + break; + case OP_EXACTI: tcode += IMM2_SIZE; /* Fall through */ @@ -1093,7 +1093,7 @@ do (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); try_next = FALSE; break; - + /* Special spacing and line-terminating items. These recognize specific lists of characters. The difference between VSPACE and ANYNL is that the latter can match the two-character CRLF sequence, but that is not @@ -1162,74 +1162,74 @@ do properties. Therefore, these apply in the case when only characters less than 256 are recognized to match the types. 
*/ - case OP_NOT_DIGIT: + case OP_NOT_DIGIT: set_nottype_bits(start_bits, cbit_digit, table_limit, cd); - try_next = FALSE; - break; - - case OP_DIGIT: + try_next = FALSE; + break; + + case OP_DIGIT: set_type_bits(start_bits, cbit_digit, table_limit, cd); - try_next = FALSE; - break; - + try_next = FALSE; + break; + /* The cbit_space table has vertical tab as whitespace; we no longer have to play fancy tricks because Perl added VT to its whitespace at release 5.18. PCRE added it at release 8.34. */ - - case OP_NOT_WHITESPACE: + + case OP_NOT_WHITESPACE: set_nottype_bits(start_bits, cbit_space, table_limit, cd); - try_next = FALSE; - break; - - case OP_WHITESPACE: + try_next = FALSE; + break; + + case OP_WHITESPACE: set_type_bits(start_bits, cbit_space, table_limit, cd); - try_next = FALSE; - break; - - case OP_NOT_WORDCHAR: + try_next = FALSE; + break; + + case OP_NOT_WORDCHAR: set_nottype_bits(start_bits, cbit_word, table_limit, cd); - try_next = FALSE; - break; - - case OP_WORDCHAR: + try_next = FALSE; + break; + + case OP_WORDCHAR: set_type_bits(start_bits, cbit_word, table_limit, cd); - try_next = FALSE; - break; - - /* One or more character type fudges the pointer and restarts, knowing - it will hit a single character type and stop there. */ - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: + try_next = FALSE; + break; + + /* One or more character type fudges the pointer and restarts, knowing + it will hit a single character type and stop there. */ + + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: case OP_TYPEPOSPLUS: - tcode++; - break; - - case OP_TYPEEXACT: + tcode++; + break; + + case OP_TYPEEXACT: tcode += 1 + IMM2_SIZE; - break; - - /* Zero or more repeats of character types set the bits and then - try again. */ - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: + break; + + /* Zero or more repeats of character types set the bits and then + try again. 
*/ + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: tcode += IMM2_SIZE; /* Fall through */ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - switch(tcode[1]) - { + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPOSSTAR: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + switch(tcode[1]) + { default: - case OP_ANY: + case OP_ANY: case OP_ALLANY: - return SSB_FAIL; - + return SSB_FAIL; + case OP_HSPACE: SET_BIT(CHAR_HT); SET_BIT(CHAR_SPACE); @@ -1275,44 +1275,44 @@ do SET_BIT(CHAR_NEL); break; - case OP_NOT_DIGIT: + case OP_NOT_DIGIT: set_nottype_bits(start_bits, cbit_digit, table_limit, cd); - break; - - case OP_DIGIT: + break; + + case OP_DIGIT: set_type_bits(start_bits, cbit_digit, table_limit, cd); - break; - + break; + /* The cbit_space table has vertical tab as whitespace; we no longer have to play fancy tricks because Perl added VT to its whitespace at release 5.18. PCRE added it at release 8.34. */ - - case OP_NOT_WHITESPACE: + + case OP_NOT_WHITESPACE: set_nottype_bits(start_bits, cbit_space, table_limit, cd); - break; - - case OP_WHITESPACE: + break; + + case OP_WHITESPACE: set_type_bits(start_bits, cbit_space, table_limit, cd); - break; - - case OP_NOT_WORDCHAR: + break; + + case OP_NOT_WORDCHAR: set_nottype_bits(start_bits, cbit_word, table_limit, cd); - break; - - case OP_WORDCHAR: + break; + + case OP_WORDCHAR: set_type_bits(start_bits, cbit_word, table_limit, cd); - break; - } - - tcode += 2; - break; - - /* Character class where all the information is in a bit map: set the - bits and either carry on or not, according to the repeat count. If it was - a negative class, and we are operating with UTF-8 characters, any byte - with a value >= 0xc4 is a potentially valid starter because it starts a - character with a value > 255. 
*/ - + break; + } + + tcode += 2; + break; + + /* Character class where all the information is in a bit map: set the + bits and either carry on or not, according to the repeat count. If it was + a negative class, and we are operating with UTF-8 characters, any byte + with a value >= 0xc4 is a potentially valid starter because it starts a + character with a value > 255. */ + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0) @@ -1323,21 +1323,21 @@ do #endif /* Fall through */ - case OP_NCLASS: + case OP_NCLASS: #if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (utf) - { - start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ - memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ - } -#endif + { + start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ + memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ + } +#endif #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 SET_BIT(0xFF); /* For characters > 255 */ #endif - /* Fall through */ - - case OP_CLASS: - { + /* Fall through */ + + case OP_CLASS: + { pcre_uint8 *map; #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 map = NULL; @@ -1354,102 +1354,102 @@ do map = (pcre_uint8 *)tcode; tcode += 32 / sizeof(pcre_uchar); } - - /* In UTF-8 mode, the bits in a bit map correspond to character - values, not to byte values. However, the bit map we are constructing is - for byte values. So we have to do a conversion for characters whose - value is > 127. In fact, there are only two possible starting bytes for - characters in the range 128 - 255. */ - + + /* In UTF-8 mode, the bits in a bit map correspond to character + values, not to byte values. However, the bit map we are constructing is + for byte values. So we have to do a conversion for characters whose + value is > 127. In fact, there are only two possible starting bytes for + characters in the range 128 - 255. 
*/ + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (map != NULL) #endif - { + { #if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (utf) - { + { for (c = 0; c < 16; c++) start_bits[c] |= map[c]; for (c = 128; c < 256; c++) - { + { if ((map[c/8] & (1 << (c&7))) != 0) { int d = (c >> 6) | 0xc0; /* Set bit for this starter */ start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ } - } - } + } + } else -#endif +#endif { /* In non-UTF-8 mode, the two bit maps are completely compatible. */ for (c = 0; c < 32; c++) start_bits[c] |= map[c]; } - } - + } + /* Advance past the bit map, and act on what follows. For a zero minimum repeat, continue; otherwise stop processing. */ - - switch (*tcode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: + + switch (*tcode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: case OP_CRPOSSTAR: case OP_CRPOSQUERY: - tcode++; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: + tcode++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: case OP_CRPOSRANGE: if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; - else try_next = FALSE; - break; - - default: - try_next = FALSE; - break; - } - } - break; /* End of bitmap class handling */ - - } /* End of switch */ - } /* End of try_next loop */ - - code += GET(code, 1); /* Advance to next branch */ - } -while (*code == OP_ALT); -return yield; -} - - - - - -/************************************************* -* Study a compiled expression * -*************************************************/ - -/* This function is handed a compiled expression that it must study to produce + else try_next = FALSE; + break; + + default: + try_next = FALSE; + break; + } + } + break; /* End of bitmap class handling */ + + } /* End of switch */ + } /* End of try_next loop */ + + code += GET(code, 1); /* Advance to next branch */ + } +while (*code == OP_ALT); +return yield; +} 
+ + + + + +/************************************************* +* Study a compiled expression * +*************************************************/ + +/* This function is handed a compiled expression that it must study to produce information that will speed up the matching. It returns a pcre[16]_extra block -which then gets handed back to pcre_exec(). - -Arguments: - re points to the compiled expression - options contains option bits - errorptr points to where to place error messages; - set NULL unless error - +which then gets handed back to pcre_exec(). + +Arguments: + re points to the compiled expression + options contains option bits + errorptr points to where to place error messages; + set NULL unless error + Returns: pointer to a pcre[16]_extra block, with study_data filled in and the appropriate flags set; - NULL on error or if no optimization possible -*/ - + NULL on error or if no optimization possible +*/ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION -pcre_study(const pcre *external_re, int options, const char **errorptr) +pcre_study(const pcre *external_re, int options, const char **errorptr) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION pcre16_study(const pcre16 *external_re, int options, const char **errorptr) @@ -1457,27 +1457,27 @@ pcre16_study(const pcre16 *external_re, int options, const char **errorptr) PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION pcre32_study(const pcre32 *external_re, int options, const char **errorptr) #endif -{ +{ int min; int count = 0; BOOL bits_set = FALSE; pcre_uint8 start_bits[32]; PUBL(extra) *extra = NULL; -pcre_study_data *study; +pcre_study_data *study; const pcre_uint8 *tables; pcre_uchar *code; -compile_data compile_block; +compile_data compile_block; const REAL_PCRE *re = (const REAL_PCRE *)external_re; - - -*errorptr = NULL; - -if (re == NULL || re->magic_number != MAGIC_NUMBER) - { - *errorptr = "argument is not a compiled regular expression"; - 
return NULL; - } - + + +*errorptr = NULL; + +if (re == NULL || re->magic_number != MAGIC_NUMBER) + { + *errorptr = "argument is not a compiled regular expression"; + return NULL; + } + if ((re->flags & PCRE_MODE) == 0) { #if defined COMPILE_PCRE8 @@ -1490,28 +1490,28 @@ if ((re->flags & PCRE_MODE) == 0) return NULL; } -if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) - { - *errorptr = "unknown or incorrect option bit(s) set"; - return NULL; - } - +if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) + { + *errorptr = "unknown or incorrect option bit(s) set"; + return NULL; + } + code = (pcre_uchar *)re + re->name_table_offset + - (re->name_count * re->name_entry_size); - -/* For an anchored pattern, or an unanchored pattern that has a first char, or + (re->name_count * re->name_entry_size); + +/* For an anchored pattern, or an unanchored pattern that has a first char, or a multiline pattern that matches only at "line starts", there is no point in seeking a list of starting bytes. */ - + if ((re->options & PCRE_ANCHORED) == 0 && (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0) { int rc; - + /* Set the character tables in the block that is passed around */ - + tables = re->tables; - + #if defined COMPILE_PCRE8 if (tables == NULL) (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, @@ -1525,14 +1525,14 @@ if ((re->options & PCRE_ANCHORED) == 0 && (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, (void *)(&tables)); #endif - + compile_block.lcc = tables + lcc_offset; compile_block.fcc = tables + fcc_offset; compile_block.cbits = tables + cbits_offset; compile_block.ctypes = tables + ctypes_offset; - + /* See if we can find a fixed set of initial characters for the pattern. */ - + memset(start_bits, 0, 32 * sizeof(pcre_uint8)); rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, &compile_block); @@ -1543,16 +1543,16 @@ if ((re->options & PCRE_ANCHORED) == 0 && return NULL; } } - + /* Find the minimum length of subject string. 
*/ - + switch(min = find_minlength(re, code, code, re->options, NULL, &count)) - { + { case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; case -3: *errorptr = "internal error: opcode not recognized"; return NULL; default: break; - } - + } + /* If a set of starting bytes has been identified, or if the minimum length is greater than zero, or if JIT optimization has been requested, or if PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a @@ -1561,7 +1561,7 @@ by the former, which may also get additional data set later by the calling program. At the moment, the size of pcre_study_data is fixed. We nevertheless save it in a field for returning via the pcre_fullinfo() function so that if it becomes variable in the future, we don't have to change that code. */ - + if (bits_set || min > 0 || (options & ( #ifdef SUPPORT_JIT PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | @@ -1576,7 +1576,7 @@ if (bits_set || min > 0 || (options & ( *errorptr = "failed to get memory"; return NULL; } - + study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra))); extra->flags = PCRE_EXTRA_STUDY_DATA; extra->study_data = study; @@ -1648,9 +1648,9 @@ if (bits_set || min > 0 || (options & ( #endif } -return extra; -} - +return extra; +} + /************************************************* * Free the study data * @@ -1683,4 +1683,4 @@ if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && PUBL(free)(extra); } -/* End of pcre_study.c */ +/* End of pcre_study.c */ diff --git a/contrib/libs/pcre/pcre_tables.c b/contrib/libs/pcre/pcre_tables.c index 00abeff330f..179038d0259 100644 --- a/contrib/libs/pcre/pcre_tables.c +++ b/contrib/libs/pcre/pcre_tables.c @@ -1,103 +1,103 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to 
those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2017 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + #ifndef PCRE_INCLUDED - -/* This module contains some fixed tables that are used by more than one of the -PCRE code modules. 
The tables are also #included by the pcretest program, which -uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name -clashes with the library. */ - - -#ifdef HAVE_CONFIG_H + +/* This module contains some fixed tables that are used by more than one of the +PCRE code modules. The tables are also #included by the pcretest program, which +uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name +clashes with the library. */ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - +#endif + +#include "pcre_internal.h" + #endif /* PCRE_INCLUDED */ - -/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that -the definition is next to the definition of the opcodes in pcre_internal.h. */ - + +/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that +the definition is next to the definition of the opcodes in pcre_internal.h. */ + const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS }; - + /* Tables of horizontal and vertical whitespace characters, suitable for adding to classes. */ - + const pcre_uint32 PRIV(hspace_list)[] = { HSPACE_LIST }; const pcre_uint32 PRIV(vspace_list)[] = { VSPACE_LIST }; - -/************************************************* -* Tables for UTF-8 support * -*************************************************/ - -/* These are the breakpoints for different numbers of bytes in a UTF-8 -character. */ - + +/************************************************* +* Tables for UTF-8 support * +*************************************************/ + +/* These are the breakpoints for different numbers of bytes in a UTF-8 +character. */ + #if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \ || (defined PCRE_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)) - + /* These tables are also required by pcretest in 16- or 32-bit mode. 
*/ const int PRIV(utf8_table1)[] = - { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; - + { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; + const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); - -/* These are the indicator bits and the mask for the data bits to set in the -first byte of a character, indexed by the number of additional bytes. */ - + +/* These are the indicator bits and the mask for the data bits to set in the +first byte of a character, indexed by the number of additional bytes. */ + const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - -/* Table of the number of extra bytes, indexed by the first byte masked with -0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ - + +/* Table of the number of extra bytes, indexed by the first byte masked with +0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ + const pcre_uint8 PRIV(utf8_table4)[] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; + #endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE[16|32])*/ #ifdef SUPPORT_UTF @@ -190,18 +190,18 @@ const int PRIV(ucp_typerange)[] = { }; #endif /* SUPPORT_JIT */ -/* The pcre_utt[] table below translates Unicode property names into type and -code values. It is searched by binary chop, so must be in collating sequence of -name. Originally, the table contained pointers to the name strings in the first -field of each entry. However, that leads to a large number of relocations when -a shared library is dynamically loaded. 
A significant reduction is made by -putting all the names into a single, large string and then using offsets in the -table itself. Maintenance is more error-prone, but frequent changes to this +/* The pcre_utt[] table below translates Unicode property names into type and +code values. It is searched by binary chop, so must be in collating sequence of +name. Originally, the table contained pointers to the name strings in the first +field of each entry. However, that leads to a large number of relocations when +a shared library is dynamically loaded. A significant reduction is made by +putting all the names into a single, large string and then using offsets in the +table itself. Maintenance is more error-prone, but frequent changes to this data are unlikely. - + July 2008: There is now a script called maint/GenerateUtt.py that can be used to generate this data automatically instead of maintaining it by hand. - + The script was updated in March 2009 to generate a new EBCDIC-compliant version. Like all other character and string literals that are compared against the regular expression pattern, we must use STR_ macros instead of literal @@ -718,10 +718,10 @@ const ucp_type_table PRIV(utt)[] = { { 1277, PT_PC, ucp_Zl }, { 1280, PT_PC, ucp_Zp }, { 1283, PT_PC, ucp_Zs } -}; - +}; + const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); - + #endif /* SUPPORT_UTF */ - -/* End of pcre_tables.c */ + +/* End of pcre_tables.c */ diff --git a/contrib/libs/pcre/pcre_valid_utf8.c b/contrib/libs/pcre/pcre_valid_utf8.c index d81291b7f22..3983ed1d684 100644 --- a/contrib/libs/pcre/pcre_valid_utf8.c +++ b/contrib/libs/pcre/pcre_valid_utf8.c @@ -1,72 +1,72 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2013 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - - -/* This module contains an internal function for validating UTF-8 character -strings. */ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains an internal function for validating UTF-8 character +strings. 
*/ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Validate a UTF-8 string * -*************************************************/ - -/* This function is called (optionally) at the start of compile or match, to +#endif + +#include "pcre_internal.h" + + +/************************************************* +* Validate a UTF-8 string * +*************************************************/ + +/* This function is called (optionally) at the start of compile or match, to check that a supposed UTF-8 string is actually valid. The early check means -that subsequent code can assume it is dealing with a valid string. The check +that subsequent code can assume it is dealing with a valid string. The check can be turned off for maximum performance, but the consequences of supplying an invalid string are then undefined. - -Originally, this function checked according to RFC 2279, allowing for values in -the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in -the canonical format. Once somebody had pointed out RFC 3629 to me (it -obsoletes 2279), additional restrictions were applied. The values are now -limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the + +Originally, this function checked according to RFC 2279, allowing for values in +the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in +the canonical format. Once somebody had pointed out RFC 3629 to me (it +obsoletes 2279), additional restrictions were applied. The values are now +limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte characters is still checked. 
- + From release 8.13 more information about the details of the error are passed back in the returned value: @@ -94,31 +94,31 @@ PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character) PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff PCRE_UTF8_ERR22 Unused (was non-character) -Arguments: - string points to the string - length length of string, or -1 if the string is zero-terminated +Arguments: + string points to the string + length length of string, or -1 if the string is zero-terminated errp pointer to an error position offset variable - + Returns: = 0 if the string is a valid UTF-8 string > 0 otherwise, setting the offset of the bad character -*/ - -int +*/ + +int PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset) -{ +{ #ifdef SUPPORT_UTF register PCRE_PUCHAR p; - -if (length < 0) - { - for (p = string; *p != 0; p++); + +if (length < 0) + { + for (p = string; *p != 0; p++); length = (int)(p - string); - } - -for (p = string; length-- > 0; p++) - { + } + +for (p = string; length-- > 0; p++) + { register pcre_uchar ab, c, d; - + c = *p; if (c < 128) continue; /* ASCII character */ @@ -142,35 +142,35 @@ for (p = string; length-- > 0; p++) } length -= ab; /* Length remaining */ - /* Check top bits in the second byte */ - + /* Check top bits in the second byte */ + if (((d = *(++p)) & 0xc0) != 0x80) { *erroroffset = (int)(p - string) - 1; return PCRE_UTF8_ERR6; } - + /* For each length, check that the remaining bytes start with the 0x80 bit set and not the 0x40 bit. Then check for an overlong sequence, and for the excluded range 0xd800 to 0xdfff. */ - switch (ab) - { + switch (ab) + { /* 2-byte character. No further bytes to check for 0x80. Check first byte for for xx00 000x (overlong sequence). */ - + case 1: if ((c & 0x3e) == 0) { *erroroffset = (int)(p - string) - 1; return PCRE_UTF8_ERR15; } break; - + /* 3-byte character. Check third byte for 0x80. 
Then check first 2 bytes for 1110 0000, xx0x xxxx (overlong sequence) or 1110 1101, 1010 xxxx (0xd800 - 0xdfff) */ - - case 2: + + case 2: if ((*(++p) & 0xc0) != 0x80) /* Third byte */ { *erroroffset = (int)(p - string) - 2; @@ -186,13 +186,13 @@ for (p = string; length-- > 0; p++) *erroroffset = (int)(p - string) - 2; return PCRE_UTF8_ERR14; } - break; - + break; + /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2 bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a character greater than 0x0010ffff (f4 8f bf bf) */ - - case 3: + + case 3: if ((*(++p) & 0xc0) != 0x80) /* Third byte */ { *erroroffset = (int)(p - string) - 2; @@ -213,17 +213,17 @@ for (p = string; length-- > 0; p++) *erroroffset = (int)(p - string) - 3; return PCRE_UTF8_ERR13; } - break; - + break; + /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be rejected by the length test below. However, we do the appropriate tests here so that overlong sequences get diagnosed, and also in case there is ever an option for handling these larger code points. */ - + /* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for 1111 1000, xx00 0xxx */ - case 4: + case 4: if ((*(++p) & 0xc0) != 0x80) /* Third byte */ { *erroroffset = (int)(p - string) - 2; @@ -244,12 +244,12 @@ for (p = string; length-- > 0; p++) *erroroffset = (int)(p - string) - 4; return PCRE_UTF8_ERR18; } - break; - + break; + /* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for 1111 1100, xx00 00xx. */ - case 5: + case 5: if ((*(++p) & 0xc0) != 0x80) /* Third byte */ { *erroroffset = (int)(p - string) - 2; @@ -275,27 +275,27 @@ for (p = string; length-- > 0; p++) *erroroffset = (int)(p - string) - 5; return PCRE_UTF8_ERR19; } - break; - } - + break; + } + /* Character is valid under RFC 2279, but 4-byte and 5-byte characters are excluded by RFC 3629. The pointer p is currently at the last byte of the character. 
*/ if (ab > 3) - { + { *erroroffset = (int)(p - string) - ab; return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12; - } - } + } + } #else /* Not SUPPORT_UTF */ (void)(string); /* Keep picky compilers happy */ (void)(length); (void)(erroroffset); -#endif - +#endif + return PCRE_UTF8_ERR0; /* This indicates success */ -} - -/* End of pcre_valid_utf8.c */ +} + +/* End of pcre_valid_utf8.c */ diff --git a/contrib/libs/pcre/pcre_version.c b/contrib/libs/pcre/pcre_version.c index ea896e1d80b..2ff2b79b8ce 100644 --- a/contrib/libs/pcre/pcre_version.c +++ b/contrib/libs/pcre/pcre_version.c @@ -1,87 +1,87 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_version(), which returns a -string that identifies the PCRE version that is in use. */ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_version(), which returns a +string that identifies the PCRE version that is in use. */ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Return version string * -*************************************************/ - -/* These macros are the standard way of turning unquoted text into C strings. -They allow macros like PCRE_MAJOR to be defined without quotes, which is -convenient for user programs that want to test its value. */ - -#define STRING(a) # a -#define XSTRING(s) STRING(s) - -/* A problem turned up with PCRE_PRERELEASE, which is defined empty for -production releases. Originally, it was used naively in this code: - - return XSTRING(PCRE_MAJOR) - "." 
XSTRING(PCRE_MINOR) - XSTRING(PCRE_PRERELEASE) - " " XSTRING(PCRE_DATE); - -However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of -STRING(). The C standard states: "If (before argument substitution) any -argument consists of no preprocessing tokens, the behavior is undefined." It -turns out the gcc treats this case as a single empty string - which is what we -really want - but Visual C grumbles about the lack of an argument for the -macro. Unfortunately, both are within their rights. To cope with both ways of -handling this, I had resort to some messy hackery that does a test at run time. -I could find no way of detecting that a macro is defined as an empty string at -pre-processor time. This hack uses a standard trick for avoiding calling -the STRING macro with an empty argument when doing the test. */ - +#endif + +#include "pcre_internal.h" + + +/************************************************* +* Return version string * +*************************************************/ + +/* These macros are the standard way of turning unquoted text into C strings. +They allow macros like PCRE_MAJOR to be defined without quotes, which is +convenient for user programs that want to test its value. */ + +#define STRING(a) # a +#define XSTRING(s) STRING(s) + +/* A problem turned up with PCRE_PRERELEASE, which is defined empty for +production releases. Originally, it was used naively in this code: + + return XSTRING(PCRE_MAJOR) + "." XSTRING(PCRE_MINOR) + XSTRING(PCRE_PRERELEASE) + " " XSTRING(PCRE_DATE); + +However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of +STRING(). The C standard states: "If (before argument substitution) any +argument consists of no preprocessing tokens, the behavior is undefined." It +turns out the gcc treats this case as a single empty string - which is what we +really want - but Visual C grumbles about the lack of an argument for the +macro. Unfortunately, both are within their rights. 
To cope with both ways of +handling this, I had resort to some messy hackery that does a test at run time. +I could find no way of detecting that a macro is defined as an empty string at +pre-processor time. This hack uses a standard trick for avoiding calling +the STRING macro with an empty argument when doing the test. */ + #if defined COMPILE_PCRE8 PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION -pcre_version(void) +pcre_version(void) #elif defined COMPILE_PCRE16 PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION pcre16_version(void) @@ -89,10 +89,10 @@ pcre16_version(void) PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION pcre32_version(void) #endif -{ -return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)? - XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) : - XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE); -} - -/* End of pcre_version.c */ +{ +return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)? + XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) : + XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE); +} + +/* End of pcre_version.c */ diff --git a/contrib/libs/pcre/pcre_xclass.c b/contrib/libs/pcre/pcre_xclass.c index f42b4a1a9cf..942696ed7c1 100644 --- a/contrib/libs/pcre/pcre_xclass.c +++ b/contrib/libs/pcre/pcre_xclass.c @@ -1,86 +1,86 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel Copyright (c) 1997-2013 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - - -/* This module contains an internal function that is used to match an extended + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains an internal function that is used to match an extended class. It is used by both pcre_exec() and pcre_def_exec(). 
*/ - - -#ifdef HAVE_CONFIG_H + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Match character against an XCLASS * -*************************************************/ - -/* This function is called to match a character against an extended class that +#endif + +#include "pcre_internal.h" + + +/************************************************* +* Match character against an XCLASS * +*************************************************/ + +/* This function is called to match a character against an extended class that might contain values > 255 and/or Unicode properties. - -Arguments: - c the character - data points to the flag byte of the XCLASS data - -Returns: TRUE if character matches, else FALSE -*/ - -BOOL + +Arguments: + c the character + data points to the flag byte of the XCLASS data + +Returns: TRUE if character matches, else FALSE +*/ + +BOOL PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf) -{ +{ pcre_uchar t; -BOOL negated = (*data & XCL_NOT) != 0; - +BOOL negated = (*data & XCL_NOT) != 0; + (void)utf; #ifdef COMPILE_PCRE8 /* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */ utf = TRUE; #endif -/* Character values < 256 are matched against a bitmap, if one is present. If -not, we still carry on, because there may be ranges that start below 256 in the -additional data. */ - -if (c < 256) - { +/* Character values < 256 are matched against a bitmap, if one is present. If +not, we still carry on, because there may be ranges that start below 256 in the +additional data. */ + +if (c < 256) + { if ((*data & XCL_HASPROP) == 0) { if ((*data & XCL_MAP) == 0) return negated; @@ -89,19 +89,19 @@ if (c < 256) if ((*data & XCL_MAP) != 0 && (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0) return !negated; /* char found */ - } - -/* First skip the bit map if present. 
Then match against the list of Unicode -properties or large chars or ranges that end with a large char. We won't ever -encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ - + } + +/* First skip the bit map if present. Then match against the list of Unicode +properties or large chars or ranges that end with a large char. We won't ever +encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ + if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar); - -while ((t = *data++) != XCL_END) - { + +while ((t = *data++) != XCL_END) + { pcre_uint32 x, y; - if (t == XCL_SINGLE) - { + if (t == XCL_SINGLE) + { #ifdef SUPPORT_UTF if (utf) { @@ -110,10 +110,10 @@ while ((t = *data++) != XCL_END) else #endif x = *data++; - if (c == x) return !negated; - } - else if (t == XCL_RANGE) - { + if (c == x) return !negated; + } + else if (t == XCL_RANGE) + { #ifdef SUPPORT_UTF if (utf) { @@ -126,39 +126,39 @@ while ((t = *data++) != XCL_END) x = *data++; y = *data++; } - if (c >= x && c <= y) return !negated; - } - -#ifdef SUPPORT_UCP - else /* XCL_PROP & XCL_NOTPROP */ - { + if (c >= x && c <= y) return !negated; + } + +#ifdef SUPPORT_UCP + else /* XCL_PROP & XCL_NOTPROP */ + { const ucd_record *prop = GET_UCD(c); BOOL isprop = t == XCL_PROP; - - switch(*data) - { - case PT_ANY: + + switch(*data) + { + case PT_ANY: if (isprop) return !negated; - break; - - case PT_LAMP: + break; + + case PT_LAMP: if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt) == isprop) return !negated; - break; - - case PT_GC: + break; + + case PT_GC: if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop) return !negated; - break; - - case PT_PC: + break; + + case PT_PC: if ((data[1] == prop->chartype) == isprop) return !negated; - break; - - case PT_SC: + break; + + case PT_SC: if ((data[1] == prop->script) == isprop) return !negated; - break; - + break; + case PT_ALNUM: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || 
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop) @@ -250,19 +250,19 @@ while ((t = *data++) != XCL_END) return !negated; break; - /* This should never occur, but compilers may mutter if there is no - default. */ - - default: - return FALSE; - } - - data += 2; - } -#endif /* SUPPORT_UCP */ - } - -return negated; /* char did not match */ -} - -/* End of pcre_xclass.c */ + /* This should never occur, but compilers may mutter if there is no + default. */ + + default: + return FALSE; + } + + data += 2; + } +#endif /* SUPPORT_UCP */ + } + +return negated; /* char did not match */ +} + +/* End of pcre_xclass.c */ diff --git a/contrib/libs/pcre/pcrecpp/ya.make b/contrib/libs/pcre/pcrecpp/ya.make index 8eb2dacc7f2..c832b9e56ef 100644 --- a/contrib/libs/pcre/pcrecpp/ya.make +++ b/contrib/libs/pcre/pcrecpp/ya.make @@ -9,8 +9,8 @@ OWNER( g:cpp-contrib ) -LICENSE(BSD-3-Clause) - +LICENSE(BSD-3-Clause) + PEERDIR( contrib/libs/pcre ) diff --git a/contrib/libs/pcre/pcreposix.c b/contrib/libs/pcre/pcreposix.c index 94a82336ebf..55972c1c232 100644 --- a/contrib/libs/pcre/pcreposix.c +++ b/contrib/libs/pcre/pcreposix.c @@ -1,57 +1,57 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel Copyright (c) 1997-2020 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module is a wrapper that provides a POSIX API to the underlying PCRE -functions. 
*/ - - -#ifdef HAVE_CONFIG_H + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module is a wrapper that provides a POSIX API to the underlying PCRE +functions. */ + + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - - -/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for -compiling these functions. 
This must come before including pcreposix.h, where -they are set for an application (using these functions) if they have not -previously been set. */ - +#endif + + +/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for +compiling these functions. This must come before including pcreposix.h, where +they are set for an application (using these functions) if they have not +previously been set. */ + #if defined(_WIN32) && !defined(PCRE_STATIC) # define PCREPOSIX_EXP_DECL extern __declspec(dllexport) # define PCREPOSIX_EXP_DEFN __declspec(dllexport) @@ -62,89 +62,89 @@ are declared as "import" for Windows by defining PCRE_EXP_DECL as "import". This is needed even though pcre_internal.h itself includes pcre.h, because it does so after it has set PCRE_EXP_DECL to "export" if it is not already set. */ -#include "pcre.h" -#include "pcre_internal.h" -#include "pcreposix.h" - - -/* Table to translate PCRE compile time error codes into POSIX error codes. */ - -static const int eint[] = { - 0, /* no error */ - REG_EESCAPE, /* \ at end of pattern */ - REG_EESCAPE, /* \c at end of pattern */ - REG_EESCAPE, /* unrecognized character follows \ */ - REG_BADBR, /* numbers out of order in {} quantifier */ +#include "pcre.h" +#include "pcre_internal.h" +#include "pcreposix.h" + + +/* Table to translate PCRE compile time error codes into POSIX error codes. 
*/ + +static const int eint[] = { + 0, /* no error */ + REG_EESCAPE, /* \ at end of pattern */ + REG_EESCAPE, /* \c at end of pattern */ + REG_EESCAPE, /* unrecognized character follows \ */ + REG_BADBR, /* numbers out of order in {} quantifier */ /* 5 */ - REG_BADBR, /* number too big in {} quantifier */ - REG_EBRACK, /* missing terminating ] for character class */ - REG_ECTYPE, /* invalid escape sequence in character class */ - REG_ERANGE, /* range out of order in character class */ - REG_BADRPT, /* nothing to repeat */ + REG_BADBR, /* number too big in {} quantifier */ + REG_EBRACK, /* missing terminating ] for character class */ + REG_ECTYPE, /* invalid escape sequence in character class */ + REG_ERANGE, /* range out of order in character class */ + REG_BADRPT, /* nothing to repeat */ /* 10 */ - REG_BADRPT, /* operand of unlimited repeat could match the empty string */ - REG_ASSERT, /* internal error: unexpected repeat */ - REG_BADPAT, /* unrecognized character after (? */ - REG_BADPAT, /* POSIX named classes are supported only within a class */ - REG_EPAREN, /* missing ) */ + REG_BADRPT, /* operand of unlimited repeat could match the empty string */ + REG_ASSERT, /* internal error: unexpected repeat */ + REG_BADPAT, /* unrecognized character after (? 
*/ + REG_BADPAT, /* POSIX named classes are supported only within a class */ + REG_EPAREN, /* missing ) */ /* 15 */ - REG_ESUBREG, /* reference to non-existent subpattern */ - REG_INVARG, /* erroffset passed as NULL */ - REG_INVARG, /* unknown option bit(s) set */ - REG_EPAREN, /* missing ) after comment */ - REG_ESIZE, /* parentheses nested too deeply */ + REG_ESUBREG, /* reference to non-existent subpattern */ + REG_INVARG, /* erroffset passed as NULL */ + REG_INVARG, /* unknown option bit(s) set */ + REG_EPAREN, /* missing ) after comment */ + REG_ESIZE, /* parentheses nested too deeply */ /* 20 */ - REG_ESIZE, /* regular expression too large */ - REG_ESPACE, /* failed to get memory */ + REG_ESIZE, /* regular expression too large */ + REG_ESPACE, /* failed to get memory */ REG_EPAREN, /* unmatched parentheses */ - REG_ASSERT, /* internal error: code overflow */ - REG_BADPAT, /* unrecognized character after (?< */ + REG_ASSERT, /* internal error: code overflow */ + REG_BADPAT, /* unrecognized character after (?< */ /* 25 */ - REG_BADPAT, /* lookbehind assertion is not fixed length */ - REG_BADPAT, /* malformed number or name after (?( */ - REG_BADPAT, /* conditional group contains more than two branches */ - REG_BADPAT, /* assertion expected after (?( */ - REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ + REG_BADPAT, /* lookbehind assertion is not fixed length */ + REG_BADPAT, /* malformed number or name after (?( */ + REG_BADPAT, /* conditional group contains more than two branches */ + REG_BADPAT, /* assertion expected after (?( */ + REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ /* 30 */ - REG_ECTYPE, /* unknown POSIX class name */ - REG_BADPAT, /* POSIX collating elements are not supported */ - REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ - REG_BADPAT, /* spare error */ + REG_ECTYPE, /* unknown POSIX class name */ + REG_BADPAT, /* POSIX collating elements are not supported */ + REG_INVARG, /* this 
version of PCRE is not compiled with PCRE_UTF8 support */ + REG_BADPAT, /* spare error */ REG_BADPAT, /* character value in \x{} or \o{} is too large */ /* 35 */ - REG_BADPAT, /* invalid condition (?(0) */ - REG_BADPAT, /* \C not allowed in lookbehind assertion */ - REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ - REG_BADPAT, /* number after (?C is > 255 */ - REG_BADPAT, /* closing ) for (?C expected */ + REG_BADPAT, /* invalid condition (?(0) */ + REG_BADPAT, /* \C not allowed in lookbehind assertion */ + REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ + REG_BADPAT, /* number after (?C is > 255 */ + REG_BADPAT, /* closing ) for (?C expected */ /* 40 */ - REG_BADPAT, /* recursive call could loop indefinitely */ - REG_BADPAT, /* unrecognized character after (?P */ - REG_BADPAT, /* syntax error in subpattern name (missing terminator) */ - REG_BADPAT, /* two named subpatterns have the same name */ - REG_BADPAT, /* invalid UTF-8 string */ + REG_BADPAT, /* recursive call could loop indefinitely */ + REG_BADPAT, /* unrecognized character after (?P */ + REG_BADPAT, /* syntax error in subpattern name (missing terminator) */ + REG_BADPAT, /* two named subpatterns have the same name */ + REG_BADPAT, /* invalid UTF-8 string */ /* 45 */ - REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ - REG_BADPAT, /* malformed \P or \p sequence */ - REG_BADPAT, /* unknown property name after \P or \p */ - REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */ - REG_BADPAT, /* too many named subpatterns (maximum 10,000) */ + REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ + REG_BADPAT, /* malformed \P or \p sequence */ + REG_BADPAT, /* unknown property name after \P or \p */ + REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */ + REG_BADPAT, /* too many named subpatterns (maximum 10,000) */ /* 50 */ - REG_BADPAT, /* repeated subpattern is too long */ - REG_BADPAT, /* octal value is greater 
than \377 (not in UTF-8 mode) */ - REG_BADPAT, /* internal error: overran compiling workspace */ - REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */ - REG_BADPAT, /* DEFINE group contains more than one branch */ + REG_BADPAT, /* repeated subpattern is too long */ + REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */ + REG_BADPAT, /* internal error: overran compiling workspace */ + REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */ + REG_BADPAT, /* DEFINE group contains more than one branch */ /* 55 */ - REG_BADPAT, /* repeating a DEFINE group is not allowed */ - REG_INVARG, /* inconsistent NEWLINE options */ - REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */ + REG_BADPAT, /* repeating a DEFINE group is not allowed */ + REG_INVARG, /* inconsistent NEWLINE options */ + REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */ REG_BADPAT, /* a numbered reference must not be zero */ REG_BADPAT, /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */ /* 60 */ REG_BADPAT, /* (*VERB) not recognized */ - REG_BADPAT, /* number is too big */ - REG_BADPAT, /* subpattern name expected */ + REG_BADPAT, /* number is too big */ + REG_BADPAT, /* subpattern name expected */ REG_BADPAT, /* digit expected after (?+ */ REG_BADPAT, /* ] is an invalid data character in JavaScript compatibility mode */ /* 65 */ @@ -175,105 +175,105 @@ static const int eint[] = { REG_BADPAT, /* parentheses too deeply nested (stack check) */ REG_BADPAT, /* missing digits in \x{} or \o{} */ REG_BADPAT /* pattern too complicated */ -}; - -/* Table of texts corresponding to POSIX error codes */ - -static const char *const pstring[] = { - "", /* Dummy for value 0 */ - "internal error", /* REG_ASSERT */ - "invalid repeat counts in {}", /* BADBR */ - "pattern error", /* BADPAT */ - "? 
* + invalid", /* BADRPT */ - "unbalanced {}", /* EBRACE */ - "unbalanced []", /* EBRACK */ - "collation error - not relevant", /* ECOLLATE */ - "bad class", /* ECTYPE */ - "bad escape sequence", /* EESCAPE */ - "empty expression", /* EMPTY */ - "unbalanced ()", /* EPAREN */ - "bad range inside []", /* ERANGE */ - "expression too big", /* ESIZE */ - "failed to get memory", /* ESPACE */ - "bad back reference", /* ESUBREG */ - "bad argument", /* INVARG */ - "match failed" /* NOMATCH */ -}; - - - - -/************************************************* -* Translate error code to string * -*************************************************/ - +}; + +/* Table of texts corresponding to POSIX error codes */ + +static const char *const pstring[] = { + "", /* Dummy for value 0 */ + "internal error", /* REG_ASSERT */ + "invalid repeat counts in {}", /* BADBR */ + "pattern error", /* BADPAT */ + "? * + invalid", /* BADRPT */ + "unbalanced {}", /* EBRACE */ + "unbalanced []", /* EBRACK */ + "collation error - not relevant", /* ECOLLATE */ + "bad class", /* ECTYPE */ + "bad escape sequence", /* EESCAPE */ + "empty expression", /* EMPTY */ + "unbalanced ()", /* EPAREN */ + "bad range inside []", /* ERANGE */ + "expression too big", /* ESIZE */ + "failed to get memory", /* ESPACE */ + "bad back reference", /* ESUBREG */ + "bad argument", /* INVARG */ + "match failed" /* NOMATCH */ +}; + + + + +/************************************************* +* Translate error code to string * +*************************************************/ + PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION -regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) -{ -const char *message, *addmessage; -size_t length, addlength; - -message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? 
- "unknown error code" : pstring[errcode]; -length = strlen(message) + 1; - -addmessage = " at offset "; +regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) +{ +const char *message, *addmessage; +size_t length, addlength; + +message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? + "unknown error code" : pstring[errcode]; +length = strlen(message) + 1; + +addmessage = " at offset "; addlength = (preg != NULL && (int)preg->re_erroffset != -1)? - strlen(addmessage) + 6 : 0; - -if (errbuf_size > 0) - { - if (addlength > 0 && errbuf_size >= length + addlength) - sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); - else - { - strncpy(errbuf, message, errbuf_size - 1); - errbuf[errbuf_size-1] = 0; - } - } - -return length + addlength; -} - - - - -/************************************************* -* Free store held by a regex * -*************************************************/ - + strlen(addmessage) + 6 : 0; + +if (errbuf_size > 0) + { + if (addlength > 0 && errbuf_size >= length + addlength) + sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); + else + { + strncpy(errbuf, message, errbuf_size - 1); + errbuf[errbuf_size-1] = 0; + } + } + +return length + addlength; +} + + + + +/************************************************* +* Free store held by a regex * +*************************************************/ + PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION -regfree(regex_t *preg) -{ +regfree(regex_t *preg) +{ (PUBL(free))(preg->re_pcre); -} - - - - -/************************************************* -* Compile a regular expression * -*************************************************/ - -/* -Arguments: - preg points to a structure for recording the compiled expression - pattern the pattern to compile - cflags compilation flags - -Returns: 0 on success - various non-zero codes on failure -*/ - +} + + + + +/************************************************* +* Compile a regular expression * 
+*************************************************/ + +/* +Arguments: + preg points to a structure for recording the compiled expression + pattern the pattern to compile + cflags compilation flags + +Returns: 0 on success + various non-zero codes on failure +*/ + PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION -regcomp(regex_t *preg, const char *pattern, int cflags) -{ -const char *errorptr; -int erroffset; -int errorcode; -int options = 0; +regcomp(regex_t *preg, const char *pattern, int cflags) +{ +const char *errorptr; +int erroffset; +int errorcode; +int options = 0; int re_nsub = 0; - + if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS; if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE; if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL; @@ -281,14 +281,14 @@ if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE; if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8; if ((cflags & REG_UCP) != 0) options |= PCRE_UCP; if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY; - -preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr, - &erroffset, NULL); -preg->re_erroffset = erroffset; - + +preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr, + &erroffset, NULL); +preg->re_erroffset = erroffset; + /* Safety: if the error code is too big for the translation vector (which should not happen, but we all make mistakes), return REG_BADPAT. */ - + if (preg->re_pcre == NULL) { return (errorcode < (int)(sizeof(eint)/sizeof(const int)))? 
@@ -299,69 +299,69 @@ if (preg->re_pcre == NULL) &re_nsub); preg->re_nsub = (size_t)re_nsub; preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */ -return 0; -} - - - - -/************************************************* -* Match a regular expression * -*************************************************/ - -/* Unfortunately, PCRE requires 3 ints of working space for each captured -substring, so we have to get and release working store instead of just using -the POSIX structures as was done in earlier releases when PCRE needed only 2 -ints. However, if the number of possible capturing brackets is small, use a -block of store on the stack, to reduce the use of malloc/free. The threshold is -in a macro that can be changed at configure time. - -If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will -be set. When this is the case, the nmatch and pmatch arguments are ignored, and -the only result is yes/no/error. */ - +return 0; +} + + + + +/************************************************* +* Match a regular expression * +*************************************************/ + +/* Unfortunately, PCRE requires 3 ints of working space for each captured +substring, so we have to get and release working store instead of just using +the POSIX structures as was done in earlier releases when PCRE needed only 2 +ints. However, if the number of possible capturing brackets is small, use a +block of store on the stack, to reduce the use of malloc/free. The threshold is +in a macro that can be changed at configure time. + +If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will +be set. When this is the case, the nmatch and pmatch arguments are ignored, and +the only result is yes/no/error. 
*/ + PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION -regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags) -{ +regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ int rc, so, eo; -int options = 0; -int *ovector = NULL; -int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; -BOOL allocated_ovector = FALSE; -BOOL nosub = +int options = 0; +int *ovector = NULL; +int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; +BOOL allocated_ovector = FALSE; +BOOL nosub = (REAL_PCRE_OPTIONS((const pcre *)preg->re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0; - -if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; -if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; + +if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; +if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY; - + /* When no string data is being returned, or no vector has been passed in which to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding the return data is large enough. */ - + if (nosub || pmatch == NULL) nmatch = 0; - -else if (nmatch > 0) - { - if (nmatch <= POSIX_MALLOC_THRESHOLD) - { - ovector = &(small_ovector[0]); - } - else - { - if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE; - ovector = (int *)malloc(sizeof(int) * nmatch * 3); - if (ovector == NULL) return REG_ESPACE; - allocated_ovector = TRUE; - } - } - + +else if (nmatch > 0) + { + if (nmatch <= POSIX_MALLOC_THRESHOLD) + { + ovector = &(small_ovector[0]); + } + else + { + if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE; + ovector = (int *)malloc(sizeof(int) * nmatch * 3); + if (ovector == NULL) return REG_ESPACE; + allocated_ovector = TRUE; + } + } + /* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings. The man page from OS X says "REG_STARTEND affects only the location of the string, not how it is matched". 
That is why the "so" value is used to bump the start location rather than being passed as a PCRE "starting offset". */ - + if ((eflags & REG_STARTEND) != 0) { if (pmatch == NULL) return REG_INVARG; @@ -373,7 +373,7 @@ else so = 0; eo = (int)strlen(string); } - + rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so), 0, options, ovector, (int)(nmatch * 3)); @@ -381,27 +381,27 @@ if (rc == 0) rc = (int)nmatch; /* All captured slots were filled in */ /* Successful match */ -if (rc >= 0) - { - size_t i; - if (!nosub) - { - for (i = 0; i < (size_t)rc; i++) - { +if (rc >= 0) + { + size_t i; + if (!nosub) + { + for (i = 0; i < (size_t)rc; i++) + { pmatch[i].rm_so = (ovector[i*2] < 0)? -1 : ovector[i*2] + so; pmatch[i].rm_eo = (ovector[i*2+1] < 0)? -1: ovector[i*2+1] + so; - } - if (allocated_ovector) free(ovector); - for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; - } - return 0; - } - + } + if (allocated_ovector) free(ovector); + for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; + } + return 0; + } + /* Unsuccessful match */ if (allocated_ovector) free(ovector); switch(rc) - { + { /* ========================================================================== */ /* These cases are never obeyed. 
This is a fudge that causes a compile-time error if the vector eint, which is indexed by compile-time error number, is @@ -425,7 +425,7 @@ switch(rc) case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG; case PCRE_ERROR_BADMODE: return REG_INVARG; default: return REG_ASSERT; - } -} - -/* End of pcreposix.c */ + } +} + +/* End of pcreposix.c */ diff --git a/contrib/libs/pcre/pcreposix.h b/contrib/libs/pcre/pcreposix.h index 4667ea388d1..62cf33ae174 100644 --- a/contrib/libs/pcre/pcreposix.h +++ b/contrib/libs/pcre/pcreposix.h @@ -1,57 +1,57 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -#ifndef _PCREPOSIX_H -#define _PCREPOSIX_H - -/* This is the header for the POSIX wrapper interface to the PCRE Perl- -Compatible Regular Expression library. It defines the things POSIX says should -be there. I hope. - +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +#ifndef _PCREPOSIX_H +#define _PCREPOSIX_H + +/* This is the header for the POSIX wrapper interface to the PCRE Perl- +Compatible Regular Expression library. It defines the things POSIX says should +be there. I hope. + Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* Have to include stdlib.h in order to ensure that size_t is defined. */ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* Have to include stdlib.h in order to ensure that size_t is defined. */ + +#include + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + /* Options, mostly defined by POSIX, but with some extras. */ - + #define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */ #define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */ #define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */ @@ -63,64 +63,64 @@ extern "C" { #define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */ #define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */ #define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */ - -/* This is not used by PCRE, but by defining it we make it easier -to slot PCRE into existing programs that make POSIX calls. */ - -#define REG_EXTENDED 0 - -/* Error values. Not all these are relevant or used by the wrapper. */ - -enum { - REG_ASSERT = 1, /* internal error ? */ - REG_BADBR, /* invalid repeat counts in {} */ - REG_BADPAT, /* pattern error */ - REG_BADRPT, /* ? 
* + invalid */ - REG_EBRACE, /* unbalanced {} */ - REG_EBRACK, /* unbalanced [] */ - REG_ECOLLATE, /* collation error - not relevant */ - REG_ECTYPE, /* bad class */ - REG_EESCAPE, /* bad escape sequence */ - REG_EMPTY, /* empty expression */ - REG_EPAREN, /* unbalanced () */ - REG_ERANGE, /* bad range inside [] */ - REG_ESIZE, /* expression too big */ - REG_ESPACE, /* failed to get memory */ - REG_ESUBREG, /* bad back reference */ - REG_INVARG, /* bad argument */ - REG_NOMATCH /* match failed */ -}; - - -/* The structure representing a compiled regular expression. */ - -typedef struct { - void *re_pcre; - size_t re_nsub; - size_t re_erroffset; -} regex_t; - -/* The structure in which a captured offset is returned. */ - -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} regmatch_t; - -/* When an application links to a PCRE DLL in Windows, the symbols that are -imported have to be identified as such. When building PCRE, the appropriate -export settings are needed, and are set in pcreposix.c before including this -file. */ - + +/* This is not used by PCRE, but by defining it we make it easier +to slot PCRE into existing programs that make POSIX calls. */ + +#define REG_EXTENDED 0 + +/* Error values. Not all these are relevant or used by the wrapper. */ + +enum { + REG_ASSERT = 1, /* internal error ? */ + REG_BADBR, /* invalid repeat counts in {} */ + REG_BADPAT, /* pattern error */ + REG_BADRPT, /* ? 
* + invalid */ + REG_EBRACE, /* unbalanced {} */ + REG_EBRACK, /* unbalanced [] */ + REG_ECOLLATE, /* collation error - not relevant */ + REG_ECTYPE, /* bad class */ + REG_EESCAPE, /* bad escape sequence */ + REG_EMPTY, /* empty expression */ + REG_EPAREN, /* unbalanced () */ + REG_ERANGE, /* bad range inside [] */ + REG_ESIZE, /* expression too big */ + REG_ESPACE, /* failed to get memory */ + REG_ESUBREG, /* bad back reference */ + REG_INVARG, /* bad argument */ + REG_NOMATCH /* match failed */ +}; + + +/* The structure representing a compiled regular expression. */ + +typedef struct { + void *re_pcre; + size_t re_nsub; + size_t re_erroffset; +} regex_t; + +/* The structure in which a captured offset is returned. */ + +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +/* When an application links to a PCRE DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE, the appropriate +export settings are needed, and are set in pcreposix.c before including this +file. */ + #if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL) # define PCREPOSIX_EXP_DECL extern __declspec(dllimport) # define PCREPOSIX_EXP_DEFN __declspec(dllimport) #endif -/* By default, we use the standard "extern" declarations. */ - +/* By default, we use the standard "extern" declarations. */ + #ifndef PCREPOSIX_EXP_DECL # ifdef __cplusplus # define PCREPOSIX_EXP_DECL extern "C" @@ -131,8 +131,8 @@ file. */ # endif #endif -/* The functions */ - +/* The functions */ + #define regcomp pcre_regcomp #define regexec pcre_regexec #define regerror pcre_regerror @@ -140,12 +140,12 @@ file. 
*/ PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int); PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t, - regmatch_t *, int); + regmatch_t *, int); PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t); PCREPOSIX_EXP_DECL void regfree(regex_t *); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcreposix.h */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* End of pcreposix.h */ diff --git a/contrib/libs/pcre/ucp.h b/contrib/libs/pcre/ucp.h index a052b6d0693..2fa00296e42 100644 --- a/contrib/libs/pcre/ucp.h +++ b/contrib/libs/pcre/ucp.h @@ -1,14 +1,14 @@ -/************************************************* -* Unicode Property Table handler * -*************************************************/ - -#ifndef _UCP_H -#define _UCP_H - -/* This file contains definitions of the property values that are returned by +/************************************************* +* Unicode Property Table handler * +*************************************************/ + +#ifndef _UCP_H +#define _UCP_H + +/* This file contains definitions of the property values that are returned by the UCD access macros. New values that are added for new releases of Unicode should always be at the end of each enum, for backwards compatibility. - + IMPORTANT: Note also that the specific numeric values of the enums have to be the same as the values that are generated by the maint/MultiStage2.py script, where the equivalent property descriptive names are listed in vectors. @@ -16,53 +16,53 @@ where the equivalent property descriptive names are listed in vectors. ALSO: The specific values of the first two enums are assumed for the table called catposstab in pcre_compile.c. */ -/* These are the general character categories. 
*/ - -enum { - ucp_C, /* Other */ - ucp_L, /* Letter */ - ucp_M, /* Mark */ - ucp_N, /* Number */ - ucp_P, /* Punctuation */ - ucp_S, /* Symbol */ - ucp_Z /* Separator */ -}; - +/* These are the general character categories. */ + +enum { + ucp_C, /* Other */ + ucp_L, /* Letter */ + ucp_M, /* Mark */ + ucp_N, /* Number */ + ucp_P, /* Punctuation */ + ucp_S, /* Symbol */ + ucp_Z /* Separator */ +}; + /* These are the particular character categories. */ - -enum { - ucp_Cc, /* Control */ - ucp_Cf, /* Format */ - ucp_Cn, /* Unassigned */ - ucp_Co, /* Private use */ - ucp_Cs, /* Surrogate */ - ucp_Ll, /* Lower case letter */ - ucp_Lm, /* Modifier letter */ - ucp_Lo, /* Other letter */ - ucp_Lt, /* Title case letter */ - ucp_Lu, /* Upper case letter */ - ucp_Mc, /* Spacing mark */ - ucp_Me, /* Enclosing mark */ - ucp_Mn, /* Non-spacing mark */ - ucp_Nd, /* Decimal number */ - ucp_Nl, /* Letter number */ - ucp_No, /* Other number */ - ucp_Pc, /* Connector punctuation */ - ucp_Pd, /* Dash punctuation */ - ucp_Pe, /* Close punctuation */ - ucp_Pf, /* Final punctuation */ - ucp_Pi, /* Initial punctuation */ - ucp_Po, /* Other punctuation */ - ucp_Ps, /* Open punctuation */ - ucp_Sc, /* Currency symbol */ - ucp_Sk, /* Modifier symbol */ - ucp_Sm, /* Mathematical symbol */ - ucp_So, /* Other symbol */ - ucp_Zl, /* Line separator */ - ucp_Zp, /* Paragraph separator */ - ucp_Zs /* Space separator */ -}; - + +enum { + ucp_Cc, /* Control */ + ucp_Cf, /* Format */ + ucp_Cn, /* Unassigned */ + ucp_Co, /* Private use */ + ucp_Cs, /* Surrogate */ + ucp_Ll, /* Lower case letter */ + ucp_Lm, /* Modifier letter */ + ucp_Lo, /* Other letter */ + ucp_Lt, /* Title case letter */ + ucp_Lu, /* Upper case letter */ + ucp_Mc, /* Spacing mark */ + ucp_Me, /* Enclosing mark */ + ucp_Mn, /* Non-spacing mark */ + ucp_Nd, /* Decimal number */ + ucp_Nl, /* Letter number */ + ucp_No, /* Other number */ + ucp_Pc, /* Connector punctuation */ + ucp_Pd, /* Dash punctuation */ + ucp_Pe, /* Close punctuation 
*/ + ucp_Pf, /* Final punctuation */ + ucp_Pi, /* Initial punctuation */ + ucp_Po, /* Other punctuation */ + ucp_Ps, /* Open punctuation */ + ucp_Sc, /* Currency symbol */ + ucp_Sk, /* Modifier symbol */ + ucp_Sm, /* Mathematical symbol */ + ucp_So, /* Other symbol */ + ucp_Zl, /* Line separator */ + ucp_Zp, /* Paragraph separator */ + ucp_Zs /* Space separator */ +}; + /* These are grapheme break properties. Note that the code for processing them assumes that the values are less than 16. If more values are added that take the number to 16 or more, the code will have to be rewritten. */ @@ -83,70 +83,70 @@ enum { ucp_gbOther /* 12 */ }; -/* These are the script identifications. */ - -enum { - ucp_Arabic, - ucp_Armenian, - ucp_Bengali, - ucp_Bopomofo, - ucp_Braille, - ucp_Buginese, - ucp_Buhid, - ucp_Canadian_Aboriginal, - ucp_Cherokee, - ucp_Common, - ucp_Coptic, - ucp_Cypriot, - ucp_Cyrillic, - ucp_Deseret, - ucp_Devanagari, - ucp_Ethiopic, - ucp_Georgian, - ucp_Glagolitic, - ucp_Gothic, - ucp_Greek, - ucp_Gujarati, - ucp_Gurmukhi, - ucp_Han, - ucp_Hangul, - ucp_Hanunoo, - ucp_Hebrew, - ucp_Hiragana, - ucp_Inherited, - ucp_Kannada, - ucp_Katakana, - ucp_Kharoshthi, - ucp_Khmer, - ucp_Lao, - ucp_Latin, - ucp_Limbu, - ucp_Linear_B, - ucp_Malayalam, - ucp_Mongolian, - ucp_Myanmar, - ucp_New_Tai_Lue, - ucp_Ogham, - ucp_Old_Italic, - ucp_Old_Persian, - ucp_Oriya, - ucp_Osmanya, - ucp_Runic, - ucp_Shavian, - ucp_Sinhala, - ucp_Syloti_Nagri, - ucp_Syriac, - ucp_Tagalog, - ucp_Tagbanwa, - ucp_Tai_Le, - ucp_Tamil, - ucp_Telugu, - ucp_Thaana, - ucp_Thai, - ucp_Tibetan, - ucp_Tifinagh, - ucp_Ugaritic, - ucp_Yi, +/* These are the script identifications. 
*/ + +enum { + ucp_Arabic, + ucp_Armenian, + ucp_Bengali, + ucp_Bopomofo, + ucp_Braille, + ucp_Buginese, + ucp_Buhid, + ucp_Canadian_Aboriginal, + ucp_Cherokee, + ucp_Common, + ucp_Coptic, + ucp_Cypriot, + ucp_Cyrillic, + ucp_Deseret, + ucp_Devanagari, + ucp_Ethiopic, + ucp_Georgian, + ucp_Glagolitic, + ucp_Gothic, + ucp_Greek, + ucp_Gujarati, + ucp_Gurmukhi, + ucp_Han, + ucp_Hangul, + ucp_Hanunoo, + ucp_Hebrew, + ucp_Hiragana, + ucp_Inherited, + ucp_Kannada, + ucp_Katakana, + ucp_Kharoshthi, + ucp_Khmer, + ucp_Lao, + ucp_Latin, + ucp_Limbu, + ucp_Linear_B, + ucp_Malayalam, + ucp_Mongolian, + ucp_Myanmar, + ucp_New_Tai_Lue, + ucp_Ogham, + ucp_Old_Italic, + ucp_Old_Persian, + ucp_Oriya, + ucp_Osmanya, + ucp_Runic, + ucp_Shavian, + ucp_Sinhala, + ucp_Syloti_Nagri, + ucp_Syriac, + ucp_Tagalog, + ucp_Tagbanwa, + ucp_Tai_Le, + ucp_Tamil, + ucp_Telugu, + ucp_Thaana, + ucp_Thai, + ucp_Tibetan, + ucp_Tifinagh, + ucp_Ugaritic, + ucp_Yi, /* New for Unicode 5.0: */ ucp_Balinese, ucp_Cuneiform, @@ -217,8 +217,8 @@ enum { ucp_Siddham, ucp_Tirhuta, ucp_Warang_Citi -}; - -#endif - -/* End of ucp.h */ +}; + +#endif + +/* End of ucp.h */ diff --git a/contrib/libs/pcre/ya.make b/contrib/libs/pcre/ya.make index 26b8b1a5710..85ae1506979 100644 --- a/contrib/libs/pcre/ya.make +++ b/contrib/libs/pcre/ya.make @@ -1,12 +1,12 @@ # Generated by devtools/yamaker from nixpkgs a58a0b5098f0c2a389ee70eb69422a052982d990. 
-LIBRARY() +LIBRARY() OWNER( orivej g:cpp-contrib ) - + VERSION(8.44) ORIGINAL_SOURCE(https://ftp.pcre.org/pub/pcre/pcre-8.44.tar.bz2) @@ -26,14 +26,14 @@ ADDINCL( ) NO_COMPILER_WARNINGS() - + NO_RUNTIME() CFLAGS( GLOBAL -DPCRE_STATIC -DHAVE_CONFIG_H ) - + # JIT adds ≈108KB to binary size which may be critical for mobile and embedded devices binary distributions DEFAULT(ARCADIA_PCRE_ENABLE_JIT yes) @@ -43,32 +43,32 @@ IF (ARCADIA_PCRE_ENABLE_JIT) ) ENDIF() -SRCS( +SRCS( pcre_byte_order.c pcre_chartables.c - pcre_compile.c - pcre_config.c - pcre_dfa_exec.c - pcre_exec.c - pcre_fullinfo.c - pcre_get.c - pcre_globals.c + pcre_compile.c + pcre_config.c + pcre_dfa_exec.c + pcre_exec.c + pcre_fullinfo.c + pcre_get.c + pcre_globals.c pcre_jit_compile.c - pcre_maketables.c - pcre_newline.c - pcre_ord2utf8.c - pcre_refcount.c + pcre_maketables.c + pcre_newline.c + pcre_ord2utf8.c + pcre_refcount.c pcre_string_utils.c - pcre_study.c - pcre_tables.c + pcre_study.c + pcre_tables.c pcre_ucd.c - pcre_valid_utf8.c - pcre_version.c - pcre_xclass.c - pcreposix.c -) - -END() + pcre_valid_utf8.c + pcre_version.c + pcre_xclass.c + pcreposix.c +) + +END() RECURSE( pcre16 diff --git a/contrib/libs/pdqsort/ya.make b/contrib/libs/pdqsort/ya.make index cfb0bab2734..bba3e8e78d2 100644 --- a/contrib/libs/pdqsort/ya.make +++ b/contrib/libs/pdqsort/ya.make @@ -7,8 +7,8 @@ OWNER( LIBRARY() -LICENSE(Zlib) - +LICENSE(Zlib) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) VERSION(978bc36a9bd4143a54b2551cfd9ce8a6afd6d04c) diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp index 5a97351e411..468ff61d924 100644 --- a/contrib/libs/pire/pire/extra/count.cpp +++ b/contrib/libs/pire/pire/extra/count.cpp @@ -11,7 +11,7 @@ * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
- * + * * Pire is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -33,7 +33,7 @@ #include namespace Pire { - + namespace Impl { typedef LoadedScanner::Action Action; @@ -886,28 +886,28 @@ public: using TAction = typename Scanner::Action; using InternalState = typename Scanner::InternalState; typedef TMap InvStates; - + CountingScannerGlueTask(const Scanner& lhs, const Scanner& rhs) : ScannerGlueCommon(lhs, rhs, LettersEquality(lhs.m_letters, rhs.m_letters)) { } - + void AcceptStates(const TVector& states) { States = states; this->SetSc(THolder(new Scanner)); this->Sc().Init(states.size(), this->Letters(), 0, this->Lhs().RegexpsCount() + this->Rhs().RegexpsCount()); - + for (size_t i = 0; i < states.size(); ++i) this->Sc().SetTag(i, this->Lhs().m_tags[this->Lhs().StateIdx(states[i].first)] | (this->Rhs().m_tags[this->Rhs().StateIdx(states[i].second)] << 3)); } - + void Connect(size_t from, size_t to, Char letter) { this->Sc().SetJump(from, letter, to, Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount())); } - + protected: TVector States; TAction Action(const Scanner& sc, InternalState state, Char letter) const @@ -981,7 +981,7 @@ private: } - + CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */) { if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h index 03c2659c809..bd1526b98d0 100644 --- a/contrib/libs/pire/pire/extra/count.h +++ b/contrib/libs/pire/pire/extra/count.h @@ -11,7 +11,7 @@ * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
- * + * * Pire is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -31,11 +31,11 @@ namespace Pire { class Fsm; - + namespace Impl { template class ScannerGlueCommon; - + template class CountingScannerGlueTask; @@ -121,7 +121,7 @@ public: enum { IncrementAction = 1, ResetAction = 2, - + FinalFlag = 0, DeadFlag = 1, }; @@ -170,7 +170,7 @@ public: bool Dead(const State&) const { return false; } using LoadedScanner::Swap; - + size_t StateIndex(const State& s) const { return StateIdx(s.m_state); } protected: @@ -192,7 +192,7 @@ protected: mask &= s.m_updatedMask; if (mask) { ResetPerformer::Do(s, mask); - s.m_updatedMask &= (Action)~mask; + s.m_updatedMask &= (Action)~mask; } } @@ -267,7 +267,7 @@ private: else return 0; } - + friend void BuildScanner(const Fsm&, CountingScanner&); friend class Impl::ScannerGlueCommon; friend class Impl::CountingScannerGlueTask; diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h index 3b77a3979e6..120dc403b75 100644 --- a/contrib/libs/pire/pire/scanners/loaded.h +++ b/contrib/libs/pire/pire/scanners/loaded.h @@ -11,7 +11,7 @@ * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Pire is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the @@ -68,16 +68,16 @@ public: }; // Override in subclass, if neccessary - enum { + enum { FinalFlag = 0, DeadFlag = 0 }; static const size_t MAX_RE_COUNT = 16; -protected: +protected: LoadedScanner() { Alias(Null()); } - + LoadedScanner(const LoadedScanner& s): m(s.m) { if (s.m_buffer) { @@ -137,7 +137,7 @@ public: Locals* locals; Impl::MapPtr(locals, 1, p, size); memcpy(&s.m, locals, sizeof(s.m)); - + Impl::MapPtr(s.m_letters, MaxChar, p, size); Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size); if (header.Version == Header::RE_VERSION_WITH_MACTIONS) { @@ -145,7 +145,7 @@ public: Impl::MapPtr(actions, s.m.statesCount * s.m.lettersCount, p, size); } Impl::MapPtr(s.m_tags, s.m.statesCount, p, size); - + s.m.initial += reinterpret_cast(s.m_jumps); Swap(s); @@ -260,14 +260,14 @@ private: static const LoadedScanner n = Fsm::MakeFalse().Compile(); return n; } - + void Markup(void* buf) { m_letters = reinterpret_cast(buf); m_jumps = reinterpret_cast(m_letters + MaxChar); m_tags = reinterpret_cast(m_jumps + m.statesCount * m.lettersCount); } - + void Alias(const LoadedScanner& s) { memcpy(&m, &s.m, sizeof(m)); @@ -283,9 +283,9 @@ private: Init(states, letters, startState, regexpsCount); } - friend class Fsm; + friend class Fsm; }; - + inline LoadedScanner::~LoadedScanner() = default; } diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h index 105b44ca3ee..29679e416ed 100644 --- a/contrib/libs/pire/pire/scanners/multi.h +++ b/contrib/libs/pire/pire/scanners/multi.h @@ -82,7 +82,7 @@ namespace Impl { }; -// Scanner implementation parametrized by +// Scanner implementation parametrized by // - transition table representation strategy // - strategy for fast forwarding through memory ranges template @@ -120,7 +120,7 @@ public: typedef typename Shortcutting::template ExtendedRowHeader ScannerRowHeader; Scanner() { Alias(Null()); } - + explicit Scanner(Fsm& fsm, size_t distance = 0) { if (distance) { @@ -257,14 
+257,14 @@ public: throw Error("Type mismatch while mmapping Pire::Scanner"); Impl::AdvancePtr(p, size, sizeof(s.m)); Impl::AlignPtr(p, size); - + if (Shortcutting::Signature != s.m.shortcuttingSignature) throw Error("This scanner has different shortcutting type"); - + bool empty = *((const bool*) p); Impl::AdvancePtr(p, size, sizeof(empty)); Impl::AlignPtr(p, size); - + if (empty) s.Alias(Null()); else { @@ -335,8 +335,8 @@ protected: inline static const Scanner& Null() { static const Scanner n = Fsm::MakeFalse().Compile< Scanner >(); - - return n; + + return n; } // Returns transition row size in Transition's. Row size_in bytes should be a multiple of sizeof(MaxSizeWord) @@ -394,7 +394,7 @@ protected: m_finalIndex = s.m_finalIndex; m_transitions = s.m_transitions; } - + template void DeepCopy(const Scanner& s) { @@ -582,7 +582,7 @@ struct ScannerSaver { bool empty; LoadPodType(s, empty); Impl::AlignLoad(s, sizeof(empty)); - + if (empty) { sc.Alias(ScannerType::Null()); } else { @@ -596,13 +596,13 @@ struct ScannerSaver { // TODO: implement more effective serialization // of nonrelocatable scanner if necessary - + template static void SaveScanner(const Scanner& scanner, yostream* s) { Scanner(scanner).Save(s); } - + template static void LoadScanner(Scanner& scanner, yistream* s) { @@ -633,7 +633,7 @@ private: NO_SHORTCUT_MASK = 1, // the state doesn't have shortcuts NO_EXIT_MASK = 2 // the state has only transtions to itself (we can stop the scan) }; - + template struct MaskCheckerBase { static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION @@ -645,7 +645,7 @@ private: } return !IsAnySet(mask); } - + static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION const Word* DoRun(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) { @@ -653,12 +653,12 @@ private: return begin; } }; - + template struct MaskChecker : MaskCheckerBase { typedef MaskCheckerBase Base; typedef MaskChecker Next; - + static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION const Word* Run(const 
ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) { @@ -668,17 +668,17 @@ private: return Next::Run(hdr, alignOffset, begin, end); } }; - + template struct MaskChecker : MaskCheckerBase { typedef MaskCheckerBase Base; - + static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end) { return Base::DoRun(hdr, alignOffset, begin, end); } - }; + }; // Compares the ExitMask[0] value without SSE reads which seems to be more optimal template @@ -704,7 +704,7 @@ public: MaskSizeInSizeT = 2 * SizeTInMaxSizeWord, }; - public: + public: static const size_t ExitMaskCount = MaskCount; inline @@ -716,14 +716,14 @@ public: Y_ASSERT(IsAligned(p, sizeof(Word))); return *p; } - + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION size_t Mask(size_t i) const { Y_ASSERT(i < ExitMaskCount); return ExitMasksArray[MaskSizeInSizeT*i]; } - + void SetMask(size_t i, size_t val) { for (size_t j = 0; j < MaskSizeInSizeT; ++j) @@ -735,7 +735,7 @@ public: for (size_t i = 0; i < ExitMaskCount; ++i) SetMask(i, NO_SHORTCUT_MASK); } - + template ExtendedRowHeader& operator =(const ExtendedRowHeader& other) { @@ -932,7 +932,7 @@ public: static inline PIRE_HOT_FUNCTION Action RunAligned(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, const size_t* end , Pred pred) { - typename ScannerType::State state = st; + typename ScannerType::State state = st; const Word* head = AlignUp((const Word*) begin, sizeof(Word)); const Word* tail = AlignDown((const Word*) end, sizeof(Word)); for (; begin != (const size_t*) head && begin != end; ++begin) @@ -940,7 +940,7 @@ public: st = state; return Stop; } - + if (begin == end) { st = state; return Continue; @@ -949,7 +949,7 @@ public: st = state; return pred(scanner, state, ((const char*) end)); } - + // Row size should be a multiple of MaxSizeWord size. 
Then alignOffset is the same for any state Y_ASSERT((scanner.RowSize()*sizeof(typename ScannerType::Transition)) % sizeof(MaxSizeWord) == 0); size_t alignOffset = (AlignUp((size_t)scanner.m_transitions, sizeof(Word)) - (size_t)scanner.m_transitions) / sizeof(size_t); @@ -980,14 +980,14 @@ public: head = skipEnd; noShortcut = true; } - + for (size_t* p = (size_t*) tail; p != end; ++p) { if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Stop) { st = state; return Stop; } } - + st = state; return Continue; } @@ -1004,36 +1004,36 @@ public: using Base::Rhs; using Base::Sc; using Base::Letters; - + typedef GluedStateLookupTable<256*1024, typename Scanner::State> InvStates; - + ScannerGlueTask(const Scanner& lhs, const Scanner& rhs) : ScannerGlueCommon(lhs, rhs, LettersEquality(lhs.m_letters, rhs.m_letters)) { } - + void AcceptStates(const TVector& states) { // Make up a new scanner and fill in the final table - + size_t finalTableSize = 0; for (auto&& i : states) finalTableSize += RangeLen(Lhs().AcceptedRegexps(i.first)) + RangeLen(Rhs().AcceptedRegexps(i.second)); this->SetSc(THolder(new Scanner)); Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount()); - + auto finalWriter = Sc().m_final; for (size_t state = 0; state != states.size(); ++state) { Sc().m_finalIndex[state] = finalWriter - Sc().m_final; finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter); finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter); *finalWriter++ = static_cast(-1); - + Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0) | ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? 
Scanner::DeadFlag : 0)); } } - + void Connect(size_t from, size_t to, Char letter) { Sc().SetJump(from, letter, to); } const Scanner& Success() @@ -1041,8 +1041,8 @@ public: Sc().BuildShortcuts(); return Sc(); } - -private: + +private: template size_t RangeLen(ypair range) const { @@ -1089,7 +1089,7 @@ Impl::Scanner Impl::Scanner: return rhs; if (rhs.Empty()) return lhs; - + static const size_t DefMaxSize = 80000; Impl::ScannerGlueTask< Impl::Scanner > task(lhs, rhs); return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h index ab0aca6ae1f..ef959aeed13 100644 --- a/contrib/libs/pire/pire/scanners/simple.h +++ b/contrib/libs/pire/pire/scanners/simple.h @@ -11,7 +11,7 @@ * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Pire is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -48,7 +48,7 @@ public: typedef ui8 Tag; SimpleScanner() { Alias(Null()); } - + explicit SimpleScanner(Fsm& fsm, size_t distance = 0); size_t Size() const { return m.statesCount; } @@ -96,7 +96,7 @@ public: m.initial += (m_transitions - s.m_transitions) * sizeof(Transition); } } - + // Makes a shallow ("weak") copy of the given scanner. // The copied scanner does not maintain lifetime of the original's entrails. 
void Alias(const SimpleScanner& s) @@ -139,7 +139,7 @@ public: bool empty = *((const bool*) p); Impl::AdvancePtr(p, size, sizeof(empty)); Impl::AlignPtr(p, size); - + if (empty) s.Alias(Null()); else { @@ -235,7 +235,7 @@ inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance) fsm = CreateApproxFsm(fsm, distance); } fsm.Canonize(); - + m.statesCount = fsm.Size(); m_buffer = BufferType(new char[BufSize()]); memset(m_buffer.Get(), 0, BufSize()); @@ -255,7 +255,7 @@ inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance) } } - + } #endif diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h index 6a12817623f..6adfcb8c1d0 100644 --- a/contrib/libs/pire/pire/scanners/slow.h +++ b/contrib/libs/pire/pire/scanners/slow.h @@ -11,7 +11,7 @@ * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Pire is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -56,7 +56,7 @@ public: typedef ui32 Action; typedef ui8 Tag; - enum { + enum { FinalFlag = 1, DeadFlag = 0 }; @@ -84,7 +84,7 @@ public: size_t Size() const { return GetSize(); } size_t GetSize() const { return m.statesCount; } bool Empty() const { return m_finals == Null().m_finals; } - + size_t Id() const {return (size_t) -1;} size_t RegexpsCount() const { return Empty() ? 
0 : 1; } @@ -170,7 +170,7 @@ public: bool CanStop(const State& s) const { return Final(s); } - + const void* Mmap(const void* ptr, size_t size) { Impl::CheckAlign(ptr); @@ -181,11 +181,11 @@ public: Locals* locals; Impl::MapPtr(locals, 1, p, size); memcpy(&s.m, locals, sizeof(s.m)); - + bool empty = *((const bool*) p); Impl::AdvancePtr(p, size, sizeof(empty)); Impl::AlignPtr(p, size); - + if (empty) s.Alias(Null()); else { @@ -213,7 +213,7 @@ public: DoSwap(m_letters, s.m_letters); DoSwap(m_pool, s.m_pool); DoSwap(m_vec, s.m_vec); - + DoSwap(m_vecptr, s.m_vecptr); DoSwap(need_actions, s.need_actions); DoSwap(m_actionsvec, s.m_actionsvec); @@ -249,7 +249,7 @@ public: m_vecptr = &m_vec; } } - + explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0) : need_actions(needActions) { @@ -365,9 +365,9 @@ private: memset(p, 0, size * sizeof(T)); m_pool.push_back(p); } - + void Alias(const SlowScanner& s) - { + { memcpy(&m, &s.m, sizeof(m)); m_vec.clear(); need_actions = s.need_actions; @@ -380,7 +380,7 @@ private: m_vecptr = s.m_vecptr; m_pool.clear(); } - + void SetJump(size_t oldState, Char c, size_t newState, unsigned long action) { Y_ASSERT(!m_vec.empty()); @@ -397,7 +397,7 @@ private: void SetInitial(size_t state) { m.start = state; } void SetTag(size_t state, ui8 tag) { m_finals[state] = (tag != 0); } - + void FinishBuild() {} static ypair Accept() @@ -427,7 +427,7 @@ inline const SlowScanner& SlowScanner::Null() return n; } -#ifndef PIRE_DEBUG +#ifndef PIRE_DEBUG /// A specialization of Run(), since its state is much heavier than other ones /// and we thus want to avoid copying states. 
template<> diff --git a/contrib/libs/pire/pire/stub/stl.h b/contrib/libs/pire/pire/stub/stl.h index 53247b2afda..98ebd9f7c6f 100644 --- a/contrib/libs/pire/pire/stub/stl.h +++ b/contrib/libs/pire/pire/stub/stl.h @@ -27,10 +27,10 @@ namespace Pire { using ystring = TString; - template using ybitset = std::bitset; + template using ybitset = std::bitset; template using ypair = std::pair; - template using yauto_ptr = std::auto_ptr; - template using ybinary_function = std::binary_function; + template using yauto_ptr = std::auto_ptr; + template using ybinary_function = std::binary_function; template inline ypair ymake_pair(T1 v1, T2 v2) { @@ -39,16 +39,16 @@ namespace Pire { template inline T ymax(T v1, T v2) { - return std::max(v1, v2); + return std::max(v1, v2); } template inline T ymin(T v1, T v2) { - return std::min(v1, v2); + return std::min(v1, v2); } template - void Fill(Iter begin, Iter end, T t) { std::fill(begin, end, t); } + void Fill(Iter begin, Iter end, T t) { std::fill(begin, end, t); } class Error: public yexception { public: diff --git a/contrib/libs/pire/pire/stub/utf8.h b/contrib/libs/pire/pire/stub/utf8.h index 7bb05c8ef94..51ea0479d4a 100644 --- a/contrib/libs/pire/pire/stub/utf8.h +++ b/contrib/libs/pire/pire/stub/utf8.h @@ -1,5 +1,5 @@ -#pragma once - +#pragma once + #include #include @@ -11,7 +11,7 @@ inline wchar32 to_upper(wchar32 c) { } inline bool is_digit(wchar32 c) { - return IsDigit(c); + return IsDigit(c); } inline bool is_upper(wchar32 c) { diff --git a/contrib/libs/poco/Crypto/ya.make b/contrib/libs/poco/Crypto/ya.make index 37bd231c0a9..cf58950785a 100644 --- a/contrib/libs/poco/Crypto/ya.make +++ b/contrib/libs/poco/Crypto/ya.make @@ -11,7 +11,7 @@ LICENSE( BSD-3-Clause AND BSL-1.0 ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/poco/Foundation/ya.make b/contrib/libs/poco/Foundation/ya.make index 2e1346c3a5a..9b22a799791 100644 --- a/contrib/libs/poco/Foundation/ya.make +++ 
b/contrib/libs/poco/Foundation/ya.make @@ -15,7 +15,7 @@ LICENSE( RSA-MD AND RSA-MD4 ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/poco/JSON/ya.make b/contrib/libs/poco/JSON/ya.make index 98aaf57650b..11e0ff72b1d 100644 --- a/contrib/libs/poco/JSON/ya.make +++ b/contrib/libs/poco/JSON/ya.make @@ -11,7 +11,7 @@ LICENSE( BSD-3-Clause AND BSL-1.0 ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/poco/Net/ya.make b/contrib/libs/poco/Net/ya.make index c0daf1e9910..9feadc70270 100644 --- a/contrib/libs/poco/Net/ya.make +++ b/contrib/libs/poco/Net/ya.make @@ -12,7 +12,7 @@ LICENSE( BSL-1.0 AND Custom-Punycode ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/poco/NetSSL_OpenSSL/ya.make b/contrib/libs/poco/NetSSL_OpenSSL/ya.make index 599ebc617b7..ba6f99d872b 100644 --- a/contrib/libs/poco/NetSSL_OpenSSL/ya.make +++ b/contrib/libs/poco/NetSSL_OpenSSL/ya.make @@ -11,7 +11,7 @@ LICENSE( BSD-3-Clause AND BSL-1.0 ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/poco/Util/ya.make b/contrib/libs/poco/Util/ya.make index 2f349bb7c68..3a295763c66 100644 --- a/contrib/libs/poco/Util/ya.make +++ b/contrib/libs/poco/Util/ya.make @@ -11,7 +11,7 @@ LICENSE( BSD-3-Clause AND BSL-1.0 ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/poco/XML/ya.make b/contrib/libs/poco/XML/ya.make index 40c45040284..ffa0227a187 100644 --- a/contrib/libs/poco/XML/ya.make +++ b/contrib/libs/poco/XML/ya.make @@ -11,7 +11,7 @@ LICENSE( BSD-3-Clause AND BSL-1.0 ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( diff --git a/contrib/libs/python/Include/ya.make b/contrib/libs/python/Include/ya.make index 556ec54e080..e65263e6a0b 100644 --- a/contrib/libs/python/Include/ya.make +++ b/contrib/libs/python/Include/ya.make @@ -3,7 +3,7 @@ PY23_LIBRARY() WITHOUT_LICENSE_TEXTS() LICENSE(YandexOpen) - + OWNER( orivej spreis 
diff --git a/contrib/libs/python/ut/lib/ya.make b/contrib/libs/python/ut/lib/ya.make index 07b2c246f11..cfa0aaa612f 100644 --- a/contrib/libs/python/ut/lib/ya.make +++ b/contrib/libs/python/ut/lib/ya.make @@ -1,17 +1,17 @@ OWNER(spreis) PY23_LIBRARY() - + WITHOUT_LICENSE_TEXTS() LICENSE(YandexOpen) - + PEERDIR( library/cpp/testing/unittest ) - + SRCS( test.cpp ) - + END() diff --git a/contrib/libs/python/ya.make b/contrib/libs/python/ya.make index f635a96ab0f..20a57f4b480 100644 --- a/contrib/libs/python/ya.make +++ b/contrib/libs/python/ya.make @@ -7,7 +7,7 @@ OWNER( PY23_LIBRARY() LICENSE(YandexOpen) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) NO_PYTHON_INCLUDES() diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc index 718bc9c6797..85f16f060ba 100644 --- a/contrib/libs/re2/re2/parse.cc +++ b/contrib/libs/re2/re2/parse.cc @@ -1329,7 +1329,7 @@ static bool ParseInteger(StringPiece* s, int* np) { if (s->empty() || !isdigit((*s)[0] & 0xFF)) return false; // Disallow leading zeros. - if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF)) + if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF)) return false; int n = 0; int c; @@ -1471,7 +1471,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp, int code; switch (c) { default: - if (c < Runeself && !isalpha(c) && !isdigit(c)) { + if (c < Runeself && !isalpha(c) && !isdigit(c)) { // Escaped non-word characters are always themselves. // PCRE is not quite so rigorous: it accepts things like // \q, but we don't. 
We once rejected \_, but too many @@ -1633,11 +1633,11 @@ static const UGroup* LookupGroup(const StringPiece& name, } // Look for a POSIX group with the given name (e.g., "[:^alpha:]") -static const UGroup* LookupPosixGroup(const StringPiece& name) { +static const UGroup* LookupPosixGroup(const StringPiece& name) { return LookupGroup(name, posix_groups, num_posix_groups); } -static const UGroup* LookupPerlGroup(const StringPiece& name) { +static const UGroup* LookupPerlGroup(const StringPiece& name) { return LookupGroup(name, perl_groups, num_perl_groups); } @@ -1648,7 +1648,7 @@ static URange32 any32[] = { { 65536, Runemax } }; static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; // Look for a Unicode group with the given name (e.g., "Han") -static const UGroup* LookupUnicodeGroup(const StringPiece& name) { +static const UGroup* LookupUnicodeGroup(const StringPiece& name) { // Special case: "Any" means any. if (name == StringPiece("Any")) return &anygroup; @@ -1708,7 +1708,7 @@ static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, // On success, sets *s to span the remainder of the string // and returns the corresponding UGroup. // The StringPiece must *NOT* be edited unless the call succeeds. -const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) { +const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) { if (!(parse_flags & Regexp::PerlClasses)) return NULL; if (s->size() < 2 || (*s)[0] != '\\') @@ -1716,7 +1716,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl // Could use StringPieceToRune, but there aren't // any non-ASCII Perl group names. 
StringPiece name(s->data(), 2); - const UGroup *g = LookupPerlGroup(name); + const UGroup *g = LookupPerlGroup(name); if (g == NULL) return NULL; s->remove_prefix(name.size()); @@ -1783,7 +1783,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, #if !defined(RE2_USE_ICU) // Look up the group in the RE2 Unicode data. - const UGroup *g = LookupUnicodeGroup(name); + const UGroup *g = LookupUnicodeGroup(name); if (g == NULL) { status->set_code(kRegexpBadCharRange); status->set_error_arg(seq); @@ -1843,7 +1843,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags, q += 2; StringPiece name(p, static_cast(q - p)); - const UGroup *g = LookupPosixGroup(name); + const UGroup *g = LookupPosixGroup(name); if (g == NULL) { status->set_code(kRegexpBadCharRange); status->set_error_arg(name); @@ -1981,7 +1981,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, } // Look for Perl character class symbols (extension). - const UGroup *g = MaybeParsePerlCCEscape(s, flags_); + const UGroup *g = MaybeParsePerlCCEscape(s, flags_); if (g != NULL) { AddUGroup(re->ccb_, g, g->sign, flags_); continue; @@ -2456,7 +2456,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, } } - const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags()); + const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags()); if (g != NULL) { Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase); re->ccb_ = new CharClassBuilder; diff --git a/contrib/libs/re2/re2/perl_groups.cc b/contrib/libs/re2/re2/perl_groups.cc index 605c0e4aa7a..46874445816 100644 --- a/contrib/libs/re2/re2/perl_groups.cc +++ b/contrib/libs/re2/re2/perl_groups.cc @@ -5,21 +5,21 @@ namespace re2 { -static const URange16 code1[] = { /* \d */ +static const URange16 code1[] = { /* \d */ { 0x30, 0x39 }, }; -static const URange16 code2[] = { /* \s */ +static const URange16 code2[] = { /* \s */ { 0x9, 0xa }, { 0xc, 0xd }, { 0x20, 0x20 }, }; -static const URange16 
code3[] = { /* \w */ +static const URange16 code3[] = { /* \w */ { 0x30, 0x39 }, { 0x41, 0x5a }, { 0x5f, 0x5f }, { 0x61, 0x7a }, }; -const UGroup perl_groups[] = { +const UGroup perl_groups[] = { { "\\d", +1, code1, 1, 0, 0 }, { "\\D", -1, code1, 1, 0, 0 }, { "\\s", +1, code2, 3, 0, 0 }, @@ -27,64 +27,64 @@ const UGroup perl_groups[] = { { "\\w", +1, code3, 4, 0, 0 }, { "\\W", -1, code3, 4, 0, 0 }, }; -const int num_perl_groups = 6; -static const URange16 code4[] = { /* [:alnum:] */ +const int num_perl_groups = 6; +static const URange16 code4[] = { /* [:alnum:] */ { 0x30, 0x39 }, { 0x41, 0x5a }, { 0x61, 0x7a }, }; -static const URange16 code5[] = { /* [:alpha:] */ +static const URange16 code5[] = { /* [:alpha:] */ { 0x41, 0x5a }, { 0x61, 0x7a }, }; -static const URange16 code6[] = { /* [:ascii:] */ +static const URange16 code6[] = { /* [:ascii:] */ { 0x0, 0x7f }, }; -static const URange16 code7[] = { /* [:blank:] */ +static const URange16 code7[] = { /* [:blank:] */ { 0x9, 0x9 }, { 0x20, 0x20 }, }; -static const URange16 code8[] = { /* [:cntrl:] */ +static const URange16 code8[] = { /* [:cntrl:] */ { 0x0, 0x1f }, { 0x7f, 0x7f }, }; -static const URange16 code9[] = { /* [:digit:] */ +static const URange16 code9[] = { /* [:digit:] */ { 0x30, 0x39 }, }; -static const URange16 code10[] = { /* [:graph:] */ +static const URange16 code10[] = { /* [:graph:] */ { 0x21, 0x7e }, }; -static const URange16 code11[] = { /* [:lower:] */ +static const URange16 code11[] = { /* [:lower:] */ { 0x61, 0x7a }, }; -static const URange16 code12[] = { /* [:print:] */ +static const URange16 code12[] = { /* [:print:] */ { 0x20, 0x7e }, }; -static const URange16 code13[] = { /* [:punct:] */ +static const URange16 code13[] = { /* [:punct:] */ { 0x21, 0x2f }, { 0x3a, 0x40 }, { 0x5b, 0x60 }, { 0x7b, 0x7e }, }; -static const URange16 code14[] = { /* [:space:] */ +static const URange16 code14[] = { /* [:space:] */ { 0x9, 0xd }, { 0x20, 0x20 }, }; -static const URange16 code15[] = { /* [:upper:] */ 
+static const URange16 code15[] = { /* [:upper:] */ { 0x41, 0x5a }, }; -static const URange16 code16[] = { /* [:word:] */ +static const URange16 code16[] = { /* [:word:] */ { 0x30, 0x39 }, { 0x41, 0x5a }, { 0x5f, 0x5f }, { 0x61, 0x7a }, }; -static const URange16 code17[] = { /* [:xdigit:] */ +static const URange16 code17[] = { /* [:xdigit:] */ { 0x30, 0x39 }, { 0x41, 0x46 }, { 0x61, 0x66 }, }; -const UGroup posix_groups[] = { +const UGroup posix_groups[] = { { "[:alnum:]", +1, code4, 3, 0, 0 }, { "[:^alnum:]", -1, code4, 3, 0, 0 }, { "[:alpha:]", +1, code5, 2, 0, 0 }, @@ -114,6 +114,6 @@ const UGroup posix_groups[] = { { "[:xdigit:]", +1, code17, 3, 0, 0 }, { "[:^xdigit:]", -1, code17, 3, 0, 0 }, }; -const int num_posix_groups = 28; +const int num_posix_groups = 28; } // namespace re2 diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc index 1a226d12b92..47fb385e4e4 100644 --- a/contrib/libs/re2/re2/re2.cc +++ b/contrib/libs/re2/re2/re2.cc @@ -956,7 +956,7 @@ bool RE2::CheckRewriteString(const StringPiece& rewrite, if (c == '\\') { continue; } - if (!isdigit(c)) { + if (!isdigit(c)) { *error = "Rewrite schema error: " "'\\' must be followed by a digit or '\\'."; return false; diff --git a/contrib/libs/re2/re2/unicode_casefold.h b/contrib/libs/re2/re2/unicode_casefold.h index d71f50f0b81..8bdbb42fbc1 100644 --- a/contrib/libs/re2/re2/unicode_casefold.h +++ b/contrib/libs/re2/re2/unicode_casefold.h @@ -59,8 +59,8 @@ struct CaseFold { int32_t delta; }; -extern const CaseFold unicode_casefold[]; -extern const int num_unicode_casefold; +extern const CaseFold unicode_casefold[]; +extern const int num_unicode_casefold; extern const CaseFold unicode_tolower[]; extern const int num_unicode_tolower; diff --git a/contrib/libs/re2/re2/unicode_groups.h b/contrib/libs/re2/re2/unicode_groups.h index 512203c43a3..75f55daa619 100644 --- a/contrib/libs/re2/re2/unicode_groups.h +++ b/contrib/libs/re2/re2/unicode_groups.h @@ -41,26 +41,26 @@ struct UGroup { const 
char *name; int sign; // +1 for [abc], -1 for [^abc] - const URange16 *r16; + const URange16 *r16; int nr16; - const URange32 *r32; + const URange32 *r32; int nr32; }; // Named by property or script name (e.g., "Nd", "N", "Han"). // Negated groups are not included. -extern const UGroup unicode_groups[]; -extern const int num_unicode_groups; +extern const UGroup unicode_groups[]; +extern const int num_unicode_groups; // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). // Negated groups are included. -extern const UGroup posix_groups[]; -extern const int num_posix_groups; +extern const UGroup posix_groups[]; +extern const int num_posix_groups; // Named by Perl name (e.g., "\\d", "\\D"). // Negated groups are included. -extern const UGroup perl_groups[]; -extern const int num_perl_groups; +extern const UGroup perl_groups[]; +extern const int num_perl_groups; } // namespace re2 diff --git a/contrib/libs/re2/util/utf.h b/contrib/libs/re2/util/utf.h index 74a52727c32..85b42972390 100644 --- a/contrib/libs/re2/util/utf.h +++ b/contrib/libs/re2/util/utf.h @@ -18,7 +18,7 @@ #ifndef UTIL_UTF_H_ #define UTIL_UTF_H_ -#include +#include namespace re2 { diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make index 1af8c4ed6a4..8072de2eb2b 100644 --- a/contrib/libs/re2/ya.make +++ b/contrib/libs/re2/ya.make @@ -21,7 +21,7 @@ ADDINCL( ) NO_COMPILER_WARNINGS() - + IF (WITH_VALGRIND) CFLAGS( GLOBAL -DRE2_ON_VALGRIND diff --git a/contrib/libs/snappy/snappy-c.cc b/contrib/libs/snappy/snappy-c.cc index 0cb59c7296f..473a0b09786 100644 --- a/contrib/libs/snappy/snappy-c.cc +++ b/contrib/libs/snappy/snappy-c.cc @@ -1,90 +1,90 @@ -// Copyright 2011 Martin Gieseking . -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "snappy.h" -#include "snappy-c.h" - -extern "C" { - -snappy_status snappy_compress(const char* input, - size_t input_length, - char* compressed, - size_t *compressed_length) { - if (*compressed_length < snappy_max_compressed_length(input_length)) { - return SNAPPY_BUFFER_TOO_SMALL; - } - snappy::RawCompress(input, input_length, compressed, compressed_length); - return SNAPPY_OK; -} - -snappy_status snappy_uncompress(const char* compressed, - size_t compressed_length, - char* uncompressed, - size_t* uncompressed_length) { - size_t real_uncompressed_length; - if (!snappy::GetUncompressedLength(compressed, - compressed_length, - &real_uncompressed_length)) { - return SNAPPY_INVALID_INPUT; - } - if (*uncompressed_length < real_uncompressed_length) { - return SNAPPY_BUFFER_TOO_SMALL; - } - if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) { - return SNAPPY_INVALID_INPUT; - } - *uncompressed_length = real_uncompressed_length; - return SNAPPY_OK; -} - -size_t snappy_max_compressed_length(size_t source_length) { - return snappy::MaxCompressedLength(source_length); -} - -snappy_status snappy_uncompressed_length(const char *compressed, - size_t compressed_length, - size_t *result) { - if (snappy::GetUncompressedLength(compressed, - compressed_length, - result)) { - return SNAPPY_OK; - } else { - return SNAPPY_INVALID_INPUT; - } -} - -snappy_status snappy_validate_compressed_buffer(const char *compressed, - size_t compressed_length) { - if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) { - return SNAPPY_OK; - } else { - return SNAPPY_INVALID_INPUT; - } -} - -} // extern "C" +// Copyright 2011 Martin Gieseking . +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "snappy.h" +#include "snappy-c.h" + +extern "C" { + +snappy_status snappy_compress(const char* input, + size_t input_length, + char* compressed, + size_t *compressed_length) { + if (*compressed_length < snappy_max_compressed_length(input_length)) { + return SNAPPY_BUFFER_TOO_SMALL; + } + snappy::RawCompress(input, input_length, compressed, compressed_length); + return SNAPPY_OK; +} + +snappy_status snappy_uncompress(const char* compressed, + size_t compressed_length, + char* uncompressed, + size_t* uncompressed_length) { + size_t real_uncompressed_length; + if (!snappy::GetUncompressedLength(compressed, + compressed_length, + &real_uncompressed_length)) { + return SNAPPY_INVALID_INPUT; + } + if (*uncompressed_length < real_uncompressed_length) { + return SNAPPY_BUFFER_TOO_SMALL; + } + if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) { + return SNAPPY_INVALID_INPUT; + } + *uncompressed_length = real_uncompressed_length; + return SNAPPY_OK; +} + +size_t snappy_max_compressed_length(size_t source_length) { + return snappy::MaxCompressedLength(source_length); +} + +snappy_status snappy_uncompressed_length(const char *compressed, + size_t compressed_length, + size_t *result) { + if (snappy::GetUncompressedLength(compressed, + compressed_length, + result)) { + return SNAPPY_OK; + } else { + return SNAPPY_INVALID_INPUT; + } +} + +snappy_status snappy_validate_compressed_buffer(const char *compressed, + size_t compressed_length) { + if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) { + return SNAPPY_OK; + } else { + return SNAPPY_INVALID_INPUT; + } +} + +} // extern "C" diff --git a/contrib/libs/snappy/snappy-c.h b/contrib/libs/snappy/snappy-c.h index 826bccfded2..32aa0c6b8b5 100644 --- a/contrib/libs/snappy/snappy-c.h +++ b/contrib/libs/snappy/snappy-c.h @@ -1,138 +1,138 @@ -/* - * Copyright 2011 Martin Gieseking . 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Plain C interface (a wrapper around the C++ implementation). - */ - +/* + * Copyright 2011 Martin Gieseking . + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Plain C interface (a wrapper around the C++ implementation). + */ + #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* - * Return values; see the documentation for each function to know - * what each can return. - */ -typedef enum { - SNAPPY_OK = 0, - SNAPPY_INVALID_INPUT = 1, + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* + * Return values; see the documentation for each function to know + * what each can return. 
+ */ +typedef enum { + SNAPPY_OK = 0, + SNAPPY_INVALID_INPUT = 1, SNAPPY_BUFFER_TOO_SMALL = 2 -} snappy_status; - -/* - * Takes the data stored in "input[0..input_length-1]" and stores - * it in the array pointed to by "compressed". - * - * signals the space available in "compressed". - * If it is not at least equal to "snappy_max_compressed_length(input_length)", - * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression, - * contains the true length of the compressed output, - * and SNAPPY_OK is returned. - * - * Example: - * size_t output_length = snappy_max_compressed_length(input_length); - * char* output = (char*)malloc(output_length); - * if (snappy_compress(input, input_length, output, &output_length) - * == SNAPPY_OK) { - * ... Process(output, output_length) ... - * } - * free(output); - */ -snappy_status snappy_compress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length); - -/* - * Given data in "compressed[0..compressed_length-1]" generated by - * calling the snappy_compress routine, this routine stores - * the uncompressed data to - * uncompressed[0..uncompressed_length-1]. - * Returns failure (a value not equal to SNAPPY_OK) if the message - * is corrupted and could not be decrypted. - * - * signals the space available in "uncompressed". - * If it is not at least equal to the value returned by - * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL - * is returned. After successful decompression, - * contains the true length of the decompressed output. - * - * Example: - * size_t output_length; - * if (snappy_uncompressed_length(input, input_length, &output_length) - * != SNAPPY_OK) { - * ... fail ... - * } - * char* output = (char*)malloc(output_length); - * if (snappy_uncompress(input, input_length, output, &output_length) - * == SNAPPY_OK) { - * ... Process(output, output_length) ... 
- * } - * free(output); - */ -snappy_status snappy_uncompress(const char* compressed, - size_t compressed_length, - char* uncompressed, - size_t* uncompressed_length); - -/* - * Returns the maximal size of the compressed representation of - * input data that is "source_length" bytes in length. - */ -size_t snappy_max_compressed_length(size_t source_length); - -/* - * REQUIRES: "compressed[]" was produced by snappy_compress() - * Returns SNAPPY_OK and stores the length of the uncompressed data in - * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error. - * This operation takes O(1) time. - */ -snappy_status snappy_uncompressed_length(const char* compressed, - size_t compressed_length, - size_t* result); - -/* - * Check if the contents of "compressed[]" can be uncompressed successfully. - * Does not return the uncompressed data; if so, returns SNAPPY_OK, - * or if not, returns SNAPPY_INVALID_INPUT. - * Takes time proportional to compressed_length, but is usually at least a - * factor of four faster than actual decompression. - */ -snappy_status snappy_validate_compressed_buffer(const char* compressed, - size_t compressed_length); - -#ifdef __cplusplus -} // extern "C" -#endif - +} snappy_status; + +/* + * Takes the data stored in "input[0..input_length-1]" and stores + * it in the array pointed to by "compressed". + * + * signals the space available in "compressed". + * If it is not at least equal to "snappy_max_compressed_length(input_length)", + * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression, + * contains the true length of the compressed output, + * and SNAPPY_OK is returned. + * + * Example: + * size_t output_length = snappy_max_compressed_length(input_length); + * char* output = (char*)malloc(output_length); + * if (snappy_compress(input, input_length, output, &output_length) + * == SNAPPY_OK) { + * ... Process(output, output_length) ... 
+ * } + * free(output); + */ +snappy_status snappy_compress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length); + +/* + * Given data in "compressed[0..compressed_length-1]" generated by + * calling the snappy_compress routine, this routine stores + * the uncompressed data to + * uncompressed[0..uncompressed_length-1]. + * Returns failure (a value not equal to SNAPPY_OK) if the message + * is corrupted and could not be decrypted. + * + * signals the space available in "uncompressed". + * If it is not at least equal to the value returned by + * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL + * is returned. After successful decompression, + * contains the true length of the decompressed output. + * + * Example: + * size_t output_length; + * if (snappy_uncompressed_length(input, input_length, &output_length) + * != SNAPPY_OK) { + * ... fail ... + * } + * char* output = (char*)malloc(output_length); + * if (snappy_uncompress(input, input_length, output, &output_length) + * == SNAPPY_OK) { + * ... Process(output, output_length) ... + * } + * free(output); + */ +snappy_status snappy_uncompress(const char* compressed, + size_t compressed_length, + char* uncompressed, + size_t* uncompressed_length); + +/* + * Returns the maximal size of the compressed representation of + * input data that is "source_length" bytes in length. + */ +size_t snappy_max_compressed_length(size_t source_length); + +/* + * REQUIRES: "compressed[]" was produced by snappy_compress() + * Returns SNAPPY_OK and stores the length of the uncompressed data in + * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error. + * This operation takes O(1) time. + */ +snappy_status snappy_uncompressed_length(const char* compressed, + size_t compressed_length, + size_t* result); + +/* + * Check if the contents of "compressed[]" can be uncompressed successfully. 
+ * Does not return the uncompressed data; if so, returns SNAPPY_OK, + * or if not, returns SNAPPY_INVALID_INPUT. + * Takes time proportional to compressed_length, but is usually at least a + * factor of four faster than actual decompression. + */ +snappy_status snappy_validate_compressed_buffer(const char* compressed, + size_t compressed_length); + +#ifdef __cplusplus +} // extern "C" +#endif + #endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */ diff --git a/contrib/libs/snappy/snappy-internal.h b/contrib/libs/snappy/snappy-internal.h index 28c179ee46c..1e1c307fef8 100644 --- a/contrib/libs/snappy/snappy-internal.h +++ b/contrib/libs/snappy/snappy-internal.h @@ -1,94 +1,94 @@ -// Copyright 2008 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Internals shared between the Snappy implementation and its unittest. - +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Internals shared between the Snappy implementation and its unittest. + #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ - -#include "snappy-stubs-internal.h" - -namespace snappy { -namespace internal { - + +#include "snappy-stubs-internal.h" + +namespace snappy { +namespace internal { + // Working memory performs a single allocation to hold all scratch space // required for compression. -class WorkingMemory { - public: +class WorkingMemory { + public: explicit WorkingMemory(size_t input_size); ~WorkingMemory(); - - // Allocates and clears a hash table using memory in "*this", - // stores the number of buckets in "*table_size" and returns a pointer to - // the base of the hash table. + + // Allocates and clears a hash table using memory in "*this", + // stores the number of buckets in "*table_size" and returns a pointer to + // the base of the hash table. 
uint16* GetHashTable(size_t fragment_size, int* table_size) const; char* GetScratchInput() const { return input_; } char* GetScratchOutput() const { return output_; } - - private: + + private: char* mem_; // the allocated memory, never nullptr size_t size_; // the size of the allocated memory, never 0 uint16* table_; // the pointer to the hashtable char* input_; // the pointer to the input scratch buffer char* output_; // the pointer to the output scratch buffer - + // No copying WorkingMemory(const WorkingMemory&); void operator=(const WorkingMemory&); -}; - -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. -// -// REQUIRES: "input_length <= kBlockSize" -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input_length)" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -char* CompressFragment(const char* input, - size_t input_length, - char* op, - uint16* table, - const int table_size); - +}; + +// Flat array compression that does not emit the "uncompressed length" +// prefix. Compresses "input" string to the "*op" buffer. +// +// REQUIRES: "input_length <= kBlockSize" +// REQUIRES: "op" points to an array of memory that is at least +// "MaxCompressedLength(input_length)" in size. +// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". +char* CompressFragment(const char* input, + size_t input_length, + char* op, + uint16* table, + const int table_size); + // Find the largest n such that -// -// s1[0,n-1] == s2[0,n-1] -// and n <= (s2_limit - s2). 
-// +// +// s1[0,n-1] == s2[0,n-1] +// and n <= (s2_limit - s2). +// // Return make_pair(n, n < 8). -// Does not read *s2_limit or beyond. -// Does not read *(s1 + (s2_limit - s2)) or beyond. -// Requires that s2_limit >= s2. -// +// Does not read *s2_limit or beyond. +// Does not read *(s1 + (s2_limit - s2)) or beyond. +// Requires that s2_limit >= s2. +// // Separate implementation for 64-bit, little-endian cpus. #if !defined(SNAPPY_IS_BIG_ENDIAN) && \ (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)) @@ -97,7 +97,7 @@ static inline std::pair FindMatchLength(const char* s1, const char* s2_limit) { assert(s2_limit >= s2); size_t matched = 0; - + // This block isn't necessary for correctness; we could just start looping // immediately. As an optimization though, it is useful. It creates some not // uncommon code paths that determine, without extra effort, whether the match @@ -115,59 +115,59 @@ static inline std::pair FindMatchLength(const char* s1, } } - // Find out how long the match is. We loop over the data 64 bits at a - // time until we find a 64-bit block that doesn't match; then we find - // the first non-matching bit and use that to calculate the total - // length of the match. + // Find out how long the match is. We loop over the data 64 bits at a + // time until we find a 64-bit block that doesn't match; then we find + // the first non-matching bit and use that to calculate the total + // length of the match. 
while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { - s2 += 8; - matched += 8; - } else { - uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero64(x); - matched += matching_bits >> 3; + s2 += 8; + matched += 8; + } else { + uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero64(x); + matched += matching_bits >> 3; assert(matched >= 8); return std::pair(matched, false); - } - } + } + } while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { if (s1[matched] == *s2) { - ++s2; - ++matched; - } else { + ++s2; + ++matched; + } else { return std::pair(matched, matched < 8); - } - } + } + } return std::pair(matched, matched < 8); -} -#else +} +#else static inline std::pair FindMatchLength(const char* s1, const char* s2, const char* s2_limit) { - // Implementation based on the x86-64 version, above. + // Implementation based on the x86-64 version, above. 
assert(s2_limit >= s2); - int matched = 0; - - while (s2 <= s2_limit - 4 && - UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { - s2 += 4; - matched += 4; - } - if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { - uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero(x); - matched += matching_bits >> 3; - } else { - while ((s2 < s2_limit) && (s1[matched] == *s2)) { - ++s2; - ++matched; - } - } + int matched = 0; + + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } + if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { + uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } return std::pair(matched, matched < 8); -} -#endif - +} +#endif + // Lookup tables for decompression code. Give --snappy_dump_decompression_table // to the unit test to recompute char_table. @@ -225,7 +225,7 @@ static const uint16 char_table[256] = { 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 }; -} // end namespace internal -} // end namespace snappy - +} // end namespace internal +} // end namespace snappy + #endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ diff --git a/contrib/libs/snappy/snappy-sinksource.cc b/contrib/libs/snappy/snappy-sinksource.cc index 42651664bf7..369a13215bc 100644 --- a/contrib/libs/snappy/snappy-sinksource.cc +++ b/contrib/libs/snappy/snappy-sinksource.cc @@ -1,45 +1,45 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - -#include "snappy-sinksource.h" - -namespace snappy { - -Source::~Source() { } - -Sink::~Sink() { } - -char* Sink::GetAppendBuffer(size_t length, char* scratch) { - return scratch; -} - +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include "snappy-sinksource.h" + +namespace snappy { + +Source::~Source() { } + +Sink::~Sink() { } + +char* Sink::GetAppendBuffer(size_t length, char* scratch) { + return scratch; +} + char* Sink::GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, size_t scratch_size, size_t* allocated_size) { @@ -55,34 +55,34 @@ void Sink::AppendAndTakeOwnership( (*deleter)(deleter_arg, bytes, n); } -ByteArraySource::~ByteArraySource() { } - -size_t ByteArraySource::Available() const { return left_; } - -const char* ByteArraySource::Peek(size_t* len) { - *len = left_; - return ptr_; -} - -void ByteArraySource::Skip(size_t n) { - left_ -= n; - ptr_ += n; -} - -UncheckedByteArraySink::~UncheckedByteArraySink() { } - -void UncheckedByteArraySink::Append(const char* data, size_t n) { - // Do no copying if the caller filled in the result of GetAppendBuffer() - if (data != dest_) { - memcpy(dest_, data, n); - } - dest_ += n; -} - -char* 
UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { - return dest_; -} - +ByteArraySource::~ByteArraySource() { } + +size_t ByteArraySource::Available() const { return left_; } + +const char* ByteArraySource::Peek(size_t* len) { + *len = left_; + return ptr_; +} + +void ByteArraySource::Skip(size_t n) { + left_ -= n; + ptr_ += n; +} + +UncheckedByteArraySink::~UncheckedByteArraySink() { } + +void UncheckedByteArraySink::Append(const char* data, size_t n) { + // Do no copying if the caller filled in the result of GetAppendBuffer() + if (data != dest_) { + memcpy(dest_, data, n); + } + dest_ += n; +} + +char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { + return dest_; +} + void UncheckedByteArraySink::AppendAndTakeOwnership( char* data, size_t n, void (*deleter)(void*, const char*, size_t), @@ -92,7 +92,7 @@ void UncheckedByteArraySink::AppendAndTakeOwnership( (*deleter)(deleter_arg, data, n); } dest_ += n; -} +} char* UncheckedByteArraySink::GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, diff --git a/contrib/libs/snappy/snappy-sinksource.h b/contrib/libs/snappy/snappy-sinksource.h index 75aa8726539..8afcdaaa2cc 100644 --- a/contrib/libs/snappy/snappy-sinksource.h +++ b/contrib/libs/snappy/snappy-sinksource.h @@ -1,69 +1,69 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ #define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ - -#include - -namespace snappy { - -// A Sink is an interface that consumes a sequence of bytes. -class Sink { - public: - Sink() { } - virtual ~Sink(); - - // Append "bytes[0,n-1]" to this. - virtual void Append(const char* bytes, size_t n) = 0; - - // Returns a writable buffer of the specified length for appending. - // May return a pointer to the caller-owned scratch buffer which - // must have at least the indicated length. The returned buffer is - // only valid until the next operation on this Sink. - // - // After writing at most "length" bytes, call Append() with the - // pointer returned from this function and the number of bytes - // written. Many Append() implementations will avoid copying - // bytes if this function returned an internal buffer. - // - // If a non-scratch buffer is returned, the caller may only pass a - // prefix of it to Append(). That is, it is not correct to pass an - // interior pointer of the returned array to Append(). - // - // The default implementation always returns the scratch buffer. - virtual char* GetAppendBuffer(size_t length, char* scratch); - + +#include + +namespace snappy { + +// A Sink is an interface that consumes a sequence of bytes. +class Sink { + public: + Sink() { } + virtual ~Sink(); + + // Append "bytes[0,n-1]" to this. 
+ virtual void Append(const char* bytes, size_t n) = 0; + + // Returns a writable buffer of the specified length for appending. + // May return a pointer to the caller-owned scratch buffer which + // must have at least the indicated length. The returned buffer is + // only valid until the next operation on this Sink. + // + // After writing at most "length" bytes, call Append() with the + // pointer returned from this function and the number of bytes + // written. Many Append() implementations will avoid copying + // bytes if this function returned an internal buffer. + // + // If a non-scratch buffer is returned, the caller may only pass a + // prefix of it to Append(). That is, it is not correct to pass an + // interior pointer of the returned array to Append(). + // + // The default implementation always returns the scratch buffer. + virtual char* GetAppendBuffer(size_t length, char* scratch); + // For higher performance, Sink implementations can provide custom // AppendAndTakeOwnership() and GetAppendBufferVariable() methods. // These methods can reduce the number of copies done during // compression/decompression. - + // Append "bytes[0,n-1] to the sink. Takes ownership of "bytes" // and calls the deleter function as (*deleter)(deleter_arg, bytes, n) // to free the buffer. deleter function must be non NULL. @@ -101,82 +101,82 @@ class Sink { size_t min_size, size_t desired_size_hint, char* scratch, size_t scratch_size, size_t* allocated_size); - private: - // No copying - Sink(const Sink&); - void operator=(const Sink&); -}; - -// A Source is an interface that yields a sequence of bytes -class Source { - public: - Source() { } - virtual ~Source(); - - // Return the number of bytes left to read from the source - virtual size_t Available() const = 0; - - // Peek at the next flat region of the source. Does not reposition - // the source. The returned region is empty iff Available()==0. 
- // - // Returns a pointer to the beginning of the region and store its - // length in *len. - // - // The returned region is valid until the next call to Skip() or - // until this object is destroyed, whichever occurs first. - // - // The returned region may be larger than Available() (for example - // if this ByteSource is a view on a substring of a larger source). - // The caller is responsible for ensuring that it only reads the - // Available() bytes. - virtual const char* Peek(size_t* len) = 0; - - // Skip the next n bytes. Invalidates any buffer returned by - // a previous call to Peek(). - // REQUIRES: Available() >= n - virtual void Skip(size_t n) = 0; - - private: - // No copying - Source(const Source&); - void operator=(const Source&); -}; - -// A Source implementation that yields the contents of a flat array -class ByteArraySource : public Source { - public: - ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } - virtual ~ByteArraySource(); - virtual size_t Available() const; - virtual const char* Peek(size_t* len); - virtual void Skip(size_t n); - private: - const char* ptr_; - size_t left_; -}; - -// A Sink implementation that writes to a flat array without any bound checks. -class UncheckedByteArraySink : public Sink { - public: - explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } - virtual ~UncheckedByteArraySink(); - virtual void Append(const char* data, size_t n); - virtual char* GetAppendBuffer(size_t len, char* scratch); + private: + // No copying + Sink(const Sink&); + void operator=(const Sink&); +}; + +// A Source is an interface that yields a sequence of bytes +class Source { + public: + Source() { } + virtual ~Source(); + + // Return the number of bytes left to read from the source + virtual size_t Available() const = 0; + + // Peek at the next flat region of the source. Does not reposition + // the source. The returned region is empty iff Available()==0. 
+ // + // Returns a pointer to the beginning of the region and store its + // length in *len. + // + // The returned region is valid until the next call to Skip() or + // until this object is destroyed, whichever occurs first. + // + // The returned region may be larger than Available() (for example + // if this ByteSource is a view on a substring of a larger source). + // The caller is responsible for ensuring that it only reads the + // Available() bytes. + virtual const char* Peek(size_t* len) = 0; + + // Skip the next n bytes. Invalidates any buffer returned by + // a previous call to Peek(). + // REQUIRES: Available() >= n + virtual void Skip(size_t n) = 0; + + private: + // No copying + Source(const Source&); + void operator=(const Source&); +}; + +// A Source implementation that yields the contents of a flat array +class ByteArraySource : public Source { + public: + ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } + virtual ~ByteArraySource(); + virtual size_t Available() const; + virtual const char* Peek(size_t* len); + virtual void Skip(size_t n); + private: + const char* ptr_; + size_t left_; +}; + +// A Sink implementation that writes to a flat array without any bound checks. +class UncheckedByteArraySink : public Sink { + public: + explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } + virtual ~UncheckedByteArraySink(); + virtual void Append(const char* data, size_t n); + virtual char* GetAppendBuffer(size_t len, char* scratch); virtual char* GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, size_t scratch_size, size_t* allocated_size); virtual void AppendAndTakeOwnership( char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), void *deleter_arg); - - // Return the current output pointer so that a caller can see how - // many bytes were produced. - // Note: this is not a Sink method. 
- char* CurrentDestination() const { return dest_; } - private: - char* dest_; -}; - + + // Return the current output pointer so that a caller can see how + // many bytes were produced. + // Note: this is not a Sink method. + char* CurrentDestination() const { return dest_; } + private: + char* dest_; +}; + } // namespace snappy - + #endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ diff --git a/contrib/libs/snappy/snappy-stubs-internal.cc b/contrib/libs/snappy/snappy-stubs-internal.cc index 3e43a2124b5..66ed2e90393 100644 --- a/contrib/libs/snappy/snappy-stubs-internal.cc +++ b/contrib/libs/snappy/snappy-stubs-internal.cc @@ -1,42 +1,42 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include #include - -#include "snappy-stubs-internal.h" - -namespace snappy { - + +#include "snappy-stubs-internal.h" + +namespace snappy { + void Varint::Append32(std::string* s, uint32 value) { - char buf[Varint::kMax32]; - const char* p = Varint::Encode32(buf, value); + char buf[Varint::kMax32]; + const char* p = Varint::Encode32(buf, value); s->append(buf, p - buf); -} - -} // namespace snappy +} + +} // namespace snappy diff --git a/contrib/libs/snappy/snappy-stubs-internal.h b/contrib/libs/snappy/snappy-stubs-internal.h index 48b40bac980..4854689d177 100644 --- a/contrib/libs/snappy/snappy-stubs-internal.h +++ b/contrib/libs/snappy/snappy-stubs-internal.h @@ -1,46 +1,46 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Various stubs for the open-source version of Snappy. - +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various stubs for the open-source version of Snappy. + #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ - + #ifdef HAVE_CONFIG_H #include "config.h" -#endif - +#endif + #include - -#include -#include -#include - + +#include +#include +#include + #ifdef HAVE_SYS_MMAN_H #include #endif @@ -65,13 +65,13 @@ #define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ #endif // __has_feature(memory_sanitizer) -#include "snappy-stubs-public.h" - -#if defined(__x86_64__) - -// Enable 64-bit optimized versions of some routines. -#define ARCH_K8 1 - +#include "snappy-stubs-public.h" + +#if defined(__x86_64__) + +// Enable 64-bit optimized versions of some routines. +#define ARCH_K8 1 + #elif defined(__ppc64__) #define ARCH_PPC 1 @@ -80,21 +80,21 @@ #define ARCH_ARM 1 -#endif - -// Needed by OS X, among others. -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -// The size of an array, if known at compile-time. -// Will give unexpected results if used on a pointer. -// We undefine it first, since some compilers already have a definition. -#ifdef ARRAYSIZE -#undef ARRAYSIZE -#endif -#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) - +#endif + +// Needed by OS X, among others. +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +// The size of an array, if known at compile-time. 
+// Will give unexpected results if used on a pointer. +// We undefine it first, since some compilers already have a definition. +#ifdef ARRAYSIZE +#undef ARRAYSIZE +#endif +#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) + // Static prediction hints. #ifdef HAVE_BUILTIN_EXPECT #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) @@ -103,44 +103,44 @@ #define SNAPPY_PREDICT_FALSE(x) x #define SNAPPY_PREDICT_TRUE(x) x #endif - -// This is only used for recomputing the tag byte table used during -// decompression; for simplicity we just remove it from the open-source -// version (anyone who wants to regenerate it can just do the call -// themselves within main()). -#define DEFINE_bool(flag_name, default_value, description) \ - bool FLAGS_ ## flag_name = default_value -#define DECLARE_bool(flag_name) \ - extern bool FLAGS_ ## flag_name - -namespace snappy { - -static const uint32 kuint32max = static_cast(0xFFFFFFFF); -static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); - -// Potentially unaligned loads and stores. - + +// This is only used for recomputing the tag byte table used during +// decompression; for simplicity we just remove it from the open-source +// version (anyone who wants to regenerate it can just do the call +// themselves within main()). +#define DEFINE_bool(flag_name, default_value, description) \ + bool FLAGS_ ## flag_name = default_value +#define DECLARE_bool(flag_name) \ + extern bool FLAGS_ ## flag_name + +namespace snappy { + +static const uint32 kuint32max = static_cast(0xFFFFFFFF); +static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); + +// Potentially unaligned loads and stores. + // x86, PowerPC, and ARM64 can simply do these loads and stores native. 
- + #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ defined(__aarch64__) - -#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD64(_p) (*reinterpret_cast(_p)) - -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast(_p) = (_val)) - -// ARMv7 and newer support native unaligned accesses, but only of 16-bit -// and 32-bit values (not 64-bit); older versions either raise a fatal signal, -// do an unaligned read and rotate the words around a bit, or do the reads very -// slowly (trip through kernel mode). There's no simple #define that says just -// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6 -// sub-architectures. -// -// This is a mess, but there's not much we can do about it. + +#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) +#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) +#define UNALIGNED_LOAD64(_p) (*reinterpret_cast(_p)) + +#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) +#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) +#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast(_p) = (_val)) + +// ARMv7 and newer support native unaligned accesses, but only of 16-bit +// and 32-bit values (not 64-bit); older versions either raise a fatal signal, +// do an unaligned read and rotate the words around a bit, or do the reads very +// slowly (trip through kernel mode). There's no simple #define that says just +// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6 +// sub-architectures. +// +// This is a mess, but there's not much we can do about it. // // To further complicate matters, only LDR instructions (single reads) are // allowed to be unaligned, not LDRD (two reads) or LDM (many reads). 
Unless we @@ -150,30 +150,30 @@ static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); // (it ignores __attribute__((packed)) on individual variables). However, // we can tell it that a _struct_ is unaligned, which has the same effect, // so we do that. - -#elif defined(__arm__) && \ + +#elif defined(__arm__) && \ !defined(__ARM_ARCH_4__) && \ !defined(__ARM_ARCH_4T__) && \ - !defined(__ARM_ARCH_5__) && \ - !defined(__ARM_ARCH_5T__) && \ - !defined(__ARM_ARCH_5TE__) && \ - !defined(__ARM_ARCH_5TEJ__) && \ - !defined(__ARM_ARCH_6__) && \ - !defined(__ARM_ARCH_6J__) && \ - !defined(__ARM_ARCH_6K__) && \ - !defined(__ARM_ARCH_6Z__) && \ - !defined(__ARM_ARCH_6ZK__) && \ - !defined(__ARM_ARCH_6T2__) - + !defined(__ARM_ARCH_5__) && \ + !defined(__ARM_ARCH_5T__) && \ + !defined(__ARM_ARCH_5TE__) && \ + !defined(__ARM_ARCH_5TEJ__) && \ + !defined(__ARM_ARCH_6__) && \ + !defined(__ARM_ARCH_6J__) && \ + !defined(__ARM_ARCH_6K__) && \ + !defined(__ARM_ARCH_6Z__) && \ + !defined(__ARM_ARCH_6ZK__) && \ + !defined(__ARM_ARCH_6T2__) + #if __GNUC__ #define ATTRIBUTE_PACKED __attribute__((__packed__)) #else #define ATTRIBUTE_PACKED #endif - + namespace base { namespace internal { - + struct Unaligned16Struct { uint16 value; uint8 dummy; // To make the size non-power-of-two. @@ -200,59 +200,59 @@ struct Unaligned32Struct { (_val)) // TODO: NEON supports unaligned 64-bit loads and stores. -// See if that would be more efficient on platforms supporting it, -// at least for copies. - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); -} - -#else - -// These functions are provided for architectures that don't support -// unaligned loads and stores. 
- -inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE16(void *p, uint16 v) { - memcpy(p, &v, sizeof v); -} - -inline void UNALIGNED_STORE32(void *p, uint32 v) { - memcpy(p, &v, sizeof v); -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); -} - -#endif - +// See if that would be more efficient on platforms supporting it, +// at least for copies. + +inline uint64 UNALIGNED_LOAD64(const void *p) { + uint64 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline void UNALIGNED_STORE64(void *p, uint64 v) { + memcpy(p, &v, sizeof v); +} + +#else + +// These functions are provided for architectures that don't support +// unaligned loads and stores. + +inline uint16 UNALIGNED_LOAD16(const void *p) { + uint16 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline uint32 UNALIGNED_LOAD32(const void *p) { + uint32 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline uint64 UNALIGNED_LOAD64(const void *p) { + uint64 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline void UNALIGNED_STORE16(void *p, uint16 v) { + memcpy(p, &v, sizeof v); +} + +inline void UNALIGNED_STORE32(void *p, uint32 v) { + memcpy(p, &v, sizeof v); +} + +inline void UNALIGNED_STORE64(void *p, uint64 v) { + memcpy(p, &v, sizeof v); +} + +#endif + // The following guarantees declaration of the byte swap functions. 
#if defined(SNAPPY_IS_BIG_ENDIAN) - + #ifdef HAVE_SYS_BYTEORDER_H #include #endif @@ -293,8 +293,8 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) { inline uint16 bswap_16(uint16 x) { return (x << 8) | (x >> 8); -} - +} + inline uint32 bswap_32(uint32 x) { x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); return (x >> 16) | (x << 16); @@ -310,28 +310,28 @@ inline uint64 bswap_64(uint64 x) { #endif // defined(SNAPPY_IS_BIG_ENDIAN) -// Convert to little-endian storage, opposite of network format. -// Convert x from host to little endian: x = LittleEndian.FromHost(x); -// convert x from little endian to host: x = LittleEndian.ToHost(x); -// -// Store values into unaligned memory converting to little endian order: -// LittleEndian.Store16(p, x); -// -// Load unaligned values stored in little endian converting to host order: -// x = LittleEndian.Load16(p); -class LittleEndian { - public: - // Conversion functions. +// Convert to little-endian storage, opposite of network format. +// Convert x from host to little endian: x = LittleEndian.FromHost(x); +// convert x from little endian to host: x = LittleEndian.ToHost(x); +// +// Store values into unaligned memory converting to little endian order: +// LittleEndian.Store16(p, x); +// +// Load unaligned values stored in little endian converting to host order: +// x = LittleEndian.Load16(p); +class LittleEndian { + public: + // Conversion functions. 
#if defined(SNAPPY_IS_BIG_ENDIAN) - + static uint16 FromHost16(uint16 x) { return bswap_16(x); } static uint16 ToHost16(uint16 x) { return bswap_16(x); } - + static uint32 FromHost32(uint32 x) { return bswap_32(x); } static uint32 ToHost32(uint32 x) { return bswap_32(x); } - static bool IsLittleEndian() { return false; } - + static bool IsLittleEndian() { return false; } + #else // !defined(SNAPPY_IS_BIG_ENDIAN) static uint16 FromHost16(uint16 x) { return x; } @@ -344,50 +344,50 @@ class LittleEndian { #endif // !defined(SNAPPY_IS_BIG_ENDIAN) - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } - - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } - - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } - - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } -}; - -// Some bit-manipulation functions. -class Bits { - public: + // Functions to do unaligned loads and stores in little-endian order. + static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } + + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } + + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } + + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } +}; + +// Some bit-manipulation functions. +class Bits { + public: // Return floor(log2(n)) for positive integer n. static int Log2FloorNonZero(uint32 n); - // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Floor(uint32 n); - - // Return the first set least / most significant bit, 0-indexed. Returns an - // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except - // that it's 0-indexed. - static int FindLSBSetNonZero(uint32 n); + // Return floor(log2(n)) for positive integer n. 
Returns -1 iff n == 0. + static int Log2Floor(uint32 n); + + // Return the first set least / most significant bit, 0-indexed. Returns an + // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except + // that it's 0-indexed. + static int FindLSBSetNonZero(uint32 n); #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - static int FindLSBSetNonZero64(uint64 n); + static int FindLSBSetNonZero64(uint64 n); #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - - private: + + private: // No copying Bits(const Bits&); void operator=(const Bits&); -}; - -#ifdef HAVE_BUILTIN_CTZ - +}; + +#ifdef HAVE_BUILTIN_CTZ + inline int Bits::Log2FloorNonZero(uint32 n) { assert(n != 0); // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof @@ -399,22 +399,22 @@ inline int Bits::Log2FloorNonZero(uint32 n) { return 31 ^ __builtin_clz(n); } -inline int Bits::Log2Floor(uint32 n) { +inline int Bits::Log2Floor(uint32 n) { return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); -} - -inline int Bits::FindLSBSetNonZero(uint32 n) { +} + +inline int Bits::FindLSBSetNonZero(uint32 n) { assert(n != 0); - return __builtin_ctz(n); -} - + return __builtin_ctz(n); +} + #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -inline int Bits::FindLSBSetNonZero64(uint64 n) { +inline int Bits::FindLSBSetNonZero64(uint64 n) { assert(n != 0); - return __builtin_ctzll(n); -} + return __builtin_ctzll(n); +} #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - + #elif defined(_MSC_VER) inline int Bits::Log2FloorNonZero(uint32 n) { @@ -449,158 +449,158 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) { } #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -#else // Portable versions. - +#else // Portable versions. 
+ inline int Bits::Log2FloorNonZero(uint32 n) { assert(n != 0); - int log = 0; - uint32 value = n; - for (int i = 4; i >= 0; --i) { - int shift = (1 << i); - uint32 x = value >> shift; - if (x != 0) { - value = x; - log += shift; - } - } - assert(value == 1); - return log; -} - + int log = 0; + uint32 value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint32 x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + assert(value == 1); + return log; +} + inline int Bits::Log2Floor(uint32 n) { return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); } -inline int Bits::FindLSBSetNonZero(uint32 n) { +inline int Bits::FindLSBSetNonZero(uint32 n) { assert(n != 0); - int rc = 31; - for (int i = 4, shift = 1 << 4; i >= 0; --i) { - const uint32 x = n << shift; - if (x != 0) { - n = x; - rc -= shift; - } - shift >>= 1; - } - return rc; -} - + int rc = 31; + for (int i = 4, shift = 1 << 4; i >= 0; --i) { + const uint32 x = n << shift; + if (x != 0) { + n = x; + rc -= shift; + } + shift >>= 1; + } + return rc; +} + #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). -inline int Bits::FindLSBSetNonZero64(uint64 n) { +// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). +inline int Bits::FindLSBSetNonZero64(uint64 n) { assert(n != 0); - const uint32 bottombits = static_cast(n); - if (bottombits == 0) { - // Bottom bits are zero, so scan in top bits - return 32 + FindLSBSetNonZero(static_cast(n >> 32)); - } else { - return FindLSBSetNonZero(bottombits); - } -} + const uint32 bottombits = static_cast(n); + if (bottombits == 0) { + // Bottom bits are zero, so scan in top bits + return 32 + FindLSBSetNonZero(static_cast(n >> 32)); + } else { + return FindLSBSetNonZero(bottombits); + } +} #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - -#endif // End portable versions. - -// Variable-length integer encoding. 
-class Varint { - public: - // Maximum lengths of varint encoding of uint32. - static const int kMax32 = 5; - - // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. - // Never reads a character at or beyond limit. If a valid/terminated varint32 - // was found in the range, stores it in *OUTPUT and returns a pointer just - // past the last byte of the varint32. Else returns NULL. On success, - // "result <= limit". - static const char* Parse32WithLimit(const char* ptr, const char* limit, - uint32* OUTPUT); - - // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". - // EFFECTS Encodes "v" into "ptr" and returns a pointer to the - // byte just past the last encoded byte. - static char* Encode32(char* ptr, uint32 v); - - // EFFECTS Appends the varint representation of "value" to "*s". + +#endif // End portable versions. + +// Variable-length integer encoding. +class Varint { + public: + // Maximum lengths of varint encoding of uint32. + static const int kMax32 = 5; + + // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. + // Never reads a character at or beyond limit. If a valid/terminated varint32 + // was found in the range, stores it in *OUTPUT and returns a pointer just + // past the last byte of the varint32. Else returns NULL. On success, + // "result <= limit". + static const char* Parse32WithLimit(const char* ptr, const char* limit, + uint32* OUTPUT); + + // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". + // EFFECTS Encodes "v" into "ptr" and returns a pointer to the + // byte just past the last encoded byte. + static char* Encode32(char* ptr, uint32 v); + + // EFFECTS Appends the varint representation of "value" to "*s". 
static void Append32(std::string* s, uint32 value); -}; - -inline const char* Varint::Parse32WithLimit(const char* p, - const char* l, - uint32* OUTPUT) { - const unsigned char* ptr = reinterpret_cast(p); - const unsigned char* limit = reinterpret_cast(l); - uint32 b, result; - if (ptr >= limit) return NULL; - b = *(ptr++); result = b & 127; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; - return NULL; // Value is too long to be a varint32 - done: - *OUTPUT = result; - return reinterpret_cast(ptr); -} - -inline char* Varint::Encode32(char* sptr, uint32 v) { - // Operate on characters as unsigneds - unsigned char* ptr = reinterpret_cast(sptr); - static const int B = 128; - if (v < (1<<7)) { - *(ptr++) = v; - } else if (v < (1<<14)) { - *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; - } else { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; - } - return reinterpret_cast(ptr); -} - +}; + +inline const char* Varint::Parse32WithLimit(const char* p, + const char* l, + uint32* OUTPUT) { + const unsigned char* ptr = reinterpret_cast(p); + const unsigned char* limit = reinterpret_cast(l); + uint32 b, result; + if (ptr >= limit) return NULL; + b = *(ptr++); result = b & 127; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = 
*(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; + return NULL; // Value is too long to be a varint32 + done: + *OUTPUT = result; + return reinterpret_cast(ptr); +} + +inline char* Varint::Encode32(char* sptr, uint32 v) { + // Operate on characters as unsigneds + unsigned char* ptr = reinterpret_cast(sptr); + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return reinterpret_cast(ptr); +} + // If you know the internal layout of the std::string in use, you can -// replace this function with one that resizes the string without -// filling the new space with zeros (if applicable) -- -// it will be non-portable but faster. +// replace this function with one that resizes the string without +// filling the new space with zeros (if applicable) -- +// it will be non-portable but faster. inline void STLStringResizeUninitialized(std::string* s, size_t new_size) { s->resize(new_size); -} - -// Return a mutable char* pointing to a string's internal buffer, -// which may not be null-terminated. Writing through this pointer will -// modify the string. -// -// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the -// next call to a string method that invalidates iterators. -// -// As of 2006-04, there is no standard-blessed way of getting a -// mutable reference to a string's internal buffer. 
However, issue 530 -// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) -// proposes this as the method. It will officially be part of the standard -// for C++0x. This should already work on all current implementations. +} + +// Return a mutable char* pointing to a string's internal buffer, +// which may not be null-terminated. Writing through this pointer will +// modify the string. +// +// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the +// next call to a string method that invalidates iterators. +// +// As of 2006-04, there is no standard-blessed way of getting a +// mutable reference to a string's internal buffer. However, issue 530 +// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) +// proposes this as the method. It will officially be part of the standard +// for C++0x. This should already work on all current implementations. inline char* string_as_array(std::string* str) { return str->empty() ? NULL : &*str->begin(); -} - -} // namespace snappy - +} + +} // namespace snappy + #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ diff --git a/contrib/libs/snappy/snappy-stubs-public.h b/contrib/libs/snappy/snappy-stubs-public.h index 2a2931c4a4f..357c4b2e4bf 100644 --- a/contrib/libs/snappy/snappy-stubs-public.h +++ b/contrib/libs/snappy/snappy-stubs-public.h @@ -1,58 +1,58 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Various type stubs for the open-source version of Snappy. -// -// This file cannot include config.h, as it is included from snappy.h, -// which is a public header. Instead, snappy-stubs-public.h is generated by -// from snappy-stubs-public.h.in at configure time. - +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various type stubs for the open-source version of Snappy. +// +// This file cannot include config.h, as it is included from snappy.h, +// which is a public header. Instead, snappy-stubs-public.h is generated by +// from snappy-stubs-public.h.in at configure time. + #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ - + #include #include #include - + #include "config.h" #if defined(HAVE_SYS_UIO_H) #include #endif // HAVE_SYS_UIO_H -#define SNAPPY_MAJOR 1 +#define SNAPPY_MAJOR 1 #define SNAPPY_MINOR 1 #define SNAPPY_PATCHLEVEL 8 -#define SNAPPY_VERSION \ - ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) - -namespace snappy { - +#define SNAPPY_VERSION \ + ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) + +namespace snappy { + using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; @@ -61,7 +61,7 @@ using int32 = std::int32_t; using uint32 = std::uint32_t; using int64 = std::int64_t; using uint64 = std::uint64_t; - + #if !defined(HAVE_SYS_UIO_H) // Windows does not have an iovec type, yet the concept is universally useful. 
// It is simple to define it ourselves, so we put it inside our own namespace. @@ -70,7 +70,7 @@ struct iovec { size_t iov_len; }; #endif // !HAVE_SYS_UIO_H - -} // namespace snappy - + +} // namespace snappy + #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ diff --git a/contrib/libs/snappy/snappy.cc b/contrib/libs/snappy/snappy.cc index 27e491c0436..9351b0f21e4 100644 --- a/contrib/libs/snappy/snappy.cc +++ b/contrib/libs/snappy/snappy.cc @@ -1,35 +1,35 @@ -// Copyright 2005 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "snappy.h" -#include "snappy-internal.h" -#include "snappy-sinksource.h" - +// Copyright 2005 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "snappy.h" +#include "snappy-internal.h" +#include "snappy-sinksource.h" + #if !defined(SNAPPY_HAVE_SSSE3) // __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD // support between SSE2 and AVX (so SSSE3 instructions require AVX support), and @@ -68,60 +68,60 @@ #include #endif -#include - -#include +#include + +#include #include #include #include - -namespace snappy { - + +namespace snappy { + using internal::COPY_1_BYTE_OFFSET; using internal::COPY_2_BYTE_OFFSET; using internal::LITERAL; using internal::char_table; using internal::kMaximumTagLength; -// Any hash function will produce a valid compressed bitstream, but a good -// hash function reduces the number of collisions and thus yields better -// compression for compressible input, and more speed for incompressible -// input. Of course, it doesn't hurt if the hash function is reasonably fast -// either, as it gets called a lot. 
-static inline uint32 HashBytes(uint32 bytes, int shift) { - uint32 kMul = 0x1e35a7bd; - return (bytes * kMul) >> shift; -} -static inline uint32 Hash(const char* p, int shift) { - return HashBytes(UNALIGNED_LOAD32(p), shift); -} - -size_t MaxCompressedLength(size_t source_len) { - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. - // - // This last factor dominates the blowup, so the final estimate is: - return 32 + source_len + source_len/6; -} - +// Any hash function will produce a valid compressed bitstream, but a good +// hash function reduces the number of collisions and thus yields better +// compression for compressible input, and more speed for incompressible +// input. Of course, it doesn't hurt if the hash function is reasonably fast +// either, as it gets called a lot. 
+static inline uint32 HashBytes(uint32 bytes, int shift) { + uint32 kMul = 0x1e35a7bd; + return (bytes * kMul) >> shift; +} +static inline uint32 Hash(const char* p, int shift) { + return HashBytes(UNALIGNED_LOAD32(p), shift); +} + +size_t MaxCompressedLength(size_t source_len) { + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + return 32 + source_len + source_len/6; +} + namespace { - + void UnalignedCopy64(const void* src, void* dst) { char tmp[8]; memcpy(tmp, src, 8); @@ -145,7 +145,7 @@ void UnalignedCopy128(const void* src, void* dst) { // op_limit == op + 20 // After IncrementalCopySlow(src, op, op_limit), the result will have eleven // copies of "ab" -// ababababababababababab +// ababababababababababab // Note that this does not match the semantics of either memcpy() or memmove(). 
inline char* IncrementalCopySlow(const char* src, char* op, char* const op_limit) { @@ -156,13 +156,13 @@ inline char* IncrementalCopySlow(const char* src, char* op, #pragma clang loop unroll(disable) #endif while (op < op_limit) { - *op++ = *src++; + *op++ = *src++; } return op_limit; -} - +} + #if SNAPPY_HAVE_SSSE3 - + // This is a table of shuffle control masks that can be used as the source // operand for PSHUFB to permute the contents of the destination XMM register // into a repeating byte pattern. @@ -175,9 +175,9 @@ alignas(16) const char pshufb_fill_patterns[7][16] = { {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3}, {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1}, }; - + #endif // SNAPPY_HAVE_SSSE3 - + // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than // IncrementalCopySlow. buf_limit is the address past the end of the writable // region of the buffer. @@ -194,7 +194,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that // to optimize this function but we have to also handle other cases in case // the input does not satisfy these conditions. - + size_t pattern_size = op - src; // The cases are split into different branches to allow the branch predictor, // FDO, and static prediction hints to work better. For each input we list the @@ -286,7 +286,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // conditionals instead of a loop allows FDO to layout the code with respect // to the actual probabilities of each length. // TODO: Replace with loop with trip count hint. 
- UnalignedCopy64(src, op); + UnalignedCopy64(src, op); UnalignedCopy64(src + 8, op + 8); if (op + 16 < op_limit) { @@ -302,7 +302,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, UnalignedCopy64(src + 56, op + 56); } return op_limit; - } + } // Fall back to doing as much as we can with the available slop in the // buffer. This code path is relatively cold however so we save code size by @@ -314,7 +314,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, #pragma clang loop unroll(disable) #endif for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { - UnalignedCopy64(src, op); + UnalignedCopy64(src, op); UnalignedCopy64(src + 8, op + 8); } if (op >= op_limit) @@ -324,17 +324,17 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // single 8 byte copy. if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { UnalignedCopy64(src, op); - src += 8; - op += 8; - } + src += 8; + op += 8; + } return IncrementalCopySlow(src, op, op_limit); -} - +} + } // namespace template -static inline char* EmitLiteral(char* op, - const char* literal, +static inline char* EmitLiteral(char* op, + const char* literal, int len) { // The vast majority of copies are below 16 bytes, for which a // call to memcpy is overkill. This fast path can sometimes @@ -356,13 +356,13 @@ static inline char* EmitLiteral(char* op, return op + len; } - if (n < 60) { - // Fits in tag byte - *op++ = LITERAL | (n << 2); - } else { + if (n < 60) { + // Fits in tag byte + *op++ = LITERAL | (n << 2); + } else { int count = (Bits::Log2Floor(n) >> 3) + 1; - assert(count >= 1); - assert(count <= 4); + assert(count >= 1); + assert(count <= 4); *op++ = LITERAL | ((59 + count) << 2); // Encode in upcoming bytes. // Write 4 bytes, though we may care about only 1 of them. The output buffer @@ -370,33 +370,33 @@ static inline char* EmitLiteral(char* op, // here and there is a memcpy of size 'len' below. 
LittleEndian::Store32(op, n); op += count; - } - memcpy(op, literal, len); - return op + len; -} - + } + memcpy(op, literal, len); + return op + len; +} + template static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { assert(len <= 64); assert(len >= 4); assert(offset < 65536); assert(len_less_than_12 == (len < 12)); - + if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) { // offset fits in 11 bits. The 3 highest go in the top of the first byte, // and the rest go in the second byte. *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0); - *op++ = offset & 0xff; - } else { + *op++ = offset & 0xff; + } else { // Write 4 bytes, though we only care about 3 of them. The output buffer // is required to have some slack, so the extra byte won't overrun it. uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); LittleEndian::Store32(op, u); op += 3; - } - return op; -} - + } + return op; +} + template static inline char* EmitCopy(char* op, size_t offset, size_t len) { assert(len_less_than_12 == (len < 12)); @@ -405,7 +405,7 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len) { } else { // A special case for len <= 64 might help, but so far measurements suggest // it's in the noise. - + // Emit 64 byte copies but make sure to keep at least four bytes reserved. 
while (SNAPPY_PREDICT_FALSE(len >= 68)) { op = EmitCopyAtMost64(op, offset, 64); @@ -425,20 +425,20 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len) { op = EmitCopyAtMost64(op, offset, len); } return op; - } -} - -bool GetUncompressedLength(const char* start, size_t n, size_t* result) { - uint32 v = 0; - const char* limit = start + n; - if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { - *result = v; - return true; - } else { - return false; - } -} - + } +} + +bool GetUncompressedLength(const char* start, size_t n, size_t* result) { + uint32 v = 0; + const char* limit = start + n; + if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { + *result = v; + return true; + } else { + return false; + } +} + namespace { uint32 CalculateTableSize(uint32 input_size) { static_assert( @@ -446,16 +446,16 @@ uint32 CalculateTableSize(uint32 input_size) { "kMaxHashTableSize should be greater or equal to kMinHashTableSize."); if (input_size > kMaxHashTableSize) { return kMaxHashTableSize; - } + } if (input_size < kMinHashTableSize) { return kMinHashTableSize; - } + } // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). return 2u << Bits::Log2Floor(input_size - 1); } } // namespace - + namespace internal { WorkingMemory::WorkingMemory(size_t input_size) { const size_t max_fragment_size = std::min(input_size, kBlockSize); @@ -476,225 +476,225 @@ uint16* WorkingMemory::GetHashTable(size_t fragment_size, int* table_size) const { const size_t htsize = CalculateTableSize(fragment_size); memset(table_, 0, htsize * sizeof(*table_)); - *table_size = htsize; + *table_size = htsize; return table_; -} -} // end namespace internal - -// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will -// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have -// empirically found that overlapping loads such as -// UNALIGNED_LOAD32(p) ... 
UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) -// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. -// -// We have different versions for 64- and 32-bit; ideally we would avoid the -// two functions and just inline the UNALIGNED_LOAD64 call into -// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever -// enough to avoid loading the value multiple times then. For 64-bit, the load -// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is -// done at GetUint32AtOffset() time. - -#ifdef ARCH_K8 - -typedef uint64 EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return UNALIGNED_LOAD64(ptr); -} - -static inline uint32 GetUint32AtOffset(uint64 v, int offset) { +} +} // end namespace internal + +// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will +// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have +// empirically found that overlapping loads such as +// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) +// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. +// +// We have different versions for 64- and 32-bit; ideally we would avoid the +// two functions and just inline the UNALIGNED_LOAD64 call into +// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever +// enough to avoid loading the value multiple times then. For 64-bit, the load +// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is +// done at GetUint32AtOffset() time. + +#ifdef ARCH_K8 + +typedef uint64 EightBytesReference; + +static inline EightBytesReference GetEightBytesAt(const char* ptr) { + return UNALIGNED_LOAD64(ptr); +} + +static inline uint32 GetUint32AtOffset(uint64 v, int offset) { assert(offset >= 0); assert(offset <= 4); - return v >> (LittleEndian::IsLittleEndian() ? 
8 * offset : 32 - 8 * offset); -} - -#else - -typedef const char* EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return ptr; -} - -static inline uint32 GetUint32AtOffset(const char* v, int offset) { + return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset); +} + +#else + +typedef const char* EightBytesReference; + +static inline EightBytesReference GetEightBytesAt(const char* ptr) { + return ptr; +} + +static inline uint32 GetUint32AtOffset(const char* v, int offset) { assert(offset >= 0); assert(offset <= 4); - return UNALIGNED_LOAD32(v + offset); -} - -#endif - -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. -// -// REQUIRES: "input" is at most "kBlockSize" bytes long. -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input.size())" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -namespace internal { -char* CompressFragment(const char* input, - size_t input_size, - char* op, - uint16* table, - const int table_size) { - // "ip" is the input pointer, and "op" is the output pointer. - const char* ip = input; + return UNALIGNED_LOAD32(v + offset); +} + +#endif + +// Flat array compression that does not emit the "uncompressed length" +// prefix. Compresses "input" string to the "*op" buffer. +// +// REQUIRES: "input" is at most "kBlockSize" bytes long. +// REQUIRES: "op" points to an array of memory that is at least +// "MaxCompressedLength(input.size())" in size. +// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". 
+namespace internal { +char* CompressFragment(const char* input, + size_t input_size, + char* op, + uint16* table, + const int table_size) { + // "ip" is the input pointer, and "op" is the output pointer. + const char* ip = input; assert(input_size <= kBlockSize); assert((table_size & (table_size - 1)) == 0); // table must be power of two - const int shift = 32 - Bits::Log2Floor(table_size); + const int shift = 32 - Bits::Log2Floor(table_size); assert(static_cast(kuint32max >> shift) == table_size - 1); - const char* ip_end = input + input_size; - const char* base_ip = ip; - // Bytes in [next_emit, ip) will be emitted as literal bytes. Or - // [next_emit, ip_end) after the main loop. - const char* next_emit = ip; - - const size_t kInputMarginBytes = 15; + const char* ip_end = input + input_size; + const char* base_ip = ip; + // Bytes in [next_emit, ip) will be emitted as literal bytes. Or + // [next_emit, ip_end) after the main loop. + const char* next_emit = ip; + + const size_t kInputMarginBytes = 15; if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { - const char* ip_limit = input + input_size - kInputMarginBytes; - - for (uint32 next_hash = Hash(++ip, shift); ; ) { + const char* ip_limit = input + input_size - kInputMarginBytes; + + for (uint32 next_hash = Hash(++ip, shift); ; ) { assert(next_emit < ip); - // The body of this loop calls EmitLiteral once and then EmitCopy one or - // more times. (The exception is that when we're close to exhausting - // the input we goto emit_remainder.) - // - // In the first iteration of this loop we're just starting, so - // there's nothing to copy, so calling EmitLiteral once is - // necessary. And we only start a new iteration when the - // current iteration has determined that a call to EmitLiteral will - // precede the next call to EmitCopy (if any). - // - // Step 1: Scan forward in the input looking for a 4-byte-long match. - // If we get close to exhausting the input then goto emit_remainder. 
- // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are + // The body of this loop calls EmitLiteral once and then EmitCopy one or + // more times. (The exception is that when we're close to exhausting + // the input we goto emit_remainder.) + // + // In the first iteration of this loop we're just starting, so + // there's nothing to copy, so calling EmitLiteral once is + // necessary. And we only start a new iteration when the + // current iteration has determined that a call to EmitLiteral will + // precede the next call to EmitCopy (if any). + // + // Step 1: Scan forward in the input looking for a 4-byte-long match. + // If we get close to exhausting the input then goto emit_remainder. + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are // scanned (or skipped), look at every third byte, etc.. When a match is // found, immediately go back to looking at every byte. This is a small // loss (~5% performance, ~0.1% density) for compressible data due to more - // bookkeeping, but for non-compressible data (such as JPEG) it's a huge - // win since the compressor quickly "realizes" the data is incompressible - // and doesn't bother looking for matches everywhere. - // - // The "skip" variable keeps track of how many bytes there are since the - // last match; dividing it by 32 (ie. right-shifting by five) gives the - // number of bytes to move ahead for each iteration. - uint32 skip = 32; - - const char* next_ip = ip; - const char* candidate; - do { - ip = next_ip; - uint32 hash = next_hash; + // bookkeeping, but for non-compressible data (such as JPEG) it's a huge + // win since the compressor quickly "realizes" the data is incompressible + // and doesn't bother looking for matches everywhere. 
+ // + // The "skip" variable keeps track of how many bytes there are since the + // last match; dividing it by 32 (ie. right-shifting by five) gives the + // number of bytes to move ahead for each iteration. + uint32 skip = 32; + + const char* next_ip = ip; + const char* candidate; + do { + ip = next_ip; + uint32 hash = next_hash; assert(hash == Hash(ip, shift)); uint32 bytes_between_hash_lookups = skip >> 5; skip += bytes_between_hash_lookups; - next_ip = ip + bytes_between_hash_lookups; + next_ip = ip + bytes_between_hash_lookups; if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { - goto emit_remainder; - } - next_hash = Hash(next_ip, shift); - candidate = base_ip + table[hash]; + goto emit_remainder; + } + next_hash = Hash(next_ip, shift); + candidate = base_ip + table[hash]; assert(candidate >= base_ip); assert(candidate < ip); - - table[hash] = ip - base_ip; + + table[hash] = ip - base_ip; } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) != UNALIGNED_LOAD32(candidate))); - - // Step 2: A 4-byte match has been found. We'll later see if more - // than 4 bytes match. But, prior to the match, input - // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." + + // Step 2: A 4-byte match has been found. We'll later see if more + // than 4 bytes match. But, prior to the match, input + // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." assert(next_emit + 16 <= ip_end); op = EmitLiteral(op, next_emit, ip - next_emit); - - // Step 3: Call EmitCopy, and then see if another EmitCopy could - // be our next move. Repeat until we find no match for the - // input immediately after what was consumed by the last EmitCopy call. - // - // If we exit this loop normally then we need to call EmitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can exit - // this loop via goto if we get close to exhausting the input. 
- EightBytesReference input_bytes; - uint32 candidate_bytes = 0; - - do { - // We have a 4-byte match at ip, and no need to emit any - // "literal bytes" prior to ip. - const char* base = ip; + + // Step 3: Call EmitCopy, and then see if another EmitCopy could + // be our next move. Repeat until we find no match for the + // input immediately after what was consumed by the last EmitCopy call. + // + // If we exit this loop normally then we need to call EmitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can exit + // this loop via goto if we get close to exhausting the input. + EightBytesReference input_bytes; + uint32 candidate_bytes = 0; + + do { + // We have a 4-byte match at ip, and no need to emit any + // "literal bytes" prior to ip. + const char* base = ip; std::pair p = FindMatchLength(candidate + 4, ip + 4, ip_end); size_t matched = 4 + p.first; - ip += matched; - size_t offset = base - candidate; + ip += matched; + size_t offset = base - candidate; assert(0 == memcmp(base, candidate, matched)); if (p.second) { op = EmitCopy(op, offset, matched); } else { op = EmitCopy(op, offset, matched); } - next_emit = ip; + next_emit = ip; if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { - goto emit_remainder; - } + goto emit_remainder; + } // We are now looking for a 4-byte match again. We read // table[Hash(ip, shift)] for that. To improve compression, // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)]. 
input_bytes = GetEightBytesAt(ip - 1); - uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); - table[prev_hash] = ip - base_ip - 1; - uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); - candidate = base_ip + table[cur_hash]; - candidate_bytes = UNALIGNED_LOAD32(candidate); - table[cur_hash] = ip - base_ip; - } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); - - next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); - ++ip; - } - } - - emit_remainder: - // Emit the remaining bytes as a literal - if (next_emit < ip_end) { + uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); + table[prev_hash] = ip - base_ip - 1; + uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); + candidate = base_ip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - base_ip; + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); + ++ip; + } + } + + emit_remainder: + // Emit the remaining bytes as a literal + if (next_emit < ip_end) { op = EmitLiteral(op, next_emit, ip_end - next_emit); - } - - return op; -} -} // end namespace internal - + } + + return op; +} +} // end namespace internal + // Called back at avery compression call to trace parameters and sizes. static inline void Report(const char *algorithm, size_t compressed_size, size_t uncompressed_size) {} -// Signature of output types needed by decompression code. -// The decompression code is templatized on a type that obeys this -// signature so that we do not pay virtual function call overhead in -// the middle of a tight decompression loop. 
-// -// class DecompressionWriter { -// public: -// // Called before decompression -// void SetExpectedLength(size_t length); -// -// // Called after decompression -// bool CheckLength() const; -// -// // Called repeatedly during decompression -// bool Append(const char* ip, size_t length); -// bool AppendFromSelf(uint32 offset, size_t length); -// +// Signature of output types needed by decompression code. +// The decompression code is templatized on a type that obeys this +// signature so that we do not pay virtual function call overhead in +// the middle of a tight decompression loop. +// +// class DecompressionWriter { +// public: +// // Called before decompression +// void SetExpectedLength(size_t length); +// +// // Called after decompression +// bool CheckLength() const; +// +// // Called repeatedly during decompression +// bool Append(const char* ip, size_t length); +// bool AppendFromSelf(uint32 offset, size_t length); +// // // The rules for how TryFastAppend differs from Append are somewhat // // convoluted: -// // +// // // // - TryFastAppend is allowed to decline (return false) at any // // time, for any reason -- just "return false" would be // // a perfectly legal implementation of TryFastAppend. @@ -711,10 +711,10 @@ static inline void Report(const char *algorithm, size_t compressed_size, // // decoded fully. In practice, this should not be a big problem, // // as it is unlikely that one would implement a fast path accepting // // this much data. 
-// // -// bool TryFastAppend(const char* ip, size_t available, size_t length); -// }; - +// // +// bool TryFastAppend(const char* ip, size_t available, size_t length); +// }; + static inline uint32 ExtractLowBytes(uint32 v, int n) { assert(n >= 0); assert(n <= 4); @@ -726,8 +726,8 @@ static inline uint32 ExtractLowBytes(uint32 v, int n) { uint64 mask = 0xffffffff; return v & ~(mask << (8 * n)); #endif -} - +} + static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { assert(shift < 32); static const uint8 masks[] = { @@ -736,77 +736,77 @@ static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; return (value & masks[shift]) != 0; -} - -// Helper class for decompression -class SnappyDecompressor { - private: - Source* reader_; // Underlying source of bytes to decompress - const char* ip_; // Points to next buffered byte - const char* ip_limit_; // Points just past buffered bytes - uint32 peeked_; // Bytes peeked from reader (need to skip) - bool eof_; // Hit end of input without an error? +} + +// Helper class for decompression +class SnappyDecompressor { + private: + Source* reader_; // Underlying source of bytes to decompress + const char* ip_; // Points to next buffered byte + const char* ip_limit_; // Points just past buffered bytes + uint32 peeked_; // Bytes peeked from reader (need to skip) + bool eof_; // Hit end of input without an error? char scratch_[kMaximumTagLength]; // See RefillTag(). - - // Ensure that all of the tag metadata for the next tag is available - // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even - // if (ip_limit_ - ip_ < 5). - // - // Returns true on success, false on error or end of input. 
- bool RefillTag(); - - public: - explicit SnappyDecompressor(Source* reader) - : reader_(reader), - ip_(NULL), - ip_limit_(NULL), - peeked_(0), - eof_(false) { - } - - ~SnappyDecompressor() { - // Advance past any bytes we peeked at from the reader - reader_->Skip(peeked_); - } - - // Returns true iff we have hit the end of the input without an error. - bool eof() const { - return eof_; - } - - // Read the uncompressed length stored at the start of the compressed data. + + // Ensure that all of the tag metadata for the next tag is available + // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even + // if (ip_limit_ - ip_ < 5). + // + // Returns true on success, false on error or end of input. + bool RefillTag(); + + public: + explicit SnappyDecompressor(Source* reader) + : reader_(reader), + ip_(NULL), + ip_limit_(NULL), + peeked_(0), + eof_(false) { + } + + ~SnappyDecompressor() { + // Advance past any bytes we peeked at from the reader + reader_->Skip(peeked_); + } + + // Returns true iff we have hit the end of the input without an error. + bool eof() const { + return eof_; + } + + // Read the uncompressed length stored at the start of the compressed data. // On success, stores the length in *result and returns true. - // On failure, returns false. - bool ReadUncompressedLength(uint32* result) { + // On failure, returns false. 
+ bool ReadUncompressedLength(uint32* result) { assert(ip_ == NULL); // Must not have read anything yet - // Length is encoded in 1..5 bytes - *result = 0; - uint32 shift = 0; - while (true) { - if (shift >= 32) return false; - size_t n; - const char* ip = reader_->Peek(&n); - if (n == 0) return false; - const unsigned char c = *(reinterpret_cast(ip)); - reader_->Skip(1); + // Length is encoded in 1..5 bytes + *result = 0; + uint32 shift = 0; + while (true) { + if (shift >= 32) return false; + size_t n; + const char* ip = reader_->Peek(&n); + if (n == 0) return false; + const unsigned char c = *(reinterpret_cast(ip)); + reader_->Skip(1); uint32 val = c & 0x7f; if (LeftShiftOverflows(static_cast(val), shift)) return false; *result |= val << shift; - if (c < 128) { - break; - } - shift += 7; - } - return true; - } - - // Process the next item found in the input. - // Returns true if successful, false on error or end of input. - template + if (c < 128) { + break; + } + shift += 7; + } + return true; + } + + // Process the next item found in the input. + // Returns true if successful, false on error or end of input. + template #if defined(__GNUC__) && defined(__x86_64__) __attribute__((aligned(32))) #endif - void DecompressAllTags(Writer* writer) { + void DecompressAllTags(Writer* writer) { // In x86, pad the function body to start 16 bytes later. This function has // a couple of hotspots that are highly sensitive to alignment: we have // observed regressions by more than 20% in some metrics just by moving the @@ -823,22 +823,22 @@ class SnappyDecompressor { asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); #endif - const char* ip = ip_; - // We could have put this refill fragment only at the beginning of the loop. - // However, duplicating it at the end of each branch gives the compiler more - // scope to optimize the expression based on the local - // context, which overall increases speed. 
- #define MAYBE_REFILL() \ + const char* ip = ip_; + // We could have put this refill fragment only at the beginning of the loop. + // However, duplicating it at the end of each branch gives the compiler more + // scope to optimize the expression based on the local + // context, which overall increases speed. + #define MAYBE_REFILL() \ if (ip_limit_ - ip < kMaximumTagLength) { \ - ip_ = ip; \ - if (!RefillTag()) return; \ - ip = ip_; \ - } - - MAYBE_REFILL(); - for ( ;; ) { - const unsigned char c = *(reinterpret_cast(ip++)); - + ip_ = ip; \ + if (!RefillTag()) return; \ + ip = ip_; \ + } + + MAYBE_REFILL(); + for ( ;; ) { + const unsigned char c = *(reinterpret_cast(ip++)); + // Ratio of iterations that have LITERAL vs non-LITERAL for different // inputs. // @@ -852,202 +852,202 @@ class SnappyDecompressor { // pb 24% 76% // bin 24% 76% if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) { - size_t literal_length = (c >> 2) + 1u; - if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { + size_t literal_length = (c >> 2) + 1u; + if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { assert(literal_length < 61); - ip += literal_length; + ip += literal_length; // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend() // will not return true unless there's already at least five spare // bytes in addition to the literal. - continue; - } + continue; + } if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) { - // Long literal. - const size_t literal_length_length = literal_length - 60; - literal_length = + // Long literal. 
+ const size_t literal_length_length = literal_length - 60; + literal_length = ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) + 1; - ip += literal_length_length; - } - - size_t avail = ip_limit_ - ip; - while (avail < literal_length) { - if (!writer->Append(ip, avail)) return; - literal_length -= avail; - reader_->Skip(peeked_); - size_t n; - ip = reader_->Peek(&n); - avail = n; - peeked_ = avail; - if (avail == 0) return; // Premature end of input - ip_limit_ = ip + avail; - } - if (!writer->Append(ip, literal_length)) { - return; - } - ip += literal_length; - MAYBE_REFILL(); - } else { + ip += literal_length_length; + } + + size_t avail = ip_limit_ - ip; + while (avail < literal_length) { + if (!writer->Append(ip, avail)) return; + literal_length -= avail; + reader_->Skip(peeked_); + size_t n; + ip = reader_->Peek(&n); + avail = n; + peeked_ = avail; + if (avail == 0) return; // Premature end of input + ip_limit_ = ip + avail; + } + if (!writer->Append(ip, literal_length)) { + return; + } + ip += literal_length; + MAYBE_REFILL(); + } else { const size_t entry = char_table[c]; const size_t trailer = ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11); const size_t length = entry & 0xff; - ip += entry >> 11; - - // copy_offset/256 is encoded in bits 8..10. By just fetching - // those bits, we get copy_offset (since the bit-field starts at - // bit 8). + ip += entry >> 11; + + // copy_offset/256 is encoded in bits 8..10. By just fetching + // those bits, we get copy_offset (since the bit-field starts at + // bit 8). 
const size_t copy_offset = entry & 0x700; - if (!writer->AppendFromSelf(copy_offset + trailer, length)) { - return; - } - MAYBE_REFILL(); - } - } - -#undef MAYBE_REFILL - } -}; - -bool SnappyDecompressor::RefillTag() { - const char* ip = ip_; - if (ip == ip_limit_) { - // Fetch a new fragment from the reader - reader_->Skip(peeked_); // All peeked bytes are used up - size_t n; - ip = reader_->Peek(&n); - peeked_ = n; + if (!writer->AppendFromSelf(copy_offset + trailer, length)) { + return; + } + MAYBE_REFILL(); + } + } + +#undef MAYBE_REFILL + } +}; + +bool SnappyDecompressor::RefillTag() { + const char* ip = ip_; + if (ip == ip_limit_) { + // Fetch a new fragment from the reader + reader_->Skip(peeked_); // All peeked bytes are used up + size_t n; + ip = reader_->Peek(&n); + peeked_ = n; eof_ = (n == 0); if (eof_) return false; - ip_limit_ = ip + n; - } - - // Read the tag character + ip_limit_ = ip + n; + } + + // Read the tag character assert(ip < ip_limit_); - const unsigned char c = *(reinterpret_cast(ip)); - const uint32 entry = char_table[c]; - const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c' + const unsigned char c = *(reinterpret_cast(ip)); + const uint32 entry = char_table[c]; + const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c' assert(needed <= sizeof(scratch_)); - - // Read more bytes from reader if needed - uint32 nbuf = ip_limit_ - ip; - if (nbuf < needed) { - // Stitch together bytes from ip and reader to form the word - // contents. We store the needed bytes in "scratch_". They - // will be consumed immediately by the caller since we do not - // read more than we need. 
- memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - while (nbuf < needed) { - size_t length; - const char* src = reader_->Peek(&length); - if (length == 0) return false; + + // Read more bytes from reader if needed + uint32 nbuf = ip_limit_ - ip; + if (nbuf < needed) { + // Stitch together bytes from ip and reader to form the word + // contents. We store the needed bytes in "scratch_". They + // will be consumed immediately by the caller since we do not + // read more than we need. + memmove(scratch_, ip, nbuf); + reader_->Skip(peeked_); // All peeked bytes are used up + peeked_ = 0; + while (nbuf < needed) { + size_t length; + const char* src = reader_->Peek(&length); + if (length == 0) return false; uint32 to_add = std::min(needed - nbuf, length); - memcpy(scratch_ + nbuf, src, to_add); - nbuf += to_add; - reader_->Skip(to_add); - } + memcpy(scratch_ + nbuf, src, to_add); + nbuf += to_add; + reader_->Skip(to_add); + } assert(nbuf == needed); - ip_ = scratch_; - ip_limit_ = scratch_ + needed; + ip_ = scratch_; + ip_limit_ = scratch_ + needed; } else if (nbuf < kMaximumTagLength) { - // Have enough bytes, but move into scratch_ so that we do not - // read past end of input - memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - ip_ = scratch_; - ip_limit_ = scratch_ + nbuf; - } else { - // Pass pointer to buffer returned by reader_. - ip_ = ip; - } - return true; -} - -template + // Have enough bytes, but move into scratch_ so that we do not + // read past end of input + memmove(scratch_, ip, nbuf); + reader_->Skip(peeked_); // All peeked bytes are used up + peeked_ = 0; + ip_ = scratch_; + ip_limit_ = scratch_ + nbuf; + } else { + // Pass pointer to buffer returned by reader_. 
+ ip_ = ip; + } + return true; +} + +template static bool InternalUncompress(Source* r, Writer* writer) { - // Read the uncompressed length from the front of the compressed input - SnappyDecompressor decompressor(r); - uint32 uncompressed_len = 0; - if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; + // Read the uncompressed length from the front of the compressed input + SnappyDecompressor decompressor(r); + uint32 uncompressed_len = 0; + if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; return InternalUncompressAllTags(&decompressor, writer, r->Available(), uncompressed_len); -} - -template -static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, - Writer* writer, +} + +template +static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, + Writer* writer, uint32 compressed_len, uint32 uncompressed_len) { Report("snappy_uncompress", compressed_len, uncompressed_len); - - writer->SetExpectedLength(uncompressed_len); - - // Process the entire input - decompressor->DecompressAllTags(writer); + + writer->SetExpectedLength(uncompressed_len); + + // Process the entire input + decompressor->DecompressAllTags(writer); writer->Flush(); - return (decompressor->eof() && writer->CheckLength()); -} - -bool GetUncompressedLength(Source* source, uint32* result) { - SnappyDecompressor decompressor(source); - return decompressor.ReadUncompressedLength(result); -} - -size_t Compress(Source* reader, Sink* writer) { - size_t written = 0; - size_t N = reader->Available(); + return (decompressor->eof() && writer->CheckLength()); +} + +bool GetUncompressedLength(Source* source, uint32* result) { + SnappyDecompressor decompressor(source); + return decompressor.ReadUncompressedLength(result); +} + +size_t Compress(Source* reader, Sink* writer) { + size_t written = 0; + size_t N = reader->Available(); const size_t uncompressed_size = N; - char ulength[Varint::kMax32]; - char* p = Varint::Encode32(ulength, N); - 
writer->Append(ulength, p-ulength); - written += (p - ulength); - + char ulength[Varint::kMax32]; + char* p = Varint::Encode32(ulength, N); + writer->Append(ulength, p-ulength); + written += (p - ulength); + internal::WorkingMemory wmem(N); - - while (N > 0) { - // Get next block to compress (without copying if possible) - size_t fragment_size; - const char* fragment = reader->Peek(&fragment_size); + + while (N > 0) { + // Get next block to compress (without copying if possible) + size_t fragment_size; + const char* fragment = reader->Peek(&fragment_size); assert(fragment_size != 0); // premature end of input const size_t num_to_read = std::min(N, kBlockSize); - size_t bytes_read = fragment_size; - - size_t pending_advance = 0; - if (bytes_read >= num_to_read) { - // Buffer returned by reader is large enough - pending_advance = num_to_read; - fragment_size = num_to_read; - } else { + size_t bytes_read = fragment_size; + + size_t pending_advance = 0; + if (bytes_read >= num_to_read) { + // Buffer returned by reader is large enough + pending_advance = num_to_read; + fragment_size = num_to_read; + } else { char* scratch = wmem.GetScratchInput(); - memcpy(scratch, fragment, bytes_read); - reader->Skip(bytes_read); - - while (bytes_read < num_to_read) { - fragment = reader->Peek(&fragment_size); + memcpy(scratch, fragment, bytes_read); + reader->Skip(bytes_read); + + while (bytes_read < num_to_read) { + fragment = reader->Peek(&fragment_size); size_t n = std::min(fragment_size, num_to_read - bytes_read); - memcpy(scratch + bytes_read, fragment, n); - bytes_read += n; - reader->Skip(n); - } + memcpy(scratch + bytes_read, fragment, n); + bytes_read += n; + reader->Skip(n); + } assert(bytes_read == num_to_read); - fragment = scratch; - fragment_size = num_to_read; - } + fragment = scratch; + fragment_size = num_to_read; + } assert(fragment_size == num_to_read); - - // Get encoding table for compression - int table_size; - uint16* table = wmem.GetHashTable(num_to_read, 
&table_size); - - // Compress input_fragment and append to dest - const int max_output = MaxCompressedLength(num_to_read); - - // Need a scratch buffer for the output, in case the byte sink doesn't - // have room for us directly. + + // Get encoding table for compression + int table_size; + uint16* table = wmem.GetHashTable(num_to_read, &table_size); + + // Compress input_fragment and append to dest + const int max_output = MaxCompressedLength(num_to_read); + + // Need a scratch buffer for the output, in case the byte sink doesn't + // have room for us directly. // Since we encode kBlockSize regions followed by a region // which is <= kBlockSize in length, a previously allocated @@ -1055,19 +1055,19 @@ size_t Compress(Source* reader, Sink* writer) { char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); char* end = internal::CompressFragment(fragment, fragment_size, dest, table, table_size); - writer->Append(dest, end - dest); - written += (end - dest); - - N -= num_to_read; - reader->Skip(pending_advance); - } - + writer->Append(dest, end - dest); + written += (end - dest); + + N -= num_to_read; + reader->Skip(pending_advance); + } + Report("snappy_compress", written, uncompressed_size); - - return written; -} - -// ----------------------------------------------------------------------- + + return written; +} + +// ----------------------------------------------------------------------- // IOVec interfaces // ----------------------------------------------------------------------- @@ -1260,60 +1260,60 @@ bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, } // ----------------------------------------------------------------------- -// Flat array interfaces -// ----------------------------------------------------------------------- - -// A type that writes to a flat array. -// Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::DecompressAllTags(). 
-class SnappyArrayWriter { - private: - char* base_; - char* op_; - char* op_limit_; - - public: - inline explicit SnappyArrayWriter(char* dst) - : base_(dst), +// Flat array interfaces +// ----------------------------------------------------------------------- + +// A type that writes to a flat array. +// Note that this is not a "ByteSink", but a type that matches the +// Writer template argument to SnappyDecompressor::DecompressAllTags(). +class SnappyArrayWriter { + private: + char* base_; + char* op_; + char* op_limit_; + + public: + inline explicit SnappyArrayWriter(char* dst) + : base_(dst), op_(dst), op_limit_(dst) { - } - - inline void SetExpectedLength(size_t len) { - op_limit_ = op_ + len; - } - - inline bool CheckLength() const { - return op_ == op_limit_; - } - - inline bool Append(const char* ip, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (space_left < len) { - return false; - } - memcpy(op, ip, len); - op_ = op + len; - return true; - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; + } + + inline void SetExpectedLength(size_t len) { + op_limit_ = op_ + len; + } + + inline bool CheckLength() const { + return op_ == op_limit_; + } + + inline bool Append(const char* ip, size_t len) { + char* op = op_; + const size_t space_left = op_limit_ - op; + if (space_left < len) { + return false; + } + memcpy(op, ip, len); + op_ = op + len; + return true; + } + + inline bool TryFastAppend(const char* ip, size_t available, size_t len) { + char* op = op_; + const size_t space_left = op_limit_ - op; if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. + // Fast path, used for the majority (about 95%) of invocations. 
UnalignedCopy128(ip, op); - op_ = op + len; - return true; - } else { - return false; - } - } - - inline bool AppendFromSelf(size_t offset, size_t len) { + op_ = op + len; + return true; + } else { + return false; + } + } + + inline bool AppendFromSelf(size_t offset, size_t len) { char* const op_end = op_ + len; - + // Check if we try to append from before the start of the buffer. // Normally this would just be a check for "produced < offset", // but "produced <= offset - 1u" is equivalent for every case @@ -1323,40 +1323,40 @@ class SnappyArrayWriter { // into an infinite loop. if (Produced() <= offset - 1u || op_end > op_limit_) return false; op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_); - - return true; - } + + return true; + } inline size_t Produced() const { assert(op_ >= base_); return op_ - base_; } inline void Flush() {} -}; - -bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { - ByteArraySource reader(compressed, n); - return RawUncompress(&reader, uncompressed); -} - -bool RawUncompress(Source* compressed, char* uncompressed) { - SnappyArrayWriter output(uncompressed); +}; + +bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { + ByteArraySource reader(compressed, n); + return RawUncompress(&reader, uncompressed); +} + +bool RawUncompress(Source* compressed, char* uncompressed) { + SnappyArrayWriter output(uncompressed); return InternalUncompress(compressed, &output); -} - +} + bool Uncompress(const char* compressed, size_t n, std::string* uncompressed) { - size_t ulength; - if (!GetUncompressedLength(compressed, n, &ulength)) { - return false; - } + size_t ulength; + if (!GetUncompressedLength(compressed, n, &ulength)) { + return false; + } // On 32-bit builds: max_size() < kuint32max. Check for that instead // of crashing (e.g., consider externally specified compressed data). 
if (ulength > uncompressed->max_size()) { - return false; - } - STLStringResizeUninitialized(uncompressed, ulength); - return RawUncompress(compressed, n, string_as_array(uncompressed)); -} - + return false; + } + STLStringResizeUninitialized(uncompressed, ulength); + return RawUncompress(compressed, n, string_as_array(uncompressed)); +} + bool Uncompress(const char* compressed, size_t n, TString* uncompressed) { size_t ulength; if (!GetUncompressedLength(compressed, n, &ulength)) { @@ -1371,72 +1371,72 @@ bool Uncompress(const char* compressed, size_t n, TString* uncompressed) { return RawUncompress(compressed, n, uncompressed->begin()); } -// A Writer that drops everything on the floor and just does validation -class SnappyDecompressionValidator { - private: - size_t expected_; - size_t produced_; - - public: +// A Writer that drops everything on the floor and just does validation +class SnappyDecompressionValidator { + private: + size_t expected_; + size_t produced_; + + public: inline SnappyDecompressionValidator() : expected_(0), produced_(0) { } - inline void SetExpectedLength(size_t len) { - expected_ = len; - } - inline bool CheckLength() const { - return expected_ == produced_; - } - inline bool Append(const char* ip, size_t len) { - produced_ += len; - return produced_ <= expected_; - } - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - return false; - } - inline bool AppendFromSelf(size_t offset, size_t len) { + inline void SetExpectedLength(size_t len) { + expected_ = len; + } + inline bool CheckLength() const { + return expected_ == produced_; + } + inline bool Append(const char* ip, size_t len) { + produced_ += len; + return produced_ <= expected_; + } + inline bool TryFastAppend(const char* ip, size_t available, size_t length) { + return false; + } + inline bool AppendFromSelf(size_t offset, size_t len) { // See SnappyArrayWriter::AppendFromSelf for an explanation of // the "offset - 1u" trick. 
if (produced_ <= offset - 1u) return false; - produced_ += len; - return produced_ <= expected_; - } + produced_ += len; + return produced_ <= expected_; + } inline void Flush() {} -}; - -bool IsValidCompressedBuffer(const char* compressed, size_t n) { - ByteArraySource reader(compressed, n); - SnappyDecompressionValidator writer; +}; + +bool IsValidCompressedBuffer(const char* compressed, size_t n) { + ByteArraySource reader(compressed, n); + SnappyDecompressionValidator writer; return InternalUncompress(&reader, &writer); -} - +} + bool IsValidCompressed(Source* compressed) { SnappyDecompressionValidator writer; return InternalUncompress(compressed, &writer); } -void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length) { - ByteArraySource reader(input, input_length); - UncheckedByteArraySink writer(compressed); - Compress(&reader, &writer); - - // Compute how many bytes were added - *compressed_length = (writer.CurrentDestination() - compressed); -} - +void RawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length) { + ByteArraySource reader(input, input_length); + UncheckedByteArraySink writer(compressed); + Compress(&reader, &writer); + + // Compute how many bytes were added + *compressed_length = (writer.CurrentDestination() - compressed); +} + size_t Compress(const char* input, size_t input_length, std::string* compressed) { - // Pre-grow the buffer to the max length of the compressed output + // Pre-grow the buffer to the max length of the compressed output STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length)); - - size_t compressed_length; - RawCompress(input, input_length, string_as_array(compressed), - &compressed_length); - compressed->resize(compressed_length); - return compressed_length; -} - + + size_t compressed_length; + RawCompress(input, input_length, string_as_array(compressed), + &compressed_length); + 
compressed->resize(compressed_length); + return compressed_length; +} + size_t Compress(const char* input, size_t input_length, TString* compressed) { // Pre-grow the buffer to the max length of the compressed output @@ -1452,14 +1452,14 @@ size_t Compress(const char* input, size_t input_length, // ----------------------------------------------------------------------- // Sink interface // ----------------------------------------------------------------------- - + // A type that decompresses into a Sink. The template parameter // Allocator must export one method "char* Allocate(int size);", which // allocates a buffer of "size" and appends that to the destination. template class SnappyScatteredWriter { Allocator allocator_; - + // We need random access into the data generated so far. Therefore // we keep track of all of the generated data as an array of blocks. // All of the blocks except the last have length kBlockSize. diff --git a/contrib/libs/snappy/snappy.h b/contrib/libs/snappy/snappy.h index a91ef2b4d50..9a3bc3fa64f 100644 --- a/contrib/libs/snappy/snappy.h +++ b/contrib/libs/snappy/snappy.h @@ -1,63 +1,63 @@ -// Copyright 2005 and onwards Google Inc. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// A light-weight compression algorithm. It is designed for speed of -// compression and decompression, rather than for the utmost in space -// savings. -// -// For getting better compression ratios when you are compressing data -// with long repeated sequences or compressing data that is similar to -// other data, while still compressing fast, you might look at first -// using BMDiff and then compressing the output of BMDiff with -// Snappy. - +// Copyright 2005 and onwards Google Inc. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// A light-weight compression algorithm. It is designed for speed of +// compression and decompression, rather than for the utmost in space +// savings. +// +// For getting better compression ratios when you are compressing data +// with long repeated sequences or compressing data that is similar to +// other data, while still compressing fast, you might look at first +// using BMDiff and then compressing the output of BMDiff with +// Snappy. + #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ #define THIRD_PARTY_SNAPPY_SNAPPY_H__ - + #include #include - + #include -#include "snappy-stubs-public.h" - -namespace snappy { - class Source; - class Sink; - - // ------------------------------------------------------------------------ - // Generic compression/decompression routines. - // ------------------------------------------------------------------------ - - // Compress the bytes read from "*source" and append to "*sink". Return the - // number of bytes written. 
- size_t Compress(Source* source, Sink* sink); - +#include "snappy-stubs-public.h" + +namespace snappy { + class Source; + class Sink; + + // ------------------------------------------------------------------------ + // Generic compression/decompression routines. + // ------------------------------------------------------------------------ + + // Compress the bytes read from "*source" and append to "*sink". Return the + // number of bytes written. + size_t Compress(Source* source, Sink* sink); + // Find the uncompressed length of the given stream, as given by the header. // Note that the true length could deviate from this; the stream could e.g. // be truncated. @@ -65,37 +65,37 @@ namespace snappy { // Also note that this leaves "*source" in a state that is unsuitable for // further operations, such as RawUncompress(). You will need to rewind // or recreate the source yourself before attempting any further calls. - bool GetUncompressedLength(Source* source, uint32* result); - - // ------------------------------------------------------------------------ - // Higher-level string based routines (should be sufficient for most users) - // ------------------------------------------------------------------------ - + bool GetUncompressedLength(Source* source, uint32* result); + + // ------------------------------------------------------------------------ + // Higher-level string based routines (should be sufficient for most users) + // ------------------------------------------------------------------------ + // Sets "*compressed" to the compressed version of "input[0,input_length-1]". // Original contents of *compressed are lost. - // + // // REQUIRES: "input[]" is not an alias of "*compressed". size_t Compress(const char* input, size_t input_length, std::string* compressed); size_t Compress(const char* input, size_t input_length, TString* compressed); - - // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". 
- // Original contents of "*uncompressed" are lost. - // - // REQUIRES: "compressed[]" is not an alias of "*uncompressed". - // - // returns false if the message is corrupted and could not be decompressed - bool Uncompress(const char* compressed, size_t compressed_length, + + // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". + // Original contents of "*uncompressed" are lost. + // + // REQUIRES: "compressed[]" is not an alias of "*uncompressed". + // + // returns false if the message is corrupted and could not be decompressed + bool Uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed); bool Uncompress(const char* compressed, size_t compressed_length, TString* uncompressed); - + // Decompresses "compressed" to "*uncompressed". // // returns false if the message is corrupted and could not be decompressed bool Uncompress(Source* compressed, Sink* uncompressed); - + // This routine uncompresses as much of the "compressed" as possible // into sink. It returns the number of valid bytes added to sink // (extra invalid bytes may have been added due to errors; the caller @@ -104,45 +104,45 @@ namespace snappy { // encountered. size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed); - // ------------------------------------------------------------------------ - // Lower-level character array based routines. May be useful for - // efficiency reasons in certain circumstances. - // ------------------------------------------------------------------------ - - // REQUIRES: "compressed" must point to an area of memory that is at - // least "MaxCompressedLength(input_length)" bytes in length. - // - // Takes the data stored in "input[0..input_length]" and stores - // it in the array pointed to by "compressed". - // - // "*compressed_length" is set to the length of the compressed output. 
- // - // Example: - // char* output = new char[snappy::MaxCompressedLength(input_length)]; - // size_t output_length; - // RawCompress(input, input_length, output, &output_length); - // ... Process(output, output_length) ... - // delete [] output; - void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length); - - // Given data in "compressed[0..compressed_length-1]" generated by - // calling the Snappy::Compress routine, this routine - // stores the uncompressed data to - // uncompressed[0..GetUncompressedLength(compressed)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(const char* compressed, size_t compressed_length, - char* uncompressed); - - // Given data from the byte source 'compressed' generated by calling - // the Snappy::Compress routine, this routine stores the uncompressed - // data to - // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(Source* compressed, char* uncompressed); - + // ------------------------------------------------------------------------ + // Lower-level character array based routines. May be useful for + // efficiency reasons in certain circumstances. + // ------------------------------------------------------------------------ + + // REQUIRES: "compressed" must point to an area of memory that is at + // least "MaxCompressedLength(input_length)" bytes in length. + // + // Takes the data stored in "input[0..input_length]" and stores + // it in the array pointed to by "compressed". + // + // "*compressed_length" is set to the length of the compressed output. + // + // Example: + // char* output = new char[snappy::MaxCompressedLength(input_length)]; + // size_t output_length; + // RawCompress(input, input_length, output, &output_length); + // ... Process(output, output_length) ... 
+ // delete [] output; + void RawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length); + + // Given data in "compressed[0..compressed_length-1]" generated by + // calling the Snappy::Compress routine, this routine + // stores the uncompressed data to + // uncompressed[0..GetUncompressedLength(compressed)-1] + // returns false if the message is corrupted and could not be decrypted + bool RawUncompress(const char* compressed, size_t compressed_length, + char* uncompressed); + + // Given data from the byte source 'compressed' generated by calling + // the Snappy::Compress routine, this routine stores the uncompressed + // data to + // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] + // returns false if the message is corrupted and could not be decrypted + bool RawUncompress(Source* compressed, char* uncompressed); + // Given data in "compressed[0..compressed_length-1]" generated by // calling the Snappy::Compress routine, this routine // stores the uncompressed data to the iovec "iov". The number of physical @@ -165,24 +165,24 @@ namespace snappy { bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, size_t iov_cnt); - // Returns the maximal size of the compressed representation of - // input data that is "source_bytes" bytes in length; - size_t MaxCompressedLength(size_t source_bytes); - - // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() - // Returns true and stores the length of the uncompressed data in - // *result normally. Returns false on parsing error. - // This operation takes O(1) time. - bool GetUncompressedLength(const char* compressed, size_t compressed_length, - size_t* result); - - // Returns true iff the contents of "compressed[]" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to compressed_length, but is usually at least - // a factor of four faster than actual decompression. 
- bool IsValidCompressedBuffer(const char* compressed, - size_t compressed_length); - + // Returns the maximal size of the compressed representation of + // input data that is "source_bytes" bytes in length; + size_t MaxCompressedLength(size_t source_bytes); + + // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() + // Returns true and stores the length of the uncompressed data in + // *result normally. Returns false on parsing error. + // This operation takes O(1) time. + bool GetUncompressedLength(const char* compressed, size_t compressed_length, + size_t* result); + + // Returns true iff the contents of "compressed[]" can be uncompressed + // successfully. Does not return the uncompressed data. Takes + // time proportional to compressed_length, but is usually at least + // a factor of four faster than actual decompression. + bool IsValidCompressedBuffer(const char* compressed, + size_t compressed_length); + // Returns true iff the contents of "compressed" can be uncompressed // successfully. Does not return the uncompressed data. Takes // time proportional to *compressed length, but is usually at least @@ -196,18 +196,18 @@ namespace snappy { // can only store 16-bit offsets, and EmitCopy() also assumes the offset // is 65535 bytes or less. Note also that if you change this, it will // affect the framing format (see framing_format.txt). - // + // // Note that there might be older data around that is compressed with larger // block sizes, so the decompression code should not rely on the // non-existence of long backreferences. 
static constexpr int kBlockLog = 16; static constexpr size_t kBlockSize = 1 << kBlockLog; - + static constexpr int kMinHashTableBits = 8; static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits; - + static constexpr int kMaxHashTableBits = 14; static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits; -} // end namespace snappy - +} // end namespace snappy + #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ diff --git a/contrib/libs/snappy/ya.make b/contrib/libs/snappy/ya.make index 4fb0aef69c5..472daa0c808 100644 --- a/contrib/libs/snappy/ya.make +++ b/contrib/libs/snappy/ya.make @@ -1,6 +1,6 @@ # Generated by devtools/yamaker from nixpkgs 92c884dfd7140a6c3e6c717cf8990f7a78524331. -LIBRARY() +LIBRARY() OWNER(g:cpp-contrib) @@ -11,7 +11,7 @@ ORIGINAL_SOURCE(https://github.com/google/snappy/archive/1.1.8.tar.gz) LICENSE(BSD-3-Clause) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - + ADDINCL( GLOBAL contrib/libs/snappy/include ) @@ -21,12 +21,12 @@ NO_COMPILER_WARNINGS() CFLAGS( -DHAVE_CONFIG_H ) - -SRCS( - snappy-c.cc + +SRCS( + snappy-c.cc snappy-sinksource.cc - snappy-stubs-internal.cc + snappy-stubs-internal.cc snappy.cc -) - -END() +) + +END() diff --git a/contrib/libs/sqlite3/ya.make b/contrib/libs/sqlite3/ya.make index 9535cdc9c49..e6b26dccc35 100644 --- a/contrib/libs/sqlite3/ya.make +++ b/contrib/libs/sqlite3/ya.make @@ -15,7 +15,7 @@ LICENSE( Public-Domain AND blessing ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) ADDINCL( diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override.h b/contrib/libs/tcmalloc/tcmalloc/libc_override.h index 6cc5895bace..89f8e4e5c81 100644 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override.h +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override.h @@ -32,7 +32,7 @@ #include "tcmalloc/libc_override_glibc.h" #else -#include "tcmalloc/libc_override_redefine.h" +#include "tcmalloc/libc_override_redefine.h" #endif diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h 
b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h index 5a851e2a18c..b1655461c39 100644 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h @@ -57,9 +57,9 @@ int posix_memalign(void** r, size_t a, size_t s) { size_t malloc_usable_size(void* p) { return TCMallocInternalMallocSize(p); } // tcmalloc extension -void sdallocx(void* p, size_t s, int flags) noexcept { - TCMallocInternalSdallocx(p, s, flags); -} +void sdallocx(void* p, size_t s, int flags) noexcept { + TCMallocInternalSdallocx(p, s, flags); +} #if defined(__GLIBC__) || defined(__NEWLIB__) // SunOS extension diff --git a/contrib/libs/tcmalloc/ya.make b/contrib/libs/tcmalloc/ya.make index 362d676ede6..54701b1b777 100644 --- a/contrib/libs/tcmalloc/ya.make +++ b/contrib/libs/tcmalloc/ya.make @@ -1,7 +1,7 @@ LIBRARY() LICENSE(Apache-2.0) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/utf8proc/ya.make b/contrib/libs/utf8proc/ya.make index 4a11983f58b..5da2c75ea83 100644 --- a/contrib/libs/utf8proc/ya.make +++ b/contrib/libs/utf8proc/ya.make @@ -4,7 +4,7 @@ LICENSE( MIT AND Unicode ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( diff --git a/contrib/libs/xz/common/ya.make b/contrib/libs/xz/common/ya.make index 6a2c2062dfb..ce25e6a2302 100644 --- a/contrib/libs/xz/common/ya.make +++ b/contrib/libs/xz/common/ya.make @@ -10,7 +10,7 @@ OWNER( LIBRARY() LICENSE(Public-Domain) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) VERSION(5.2.4) diff --git a/contrib/libs/xz/liblzma/ya.make b/contrib/libs/xz/liblzma/ya.make index 6e8fd9d68c5..d548283a2bd 100644 --- a/contrib/libs/xz/liblzma/ya.make +++ b/contrib/libs/xz/liblzma/ya.make @@ -10,7 +10,7 @@ OWNER( LIBRARY() LICENSE(Public-Domain) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) VERSION(5.2.4) diff --git a/contrib/libs/ya.make b/contrib/libs/ya.make index bf98d280412..9c4640fdcf2 100644 --- a/contrib/libs/ya.make +++ 
b/contrib/libs/ya.make @@ -2,8 +2,8 @@ OWNER( g:contrib g:cpp-contrib ) - -RECURSE( + +RECURSE( afl antlr4_cpp_runtime apache @@ -22,7 +22,7 @@ RECURSE( bento4 bluez-clean-headers breakpad - brotli + brotli c-ares cairo cbc @@ -112,7 +112,7 @@ RECURSE( ImageMagick inja inja/ut - intel + intel isa-l isa-l/ut jansson @@ -222,7 +222,7 @@ RECURSE( matrixssl mecab metrohash - mimalloc + mimalloc minilzo minizip mlir11 @@ -282,7 +282,7 @@ RECURSE( protoc_std psimd pthreadpool - pugixml + pugixml pybind11 pycxx python @@ -363,12 +363,12 @@ RECURSE( yaml-cpp zeromq zlib - zlib-ng-develop + zlib-ng-develop zookeeper zstd - zstd06 + zstd06 zzip -) +) IF (OS_FREEBSD OR OS_LINUX) RECURSE( @@ -381,7 +381,7 @@ ENDIF() IF (OS_DARWIN) RECURSE( - gperftools + gperftools osxfuse macfuse-headers uuid @@ -421,11 +421,11 @@ ELSE() unixodbc ) ENDIF() - + IF (OS_LINUX OR OS_WINDOWS) - RECURSE( + RECURSE( lockless - ) + ) ENDIF() IF (OS_ANDROID) @@ -441,6 +441,6 @@ IF (OS_IOS AND ARCH_ARM64 OR OS_DARWIN) ) ENDIF() -IF (MUSL) - RECURSE(musl_extra) -ENDIF() +IF (MUSL) + RECURSE(musl_extra) +ENDIF() diff --git a/contrib/libs/yaml-cpp/ya.make b/contrib/libs/yaml-cpp/ya.make index 66656da4012..058caf92fa9 100644 --- a/contrib/libs/yaml-cpp/ya.make +++ b/contrib/libs/yaml-cpp/ya.make @@ -1,7 +1,7 @@ LIBRARY() -LICENSE(MIT) - +LICENSE(MIT) + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) VERSION(0.5.3) diff --git a/contrib/libs/zlib/deflate.c b/contrib/libs/zlib/deflate.c index 7f3ff741f85..7318b1e2fbc 100644 --- a/contrib/libs/zlib/deflate.c +++ b/contrib/libs/zlib/deflate.c @@ -1236,7 +1236,7 @@ local void lm_init (s) /* For 80x86 and 680x0, an optimized version will be provided in match.asm or * match.S. The code will be functionally equivalent. 
*/ -Y_NO_SANITIZE("undefined") local uInt longest_match(s, cur_match) +Y_NO_SANITIZE("undefined") local uInt longest_match(s, cur_match) deflate_state *s; IPos cur_match; /* current match */ { diff --git a/contrib/libs/zstd06/LICENSE b/contrib/libs/zstd06/LICENSE index 29c8670589a..35495850f2e 100755 --- a/contrib/libs/zstd06/LICENSE +++ b/contrib/libs/zstd06/LICENSE @@ -1,26 +1,26 @@ -ZSTD Library -Copyright (c) 2014-2015, Yann Collet -All rights reserved. - -BSD License - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +ZSTD Library +Copyright (c) 2014-2015, Yann Collet +All rights reserved. 
+ +BSD License + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/libs/zstd06/common/bitstream.h b/contrib/libs/zstd06/common/bitstream.h index 5d89872904c..97fc621579c 100644 --- a/contrib/libs/zstd06/common/bitstream.h +++ b/contrib/libs/zstd06/common/bitstream.h @@ -1,417 +1,417 @@ #include -/* ****************************************************************** - bitstream - Part of FSE library - header file (to include) - Copyright (C) 2013-2016, Yann Collet. 
- - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -****************************************************************** */ -#ifndef BITSTREAM_H_MODULE -#define BITSTREAM_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - - -/* -* This API consists of small unitary functions, which must be inlined for best performance. -* Since link-time-optimization is not available for all compilers, -* these functions are defined into a .h to be included. 
-*/ - -/*-**************************************** -* Dependencies -******************************************/ -#include "mem.h" /* unaligned access routines */ -#include "error_private.h" /* error codes and messages */ - - -/*========================================= -* Target specific -=========================================*/ -#if defined(__BMI__) && defined(__GNUC__) -# include /* support for bextr (experimental) */ -#endif - - -/*-****************************************** -* bitStream encoding API (write forward) -********************************************/ -/* bitStream can mix input from multiple sources. -* A critical property of these streams is that they encode and decode in **reverse** direction. -* So the first bit sequence you add will be the last to be read, like a LIFO stack. -*/ -typedef struct -{ - size_t bitContainer; - int bitPos; - char* startPtr; - char* ptr; - char* endPtr; -} BIT_CStream_t; - -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); - -/* Start with initCStream, providing the size of buffer to write into. -* bitStream will never write outside of this buffer. +/* ****************************************************************** + bitstream + Part of FSE library + header file (to include) + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. 
+*/ + +/*-**************************************** +* Dependencies +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "error_private.h" /* error codes and messages */ + + +/*========================================= +* Target specific +=========================================*/ +#if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +#endif + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. +* A critical property of these streams is that they encode and decode in **reverse** direction. +* So the first bit sequence you add will be the last to be read, like a LIFO stack. +*/ +typedef struct +{ + size_t bitContainer; + int bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. -* -* bits are first added to a local register. -* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -* Writing data into memory is an explicit operation, performed by the flushBits function. -* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -* After a flushBits, a maximum of 7 bits might still be stored into local register. -* -* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. 
-* -* Last operation is to close the bitStream. -* The function returns the final size of CStream in bytes. -* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) -*/ - - -/*-******************************************** -* bitStream decoding API (read backward) -**********************************************/ -typedef struct -{ - size_t bitContainer; - unsigned bitsConsumed; - const char* ptr; - const char* start; -} BIT_DStream_t; - -typedef enum { BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ - /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ - -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); - - -/* Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. 
+* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
-*/ - - -/*-**************************************** -* unsafe API -******************************************/ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ - -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); -/* unsafe version; does not check buffer overflow */ - -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 */ - - - -/*-************************************************************** -* Internal functions -****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (register U32 val) -{ -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - _BitScanReverse ( &r, val ); - return (unsigned) r; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - unsigned r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; - return r; -# endif -} - -/*===== Local Constants =====*/ -static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - - -/*-************************************************************** -* bitStream encoding -****************************************************************/ -/*! 
BIT_initCStream() : - * `dstCapacity` must be > sizeof(void*) - * @return : 0 if success, - otherwise an error code (can be tested using ERR_isError() ) */ -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity) -{ - bitC->bitContainer = 0; - bitC->bitPos = 0; - bitC->startPtr = (char*)startPtr; - bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); - if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall); - return 0; -} - -/*! BIT_addBits() : - can add up to 26 bits into `bitC`. - Does not check for register overflow ! */ -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) -{ - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_addBitsFast() : - * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) -{ - bitC->bitContainer |= value << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_flushBitsFast() : - * unsafe version; does not check buffer overflow */ -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -} - -/*! BIT_flushBits() : - * safe version; check for buffer overflow, and prevents it. - * note : does not signal buffer overflow. 
This will be revealed later on using BIT_closeCStream() */ -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -} - -/*! BIT_closeCStream() : - * @return : size of CStream, in bytes, - or 0 if it could not fit into dstBuffer */ -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) -{ - BIT_addBitsFast(bitC, 1, 1); /* endMark */ - BIT_flushBits(bitC); - - if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */ - - return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -} - - -/*-******************************************************** -* bitStream decoding -**********************************************************/ -/*! BIT_initDStream() : -* Initialize a BIT_DStream_t. -* `bitD` : a pointer to an already allocated BIT_DStream_t structure. -* `srcSize` must be the *exact* size of the bitStream, in bytes. -* @return : size of stream (== srcSize) or an errorCode if a problem is detected -*/ -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) -{ - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } - +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
+*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (register U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! 
BIT_initCStream() : + * `dstCapacity` must be > sizeof(void*) + * @return : 0 if success, + otherwise an error code (can be tested using ERR_isError() ) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); + if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + can add up to 26 bits into `bitC`. + Does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) +{ + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) +{ + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ +} + +/*! BIT_flushBits() : + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. 
This will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + + if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */ + + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : +* Initialize a BIT_DStream_t. +* `bitD` : a pointer to an already allocated BIT_DStream_t structure. +* `srcSize` must be the *exact* size of the bitStream, in bytes. 
+* @return : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ - bitD->start = (const char*)srcBuffer; + bitD->start = (const char*)srcBuffer; bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = MEM_readLEST(bitD->ptr); - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } - } else { - bitD->start = (const char*)srcBuffer; - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } + } else { + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; - default:; - } - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - 
BIT_highbit32(lastByte); } + default:; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; - } - - return srcSize; -} - + } + + return srcSize; +} + MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) -{ +{ return bitContainer >> start; -} - +} + MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) -{ -#if defined(__BMI__) && defined(__GNUC__) /* experimental */ +{ +#if defined(__BMI__) && defined(__GNUC__) /* experimental */ # if defined(__x86_64__) if (sizeof(bitContainer)==8) return _bextr_u64(bitContainer, start, nbBits); else # endif return _bextr_u32(bitContainer, start, nbBits); -#else +#else return (bitContainer >> start) & BIT_mask[nbBits]; -#endif -} - +#endif +} + MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) -{ +{ return bitContainer & BIT_mask[nbBits]; -} - -/*! BIT_lookBits() : - * Provides next n bits from local register. +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. * local register is not modified. - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. - * @return : value extracted - */ - MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) -{ + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. 
+ * @return : value extracted + */ + MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); -#else - U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); -#endif -} - -/*! BIT_lookBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) -{ - U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; - return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); -} - -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) -{ - bitD->bitsConsumed += nbBits; -} - -/*! BIT_readBits() : - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. - */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) -{ - size_t const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_readBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) -{ - size_t const value = BIT_lookBitsFast(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_reloadDStream() : -* Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ). -* This function is safe, it guarantees it will not read beyond src buffer. -* @return : status of `BIT_DStream_t` internal register. 
+#else + U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +#endif +} + +/*! BIT_lookBitsFast() : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. + */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStream() : +* Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ). +* This function is safe, it guarantees it will not read beyond src buffer. +* @return : status of `BIT_DStream_t` internal register. 
if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ - return BIT_DStream_overflow; - - if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; - } - if (bitD->ptr == bitD->start) { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } - { U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = BIT_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ - return result; - } -} - -/*! BIT_endOfDStream() : -* @return Tells if DStream has exactly reached its end (all bits consumed). 
-*/ -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) -{ - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); -} - -#if defined (__cplusplus) -} -#endif - -#endif /* BITSTREAM_H_MODULE */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + +/*! BIT_endOfDStream() : +* @return Tells if DStream has exactly reached its end (all bits consumed). 
+*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/contrib/libs/zstd06/common/error_private.h b/contrib/libs/zstd06/common/error_private.h index f2cd35dd152..d8d1ef24b3f 100644 --- a/contrib/libs/zstd06/common/error_private.h +++ b/contrib/libs/zstd06/common/error_private.h @@ -1,124 +1,124 @@ #include -/* ****************************************************************** - Error codes and messages - Copyright (C) 2013-2016, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Homepage : http://www.zstd.net -****************************************************************** */ -/* Note : this module is expected to remain private, do not expose it */ - -#ifndef ERROR_H_MODULE -#define ERROR_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - - -/* **************************************** -* Dependencies -******************************************/ -#include /* size_t */ -#include "error_public.h" /* enum list */ - - -/* **************************************** -* Compiler-specific -******************************************/ -#if defined(__GNUC__) -# define ERR_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define ERR_STATIC static inline -#elif defined(_MSC_VER) -# define ERR_STATIC static __inline -#else -# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - - -/*-**************************************** -* Customization (error_public.h) -******************************************/ -typedef ZSTD_ErrorCode ERR_enum; -#define PREFIX(name) ZSTD_error_##name - - -/*-**************************************** -* Error codes handling -******************************************/ -#ifdef ERROR -# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ -#endif -#define ERROR(name) 
((size_t)-PREFIX(name)) - -ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } - +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2016, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Homepage : http://www.zstd.net +****************************************************************** */ +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +#include /* size_t */ +#include "error_public.h" /* enum list */ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#ifdef ERROR +# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#endif +#define ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } - - -/*-**************************************** -* Error Strings -******************************************/ - + + +/*-**************************************** +* Error Strings +******************************************/ + ERR_STATIC const char* ERR_getErrorString(ERR_enum code) -{ - static const 
char* notErrorCode = "Unspecified error code"; +{ + static const char* notErrorCode = "Unspecified error code"; switch( code ) - { - case PREFIX(no_error): return "No error detected"; - case PREFIX(GENERIC): return "Error (generic)"; - case PREFIX(prefix_unknown): return "Unknown frame descriptor"; - case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; - case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode"; - case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound"; - case PREFIX(init_missing): return "Context should be init first"; - case PREFIX(memory_allocation): return "Allocation error : not enough memory"; - case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size incorrect"; - case PREFIX(corruption_detected): return "Corrupted block detected"; - case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; - case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; - case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; - case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; - case PREFIX(maxCode): + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode"; + case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; 
+ case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size incorrect"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(maxCode): default: return notErrorCode; - } -} - + } +} + ERR_STATIC const char* ERR_getErrorName(size_t code) { return ERR_getErrorString(ERR_getErrorCode(code)); } - -#if defined (__cplusplus) -} -#endif - -#endif /* ERROR_H_MODULE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ diff --git a/contrib/libs/zstd06/common/error_public.h b/contrib/libs/zstd06/common/error_public.h index 20c04de531b..1d8f03995fb 100644 --- a/contrib/libs/zstd06/common/error_public.h +++ b/contrib/libs/zstd06/common/error_public.h @@ -1,72 +1,72 @@ #include -/* ****************************************************************** - Error codes list - Copyright (C) 2016, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Homepage : http://www.zstd.net -****************************************************************** */ -#ifndef ERROR_PUBLIC_H_MODULE -#define ERROR_PUBLIC_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - - -/* **************************************** -* error codes list -******************************************/ -typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_maxCode -} ZSTD_ErrorCode; - -/* note : compare with size_t function results using ZSTD_getError() */ - - -#if defined (__cplusplus) -} -#endif - -#endif /* ERROR_PUBLIC_H_MODULE */ +/* ****************************************************************** + Error codes list + Copyright (C) 
2016, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Homepage : http://www.zstd.net +****************************************************************** */ +#ifndef ERROR_PUBLIC_H_MODULE +#define ERROR_PUBLIC_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* error codes list +******************************************/ +typedef enum { + ZSTD_error_no_error, + ZSTD_error_GENERIC, + ZSTD_error_prefix_unknown, + ZSTD_error_frameParameter_unsupported, + ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_compressionParameter_unsupported, + ZSTD_error_init_missing, + ZSTD_error_memory_allocation, + ZSTD_error_stage_wrong, + ZSTD_error_dstSize_tooSmall, + ZSTD_error_srcSize_wrong, + ZSTD_error_corruption_detected, + ZSTD_error_tableLog_tooLarge, + ZSTD_error_maxSymbolValue_tooLarge, + ZSTD_error_maxSymbolValue_tooSmall, + ZSTD_error_dictionary_corrupted, + ZSTD_error_maxCode +} ZSTD_ErrorCode; + +/* note : compare with size_t function results using ZSTD_getError() */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_PUBLIC_H_MODULE */ diff --git a/contrib/libs/zstd06/common/fse.h b/contrib/libs/zstd06/common/fse.h index fcd0216f108..1cc69929511 100644 --- a/contrib/libs/zstd06/common/fse.h +++ b/contrib/libs/zstd06/common/fse.h @@ -1,280 +1,280 @@ #include -/* ****************************************************************** - FSE : Finite State Entropy codec - Public Prototypes declaration - Copyright (C) 2013-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -****************************************************************** */ -#ifndef FSE_H -#define FSE_H - -#if defined (__cplusplus) -extern "C" { -#endif - - -/*-***************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ - - -/*-**************************************** -* FSE simple functions -******************************************/ -/*! FSE_compress() : - Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). - @return : size of compressed data (<= dstCapacity). - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. 
- if FSE_isError(return), compression failed (more details using FSE_getErrorName()) -*/ -size_t FSE_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/*! FSE_decompress(): - Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', - into already allocated destination buffer 'dst', of size 'dstCapacity'. - @return : size of regenerated data (<= maxDstSize), - or an error code, which can be tested using FSE_isError() . - - ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! - Why ? : making this distinction requires a header. - Header management is intentionally delegated to the user layer, which can better manage special cases. -*/ -size_t FSE_decompress(void* dst, size_t dstCapacity, - const void* cSrc, size_t cSrcSize); - - -/*-***************************************** -* Tool functions -******************************************/ -size_t FSE_compressBound(size_t size); /* maximum compressed size */ - -/* Error Management */ -unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ -const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ - - -/*-***************************************** -* FSE advanced functions -******************************************/ -/*! FSE_compress2() : - Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' - Both parameters can be defined as '0' to mean : use default value - @return : size of compressed data - Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. - if FSE_isError(return), it's an error code. 
-*/ -size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); - - -/*-***************************************** -* FSE detailed API -******************************************/ -/*! -FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] -2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) -3. save normalized counters to memory buffer using writeNCount() -4. build encoding table 'CTable' from normalized counters -5. encode the data stream using encoding table 'CTable' - -FSE_decompress() does the following: -1. read normalized counters with readNCount() -2. build decoding table 'DTable' from normalized counters -3. decode the data stream using decoding table 'DTable' - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and provide normalized distribution using external method. -*/ - -/* *** COMPRESSION *** */ - -/*! FSE_count(): - Provides the precise count of each byte within a table 'count'. - 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - *maxSymbolValuePtr will be updated if detected smaller than initial value. - @return : the count of the most frequent symbol (which is not identified). - if return == srcSize, there is only one symbol. - Can also return an error code, which can be tested with FSE_isError(). */ -size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - -/*! FSE_optimalTableLog(): - dynamically downsize 'tableLog' when conditions are met. - It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. - @return : recommended tableLog (necessarily <= initial 'tableLog') */ -unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue); - -/*! 
FSE_normalizeCount(): - normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) - 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). - @return : tableLog, - or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_NCountWriteBound(): - Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. - Typically useful for allocation purpose. */ -size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_writeNCount(): - Compactly save 'normalizedCounter' into 'buffer'. - @return : size of the compressed table, - or an errorCode, which can be tested using FSE_isError(). */ -size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - - -/*! Constructor and Destructor of FSE_CTable. - Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ -typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ -FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue); -void FSE_freeCTable (FSE_CTable* ct); - -/*! FSE_buildCTable(): - Builds `ct`, which must be already allocated, using FSE_createCTable(). - @return : 0, or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_compress_usingCTable(): - Compress `src` using `ct` into `dst` which must be already allocated. 
- @return : size of compressed data (<= `dstCapacity`), - or 0 if compressed data could not fit into `dst`, - or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); - -/*! -Tutorial : ----------- -The first step is to count all symbols. FSE_count() does this job very fast. -Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. -'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] -maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) -FSE_count() will return the number of occurrence of the most frequent symbol. -This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). - -The next step is to normalize the frequencies. -FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. -It also guarantees a minimum of 1 to any Symbol with frequency >= 1. -You can use 'tableLog'==0 to mean "use default tableLog value". -If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), -which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). - -The result of FSE_normalizeCount() will be saved into a table, -called 'normalizedCounter', which is a table of signed short. -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. -The return value is tableLog if everything proceeded as expected. -It is 0 if there is a single symbol within distribution. -If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). 
- -'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). -'buffer' must be already allocated. -For guaranteed success, buffer size must be at least FSE_headerBound(). -The result of the function is the number of bytes written into 'buffer'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). - -'normalizedCounter' can then be used to create the compression table 'CTable'. -The space required by 'CTable' must be already allocated, using FSE_createCTable(). -You can then use FSE_buildCTable() to fill 'CTable'. -If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). - -'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). -Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. -If it returns '0', compressed data could not fit into 'dst'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -*/ - - -/* *** DECOMPRESSION *** */ - -/*! FSE_readNCount(): - Read compactly saved 'normalizedCounter' from 'rBuffer'. - @return : size read from 'rBuffer', - or an errorCode, which can be tested using FSE_isError(). - maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); - -/*! Constructor and Destructor of FSE_DTable. - Note that its size depends on 'tableLog' */ -typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -FSE_DTable* FSE_createDTable(unsigned tableLog); -void FSE_freeDTable(FSE_DTable* dt); - -/*! 
FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable(). - return : 0, or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_decompress_usingDTable(): - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` - into `dst` which must be already allocated. - @return : size of regenerated data (necessarily <= `dstCapacity`), - or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); - -/*! -Tutorial : ----------- -(Note : these functions only decompress FSE-compressed blocks. - If block is uncompressed, use memcpy() instead - If block is a single repeated byte, use memset() instead ) - -The first step is to obtain the normalized frequencies of symbols. -This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. -In practice, that means it's necessary to know 'maxSymbolValue' beforehand, -or size the table to handle worst case situations (typically 256). -FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. -The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. -Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. -This is performed by the function FSE_buildDTable(). -The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
- -`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). -`cSrcSize` must be strictly correct, otherwise decompression will fail. -FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) -*/ - - -#if defined (__cplusplus) -} -#endif - -#endif /* FSE_H */ +/* ****************************************************************** + FSE : Finite State Entropy codec + Public Prototypes declaration + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef FSE_H +#define FSE_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-***************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. 
+*/ +size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. 
+For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_count(): + Provides the precise count of each byte within a table 'count'. + 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + *maxSymbolValuePtr will be updated if detected smaller than initial value. + @return : the count of the most frequent symbol (which is not identified). + if return == srcSize, there is only one symbol. + Can also return an error code, which can be tested with FSE_isError(). */ +size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= initial 'tableLog') */ +unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). 
*/ +size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue); +void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. 
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. 
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_DTable* FSE_createDTable(unsigned tableLog); +void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). 
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
(ex: dst buffer too small) +*/ + + +#if defined (__cplusplus) +} +#endif + +#endif /* FSE_H */ diff --git a/contrib/libs/zstd06/common/fse_static.h b/contrib/libs/zstd06/common/fse_static.h index 9ecb0b2cb98..d09589efee6 100644 --- a/contrib/libs/zstd06/common/fse_static.h +++ b/contrib/libs/zstd06/common/fse_static.h @@ -1,340 +1,340 @@ #include -/* ****************************************************************** - FSE : Finite State Entropy coder - header file for static linking (only) - Copyright (C) 2013-2015, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ -#ifndef FSE_STATIC_H -#define FSE_STATIC_H - -#if defined (__cplusplus) -extern "C" { -#endif - - -/* ***************************************** -* Dependencies -*******************************************/ -#include "fse.h" -#include "bitstream.h" - - -/* ***************************************** -* Static allocation -*******************************************/ -/* FSE buffer bounds */ -#define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7)) -#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= BIT_DStream_completed - -When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -Checking if DStream has reached its end is performed by : - BIT_endOfDStream(&DStream); -Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
- FSE_endOfDState(&DState); -*/ - - -/* ***************************************** -* FSE unsafe API -*******************************************/ -static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); -/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ - - -/* ***************************************** -* Implementation of inlined functions -*******************************************/ -typedef struct { - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - -MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) -{ - const void* ptr = ct; - const U16* u16ptr = (const U16*) ptr; - const U32 tableLog = MEM_read16(ptr); - statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; - statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); - statePtr->stateLog = tableLog; -} - -MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) -{ - FSE_initCState(statePtr, ct); - { - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* stateTable = (const U16*)(statePtr->stateTable); - U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); - statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - - } -} - -MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) -{ - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); - BIT_addBits(bitC, statePtr->value, nbBitsOut); - statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -} - -MEM_STATIC void 
FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) -{ - BIT_addBits(bitC, statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); -} - -/*<===== Decompression =====>*/ - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ - -typedef struct -{ - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - -MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; -} - -MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.newState + lowBits; -} - -MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -/*! 
FSE_decodeSymbolFast() : - unsafe, only works if no symbol has a probability > 50% */ -MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) -{ - return DStatePtr->state == 0; -} - - +/* ****************************************************************** + FSE : Finite State Entropy coder + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef FSE_STATIC_H +#define FSE_STATIC_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* ***************************************** +* Dependencies +*******************************************/ +#include "fse.h" +#include "bitstream.h" + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
+ FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); + statePtr->stateLog = tableLog; +} + +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { + const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +{ + const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void 
FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + +/*<===== Decompression =====>*/ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + #ifndef FSE_COMMONDEFS_ONLY @@ -383,8 +383,8 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) -#if defined (__cplusplus) -} -#endif - -#endif /* FSE_STATIC_H */ +#if defined (__cplusplus) +} +#endif + +#endif /* FSE_STATIC_H */ diff --git a/contrib/libs/zstd06/common/mem.h b/contrib/libs/zstd06/common/mem.h index 1b84bfdb3d4..85d78c84c12 100644 --- a/contrib/libs/zstd06/common/mem.h +++ b/contrib/libs/zstd06/common/mem.h @@ -1,201 +1,201 @@ #include -/* ****************************************************************** - mem.h - low-level memory access routines - Copyright (C) 2013-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ -#ifndef MEM_H_MODULE -#define MEM_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - -/*-**************************************** -* Dependencies -******************************************/ +/* ****************************************************************** + mem.h + low-level memory access routines + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ #include /* size_t, ptrdiff_t */ #include /* memcpy */ - - -/*-**************************************** -* Compiler specifics -******************************************/ + + +/*-**************************************** +* Compiler specifics +******************************************/ #if defined(_MSC_VER) /* Visual Studio */ # include /* _byteswap_ulong */ # include /* _byteswap_* */ #endif -#if defined(__GNUC__) -# define MEM_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static 
functions; disable the relevant warning */ -#endif - +#if defined(__GNUC__) +# define MEM_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + /* code only tested on 32 and 64 bits systems */ #define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } - -/*-************************************************************** -* Basic Types -*****************************************************************/ + +/*-************************************************************** +* Basic Types +*****************************************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef int16_t S16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef int64_t S64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef signed short S16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; - typedef signed long long S64; -#endif - - -/*-************************************************************** -* Memory I/O -*****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. 
- * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif - +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. 
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } - -MEM_STATIC unsigned MEM_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. 
-Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } -MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } -MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } - -#else - -/* default method, safe and standard. 
- can sometimes prove slower */ - -MEM_STATIC U16 MEM_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U32 MEM_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U64 MEM_read64(const void* memPtr) -{ - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC size_t MEM_readST(const void* memPtr) -{ - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write32(void* memPtr, U32 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write64(void* memPtr, U64 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -#endif /* MEM_FORCE_MEMORY_ACCESS */ - + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. 
+Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ @@ -238,27 +238,27 @@ MEM_STATIC size_t MEM_swapST(size_t in) /*=== Little endian r/w ===*/ -MEM_STATIC U16 MEM_readLE16(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read16(memPtr); - else { - const BYTE* p = (const BYTE*)memPtr; - return (U16)(p[0] + (p[1]<<8)); - } -} - -MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) -{ - if (MEM_isLittleEndian()) { - MEM_write16(memPtr, val); - } else { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val; - p[1] = (BYTE)(val>>8); - } -} - +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + MEM_STATIC U32 MEM_readLE24(const void* memPtr) { return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); @@ -270,54 +270,54 @@ MEM_STATIC void MEM_writeLE24(void* 
memPtr, U32 val) ((BYTE*)memPtr)[2] = (BYTE)(val>>16); } -MEM_STATIC U32 MEM_readLE32(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read32(memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); else return MEM_swap32(MEM_read32(memPtr)); -} - -MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) -{ +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ if (MEM_isLittleEndian()) - MEM_write32(memPtr, val32); + MEM_write32(memPtr, val32); else MEM_write32(memPtr, MEM_swap32(val32)); -} - -MEM_STATIC U64 MEM_readLE64(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read64(memPtr); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); else return MEM_swap64(MEM_read64(memPtr)); -} - -MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) -{ +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ if (MEM_isLittleEndian()) - MEM_write64(memPtr, val64); + MEM_write64(memPtr, val64); else MEM_write64(memPtr, MEM_swap64(val64)); -} - -MEM_STATIC size_t MEM_readLEST(const void* memPtr) -{ - if (MEM_32bits()) - return (size_t)MEM_readLE32(memPtr); - else - return (size_t)MEM_readLE64(memPtr); -} - -MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeLE32(memPtr, (U32)val); - else - MEM_writeLE64(memPtr, (U64)val); -} - +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + /*=== Big endian r/w ===*/ MEM_STATIC U32 MEM_readBE32(const void* memPtr) @@ -370,21 +370,21 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) /* function safe only for comparisons */ -MEM_STATIC U32 
MEM_readMINMATCH(const void* memPtr, U32 length) -{ - switch (length) - { - default : - case 4 : return MEM_read32(memPtr); - case 3 : if (MEM_isLittleEndian()) - return MEM_read32(memPtr)<<8; - else - return MEM_read32(memPtr)>>8; - } -} - -#if defined (__cplusplus) -} -#endif - -#endif /* MEM_H_MODULE */ +MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ diff --git a/contrib/libs/zstd06/common/zbuff.h b/contrib/libs/zstd06/common/zbuff.h index 03de2ad268e..54c40b47ca3 100644 --- a/contrib/libs/zstd06/common/zbuff.h +++ b/contrib/libs/zstd06/common/zbuff.h @@ -1,168 +1,168 @@ #include -/* - Buffered version of Zstd compression library - Copyright (C) 2015-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd homepage : http://www.zstd.net/ -*/ -#ifndef ZSTD_BUFFERED_H -#define ZSTD_BUFFERED_H - -#if defined (__cplusplus) -extern "C" { -#endif - -/* ************************************* -* Dependencies -***************************************/ -#include /* size_t */ - - -/* *************************************************************** -* Compiler specifics -*****************************************************************/ -/*! 
-* ZSTD_DLL_EXPORT : -* Enable exporting of functions when building a Windows DLL -*/ -#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) -#else -# define ZSTDLIB_API -#endif - - -/* ************************************* -* Streaming functions -***************************************/ -typedef struct ZBUFF_CCtx_s ZBUFF_CCtx; -ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void); -ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); - -ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); - -ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr); -ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); -ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); - -/*-************************************************* -* Streaming compression - howto -* -* A ZBUFF_CCtx object is required to track streaming operation. -* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. -* ZBUFF_CCtx objects can be reused multiple times. -* -* Start by initializing ZBUF_CCtx. -* Use ZBUFF_compressInit() to start a new compression operation. -* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary. -* -* Use ZBUFF_compressContinue() repetitively to consume input stream. -* *srcSizePtr and *dstCapacityPtr can be any size. -* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr. -* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data. -* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst . 
-* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency) -* or an error code, which can be tested using ZBUFF_isError(). -* -* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush(). -* The nb of bytes written into `dst` will be reported into *dstCapacityPtr. -* Note that the function cannot output more than *dstCapacityPtr, -* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small. -* @return : nb of bytes still present into internal buffer (0 if it's empty) -* or an error code, which can be tested using ZBUFF_isError(). -* -* ZBUFF_compressEnd() instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. -* In which case, call again ZBUFF_compressFlush() to complete the flush. -* @return : nb of bytes still present into internal buffer (0 if it's empty) -* or an error code, which can be tested using ZBUFF_isError(). -* -* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize -* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering). -* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. -* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering. 
-* **************************************************/ - - -typedef struct ZBUFF_DCtx_s ZBUFF_DCtx; -ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void); -ZSTDLIB_API size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx); - -ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx); -ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize); - -ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, - void* dst, size_t* dstCapacityPtr, - const void* src, size_t* srcSizePtr); - -/*-*************************************************************************** -* Streaming decompression howto -* -* A ZBUFF_DCtx object is required to track streaming operations. -* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. -* Use ZBUFF_decompressInit() to start a new decompression operation, -* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary. -* Note that ZBUFF_DCtx objects can be re-init multiple times. -* -* Use ZBUFF_decompressContinue() repetitively to consume your input. -* *srcSizePtr and *dstCapacityPtr can be any size. -* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. -* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. -* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. -* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), -* or 0 when a frame is completely decoded, -* or an error code, which can be tested using ZBUFF_isError(). -* -* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize() -* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. 
-* input : ZBUFF_recommendedDInSize == 128KB + 3; -* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . -* *******************************************************************************/ - - -/* ************************************* -* Tool functions -***************************************/ -ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode); -ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode); - -/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +/* + Buffered version of Zstd compression library + Copyright (C) 2015-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd homepage : http://www.zstd.net/ +*/ +#ifndef ZSTD_BUFFERED_H +#define ZSTD_BUFFERED_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Dependencies +***************************************/ +#include /* size_t */ + + +/* *************************************************************** +* Compiler specifics +*****************************************************************/ +/*! 
+* ZSTD_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) +#else +# define ZSTDLIB_API +#endif + + +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFF_CCtx_s ZBUFF_CCtx; +ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void); +ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); + +ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); + +ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr); +ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); +ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); + +/*-************************************************* +* Streaming compression - howto +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Start by initializing ZBUF_CCtx. +* Use ZBUFF_compressInit() to start a new compression operation. +* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary. +* +* Use ZBUFF_compressContinue() repetitively to consume input stream. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst . 
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush(). +* The nb of bytes written into `dst` will be reported into *dstCapacityPtr. +* Note that the function cannot output more than *dstCapacityPtr, +* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* In which case, call again ZBUFF_compressFlush() to complete the flush. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize +* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering). +* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. +* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering. 
+* **************************************************/ + + +typedef struct ZBUFF_DCtx_s ZBUFF_DCtx; +ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void); +ZSTDLIB_API size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx); + +ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx); +ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFF_DCtx object is required to track streaming operations. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation, +* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFF_DCtx objects can be re-init multiple times. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize() +* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. 
+* input : ZBUFF_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode); +ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. * These sizes are just hints, they tend to offer better latency */ -ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void); -ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void); -ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void); -ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void); - - -#if defined (__cplusplus) -} -#endif - -#endif /* ZSTD_BUFFERED_H */ +ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void); +ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void); +ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void); +ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_BUFFERED_H */ diff --git a/contrib/libs/zstd06/common/zbuff_static.h b/contrib/libs/zstd06/common/zbuff_static.h index 7aa81642f69..e06404f6460 100644 --- a/contrib/libs/zstd06/common/zbuff_static.h +++ b/contrib/libs/zstd06/common/zbuff_static.h @@ -1,72 +1,72 @@ #include -/* - zstd - buffered version of compression library - experimental complementary API, for static linking only - Copyright (C) 2015-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd homepage : http://www.zstd.net -*/ -#ifndef ZSTD_BUFFERED_STATIC_H -#define ZSTD_BUFFERED_STATIC_H - -/* The objects defined into this file should be considered experimental. - * They are not labelled stable, as their prototype may change in the future. - * You can use them for tests, provide feedback, or if you can endure risk of future changes. - */ - -#if defined (__cplusplus) -extern "C" { -#endif - -/* ************************************* -* Includes -***************************************/ -#include "zstd_static.h" /* ZSTD_parameters */ -#include "zbuff.h" +/* + zstd - buffered version of compression library + experimental complementary API, for static linking only + Copyright (C) 2015-2016, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd homepage : http://www.zstd.net +*/ +#ifndef ZSTD_BUFFERED_STATIC_H +#define ZSTD_BUFFERED_STATIC_H + +/* The objects defined into this file should be considered experimental. + * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. 
+ */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include "zstd_static.h" /* ZSTD_parameters */ +#include "zbuff.h" #include "zstd_internal.h" /* MIN */ - - -/* ************************************* -* Advanced Streaming functions -***************************************/ -ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_parameters params, U64 pledgedSrcSize); - + + +/* ************************************* +* Advanced Streaming functions +***************************************/ +ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize); + MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { size_t length = MIN(dstCapacity, srcSize); memcpy(dst, src, length); return length; } - -#if defined (__cplusplus) -} -#endif - -#endif /* ZSTD_BUFFERED_STATIC_H */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_BUFFERED_STATIC_H */ diff --git a/contrib/libs/zstd06/common/zstd.h b/contrib/libs/zstd06/common/zstd.h index 49ad80a8a88..3574a82ec02 100644 --- a/contrib/libs/zstd06/common/zstd.h +++ b/contrib/libs/zstd06/common/zstd.h @@ -1,65 +1,65 @@ #include -/* - zstd - standard compression library - Header File - Copyright (C) 2014-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd -*/ -#ifndef ZSTD_H -#define ZSTD_H - -#if defined (__cplusplus) -extern "C" { -#endif - -/*-************************************* -* Dependencies -***************************************/ -#include /* size_t */ - - -/*-*************************************************************** -* Export parameters -*****************************************************************/ -/*! -* ZSTD_DLL_EXPORT : -* Enable exporting of functions when building a Windows DLL -*/ -#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) -#else -# define ZSTDLIB_API -#endif - - -/* ************************************* -* Version -***************************************/ +/* + zstd - standard compression library + Header File + Copyright (C) 2014-2016, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd +*/ +#ifndef ZSTD_H +#define ZSTD_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +#include /* size_t */ + + +/*-*************************************************************** +* Export parameters +*****************************************************************/ +/*! 
+* ZSTD_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) +#else +# define ZSTDLIB_API +#endif + + +/* ************************************* +* Version +***************************************/ #define ZSTD_VERSION_MAJOR 0 #define ZSTD_VERSION_MINOR 6 #define ZSTD_VERSION_RELEASE 2 @@ -69,88 +69,88 @@ extern "C" { #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber (void); - - -/* ************************************* -* Simple functions -***************************************/ -/*! ZSTD_compress() : - Compresses `srcSize` bytes from buffer `src` into buffer `dst` of size `dstCapacity`. - Destination buffer must be already allocated. - Compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - @return : the number of bytes written into `dst`, - or an error code if it fails (which can be tested using ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); - -/*! ZSTD_decompress() : - `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. - `dstCapacity` must be large enough, equal or larger than originalSize. 
- @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - or an errorCode if it fails (which can be tested using ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); - - -/* ************************************* -* Helper functions -***************************************/ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ - -/* Error Management */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string for an error code */ - - -/* ************************************* -* Explicit memory management -***************************************/ -/** Compression context */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; /*< incomplete type */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /*!< @return : errorCode */ - -/** ZSTD_compressCCtx() : - Same as ZSTD_compress(), but requires an already allocated ZSTD_CCtx (see ZSTD_createCCtx()) */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); - -/** Decompression context */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /*!< @return : errorCode */ - -/** ZSTD_decompressDCtx() : -* Same as ZSTD_decompress(), but requires an already allocated ZSTD_DCtx (see ZSTD_createDCtx()) */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - -/*-*********************** -* Dictionary API -*************************/ -/*! ZSTD_compress_usingDict() : -* Compression using a pre-defined Dictionary content (see dictBuilder). 
-* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); - -/*! ZSTD_decompress_usingDict() : -* Decompression using a pre-defined Dictionary content (see dictBuilder). -* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. -* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize); - - -#if defined (__cplusplus) -} -#endif - -#endif /* ZSTD_H */ +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) +ZSTDLIB_API unsigned ZSTD_versionNumber (void); + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTD_compress() : + Compresses `srcSize` bytes from buffer `src` into buffer `dst` of size `dstCapacity`. + Destination buffer must be already allocated. + Compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + @return : the number of bytes written into `dst`, + or an error code if it fails (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. 
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Compression context */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; /*< incomplete type */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /*!< @return : errorCode */ + +/** ZSTD_compressCCtx() : + Same as ZSTD_compress(), but requires an already allocated ZSTD_CCtx (see ZSTD_createCCtx()) */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); + +/** Decompression context */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTD_decompressDCtx() : +* Same as ZSTD_decompress(), but requires an already allocated ZSTD_DCtx (see ZSTD_createDCtx()) */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTD_compress_usingDict() : +* Compression using a pre-defined Dictionary content (see dictBuilder). 
+* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_H */ diff --git a/contrib/libs/zstd06/common/zstd_internal.h b/contrib/libs/zstd06/common/zstd_internal.h index 367c1d03355..2ce2b18cf16 100644 --- a/contrib/libs/zstd06/common/zstd_internal.h +++ b/contrib/libs/zstd06/common/zstd_internal.h @@ -1,256 +1,256 @@ #include -/* - zstd_internal - common functions to include - Header File for include - Copyright (C) 2014-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd homepage : https://www.zstd.net -*/ -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE - -/*-************************************* -* Dependencies -***************************************/ -#include "mem.h" -#include "error_private.h" -#include "zstd_static.h" - - -/*-************************************* -* Common macros -***************************************/ -#define MIN(a,b) ((a)<(b) ? (a) : (b)) -#define MAX(a,b) ((a)>(b) ? (a) : (b)) - - -/*-************************************* -* Common constants -***************************************/ -#define ZSTD_OPT_DEBUG 0 // 3 = compression stats; 5 = check encoded sequences; 9 = full logs -#include -#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 - #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) - #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) - #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__) -#else - #define ZSTD_LOG_PARSER(...) - #define ZSTD_LOG_ENCODE(...) - #define ZSTD_LOG_BLOCK(...) 
-#endif - -#define ZSTD_OPT_NUM (1<<12) -#define ZSTD_DICT_MAGIC 0xEC30A436 - -#define ZSTD_REP_NUM 3 -#define ZSTD_REP_INIT ZSTD_REP_NUM -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 -#define BIT1 2 -#define BIT0 1 - -#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12 -static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 }; - -#define ZSTD_BLOCKHEADERSIZE 3 /* because C standard does not allow a static const value to be defined using another static const value .... :( */ -static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; - -#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ - -#define HufLog 12 - -#define IS_HUF 0 -#define IS_PCH 1 -#define IS_RAW 2 -#define IS_RLE 3 - -#define LONGNBSEQ 0x7F00 - -#define MINMATCH 3 -#define EQUAL_READ32 4 -#define REPCODE_STARTVALUE 1 - -#define Litbits 8 -#define MaxLit ((1<= 3) /* GCC Intrinsic */ - return 31 - __builtin_clz(val); -# else /* Software version */ - static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - int r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; - return r; -# endif -} - - -/*-******************************************* -* Private interfaces -*********************************************/ -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; - -typedef struct { - U32 price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_INIT]; -} ZSTD_optimal_t; - +/* + zstd_internal - common functions to include + Header File for include + Copyright (C) 2014-2016, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd homepage : https://www.zstd.net +*/ +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/*-************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "error_private.h" +#include "zstd_static.h" + + +/*-************************************* +* Common macros +***************************************/ +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? 
(a) : (b)) + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_DEBUG 0 // 3 = compression stats; 5 = check encoded sequences; 9 = full logs +#include +#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 + #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) + #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) + #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__) +#else + #define ZSTD_LOG_PARSER(...) + #define ZSTD_LOG_ENCODE(...) + #define ZSTD_LOG_BLOCK(...) +#endif + +#define ZSTD_OPT_NUM (1<<12) +#define ZSTD_DICT_MAGIC 0xEC30A436 + +#define ZSTD_REP_NUM 3 +#define ZSTD_REP_INIT ZSTD_REP_NUM +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12 +static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 }; + +#define ZSTD_BLOCKHEADERSIZE 3 /* because C standard does not allow a static const value to be defined using another static const value .... 
:( */ +static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 + +#define IS_HUF 0 +#define IS_PCH 1 +#define IS_RAW 2 +#define IS_RLE 3 + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 +#define EQUAL_READ32 4 +#define REPCODE_STARTVALUE 1 + +#define Litbits 8 +#define MaxLit ((1<= 3) /* GCC Intrinsic */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + +/*-******************************************* +* Private interfaces +*********************************************/ +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + U32 price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_INIT]; +} ZSTD_optimal_t; + //#if ZSTD_OPT_DEBUG == 3 // #include ".debug/zstd_stats.h" //#else - typedef struct { U32 unused; } ZSTD_stats_t; + typedef struct { U32 unused; } ZSTD_stats_t; MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; } MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; } MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; } MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; } //#endif - -typedef struct { - void* buffer; - U32* offsetStart; - U32* offset; - BYTE* 
offCodeStart; - BYTE* litStart; - BYTE* lit; - U16* litLengthStart; - U16* litLength; - BYTE* llCodeStart; - U16* matchLengthStart; - U16* matchLength; - BYTE* mlCodeStart; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; - /* opt */ - ZSTD_optimal_t* priceTable; - ZSTD_match_t* matchTable; - U32* matchLengthFreq; - U32* litLengthFreq; - U32* litFreq; - U32* offCodeFreq; - U32 matchLengthSum; - U32 matchSum; - U32 litLengthSum; - U32 litSum; - U32 offCodeSum; - U32 log2matchLengthSum; - U32 log2matchSum; - U32 log2litLengthSum; - U32 log2litSum; - U32 log2offCodeSum; - U32 factor; + +typedef struct { + void* buffer; + U32* offsetStart; + U32* offset; + BYTE* offCodeStart; + BYTE* litStart; + BYTE* lit; + U16* litLengthStart; + U16* litLength; + BYTE* llCodeStart; + U16* matchLengthStart; + U16* matchLength; + BYTE* mlCodeStart; + U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ + U32 longLengthPos; + /* opt */ + ZSTD_optimal_t* priceTable; + ZSTD_match_t* matchTable; + U32* matchLengthFreq; + U32* litLengthFreq; + U32* litFreq; + U32* offCodeFreq; + U32 matchLengthSum; + U32 matchSum; + U32 litLengthSum; + U32 litSum; + U32 offCodeSum; + U32 log2matchLengthSum; + U32 log2matchSum; + U32 log2litLengthSum; + U32 log2litSum; + U32 log2offCodeSum; + U32 factor; U32 cachedPrice; U32 cachedLitLength; const BYTE* cachedLiterals; - ZSTD_stats_t stats; -} seqStore_t; - -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq); - - -#endif /* ZSTD_CCOMMON_H_MODULE */ + ZSTD_stats_t stats; +} seqStore_t; + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq); + + +#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/contrib/libs/zstd06/common/zstd_static.h b/contrib/libs/zstd06/common/zstd_static.h index 65d3bc8748d..a1dae420942 
100644 --- a/contrib/libs/zstd06/common/zstd_static.h +++ b/contrib/libs/zstd06/common/zstd_static.h @@ -1,273 +1,273 @@ #include -/* - zstd - standard compression library - Header File for static linking only - Copyright (C) 2014-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd homepage : http://www.zstd.net -*/ -#ifndef ZSTD_STATIC_H -#define ZSTD_STATIC_H - -/* The prototypes defined within this file are considered experimental. - * They should not be used in the context DLL as they may change in the future. - * Prefer static linking if you need them, to control breaking version changes issues. 
- */ - -#if defined (__cplusplus) -extern "C" { -#endif - -/*-************************************* -* Dependencies -***************************************/ -#include "zstd.h" -#include "mem.h" - - -/*-************************************* -* Constants -***************************************/ -#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */ - - -/*-************************************* -* Types -***************************************/ -#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? 25 : 27)) -#define ZSTD_WINDOWLOG_MIN 18 -#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -#define ZSTD_CHAINLOG_MIN 4 -#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX -#define ZSTD_HASHLOG_MIN 12 -#define ZSTD_HASHLOG3_MAX 17 -#define ZSTD_HASHLOG3_MIN 15 -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_SEARCHLENGTH_MAX 7 -#define ZSTD_SEARCHLENGTH_MIN 3 -#define ZSTD_TARGETLENGTH_MIN 4 -#define ZSTD_TARGETLENGTH_MAX 999 - -/* from faster to stronger */ -typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; - -typedef struct { - U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ - U32 chainLog; /* fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - U32 hashLog; /* dispatch table : larger == faster, more memory */ - U32 searchLog; /* nb of searches : larger == more compression, slower */ - U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */ - U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */ - ZSTD_strategy strategy; -} ZSTD_compressionParameters; - -typedef struct { - U32 contentSizeFlag; /* 1: content size will be in frame header (if known). 
*/ -} ZSTD_frameParameters; - -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; - - -/*-************************************* -* Advanced functions -***************************************/ -ZSTDLIB_API unsigned ZSTD_maxCLevel (void); - -/*! ZSTD_getCParams() : -* @return ZSTD_compressionParameters structure for a selected compression level and srcSize. -* `srcSize` value is optional, select 0 if not known */ +/* + zstd - standard compression library + Header File for static linking only + Copyright (C) 2014-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - zstd homepage : http://www.zstd.net +*/ +#ifndef ZSTD_STATIC_H +#define ZSTD_STATIC_H + +/* The prototypes defined within this file are considered experimental. + * They should not be used in the context DLL as they may change in the future. + * Prefer static linking if you need them, to control breaking version changes issues. + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +#include "zstd.h" +#include "mem.h" + + +/*-************************************* +* Constants +***************************************/ +#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */ + + +/*-************************************* +* Types +***************************************/ +#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? 25 : 27)) +#define ZSTD_WINDOWLOG_MIN 18 +#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) +#define ZSTD_CHAINLOG_MIN 4 +#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX +#define ZSTD_HASHLOG_MIN 12 +#define ZSTD_HASHLOG3_MAX 17 +#define ZSTD_HASHLOG3_MIN 15 +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_SEARCHLENGTH_MAX 7 +#define ZSTD_SEARCHLENGTH_MIN 3 +#define ZSTD_TARGETLENGTH_MIN 4 +#define ZSTD_TARGETLENGTH_MAX 999 + +/* from faster to stronger */ +typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; + +typedef struct { + U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ + U32 chainLog; /* fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + U32 hashLog; /* dispatch table : larger == faster, more memory */ + U32 searchLog; /* nb of searches : larger == more compression, slower */ + U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */ + U32 targetLength; /* 
acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; +} ZSTD_compressionParameters; + +typedef struct { + U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + + +/*-************************************* +* Advanced functions +***************************************/ +ZSTDLIB_API unsigned ZSTD_maxCLevel (void); + +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level and srcSize. +* `srcSize` value is optional, select 0 if not known */ ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize); - -/*! ZSTD_checkParams() : -* Ensure param values remain within authorized range */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); - -/*! ZSTD_adjustParams() : -* optimize params for a given `srcSize` and `dictSize`. -* both values are optional, select `0` if unknown. */ -ZSTDLIB_API void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize); - -/*! ZSTD_compress_advanced() : -* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */ -ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); - -/*! ZSTD_compress_usingPreparedDCtx() : -* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded. -* It avoids reloading the dictionary each time. -* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced(). 
-* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */ -ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx( - ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/*- Advanced Decompression functions -*/ - -/*! ZSTD_decompress_usingPreparedDCtx() : -* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded. -* It avoids reloading the dictionary each time. -* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict(). -* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */ -ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx( - ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - - -/* ************************************** -* Streaming functions (direct mode) -****************************************/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize); -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx); - -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity); - -/* - Streaming compression, synchronous mode (bufferless) - - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it. 
- ZSTD_CCtx object can be re-used multiple times within successive compression operations. - - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() - - Then, consume your input using ZSTD_compressContinue(). - The interface is synchronous, so all input will be consumed and produce a compressed output. - You must ensure there is enough space in destination buffer to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - - Finish a frame with ZSTD_compressEnd(), which will write the epilogue. - Without the epilogue, frames will be considered incomplete by decoder. - - You can then reuse ZSTD_CCtx to compress some new frame. -*/ - -typedef struct { U64 frameContentSize; U32 windowLog; } ZSTD_frameParams; - -#define ZSTD_FRAMEHEADERSIZE_MAX 13 /* for static allocation */ -static const size_t ZSTD_frameHeaderSize_min = 5; -static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; -ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ - -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); - -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -/* - Streaming decompression, direct mode (bufferless) - - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. 
- A ZSTD_DCtx object can be re-used multiple times. - - First optional operation is to retrieve frame parameters, using ZSTD_getFrameParams(), which doesn't consume the input. - It can provide the minimum size of rolling buffer required to properly decompress data, - and optionally the final size of uncompressed content. - (Note : content size is an optional info that may not be present. 0 means : content size unknown) - Frame parameters are extracted from the beginning of compressed frame. - The amount of data to read is variable, from ZSTD_frameHeaderSize_min to ZSTD_frameHeaderSize_max (so if `srcSize` >= ZSTD_frameHeaderSize_max, it will always work) - If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result. - Result : 0 when successful, it means the ZSTD_frameParams structure has been filled. - >0 : means there is not enough data into `src`. Provides the expected size to successfully decode header. - errorCode, which can be tested using ZSTD_isError() - - Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict(). - Alternatively, you can copy a prepared context, using ZSTD_copyDCtx(). - - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail. - ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog). - They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible. - - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity) - It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
- - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. -*/ - - -/* ************************************** -* Block functions -****************************************/ -/*! Block functions produce and decode raw zstd blocks, without frame metadata. - User will have to take in charge required information to regenerate data, such as compressed and content sizes. - - A few rules to respect : - - Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB) - - Compressing or decompressing requires a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : ZSTD_compressBegin() - + decompression : ZSTD_decompressBegin() - + variants _usingDict() are also allowed - + copyCCtx() and copyDCtx() work too - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. - In which case, nothing is produced into `dst`. - + User must test for such outcome and deal directly with uncompressed data - + ZSTD_decompressBlock() doesn't accept uncompressed data as input !! -*/ - -#define ZSTD_BLOCKSIZE_MAX (128 * 1024) /* define, for static allocation */ + +/*! ZSTD_checkParams() : +* Ensure param values remain within authorized range */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustParams() : +* optimize params for a given `srcSize` and `dictSize`. +* both values are optional, select `0` if unknown. */ +ZSTDLIB_API void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : +* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */ +ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! 
ZSTD_compress_usingPreparedDCtx() : +* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded. +* It avoids reloading the dictionary each time. +* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced(). +* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */ +ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx( + ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*- Advanced Decompression functions -*/ + +/*! ZSTD_decompress_usingPreparedDCtx() : +* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded. +* It avoids reloading the dictionary each time. +* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict(). +* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */ +ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx( + ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/* ************************************** +* Streaming functions (direct mode) +****************************************/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize); +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx); + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t 
ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity); + +/* + Streaming compression, synchronous mode (bufferless) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + The interface is synchronous, so all input will be consumed and produce a compressed output. + You must ensure there is enough space in destination buffer to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + + Finish a frame with ZSTD_compressEnd(), which will write the epilogue. + Without the epilogue, frames will be considered incomplete by decoder. + + You can then reuse ZSTD_CCtx to compress some new frame. 
+*/ + +typedef struct { U64 frameContentSize; U32 windowLog; } ZSTD_frameParams; + +#define ZSTD_FRAMEHEADERSIZE_MAX 13 /* for static allocation */ +static const size_t ZSTD_frameHeaderSize_min = 5; +static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; +ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* + Streaming decompression, direct mode (bufferless) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First optional operation is to retrieve frame parameters, using ZSTD_getFrameParams(), which doesn't consume the input. + It can provide the minimum size of rolling buffer required to properly decompress data, + and optionally the final size of uncompressed content. + (Note : content size is an optional info that may not be present. 0 means : content size unknown) + Frame parameters are extracted from the beginning of compressed frame. + The amount of data to read is variable, from ZSTD_frameHeaderSize_min to ZSTD_frameHeaderSize_max (so if `srcSize` >= ZSTD_frameHeaderSize_max, it will always work) + If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result. + Result : 0 when successful, it means the ZSTD_frameParams structure has been filled. + >0 : means there is not enough data into `src`. 
Provides the expected size to successfully decode header. + errorCode, which can be tested using ZSTD_isError() + + Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict(). + Alternatively, you can copy a prepared context, using ZSTD_copyDCtx(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail. + ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog). + They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity) + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. +*/ + + +/* ************************************** +* Block functions +****************************************/ +/*! Block functions produce and decode raw zstd blocks, without frame metadata. + User will have to take in charge required information to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB) + - Compressing or decompressing requires a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : ZSTD_compressBegin() + + decompression : ZSTD_decompressBegin() + + variants _usingDict() are also allowed + + copyCCtx() and copyDCtx() work too + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. 
+ In which case, nothing is produced into `dst`. + + User must test for such outcome and deal directly with uncompressed data + + ZSTD_decompressBlock() doesn't accept uncompressed data as input !! +*/ + +#define ZSTD_BLOCKSIZE_MAX (128 * 1024) /* define, for static allocation */ ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - -/*-************************************* -* Error management -***************************************/ -#include "error_public.h" -/*! ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare directly with enum list published into "error_public.h" */ + + +/*-************************************* +* Error management +***************************************/ +#include "error_public.h" +/*! ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare directly with enum list published into "error_public.h" */ ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); ZSTDLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); - - -#if defined (__cplusplus) -} -#endif - -#endif /* ZSTD_STATIC_H */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_STATIC_H */ diff --git a/contrib/libs/zstd06/compress/zstd_compress.c b/contrib/libs/zstd06/compress/zstd_compress.c index e1a9e5a48b3..1bb75c68ccf 100644 --- a/contrib/libs/zstd06/compress/zstd_compress.c +++ b/contrib/libs/zstd06/compress/zstd_compress.c @@ -1,1971 +1,1971 @@ -/* - ZSTD HC - High Compression Mode of Zstandard - Copyright (C) 2015-2016, Yann Collet. 
- - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - Zstd source repository : https://www.zstd.net -*/ - - -/* ******************************************************* -* Compiler specifics -*********************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - - -/*-************************************* -* Dependencies -***************************************/ -#include /* malloc */ -#include /* memset */ -#include "mem.h" -#include "fse_static.h" +/* + ZSTD HC - High Compression Mode of Zstandard + Copyright (C) 2015-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Zstd source repository : https://www.zstd.net +*/ + + +/* ******************************************************* +* Compiler specifics +*********************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc */ +#include /* memset */ +#include "mem.h" +#include "fse_static.h" #include "huf_static.h" -#include "zstd_internal.h" - - -/*-************************************* -* Constants -***************************************/ -static const U32 g_searchStrength = 8; /* control skip over incompressible data */ - - -/*-************************************* -* Helper functions -***************************************/ -size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } - - -/*-************************************* -* Sequence storage -***************************************/ -static void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->offset = ssPtr->offsetStart; - ssPtr->lit = ssPtr->litStart; - ssPtr->litLength 
= ssPtr->litLengthStart; - ssPtr->matchLength = ssPtr->matchLengthStart; - ssPtr->longLengthID = 0; -} - - -/*-************************************* -* Context memory management -***************************************/ -struct ZSTD_CCtx_s -{ - const BYTE* nextSrc; /* next block here to continue on current prefix */ - const BYTE* base; /* All regular indexes relative to this position */ - const BYTE* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more data */ - U32 nextToUpdate; /* index from which to continue dictionary update */ - U32 nextToUpdate3; /* index from which to continue dictionary update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 loadedDictEnd; - U32 stage; /* 0: created; 1: init,dictLoad; 2:started */ - ZSTD_parameters params; - void* workSpace; - size_t workSpaceSize; - size_t blockSize; - - seqStore_t seqStore; /* sequences storage ptrs */ - U32* hashTable; - U32* hashTable3; - U32* chainTable; - HUF_CElt* hufTable; - U32 flagStaticTables; - FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; - FSE_CTable matchlengthCTable [FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; - FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; -}; - -ZSTD_CCtx* ZSTD_createCCtx(void) -{ - return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx)); -} - -size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) -{ - free(cctx->workSpace); - free(cctx); - return 0; /* reserved as a potential error code in the future */ -} - -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ -{ - return &(ctx->seqStore); -} - - -#define CLAMP(val,min,max) { if (valmax) val=max; } -#define CLAMPCHECK(val,min,max) { if ((valmax)) return ERROR(compressionParameter_unsupported); } - -/** ZSTD_checkParams() : - ensure param values remain within authorized range. 
- @return : 0, or an error code if one value is beyond authorized range */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) -{ - CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); - CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN; - U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; - CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); } - CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported); - return 0; -} - - -static unsigned ZSTD_highbit(U32 val); - -/** ZSTD_checkCParams_advanced() : - temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */ -size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize) -{ - if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams); - if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported); - if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */ - if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN; /* fake value - temporary work around */ - if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN; /* fake value - temporary work around */ - return ZSTD_checkCParams(cParams); -} - - -/** ZSTD_adjustParams() : - optimize params for q given input (`srcSize` and `dictSize`). 
- mostly downsizing to reduce memory consumption and initialization. - Both `srcSize` and `dictSize` are optional (use 0 if unknown), - but if both are 0, no optimization can be done. - Note : params is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */ -void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize) -{ - if (srcSize+dictSize == 0) return; /* no size information available : no adjustment */ - - /* resize params, to use less memory when necessary */ - { U32 const minSrcSize = (srcSize==0) ? 500 : 0; - U64 const rSize = srcSize + dictSize + minSrcSize; - if (rSize < ((U64)1<windowLog > srcLog) params->windowLog = srcLog; - } } - if (params->hashLog > params->windowLog) params->hashLog = params->windowLog; - { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); - U32 const maxChainLog = params->windowLog+btPlus; - if (params->chainLog > maxChainLog) params->chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */ - - if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - if ((params->hashLog < ZSTD_HASHLOG_MIN) && ((U32)params->strategy >= (U32)ZSTD_btlazy2)) params->hashLog = ZSTD_HASHLOG_MIN; /* required to ensure collision resistance in bt */ -} - - -size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams) /* hidden interface, for paramagrill */ -{ - ZSTD_CCtx* zc = ZSTD_createCCtx(); - ZSTD_parameters params; - params.cParams = cParams; - params.fParams.contentSizeFlag = 1; - ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0); - { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize; - ZSTD_freeCCtx(zc); - return ccsize; } -} - -/*! 
ZSTD_resetCCtx_advanced() : - note : 'params' is expected to be validated */ -static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, - ZSTD_parameters params, U32 reset) -{ /* note : params considered validated here */ - const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog); - const U32 divider = (params.cParams.searchLength==3) ? 3 : 4; - const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 11*maxNbSeq; - const size_t chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog); +#include "zstd_internal.h" + + +/*-************************************* +* Constants +***************************************/ +static const U32 g_searchStrength = 8; /* control skip over incompressible data */ + + +/*-************************************* +* Helper functions +***************************************/ +size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } + + +/*-************************************* +* Sequence storage +***************************************/ +static void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->offset = ssPtr->offsetStart; + ssPtr->lit = ssPtr->litStart; + ssPtr->litLength = ssPtr->litLengthStart; + ssPtr->matchLength = ssPtr->matchLengthStart; + ssPtr->longLengthID = 0; +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CCtx_s +{ + const BYTE* nextSrc; /* next block here to continue on current prefix */ + const BYTE* base; /* All regular indexes relative to this position */ + const BYTE* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more data */ + U32 nextToUpdate; /* index from which to continue dictionary update */ + U32 nextToUpdate3; /* index from which to continue dictionary update */ + U32 hashLog3; /* dispatch table : larger == 
faster, more memory */ + U32 loadedDictEnd; + U32 stage; /* 0: created; 1: init,dictLoad; 2:started */ + ZSTD_parameters params; + void* workSpace; + size_t workSpaceSize; + size_t blockSize; + + seqStore_t seqStore; /* sequences storage ptrs */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + HUF_CElt* hufTable; + U32 flagStaticTables; + FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable [FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; +}; + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx)); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + free(cctx->workSpace); + free(cctx); + return 0; /* reserved as a potential error code in the future */ +} + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ +{ + return &(ctx->seqStore); +} + + +#define CLAMP(val,min,max) { if (valmax) val=max; } +#define CLAMPCHECK(val,min,max) { if ((valmax)) return ERROR(compressionParameter_unsupported); } + +/** ZSTD_checkParams() : + ensure param values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN; + U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? 
ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; + CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); } + CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported); + return 0; +} + + +static unsigned ZSTD_highbit(U32 val); + +/** ZSTD_checkCParams_advanced() : + temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */ +size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize) +{ + if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams); + if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported); + if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */ + if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN; /* fake value - temporary work around */ + if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN; /* fake value - temporary work around */ + return ZSTD_checkCParams(cParams); +} + + +/** ZSTD_adjustParams() : + optimize params for q given input (`srcSize` and `dictSize`). + mostly downsizing to reduce memory consumption and initialization. + Both `srcSize` and `dictSize` are optional (use 0 if unknown), + but if both are 0, no optimization can be done. + Note : params is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */ +void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize) +{ + if (srcSize+dictSize == 0) return; /* no size information available : no adjustment */ + + /* resize params, to use less memory when necessary */ + { U32 const minSrcSize = (srcSize==0) ? 
500 : 0; + U64 const rSize = srcSize + dictSize + minSrcSize; + if (rSize < ((U64)1<windowLog > srcLog) params->windowLog = srcLog; + } } + if (params->hashLog > params->windowLog) params->hashLog = params->windowLog; + { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); + U32 const maxChainLog = params->windowLog+btPlus; + if (params->chainLog > maxChainLog) params->chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */ + + if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + if ((params->hashLog < ZSTD_HASHLOG_MIN) && ((U32)params->strategy >= (U32)ZSTD_btlazy2)) params->hashLog = ZSTD_HASHLOG_MIN; /* required to ensure collision resistance in bt */ +} + + +size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams) /* hidden interface, for paramagrill */ +{ + ZSTD_CCtx* zc = ZSTD_createCCtx(); + ZSTD_parameters params; + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0); + { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize; + ZSTD_freeCCtx(zc); + return ccsize; } +} + +/*! ZSTD_resetCCtx_advanced() : + note : 'params' is expected to be validated */ +static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, + ZSTD_parameters params, U32 reset) +{ /* note : params considered validated here */ + const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog); + const U32 divider = (params.cParams.searchLength==3) ? 3 : 4; + const size_t maxNbSeq = blockSize / divider; + const size_t tokenSpace = blockSize + 11*maxNbSeq; + const size_t chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog); const size_t hSize = ((size_t)1) << params.cParams.hashLog; - const size_t h3Size = (zc->hashLog3) ? 
1 << zc->hashLog3 : 0; - const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - - /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) { - free(zc->workSpace); - zc->workSpace = malloc(neededSpace); - if (zc->workSpace == NULL) return ERROR(memory_allocation); - zc->workSpaceSize = neededSpace; - } } - - if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ - zc->hashTable3 = (U32*)(zc->workSpace); - zc->hashTable = zc->hashTable3 + h3Size; - zc->chainTable = zc->hashTable + hSize; - zc->seqStore.buffer = zc->chainTable + chainSize; - zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; - zc->flagStaticTables = 0; - zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256; - - zc->nextToUpdate = 1; - zc->nextSrc = NULL; - zc->base = NULL; - zc->dictBase = NULL; - zc->dictLimit = 0; - zc->lowLimit = 0; - zc->params = params; - zc->blockSize = blockSize; - - if (params.cParams.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer); - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); - zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (MaxOff+1))); - zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); - zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; - zc->seqStore.litLengthSum = 0; - } - zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); - zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq); - zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq); - zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; - 
zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; - zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - - zc->stage = 1; - zc->loadedDictEnd = 0; - - return 0; -} - - -/*! ZSTD_copyCCtx() : -* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. -* Only works during stage 1 (i.e. after creation, but before first call to ZSTD_compressContinue()). -* @return : 0, or an error code */ -size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) -{ - if (srcCCtx->stage!=1) return ERROR(stage_wrong); - - dstCCtx->hashLog3 = srcCCtx->hashLog3; /* must be before ZSTD_resetCCtx_advanced */ - ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, 0); - dstCCtx->params.fParams.contentSizeFlag = 0; /* content size different from the one set during srcCCtx init */ - - /* copy tables */ - { const size_t chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog); + const size_t h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; + const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + + /* Check if workSpace is large enough, alloc a new one if needed */ + { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) { + free(zc->workSpace); + zc->workSpace = malloc(neededSpace); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; + } } + + if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ + zc->hashTable3 = (U32*)(zc->workSpace); + zc->hashTable = zc->hashTable3 + h3Size; + zc->chainTable = zc->hashTable + hSize; + zc->seqStore.buffer = zc->chainTable + chainSize; + zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; + zc->flagStaticTables = 0; + zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256; + + zc->nextToUpdate = 1; + zc->nextSrc = NULL; + zc->base = NULL; + zc->dictBase = NULL; + zc->dictLimit = 0; + zc->lowLimit = 0; + zc->params = params; + zc->blockSize = 
blockSize; + + if (params.cParams.strategy == ZSTD_btopt) { + zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer); + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); + zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); + zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (MaxOff+1))); + zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); + zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; + zc->seqStore.litLengthSum = 0; + } + zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); + zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq); + zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq); + zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; + zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; + zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; + + zc->stage = 1; + zc->loadedDictEnd = 0; + + return 0; +} + + +/*! ZSTD_copyCCtx() : +* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. +* Only works during stage 1 (i.e. after creation, but before first call to ZSTD_compressContinue()). +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) +{ + if (srcCCtx->stage!=1) return ERROR(stage_wrong); + + dstCCtx->hashLog3 = srcCCtx->hashLog3; /* must be before ZSTD_resetCCtx_advanced */ + ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, 0); + dstCCtx->params.fParams.contentSizeFlag = 0; /* content size different from the one set during srcCCtx init */ + + /* copy tables */ + { const size_t chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 
0 : (1 << srcCCtx->params.cParams.chainLog); const size_t hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog; - const size_t h3Size = (srcCCtx->hashLog3) ? 1 << srcCCtx->hashLog3 : 0; - const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); - } - - /* copy dictionary pointers */ - dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; - dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; - dstCCtx->nextSrc = srcCCtx->nextSrc; - dstCCtx->base = srcCCtx->base; - dstCCtx->dictBase = srcCCtx->dictBase; - dstCCtx->dictLimit = srcCCtx->dictLimit; - dstCCtx->lowLimit = srcCCtx->lowLimit; - dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; - - /* copy entropy tables */ - dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; - if (srcCCtx->flagStaticTables) { - memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4); - memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); - memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); - memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable)); - } - - return 0; -} - - -/*! ZSTD_reduceTable() : -* reduce table indexes by `reducerValue` */ -static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) -{ - U32 u; - for (u=0 ; u < size ; u++) { - if (table[u] < reducerValue) table[u] = 0; - else table[u] -= reducerValue; - } -} - -/*! ZSTD_reduceIndex() : -* rescale all indexes to avoid future overflow (indexes are U32) */ -static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) -{ - { const U32 hSize = 1 << zc->params.cParams.hashLog; - ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } - - { const U32 chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); - ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); } - - { const U32 h3Size = (zc->hashLog3) ? 
1 << zc->hashLog3 : 0; - ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } -} - - -/*-******************************************************* -* Block entropic compression -*********************************************************/ - -/* Frame format description - Frame Header - [ Block Header - Block ] - Frame End - 1) Frame Header - - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h) - - 1 byte - Frame Descriptor - 2) Block Header - - 3 bytes, starting with a 2-bits descriptor - Uncompressed, Compressed, Frame End, unused - 3) Block - See Block Format Description - 4) Frame End - - 3 bytes, compatible with Block Header -*/ - - -/* Frame descriptor - - 1 byte, using : - bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h) - bit 4 : minmatch 4(0) or 3(1) - bit 5 : reserved (must be zero) - bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes - - Optional : content size (0, 1, 2 or 8 bytes) - 0 : unknown - 1 : 0-255 bytes - 2 : 256 - 65535+256 - 8 : up to 16 exa -*/ - - -/* Block format description - - Block = Literal Section - Sequences Section - Prerequisite : size of (compressed) block, maximum size of regenerated data - - 1) Literal Section - - 1.1) Header : 1-5 bytes - flags: 2 bits - 00 compressed by Huff0 - 01 unused - 10 is Raw (uncompressed) - 11 is Rle - Note : using 01 => Huff0 with precomputed table ? - Note : delta map ? => compressed ? 
- - 1.1.1) Huff0-compressed literal block : 3-5 bytes - srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream - srcSize < 1 KB => 3 bytes (2-2-10-10) - srcSize < 16KB => 4 bytes (2-2-14-14) - else => 5 bytes (2-2-18-18) - big endian convention - - 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes - size : 5 bits: (IS_RAW<<6) + (0<<4) + size - 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8) - size&255 - 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16) - size>>8&255 - size&255 - - 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes - size : 5 bits: (IS_RLE<<6) + (0<<4) + size - 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8) - size&255 - 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16) - size>>8&255 - size&255 - - 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes - srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream - srcSize < 1 KB => 3 bytes (2-2-10-10) - srcSize < 16KB => 4 bytes (2-2-14-14) - else => 5 bytes (2-2-18-18) - big endian convention - - 1- CTable available (stored into workspace ?) - 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?) 
- - - 1.2) Literal block content - - 1.2.1) Huff0 block, using sizes from header - See Huff0 format - - 1.2.2) Huff0 block, using prepared table - - 1.2.3) Raw content - - 1.2.4) single byte - - - 2) Sequences section - - - Nb Sequences : 2 bytes, little endian - - Control Token : 1 byte (see below) - - Dumps Length : 1 or 2 bytes (depending on control token) - - Dumps : as stated by dumps length - - Literal Lengths FSE table (as needed depending on encoding method) - - Offset Codes FSE table (as needed depending on encoding method) - - Match Lengths FSE table (as needed depending on encoding method) - - 2.1) Control Token - 8 bits, divided as : - 0-1 : dumpsLength - 2-3 : MatchLength, FSE encoding method - 4-5 : Offset Codes, FSE encoding method - 6-7 : Literal Lengths, FSE encoding method - - FSE encoding method : - FSE_ENCODING_RAW : uncompressed; no header - FSE_ENCODING_RLE : single repeated value; header 1 byte - FSE_ENCODING_STATIC : use prepared table; no header - FSE_ENCODING_DYNAMIC : read NCount -*/ - -size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - - if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); - memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize); - - /* Build header */ - ostart[0] = (BYTE)(srcSize>>16); - ostart[1] = (BYTE)(srcSize>>8); - ostart[2] = (BYTE) srcSize; - ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */ - - return ZSTD_blockHeaderSize+srcSize; -} - - -static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((IS_RAW<<6) + (0<<5) + srcSize); - break; - case 2: /* 2 - 2 - 12 */ - ostart[0] = (BYTE)((IS_RAW<<6) + 
(2<<4) + (srcSize >> 8)); - ostart[1] = (BYTE)srcSize; - break; - default: /*note : should not be necessary : flSize is within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ - ostart[0] = (BYTE)((IS_RAW<<6) + (3<<4) + (srcSize >> 16)); - ostart[1] = (BYTE)(srcSize>>8); - ostart[2] = (BYTE)srcSize; - break; - } - - memcpy(ostart + flSize, src, srcSize); - return srcSize + flSize; -} - -static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - (void)dstCapacity; /* dstCapacity guaranteed to be >=4, hence large enough */ - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((IS_RLE<<6) + (0<<5) + srcSize); - break; - case 2: /* 2 - 2 - 12 */ - ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8)); - ostart[1] = (BYTE)srcSize; - break; - default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ - ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16)); - ostart[1] = (BYTE)(srcSize>>8); - ostart[2] = (BYTE)srcSize; - break; - } - - ostart[flSize] = *(const BYTE*)src; - return flSize+1; -} - - -static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } - -static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t const minGain = ZSTD_minGain(srcSize); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - U32 hType = IS_HUF; - size_t cLitSize; - - - /* small ? don't even attempt compression (speed opt) */ -# define LITERAL_NOENTROPY 63 - { size_t const minLitSize = zc->flagStaticTables ? 
6 : LITERAL_NOENTROPY; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - - if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ - if (zc->flagStaticTables && (lhSize==3)) { - hType = IS_PCH; - singleStream = 1; - cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable); - } else { - cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12) - : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12); - } - - if ((cLitSize==0) || (cLitSize >= srcSize - minGain)) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - if (cLitSize==1) - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - ostart[0] = (BYTE)((srcSize>>6) + (singleStream << 4) + (hType<<6)); - ostart[1] = (BYTE)((srcSize<<2) + (cLitSize>>8)); - ostart[2] = (BYTE)(cLitSize); - break; - case 4: /* 2 - 2 - 14 - 14 */ - ostart[0] = (BYTE)((srcSize>>10) + (2<<4) + (hType<<6)); - ostart[1] = (BYTE)(srcSize>> 2); - ostart[2] = (BYTE)((srcSize<<6) + (cLitSize>>8)); - ostart[3] = (BYTE)(cLitSize); - break; - default: /* should not be necessary, lhSize is only {3,4,5} */ - case 5: /* 2 - 2 - 18 - 18 */ - ostart[0] = (BYTE)((srcSize>>14) + (3<<4) + (hType<<6)); - ostart[1] = (BYTE)(srcSize>>6); - ostart[2] = (BYTE)((srcSize<<2) + (cLitSize>>16)); - ostart[3] = (BYTE)(cLitSize>>8); - ostart[4] = (BYTE)(cLitSize); - break; - } - return lhSize+cLitSize; -} - - -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq) -{ - /* LL codes */ - { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 
24, 24, 24, 24, 24, 24, 24 }; - const BYTE LL_deltaCode = 19; - const U16* const llTable = seqStorePtr->litLengthStart; - BYTE* const llCodeTable = seqStorePtr->llCodeStart; - size_t u; - for (u=0; u63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; - } - if (seqStorePtr->longLengthID==1) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - } - - /* Offset codes */ - { const U32* const offsetTable = seqStorePtr->offsetStart; - BYTE* const ofCodeTable = seqStorePtr->offCodeStart; - size_t u; - for (u=0; umatchLengthStart; - BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; - size_t u; - for (u=0; u127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; - } - if (seqStorePtr->longLengthID==2) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; - } -} - - -size_t ZSTD_compressSequences(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - size_t srcSize) -{ - const seqStore_t* seqStorePtr = &(zc->seqStore); - U32 count[MaxSeq+1]; - S16 norm[MaxSeq+1]; - FSE_CTable* CTable_LitLength = zc->litlengthCTable; - FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; - FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - U16* const llTable = seqStorePtr->litLengthStart; - U16* const mlTable = seqStorePtr->matchLengthStart; - const U32* const offsetTable = seqStorePtr->offsetStart; - const U32* const offsetTableEnd = seqStorePtr->offset; - BYTE* const ofCodeTable = seqStorePtr->offCodeStart; - BYTE* const llCodeTable = seqStorePtr->llCodeStart; - BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - size_t const nbSeq = offsetTableEnd - offsetTable; - BYTE* seqHead; - - /* Compress literals */ - { const BYTE* const literals = seqStorePtr->litStart; - size_t const litSize = seqStorePtr->lit - literals; - size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); - if 
(ZSTD_isError(cSize)) return cSize; - op += cSize; - } - - /* Sequences Header */ - if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); - if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) goto _check_compressibility; - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - -#define MIN_SEQ_FOR_DYNAMIC_FSE 64 -#define MAX_SEQ_FOR_STATIC_FSE 1000 - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr, nbSeq); - - /* CTable for Literal Lengths */ - { U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llCodeTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { - FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); - LLtype = FSE_ENCODING_RAW; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; } - FSE_buildCTable(CTable_LitLength, norm, max, tableLog); - LLtype = FSE_ENCODING_DYNAMIC; - } } - - /* CTable for Offsets */ - { U32 max = MaxOff; - size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = ofCodeTable[0]; - 
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - Offtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { - FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog); - Offtype = FSE_ENCODING_RAW; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; } - FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); - Offtype = FSE_ENCODING_DYNAMIC; - } } - - /* CTable for MatchLengths */ - { U32 max = MaxML; - size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlCodeTable; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - MLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { - FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); - MLtype = FSE_ENCODING_RAW; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); - if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; } - 
FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); - MLtype = FSE_ENCODING_DYNAMIC; - } } - - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - zc->flagStaticTables = 0; - - /* Encoding Sequences */ - { BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op); - if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */ - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]); - BIT_flushBits(&blockStream); - - { size_t n; - for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, llTable[n], llBits); - if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, mlTable[n], mlBits); - if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ - BIT_flushBits(&blockStream); /* (7)*/ - } } - - FSE_flushCState(&blockStream, &stateMatchLength); - FSE_flushCState(&blockStream, &stateOffsetBits); - FSE_flushCState(&blockStream, &stateLitLength); - - { size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; - } } - - /* check compressibility */ -_check_compressibility: - { size_t const minGain = 
ZSTD_minGain(srcSize); - size_t const maxCSize = srcSize - minGain; - if ((size_t)(op-ostart) >= maxCSize) return 0; } - - return op - ostart; -} - - -/*! ZSTD_storeSeq() : - Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - `offsetCode` : distance to match, or 0 == repCode. - `matchCode` : matchLength - MINMATCH -*/ -MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode) -{ -#if 0 /* for debug */ - static const BYTE* g_start = NULL; - const U32 pos = (U32)(literals - g_start); - if (g_start==NULL) g_start = literals; + const size_t h3Size = (srcCCtx->hashLog3) ? 1 << srcCCtx->hashLog3 : 0; + const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); + } + + /* copy dictionary pointers */ + dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; + dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; + dstCCtx->nextSrc = srcCCtx->nextSrc; + dstCCtx->base = srcCCtx->base; + dstCCtx->dictBase = srcCCtx->dictBase; + dstCCtx->dictLimit = srcCCtx->dictLimit; + dstCCtx->lowLimit = srcCCtx->lowLimit; + dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; + + /* copy entropy tables */ + dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; + if (srcCCtx->flagStaticTables) { + memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4); + memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); + memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); + memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable)); + } + + return 0; +} + + +/*! 
ZSTD_reduceTable() : +* reduce table indexes by `reducerValue` */ +static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) +{ + U32 u; + for (u=0 ; u < size ; u++) { + if (table[u] < reducerValue) table[u] = 0; + else table[u] -= reducerValue; + } +} + +/*! ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +{ + { const U32 hSize = 1 << zc->params.cParams.hashLog; + ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } + + { const U32 chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); + ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); } + + { const U32 h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; + ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* Frame format description + Frame Header - [ Block Header - Block ] - Frame End + 1) Frame Header + - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h) + - 1 byte - Frame Descriptor + 2) Block Header + - 3 bytes, starting with a 2-bits descriptor + Uncompressed, Compressed, Frame End, unused + 3) Block + See Block Format Description + 4) Frame End + - 3 bytes, compatible with Block Header +*/ + + +/* Frame descriptor + + 1 byte, using : + bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h) + bit 4 : minmatch 4(0) or 3(1) + bit 5 : reserved (must be zero) + bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes + + Optional : content size (0, 1, 2 or 8 bytes) + 0 : unknown + 1 : 0-255 bytes + 2 : 256 - 65535+256 + 8 : up to 16 exa +*/ + + +/* Block format description + + Block = Literal Section - Sequences Section + Prerequisite : size of (compressed) block, maximum size of regenerated data + + 1) Literal 
Section + + 1.1) Header : 1-5 bytes + flags: 2 bits + 00 compressed by Huff0 + 01 unused + 10 is Raw (uncompressed) + 11 is Rle + Note : using 01 => Huff0 with precomputed table ? + Note : delta map ? => compressed ? + + 1.1.1) Huff0-compressed literal block : 3-5 bytes + srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream + srcSize < 1 KB => 3 bytes (2-2-10-10) + srcSize < 16KB => 4 bytes (2-2-14-14) + else => 5 bytes (2-2-18-18) + big endian convention + + 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes + size : 5 bits: (IS_RAW<<6) + (0<<4) + size + 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8) + size&255 + 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255 + + 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes + size : 5 bits: (IS_RLE<<6) + (0<<4) + size + 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8) + size&255 + 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255 + + 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes + srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream + srcSize < 1 KB => 3 bytes (2-2-10-10) + srcSize < 16KB => 4 bytes (2-2-14-14) + else => 5 bytes (2-2-18-18) + big endian convention + + 1- CTable available (stored into workspace ?) + 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?) 
+ + + 1.2) Literal block content + + 1.2.1) Huff0 block, using sizes from header + See Huff0 format + + 1.2.2) Huff0 block, using prepared table + + 1.2.3) Raw content + + 1.2.4) single byte + + + 2) Sequences section + + - Nb Sequences : 2 bytes, little endian + - Control Token : 1 byte (see below) + - Dumps Length : 1 or 2 bytes (depending on control token) + - Dumps : as stated by dumps length + - Literal Lengths FSE table (as needed depending on encoding method) + - Offset Codes FSE table (as needed depending on encoding method) + - Match Lengths FSE table (as needed depending on encoding method) + + 2.1) Control Token + 8 bits, divided as : + 0-1 : dumpsLength + 2-3 : MatchLength, FSE encoding method + 4-5 : Offset Codes, FSE encoding method + 6-7 : Literal Lengths, FSE encoding method + + FSE encoding method : + FSE_ENCODING_RAW : uncompressed; no header + FSE_ENCODING_RLE : single repeated value; header 1 byte + FSE_ENCODING_STATIC : use prepared table; no header + FSE_ENCODING_DYNAMIC : read NCount +*/ + +size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + + if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize); + + /* Build header */ + ostart[0] = (BYTE)(srcSize>>16); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE) srcSize; + ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */ + + return ZSTD_blockHeaderSize+srcSize; +} + + +static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((IS_RAW<<6) + (0<<5) + srcSize); + break; + case 2: /* 2 - 2 - 12 */ + ostart[0] = (BYTE)((IS_RAW<<6) + 
(2<<4) + (srcSize >> 8)); + ostart[1] = (BYTE)srcSize; + break; + default: /*note : should not be necessary : flSize is within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + ostart[0] = (BYTE)((IS_RAW<<6) + (3<<4) + (srcSize >> 16)); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE)srcSize; + break; + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} + +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (0<<5) + srcSize); + break; + case 2: /* 2 - 2 - 12 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8)); + ostart[1] = (BYTE)srcSize; + break; + default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16)); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE)srcSize; + break; + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + + +static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } + +static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t const minGain = ZSTD_minGain(srcSize); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + U32 hType = IS_HUF; + size_t cLitSize; + + + /* small ? don't even attempt compression (speed opt) */ +# define LITERAL_NOENTROPY 63 + { size_t const minLitSize = zc->flagStaticTables ? 
6 : LITERAL_NOENTROPY; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + if (zc->flagStaticTables && (lhSize==3)) { + hType = IS_PCH; + singleStream = 1; + cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable); + } else { + cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12) + : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12); + } + + if ((cLitSize==0) || (cLitSize >= srcSize - minGain)) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + if (cLitSize==1) + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + ostart[0] = (BYTE)((srcSize>>6) + (singleStream << 4) + (hType<<6)); + ostart[1] = (BYTE)((srcSize<<2) + (cLitSize>>8)); + ostart[2] = (BYTE)(cLitSize); + break; + case 4: /* 2 - 2 - 14 - 14 */ + ostart[0] = (BYTE)((srcSize>>10) + (2<<4) + (hType<<6)); + ostart[1] = (BYTE)(srcSize>> 2); + ostart[2] = (BYTE)((srcSize<<6) + (cLitSize>>8)); + ostart[3] = (BYTE)(cLitSize); + break; + default: /* should not be necessary, lhSize is only {3,4,5} */ + case 5: /* 2 - 2 - 18 - 18 */ + ostart[0] = (BYTE)((srcSize>>14) + (3<<4) + (hType<<6)); + ostart[1] = (BYTE)(srcSize>>6); + ostart[2] = (BYTE)((srcSize<<2) + (cLitSize>>16)); + ostart[3] = (BYTE)(cLitSize>>8); + ostart[4] = (BYTE)(cLitSize); + break; + } + return lhSize+cLitSize; +} + + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq) +{ + /* LL codes */ + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 
24, 24, 24, 24, 24, 24, 24 }; + const BYTE LL_deltaCode = 19; + const U16* const llTable = seqStorePtr->litLengthStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; + size_t u; + for (u=0; u63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; + } + if (seqStorePtr->longLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + } + + /* Offset codes */ + { const U32* const offsetTable = seqStorePtr->offsetStart; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; + size_t u; + for (u=0; umatchLengthStart; + BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; + size_t u; + for (u=0; u127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + } + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; + } +} + + +size_t ZSTD_compressSequences(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + size_t srcSize) +{ + const seqStore_t* seqStorePtr = &(zc->seqStore); + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + FSE_CTable* CTable_LitLength = zc->litlengthCTable; + FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; + FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + U16* const llTable = seqStorePtr->litLengthStart; + U16* const mlTable = seqStorePtr->matchLengthStart; + const U32* const offsetTable = seqStorePtr->offsetStart; + const U32* const offsetTableEnd = seqStorePtr->offset; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; + BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = offsetTableEnd - offsetTable; + BYTE* seqHead; + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = seqStorePtr->lit - literals; + size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); + if 
(ZSTD_isError(cSize)) return cSize; + op += cSize; + } + + /* Sequences Header */ + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) goto _check_compressibility; + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + +#define MIN_SEQ_FOR_DYNAMIC_FSE 64 +#define MAX_SEQ_FOR_STATIC_FSE 1000 + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr, nbSeq); + + /* CTable for Literal Lengths */ + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); + LLtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = FSE_ENCODING_DYNAMIC; + } } + + /* CTable for Offsets */ + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = ofCodeTable[0]; + 
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + Offtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { + FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog); + Offtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + Offtype = FSE_ENCODING_DYNAMIC; + } } + + /* CTable for MatchLengths */ + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + MLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { + FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); + MLtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + 
FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + MLtype = FSE_ENCODING_DYNAMIC; + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + zc->flagStaticTables = 0; + + /* Encoding Sequences */ + { BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op); + if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */ + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]); + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, llTable[n], llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, mlTable[n], mlBits); + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ + } } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } + + /* check compressibility */ +_check_compressibility: + { size_t const minGain = 
ZSTD_minGain(srcSize); + size_t const maxCSize = srcSize - minGain; + if ((size_t)(op-ostart) >= maxCSize) return 0; } + + return op - ostart; +} + + +/*! ZSTD_storeSeq() : + Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + `offsetCode` : distance to match, or 0 == repCode. + `matchCode` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode) +{ +#if 0 /* for debug */ + static const BYTE* g_start = NULL; + const U32 pos = (U32)(literals - g_start); + if (g_start==NULL) g_start = literals; if ((pos > 2587900) && (pos < 2588050)) - printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); -#endif - ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode); - - /* copy Literals */ - ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); - seqStorePtr->lit += litLength; - - /* literal Length */ - if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->litLength - seqStorePtr->litLengthStart); } - *seqStorePtr->litLength++ = (U16)litLength; - - /* match offset */ - *(seqStorePtr->offset++) = (U32)offsetCode + 1; - - /* match Length */ - if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); } - *seqStorePtr->matchLength++ = (U16)matchCode; -} - - -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes (register size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctzll((U64)val) >> 
3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r=0; - _BitScanForward( &r, (U32)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - -static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while ((pIn iEnd) vEnd = iEnd; - matchLength = ZSTD_count(ip, match, vEnd); - if (match + matchLength == mEnd) - matchLength += ZSTD_count(ip+matchLength, iStart, iEnd); - 
return matchLength; -} - - -/*-************************************* -* Hashes -***************************************/ -static const U32 prime3bytes = 506832829U; -static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } -static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } - -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } - -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } -static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } - -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } - -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } -static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } - -static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) -{ - switch(mls) - { - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - } -} - - -/*-************************************* -* Fast Scan -***************************************/ -static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) -{ - U32* const hashTable = zc->hashTable; - const U32 hBits = zc->params.cParams.hashLog; - const BYTE* const base = zc->base; - const BYTE* ip = base + zc->nextToUpdate; - const BYTE* const iend = ((const 
BYTE*)end) - 8; - const size_t fastHashFillStep = 3; - - while(ip <= iend) { - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - - -FORCE_INLINE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* const hashTable = zc->hashTable; - const U32 hBits = zc->params.cParams.hashLog; - seqStore_t* seqStorePtr = &(zc->seqStore); - const BYTE* const base = zc->base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowIndex = zc->dictLimit; - const BYTE* const lowest = base + lowIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - - /* init */ - ZSTD_resetSeqStore(seqStorePtr); - if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE; - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mlCode; - size_t offset; - const size_t h = ZSTD_hashPtr(ip, hBits, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* match = base + matchIndex; - const U32 current = (U32)(ip-base); - hashTable[h] = current; /* update hash table */ - - if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { /* note : by construction, offset_1 <= current */ - mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); - } else { - if ( (matchIndex <= lowIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32; - offset = ip-match; - while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStorePtr, 
ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); - } - - /* match found */ - ip += mlCode; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { - /* store sequence */ - size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; - { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH); - ip += rlCode; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) -{ - const U32 mls = ctx->params.cParams.searchLength; - switch(mls) - { - default: - case 4 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; - } -} - - -static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* hashTable = ctx->hashTable; - const U32 hBits = ctx->params.cParams.hashLog; - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const base = ctx->base; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 
lowLimit = ctx->lowLimit; - const BYTE* const dictStart = dictBase + lowLimit; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const lowPrefixPtr = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - - U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - - - /* init */ - ZSTD_resetSeqStore(seqStorePtr); - /* skip first position to avoid read overflow during repcode match check */ - hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0); - ip += REPCODE_STARTVALUE; - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hBits, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; - const BYTE* repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* repMatch = repBase + repIndex; - size_t mlCode; - U32 offset; - hashTable[h] = current; /* update hash table */ - - if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); - } else { - if ( (matchIndex < lowLimit) || - (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex < dictLimit ? 
dictStart : lowPrefixPtr; - mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; - while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ - offset = current - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); - } } - - /* found a match : store it */ - ip += mlCode; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit)) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < dictLimit ? 
dictEnd : iend; - size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) -{ - const U32 mls = ctx->params.cParams.searchLength; - switch(mls) - { - default: - case 4 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; - } -} - - - - -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. -* ip : assumed <= iend-8 . 
-* @return : nb of positions added */ -static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, - U32 extDict) -{ - U32* const hashTable = zc->hashTable; - const U32 hashLog = zc->params.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - const U32 btLog = zc->params.cParams.chainLog - 1; - const U32 btMask= (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match = base + matchIndex; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - const U32 windowLow = zc->lowLimit; - U32 matchEndIdx = current+8; - size_t bestLength = 8; - U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); - U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); - predictedSmall += (predictedSmall>0); - predictedLarge += (predictedLarge>0); - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ -#if 0 /* note : can create issues when hlog small <= 11 */ - const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ - if (matchIndex == predictedSmall) { - /* no need to check length, result known */ - *smallerPtr = matchIndex; - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new 
"smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - predictedSmall = predictPtr[1] + (predictPtr[1]>0); - continue; - } - if (matchIndex == predictedLarge) { - *largerPtr = matchIndex; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - predictedLarge = predictPtr[0] + (predictPtr[0]>0); - continue; - } -#endif - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } - - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ - - if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= 
btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); - if (matchEndIdx > current + 8) return matchEndIdx - current - 8; - return 1; -} - - -static size_t ZSTD_insertBtAndFindBestMatch ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - U32 nbCompares, const U32 mls, - U32 extDict) -{ - U32* const hashTable = zc->hashTable; - const U32 hashLog = zc->params.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - const U32 btLog = zc->params.cParams.chainLog - 1; - const U32 btMask= (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 
0 : current - btMask; - const U32 windowLow = zc->lowLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; - - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than 
previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - - zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; - return bestLength; -} - - -static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) - idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); -} - -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); -} - - -static size_t ZSTD_BtFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - - -static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = 
zc->nextToUpdate; - - while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); -} - - - -/** Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch_extDict ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); -} - - -static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - - - -/* *********************** -* Hash Chain -*************************/ - -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] - - -/* Update chains up to ip (excluded) - Assumption : always within prefix (ie. 
not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) -{ - U32* const hashTable = zc->hashTable; - const U32 hashLog = zc->params.cParams.hashLog; - U32* const chainTable = zc->chainTable; - const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) { - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - zc->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - - - -FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HcFindBestMatch_generic ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls, const U32 extDict) -{ - U32* const chainTable = zc->chainTable; - const U32 chainSize = (1 << zc->params.cParams.chainLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 lowLimit = zc->lowLimit; - const U32 current = (U32)(ip-base); - const U32 minChain = current > chainSize ? 
current - chainSize : 0; - int nbAttempts=maxNbAttempts; - size_t ml=EQUAL_READ32-1; - - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); - - for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; - if (match[ml] == ip[ml]) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex; - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; - } - - /* save best solution */ - if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } - - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - return ml; -} - - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); - } -} - - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, 
offsetPtr, maxNbAttempts, 5, 1); - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); - } -} - - -/* ******************************* -* Common parser - lazy strategy -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base + ctx->dictLimit; - - U32 const maxSearches = 1 << ctx->params.cParams.searchLog; - U32 const mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, - size_t* offsetPtr, - U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; - - /* init */ - U32 rep[ZSTD_REP_INIT]; - { U32 i ; for (i=0; inextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); - if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - - /* check repCode */ - if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0])) { - /* repcode : we take it */ - matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; - if (depth==0) goto _storeSequence; - } - - /* first search (depth 0) */ - { size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < EQUAL_READ32) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - 
/* let's try to find a better solution */ - if (depth>=1) - while (ip= EQUAL_READ32) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip= EQUAL_READ32) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start = ip; - } - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* catch up */ - if (offset) { - while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */ - { start--; matchLength++; } - rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32; - offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, 
matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0); - } -} - - -static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); -} - -static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); -} - -static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); -} - -static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); -} - - -FORCE_INLINE -void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + ctx->lowLimit; - - const U32 maxSearches = 1 << ctx->params.cParams.searchLog; - const U32 mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, - size_t* offsetPtr, - U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; - - /* init */ - U32 rep[ZSTD_REP_INIT]; - { U32 i; for (i=0; inextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); - if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - U32 current = (U32)(ip-base); - - /* check repCode */ - { - const U32 repIndex = (U32)(current+1 - rep[0]); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - if (MEM_read32(ip+1) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - if (depth==0) goto _storeSequence; - } } - - /* first search (depth 0) */ - { size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < EQUAL_READ32) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip= 3) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 1 */ - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip= 3) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - int gain2 = (int)(repLength * 4); - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 2 */ - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* catch up */ - if (offset) { - U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repIndex = (U32)((ip-base) - rep[1]); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - break; - } } - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); -} - -static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); -} - -static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); -} - -static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); -} - + printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); +#endif + ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode); + + /* copy Literals */ + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->litLength - seqStorePtr->litLengthStart); } + *seqStorePtr->litLength++ = (U16)litLength; + + /* match offset */ + *(seqStorePtr->offset++) = (U32)offsetCode + 1; + + /* match Length */ + if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = 
(U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); } + *seqStorePtr->matchLength++ = (U16)matchCode; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, 
(unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + while ((pIn iEnd) vEnd = iEnd; + matchLength = ZSTD_count(ip, match, vEnd); + if (match + matchLength == mEnd) + matchLength += ZSTD_count(ip+matchLength, iStart, iEnd); + return matchLength; +} + + +/*-************************************* +* Hashes +***************************************/ +static const U32 prime3bytes = 506832829U; +static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } +static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 
4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + } +} + + +/*-************************************* +* Fast Scan +***************************************/ +static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hBits = zc->params.cParams.hashLog; + const BYTE* const base = zc->base; + const BYTE* ip = base + zc->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - 8; + const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hBits = zc->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(zc->seqStore); + const BYTE* const base = zc->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowIndex = zc->dictLimit; + const BYTE* const lowest = base + lowIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE; + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mlCode; + size_t offset; + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 current = (U32)(ip-base); + hashTable[h] = current; /* update hash table */ + + if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { /* note : by construction, offset_1 <= current */ + mlCode = ZSTD_count(ip+1+EQUAL_READ32, 
ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); + } else { + if ( (matchIndex <= lowIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32; + offset = ip-match; + while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); + } + + /* match found */ + ip += mlCode; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; + { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH); + ip += rlCode; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* hashTable = ctx->hashTable; + const U32 hBits = ctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowLimit = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowLimit; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + + U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + /* skip first position to avoid read overflow during repcode match check */ + hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0); + ip += REPCODE_STARTVALUE; + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* matchBase = matchIndex < dictLimit ? 
dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mlCode; + U32 offset; + hashTable[h] = current; /* update hash table */ + + if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); + } else { + if ( (matchIndex < lowLimit) || + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; + mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; + while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); + } } + + /* found a match : store it */ + ip += mlCode; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? 
dictBase + repIndex2 : base + repIndex2; + if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; + } +} + + + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. +* ip : assumed <= iend-8 . 
+* @return : nb of positions added */ +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + const U32 btLog = zc->params.cParams.chainLog - 1; + const U32 btMask= (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match = base + matchIndex; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + const U32 windowLow = zc->lowLimit; + U32 matchEndIdx = current+8; + size_t bestLength = 8; + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ +#if 0 /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new 
"smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ + + if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= 
btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); + if (matchEndIdx > current + 8) return matchEndIdx - current - 8; + return 1; +} + + +static size_t ZSTD_insertBtAndFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 nbCompares, const U32 mls, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + const U32 btLog = zc->params.cParams.chainLog - 1; + const U32 btMask= (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 
0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than 
previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + return bestLength; +} + + +static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) + idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); +} + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + +static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = 
zc->nextToUpdate; + + while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); +} + + + +/** Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + + +/* *********************** +* Hash Chain +*************************/ + +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + + +/* Update chains up to ip (excluded) + Assumption : always within prefix (ie. 
not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + U32* const chainTable = zc->chainTable; + const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) { + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + zc->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + + + +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls, const U32 extDict) +{ + U32* const chainTable = zc->chainTable; + const U32 chainSize = (1 << zc->params.cParams.chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = zc->lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? 
current - chainSize : 0; + int nbAttempts=maxNbAttempts; + size_t ml=EQUAL_READ32-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); + + for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex >= dictLimit) { + match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; + } + + /* save best solution */ + if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + return ml; +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); + } +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, 
offsetPtr, maxNbAttempts, 5, 1); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE +void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base + ctx->dictLimit; + + U32 const maxSearches = 1 << ctx->params.cParams.searchLog; + U32 const mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + + /* init */ + U32 rep[ZSTD_REP_INIT]; + { U32 i ; for (i=0; inextToUpdate3 = ctx->nextToUpdate; + ZSTD_resetSeqStore(seqStorePtr); + if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0])) { + /* repcode : we take it */ + matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < EQUAL_READ32) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + 
/* let's try to find a better solution */ + if (depth>=1) + while (ip= EQUAL_READ32) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= EQUAL_READ32) && (gain2 > gain1)) + matchLength = ml2, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */ + { start--; matchLength++; } + rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32; + offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, 
matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0); + } +} + + +static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); +} + +static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); +} + + +FORCE_INLINE +void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ctx->lowLimit; + + const U32 maxSearches = 1 << ctx->params.cParams.searchLog; + const U32 mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; + + /* init */ + U32 rep[ZSTD_REP_INIT]; + { U32 i; for (i=0; inextToUpdate3 = ctx->nextToUpdate; + ZSTD_resetSeqStore(seqStorePtr); + if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { + const U32 repIndex = (U32)(current+1 - rep[0]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < EQUAL_READ32) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + int gain2 = (int)(repLength * 4); + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - rep[1]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); +} + +static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); +} + /* The optimal parser */ @@ -1976,590 +1976,590 @@ static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t src ZSTD_compressBlock_opt_generic(ctx, src, srcSize); } -static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize); -} - - -typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); - -static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) -{ - static const ZSTD_blockCompressor blockCompressor[2][6] = { -#if 1 - { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, 
-#else - { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }, -#endif - { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict } - }; - - return blockCompressor[extDict][(U32)strat]; -} - - -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ - blockCompressor(zc, src, srcSize); - return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); -} - - - - -static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t blockSize = zc->blockSize; - size_t remaining = srcSize; - const BYTE* ip = (const BYTE*)src; - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - const U32 maxDist = 1 << zc->params.cParams.windowLog; - ZSTD_stats_t* stats = &zc->seqStore.stats; - - ZSTD_statsInit(stats); - - while (remaining) { - size_t cSize; - ZSTD_statsResetFreqs(stats); - - if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ - if (remaining < blockSize) blockSize = remaining; - - if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) { - /* enforce maxDist */ - U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist; - if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit; - if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit; - } - - cSize = 
ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); - if (ZSTD_isError(cSize)) return cSize; - - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize); - if (ZSTD_isError(cSize)) return cSize; - } else { - op[0] = (BYTE)(cSize>>16); - op[1] = (BYTE)(cSize>>8); - op[2] = (BYTE)cSize; - op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ - cSize += 3; - } - - remaining -= blockSize; - dstCapacity -= cSize; - ip += blockSize; - op += cSize; - } - - ZSTD_statsPrint(stats, zc->params.cParams.searchLength); - return op-ostart; -} - - -static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - ZSTD_parameters params, U64 pledgedSrcSize) -{ BYTE* const op = (BYTE*)dst; - U32 const fcsId = params.fParams.contentSizeFlag ? - (pledgedSrcSize>0) + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) : /* 0-3 */ - 0; - BYTE const fdescriptor = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */ - | (fcsId << 6) ); - size_t const hSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; - if (hSize > dstCapacity) return ERROR(dstSize_tooSmall); - - MEM_writeLE32(dst, ZSTD_MAGICNUMBER); - op[4] = fdescriptor; - switch(fcsId) - { - default: /* impossible */ - case 0 : break; - case 1 : op[5] = (BYTE)(pledgedSrcSize); break; - case 2 : MEM_writeLE16(op+5, (U16)(pledgedSrcSize-256)); break; - case 3 : MEM_writeLE64(op+5, (U64)(pledgedSrcSize)); break; - } - return hSize; -} - - -static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 frame) -{ - const BYTE* const ip = (const BYTE*) src; - size_t fhSize = 0; - - if (zc->stage==0) return ERROR(stage_wrong); - if (frame && (zc->stage==1)) { /* copy saved header */ - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize); - if (ZSTD_isError(fhSize)) return fhSize; 
- dstCapacity -= fhSize; - dst = (char*)dst + fhSize; - zc->stage = 2; - } - - /* Check if blocks follow each other */ - if (src != zc->nextSrc) { - /* not contiguous */ - size_t const delta = zc->nextSrc - ip; - zc->lowLimit = zc->dictLimit; - zc->dictLimit = (U32)(zc->nextSrc - zc->base); - zc->dictBase = zc->base; - zc->base -= delta; - zc->nextToUpdate = zc->dictLimit; - if (zc->dictLimit - zc->lowLimit < 8) zc->lowLimit = zc->dictLimit; /* too small extDict */ - } - - /* preemptive overflow correction */ - if (zc->lowLimit > (1<<30)) { - U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt); - U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1; - U32 const newLowLimit = zc->lowLimit & chainMask; /* preserve position % chainSize */ - U32 const correction = zc->lowLimit - newLowLimit; - ZSTD_reduceIndex(zc, correction); - zc->base += correction; - zc->dictBase += correction; - zc->lowLimit = newLowLimit; - zc->dictLimit -= correction; - if (zc->nextToUpdate < correction) zc->nextToUpdate = 0; - else zc->nextToUpdate -= correction; - } - - /* if input and dictionary overlap : reduce dictionary (presumed modified by input) */ - if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit)) { - zc->lowLimit = (U32)(ip + srcSize - zc->dictBase); - if (zc->lowLimit > zc->dictLimit) zc->lowLimit = zc->dictLimit; - } - - zc->nextSrc = ip + srcSize; - { size_t const cSize = frame ? 
- ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) : - ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize); - if (ZSTD_isError(cSize)) return cSize; - return cSize + fhSize; - } -} - - -size_t ZSTD_compressContinue (ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1); -} - - -size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - if (srcSize > ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); - ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.cParams.searchLength); - return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 0); -} - - -static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize) -{ - const BYTE* const ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - - /* input becomes current prefix */ - zc->lowLimit = zc->dictLimit; - zc->dictLimit = (U32)(zc->nextSrc - zc->base); - zc->dictBase = zc->base; - zc->base += ip - zc->nextSrc; - zc->nextToUpdate = zc->dictLimit; - zc->loadedDictEnd = (U32)(iend - zc->base); - - zc->nextSrc = iend; - if (srcSize <= 8) return 0; - - switch(zc->params.cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength); - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.cParams.searchLength); - break; - - case ZSTD_btlazy2: - case ZSTD_btopt: - ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); - break; - - default: - return ERROR(GENERIC); /* strategy doesn't exist; impossible */ - } - - zc->nextToUpdate = zc->loadedDictEnd; - return 0; -} - - -/* Dictionary format : - Magic == ZSTD_DICT_MAGIC (4 bytes) - HUF_writeCTable(256) - Dictionary content -*/ -/*! 
ZSTD_loadDictEntropyStats() : - @return : size read from dictionary */ -static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize) -{ - /* note : magic number already checked */ - size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; - short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; - short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; - - size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); - if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); - zc->flagStaticTables = 1; - dict = (const char*)dict + hufHeaderSize; - dictSize -= hufHeaderSize; - - offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); - if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); - errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); - if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); - dict = (const char*)dict + offcodeHeaderSize; - dictSize -= offcodeHeaderSize; - - matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); - if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); - errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); - if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); - dict = (const char*)dict + matchlengthHeaderSize; - dictSize -= matchlengthHeaderSize; - - litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); - if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); - errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, 
litlengthLog); - if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); - - return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize; -} - -/** ZSTD_compress_insertDictionary() : -* @return : 0, or an error code */ -static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) -{ - if ((dict==NULL) || (dictSize<=4)) return 0; - - /* default : dict is pure content */ - if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); - - /* known magic number : dict is parsed for entropy stats and content */ - { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4; - if (ZSTD_isError(eSize)) return eSize; - return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); - } -} - - -/*! ZSTD_compressBegin_internal() : -* @return : 0, or an error code */ -static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc, - const void* dict, size_t dictSize, - ZSTD_parameters params, U64 pledgedSrcSize) -{ - { U32 const hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN); - zc->hashLog3 = (params.cParams.searchLength==3) ? hashLog3 : 0; } - - { size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, 1); - if (ZSTD_isError(resetError)) return resetError; } - - return ZSTD_compress_insertDictionary(zc, dict, dictSize); -} - - -/*! 
ZSTD_compressBegin_advanced() : -* @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, - const void* dict, size_t dictSize, - ZSTD_parameters params, U64 pledgedSrcSize) -{ - /* compression parameters verification and optimization */ - { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize); - if (ZSTD_isError(errorCode)) return errorCode; } - - return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize); -} - - -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_parameters params; - params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - params.fParams.contentSizeFlag = 0; - ZSTD_adjustCParams(¶ms.cParams, 0, dictSize); - ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel); - return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0); -} - - -size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) -{ - ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin compressionLevel=%d\n", zc->base, compressionLevel); - return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel); -} - - -/*! ZSTD_compressEnd() : -* Write frame epilogue. -* @return : nb of bytes written into dst (or an error code) */ -size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity) -{ - BYTE* op = (BYTE*)dst; - size_t fhSize = 0; - - /* not even init ! 
*/ - if (zc->stage==0) return ERROR(stage_wrong); - - /* special case : empty frame */ - if (zc->stage==1) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0); - if (ZSTD_isError(fhSize)) return fhSize; - dstCapacity -= fhSize; - op += fhSize; - zc->stage = 2; - } - - /* frame epilogue */ - if (dstCapacity < 3) return ERROR(dstSize_tooSmall); - op[0] = (BYTE)(bt_end << 6); - op[1] = 0; - op[2] = 0; - - zc->stage = 0; /* return to "created by not init" status */ - return 3+fhSize; -} - - -size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - { size_t const errorCode = ZSTD_copyCCtx(cctx, preparedCCtx); - if (ZSTD_isError(errorCode)) return errorCode; - } - { size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize); - if (ZSTD_isError(cSize)) return cSize; - - { size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst+cSize, dstCapacity-cSize); - if (ZSTD_isError(endSize)) return endSize; - return cSize + endSize; - } } -} - - -static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - - /* Init */ - { size_t const errorCode = ZSTD_compressBegin_internal(ctx, dict, dictSize, params, srcSize); - if(ZSTD_isError(errorCode)) return errorCode; } - - /* body (compression) */ - { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize); - if(ZSTD_isError(oSize)) return oSize; - op += oSize; - dstCapacity -= oSize; } - - /* Close frame */ - { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity); - if(ZSTD_isError(oSize)) return oSize; - op += oSize; } - - return (op - ostart); -} - -size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* 
dict,size_t dictSize, - ZSTD_parameters params) -{ - size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize); - if (ZSTD_isError(errorCode)) return errorCode; - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); -} - -size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_parameters params; - ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel); - params.cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize); - params.fParams.contentSizeFlag = 1; - ZSTD_adjustCParams(¶ms.cParams, srcSize, dictSize); - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); -} - -size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) -{ - ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel); - return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); -} - -size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) -{ - size_t result; - ZSTD_CCtx ctxBody; - memset(&ctxBody, 0, sizeof(ctxBody)); - result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); - free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; just free heap content */ - return result; -} - - -/*-===== Pre-defined compression levels =====-*/ - +static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize); +} + + +typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); + +static ZSTD_blockCompressor 
ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +{ + static const ZSTD_blockCompressor blockCompressor[2][6] = { +#if 1 + { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, +#else + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }, +#endif + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict } + }; + + return blockCompressor[extDict][(U32)strat]; +} + + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ + blockCompressor(zc, src, srcSize); + return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); +} + + + + +static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t blockSize = zc->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + const U32 maxDist = 1 << zc->params.cParams.windowLog; + ZSTD_stats_t* stats = &zc->seqStore.stats; + + ZSTD_statsInit(stats); + + while (remaining) { + size_t cSize; + ZSTD_statsResetFreqs(stats); + + if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + if (remaining < blockSize) blockSize = remaining; + + if ((U32)(ip+blockSize - zc->base) 
> zc->loadedDictEnd + maxDist) { + /* enforce maxDist */ + U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist; + if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit; + if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit; + } + + cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + } else { + op[0] = (BYTE)(cSize>>16); + op[1] = (BYTE)(cSize>>8); + op[2] = (BYTE)cSize; + op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ + cSize += 3; + } + + remaining -= blockSize; + dstCapacity -= cSize; + ip += blockSize; + op += cSize; + } + + ZSTD_statsPrint(stats, zc->params.cParams.searchLength); + return op-ostart; +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + ZSTD_parameters params, U64 pledgedSrcSize) +{ BYTE* const op = (BYTE*)dst; + U32 const fcsId = params.fParams.contentSizeFlag ? 
+ (pledgedSrcSize>0) + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) : /* 0-3 */ + 0; + BYTE const fdescriptor = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */ + | (fcsId << 6) ); + size_t const hSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; + if (hSize > dstCapacity) return ERROR(dstSize_tooSmall); + + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + op[4] = fdescriptor; + switch(fcsId) + { + default: /* impossible */ + case 0 : break; + case 1 : op[5] = (BYTE)(pledgedSrcSize); break; + case 2 : MEM_writeLE16(op+5, (U16)(pledgedSrcSize-256)); break; + case 3 : MEM_writeLE64(op+5, (U64)(pledgedSrcSize)); break; + } + return hSize; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame) +{ + const BYTE* const ip = (const BYTE*) src; + size_t fhSize = 0; + + if (zc->stage==0) return ERROR(stage_wrong); + if (frame && (zc->stage==1)) { /* copy saved header */ + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + zc->stage = 2; + } + + /* Check if blocks follow each other */ + if (src != zc->nextSrc) { + /* not contiguous */ + size_t const delta = zc->nextSrc - ip; + zc->lowLimit = zc->dictLimit; + zc->dictLimit = (U32)(zc->nextSrc - zc->base); + zc->dictBase = zc->base; + zc->base -= delta; + zc->nextToUpdate = zc->dictLimit; + if (zc->dictLimit - zc->lowLimit < 8) zc->lowLimit = zc->dictLimit; /* too small extDict */ + } + + /* preemptive overflow correction */ + if (zc->lowLimit > (1<<30)) { + U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt); + U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1; + U32 const newLowLimit = zc->lowLimit & chainMask; /* preserve position % chainSize */ + U32 const correction = 
zc->lowLimit - newLowLimit; + ZSTD_reduceIndex(zc, correction); + zc->base += correction; + zc->dictBase += correction; + zc->lowLimit = newLowLimit; + zc->dictLimit -= correction; + if (zc->nextToUpdate < correction) zc->nextToUpdate = 0; + else zc->nextToUpdate -= correction; + } + + /* if input and dictionary overlap : reduce dictionary (presumed modified by input) */ + if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit)) { + zc->lowLimit = (U32)(ip + srcSize - zc->dictBase); + if (zc->lowLimit > zc->dictLimit) zc->lowLimit = zc->dictLimit; + } + + zc->nextSrc = ip + srcSize; + { size_t const cSize = frame ? + ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) : + ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + return cSize + fhSize; + } +} + + +size_t ZSTD_compressContinue (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1); +} + + +size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize > ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); + ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.cParams.searchLength); + return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 0); +} + + +static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + const BYTE* const ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + /* input becomes current prefix */ + zc->lowLimit = zc->dictLimit; + zc->dictLimit = (U32)(zc->nextSrc - zc->base); + zc->dictBase = zc->base; + zc->base += ip - zc->nextSrc; + zc->nextToUpdate = zc->dictLimit; + zc->loadedDictEnd = (U32)(iend - zc->base); + + zc->nextSrc = iend; + if (srcSize <= 8) return 0; + + switch(zc->params.cParams.strategy) + { + case ZSTD_fast: + 
ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.cParams.searchLength); + break; + + case ZSTD_btlazy2: + case ZSTD_btopt: + ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); + break; + + default: + return ERROR(GENERIC); /* strategy doesn't exist; impossible */ + } + + zc->nextToUpdate = zc->loadedDictEnd; + return 0; +} + + +/* Dictionary format : + Magic == ZSTD_DICT_MAGIC (4 bytes) + HUF_writeCTable(256) + Dictionary content +*/ +/*! ZSTD_loadDictEntropyStats() : + @return : size read from dictionary */ +static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +{ + /* note : magic number already checked */ + size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; + short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + + size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); + if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); + zc->flagStaticTables = 1; + dict = (const char*)dict + hufHeaderSize; + dictSize -= hufHeaderSize; + + offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + offcodeHeaderSize; + dictSize -= offcodeHeaderSize; + + matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, 
dictSize); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + matchlengthHeaderSize; + dictSize -= matchlengthHeaderSize; + + litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); + + return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize; +} + +/** ZSTD_compress_insertDictionary() : +* @return : 0, or an error code */ +static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +{ + if ((dict==NULL) || (dictSize<=4)) return 0; + + /* default : dict is pure content */ + if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); + + /* known magic number : dict is parsed for entropy stats and content */ + { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4; + if (ZSTD_isError(eSize)) return eSize; + return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); + } +} + + +/*! ZSTD_compressBegin_internal() : +* @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize) +{ + { U32 const hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN); + zc->hashLog3 = (params.cParams.searchLength==3) ? 
hashLog3 : 0; } + + { size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, 1); + if (ZSTD_isError(resetError)) return resetError; } + + return ZSTD_compress_insertDictionary(zc, dict, dictSize); +} + + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize) +{ + /* compression parameters verification and optimization */ + { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize); + if (ZSTD_isError(errorCode)) return errorCode; } + + return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize); +} + + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters params; + params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + params.fParams.contentSizeFlag = 0; + ZSTD_adjustCParams(¶ms.cParams, 0, dictSize); + ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel); + return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0); +} + + +size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) +{ + ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin compressionLevel=%d\n", zc->base, compressionLevel); + return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel); +} + + +/*! ZSTD_compressEnd() : +* Write frame epilogue. +* @return : nb of bytes written into dst (or an error code) */ +size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity) +{ + BYTE* op = (BYTE*)dst; + size_t fhSize = 0; + + /* not even init ! 
*/ + if (zc->stage==0) return ERROR(stage_wrong); + + /* special case : empty frame */ + if (zc->stage==1) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + op += fhSize; + zc->stage = 2; + } + + /* frame epilogue */ + if (dstCapacity < 3) return ERROR(dstSize_tooSmall); + op[0] = (BYTE)(bt_end << 6); + op[1] = 0; + op[2] = 0; + + zc->stage = 0; /* return to "created by not init" status */ + return 3+fhSize; +} + + +size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + { size_t const errorCode = ZSTD_copyCCtx(cctx, preparedCCtx); + if (ZSTD_isError(errorCode)) return errorCode; + } + { size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + + { size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst+cSize, dstCapacity-cSize); + if (ZSTD_isError(endSize)) return endSize; + return cSize + endSize; + } } +} + + +static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + + /* Init */ + { size_t const errorCode = ZSTD_compressBegin_internal(ctx, dict, dictSize, params, srcSize); + if(ZSTD_isError(errorCode)) return errorCode; } + + /* body (compression) */ + { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize); + if(ZSTD_isError(oSize)) return oSize; + op += oSize; + dstCapacity -= oSize; } + + /* Close frame */ + { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity); + if(ZSTD_isError(oSize)) return oSize; + op += oSize; } + + return (op - ostart); +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* 
dict,size_t dictSize, + ZSTD_parameters params) +{ + size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize); + if (ZSTD_isError(errorCode)) return errorCode; + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters params; + ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel); + params.cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize); + params.fParams.contentSizeFlag = 1; + ZSTD_adjustCParams(¶ms.cParams, srcSize, dictSize); + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); +} + +size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +{ + ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel); + return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +{ + size_t result; + ZSTD_CCtx ctxBody; + memset(&ctxBody, 0, sizeof(ctxBody)); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; just free heap content */ + return result; +} + + +/*-===== Pre-defined compression levels =====-*/ + #define ZSTD_DEFAULT_CLEVEL 5 #define ZSTD_MAX_CLEVEL 22 -unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } - -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" */ - /* W, C, H, S, L, TL, strat */ - { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* 
level 0 - never used */ - { 19, 13, 14, 1, 7, 4, ZSTD_fast }, /* level 1 */ - { 19, 15, 16, 1, 6, 4, ZSTD_fast }, /* level 2 */ - { 20, 18, 20, 1, 6, 4, ZSTD_fast }, /* level 3 */ - { 20, 13, 17, 2, 5, 4, ZSTD_greedy }, /* level 4.*/ - { 20, 15, 18, 3, 5, 4, ZSTD_greedy }, /* level 5 */ - { 21, 16, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */ - { 21, 17, 20, 3, 5, 4, ZSTD_lazy }, /* level 7 */ - { 21, 18, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 8.*/ - { 21, 20, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 9 */ - { 21, 19, 21, 4, 5, 4, ZSTD_lazy2 }, /* level 10 */ - { 22, 20, 22, 4, 5, 4, ZSTD_lazy2 }, /* level 11 */ - { 22, 20, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 13 */ - { 22, 21, 22, 6, 5, 4, ZSTD_lazy2 }, /* level 14 */ - { 22, 21, 21, 5, 5, 4, ZSTD_btlazy2 }, /* level 15 */ - { 23, 22, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */ - { 23, 23, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 17.*/ - { 23, 23, 22, 6, 5, 24, ZSTD_btopt }, /* level 18.*/ - { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19.*/ - { 25, 26, 23, 7, 3, 64, ZSTD_btopt }, /* level 20.*/ - { 26, 26, 23, 7, 3,256, ZSTD_btopt }, /* level 21.*/ - { 27, 27, 25, 9, 3,512, ZSTD_btopt }, /* level 22.*/ -}, -{ /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 */ - { 18, 13, 14, 1, 6, 4, ZSTD_fast }, /* level 1 */ - { 18, 15, 17, 1, 5, 4, ZSTD_fast }, /* level 2 */ - { 18, 13, 15, 1, 5, 4, ZSTD_greedy }, /* level 3.*/ - { 18, 15, 17, 1, 5, 4, ZSTD_greedy }, /* level 4.*/ - { 18, 16, 17, 4, 5, 4, ZSTD_greedy }, /* level 5 */ - { 18, 17, 17, 5, 5, 4, ZSTD_greedy }, /* level 6 */ - { 18, 17, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ - { 18, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ - { 18, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 11.*/ - { 18, 18, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 12.*/ - { 18, 19, 17, 7, 4, 4, 
ZSTD_btlazy2 }, /* level 13 */ - { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 18, 8, 4, 24, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 18, 8, 3, 48, ZSTD_btopt }, /* level 16.*/ - { 18, 19, 18, 8, 3, 96, ZSTD_btopt }, /* level 17.*/ - { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ - { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ - { 18, 19, 18, 11, 3,512, ZSTD_btopt }, /* level 20.*/ - { 18, 19, 18, 12, 3,512, ZSTD_btopt }, /* level 21.*/ - { 18, 19, 18, 13, 3,512, ZSTD_btopt }, /* level 22.*/ -}, -{ /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ - { 17, 12, 13, 1, 6, 4, ZSTD_fast }, /* level 1 */ - { 17, 13, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */ - { 17, 13, 14, 2, 5, 4, ZSTD_greedy }, /* level 3 */ - { 17, 13, 15, 3, 4, 4, ZSTD_greedy }, /* level 4 */ - { 17, 15, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */ - { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 15, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */ - { 17, 17, 17, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */ - { 17, 18, 17, 6, 4, 4, ZSTD_btlazy2 }, /* level 13.*/ - { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/ - { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ - { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ - { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ - { 17, 18, 17, 9, 3,256, ZSTD_btopt }, /* level 20.*/ - { 17, 18, 17, 10, 3,256, ZSTD_btopt }, /* level 21.*/ - { 17, 18, 17, 11, 3,256, ZSTD_btopt }, /* level 22.*/ -}, -{ /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */ - { 14, 14, 14, 1, 
4, 4, ZSTD_fast }, /* level 1 */ - { 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */ - { 14, 14, 14, 4, 4, 4, ZSTD_greedy }, /* level 3.*/ - { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 4.*/ - { 14, 14, 14, 4, 4, 4, ZSTD_lazy2 }, /* level 5 */ - { 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/ - { 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ - { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ - { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ - { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ - { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 20.*/ - { 14, 15, 15, 9, 3,256, ZSTD_btopt }, /* level 21.*/ - { 14, 15, 15, 10, 3,256, ZSTD_btopt }, /* level 22.*/ -}, -}; - -/*! ZSTD_getParams() : -* @return ZSTD_parameters structure for a selected compression level and srcSize. -* `srcSize` value is optional, select 0 if not known */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize) -{ - ZSTD_compressionParameters cp; - size_t const addedSize = srcSize ? 0 : 500; - U64 const rSize = srcSize+dictSize ? 
srcSize+dictSize+addedSize : (U64)-1; - U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ +unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" */ + /* W, C, H, S, L, TL, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ + { 19, 13, 14, 1, 7, 4, ZSTD_fast }, /* level 1 */ + { 19, 15, 16, 1, 6, 4, ZSTD_fast }, /* level 2 */ + { 20, 18, 20, 1, 6, 4, ZSTD_fast }, /* level 3 */ + { 20, 13, 17, 2, 5, 4, ZSTD_greedy }, /* level 4.*/ + { 20, 15, 18, 3, 5, 4, ZSTD_greedy }, /* level 5 */ + { 21, 16, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */ + { 21, 17, 20, 3, 5, 4, ZSTD_lazy }, /* level 7 */ + { 21, 18, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 8.*/ + { 21, 20, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 9 */ + { 21, 19, 21, 4, 5, 4, ZSTD_lazy2 }, /* level 10 */ + { 22, 20, 22, 4, 5, 4, ZSTD_lazy2 }, /* level 11 */ + { 22, 20, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 13 */ + { 22, 21, 22, 6, 5, 4, ZSTD_lazy2 }, /* level 14 */ + { 22, 21, 21, 5, 5, 4, ZSTD_btlazy2 }, /* level 15 */ + { 23, 22, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */ + { 23, 23, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 17.*/ + { 23, 23, 22, 6, 5, 24, ZSTD_btopt }, /* level 18.*/ + { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19.*/ + { 25, 26, 23, 7, 3, 64, ZSTD_btopt }, /* level 20.*/ + { 26, 26, 23, 7, 3,256, ZSTD_btopt }, /* level 21.*/ + { 27, 27, 25, 9, 3,512, ZSTD_btopt }, /* level 22.*/ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 */ + { 18, 13, 14, 1, 6, 4, ZSTD_fast }, /* level 1 */ + { 18, 15, 17, 1, 5, 4, ZSTD_fast }, /* level 2 */ + { 18, 13, 15, 1, 5, 4, ZSTD_greedy }, /* level 3.*/ + { 18, 15, 17, 1, 5, 4, ZSTD_greedy }, /* level 4.*/ + { 18, 16, 17, 4, 5, 4, ZSTD_greedy }, /* level 5 
*/ + { 18, 17, 17, 5, 5, 4, ZSTD_greedy }, /* level 6 */ + { 18, 17, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 18, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ + { 18, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 11.*/ + { 18, 18, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 12.*/ + { 18, 19, 17, 7, 4, 4, ZSTD_btlazy2 }, /* level 13 */ + { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 18, 8, 4, 24, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 18, 8, 3, 48, ZSTD_btopt }, /* level 16.*/ + { 18, 19, 18, 8, 3, 96, ZSTD_btopt }, /* level 17.*/ + { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ + { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ + { 18, 19, 18, 11, 3,512, ZSTD_btopt }, /* level 20.*/ + { 18, 19, 18, 12, 3,512, ZSTD_btopt }, /* level 21.*/ + { 18, 19, 18, 13, 3,512, ZSTD_btopt }, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ + { 17, 12, 13, 1, 6, 4, ZSTD_fast }, /* level 1 */ + { 17, 13, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */ + { 17, 13, 14, 2, 5, 4, ZSTD_greedy }, /* level 3 */ + { 17, 13, 15, 3, 4, 4, ZSTD_greedy }, /* level 4 */ + { 17, 15, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 15, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */ + { 17, 17, 17, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */ + { 17, 18, 17, 6, 4, 4, ZSTD_btlazy2 }, /* level 13.*/ + { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/ + { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 17, 18, 17, 7, 
3,256, ZSTD_btopt }, /* level 18.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ + { 17, 18, 17, 9, 3,256, ZSTD_btopt }, /* level 20.*/ + { 17, 18, 17, 10, 3,256, ZSTD_btopt }, /* level 21.*/ + { 17, 18, 17, 11, 3,256, ZSTD_btopt }, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */ + { 14, 14, 14, 1, 4, 4, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */ + { 14, 14, 14, 4, 4, 4, ZSTD_greedy }, /* level 3.*/ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 4.*/ + { 14, 14, 14, 4, 4, 4, ZSTD_lazy2 }, /* level 5 */ + { 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/ + { 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 20.*/ + { 14, 15, 15, 9, 3,256, ZSTD_btopt }, /* level 21.*/ + { 14, 15, 15, 10, 3,256, ZSTD_btopt }, /* level 22.*/ +}, +}; + +/*! ZSTD_getParams() : +* @return ZSTD_parameters structure for a selected compression level and srcSize. +* `srcSize` value is optional, select 0 if not known */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize) +{ + ZSTD_compressionParameters cp; + size_t const addedSize = srcSize ? 0 : 500; + U64 const rSize = srcSize+dictSize ? 
srcSize+dictSize+addedSize : (U64)-1; + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ if (compressionLevel < 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; if (compressionLevel==0) compressionLevel = 1; - if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; - cp = ZSTD_defaultCParameters[tableID][compressionLevel]; - if (MEM_32bits()) { /* auto-correction, for 32-bits mode */ - if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX; - if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX; - if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX; - } - return cp; -} + if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; + cp = ZSTD_defaultCParameters[tableID][compressionLevel]; + if (MEM_32bits()) { /* auto-correction, for 32-bits mode */ + if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX; + if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX; + if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX; + } + return cp; +} diff --git a/contrib/libs/zstd06/decompress/zstd_decompress.c b/contrib/libs/zstd06/decompress/zstd_decompress.c index c4ce3f81d86..177e9c8a210 100644 --- a/contrib/libs/zstd06/decompress/zstd_decompress.c +++ b/contrib/libs/zstd06/decompress/zstd_decompress.c @@ -1,88 +1,88 @@ -/* - zstd - standard compression library - Copyright (C) 2014-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd homepage : http://www.zstd.net -*/ - -/* *************************************************************** -* Tuning parameters -*****************************************************************/ -/*! - * HEAPMODE : - * Select how default decompression function ZSTD_decompress() will allocate memory, - * in memory stack (0), or in memory heap (1, requires malloc()) - */ -#ifndef ZSTD_HEAPMODE -# define ZSTD_HEAPMODE 1 -#endif - -/*! 
-* LEGACY_SUPPORT : -* if set to 1, ZSTD_decompress() can decode older formats (v0.1+) -*/ -#ifndef ZSTD_LEGACY_SUPPORT -# define ZSTD_LEGACY_SUPPORT 0 -#endif - - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* calloc */ -#include /* memcpy, memmove */ -#include /* debug only : printf */ -#include "mem.h" /* low level memory routines */ -#include "zstd_internal.h" -#include "fse_static.h" +/* + zstd - standard compression library + Copyright (C) 2014-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - zstd homepage : http://www.zstd.net +*/ + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() will allocate memory, + * in memory stack (0), or in memory heap (1, requires malloc()) + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include /* calloc */ +#include /* memcpy, memmove */ +#include /* debug only : printf */ +#include "mem.h" /* low level memory routines */ +#include "zstd_internal.h" +#include "fse_static.h" #include "huf_static.h" - -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) -# include "zstd_legacy.h" -#endif - - -/*-******************************************************* -* Compiler specifics -*********************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4324) /* disable: C4324: padded structure */ -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - - + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) +# include "zstd_legacy.h" +#endif + + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static 
__forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + /*-************************************* * Macros ***************************************/ @@ -91,892 +91,892 @@ #define HUF_isError ERR_isError -/*_******************************************************* -* Memory operations -**********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } - - -/*-************************************************************* -* Context management -***************************************************************/ -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage; - -struct ZSTD_DCtx_s -{ - FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; - FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; - FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; - unsigned hufTableX4[HUF_DTABLE_SIZE(HufLog)]; - const void* previousDstEnd; - const void* base; - const void* vBase; - const void* dictEnd; - size_t expected; - size_t headerSize; - ZSTD_frameParams fParams; - blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ - ZSTD_dStage stage; - U32 flagRepeatTable; - const BYTE* litPtr; - size_t litBufSize; - size_t litSize; - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; -}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */ - -size_t ZSTD_sizeofDCtx (void) { return sizeof(ZSTD_DCtx); } /* non published interface */ - -size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) -{ - dctx->expected = 
ZSTD_frameHeaderSize_min; - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->previousDstEnd = NULL; - dctx->base = NULL; - dctx->vBase = NULL; - dctx->dictEnd = NULL; - dctx->hufTableX4[0] = HufLog; - dctx->flagRepeatTable = 0; - return 0; -} - -ZSTD_DCtx* ZSTD_createDCtx(void) -{ - ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx)); - if (dctx==NULL) return NULL; - ZSTD_decompressBegin(dctx); - return dctx; -} - -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) -{ - free(dctx); - return 0; /* reserved as a potential error code in the future */ -} - -void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) -{ - memcpy(dstDCtx, srcDCtx, - sizeof(ZSTD_DCtx) - (ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max)); /* no need to copy workspace */ -} - - -/*-************************************************************* -* Decompression section -***************************************************************/ - -/* Frame format description - Frame Header - [ Block Header - Block ] - Frame End - 1) Frame Header - - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h) - - 1 byte - Frame Descriptor - 2) Block Header - - 3 bytes, starting with a 2-bits descriptor - Uncompressed, Compressed, Frame End, unused - 3) Block - See Block Format Description - 4) Frame End - - 3 bytes, compatible with Block Header -*/ - - -/* Frame descriptor - - 1 byte, using : - bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h) - bit 4 : minmatch 4(0) or 3(1) - bit 5 : reserved (must be zero) - bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes - - Optional : content size (0, 1, 2 or 8 bytes) - 0 : unknown - 1 : 0-255 bytes - 2 : 256 - 65535+256 - 8 : up to 16 exa -*/ - - -/* Compressed Block, format description - - Block = Literal Section - Sequences Section - Prerequisite : size of (compressed) block, maximum size of regenerated data - - 1) Literal Section - - 1.1) Header : 1-5 bytes - flags: 2 bits - 00 compressed by Huff0 - 
01 unused - 10 is Raw (uncompressed) - 11 is Rle - Note : using 01 => Huff0 with precomputed table ? - Note : delta map ? => compressed ? - - 1.1.1) Huff0-compressed literal block : 3-5 bytes - srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream - srcSize < 1 KB => 3 bytes (2-2-10-10) - srcSize < 16KB => 4 bytes (2-2-14-14) - else => 5 bytes (2-2-18-18) - big endian convention - - 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes - size : 5 bits: (IS_RAW<<6) + (0<<4) + size - 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8) - size&255 - 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16) - size>>8&255 - size&255 - - 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes - size : 5 bits: (IS_RLE<<6) + (0<<4) + size - 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8) - size&255 - 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16) - size>>8&255 - size&255 - - 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes - srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream - srcSize < 1 KB => 3 bytes (2-2-10-10) - srcSize < 16KB => 4 bytes (2-2-14-14) - else => 5 bytes (2-2-18-18) - big endian convention - - 1- CTable available (stored into workspace ?) - 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?) - - - 1.2) Literal block content - - 1.2.1) Huff0 block, using sizes from header - See Huff0 format - - 1.2.2) Huff0 block, using prepared table - - 1.2.3) Raw content - - 1.2.4) single byte - - - 2) Sequences section - TO DO -*/ - -/** ZSTD_frameHeaderSize() : -* srcSize must be >= ZSTD_frameHeaderSize_min. 
-* @return : size of the Frame Header */ -static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) -{ - if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } + + +/*-************************************************************* +* Context management +***************************************************************/ +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage; + +struct ZSTD_DCtx_s +{ + FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; + FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; + FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; + unsigned hufTableX4[HUF_DTABLE_SIZE(HufLog)]; + const void* previousDstEnd; + const void* base; + const void* vBase; + const void* dictEnd; + size_t expected; + size_t headerSize; + ZSTD_frameParams fParams; + blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ + ZSTD_dStage stage; + U32 flagRepeatTable; + const BYTE* litPtr; + size_t litBufSize; + size_t litSize; + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; +}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */ + +size_t ZSTD_sizeofDCtx (void) { return sizeof(ZSTD_DCtx); } /* non published interface */ + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize_min; + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + dctx->vBase = NULL; + dctx->dictEnd = NULL; + dctx->hufTableX4[0] = HufLog; + dctx->flagRepeatTable = 0; + return 0; +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx)); + 
if (dctx==NULL) return NULL; + ZSTD_decompressBegin(dctx); + return dctx; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + free(dctx); + return 0; /* reserved as a potential error code in the future */ +} + +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + memcpy(dstDCtx, srcDCtx, + sizeof(ZSTD_DCtx) - (ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max)); /* no need to copy workspace */ +} + + +/*-************************************************************* +* Decompression section +***************************************************************/ + +/* Frame format description + Frame Header - [ Block Header - Block ] - Frame End + 1) Frame Header + - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h) + - 1 byte - Frame Descriptor + 2) Block Header + - 3 bytes, starting with a 2-bits descriptor + Uncompressed, Compressed, Frame End, unused + 3) Block + See Block Format Description + 4) Frame End + - 3 bytes, compatible with Block Header +*/ + + +/* Frame descriptor + + 1 byte, using : + bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h) + bit 4 : minmatch 4(0) or 3(1) + bit 5 : reserved (must be zero) + bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes + + Optional : content size (0, 1, 2 or 8 bytes) + 0 : unknown + 1 : 0-255 bytes + 2 : 256 - 65535+256 + 8 : up to 16 exa +*/ + + +/* Compressed Block, format description + + Block = Literal Section - Sequences Section + Prerequisite : size of (compressed) block, maximum size of regenerated data + + 1) Literal Section + + 1.1) Header : 1-5 bytes + flags: 2 bits + 00 compressed by Huff0 + 01 unused + 10 is Raw (uncompressed) + 11 is Rle + Note : using 01 => Huff0 with precomputed table ? + Note : delta map ? => compressed ? 
+ + 1.1.1) Huff0-compressed literal block : 3-5 bytes + srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream + srcSize < 1 KB => 3 bytes (2-2-10-10) + srcSize < 16KB => 4 bytes (2-2-14-14) + else => 5 bytes (2-2-18-18) + big endian convention + + 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes + size : 5 bits: (IS_RAW<<6) + (0<<4) + size + 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8) + size&255 + 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255 + + 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes + size : 5 bits: (IS_RLE<<6) + (0<<4) + size + 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8) + size&255 + 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255 + + 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes + srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream + srcSize < 1 KB => 3 bytes (2-2-10-10) + srcSize < 16KB => 4 bytes (2-2-14-14) + else => 5 bytes (2-2-18-18) + big endian convention + + 1- CTable available (stored into workspace ?) + 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?) + + + 1.2) Literal block content + + 1.2.1) Huff0 block, using sizes from header + See Huff0 format + + 1.2.2) Huff0 block, using prepared table + + 1.2.3) Raw content + + 1.2.4) single byte + + + 2) Sequences section + TO DO +*/ + +/** ZSTD_frameHeaderSize() : +* srcSize must be >= ZSTD_frameHeaderSize_min. +* @return : size of the Frame Header */ +static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); { U32 const fcsId = (((const BYTE*)src)[4]) >> 6; return ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; } -} - - -/** ZSTD_getFrameParams() : -* decode Frame Header, or provide expected `srcSize`. 
-* @return : 0, `fparamsPtr` is correctly filled, -* >0, `srcSize` is too small, result is expected `srcSize`, -* or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - - if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_min; - if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown); - - /* ensure there is enough `srcSize` to fully read/decode frame header */ - { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); - if (srcSize < fhsize) return fhsize; } - - memset(fparamsPtr, 0, sizeof(*fparamsPtr)); - { BYTE const frameDesc = ip[4]; - fparamsPtr->windowLog = (frameDesc & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - if ((frameDesc & 0x20) != 0) return ERROR(frameParameter_unsupported); /* reserved 1 bit */ - switch(frameDesc >> 6) /* fcsId */ - { - default: /* impossible */ - case 0 : fparamsPtr->frameContentSize = 0; break; - case 1 : fparamsPtr->frameContentSize = ip[5]; break; - case 2 : fparamsPtr->frameContentSize = MEM_readLE16(ip+5)+256; break; - case 3 : fparamsPtr->frameContentSize = MEM_readLE64(ip+5); break; - } } - return 0; -} - - -/** ZSTD_decodeFrameHeader() : -* `srcSize` must be the size provided by ZSTD_frameHeaderSize(). -* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize) -{ - size_t const result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize); - if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits); - return result; -} - - -typedef struct -{ - blockType_t blockType; - U32 origSize; -} blockProperties_t; - -/*! 
ZSTD_getcBlockSize() : -* Provides the size of compressed block from block header `src` */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) -{ - const BYTE* const in = (const BYTE* const)src; - U32 cSize; - - if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - - bpPtr->blockType = (blockType_t)((*in) >> 6); - cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); - bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; - - if (bpPtr->blockType == bt_end) return 0; - if (bpPtr->blockType == bt_rle) return 1; - return cSize; -} - - -static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); - return srcSize; -} - - -/*! ZSTD_decodeLiteralsBlock() : - @return : nb of bytes read from src (< srcSize ) */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ -{ - const BYTE* const istart = (const BYTE*) src; - - /* any compressed block with literals segment must be at least this size */ - if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); - - switch(istart[0]>> 6) - { - case IS_HUF: - { size_t litSize, litCSize, singleStream=0; - U32 lhSize = ((istart[0]) >> 4) & 3; - if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */ - switch(lhSize) - { - case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ - /* 2 - 2 - 10 - 10 */ - lhSize=3; - singleStream = istart[0] & 16; - litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); - litCSize = ((istart[1] & 3) << 8) + istart[2]; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize=4; - litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6); - litCSize = ((istart[2] & 63) << 8) + istart[3]; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize=5; - 
litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2); - litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4]; - break; - } - if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); - if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); - - if (HUF_isError(singleStream ? - HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) : - HUF_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) )) - return ERROR(corruption_detected); - - dctx->litPtr = dctx->litBuffer; - dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8; - dctx->litSize = litSize; - return litCSize + lhSize; - } - case IS_PCH: - { size_t litSize, litCSize; - U32 lhSize = ((istart[0]) >> 4) & 3; - if (lhSize != 1) /* only case supported for now : small litSize, single stream */ - return ERROR(corruption_detected); - if (!dctx->flagRepeatTable) - return ERROR(dictionary_corrupted); - - /* 2 - 2 - 10 - 10 */ - lhSize=3; - litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); - litCSize = ((istart[1] & 3) << 8) + istart[2]; - - { size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4); - if (HUF_isError(errorCode)) return ERROR(corruption_detected); - } - dctx->litPtr = dctx->litBuffer; - dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH; - dctx->litSize = litSize; - return litCSize + lhSize; - } - case IS_RAW: - { size_t litSize; - U32 lhSize = ((istart[0]) >> 4) & 3; - switch(lhSize) - { - case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ - lhSize=1; - litSize = istart[0] & 31; - break; - case 2: - litSize = ((istart[0] & 15) << 8) + istart[1]; - break; - case 3: - litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2]; - break; - } - - if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - if (litSize+lhSize > srcSize) return ERROR(corruption_detected); - 
memcpy(dctx->litBuffer, istart+lhSize, litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8; - dctx->litSize = litSize; - return lhSize+litSize; - } - /* direct reference into compressed stream */ - dctx->litPtr = istart+lhSize; - dctx->litBufSize = srcSize-lhSize; - dctx->litSize = litSize; - return lhSize+litSize; - } - case IS_RLE: - { size_t litSize; - U32 lhSize = ((istart[0]) >> 4) & 3; - switch(lhSize) - { - case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ - lhSize = 1; - litSize = istart[0] & 31; - break; - case 2: - litSize = ((istart[0] & 15) << 8) + istart[1]; - break; - case 3: - litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2]; - if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ - break; - } - if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); - memset(dctx->litBuffer, istart[lhSize], litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH; - dctx->litSize = litSize; - return lhSize+1; - } - default: - return ERROR(corruption_detected); /* impossible */ - } -} - - -/*! 
ZSTD_buildSeqTable() : - @return : nb bytes read from src, - or an error code if it fails, testable with ZSTD_isError() -*/ -FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, - const void* src, size_t srcSize, - const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable) -{ - switch(type) - { - case FSE_ENCODING_RLE : - if (!srcSize) return ERROR(srcSize_wrong); - if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); - FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ - return 1; - case FSE_ENCODING_RAW : - FSE_buildDTable(DTable, defaultNorm, max, defaultLog); - return 0; - case FSE_ENCODING_STATIC: - if (!flagRepeatTable) return ERROR(corruption_detected); - return 0; - default : /* impossible */ - case FSE_ENCODING_DYNAMIC : - { U32 tableLog; - S16 norm[MaxSeq+1]; - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - if (FSE_isError(headerSize)) return ERROR(corruption_detected); - if (tableLog > maxLog) return ERROR(corruption_detected); - FSE_buildDTable(DTable, norm, max, tableLog); - return headerSize; - } } -} - - -size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr, - FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, U32 flagRepeatTable, - const void* src, size_t srcSize) -{ - const BYTE* const istart = (const BYTE* const)src; - const BYTE* const iend = istart + srcSize; - const BYTE* ip = istart; - - /* check */ - if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); - - /* SeqHead */ - { int nbSeq = *ip++; - if (!nbSeq) { *nbSeqPtr=0; return 1; } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - else - nbSeq = ((nbSeq-0x80)<<8) + *ip++; - } - *nbSeqPtr = nbSeq; - } - - /* FSE table descriptors */ - { U32 const LLtype = *ip >> 6; - U32 const Offtype = (*ip >> 4) & 3; - U32 const MLtype = (*ip >> 2) & 3; - ip++; - - /* check */ - if (ip > iend-3) return ERROR(srcSize_wrong); /* 
min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - - /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } - { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } - { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } } - - return ip-istart; -} - - -typedef struct { - size_t litLength; - size_t matchLength; - size_t offset; -} seq_t; - -typedef struct { - BIT_DStream_t DStream; - FSE_DState_t stateLL; - FSE_DState_t stateOffb; - FSE_DState_t stateML; - size_t prevOffset[ZSTD_REP_INIT]; -} seqState_t; - - - -static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) -{ - /* Literal length */ - U32 const llCode = FSE_peekSymbol(&(seqState->stateLL)); - U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); - U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ - - U32 const llBits = LL_bits[llCode]; - U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode; - U32 const totalBits = llBits+mlBits+ofBits; - - static const U32 LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; - - static const U32 ML_base[MaxML+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 
0x400, 0x800, - 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; - - static const U32 OF_base[MaxOff+1] = { - 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, - 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, - 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, - 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1 }; - - /* sequence */ - { size_t offset; - if (!ofCode) - offset = 0; - else { - offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits); /* <= 26 bits */ - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - } - - if (offset < ZSTD_REP_NUM) { - if (llCode == 0 && offset <= 1) offset = 1-offset; - - if (offset != 0) { - size_t temp = seqState->prevOffset[offset]; - if (offset != 1) { - seqState->prevOffset[2] = seqState->prevOffset[1]; - } - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - - } else { - offset = seqState->prevOffset[0]; - } - } else { - offset -= ZSTD_REP_MOVE; - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq->offset = offset; - } - - seq->matchLength = ML_base[mlCode] + MINMATCH + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream)); - - seq->litLength = LL_base[llCode] + ((llCode>15) ? 
BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */ - if (MEM_32bits() || - (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream)); - - /* ANS state update */ - FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); /* <= 9 bits */ - FSE_updateState(&(seqState->stateML), &(seqState->DStream)); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* <= 18 bits */ - FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <= 8 bits */ -} - - +} + + +/** ZSTD_getFrameParams() : +* decode Frame Header, or provide expected `srcSize`. +* @return : 0, `fparamsPtr` is correctly filled, +* >0, `srcSize` is too small, result is expected `srcSize`, +* or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + + if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_min; + if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown); + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); + if (srcSize < fhsize) return fhsize; } + + memset(fparamsPtr, 0, sizeof(*fparamsPtr)); + { BYTE const frameDesc = ip[4]; + fparamsPtr->windowLog = (frameDesc & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + if ((frameDesc & 0x20) != 0) return ERROR(frameParameter_unsupported); /* reserved 1 bit */ + switch(frameDesc >> 6) /* fcsId */ + { + default: /* impossible */ + case 0 : fparamsPtr->frameContentSize = 0; break; + case 1 : fparamsPtr->frameContentSize = ip[5]; break; + case 2 : fparamsPtr->frameContentSize = MEM_readLE16(ip+5)+256; break; + case 3 : fparamsPtr->frameContentSize = MEM_readLE64(ip+5); break; + } } + return 0; +} + + +/** ZSTD_decodeFrameHeader() : +* `srcSize` must be the size provided by ZSTD_frameHeaderSize(). 
+* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize) +{ + size_t const result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize); + if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits); + return result; +} + + +typedef struct +{ + blockType_t blockType; + U32 origSize; +} blockProperties_t; + +/*! ZSTD_getcBlockSize() : +* Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +{ + const BYTE* const in = (const BYTE* const)src; + U32 cSize; + + if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + + bpPtr->blockType = (blockType_t)((*in) >> 6); + cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); + bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; + + if (bpPtr->blockType == bt_end) return 0; + if (bpPtr->blockType == bt_rle) return 1; + return cSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +/*! 
ZSTD_decodeLiteralsBlock() : + @return : nb of bytes read from src (< srcSize ) */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + const BYTE* const istart = (const BYTE*) src; + + /* any compressed block with literals segment must be at least this size */ + if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); + + switch(istart[0]>> 6) + { + case IS_HUF: + { size_t litSize, litCSize, singleStream=0; + U32 lhSize = ((istart[0]) >> 4) & 3; + if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */ + switch(lhSize) + { + case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ + /* 2 - 2 - 10 - 10 */ + lhSize=3; + singleStream = istart[0] & 16; + litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); + litCSize = ((istart[1] & 3) << 8) + istart[2]; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize=4; + litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6); + litCSize = ((istart[2] & 63) << 8) + istart[3]; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize=5; + litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2); + litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4]; + break; + } + if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); + if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + + if (HUF_isError(singleStream ? 
+ HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) : + HUF_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) )) + return ERROR(corruption_detected); + + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8; + dctx->litSize = litSize; + return litCSize + lhSize; + } + case IS_PCH: + { size_t litSize, litCSize; + U32 lhSize = ((istart[0]) >> 4) & 3; + if (lhSize != 1) /* only case supported for now : small litSize, single stream */ + return ERROR(corruption_detected); + if (!dctx->flagRepeatTable) + return ERROR(dictionary_corrupted); + + /* 2 - 2 - 10 - 10 */ + lhSize=3; + litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); + litCSize = ((istart[1] & 3) << 8) + istart[2]; + + { size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4); + if (HUF_isError(errorCode)) return ERROR(corruption_detected); + } + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH; + dctx->litSize = litSize; + return litCSize + lhSize; + } + case IS_RAW: + { size_t litSize; + U32 lhSize = ((istart[0]) >> 4) & 3; + switch(lhSize) + { + case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ + lhSize=1; + litSize = istart[0] & 31; + break; + case 2: + litSize = ((istart[0] & 15) << 8) + istart[1]; + break; + case 3: + litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2]; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + if (litSize+lhSize > srcSize) return ERROR(corruption_detected); + memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8; + dctx->litSize = litSize; + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litBufSize = srcSize-lhSize; + dctx->litSize = litSize; + return 
lhSize+litSize; + } + case IS_RLE: + { size_t litSize; + U32 lhSize = ((istart[0]) >> 4) & 3; + switch(lhSize) + { + case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ + lhSize = 1; + litSize = istart[0] & 31; + break; + case 2: + litSize = ((istart[0] & 15) << 8) + istart[1]; + break; + case 3: + litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2]; + if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ + break; + } + if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); + memset(dctx->litBuffer, istart[lhSize], litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH; + dctx->litSize = litSize; + return lhSize+1; + } + default: + return ERROR(corruption_detected); /* impossible */ + } +} + + +/*! ZSTD_buildSeqTable() : + @return : nb bytes read from src, + or an error code if it fails, testable with ZSTD_isError() +*/ +FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, + const void* src, size_t srcSize, + const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable) +{ + switch(type) + { + case FSE_ENCODING_RLE : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ + return 1; + case FSE_ENCODING_RAW : + FSE_buildDTable(DTable, defaultNorm, max, defaultLog); + return 0; + case FSE_ENCODING_STATIC: + if (!flagRepeatTable) return ERROR(corruption_detected); + return 0; + default : /* impossible */ + case FSE_ENCODING_DYNAMIC : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, 
tableLog); + return headerSize; + } } +} + + +size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, U32 flagRepeatTable, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + + /* check */ + if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); + + /* SeqHead */ + { int nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + else + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + *nbSeqPtr = nbSeq; + } + + /* FSE table descriptors */ + { U32 const LLtype = *ip >> 6; + U32 const Offtype = (*ip >> 4) & 3; + U32 const MLtype = (*ip >> 2) & 3; + ip++; + + /* check */ + if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + + /* Build DTables */ + { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; +} seq_t; + +typedef struct { + BIT_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + size_t prevOffset[ZSTD_REP_INIT]; +} seqState_t; + + + +static void 
ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) +{ + /* Literal length */ + U32 const llCode = FSE_peekSymbol(&(seqState->stateLL)); + U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); + U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ + + U32 const llBits = LL_bits[llCode]; + U32 const mlBits = ML_bits[mlCode]; + U32 const ofBits = ofCode; + U32 const totalBits = llBits+mlBits+ofBits; + + static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 ML_base[MaxML+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 OF_base[MaxOff+1] = { + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, + 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, + 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1 }; + + /* sequence */ + { size_t offset; + if (!ofCode) + offset = 0; + else { + offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits); /* <= 26 bits */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); + } + + if (offset < ZSTD_REP_NUM) { + if (llCode == 0 && offset <= 1) offset = 1-offset; + + if (offset != 0) { + size_t temp = seqState->prevOffset[offset]; + if (offset != 1) { + seqState->prevOffset[2] = seqState->prevOffset[1]; + } + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + + } else { + offset = seqState->prevOffset[0]; + } + } else { + offset -= ZSTD_REP_MOVE; + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + 
seqState->prevOffset[0] = offset; + } + seq->offset = offset; + } + + seq->matchLength = ML_base[mlCode] + MINMATCH + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream)); + + seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */ + if (MEM_32bits() || + (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream)); + + /* ANS state update */ + FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); /* <= 9 bits */ + FSE_updateState(&(seqState->stateML), &(seqState->DStream)); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* <= 18 bits */ + FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <= 8 bits */ +} + + FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit_8, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_8 = oend-8; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - - /* check */ - if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ - if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ - if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */ - - /* copy Literals */ - ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */ - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match 
*/ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); - match = dictEnd - (base-match); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - } } - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-(16-MINMATCH)) { - if (op < oend_8) { - ZSTD_wildcopy(op, match, oend_8 - op); - match += oend_8 - op; - op = oend_8; - } - while (op < oMatchEnd) *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ - } - return sequenceLength; -} - - -static size_t ZSTD_decompressSequences( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; - const BYTE* const litEnd = litPtr + dctx->litSize; - FSE_DTable* DTableLL = dctx->LLTable; - FSE_DTable* DTableML = dctx->MLTable; - FSE_DTable* DTableOffb = dctx->OffTable; - const BYTE* 
const base = (const BYTE*) (dctx->base); - const BYTE* const vBase = (const BYTE*) (dctx->vBase); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - int nbSeq; - - /* Build Decoding Tables */ - { size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->flagRepeatTable, ip, seqSize); - if (ZSTD_isError(seqHSize)) return seqHSize; - ip += seqHSize; - dctx->flagRepeatTable = 0; - } - - /* Regen sequences */ - if (nbSeq) { - seq_t sequence; - seqState_t seqState; - - memset(&sequence, 0, sizeof(sequence)); - sequence.offset = REPCODE_STARTVALUE; - { U32 i; for (i=0; i= 5810037) && (pos < 5810400)) - printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n", - pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); -#endif - - { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; - } } - - /* check if reached exact end */ - if (nbSeq) return ERROR(corruption_detected); - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ - if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op-ostart; -} - - -static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); - dctx->base = dst; - dctx->previousDstEnd = dst; - } -} - - -static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ /* blockType == blockCompressed */ - const BYTE* ip = (const BYTE*)src; - - if (srcSize >= ZSTD_BLOCKSIZE_MAX) return 
ERROR(srcSize_wrong); - - /* Decode literals sub-block */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; - } - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); -} - - -size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - ZSTD_checkContinuity(dctx, dst); - return ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); -} - - + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit_8, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_8 = oend-8; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ + if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ + if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + + /* copy Literals */ + ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { 
size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } } + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_8) { + ZSTD_wildcopy(op, match, oend_8 - op); + match += oend_8 - op; + op = oend_8; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + + +static size_t ZSTD_decompressSequences( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; + const BYTE* const litEnd = litPtr + dctx->litSize; + FSE_DTable* DTableLL = dctx->LLTable; + FSE_DTable* DTableML = dctx->MLTable; + FSE_DTable* DTableOffb = dctx->OffTable; + const BYTE* const base = (const BYTE*) (dctx->base); + const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + int nbSeq; + + /* Build Decoding Tables */ + { size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->flagRepeatTable, ip, seqSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + 
ip += seqHSize; + dctx->flagRepeatTable = 0; + } + + /* Regen sequences */ + if (nbSeq) { + seq_t sequence; + seqState_t seqState; + + memset(&sequence, 0, sizeof(sequence)); + sequence.offset = REPCODE_STARTVALUE; + { U32 i; for (i=0; i= 5810037) && (pos < 5810400)) + printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n", + pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); +#endif + + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } + + /* check if reached exact end */ + if (nbSeq) return ERROR(corruption_detected); + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ + if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + + +static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dst; + dctx->previousDstEnd = dst; + } +} + + +static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + + if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); + + /* Decode literals sub-block */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + 
const void* src, size_t srcSize) +{ + ZSTD_checkContinuity(dctx, dst); + return ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); +} + + /*! ZSTD_decompressFrame() : * `dctx` must be properly initialized */ -static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; const BYTE* const iend = ip + srcSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* op = ostart; - BYTE* const oend = ostart + dstCapacity; - size_t remainingSize = srcSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstCapacity; + size_t remainingSize = srcSize; blockProperties_t blockProperties = { bt_compressed, 0 }; - - /* check */ - if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - - /* Frame Header */ - { size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); - if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; - if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - if (ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected); - ip += frameHeaderSize; remainingSize -= frameHeaderSize; - } - - /* Loop on each block */ - while (1) { - size_t decodedSize=0; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - - ip += ZSTD_blockHeaderSize; - remainingSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); - - switch(blockProperties.blockType) - { - case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize); - break; - case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, 
cBlockSize); - break; - case bt_rle : - return ERROR(GENERIC); /* not yet supported */ - break; - case bt_end : - /* end of frame */ - if (remainingSize) return ERROR(srcSize_wrong); - break; - default: - return ERROR(GENERIC); /* impossible */ - } - if (cBlockSize == 0) break; /* bt_end */ - - if (ZSTD_isError(decodedSize)) return decodedSize; - op += decodedSize; - ip += cBlockSize; - remainingSize -= cBlockSize; - } - - return op-ostart; -} - - -size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - ZSTD_copyDCtx(dctx, refDCtx); - ZSTD_checkContinuity(dctx, dst); - return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); -} - - -size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize) -{ + + /* check */ + if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + if (ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected); + ip += frameHeaderSize; remainingSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize=0; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + 
break; + case bt_rle : + return ERROR(GENERIC); /* not yet supported */ + break; + case bt_end : + /* end of frame */ + if (remainingSize) return ERROR(srcSize_wrong); + break; + default: + return ERROR(GENERIC); /* impossible */ + } + if (cBlockSize == 0) break; /* bt_end */ + + if (ZSTD_isError(decodedSize)) return decodedSize; + op += decodedSize; + ip += cBlockSize; + remainingSize -= cBlockSize; + } + + return op-ostart; +} + + +size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_copyDCtx(dctx, refDCtx); + ZSTD_checkContinuity(dctx, dst); + return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); +} + + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) { const U32 magicNumber = MEM_readLE32(src); if (ZSTD_isLegacy(magicNumber)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize, magicNumber); } #endif - ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); - ZSTD_checkContinuity(dctx, dst); - return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); -} - - -size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); -} - - -size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ -#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1) - size_t regenSize; - ZSTD_DCtx* dctx = ZSTD_createDCtx(); - if (dctx==NULL) return ERROR(memory_allocation); - regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); - ZSTD_freeDCtx(dctx); - return regenSize; + ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); + ZSTD_checkContinuity(dctx, dst); + return ZSTD_decompressFrame(dctx, 
dst, dstCapacity, src, srcSize); +} + + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1) + size_t regenSize; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (dctx==NULL) return ERROR(memory_allocation); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; #else /* stack mode */ - ZSTD_DCtx dctx; - return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); -#endif -} - - -/*_****************************** -* Streaming Decompression API -********************************/ -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) -{ - return dctx->expected; -} - + ZSTD_DCtx dctx; + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*_****************************** +* Streaming Decompression API +********************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) +{ + return dctx->expected; +} + size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - /* Sanity check */ - if (srcSize != dctx->expected) return ERROR(srcSize_wrong); +{ + /* Sanity check */ + if (srcSize != dctx->expected) return ERROR(srcSize_wrong); if (dstCapacity) ZSTD_checkContinuity(dctx, dst); - - /* Decompress : frame header; part 1 */ - switch (dctx->stage) - { - case ZSTDds_getFrameHeaderSize : + + /* Decompress : frame header; part 1 */ + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */ dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; @@ -985,79 +985,79 @@ 
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_min; dctx->stage = ZSTDds_decodeFrameHeader; return 0; - } + } dctx->expected = 0; /* not necessary to copy more */ - case ZSTDds_decodeFrameHeader: + case ZSTDds_decodeFrameHeader: { size_t result; - memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); - result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize); - if (ZSTD_isError(result)) return result; - dctx->expected = ZSTD_blockHeaderSize; - dctx->stage = ZSTDds_decodeBlockHeader; - return 0; - } - case ZSTDds_decodeBlockHeader: + memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); + result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize); + if (ZSTD_isError(result)) return result; + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + } + case ZSTDds_decodeBlockHeader: { blockProperties_t bp; - size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - if (bp.blockType == bt_end) { - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - } else { - dctx->expected = cBlockSize; - dctx->bType = bp.blockType; - dctx->stage = ZSTDds_decompressBlock; - } - return 0; - } - case ZSTDds_decompressBlock: + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + if (bp.blockType == bt_end) { + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + } else { + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->stage = ZSTDds_decompressBlock; + } + return 0; + } + case ZSTDds_decompressBlock: { size_t rSize; - switch(dctx->bType) - { - case bt_compressed: + switch(dctx->bType) + { + case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); - 
break; - case bt_raw : + break; + case bt_raw : rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); - break; - case bt_rle : - return ERROR(GENERIC); /* not yet handled */ - break; - case bt_end : /* should never happen (filtered at phase 1) */ - rSize = 0; - break; - default: - return ERROR(GENERIC); /* impossible */ - } - dctx->stage = ZSTDds_decodeBlockHeader; - dctx->expected = ZSTD_blockHeaderSize; - dctx->previousDstEnd = (char*)dst + rSize; - return rSize; - } - default: - return ERROR(GENERIC); /* impossible */ - } -} - - -static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); - dctx->base = dict; - dctx->previousDstEnd = (const char*)dict + dictSize; -} - -static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ + break; + case bt_rle : + return ERROR(GENERIC); /* not yet handled */ + break; + case bt_end : /* should never happen (filtered at phase 1) */ + rSize = 0; + break; + default: + return ERROR(GENERIC); /* impossible */ + } + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + dctx->previousDstEnd = (char*)dst + rSize; + return rSize; + } + default: + return ERROR(GENERIC); /* impossible */ + } +} + + +static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +} + +static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize; - - hSize = HUF_readDTableX4(dctx->hufTableX4, dict, dictSize); - if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); - dict = (const 
char*)dict + hSize; - dictSize -= hSize; - + + hSize = HUF_readDTableX4(dctx->hufTableX4, dict, dictSize); + if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + hSize; + dictSize -= hSize; + { short offcodeNCount[MaxOff+1]; U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog; offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); @@ -1067,7 +1067,7 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz dict = (const char*)dict + offcodeHeaderSize; dictSize -= offcodeHeaderSize; } - + { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); @@ -1077,7 +1077,7 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz dict = (const char*)dict + matchlengthHeaderSize; dictSize -= matchlengthHeaderSize; } - + { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); @@ -1085,45 +1085,45 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz { size_t const errorCode = FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } } - - dctx->flagRepeatTable = 1; - return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize; -} - -static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - size_t eSize; - U32 const magic = MEM_readLE32(dict); - if (magic != ZSTD_DICT_MAGIC) { - /* pure content mode */ - ZSTD_refDictContent(dctx, dict, dictSize); - return 0; - } - /* load entropy tables */ - dict = (const char*)dict + 4; - dictSize -= 4; - eSize = ZSTD_loadEntropy(dctx, 
dict, dictSize); - if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); - - /* reference dictionary content */ - dict = (const char*)dict + eSize; - dictSize -= eSize; - ZSTD_refDictContent(dctx, dict, dictSize); - - return 0; -} - - -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - { size_t const errorCode = ZSTD_decompressBegin(dctx); - if (ZSTD_isError(errorCode)) return errorCode; } - - if (dict && dictSize) { - size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize); - if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted); - } - - return 0; -} - + + dctx->flagRepeatTable = 1; + return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize; +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + size_t eSize; + U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_DICT_MAGIC) { + /* pure content mode */ + ZSTD_refDictContent(dctx, dict, dictSize); + return 0; + } + /* load entropy tables */ + dict = (const char*)dict + 4; + dictSize -= 4; + eSize = ZSTD_loadEntropy(dctx, dict, dictSize); + if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + + /* reference dictionary content */ + dict = (const char*)dict + eSize; + dictSize -= eSize; + ZSTD_refDictContent(dctx, dict, dictSize); + + return 0; +} + + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + { size_t const errorCode = ZSTD_decompressBegin(dctx); + if (ZSTD_isError(errorCode)) return errorCode; } + + if (dict && dictSize) { + size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize); + if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted); + } + + return 0; +} + diff --git a/contrib/libs/zstd06/dictBuilder/divsufsort.c b/contrib/libs/zstd06/dictBuilder/divsufsort.c index 17116166b49..60cceb08832 100644 --- a/contrib/libs/zstd06/dictBuilder/divsufsort.c +++ 
b/contrib/libs/zstd06/dictBuilder/divsufsort.c @@ -1,1913 +1,1913 @@ -/* - * divsufsort.c for libdivsufsort-lite - * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -/*- Compiler specifics -*/ -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wshorten-64-to-32" -#endif - -#if defined(_MSC_VER) -# pragma warning(disable : 4244) -# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ -#endif - - -/*- Dependencies -*/ -#include -#include -#include - -#include "divsufsort.h" - -/*- Constants -*/ -#if defined(INLINE) -# undef INLINE -#endif -#if !defined(INLINE) -# define INLINE __inline -#endif -#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) -# undef ALPHABET_SIZE -#endif -#if !defined(ALPHABET_SIZE) -# define ALPHABET_SIZE (256) -#endif -#define BUCKET_A_SIZE (ALPHABET_SIZE) -#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) -#if defined(SS_INSERTIONSORT_THRESHOLD) -# if SS_INSERTIONSORT_THRESHOLD < 1 -# undef SS_INSERTIONSORT_THRESHOLD -# define SS_INSERTIONSORT_THRESHOLD (1) -# endif -#else -# define SS_INSERTIONSORT_THRESHOLD (8) -#endif -#if defined(SS_BLOCKSIZE) -# if SS_BLOCKSIZE < 0 -# undef SS_BLOCKSIZE -# define SS_BLOCKSIZE (0) -# elif 32768 <= SS_BLOCKSIZE -# undef SS_BLOCKSIZE -# define SS_BLOCKSIZE (32767) -# endif -#else -# define SS_BLOCKSIZE (1024) -#endif -/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ -#if SS_BLOCKSIZE == 0 -# define SS_MISORT_STACKSIZE (96) -#elif SS_BLOCKSIZE <= 4096 -# define SS_MISORT_STACKSIZE (16) -#else -# define SS_MISORT_STACKSIZE (24) -#endif -#define SS_SMERGE_STACKSIZE (32) -#define TR_INSERTIONSORT_THRESHOLD (8) -#define TR_STACKSIZE (64) - - -/*- Macros -*/ -#ifndef SWAP -# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) -#endif /* SWAP */ -#ifndef MIN -# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) -#endif /* MIN */ -#ifndef MAX -# define MAX(_a, _b) (((_a) > (_b)) ? 
(_a) : (_b)) -#endif /* MAX */ -#define STACK_PUSH(_a, _b, _c, _d)\ - do {\ - assert(ssize < STACK_SIZE);\ - stack[ssize].a = (_a), stack[ssize].b = (_b),\ - stack[ssize].c = (_c), stack[ssize++].d = (_d);\ - } while(0) -#define STACK_PUSH5(_a, _b, _c, _d, _e)\ - do {\ - assert(ssize < STACK_SIZE);\ - stack[ssize].a = (_a), stack[ssize].b = (_b),\ - stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ - } while(0) -#define STACK_POP(_a, _b, _c, _d)\ - do {\ - assert(0 <= ssize);\ - if(ssize == 0) { return; }\ - (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ - (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ - } while(0) -#define STACK_POP5(_a, _b, _c, _d, _e)\ - do {\ - assert(0 <= ssize);\ - if(ssize == 0) { return; }\ - (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ - (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ - } while(0) -#define BUCKET_A(_c0) bucket_A[(_c0)] -#if ALPHABET_SIZE == 256 -#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) -#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) -#else -#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) -#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) -#endif - - -/*- Private Functions -*/ - -static const int lg_table[256]= { - -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 -}; - -#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) - -static INLINE -int -ss_ilg(int n) { -#if SS_BLOCKSIZE == 0 - return (n & 
0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]); -#elif SS_BLOCKSIZE < 256 - return lg_table[n]; -#else - return (n & 0xff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]; -#endif -} - -#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ - -#if SS_BLOCKSIZE != 0 - -static const int sqq_table[256] = { - 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, - 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, - 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, -110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, -128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, -143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, -156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, -169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, -181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, -192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, -202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, -212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, -221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, -230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, -239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, -247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 -}; - -static INLINE -int -ss_isqrt(int x) { - int y, e; - - if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } - e = (x & 0xffff0000) ? - ((x & 0xff000000) ? 
- 24 + lg_table[(x >> 24) & 0xff] : - 16 + lg_table[(x >> 16) & 0xff]) : - ((x & 0x0000ff00) ? - 8 + lg_table[(x >> 8) & 0xff] : - 0 + lg_table[(x >> 0) & 0xff]); - - if(e >= 16) { - y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); - if(e >= 24) { y = (y + 1 + x / y) >> 1; } - y = (y + 1 + x / y) >> 1; - } else if(e >= 8) { - y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; - } else { - return sqq_table[x] >> 4; - } - - return (x < (y * y)) ? y - 1 : y; -} - -#endif /* SS_BLOCKSIZE != 0 */ - - -/*---------------------------------------------------------------------------*/ - -/* Compares two suffixes. */ -static INLINE -int -ss_compare(const unsigned char *T, - const int *p1, const int *p2, - int depth) { - const unsigned char *U1, *U2, *U1n, *U2n; - - for(U1 = T + depth + *p1, - U2 = T + depth + *p2, - U1n = T + *(p1 + 1) + 2, - U2n = T + *(p2 + 1) + 2; - (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); - ++U1, ++U2) { - } - - return U1 < U1n ? - (U2 < U2n ? *U1 - *U2 : 1) : - (U2 < U2n ? 
-1 : 0); -} - - -/*---------------------------------------------------------------------------*/ - -#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) - -/* Insertionsort for small size groups */ -static -void -ss_insertionsort(const unsigned char *T, const int *PA, - int *first, int *last, int depth) { - int *i, *j; - int t; - int r; - - for(i = last - 2; first <= i; --i) { - for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { - do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); - if(last <= j) { break; } - } - if(r == 0) { *j = ~*j; } - *(j - 1) = t; - } -} - -#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ - - -/*---------------------------------------------------------------------------*/ - -#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) - -static INLINE -void -ss_fixdown(const unsigned char *Td, const int *PA, - int *SA, int i, int size) { - int j, k; - int v; - int c, d, e; - - for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { - d = Td[PA[SA[k = j++]]]; - if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } - if(d <= c) { break; } - } - SA[i] = v; -} - -/* Simple top-down heapsort. */ -static -void -ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { - int i, m; - int t; - - m = size; - if((size % 2) == 0) { - m--; - if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } - } - - for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } - if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } - for(i = m - 1; 0 < i; --i) { - t = SA[0], SA[0] = SA[i]; - ss_fixdown(Td, PA, SA, 0, i); - SA[i] = t; - } -} - - -/*---------------------------------------------------------------------------*/ - -/* Returns the median of three elements. 
*/ -static INLINE -int * -ss_median3(const unsigned char *Td, const int *PA, - int *v1, int *v2, int *v3) { - int *t; - if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } - if(Td[PA[*v2]] > Td[PA[*v3]]) { - if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } - else { return v3; } - } - return v2; -} - -/* Returns the median of five elements. */ -static INLINE -int * -ss_median5(const unsigned char *Td, const int *PA, - int *v1, int *v2, int *v3, int *v4, int *v5) { - int *t; - if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } - if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } - if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } - if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } - if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } - if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } - return v3; -} - -/* Returns the pivot element. */ -static INLINE -int * -ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { - int *middle; - int t; - - t = last - first; - middle = first + t / 2; - - if(t <= 512) { - if(t <= 32) { - return ss_median3(Td, PA, first, middle, last - 1); - } else { - t >>= 2; - return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); - } - } - t >>= 3; - first = ss_median3(Td, PA, first, first + t, first + (t << 1)); - middle = ss_median3(Td, PA, middle - t, middle, middle + t); - last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); - return ss_median3(Td, PA, first, middle, last); -} - - -/*---------------------------------------------------------------------------*/ - -/* Binary partition for substrings. 
*/ -static INLINE -int * -ss_partition(const int *PA, - int *first, int *last, int depth) { - int *a, *b; - int t; - for(a = first - 1, b = last;;) { - for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } - for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } - if(b <= a) { break; } - t = ~*b; - *b = *a; - *a = t; - } - if(first < a) { *first = ~*first; } - return a; -} - -/* Multikey introsort for medium size groups. */ -static -void -ss_mintrosort(const unsigned char *T, const int *PA, - int *first, int *last, - int depth) { -#define STACK_SIZE SS_MISORT_STACKSIZE - struct { int *a, *b, c; int d; } stack[STACK_SIZE]; - const unsigned char *Td; - int *a, *b, *c, *d, *e, *f; - int s, t; - int ssize; - int limit; - int v, x = 0; - - for(ssize = 0, limit = ss_ilg(last - first);;) { - - if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { -#if 1 < SS_INSERTIONSORT_THRESHOLD - if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } -#endif - STACK_POP(first, last, depth, limit); - continue; - } - - Td = T + depth; - if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } - if(limit < 0) { - for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { - if((x = Td[PA[*a]]) != v) { - if(1 < (a - first)) { break; } - v = x; - first = a; - } - } - if(Td[PA[*first] - 1] < v) { - first = ss_partition(PA, first, a, depth); - } - if((a - first) <= (last - a)) { - if(1 < (a - first)) { - STACK_PUSH(a, last, depth, -1); - last = a, depth += 1, limit = ss_ilg(a - first); - } else { - first = a, limit = -1; - } - } else { - if(1 < (last - a)) { - STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); - first = a, limit = -1; - } else { - last = a, depth += 1, limit = ss_ilg(a - first); - } - } - continue; - } - - /* choose pivot */ - a = ss_pivot(Td, PA, first, last); - v = Td[PA[*a]]; - SWAP(*first, *a); - - /* partition */ - for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } - if(((a = b) < last) && (x < v)) { - for(; (++b < 
last) && ((x = Td[PA[*b]]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } - } - for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } - if((b < (d = c)) && (x > v)) { - for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } - } - for(; b < c;) { - SWAP(*b, *c); - for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } - for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } - } - - if(a <= d) { - c = b - 1; - - if((s = a - first) > (t = b - a)) { s = t; } - for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - if((s = d - c) > (t = last - d - 1)) { s = t; } - for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - - a = first + (b - a), c = last - (d - c); - b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); - - if((a - first) <= (last - c)) { - if((last - c) <= (c - b)) { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(c, last, depth, limit); - last = a; - } else if((a - first) <= (c - b)) { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - last = a; - } else { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(first, a, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); - } - } else { - if((a - first) <= (c - b)) { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(first, a, depth, limit); - first = c; - } else if((last - c) <= (c - b)) { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - first = c; - } else { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(c, last, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); - } - } - } else { - limit += 1; - if(Td[PA[*first] - 1] < v) { - first = ss_partition(PA, first, last, depth); - limit = ss_ilg(last - first); - } - depth += 1; - } - } -#undef STACK_SIZE -} - -#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ - - 
-/*---------------------------------------------------------------------------*/ - -#if SS_BLOCKSIZE != 0 - -static INLINE -void -ss_blockswap(int *a, int *b, int n) { - int t; - for(; 0 < n; --n, ++a, ++b) { - t = *a, *a = *b, *b = t; - } -} - -static INLINE -void -ss_rotate(int *first, int *middle, int *last) { - int *a, *b, t; - int l, r; - l = middle - first, r = last - middle; - for(; (0 < l) && (0 < r);) { - if(l == r) { ss_blockswap(first, middle, l); break; } - if(l < r) { - a = last - 1, b = middle - 1; - t = *a; - do { - *a-- = *b, *b-- = *a; - if(b < first) { - *a = t; - last = a; - if((r -= l + 1) <= l) { break; } - a -= 1, b = middle - 1; - t = *a; - } - } while(1); - } else { - a = first, b = middle; - t = *a; - do { - *a++ = *b, *b++ = *a; - if(last <= b) { - *a = t; - first = a + 1; - if((l -= r + 1) <= r) { break; } - a += 1, b = middle; - t = *a; - } - } while(1); - } - } -} - - -/*---------------------------------------------------------------------------*/ - -static -void -ss_inplacemerge(const unsigned char *T, const int *PA, - int *first, int *middle, int *last, - int depth) { - const int *p; - int *a, *b; - int len, half; - int q, r; - int x; - - for(;;) { - if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } - else { x = 0; p = PA + *(last - 1); } - for(a = first, len = middle - first, half = len >> 1, r = -1; - 0 < len; - len = half, half >>= 1) { - b = a + half; - q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); - if(q < 0) { - a = b + 1; - half -= (len & 1) ^ 1; - } else { - r = q; - } - } - if(a < middle) { - if(r == 0) { *a = ~*a; } - ss_rotate(a, middle, last); - last -= middle - a; - middle = a; - if(first == middle) { break; } - } - --last; - if(x != 0) { while(*--last < 0) { } } - if(middle == last) { break; } - } -} - - -/*---------------------------------------------------------------------------*/ - -/* Merge-forward with internal buffer. 
*/ -static -void -ss_mergeforward(const unsigned char *T, const int *PA, - int *first, int *middle, int *last, - int *buf, int depth) { - int *a, *b, *c, *bufend; - int t; - int r; - - bufend = buf + (middle - first) - 1; - ss_blockswap(buf, first, middle - first); - - for(t = *(a = first), b = buf, c = middle;;) { - r = ss_compare(T, PA + *b, PA + *c, depth); - if(r < 0) { - do { - *a++ = *b; - if(bufend <= b) { *bufend = t; return; } - *b++ = *a; - } while(*b < 0); - } else if(r > 0) { - do { - *a++ = *c, *c++ = *a; - if(last <= c) { - while(b < bufend) { *a++ = *b, *b++ = *a; } - *a = *b, *b = t; - return; - } - } while(*c < 0); - } else { - *c = ~*c; - do { - *a++ = *b; - if(bufend <= b) { *bufend = t; return; } - *b++ = *a; - } while(*b < 0); - - do { - *a++ = *c, *c++ = *a; - if(last <= c) { - while(b < bufend) { *a++ = *b, *b++ = *a; } - *a = *b, *b = t; - return; - } - } while(*c < 0); - } - } -} - -/* Merge-backward with internal buffer. */ -static -void -ss_mergebackward(const unsigned char *T, const int *PA, - int *first, int *middle, int *last, - int *buf, int depth) { - const int *p1, *p2; - int *a, *b, *c, *bufend; - int t; - int r; - int x; - - bufend = buf + (last - middle) - 1; - ss_blockswap(buf, middle, last - middle); - - x = 0; - if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } - else { p1 = PA + *bufend; } - if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } - else { p2 = PA + *(middle - 1); } - for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { - r = ss_compare(T, p1, p2, depth); - if(0 < r) { - if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } - *a-- = *b; - if(b <= buf) { *buf = t; break; } - *b-- = *a; - if(*b < 0) { p1 = PA + ~*b; x |= 1; } - else { p1 = PA + *b; } - } else if(r < 0) { - if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } - *a-- = *c, *c-- = *a; - if(c < first) { - while(buf < b) { *a-- = *b, *b-- = *a; } - *a = *b, *b = t; - break; - } - if(*c < 0) { p2 = PA + ~*c; x |= 2; } - 
else { p2 = PA + *c; } - } else { - if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } - *a-- = ~*b; - if(b <= buf) { *buf = t; break; } - *b-- = *a; - if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } - *a-- = *c, *c-- = *a; - if(c < first) { - while(buf < b) { *a-- = *b, *b-- = *a; } - *a = *b, *b = t; - break; - } - if(*b < 0) { p1 = PA + ~*b; x |= 1; } - else { p1 = PA + *b; } - if(*c < 0) { p2 = PA + ~*c; x |= 2; } - else { p2 = PA + *c; } - } - } -} - -/* D&C based merge. */ -static -void -ss_swapmerge(const unsigned char *T, const int *PA, - int *first, int *middle, int *last, - int *buf, int bufsize, int depth) { -#define STACK_SIZE SS_SMERGE_STACKSIZE -#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) -#define MERGE_CHECK(a, b, c)\ - do {\ - if(((c) & 1) ||\ - (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ - *(a) = ~*(a);\ - }\ - if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ - *(b) = ~*(b);\ - }\ - } while(0) - struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; - int *l, *r, *lm, *rm; - int m, len, half; - int ssize; - int check, next; - - for(check = 0, ssize = 0;;) { - if((last - middle) <= bufsize) { - if((first < middle) && (middle < last)) { - ss_mergebackward(T, PA, first, middle, last, buf, depth); - } - MERGE_CHECK(first, last, check); - STACK_POP(first, middle, last, check); - continue; - } - - if((middle - first) <= bufsize) { - if(first < middle) { - ss_mergeforward(T, PA, first, middle, last, buf, depth); - } - MERGE_CHECK(first, last, check); - STACK_POP(first, middle, last, check); - continue; - } - - for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; - 0 < len; - len = half, half >>= 1) { - if(ss_compare(T, PA + GETIDX(*(middle + m + half)), - PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { - m += half + 1; - half -= (len & 1) ^ 1; - } - } - - if(0 < m) { - lm = middle - m, rm = middle + m; - ss_blockswap(lm, 
middle, m); - l = r = middle, next = 0; - if(rm < last) { - if(*rm < 0) { - *rm = ~*rm; - if(first < lm) { for(; *--l < 0;) { } next |= 4; } - next |= 1; - } else if(first < lm) { - for(; *r < 0; ++r) { } - next |= 2; - } - } - - if((l - first) <= (last - r)) { - STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); - middle = lm, last = l, check = (check & 3) | (next & 4); - } else { - if((next & 2) && (r == middle)) { next ^= 6; } - STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); - first = r, middle = rm, check = (next & 3) | (check & 4); - } - } else { - if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { - *middle = ~*middle; - } - MERGE_CHECK(first, last, check); - STACK_POP(first, middle, last, check); - } - } -#undef STACK_SIZE -} - -#endif /* SS_BLOCKSIZE != 0 */ - - -/*---------------------------------------------------------------------------*/ - -/* Substring sort */ -static -void -sssort(const unsigned char *T, const int *PA, - int *first, int *last, - int *buf, int bufsize, - int depth, int n, int lastsuffix) { - int *a; -#if SS_BLOCKSIZE != 0 - int *b, *middle, *curbuf; - int j, k, curbufsize, limit; -#endif - int i; - - if(lastsuffix != 0) { ++first; } - -#if SS_BLOCKSIZE == 0 - ss_mintrosort(T, PA, first, last, depth); -#else - if((bufsize < SS_BLOCKSIZE) && - (bufsize < (last - first)) && - (bufsize < (limit = ss_isqrt(last - first)))) { - if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } - buf = middle = last - limit, bufsize = limit; - } else { - middle = last, limit = 0; - } - for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { -#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE - ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); -#elif 1 < SS_BLOCKSIZE - ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); -#endif - curbufsize = last - (a + SS_BLOCKSIZE); - curbuf = a + SS_BLOCKSIZE; - if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } - for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= 
k, k <<= 1, j >>= 1) { - ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); - } - } -#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE - ss_mintrosort(T, PA, a, middle, depth); -#elif 1 < SS_BLOCKSIZE - ss_insertionsort(T, PA, a, middle, depth); -#endif - for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { - if(i & 1) { - ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); - a -= k; - } - } - if(limit != 0) { -#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE - ss_mintrosort(T, PA, middle, last, depth); -#elif 1 < SS_BLOCKSIZE - ss_insertionsort(T, PA, middle, last, depth); -#endif - ss_inplacemerge(T, PA, first, middle, last, depth); - } -#endif - - if(lastsuffix != 0) { - /* Insert last type B* suffix. */ - int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; - for(a = first, i = *(first - 1); - (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); - ++a) { - *(a - 1) = *a; - } - *(a - 1) = i; - } -} - - -/*---------------------------------------------------------------------------*/ - -static INLINE -int -tr_ilg(int n) { - return (n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]); -} - - -/*---------------------------------------------------------------------------*/ - -/* Simple insertionsort for small size groups. 
*/ -static -void -tr_insertionsort(const int *ISAd, int *first, int *last) { - int *a, *b; - int t, r; - - for(a = first + 1; a < last; ++a) { - for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { - do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); - if(b < first) { break; } - } - if(r == 0) { *b = ~*b; } - *(b + 1) = t; - } -} - - -/*---------------------------------------------------------------------------*/ - -static INLINE -void -tr_fixdown(const int *ISAd, int *SA, int i, int size) { - int j, k; - int v; - int c, d, e; - - for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { - d = ISAd[SA[k = j++]]; - if(d < (e = ISAd[SA[j]])) { k = j; d = e; } - if(d <= c) { break; } - } - SA[i] = v; -} - -/* Simple top-down heapsort. */ -static -void -tr_heapsort(const int *ISAd, int *SA, int size) { - int i, m; - int t; - - m = size; - if((size % 2) == 0) { - m--; - if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } - } - - for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } - if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } - for(i = m - 1; 0 < i; --i) { - t = SA[0], SA[0] = SA[i]; - tr_fixdown(ISAd, SA, 0, i); - SA[i] = t; - } -} - - -/*---------------------------------------------------------------------------*/ - -/* Returns the median of three elements. */ -static INLINE -int * -tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { - int *t; - if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } - if(ISAd[*v2] > ISAd[*v3]) { - if(ISAd[*v1] > ISAd[*v3]) { return v1; } - else { return v3; } - } - return v2; -} - -/* Returns the median of five elements. 
*/ -static INLINE -int * -tr_median5(const int *ISAd, - int *v1, int *v2, int *v3, int *v4, int *v5) { - int *t; - if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } - if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } - if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } - if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } - if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } - if(ISAd[*v3] > ISAd[*v4]) { return v4; } - return v3; -} - -/* Returns the pivot element. */ -static INLINE -int * -tr_pivot(const int *ISAd, int *first, int *last) { - int *middle; - int t; - - t = last - first; - middle = first + t / 2; - - if(t <= 512) { - if(t <= 32) { - return tr_median3(ISAd, first, middle, last - 1); - } else { - t >>= 2; - return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); - } - } - t >>= 3; - first = tr_median3(ISAd, first, first + t, first + (t << 1)); - middle = tr_median3(ISAd, middle - t, middle, middle + t); - last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); - return tr_median3(ISAd, first, middle, last); -} - - -/*---------------------------------------------------------------------------*/ - -typedef struct _trbudget_t trbudget_t; -struct _trbudget_t { - int chance; - int remain; - int incval; - int count; -}; - -static INLINE -void -trbudget_init(trbudget_t *budget, int chance, int incval) { - budget->chance = chance; - budget->remain = budget->incval = incval; -} - -static INLINE -int -trbudget_check(trbudget_t *budget, int size) { - if(size <= budget->remain) { budget->remain -= size; return 1; } - if(budget->chance == 0) { budget->count += size; return 0; } - budget->remain += budget->incval - size; - budget->chance -= 1; - return 1; -} - - -/*---------------------------------------------------------------------------*/ - -static INLINE -void -tr_partition(const int *ISAd, - int *first, int *middle, int *last, - int **pa, int **pb, int v) { - int *a, *b, *c, *d, *e, *f; - int t, s; - int x = 0; - - for(b = middle - 1; 
(++b < last) && ((x = ISAd[*b]) == v);) { } - if(((a = b) < last) && (x < v)) { - for(; (++b < last) && ((x = ISAd[*b]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } - } - for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } - if((b < (d = c)) && (x > v)) { - for(; (b < --c) && ((x = ISAd[*c]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } - } - for(; b < c;) { - SWAP(*b, *c); - for(; (++b < c) && ((x = ISAd[*b]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } - for(; (b < --c) && ((x = ISAd[*c]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } - } - - if(a <= d) { - c = b - 1; - if((s = a - first) > (t = b - a)) { s = t; } - for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - if((s = d - c) > (t = last - d - 1)) { s = t; } - for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - first += (b - a), last -= (d - c); - } - *pa = first, *pb = last; -} - -static -void -tr_copy(int *ISA, const int *SA, - int *first, int *a, int *b, int *last, - int depth) { - /* sort suffixes of middle partition - by using sorted order of suffixes of left and right partition. 
*/ - int *c, *d, *e; - int s, v; - - v = b - SA - 1; - for(c = first, d = a - 1; c <= d; ++c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *++d = s; - ISA[s] = d - SA; - } - } - for(c = last - 1, e = d + 1, d = b; e < d; --c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *--d = s; - ISA[s] = d - SA; - } - } -} - -static -void -tr_partialcopy(int *ISA, const int *SA, - int *first, int *a, int *b, int *last, - int depth) { - int *c, *d, *e; - int s, v; - int rank, lastrank, newrank = -1; - - v = b - SA - 1; - lastrank = -1; - for(c = first, d = a - 1; c <= d; ++c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *++d = s; - rank = ISA[s + depth]; - if(lastrank != rank) { lastrank = rank; newrank = d - SA; } - ISA[s] = newrank; - } - } - - lastrank = -1; - for(e = d; first <= e; --e) { - rank = ISA[*e]; - if(lastrank != rank) { lastrank = rank; newrank = e - SA; } - if(newrank != rank) { ISA[*e] = newrank; } - } - - lastrank = -1; - for(c = last - 1, e = d + 1, d = b; e < d; --c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *--d = s; - rank = ISA[s + depth]; - if(lastrank != rank) { lastrank = rank; newrank = d - SA; } - ISA[s] = newrank; - } - } -} - -static -void -tr_introsort(int *ISA, const int *ISAd, - int *SA, int *first, int *last, - trbudget_t *budget) { -#define STACK_SIZE TR_STACKSIZE - struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; - int *a, *b, *c; - int t; - int v, x = 0; - int incr = ISAd - ISA; - int limit, next; - int ssize, trlink = -1; - - for(ssize = 0, limit = tr_ilg(last - first);;) { - - if(limit < 0) { - if(limit == -1) { - /* tandem repeat partition */ - tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); - - /* update ranks */ - if(a < last) { - for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } - } - if(b < last) { - for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } - } - - /* push */ - if(1 < (b - a)) { - STACK_PUSH5(NULL, a, b, 0, 0); - STACK_PUSH5(ISAd - incr, 
first, last, -2, trlink); - trlink = ssize - 2; - } - if((a - first) <= (last - b)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); - last = a, limit = tr_ilg(a - first); - } else if(1 < (last - b)) { - first = b, limit = tr_ilg(last - b); - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } else { - if(1 < (last - b)) { - STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); - first = b, limit = tr_ilg(last - b); - } else if(1 < (a - first)) { - last = a, limit = tr_ilg(a - first); - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } else if(limit == -2) { - /* tandem repeat copy */ - a = stack[--ssize].b, b = stack[ssize].c; - if(stack[ssize].d == 0) { - tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); - } else { - if(0 <= trlink) { stack[trlink].d = -1; } - tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); - } - STACK_POP5(ISAd, first, last, limit, trlink); - } else { - /* sorted partition */ - if(0 <= *first) { - a = first; - do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); - first = a; - } - if(first < last) { - a = first; do { *a = ~*a; } while(*++a < 0); - next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; - if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } - - /* push */ - if(trbudget_check(budget, a - first)) { - if((a - first) <= (last - a)) { - STACK_PUSH5(ISAd, a, last, -3, trlink); - ISAd += incr, last = a, limit = next; - } else { - if(1 < (last - a)) { - STACK_PUSH5(ISAd + incr, first, a, next, trlink); - first = a, limit = -3; - } else { - ISAd += incr, last = a, limit = next; - } - } - } else { - if(0 <= trlink) { stack[trlink].d = -1; } - if(1 < (last - a)) { - first = a, limit = -3; - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - continue; - } - - if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { - tr_insertionsort(ISAd, first, last); - limit = -3; - continue; - } - - if(limit-- == 0) { - tr_heapsort(ISAd, first, last - first); - for(a = last - 1; first < a; a = b) { - for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } - } - limit = -3; - continue; - } - - /* choose pivot */ - a = tr_pivot(ISAd, first, last); - SWAP(*first, *a); - v = ISAd[*first]; - - /* partition */ - tr_partition(ISAd, first, first + 1, last, &a, &b, v); - if((last - first) != (b - a)) { - next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; - - /* update ranks */ - for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } - if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } - - /* push */ - if((1 < (b - a)) && (trbudget_check(budget, b - a))) { - if((a - first) <= (last - b)) { - if((last - b) <= (b - a)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - STACK_PUSH5(ISAd, b, last, limit, trlink); - last = a; - } else if(1 < (last - b)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - first = b; - } else { - ISAd += incr, first = a, last = b, limit = next; - } - } else if((a - first) <= (b - a)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd, b, last, limit, trlink); - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - last = a; - } else { - STACK_PUSH5(ISAd, b, last, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } else { - STACK_PUSH5(ISAd, b, last, limit, trlink); - STACK_PUSH5(ISAd, first, a, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } else { - if((a - first) <= (b - a)) { - if(1 < (last - b)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - STACK_PUSH5(ISAd, first, a, limit, trlink); - first = b; - } else if(1 < (a - first)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - last = a; - } else { - ISAd += incr, first = a, last = b, limit = next; - } - } else if((last - b) <= (b - a)) { - if(1 < (last - b)) { - STACK_PUSH5(ISAd, first, a, limit, trlink); - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - first = b; - } else { - STACK_PUSH5(ISAd, first, a, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } else { - STACK_PUSH5(ISAd, first, a, limit, trlink); - STACK_PUSH5(ISAd, b, last, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } - } else { - if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } - if((a - first) <= (last - b)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd, b, last, limit, trlink); - last = a; - } 
else if(1 < (last - b)) { - first = b; - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } else { - if(1 < (last - b)) { - STACK_PUSH5(ISAd, first, a, limit, trlink); - first = b; - } else if(1 < (a - first)) { - last = a; - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } - } else { - if(trbudget_check(budget, last - first)) { - limit = tr_ilg(last - first), ISAd += incr; - } else { - if(0 <= trlink) { stack[trlink].d = -1; } - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } -#undef STACK_SIZE -} - - - -/*---------------------------------------------------------------------------*/ - -/* Tandem repeat sort */ -static -void -trsort(int *ISA, int *SA, int n, int depth) { - int *ISAd; - int *first, *last; - trbudget_t budget; - int t, skip, unsorted; - - trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); -/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ - for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { - first = SA; - skip = 0; - unsorted = 0; - do { - if((t = *first) < 0) { first -= t; skip += t; } - else { - if(skip != 0) { *(first + skip) = skip; skip = 0; } - last = SA + ISA[t] + 1; - if(1 < (last - first)) { - budget.count = 0; - tr_introsort(ISA, ISAd, SA, first, last, &budget); - if(budget.count != 0) { unsorted += budget.count; } - else { skip = first - last; } - } else if((last - first) == 1) { - skip = -1; - } - first = last; - } - } while(first < (SA + n)); - if(skip != 0) { *(first + skip) = skip; } - if(unsorted == 0) { break; } - } -} - - -/*---------------------------------------------------------------------------*/ - -/* Sorts suffixes of type B*. */ -static -int -sort_typeBstar(const unsigned char *T, int *SA, - int *bucket_A, int *bucket_B, - int n, int openMP) { - int *PAb, *ISAb, *buf; -#ifdef LIBBSC_OPENMP - int *curbuf; - int l; -#endif - int i, j, k, t, m, bufsize; - int c0, c1; -#ifdef LIBBSC_OPENMP - int d0, d1; -#endif - (void)openMP; - - /* Initialize bucket arrays. 
*/ - for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } - for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } - - /* Count the number of occurrences of the first one or two characters of each - type A, B and B* suffix. Moreover, store the beginning position of all - type B* suffixes into the array SA. */ - for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { - /* type A suffix. */ - do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); - if(0 <= i) { - /* type B* suffix. */ - ++BUCKET_BSTAR(c0, c1); - SA[--m] = i; - /* type B suffix. */ - for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { - ++BUCKET_B(c0, c1); - } - } - } - m = n - m; -/* -note: - A type B* suffix is lexicographically smaller than a type B suffix that - begins with the same first two characters. -*/ - - /* Calculate the index of start/end point of each bucket. */ - for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { - t = i + BUCKET_A(c0); - BUCKET_A(c0) = i + j; /* start point */ - i = t + BUCKET_B(c0, c0); - for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { - j += BUCKET_BSTAR(c0, c1); - BUCKET_BSTAR(c0, c1) = j; /* end point */ - i += BUCKET_B(c0, c1); - } - } - - if(0 < m) { - /* Sort the type B* suffixes by their first two characters. */ - PAb = SA + n - m; ISAb = SA + m; - for(i = m - 2; 0 <= i; --i) { - t = PAb[i], c0 = T[t], c1 = T[t + 1]; - SA[--BUCKET_BSTAR(c0, c1)] = i; - } - t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; - SA[--BUCKET_BSTAR(c0, c1)] = m - 1; - - /* Sort the type B* substrings using sssort. 
*/ -#ifdef LIBBSC_OPENMP - if (openMP) - { - buf = SA + m; - c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; -#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) - { - bufsize = (n - (2 * m)) / omp_get_num_threads(); - curbuf = buf + omp_get_thread_num() * bufsize; - k = 0; - for(;;) { - #pragma omp critical(sssort_lock) - { - if(0 < (l = j)) { - d0 = c0, d1 = c1; - do { - k = BUCKET_BSTAR(d0, d1); - if(--d1 <= d0) { - d1 = ALPHABET_SIZE - 1; - if(--d0 < 0) { break; } - } - } while(((l - k) <= 1) && (0 < (l = k))); - c0 = d0, c1 = d1, j = k; - } - } - if(l == 0) { break; } - sssort(T, PAb, SA + k, SA + l, - curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); - } - } - } - else - { - buf = SA + m, bufsize = n - (2 * m); - for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { - for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { - i = BUCKET_BSTAR(c0, c1); - if(1 < (j - i)) { - sssort(T, PAb, SA + i, SA + j, - buf, bufsize, 2, n, *(SA + i) == (m - 1)); - } - } - } - } -#else - buf = SA + m, bufsize = n - (2 * m); - for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { - for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { - i = BUCKET_BSTAR(c0, c1); - if(1 < (j - i)) { - sssort(T, PAb, SA + i, SA + j, - buf, bufsize, 2, n, *(SA + i) == (m - 1)); - } - } - } -#endif - - /* Compute ranks of type B* substrings. */ - for(i = m - 1; 0 <= i; --i) { - if(0 <= SA[i]) { - j = i; - do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); - SA[i + 1] = i - j; - if(i <= 0) { break; } - } - j = i; - do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); - ISAb[SA[i]] = j; - } - - /* Construct the inverse suffix array of type B* suffixes using trsort. */ - trsort(ISAb, SA, m, 1); - - /* Set the sorted order of tyoe B* suffixes. 
*/ - for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { - for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } - if(0 <= i) { - t = i; - for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } - SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; - } - } - - /* Calculate the index of start/end point of each bucket. */ - BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ - for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { - i = BUCKET_A(c0 + 1) - 1; - for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { - t = i - BUCKET_B(c0, c1); - BUCKET_B(c0, c1) = i; /* end point */ - - /* Move all type B* suffixes to the correct position. */ - for(i = t, j = BUCKET_BSTAR(c0, c1); - j <= k; - --i, --k) { SA[i] = SA[k]; } - } - BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ - BUCKET_B(c0, c0) = i; /* end point */ - } - } - - return m; -} - -/* Constructs the suffix array by using the sorted order of type B* suffixes. */ -static -void -construct_SA(const unsigned char *T, int *SA, - int *bucket_A, int *bucket_B, - int n, int m) { - int *i, *j, *k; - int s; - int c0, c1, c2; - - if(0 < m) { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { - /* Scan the suffix array from right to left. */ - for(i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; - i <= j; - --j) { - if(0 < (s = *j)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); - *j = ~s; - c0 = T[--s]; - if((0 < s) && (T[s - 1] > c0)) { s = ~s; } - if(c0 != c2) { - if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); - } - assert(k < j); - *k-- = s; - } else { - assert(((s == 0) && (T[s] == c1)) || (s < 0)); - *j = ~s; - } - } - } - } - - /* Construct the suffix array by using - the sorted order of type B suffixes. 
*/ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); - /* Scan the suffix array from left to right. */ - for(i = SA, j = SA + n; i < j; ++i) { - if(0 < (s = *i)) { - assert(T[s - 1] >= T[s]); - c0 = T[--s]; - if((s == 0) || (T[s - 1] < c0)) { s = ~s; } - if(c0 != c2) { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - assert(i < k); - *k++ = s; - } else { - assert(s < 0); - *i = ~s; - } - } -} - -/* Constructs the burrows-wheeler transformed string directly - by using the sorted order of type B* suffixes. */ -static -int -construct_BWT(const unsigned char *T, int *SA, - int *bucket_A, int *bucket_B, - int n, int m) { - int *i, *j, *k, *orig; - int s; - int c0, c1, c2; - - if(0 < m) { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { - /* Scan the suffix array from right to left. */ - for(i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; - i <= j; - --j) { - if(0 < (s = *j)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); - c0 = T[--s]; - *j = ~((int)c0); - if((0 < s) && (T[s - 1] > c0)) { s = ~s; } - if(c0 != c2) { - if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); - } - assert(k < j); - *k-- = s; - } else if(s != 0) { - *j = ~s; -#ifndef NDEBUG - } else { - assert(T[s] == c1); -#endif - } - } - } - } - - /* Construct the BWTed string by using - the sorted order of type B suffixes. */ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); - /* Scan the suffix array from left to right. 
*/ - for(i = SA, j = SA + n, orig = SA; i < j; ++i) { - if(0 < (s = *i)) { - assert(T[s - 1] >= T[s]); - c0 = T[--s]; - *i = c0; - if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } - if(c0 != c2) { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - assert(i < k); - *k++ = s; - } else if(s != 0) { - *i = ~s; - } else { - orig = i; - } - } - - return orig - SA; -} - -/* Constructs the burrows-wheeler transformed string directly - by using the sorted order of type B* suffixes. */ -static -int -construct_BWT_indexes(const unsigned char *T, int *SA, - int *bucket_A, int *bucket_B, - int n, int m, - unsigned char * num_indexes, int * indexes) { - int *i, *j, *k, *orig; - int s; - int c0, c1, c2; - - int mod = n / 8; - { - mod |= mod >> 1; mod |= mod >> 2; - mod |= mod >> 4; mod |= mod >> 8; - mod |= mod >> 16; mod >>= 1; - - *num_indexes = (unsigned char)((n - 1) / (mod + 1)); - } - - if(0 < m) { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { - /* Scan the suffix array from right to left. */ - for(i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; - i <= j; - --j) { - if(0 < (s = *j)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); - - if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; - - c0 = T[--s]; - *j = ~((int)c0); - if((0 < s) && (T[s - 1] > c0)) { s = ~s; } - if(c0 != c2) { - if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); - } - assert(k < j); - *k-- = s; - } else if(s != 0) { - *j = ~s; -#ifndef NDEBUG - } else { - assert(T[s] == c1); -#endif - } - } - } - } - - /* Construct the BWTed string by using - the sorted order of type B suffixes. 
*/ - k = SA + BUCKET_A(c2 = T[n - 1]); - if (T[n - 2] < c2) { - if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; - *k++ = ~((int)T[n - 2]); - } - else { - *k++ = n - 1; - } - - /* Scan the suffix array from left to right. */ - for(i = SA, j = SA + n, orig = SA; i < j; ++i) { - if(0 < (s = *i)) { - assert(T[s - 1] >= T[s]); - - if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; - - c0 = T[--s]; - *i = c0; - if(c0 != c2) { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - assert(i < k); - if((0 < s) && (T[s - 1] < c0)) { - if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; - *k++ = ~((int)T[s - 1]); - } else - *k++ = s; - } else if(s != 0) { - *i = ~s; - } else { - orig = i; - } - } - - return orig - SA; -} - - -/*---------------------------------------------------------------------------*/ - -/*- Function -*/ - -int -divsufsort(const unsigned char *T, int *SA, int n, int openMP) { - int *bucket_A, *bucket_B; - int m; - int err = 0; - - /* Check arguments. */ - if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } - else if(n == 0) { return 0; } - else if(n == 1) { SA[0] = 0; return 0; } - else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } - - bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); - bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); - - /* Suffixsort. */ - if((bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); - construct_SA(T, SA, bucket_A, bucket_B, n, m); - } else { - err = -2; - } - - free(bucket_B); - free(bucket_A); - - return err; -} - -int -divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { - int *B; - int *bucket_A, *bucket_B; - int m, pidx, i; - - /* Check arguments. 
*/ - if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } - else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } - - if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } - bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); - bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); - - /* Burrows-Wheeler Transform. */ - if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); - - if (num_indexes == NULL || indexes == NULL) { - pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); - } else { - pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); - } - - /* Copy to output string. */ - U[0] = T[n - 1]; - for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } - for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } - pidx += 1; - } else { - pidx = -2; - } - - free(bucket_B); - free(bucket_A); - if(A == NULL) { free(B); } - - return pidx; -} +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*- Compiler specifics -*/ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + +#if defined(_MSC_VER) +# pragma warning(disable : 4244) +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ +#endif + + +/*- Dependencies -*/ +#include +#include +#include + +#include "divsufsort.h" + +/*- Constants -*/ +#if defined(INLINE) +# undef INLINE +#endif +#if !defined(INLINE) +# define INLINE __inline +#endif +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? 
(_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 
+ +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? 
+ ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. 
*/ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < 
last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + 
+/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + 
else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, 
middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= 
k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. 
*/ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. 
*/ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; 
(++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, 
first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } 
else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. 
*/ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
*/ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of tyoe B* suffixes. 
*/ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. 
*/ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} diff --git a/contrib/libs/zstd06/dictBuilder/divsufsort.h b/contrib/libs/zstd06/dictBuilder/divsufsort.h index 546c302cb41..84686157ab5 100644 --- a/contrib/libs/zstd06/dictBuilder/divsufsort.h +++ b/contrib/libs/zstd06/dictBuilder/divsufsort.h @@ -1,68 +1,68 @@ #include -/* - * divsufsort.h for libdivsufsort-lite - * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _DIVSUFSORT_H -#define _DIVSUFSORT_H 1 - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - - -/*- Prototypes -*/ - -/** - * Constructs the suffix array of a given string. - * @param T[0..n-1] The input string. - * @param SA[0..n-1] The output array of suffixes. - * @param n The length of the given string. - * @param openMP enables OpenMP optimization. - * @return 0 if no error occurred, -1 or -2 otherwise. - */ -int -divsufsort(const unsigned char *T, int *SA, int n, int openMP); - -/** - * Constructs the burrows-wheeler transformed string of a given string. - * @param T[0..n-1] The input string. - * @param U[0..n-1] The output string. (can be T) - * @param A[0..n-1] The temporary array. (can be NULL) - * @param n The length of the given string. - * @param num_indexes The length of secondary indexes array. (can be NULL) - * @param indexes The secondary indexes array. (can be NULL) - * @param openMP enables OpenMP optimization. - * @return The primary index if no error occurred, -1 or -2 otherwise. - */ -int -divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); - - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ - -#endif /* _DIVSUFSORT_H */ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T[0..n-1] The input string. + * @param SA[0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param A[0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. 
(can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ diff --git a/contrib/libs/zstd06/dictBuilder/zdict.c b/contrib/libs/zstd06/dictBuilder/zdict.c index 4b193940737..95d291f4096 100644 --- a/contrib/libs/zstd06/dictBuilder/zdict.c +++ b/contrib/libs/zstd06/dictBuilder/zdict.c @@ -1,949 +1,949 @@ -/* - dictBuilder - dictionary builder for zstd - Copyright (C) Yann Collet 2016 - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Zstd homepage : https://www.zstd.net -*/ - -/*-************************************** -* Compiler Options -****************************************/ -/* Disable some Visual warning messages */ -#ifdef _MSC_VER -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - -/* Unix Large Files support (>4GB) */ -#define _FILE_OFFSET_BITS 64 -#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ -# define _LARGEFILE_SOURCE -#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */ -# define _LARGEFILE64_SOURCE -#endif - - -/*-************************************* -* Dependencies -***************************************/ -#include /* malloc, free */ -#include /* memset */ -#include /* fprintf, fopen, ftello64 */ -#include /* clock */ - -#include "mem.h" /* read */ -#include "error_private.h" -#include "fse.h" +/* + dictBuilder - dictionary builder for zstd + Copyright (C) Yann Collet 2016 + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Zstd homepage : https://www.zstd.net +*/ + +/*-************************************** +* Compiler Options +****************************************/ +/* Disable some Visual warning messages */ +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + +/* Unix Large Files support (>4GB) */ +#define _FILE_OFFSET_BITS 64 +#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ +# define _LARGEFILE_SOURCE +#elif ! 
defined(__LP64__) /* No point defining Large file for 64 bit */ +# define _LARGEFILE64_SOURCE +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc, free */ +#include /* memset */ +#include /* fprintf, fopen, ftello64 */ +#include /* clock */ + +#include "mem.h" /* read */ +#include "error_private.h" +#include "fse.h" #include "huf_static.h" -#include "zstd_internal.h" -#include "divsufsort.h" -#include "zdict_static.h" - - - -/*-************************************* -* Constants -***************************************/ -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define DICTLISTSIZE 10000 - -#define NOISELENGTH 32 -#define PRIME1 2654435761U -#define PRIME2 2246822519U - -#define MINRATIO 4 -static const U32 g_compressionLevel_default = 5; -static const U32 g_selectivity_default = 9; -static const size_t g_provision_entropySize = 200; -static const size_t g_min_fast_dictContent = 192; - - -/*-************************************* -* Console display -***************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } -static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */ - -#define DISPLAYUPDATE(l, ...) 
if (g_displayLevel>=l) { \ - if (ZDICT_GetMilliSpan(g_time) > refreshRate) \ - { g_time = clock(); DISPLAY(__VA_ARGS__); \ - if (g_displayLevel>=4) fflush(stdout); } } -static const unsigned refreshRate = 300; -static clock_t g_time = 0; - -static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length) -{ - const BYTE* const b = (const BYTE*)ptr; - size_t u; - for (u=0; u126) c = '.'; /* non-printable char */ - DISPLAYLEVEL(dlevel, "%c", c); - } -} - - -/*-******************************************************** -* Helper functions -**********************************************************/ -static unsigned ZDICT_GetMilliSpan(clock_t nPrevious) -{ - clock_t nCurrent = clock(); - unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC); - return nSpan; -} - -unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } - -const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } - - -/*-******************************************************** -* Dictionary training functions -**********************************************************/ -static unsigned ZDICT_NbCommonBytes (register size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r=0; - _BitScanForward( &r, (U32)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return 
(__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - -/*! ZDICT_count() : - Count the nb of common bytes between 2 pointers. - Note : this function presumes end of buffer followed by noisy guard band. 
-*/ -static size_t ZDICT_count(const void* pIn, const void* pMatch) -{ - const char* const pStart = (const char*)pIn; - for (;;) { - size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn = (const char*)pIn+sizeof(size_t); pMatch = (const char*)pMatch+sizeof(size_t); continue; } - pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); - return (size_t)((const char*)pIn - pStart); - } -} - - -typedef struct { - U32 pos; - U32 length; - U32 savings; -} dictItem; - -static void ZDICT_initDictItem(dictItem* d) -{ - d->pos = 1; - d->length = 0; - d->savings = (U32)(-1); -} - - -#define LLIMIT 64 /* heuristic determined experimentally */ -#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ -static dictItem ZDICT_analyzePos( - BYTE* doneMarks, - const int* suffix, U32 start, - const void* buffer, U32 minRatio) -{ - U32 lengthList[LLIMIT] = {0}; - U32 cumulLength[LLIMIT] = {0}; - U32 savings[LLIMIT] = {0}; - const BYTE* b = (const BYTE*)buffer; - size_t length; - size_t maxLength = LLIMIT; - size_t pos = suffix[start]; - U32 end = start; - dictItem solution; - - /* init */ - memset(&solution, 0, sizeof(solution)); - doneMarks[pos] = 1; - - /* trivial repetition cases */ - if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) - ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) - ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { - /* skip and mark segment */ - U16 u16 = MEM_read16(b+pos+4); - U32 u, e = 6; - while (MEM_read16(b+pos+e) == u16) e+=2 ; - if (b[pos+e] == b[pos+e-1]) e++; - for (u=1; u=MINMATCHLENGTH); - - /* look backward */ - do { - length = ZDICT_count(b + pos, b + *(suffix+start-1)); - if (length >=MINMATCHLENGTH) start--; - } while(length >= MINMATCHLENGTH); - - /* exit if not found a minimum nb of repetitions */ - if (end-start < minRatio) { - U32 idx; - for(idx=start; idx= %u at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos); - DISPLAYLEVEL(4, "\n"); - - for (searchLength = MINMATCHLENGTH ; ; searchLength++) { - BYTE currentChar 
= 0; - U32 currentCount = 0; - U32 currentID = refinedStart; - U32 id; - U32 selectedCount = 0; - U32 selectedID = currentID; - for (id =refinedStart; id < refinedEnd; id++) { - if (b[ suffix[id] + searchLength] != currentChar) { - if (currentCount > selectedCount) { - selectedCount = currentCount; - selectedID = currentID; - } - currentID = id; - currentChar = b[ suffix[id] + searchLength]; - currentCount = 0; - } - currentCount ++; - } - if (currentCount > selectedCount) { /* for last */ - selectedCount = currentCount; - selectedID = currentID; - } - - if (selectedCount < minRatio) - break; - refinedStart = selectedID; - refinedEnd = refinedStart + selectedCount; - } - - /* evaluate gain based on new ref */ - start = refinedStart; - pos = suffix[refinedStart]; - end = start; - memset(lengthList, 0, sizeof(lengthList)); - - /* look forward */ - do { - end++; - length = ZDICT_count(b + pos, b + suffix[end]); - if (length >= LLIMIT) length = LLIMIT-1; - lengthList[length]++; - } while (length >=MINMATCHLENGTH); - - /* look backward */ - do { - length = ZDICT_count(b + pos, b + suffix[start-1]); - if (length >= LLIMIT) length = LLIMIT-1; - lengthList[length]++; - if (length >=MINMATCHLENGTH) start--; - } while(length >= MINMATCHLENGTH); - - /* largest useful length */ - memset(cumulLength, 0, sizeof(cumulLength)); - cumulLength[maxLength-1] = lengthList[maxLength-1]; - for (i=(int)(maxLength-2); i>=0; i--) - cumulLength[i] = cumulLength[i+1] + lengthList[i]; - - for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; - maxLength = i; - - /* reduce maxLength in case of final into repetitive data */ - { - U32 l = (U32)maxLength; - BYTE c = b[pos + maxLength-1]; - while (b[pos+l-2]==c) l--; - maxLength = l; - } - if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */ - - /* calculate savings */ - savings[5] = 0; - for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) - savings[i] = savings[i-1] + (lengthList[i] * (i-3)); - - 
DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n", - (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength); - - solution.pos = (U32)pos; - solution.length = (U32)maxLength; - solution.savings = savings[maxLength]; - - /* mark positions done */ - { - U32 id; - U32 testedPos; - for (id=start; id solution.length) length = solution.length; - } - pEnd = (U32)(testedPos + length); - for (p=testedPos; ppos; - const U32 max = elt.pos + (elt.length-1); - - /* tail overlap */ - U32 u; for (u=1; u elt.pos) && (table[u].pos < max)) { /* overlap */ - /* append */ - U32 addedLength = table[u].pos - elt.pos; - table[u].length += addedLength; - table[u].pos = elt.pos; - table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ - table[u].savings += elt.length / 8; /* rough approx */ - elt = table[u]; - while ((u>1) && (table[u-1].savings < elt.savings)) - table[u] = table[u-1], u--; - table[u] = elt; - return u; - } } - - /* front overlap */ - for (u=1; u elt.pos) && (table[u].pos < elt.pos)) { /* overlap */ - /* append */ - int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length); - table[u].savings += elt.length / 8; /* rough approx */ - if (addedLength > 0) { /* otherwise, already included */ - table[u].length += addedLength; - table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ - } - elt = table[u]; - while ((u>1) && (table[u-1].savings < elt.savings)) - table[u] = table[u-1], u--; - table[u] = elt; - return u; - } } - - return 0; -} - - -static void ZDICT_removeDictItem(dictItem* table, U32 id) -{ - /* convention : first element is nb of elts */ - U32 max = table->pos; - U32 u; - if (!id) return; /* protection, should never happen */ - for (u=id; upos--; -} - - -static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt) -{ - /* merge if possible */ - U32 mergeId = ZDICT_checkMerge(table, elt, 0); - if (mergeId) { - U32 
newMerge = 1; - while (newMerge) { - newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId); - if (newMerge) ZDICT_removeDictItem(table, mergeId); - mergeId = newMerge; - } - return; - } - - /* insert */ - { - U32 current; - U32 nextElt = table->pos; - if (nextElt >= maxSize) nextElt = maxSize-1; - current = nextElt-1; - while (table[current].savings < elt.savings) { - table[current+1] = table[current]; - current--; - } - table[current+1] = elt; - table->pos = nextElt+1; - } -} - - -static U32 ZDICT_dictSize(const dictItem* dictList) -{ - U32 u, dictSize = 0; - for (u=1; u> shiftRatio; - int divSuftSortResult; - size_t result = 0; - - /* init */ - DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ - if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { - result = ERROR(memory_allocation); - goto _cleanup; - } - if (minRatio < MINRATIO) minRatio = MINRATIO; - memset(doneMarks, 0, bufferSize+16); - - /* sort */ - DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20)); - divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); - if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } - suffix[bufferSize] = (int)bufferSize; /* leads into noise */ - suffix0[0] = (int)bufferSize; /* leads into noise */ - { - /* build reverse suffix sort */ - size_t pos; - for (pos=0; pos < bufferSize; pos++) - reverseSuffix[suffix[pos]] = (U32)pos; - /* build file pos */ - filePos[0] = 0; - for (pos=1; pospos; /* convention : nb of useful elts within dictList */ - U32 currentSize = 0; - U32 n; for (n=1; n maxDictSize) break; - } - dictList->pos = n; - } - -_cleanup: - free(suffix0); - free(reverseSuffix); - free(doneMarks); - free(filePos); - return result; -} - - -static void ZDICT_fillNoise(void* buffer, size_t length) -{ - unsigned acc = PRIME1; - size_t p=0;; - for (p=0; p> 21); - } -} - - -typedef struct -{ - ZSTD_CCtx* ref; - ZSTD_CCtx* zc; - void* workPlace; /* must be 
ZSTD_BLOCKSIZE_MAX allocated */ -} EStats_ress_t; - - -static void ZDICT_countEStats(EStats_ress_t esr, - U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, - const void* src, size_t srcSize) -{ - const seqStore_t* seqStorePtr; - - if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */ - ZSTD_copyCCtx(esr.zc, esr.ref); - ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); - seqStorePtr = ZSTD_getSeqStore(esr.zc); - - /* literals stats */ - { const BYTE* bytePtr; - for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) - countLit[*bytePtr]++; - } - - /* seqStats */ - { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart); - ZSTD_seqToCodes(seqStorePtr, nbSeq); - - { const BYTE* codePtr = seqStorePtr->offCodeStart; - size_t u; - for (u=0; umlCodeStart; - size_t u; - for (u=0; ullCodeStart; - size_t u; - for (u=0; u=l) { DISPLAY(__VA_ARGS__); } +static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */ + +#define DISPLAYUPDATE(l, ...) 
if (g_displayLevel>=l) { \ + if (ZDICT_GetMilliSpan(g_time) > refreshRate) \ + { g_time = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stdout); } } +static const unsigned refreshRate = 300; +static clock_t g_time = 0; + +static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length) +{ + const BYTE* const b = (const BYTE*)ptr; + size_t u; + for (u=0; u126) c = '.'; /* non-printable char */ + DISPLAYLEVEL(dlevel, "%c", c); + } +} + + +/*-******************************************************** +* Helper functions +**********************************************************/ +static unsigned ZDICT_GetMilliSpan(clock_t nPrevious) +{ + clock_t nCurrent = clock(); + unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC); + return nSpan; +} + +unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } + +const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + + +/*-******************************************************** +* Dictionary training functions +**********************************************************/ +static unsigned ZDICT_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return 
(__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +/*! ZDICT_count() : + Count the nb of common bytes between 2 pointers. + Note : this function presumes end of buffer followed by noisy guard band. 
+*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn = (const char*)pIn+sizeof(size_t); pMatch = (const char*)pMatch+sizeof(size_t); continue; } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t length; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 u16 = MEM_read16(b+pos+4); + U32 u, e = 6; + while (MEM_read16(b+pos+e) == u16) e+=2 ; + if (b[pos+e] == b[pos+e-1]) e++; + for (u=1; u=MINMATCHLENGTH); + + /* look backward */ + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx= %u at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos); + DISPLAYLEVEL(4, "\n"); + + for (searchLength = MINMATCHLENGTH ; ; searchLength++) { + BYTE currentChar 
= 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[ suffix[id] + searchLength] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + searchLength]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new ref */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + + /* look backward */ + do { + length = ZDICT_count(b + pos, b + suffix[start-1]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + cumulLength[maxLength-1] = lengthList[maxLength-1]; + for (i=(int)(maxLength-2); i>=0; i--) + cumulLength[i] = cumulLength[i+1] + lengthList[i]; + + for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; + maxLength = i; + + /* reduce maxLength in case of final into repetitive data */ + { + U32 l = (U32)maxLength; + BYTE c = b[pos + maxLength-1]; + while (b[pos+l-2]==c) l--; + maxLength = l; + } + if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */ + + /* calculate savings */ + savings[5] = 0; + for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) + savings[i] = savings[i-1] + (lengthList[i] * (i-3)); + + 
DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n", + (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength); + + solution.pos = (U32)pos; + solution.length = (U32)maxLength; + solution.savings = savings[maxLength]; + + /* mark positions done */ + { + U32 id; + U32 testedPos; + for (id=start; id solution.length) length = solution.length; + } + pEnd = (U32)(testedPos + length); + for (p=testedPos; ppos; + const U32 max = elt.pos + (elt.length-1); + + /* tail overlap */ + U32 u; for (u=1; u elt.pos) && (table[u].pos < max)) { /* overlap */ + /* append */ + U32 addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx */ + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + /* front overlap */ + for (u=1; u elt.pos) && (table[u].pos < elt.pos)) { /* overlap */ + /* append */ + int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length); + table[u].savings += elt.length / 8; /* rough approx */ + if (addedLength > 0) { /* otherwise, already included */ + table[u].length += addedLength; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + } + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + return 0; +} + + +static void ZDICT_removeDictItem(dictItem* table, U32 id) +{ + /* convention : first element is nb of elts */ + U32 max = table->pos; + U32 u; + if (!id) return; /* protection, should never happen */ + for (u=id; upos--; +} + + +static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt) +{ + /* merge if possible */ + U32 mergeId = ZDICT_checkMerge(table, elt, 0); + if (mergeId) { + U32 
newMerge = 1; + while (newMerge) { + newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId); + if (newMerge) ZDICT_removeDictItem(table, mergeId); + mergeId = newMerge; + } + return; + } + + /* insert */ + { + U32 current; + U32 nextElt = table->pos; + if (nextElt >= maxSize) nextElt = maxSize-1; + current = nextElt-1; + while (table[current].savings < elt.savings) { + table[current+1] = table[current]; + current--; + } + table[current+1] = elt; + table->pos = nextElt+1; + } +} + + +static U32 ZDICT_dictSize(const dictItem* dictList) +{ + U32 u, dictSize = 0; + for (u=1; u> shiftRatio; + int divSuftSortResult; + size_t result = 0; + + /* init */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { + result = ERROR(memory_allocation); + goto _cleanup; + } + if (minRatio < MINRATIO) minRatio = MINRATIO; + memset(doneMarks, 0, bufferSize+16); + + /* sort */ + DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20)); + divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); + if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } + suffix[bufferSize] = (int)bufferSize; /* leads into noise */ + suffix0[0] = (int)bufferSize; /* leads into noise */ + { + /* build reverse suffix sort */ + size_t pos; + for (pos=0; pos < bufferSize; pos++) + reverseSuffix[suffix[pos]] = (U32)pos; + /* build file pos */ + filePos[0] = 0; + for (pos=1; pospos; /* convention : nb of useful elts within dictList */ + U32 currentSize = 0; + U32 n; for (n=1; n maxDictSize) break; + } + dictList->pos = n; + } + +_cleanup: + free(suffix0); + free(reverseSuffix); + free(doneMarks); + free(filePos); + return result; +} + + +static void ZDICT_fillNoise(void* buffer, size_t length) +{ + unsigned acc = PRIME1; + size_t p=0;; + for (p=0; p> 21); + } +} + + +typedef struct +{ + ZSTD_CCtx* ref; + ZSTD_CCtx* zc; + void* workPlace; /* must be 
ZSTD_BLOCKSIZE_MAX allocated */ +} EStats_ress_t; + + +static void ZDICT_countEStats(EStats_ress_t esr, + U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, + const void* src, size_t srcSize) +{ + const seqStore_t* seqStorePtr; + + if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */ + ZSTD_copyCCtx(esr.zc, esr.ref); + ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); + seqStorePtr = ZSTD_getSeqStore(esr.zc); + + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; + } + + /* seqStats */ + { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart); + ZSTD_seqToCodes(seqStorePtr, nbSeq); + + { const BYTE* codePtr = seqStorePtr->offCodeStart; + size_t u; + for (u=0; umlCodeStart; + size_t u; + for (u=0; ullCodeStart; + size_t u; + for (u=0; u1) { /* selectivity == 1 => fast mode */ - ZDICT_trainBuffer(dictList, dictListSize, - samplesBuffer, sBuffSize, - sampleSizes, nbSamples, - selectivity, (U32)targetDictSize); - - /* display best matches */ - if (g_displayLevel>= 3) { - U32 const nb = 25; - U32 const dictContentSize = ZDICT_dictSize(dictList); - U32 u; - DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize); - DISPLAYLEVEL(3, "list %u best segments \n", nb); - for (u=1; u<=nb; u++) { - U32 p = dictList[u].pos; - U32 l = dictList[u].length; - U32 d = MIN(40, l); - DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", - u, l, p, dictList[u].savings); - ZDICT_printHex(3, (const char*)samplesBuffer+p, d); - DISPLAYLEVEL(3, "| \n"); - } } } - - /* create dictionary */ - { U32 dictContentSize = ZDICT_dictSize(dictList); - size_t hSize; - BYTE* ptr; - U32 u; - - /* build dict content */ - ptr = (BYTE*)dictBuffer + maxDictSize; - for (u=1; upos; u++) { - U32 l = dictList[u].length; - ptr -= l; + 
ZDICT_initDictItem(dictList); + g_displayLevel = params.notificationLevel; + if (selectivity==0) selectivity = g_selectivity_default; + if (compressionLevel==0) compressionLevel = g_compressionLevel_default; + + /* build dictionary */ + if (selectivity>1) { /* selectivity == 1 => fast mode */ + ZDICT_trainBuffer(dictList, dictListSize, + samplesBuffer, sBuffSize, + sampleSizes, nbSamples, + selectivity, (U32)targetDictSize); + + /* display best matches */ + if (g_displayLevel>= 3) { + U32 const nb = 25; + U32 const dictContentSize = ZDICT_dictSize(dictList); + U32 u; + DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize); + DISPLAYLEVEL(3, "list %u best segments \n", nb); + for (u=1; u<=nb; u++) { + U32 p = dictList[u].pos; + U32 l = dictList[u].length; + U32 d = MIN(40, l); + DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", + u, l, p, dictList[u].savings); + ZDICT_printHex(3, (const char*)samplesBuffer+p, d); + DISPLAYLEVEL(3, "| \n"); + } } } + + /* create dictionary */ + { U32 dictContentSize = ZDICT_dictSize(dictList); + size_t hSize; + BYTE* ptr; + U32 u; + + /* build dict content */ + ptr = (BYTE*)dictBuffer + maxDictSize; + for (u=1; upos; u++) { + U32 l = dictList[u].length; + ptr -= l; if (ptr<(BYTE*)dictBuffer) return ERROR(GENERIC); /* should not happen */ - memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); - } - - /* fast mode dict content */ - if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */ - DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */ - DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10)); - dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize, - samplesBuffer, sBuffSize); - } - - /* dictionary header */ - MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC); - hSize = 4; - - /* entropic tables */ - DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ - DISPLAYLEVEL(2, 
"statistics ... \n"); - hSize += ZDICT_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4, - compressionLevel, - samplesBuffer, sampleSizes, nbSamples, - (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize); - - if (hSize + dictContentSize < maxDictSize) - memmove((char*)dictBuffer + hSize, (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize); - dictSize = MIN(maxDictSize, hSize+dictContentSize); - } - - /* clean up */ - free(dictList); - return dictSize; -} - - -/* issue : samplesBuffer need to be followed by a noisy guard band. -* work around : duplicate the buffer, and add the noise */ -size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t params) -{ - void* newBuff; - size_t sBuffSize; - - { unsigned u; for (u=0, sBuffSize=0; u no dictionary */ - newBuff = malloc(sBuffSize + NOISELENGTH); - if (!newBuff) return ERROR(memory_allocation); - - memcpy(newBuff, samplesBuffer, sBuffSize); - ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ - - { size_t const result = ZDICT_trainFromBuffer_unsafe( - dictBuffer, dictBufferCapacity, - newBuff, samplesSizes, nbSamples, - params); - free(newBuff); - return result; } -} - - -size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) -{ - ZDICT_params_t params; - memset(¶ms, 0, sizeof(params)); - return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity, - samplesBuffer, samplesSizes, nbSamples, - params); -} - + memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); + } + + /* fast mode dict content */ + if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */ + DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(3, "Adding %u KB with fast 
sampling \n", (U32)(targetDictSize>>10)); + dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize, + samplesBuffer, sBuffSize); + } + + /* dictionary header */ + MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC); + hSize = 4; + + /* entropic tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... \n"); + hSize += ZDICT_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4, + compressionLevel, + samplesBuffer, sampleSizes, nbSamples, + (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize); + + if (hSize + dictContentSize < maxDictSize) + memmove((char*)dictBuffer + hSize, (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize); + dictSize = MIN(maxDictSize, hSize+dictContentSize); + } + + /* clean up */ + free(dictList); + return dictSize; +} + + +/* issue : samplesBuffer need to be followed by a noisy guard band. +* work around : duplicate the buffer, and add the noise */ +size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + void* newBuff; + size_t sBuffSize; + + { unsigned u; for (u=0, sBuffSize=0; u no dictionary */ + newBuff = malloc(sBuffSize + NOISELENGTH); + if (!newBuff) return ERROR(memory_allocation); + + memcpy(newBuff, samplesBuffer, sBuffSize); + ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ + + { size_t const result = ZDICT_trainFromBuffer_unsafe( + dictBuffer, dictBufferCapacity, + newBuff, samplesSizes, nbSamples, + params); + free(newBuff); + return result; } +} + + +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, 
nbSamples, + params); +} + diff --git a/contrib/libs/zstd06/dictBuilder/zdict.h b/contrib/libs/zstd06/dictBuilder/zdict.h index a898cb9c322..d9e6d3262de 100644 --- a/contrib/libs/zstd06/dictBuilder/zdict.h +++ b/contrib/libs/zstd06/dictBuilder/zdict.h @@ -1,68 +1,68 @@ #include -/* - dictBuilder header file - Copyright (C) Yann Collet 2016 - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - Zstd source repository : https://www.zstd.net -*/ - -#ifndef DICTBUILDER_H_001 -#define DICTBUILDER_H_001 - -#if defined (__cplusplus) -extern "C" { -#endif - -/*-************************************* -* Public functions -***************************************/ -/*! ZDICT_trainFromBuffer() : - Train a dictionary from a memory buffer `samplesBuffer`, - where `nbSamples` samples have been stored concatenated. - Each sample size is provided into an orderly table `samplesSizes`. - Resulting dictionary will be saved into `dictBuffer`. - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - or an error code, which can be tested by ZDICT_isError(). -*/ -size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); - - -/*-************************************* -* Helper functions -***************************************/ -unsigned ZDICT_isError(size_t errorCode); -const char* ZDICT_getErrorName(size_t errorCode); - - -#if defined (__cplusplus) -} -#endif - -#endif +/* + dictBuilder header file + Copyright (C) Yann Collet 2016 + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Zstd source repository : https://www.zstd.net +*/ + +#ifndef DICTBUILDER_H_001 +#define DICTBUILDER_H_001 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Public functions +***************************************/ +/*! ZDICT_trainFromBuffer() : + Train a dictionary from a memory buffer `samplesBuffer`, + where `nbSamples` samples have been stored concatenated. + Each sample size is provided into an orderly table `samplesSizes`. + Resulting dictionary will be saved into `dictBuffer`. + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + or an error code, which can be tested by ZDICT_isError(). 
+*/ +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +/*-************************************* +* Helper functions +***************************************/ +unsigned ZDICT_isError(size_t errorCode); +const char* ZDICT_getErrorName(size_t errorCode); + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/contrib/libs/zstd06/dictBuilder/zdict_static.h b/contrib/libs/zstd06/dictBuilder/zdict_static.h index 6553e904ae6..f83f917008b 100644 --- a/contrib/libs/zstd06/dictBuilder/zdict_static.h +++ b/contrib/libs/zstd06/dictBuilder/zdict_static.h @@ -1,81 +1,81 @@ #include -/* - dictBuilder header file - for static linking only - Copyright (C) Yann Collet 2016 - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Zstd source repository : https://www.zstd.net -*/ - -/* This library is EXPERIMENTAL, below API is not yet stable */ - -#ifndef DICTBUILDER_STATIC_H_002 -#define DICTBUILDER_STATIC_H_002 - -#if defined (__cplusplus) -extern "C" { -#endif - -/*-************************************* -* Dependencies -***************************************/ -#include "zdict.h" - - -/*-************************************* -* Public type -***************************************/ -typedef struct { - unsigned selectivityLevel; /* 0 means default; larger => bigger selection => larger dictionary */ - unsigned compressionLevel; /* 0 means default; target a specific zstd compression level */ - unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ - unsigned reserved[3]; /* space for future parameters */ -} ZDICT_params_t; - - -/*-************************************* -* Public functions -***************************************/ -/*! ZDICT_trainFromBuffer_advanced() : - Same as ZDICT_trainFromBuffer() with control over more parameters. - `parameters` is optional and can be provided with values set to 0 to mean "default". - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`) - or an error code, which can be tested by DiB_isError(). 
- note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using ZDICT_setNotificationLevel() -*/ -size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t parameters); - - -#if defined (__cplusplus) -} -#endif - -#endif /* DICTBUILDER_STATIC_H_002 */ +/* + dictBuilder header file + for static linking only + Copyright (C) Yann Collet 2016 + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Zstd source repository : https://www.zstd.net +*/ + +/* This library is EXPERIMENTAL, below API is not yet stable */ + +#ifndef DICTBUILDER_STATIC_H_002 +#define DICTBUILDER_STATIC_H_002 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +#include "zdict.h" + + +/*-************************************* +* Public type +***************************************/ +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => bigger selection => larger dictionary */ + unsigned compressionLevel; /* 0 means default; target a specific zstd compression level */ + unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned reserved[3]; /* space for future parameters */ +} ZDICT_params_t; + + +/*-************************************* +* Public functions +***************************************/ +/*! ZDICT_trainFromBuffer_advanced() : + Same as ZDICT_trainFromBuffer() with control over more parameters. + `parameters` is optional and can be provided with values set to 0 to mean "default". + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`) + or an error code, which can be tested by DiB_isError(). 
+ note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using ZDICT_setNotificationLevel() +*/ +size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_STATIC_H_002 */ diff --git a/contrib/libs/zstd06/legacy/zstd_legacy.h b/contrib/libs/zstd06/legacy/zstd_legacy.h index 8b17a15317d..80fc364cac5 100644 --- a/contrib/libs/zstd06/legacy/zstd_legacy.h +++ b/contrib/libs/zstd06/legacy/zstd_legacy.h @@ -1,65 +1,65 @@ #include -/* - zstd_legacy - decoder for legacy format - Header File +/* + zstd_legacy - decoder for legacy format + Header File Copyright (C) 2015-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd - - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -#ifndef ZSTD_LEGACY_H -#define ZSTD_LEGACY_H - -#if defined (__cplusplus) -extern "C" { -#endif - -/* ************************************* -* Includes -***************************************/ -#include "mem.h" /* MEM_STATIC */ -#include "error_private.h" /* ERROR */ -#include "zstd_v01.h" -#include "zstd_v02.h" -#include "zstd_v03.h" -#include "zstd_v04.h" -#include "zstd_v05.h" + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include "mem.h" /* MEM_STATIC */ +#include "error_private.h" /* ERROR */ +#include "zstd_v01.h" +#include "zstd_v02.h" +#include "zstd_v03.h" +#include "zstd_v04.h" +#include "zstd_v05.h" #include "zstd_v07.h" #include "zstd_v08.h" - + /** ZSTD_isLegacy() : @return : > 0 if supported by legacy decoder. 0 otherwise. return value is the version. 
*/ -MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE) -{ - switch(magicNumberLE) - { +MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE) +{ + switch(magicNumberLE) + { case ZSTDv01_magicNumberLE:return 1; case ZSTDv02_magicNumber : return 2; case ZSTDv03_magicNumber : return 3; @@ -67,17 +67,17 @@ MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE) case ZSTDv05_MAGICNUMBER : return 5; case ZSTDv07_MAGICNUMBER : return 7; case ZSTDv08_MAGICNUMBER : return 8; - default : return 0; - } -} - - -MEM_STATIC size_t ZSTD_decompressLegacy( - void* dst, size_t dstCapacity, - const void* src, size_t compressedSize, + default : return 0; + } +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, const void* dict,size_t dictSize, - U32 magicNumberLE) -{ + U32 magicNumberLE) +{ switch(magicNumberLE) { case ZSTDv01_magicNumberLE : @@ -116,12 +116,12 @@ MEM_STATIC size_t ZSTD_decompressLegacy( default : return ERROR(prefix_unknown); } -} - - - -#if defined (__cplusplus) -} -#endif - -#endif /* ZSTD_LEGACY_H */ +} + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ diff --git a/contrib/libs/zstd06/legacy/zstd_v01.c b/contrib/libs/zstd06/legacy/zstd_v01.c index f2d657ece9a..d62367df29c 100644 --- a/contrib/libs/zstd06/legacy/zstd_v01.c +++ b/contrib/libs/zstd06/legacy/zstd_v01.c @@ -1,2178 +1,2178 @@ -/* ****************************************************************** - ZSTD_v01 - Zstandard decoder, compatible with v0.1.x format - Copyright (C) 2013-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ - -/****************************************** -* Includes -******************************************/ -#include /* size_t, ptrdiff_t */ -#include "zstd_v01.h" - - -/****************************************** -* Static allocation -******************************************/ -/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
-* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -#define FSE_MAX_MEMORY_USAGE 14 -#define FSE_DEFAULT_MEMORY_USAGE 13 - -/* FSE_MAX_SYMBOL_VALUE : -* Maximum symbol value authorized. -* Required for proper stack allocation */ -#define FSE_MAX_SYMBOL_VALUE 255 - - -/**************************************************************** -* template functions type & suffix -****************************************************************/ -#define FSE_FUNCTION_TYPE BYTE -#define FSE_FUNCTION_EXTENSION - - -/**************************************************************** -* Byte symbol type -****************************************************************/ -typedef struct -{ - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - - - -/**************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -#else -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - - -/**************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ -#include /* printf (debug) */ - - -#ifndef MEM_ACCESS_MODULE -#define MEM_ACCESS_MODULE -/**************************************************************** -* Basic Types 
-*****************************************************************/ -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# include -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef int16_t S16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -typedef int64_t S64; -#else -typedef unsigned char BYTE; -typedef unsigned short U16; -typedef signed short S16; -typedef unsigned int U32; -typedef signed int S32; -typedef unsigned long long U64; -typedef signed long long S64; -#endif - -#endif /* MEM_ACCESS_MODULE */ - -/**************************************************************** -* Memory I/O -*****************************************************************/ -/* FSE_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
- * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define FSE_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) -# define FSE_FORCE_MEMORY_ACCESS 1 -# endif -#endif - - -static unsigned FSE_32bits(void) -{ - return sizeof(void*)==4; -} - -static unsigned FSE_isLittleEndian(void) -{ - const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - -#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) - -static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; - -static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -#else - -static U16 FSE_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static U32 FSE_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static U64 FSE_read64(const void* 
memPtr) -{ - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -#endif // FSE_FORCE_MEMORY_ACCESS - -static U16 FSE_readLE16(const void* memPtr) -{ - if (FSE_isLittleEndian()) - return FSE_read16(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)(p[0] + (p[1]<<8)); - } -} - -static U32 FSE_readLE32(const void* memPtr) -{ - if (FSE_isLittleEndian()) - return FSE_read32(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); - } -} - - -static U64 FSE_readLE64(const void* memPtr) -{ - if (FSE_isLittleEndian()) - return FSE_read64(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) - + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); - } -} - -static size_t FSE_readLEST(const void* memPtr) -{ - if (FSE_32bits()) - return (size_t)FSE_readLE32(memPtr); - else - return (size_t)FSE_readLE64(memPtr); -} - - - -/**************************************************************** -* Constants -*****************************************************************/ -#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) -#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX -#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -#endif - - -/**************************************************************** -* Error Management -****************************************************************/ -#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/**************************************************************** -* Complex types -****************************************************************/ -typedef struct -{ - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - -typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; - 
-/**************************************************************** -* Internal functions -****************************************************************/ -FORCE_INLINE unsigned FSE_highbit32 (register U32 val) -{ -# if defined(_MSC_VER) /* Visual */ - unsigned long r; - _BitScanReverse ( &r, val ); - return (unsigned) r; -# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ - return 31 - __builtin_clz (val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - unsigned r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; - return r; -# endif -} - - -/**************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - - - -static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } - +/* ****************************************************************** + ZSTD_v01 + Zstandard decoder, compatible with v0.1.x format + Copyright (C) 2013-2015, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/****************************************** +* Includes +******************************************/ +#include /* size_t, ptrdiff_t */ +#include "zstd_v01.h" + + +/****************************************** +* Static allocation +******************************************/ +/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#define FSE_MAX_MEMORY_USAGE 14 +#define FSE_DEFAULT_MEMORY_USAGE 13 + +/* FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#define FSE_MAX_SYMBOL_VALUE 255 + + +/**************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION + + +/**************************************************************** +* Byte symbol type +****************************************************************/ +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + + + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + + +#ifndef MEM_ACCESS_MODULE +#define MEM_ACCESS_MODULE +/**************************************************************** +* Basic Types +*****************************************************************/ +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef int16_t S16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +typedef int64_t S64; +#else +typedef unsigned char BYTE; +typedef 
unsigned short U16; +typedef signed short S16; +typedef unsigned int U32; +typedef signed int S32; +typedef unsigned long long U64; +typedef signed long long S64; +#endif + +#endif /* MEM_ACCESS_MODULE */ + +/**************************************************************** +* Memory I/O +*****************************************************************/ +/* FSE_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define FSE_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define FSE_FORCE_MEMORY_ACCESS 1 +# endif +#endif + + +static unsigned FSE_32bits(void) +{ + return sizeof(void*)==4; +} + +static unsigned FSE_isLittleEndian(void) +{ + const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) + +static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +static U16 FSE_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 FSE_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U64 FSE_read64(const void* 
memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +#endif // FSE_FORCE_MEMORY_ACCESS + +static U16 FSE_readLE16(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +static U32 FSE_readLE32(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + + +static U64 FSE_readLE64(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + +static size_t FSE_readLEST(const void* memPtr) +{ + if (FSE_32bits()) + return (size_t)FSE_readLE32(memPtr); + else + return (size_t)FSE_readLE64(memPtr); +} + + + +/**************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + + +/**************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/**************************************************************** +* Complex types +****************************************************************/ +typedef struct +{ + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + 
+/**************************************************************** +* Internal functions +****************************************************************/ +FORCE_INLINE unsigned FSE_highbit32 (register U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + +/**************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + + +static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } + #define FSE_DECODE_TYPE FSE_decode_t - - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ - + + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + static size_t FSE_buildDTable -(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - FSE_DECODE_TYPE* 
const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */ - const U32 tableSize = 1 << tableLog; - const U32 tableMask = tableSize-1; - const U32 step = FSE_tableStep(tableSize); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; - U32 position = 0; - U32 highThreshold = tableSize-1; - const S16 largeLimit= (S16)(1 << (tableLog-1)); - U32 noLarge = 1; - U32 s; - - /* Sanity Checks */ - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge; - if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge; - - /* Init, lay down lowprob symbols */ - DTableH[0].tableLog = (U16)tableLog; - for (s=0; s<=maxSymbolValue; s++) - { - if (normalizedCounter[s]==-1) - { - tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; - symbolNext[s] = 1; - } - else - { - if (normalizedCounter[s] >= largeLimit) noLarge=0; - symbolNext[s] = normalizedCounter[s]; - } - } - - /* Spread symbols */ - for (s=0; s<=maxSymbolValue; s++) - { - int i; - for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } - } - - if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - - /* Build Decoding table */ - { - U32 i; - for (i=0; ifastMode = (U16)noLarge; - return 0; -} - - -/****************************************** -* FSE byte symbol -******************************************/ -#ifndef FSE_COMMONDEFS_ONLY - -static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); } - -static short FSE_abs(short a) -{ - return a<0? 
-a : a; -} - - -/**************************************************************** -* Header bitstream management -****************************************************************/ -static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) -{ - const BYTE* const istart = (const BYTE*) headerBuffer; - const BYTE* const iend = istart + hbSize; - const BYTE* ip = istart; - int nbBits; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; - - if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong; - bitStream = FSE_readLE32(ip); - nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ - if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge; - bitStream >>= 4; - bitCount = 4; - *tableLogPtr = nbBits; - remaining = (1<1) && (charnum<=*maxSVPtr)) - { - if (previous0) - { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) - { - n0+=24; - if (ip < iend-5) - { - ip+=2; - bitStream = FSE_readLE32(ip) >> bitCount; - } - else - { - bitStream >>= 16; - bitCount+=16; - } - } - while ((bitStream & 3) == 3) - { - n0+=3; - bitStream>>=2; - bitCount+=2; - } - n0 += bitStream & 3; - bitCount += 2; - if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall; - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) - { - ip += bitCount>>3; - bitCount &= 7; - bitStream = FSE_readLE32(ip) >> bitCount; - } - else - bitStream >>= 2; - } - { - const short max = (short)((2*threshold-1)-remaining); - short count; - - if ((bitStream & (threshold-1)) < (U32)max) - { - count = (short)(bitStream & (threshold-1)); - bitCount += nbBits-1; - } - else - { - count = (short)(bitStream & (2*threshold-1)); - if (count >= threshold) count -= max; - bitCount += nbBits; - } - - count--; /* extra accuracy */ - remaining -= FSE_abs(count); - 
normalizedCounter[charnum++] = count; - previous0 = !count; - while (remaining < threshold) - { - nbBits--; - threshold >>= 1; - } - - { - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) - { - ip += bitCount>>3; - bitCount &= 7; - } - else - { - bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = FSE_readLE32(ip) >> (bitCount & 31); - } - } - } - if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC; - *maxSVPtr = charnum-1; - - ip += (bitCount+7)>>3; - if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; - return ip-istart; -} - - -/********************************************************* -* Decompression (Byte symbols) -*********************************************************/ -static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ - - DTableH->tableLog = 0; - DTableH->fastMode = 0; - - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - - return 0; -} - - -static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSymbolValue = tableMask; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s=0; s<=maxSymbolValue; s++) - { - dinfo[s].newState = 0; - dinfo[s].symbol = (BYTE)s; - dinfo[s].nbBits = (BYTE)nbBits; - } - - return 0; -} - - -/* FSE_initDStream - * Initialize a FSE_DStream_t. - * srcBuffer must point at the beginning of an FSE block. 
- * The function result is the size of the FSE_block (== srcSize). - * If srcSize is too small, the function will return an errorCode; - */ -static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) -{ - if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; - - if (srcSize >= sizeof(size_t)) - { - U32 contain32; - bitD->start = (const char*)srcBuffer; - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); - bitD->bitContainer = FSE_readLEST(bitD->ptr); - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ - bitD->bitsConsumed = 8 - FSE_highbit32(contain32); - } - else - { - U32 contain32; - bitD->start = (const char*)srcBuffer; - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; - default:; - } - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ - bitD->bitsConsumed = 8 - FSE_highbit32(contain32); - bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; - } - - return srcSize; -} - - -/*!FSE_lookBits - * Provides next n bits from the bitContainer. - * bitContainer is not modified (bits are still present for next read/look) - * On 32-bits, maxNbBits==25 - * On 64-bits, maxNbBits==57 - * return : value extracted. 
- */ -static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) -{ - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); -} - -static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ -{ - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; - return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); -} - -static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) -{ - bitD->bitsConsumed += nbBits; -} - - -/*!FSE_readBits - * Read next n bits from the bitContainer. - * On 32-bits, don't read more than maxNbBits==25 - * On 64-bits, don't read more than maxNbBits==57 - * Use the fast variant *only* if n >= 1. - * return : value extracted. - */ -static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) -{ - size_t value = FSE_lookBits(bitD, nbBits); - FSE_skipBits(bitD, nbBits); - return value; -} - -static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! 
*/ -{ - size_t value = FSE_lookBitsFast(bitD, nbBits); - FSE_skipBits(bitD, nbBits); - return value; -} - -static unsigned FSE_reloadDStream(FSE_DStream_t* bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ - return FSE_DStream_tooFar; - - if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) - { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = FSE_readLEST(bitD->ptr); - return FSE_DStream_unfinished; - } - if (bitD->ptr == bitD->start) - { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; - return FSE_DStream_completed; - } - { - U32 nbBytes = bitD->bitsConsumed >> 3; - U32 result = FSE_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) - { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = FSE_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ - return result; - } -} - - -static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; - DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); - FSE_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) -{ - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = FSE_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) -{ - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = 
FSE_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -/* FSE_endOfDStream - Tells if bitD has reached end of bitStream or not */ - -static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) -{ - return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8)); -} - -static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) -{ - return DStatePtr->state == 0; -} - - -FORCE_INLINE size_t FSE_decompress_usingDTable_generic( - void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt, const unsigned fast) -{ - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-3; - - FSE_DStream_t bitD; - FSE_DState_t state1; - FSE_DState_t state2; - size_t errorCode; - - /* Init */ - errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ - if (FSE_isError(errorCode)) return errorCode; - - FSE_initDState(&state1, &bitD, dt); - FSE_initDState(&state2, &bitD, dt); - -#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) - - /* 4 symbols per loop */ - for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ - FSE_reloadDStream(&bitD); - - op[1] = FSE_GETSYMBOL(&state2); - - if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } } - - op[2] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - FSE_reloadDStream(&bitD); - - op[3] = FSE_GETSYMBOL(&state2); - } - - /* tail */ - /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */ - while (1) - { - if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) - break; - - *op++ = FSE_GETSYMBOL(&state1); - - if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) - break; - - *op++ = FSE_GETSYMBOL(&state2); - } - - /* end ? 
*/ - if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) - return op-ostart; - - if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ - - return (size_t)-FSE_ERROR_corruptionDetected; -} - - -static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt) -{ - FSE_DTableHeader DTableH; - memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */ - - /* select fast mode (static) */ - if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} - - -static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) -{ - const BYTE* const istart = (const BYTE*)cSrc; - const BYTE* ip = istart; - short counting[FSE_MAX_SYMBOL_VALUE+1]; - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - size_t errorCode; - - if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ - - /* normal FSE decoding mode */ - errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); - if (FSE_isError(errorCode)) return errorCode; - if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ - ip += errorCode; - cSrcSize -= errorCode; - - errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); - if (FSE_isError(errorCode)) return errorCode; - - /* always return, even if it is an error code */ - return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); -} - - - -/* ******************************************************* -* Huff0 : Huffman block compression 
-*********************************************************/ -#define HUF_MAX_SYMBOL_VALUE 255 -#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */ -#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */ -#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) -# error "HUF_MAX_TABLELOG is too large !" -#endif - -typedef struct HUF_CElt_s { - U16 val; - BYTE nbBits; -} HUF_CElt ; - -typedef struct nodeElt_s { - U32 count; - U16 parent; - BYTE byte; - BYTE nbBits; -} nodeElt; - - -/* ******************************************************* -* Huff0 : Huffman block decompression -*********************************************************/ -typedef struct { - BYTE byte; - BYTE nbBits; -} HUF_DElt; - -static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) -{ - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ - U32 weightTotal; - U32 maxBits; - const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; - size_t oSize; - U32 n; - U32 nextRankStart; - void* ptr = DTable+1; - HUF_DElt* const dt = (HUF_DElt*)ptr; - - FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ - //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... 
*/ - if (iSize >= 128) /* special header */ - { - if (iSize >= (242)) /* RLE */ - { - static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; - oSize = l[iSize-242]; - memset(huffWeight, 1, sizeof(huffWeight)); - iSize = 0; - } - else /* Incompressible */ - { - oSize = iSize - 127; - iSize = ((oSize+1)/2); - if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - ip += 1; - for (n=0; n> 4; - huffWeight[n+1] = ip[n/2] & 15; - } - } - } - else /* header compressed with FSE (normal case) */ - { - if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */ - if (FSE_isError(oSize)) return oSize; - } - - /* collect weight stats */ - memset(rankVal, 0, sizeof(rankVal)); - weightTotal = 0; - for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected; - rankVal[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } - - /* get last non-null symbol weight (implied, total must be 2^n) */ - maxBits = FSE_highbit32(weightTotal) + 1; - if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */ - DTable[0] = (U16)maxBits; - { - U32 total = 1 << maxBits; - U32 rest = total - weightTotal; - U32 verif = 1 << FSE_highbit32(rest); - U32 lastWeight = FSE_highbit32(rest) + 1; - if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankVal[lastWeight]++; - } - - /* check tree construction validity */ - if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */ - - /* Prepare ranks */ - nextRankStart = 0; - for (n=1; n<=maxBits; n++) - { - U32 current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } - - /* fill DTable */ - for (n=0; 
n<=oSize; n++) - { - const U32 w = huffWeight[n]; - const U32 length = (1 << w) >> 1; - U32 i; - HUF_DElt D; - D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w); - for (i = rankVal[w]; i < rankVal[w] + length; i++) - dt[i] = D; - rankVal[w] += length; - } - - return iSize+1; -} - - -static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog) -{ - const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - const BYTE c = dt[val].byte; - FSE_skipBits(Dstream, dt[val].nbBits); - return c; -} - -static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ - void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const U16* DTable) -{ - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-15; - - const void* ptr = DTable; - const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; - const U32 dtLog = DTable[0]; - size_t errorCode; - U32 reloadStatus; - - /* Init */ - - const U16* jumpTable = (const U16*)cSrc; - const size_t length1 = FSE_readLE16(jumpTable); - const size_t length2 = FSE_readLE16(jumpTable+1); - const size_t length3 = FSE_readLE16(jumpTable+2); - const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! 
- const char* const start1 = (const char*)(cSrc) + 6; - const char* const start2 = start1 + length1; - const char* const start3 = start2 + length2; - const char* const start4 = start3 + length3; - FSE_DStream_t bitD1, bitD2, bitD3, bitD4; - - if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - - errorCode = FSE_initDStream(&bitD1, start1, length1); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD2, start2, length2); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD3, start3, length3); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD4, start4, length4); - if (FSE_isError(errorCode)) return errorCode; - - reloadStatus=FSE_reloadDStream(&bitD2); - - /* 16 symbols per loop */ - for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) - -#define HUF_DECODE_SYMBOL_2(n, Dstream) \ - op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ - if (FSE_32bits()) FSE_reloadDStream(&Dstream) - - HUF_DECODE_SYMBOL_1( 0, bitD1); - HUF_DECODE_SYMBOL_1( 1, bitD2); - HUF_DECODE_SYMBOL_1( 2, bitD3); - HUF_DECODE_SYMBOL_1( 3, bitD4); - HUF_DECODE_SYMBOL_2( 4, bitD1); - HUF_DECODE_SYMBOL_2( 5, bitD2); - HUF_DECODE_SYMBOL_2( 6, bitD3); - HUF_DECODE_SYMBOL_2( 7, bitD4); - HUF_DECODE_SYMBOL_1( 8, bitD1); - HUF_DECODE_SYMBOL_1( 9, bitD2); - HUF_DECODE_SYMBOL_1(10, bitD3); - HUF_DECODE_SYMBOL_1(11, bitD4); - HUF_DECODE_SYMBOL_0(12, bitD1); - HUF_DECODE_SYMBOL_0(13, bitD2); - HUF_DECODE_SYMBOL_0(14, bitD3); - HUF_DECODE_SYMBOL_0(15, bitD4); - } - - if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ - return (size_t)-FSE_ERROR_corruptionDetected; - - /* tail */ - { - // bitTail = bitD1; // *much* slower : -20% !??! 
- FSE_DStream_t bitTail; - bitTail.ptr = bitD1.ptr; - bitTail.bitsConsumed = bitD1.bitsConsumed; - bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer - bitTail.start = start1; - for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - ip += errorCode; - cSrcSize -= errorCode; - - return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable); -} - - -#endif /* FSE_COMMONDEFS_ONLY */ - -/* - zstd - standard compression library - Header File for static linking only - Copyright (C) 2014-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd - - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - -/* The objects defined into this file should be considered experimental. - * They are not labelled stable, as their prototype may change in the future. - * You can use them for tests, provide feedback, or if you can endure risk of future changes. - */ - -/************************************** -* Error management -**************************************/ -#define ZSTD_LIST_ERRORS(ITEM) \ - ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \ - ITEM(ZSTD_ERROR_MagicNumber) \ - ITEM(ZSTD_ERROR_SrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \ - ITEM(ZSTD_ERROR_corruption) \ - ITEM(ZSTD_ERROR_maxCode) - -#define ZSTD_GENERATE_ENUM(ENUM) ENUM, -typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes; /* exposed list of errors; static linking only */ - -/* - zstd - standard compression library - Copyright (C) 2014-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd - - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - -/**************************************************************** -* Tuning parameters -*****************************************************************/ -/* MEMORY_USAGE : -* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect */ -#define ZSTD_MEMORY_USAGE 17 - - -/************************************** - CPU Feature Detection -**************************************/ -/* - * Automated efficient unaligned memory access detection - * Based on known hardware architectures - * This list will be updated thanks to feedbacks - */ -#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ - || defined(__ARM_FEATURE_UNALIGNED) \ - || defined(__i386__) || defined(__x86_64__) \ - || defined(_M_IX86) || defined(_M_X64) \ - || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \ - || (defined(_M_ARM) && (_M_ARM >= 7)) -# define ZSTD_UNALIGNED_ACCESS 1 -#else -# define ZSTD_UNALIGNED_ACCESS 0 -#endif - - -/******************************************************** -* Includes -*********************************************************/ -#include /* calloc */ -#include /* memcpy, memmove */ -#include /* debug : printf */ - - 
-/******************************************************** -* Compiler specifics -*********************************************************/ -#ifdef __AVX2__ -# include /* AVX2 intrinsics */ -#endif - -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4324) /* disable: C4324: padded structure */ -#else -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - - -#ifndef MEM_ACCESS_MODULE -#define MEM_ACCESS_MODULE -/******************************************************** -* Basic Types -*********************************************************/ -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# include -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef int16_t S16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -#else -typedef unsigned char BYTE; -typedef unsigned short U16; -typedef signed short S16; -typedef unsigned int U32; -typedef signed int S32; -typedef unsigned long long U64; -#endif - -#endif /* MEM_ACCESS_MODULE */ - - -/******************************************************** -* Constants -*********************************************************/ -static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */ - -#define HASH_LOG (ZSTD_MEMORY_USAGE - 2) -#define HASH_TABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASH_TABLESIZE - 1) - -#define KNUTH 2654435761 - -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define BLOCKSIZE (128 KB) /* define, for static allocation */ - -#define WORKPLACESIZE (BLOCKSIZE*3) -#define MINMATCH 4 -#define MLbits 7 -#define LLbits 6 -#define 
Offbits 5 -#define MaxML ((1<>3]; -#else - U32 hashTable[HASH_TABLESIZE]; -#endif - BYTE buffer[WORKPLACESIZE]; -} cctxi_t; - - - - -/************************************** -* Error Management -**************************************/ -/* tells if a return value is an error code */ -static unsigned ZSTD_isError(size_t code) { return (code > (size_t)(-ZSTD_ERROR_maxCode)); } - -/* published entry point */ -unsigned ZSTDv01_isError(size_t code) { return ZSTD_isError(code); } - - -/************************************** -* Tool functions -**************************************/ -#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ -#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */ -#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) - -/************************************************************** -* Decompression code -**************************************************************/ - -static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) -{ - const BYTE* const in = (const BYTE* const)src; - BYTE headerFlags; - U32 cSize; - - if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize; - - headerFlags = *in; - cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); - - bpPtr->blockType = (blockType_t)(headerFlags >> 6); - bpPtr->origSize = (bpPtr->blockType == bt_rle) ? 
cSize : 0; - - if (bpPtr->blockType == bt_end) return 0; - if (bpPtr->blockType == bt_rle) return 1; - return cSize; -} - - -static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; - memcpy(dst, src, srcSize); - return srcSize; -} - - -static size_t ZSTD_decompressLiterals(void* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize) -{ - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + maxDstSize; - const BYTE* ip = (const BYTE*)src; - size_t errorCode; - size_t litSize; - - /* check : minimum 2, for litSize, +1, for content */ - if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption; - - litSize = ip[1] + (ip[0]<<8); - litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh.... - op = oend - litSize; - - (void)ctx; - if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; - errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); - if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC; - return litSize; -} - - -static size_t ZSTD_decodeLiteralsBlock(void* ctx, - void* dst, size_t maxDstSize, - const BYTE** litStart, size_t* litSize, - const void* src, size_t srcSize) -{ - const BYTE* const istart = (const BYTE* const)src; - const BYTE* ip = istart; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - blockProperties_t litbp; - - size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp); - if (ZSTD_isError(litcSize)) return litcSize; - if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize; - ip += ZSTD_blockHeaderSize; - - switch(litbp.blockType) - { - case bt_raw: - *litStart = ip; - ip += litcSize; - *litSize = litcSize; - break; - case bt_rle: - { - size_t rleSize = litbp.origSize; - if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; - memset(oend - rleSize, *ip, rleSize); - *litStart = oend - rleSize; - *litSize = 
rleSize; - ip++; - break; - } - case bt_compressed: - { - size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize); - if (ZSTD_isError(decodedLitSize)) return decodedLitSize; - *litStart = oend - decodedLitSize; - *litSize = decodedLitSize; - ip += litcSize; - break; - } - case bt_end: - default: - return (size_t)-ZSTD_ERROR_GENERIC; - } - - return ip-istart; -} - - -static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, - FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, - const void* src, size_t srcSize) -{ - const BYTE* const istart = (const BYTE* const)src; - const BYTE* ip = istart; - const BYTE* const iend = istart + srcSize; - U32 LLtype, Offtype, MLtype; - U32 LLlog, Offlog, MLlog; - size_t dumpsLength; - - /* check */ - if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize; - - /* SeqHead */ - *nbSeq = ZSTD_readLE16(ip); ip+=2; - LLtype = *ip >> 6; - Offtype = (*ip >> 4) & 3; - MLtype = (*ip >> 2) & 3; - if (*ip & 2) - { - dumpsLength = ip[2]; - dumpsLength += ip[1] << 8; - ip += 3; - } - else - { - dumpsLength = ip[1]; - dumpsLength += (ip[0] & 1) << 8; - ip += 2; - } - *dumpsPtr = ip; - ip += dumpsLength; - *dumpsLengthPtr = dumpsLength; - - /* check */ - if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - - /* sequences */ - { - S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ - size_t headerSize; - - /* Build DTables */ - switch(LLtype) - { - U32 max; - case bt_rle : - LLlog = 0; - FSE_buildDTable_rle(DTableLL, *ip++); break; - case bt_raw : - LLlog = LLbits; - FSE_buildDTable_raw(DTableLL, LLbits); break; - default : - max = MaxLL; - headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); - if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC; - if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption; - ip += headerSize; - FSE_buildDTable(DTableLL, norm, 
max, LLlog); - } - - switch(Offtype) - { - U32 max; - case bt_rle : - Offlog = 0; - if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */ - FSE_buildDTable_rle(DTableOffb, *ip++); break; - case bt_raw : - Offlog = Offbits; - FSE_buildDTable_raw(DTableOffb, Offbits); break; - default : - max = MaxOff; - headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); - if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC; - if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption; - ip += headerSize; - FSE_buildDTable(DTableOffb, norm, max, Offlog); - } - - switch(MLtype) - { - U32 max; - case bt_rle : - MLlog = 0; - if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */ - FSE_buildDTable_rle(DTableML, *ip++); break; - case bt_raw : - MLlog = MLbits; - FSE_buildDTable_raw(DTableML, MLbits); break; - default : - max = MaxML; - headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); - if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC; - if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption; - ip += headerSize; - FSE_buildDTable(DTableML, norm, max, MLlog); - } - } - - return ip-istart; -} - - -typedef struct { - size_t litLength; - size_t offset; - size_t matchLength; -} seq_t; - -typedef struct { - FSE_DStream_t DStream; - FSE_DState_t stateLL; - FSE_DState_t stateOffb; - FSE_DState_t stateML; - size_t prevOffset; - const BYTE* dumps; - const BYTE* dumpsEnd; -} seqState_t; - - -static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) -{ - size_t litLength; - size_t prevOffset; - size_t offset; - size_t matchLength; - const BYTE* dumps = seqState->dumps; - const BYTE* const de = seqState->dumpsEnd; - - /* Literal length */ - litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); - prevOffset = litLength ? 
seq->offset : seqState->prevOffset; - seqState->prevOffset = seq->offset; - if (litLength == MaxLL) - { - U32 add = dumps 1 byte */ - dumps += 3; - } - } - } - - /* Offset */ - { - U32 offsetCode, nbBits; - offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); - if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); - nbBits = offsetCode - 1; - if (offsetCode==0) nbBits = 0; /* cmove */ - offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits); - if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); - if (offsetCode==0) offset = prevOffset; - } - - /* MatchLength */ - matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); - if (matchLength == MaxML) - { - U32 add = dumps 1 byte */ - dumps += 3; - } - } - } - matchLength += MINMATCH; - - /* save result */ - seq->litLength = litLength; - seq->offset = offset; - seq->matchLength = matchLength; - seqState->dumps = dumps; -} - - -static size_t ZSTD_execSequence(BYTE* op, - seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - BYTE* const base, BYTE* const oend) -{ - static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ - static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */ - const BYTE* const ostart = op; - const size_t litLength = sequence.litLength; - BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ - const BYTE* const litEnd = *litPtr + litLength; - - /* check */ - if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite beyond dst buffer */ - if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption; - if (sequence.matchLength > (size_t)(*litPtr-op)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite literal segment */ - - /* copy Literals */ - if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8)) - memmove(op, *litPtr, 
litLength); /* overwrite risk */ - else - ZSTD_wildcopy(op, *litPtr, litLength); - op += litLength; - *litPtr = litEnd; /* update for next sequence */ - - /* check : last match must be at a minimum distance of 8 from end of dest buffer */ - if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; - - /* copy Match */ - { - const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12); - const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */ - size_t qutt = 12; - U64 saved[2]; - - /* check */ - if (match < base) return (size_t)-ZSTD_ERROR_corruption; - if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption; - - /* save beginning of literal sequence, in case of write overlap */ - if (overlapRisk) - { - if ((endMatch + qutt) > oend) qutt = oend-endMatch; - memcpy(saved, endMatch, qutt); - } - - if (sequence.offset < 8) - { - const int dec64 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= dec64; - } else { ZSTD_copy8(op, match); } - op += 8; match += 8; - - if (endMatch > oend-12) - { - if (op < oend-8) - { - ZSTD_wildcopy(op, match, (oend-8) - op); - match += (oend-8) - op; - op = oend-8; - } - while (opLLTable; - U32* DTableML = dctx->MLTable; - U32* DTableOffb = dctx->OffTable; - BYTE* const base = (BYTE*) (dctx->base); - - /* Build Decoding Tables */ - errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, - DTableLL, DTableML, DTableOffb, - ip, iend-ip); - if (ZSTD_isError(errorCode)) return errorCode; - ip += errorCode; - - /* Regen sequences */ - { - seq_t sequence; - seqState_t seqState; - - memset(&sequence, 0, sizeof(sequence)); - seqState.dumps = dumps; - seqState.dumpsEnd = dumps + dumpsLength; - seqState.prevOffset = 1; - errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip); - if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption; - 
FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); - FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); - FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); - - for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; ) - { - size_t oneSeqSize; - nbSeq--; - ZSTD_decodeSequence(&sequence, &seqState); - oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; - } - - /* check if reached exact end */ - if ( !FSE_endOfDStream(&(seqState.DStream)) ) return (size_t)-ZSTD_ERROR_corruption; /* requested too much : data is corrupted */ - if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption; /* requested too many sequences : data is corrupted */ - - /* last literal segment */ - { - size_t lastLLSize = litEnd - litPtr; - if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; - if (op != litPtr) memmove(op, litPtr, lastLLSize); - op += lastLLSize; - } - } - - return op-ostart; -} - - -static size_t ZSTD_decompressBlock( - void* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize) -{ - /* blockType == blockCompressed, srcSize is trusted */ - const BYTE* ip = (const BYTE*)src; - const BYTE* litPtr = NULL; - size_t litSize = 0; - size_t errorCode; - - /* Decode literals sub-block */ - errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize); - if (ZSTD_isError(errorCode)) return errorCode; - ip += errorCode; - srcSize -= errorCode; - - return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize); -} - - -size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - const BYTE* iend = ip + srcSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* op = ostart; - BYTE* const oend = ostart + maxDstSize; - size_t remainingSize = srcSize; 
- U32 magicNumber; - size_t errorCode=0; - blockProperties_t blockProperties; - - /* Frame Header */ - if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize; - magicNumber = ZSTD_readBE32(src); - if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber; - ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; - - /* Loop on each block */ - while (1) - { - size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); - if (ZSTD_isError(blockSize)) return blockSize; - - ip += ZSTD_blockHeaderSize; - remainingSize -= ZSTD_blockHeaderSize; - if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize; - - switch(blockProperties.blockType) - { - case bt_compressed: - errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize); - break; - case bt_raw : - errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); - break; - case bt_rle : - return (size_t)-ZSTD_ERROR_GENERIC; /* not yet supported */ - break; - case bt_end : - /* end of frame */ - if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize; - break; - default: - return (size_t)-ZSTD_ERROR_GENERIC; - } - if (blockSize == 0) break; /* bt_end */ - - if (ZSTD_isError(errorCode)) return errorCode; - op += errorCode; - ip += blockSize; - remainingSize -= blockSize; - } - - return op-ostart; -} - -size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - dctx_t ctx; - ctx.base = dst; - return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); -} - - -/******************************* -* Streaming Decompression API -*******************************/ - -size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) -{ - dctx->expected = ZSTD_frameHeaderSize; - dctx->phase = 0; - dctx->previousDstEnd = NULL; - dctx->base = NULL; - return 0; -} - -ZSTDv01_Dctx* ZSTDv01_createDCtx(void) -{ - ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); - if (dctx==NULL) return NULL; - 
ZSTDv01_resetDCtx(dctx); - return dctx; -} - -size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) -{ - free(dctx); - return 0; -} - -size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx) -{ - return ((dctx_t*)dctx)->expected; -} - -size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - dctx_t* ctx = (dctx_t*)dctx; - - /* Sanity check */ - if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize; - if (dst != ctx->previousDstEnd) /* not contiguous */ - ctx->base = dst; - - /* Decompress : frame header */ - if (ctx->phase == 0) - { - /* Check frame magic header */ - U32 magicNumber = ZSTD_readBE32(src); - if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber; - ctx->phase = 1; - ctx->expected = ZSTD_blockHeaderSize; - return 0; - } - - /* Decompress : block header */ - if (ctx->phase == 1) - { - blockProperties_t bp; - size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(blockSize)) return blockSize; - if (bp.blockType == bt_end) - { - ctx->expected = 0; - ctx->phase = 0; - } - else - { - ctx->expected = blockSize; - ctx->bType = bp.blockType; - ctx->phase = 2; - } - - return 0; - } - - /* Decompress : block content */ - { - size_t rSize; - switch(ctx->bType) - { - case bt_compressed: - rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); - break; - case bt_raw : - rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); - break; - case bt_rle : - return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */ - break; - case bt_end : /* should never happen (filtered at phase 1) */ - rSize = 0; - break; - default: - return (size_t)-ZSTD_ERROR_GENERIC; - } - ctx->phase = 1; - ctx->expected = ZSTD_blockHeaderSize; - ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); - return rSize; - } - -} - - +(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* ptr = dt; + 
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */ + const U32 tableSize = 1 << tableLog; + const U32 tableMask = tableSize-1; + const U32 step = FSE_tableStep(tableSize); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U32 position = 0; + U32 highThreshold = tableSize-1; + const S16 largeLimit= (S16)(1 << (tableLog-1)); + U32 noLarge = 1; + U32 s; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge; + if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge; + + /* Init, lay down lowprob symbols */ + DTableH[0].tableLog = (U16)tableLog; + for (s=0; s<=maxSymbolValue; s++) + { + if (normalizedCounter[s]==-1) + { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) noLarge=0; + symbolNext[s] = normalizedCounter[s]; + } + } + + /* Spread symbols */ + for (s=0; s<=maxSymbolValue; s++) + { + int i; + for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } + } + + if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + + /* Build Decoding table */ + { + U32 i; + for (i=0; ifastMode = (U16)noLarge; + return 0; +} + + +/****************************************** +* FSE byte symbol +******************************************/ +#ifndef FSE_COMMONDEFS_ONLY + +static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); } + +static short FSE_abs(short a) +{ + return a<0? 
-a : a; +} + + +/**************************************************************** +* Header bitstream management +****************************************************************/ +static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong; + bitStream = FSE_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge; + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<1) && (charnum<=*maxSVPtr)) + { + if (previous0) + { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) + { + n0+=24; + if (ip < iend-5) + { + ip+=2; + bitStream = FSE_readLE32(ip) >> bitCount; + } + else + { + bitStream >>= 16; + bitCount+=16; + } + } + while ((bitStream & 3) == 3) + { + n0+=3; + bitStream>>=2; + bitCount+=2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall; + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + bitStream = FSE_readLE32(ip) >> bitCount; + } + else + bitStream >>= 2; + } + { + const short max = (short)((2*threshold-1)-remaining); + short count; + + if ((bitStream & (threshold-1)) < (U32)max) + { + count = (short)(bitStream & (threshold-1)); + bitCount += nbBits-1; + } + else + { + count = (short)(bitStream & (2*threshold-1)); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= FSE_abs(count); + 
normalizedCounter[charnum++] = count; + previous0 = !count; + while (remaining < threshold) + { + nbBits--; + threshold >>= 1; + } + + { + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + } + else + { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = FSE_readLE32(ip) >> (bitCount & 31); + } + } + } + if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC; + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; + return ip-istart; +} + + +/********************************************************* +* Decompression (Byte symbols) +*********************************************************/ +static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s<=maxSymbolValue; s++) + { + dinfo[s].newState = 0; + dinfo[s].symbol = (BYTE)s; + dinfo[s].nbBits = (BYTE)nbBits; + } + + return 0; +} + + +/* FSE_initDStream + * Initialize a FSE_DStream_t. + * srcBuffer must point at the beginning of an FSE block. 
+ * The function result is the size of the FSE_block (== srcSize). + * If srcSize is too small, the function will return an errorCode; + */ +static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; + + if (srcSize >= sizeof(size_t)) + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = FSE_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + + +/*!FSE_lookBits + * Provides next n bits from the bitContainer. + * bitContainer is not modified (bits are still present for next read/look) + * On 32-bits, maxNbBits==25 + * On 64-bits, maxNbBits==57 + * return : value extracted. 
+ */ +static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + + +/*!FSE_readBits + * Read next n bits from the bitContainer. + * On 32-bits, don't read more than maxNbBits==25 + * On 64-bits, don't read more than maxNbBits==57 + * Use the fast variant *only* if n >= 1. + * return : value extracted. + */ +static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) +{ + size_t value = FSE_lookBits(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! 
*/ +{ + size_t value = FSE_lookBitsFast(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static unsigned FSE_reloadDStream(FSE_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return FSE_DStream_tooFar; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = FSE_readLEST(bitD->ptr); + return FSE_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; + return FSE_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + U32 result = FSE_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = FSE_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + + +static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); + FSE_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = FSE_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = 
FSE_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/* FSE_endOfDStream + Tells if bitD has reached end of bitStream or not */ + +static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) +{ + return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8)); +} + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +FORCE_INLINE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + FSE_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + size_t errorCode; + + /* Init */ + errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ + if (FSE_isError(errorCode)) return errorCode; + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ + FSE_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + FSE_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */ + while (1) + { + if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) + break; + + *op++ = FSE_GETSYMBOL(&state1); + + if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) + break; + + *op++ = FSE_GETSYMBOL(&state2); + } + + /* end ? 
*/ + if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + return op-ostart; + + if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ + + return (size_t)-FSE_ERROR_corruptionDetected; +} + + +static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */ + + /* select fast mode (static) */ + if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + size_t errorCode; + + if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + + /* normal FSE decoding mode */ + errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + ip += errorCode; + cSrcSize -= errorCode; + + errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + + /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); +} + + + +/* ******************************************************* +* Huff0 : Huffman block compression 
+*********************************************************/ +#define HUF_MAX_SYMBOL_VALUE 255 +#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */ +#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */ +#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) +# error "HUF_MAX_TABLELOG is too large !" +#endif + +typedef struct HUF_CElt_s { + U16 val; + BYTE nbBits; +} HUF_CElt ; + +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + + +/* ******************************************************* +* Huff0 : Huffman block decompression +*********************************************************/ +typedef struct { + BYTE byte; + BYTE nbBits; +} HUF_DElt; + +static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + U32 weightTotal; + U32 maxBits; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + size_t oSize; + U32 n; + U32 nextRankStart; + void* ptr = DTable+1; + HUF_DElt* const dt = (HUF_DElt*)ptr; + + FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ + //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... 
*/ + if (iSize >= 128) /* special header */ + { + if (iSize >= (242)) /* RLE */ + { + static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; + oSize = l[iSize-242]; + memset(huffWeight, 1, sizeof(huffWeight)); + iSize = 0; + } + else /* Incompressible */ + { + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + ip += 1; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } + } + } + else /* header compressed with FSE (normal case) */ + { + if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankVal, 0, sizeof(rankVal)); + weightTotal = 0; + for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected; + rankVal[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } + + /* get last non-null symbol weight (implied, total must be 2^n) */ + maxBits = FSE_highbit32(weightTotal) + 1; + if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */ + DTable[0] = (U16)maxBits; + { + U32 total = 1 << maxBits; + U32 rest = total - weightTotal; + U32 verif = 1 << FSE_highbit32(rest); + U32 lastWeight = FSE_highbit32(rest) + 1; + if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankVal[lastWeight]++; + } + + /* check tree construction validity */ + if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */ + + /* Prepare ranks */ + nextRankStart = 0; + for (n=1; n<=maxBits; n++) + { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } + + /* fill DTable */ + for (n=0; 
n<=oSize; n++) + { + const U32 w = huffWeight[n]; + const U32 length = (1 << w) >> 1; + U32 i; + HUF_DElt D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } + + return iSize+1; +} + + +static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog) +{ + const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + const BYTE c = dt[val].byte; + FSE_skipBits(Dstream, dt[val].nbBits); + return c; +} + +static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const U16* DTable) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-15; + + const void* ptr = DTable; + const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; + const U32 dtLog = DTable[0]; + size_t errorCode; + U32 reloadStatus; + + /* Init */ + + const U16* jumpTable = (const U16*)cSrc; + const size_t length1 = FSE_readLE16(jumpTable); + const size_t length2 = FSE_readLE16(jumpTable+1); + const size_t length3 = FSE_readLE16(jumpTable+2); + const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! 
+ const char* const start1 = (const char*)(cSrc) + 6; + const char* const start2 = start1 + length1; + const char* const start3 = start2 + length2; + const char* const start4 = start3 + length3; + FSE_DStream_t bitD1, bitD2, bitD3, bitD4; + + if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + + errorCode = FSE_initDStream(&bitD1, start1, length1); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD2, start2, length2); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD3, start3, length3); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD4, start4, length4); + if (FSE_isError(errorCode)) return errorCode; + + reloadStatus=FSE_reloadDStream(&bitD2); + + /* 16 symbols per loop */ + for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) + +#define HUF_DECODE_SYMBOL_2(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ + if (FSE_32bits()) FSE_reloadDStream(&Dstream) + + HUF_DECODE_SYMBOL_1( 0, bitD1); + HUF_DECODE_SYMBOL_1( 1, bitD2); + HUF_DECODE_SYMBOL_1( 2, bitD3); + HUF_DECODE_SYMBOL_1( 3, bitD4); + HUF_DECODE_SYMBOL_2( 4, bitD1); + HUF_DECODE_SYMBOL_2( 5, bitD2); + HUF_DECODE_SYMBOL_2( 6, bitD3); + HUF_DECODE_SYMBOL_2( 7, bitD4); + HUF_DECODE_SYMBOL_1( 8, bitD1); + HUF_DECODE_SYMBOL_1( 9, bitD2); + HUF_DECODE_SYMBOL_1(10, bitD3); + HUF_DECODE_SYMBOL_1(11, bitD4); + HUF_DECODE_SYMBOL_0(12, bitD1); + HUF_DECODE_SYMBOL_0(13, bitD2); + HUF_DECODE_SYMBOL_0(14, bitD3); + HUF_DECODE_SYMBOL_0(15, bitD4); + } + + if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ + return (size_t)-FSE_ERROR_corruptionDetected; + + /* tail */ + { + // bitTail = bitD1; // *much* slower : -20% !??! 
+ FSE_DStream_t bitTail; + bitTail.ptr = bitD1.ptr; + bitTail.bitsConsumed = bitD1.bitsConsumed; + bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer + bitTail.start = start1; + for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + ip += errorCode; + cSrcSize -= errorCode; + + return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ + +/* + zstd - standard compression library + Header File for static linking only + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/* The objects defined into this file should be considered experimental. + * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. + */ + +/************************************** +* Error management +**************************************/ +#define ZSTD_LIST_ERRORS(ITEM) \ + ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \ + ITEM(ZSTD_ERROR_MagicNumber) \ + ITEM(ZSTD_ERROR_SrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \ + ITEM(ZSTD_ERROR_corruption) \ + ITEM(ZSTD_ERROR_maxCode) + +#define ZSTD_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes; /* exposed list of errors; static linking only */ + +/* + zstd - standard compression library + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/**************************************************************** +* Tuning parameters +*****************************************************************/ +/* MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect */ +#define ZSTD_MEMORY_USAGE 17 + + +/************************************** + CPU Feature Detection +**************************************/ +/* + * Automated efficient unaligned memory access detection + * Based on known hardware architectures + * This list will be updated thanks to feedbacks + */ +#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ + || defined(__ARM_FEATURE_UNALIGNED) \ + || defined(__i386__) || defined(__x86_64__) \ + || defined(_M_IX86) || defined(_M_X64) \ + || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \ + || (defined(_M_ARM) && (_M_ARM >= 7)) +# define ZSTD_UNALIGNED_ACCESS 1 +#else +# define ZSTD_UNALIGNED_ACCESS 0 +#endif + + +/******************************************************** +* Includes +*********************************************************/ +#include /* calloc */ +#include /* memcpy, memmove */ +#include /* debug : printf */ + + 
+/******************************************************** +* Compiler specifics +*********************************************************/ +#ifdef __AVX2__ +# include /* AVX2 intrinsics */ +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#else +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +#ifndef MEM_ACCESS_MODULE +#define MEM_ACCESS_MODULE +/******************************************************** +* Basic Types +*********************************************************/ +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef int16_t S16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +#else +typedef unsigned char BYTE; +typedef unsigned short U16; +typedef signed short S16; +typedef unsigned int U32; +typedef signed int S32; +typedef unsigned long long U64; +#endif + +#endif /* MEM_ACCESS_MODULE */ + + +/******************************************************** +* Constants +*********************************************************/ +static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */ + +#define HASH_LOG (ZSTD_MEMORY_USAGE - 2) +#define HASH_TABLESIZE (1 << HASH_LOG) +#define HASH_MASK (HASH_TABLESIZE - 1) + +#define KNUTH 2654435761 + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BLOCKSIZE (128 KB) /* define, for static allocation */ + +#define WORKPLACESIZE (BLOCKSIZE*3) +#define MINMATCH 4 +#define MLbits 7 +#define LLbits 6 +#define 
Offbits 5 +#define MaxML ((1<>3]; +#else + U32 hashTable[HASH_TABLESIZE]; +#endif + BYTE buffer[WORKPLACESIZE]; +} cctxi_t; + + + + +/************************************** +* Error Management +**************************************/ +/* tells if a return value is an error code */ +static unsigned ZSTD_isError(size_t code) { return (code > (size_t)(-ZSTD_ERROR_maxCode)); } + +/* published entry point */ +unsigned ZSTDv01_isError(size_t code) { return ZSTD_isError(code); } + + +/************************************** +* Tool functions +**************************************/ +#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ +#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/************************************************************** +* Decompression code +**************************************************************/ + +static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +{ + const BYTE* const in = (const BYTE* const)src; + BYTE headerFlags; + U32 cSize; + + if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize; + + headerFlags = *in; + cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); + + bpPtr->blockType = (blockType_t)(headerFlags >> 6); + bpPtr->origSize = (bpPtr->blockType == bt_rle) ? 
cSize : 0; + + if (bpPtr->blockType == bt_end) return 0; + if (bpPtr->blockType == bt_rle) return 1; + return cSize; +} + + +static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; + memcpy(dst, src, srcSize); + return srcSize; +} + + +static size_t ZSTD_decompressLiterals(void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + maxDstSize; + const BYTE* ip = (const BYTE*)src; + size_t errorCode; + size_t litSize; + + /* check : minimum 2, for litSize, +1, for content */ + if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption; + + litSize = ip[1] + (ip[0]<<8); + litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh.... + op = oend - litSize; + + (void)ctx; + if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; + errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); + if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC; + return litSize; +} + + +static size_t ZSTD_decodeLiteralsBlock(void* ctx, + void* dst, size_t maxDstSize, + const BYTE** litStart, size_t* litSize, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + blockProperties_t litbp; + + size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp); + if (ZSTD_isError(litcSize)) return litcSize; + if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize; + ip += ZSTD_blockHeaderSize; + + switch(litbp.blockType) + { + case bt_raw: + *litStart = ip; + ip += litcSize; + *litSize = litcSize; + break; + case bt_rle: + { + size_t rleSize = litbp.origSize; + if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; + memset(oend - rleSize, *ip, rleSize); + *litStart = oend - rleSize; + *litSize = 
rleSize; + ip++; + break; + } + case bt_compressed: + { + size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize); + if (ZSTD_isError(decodedLitSize)) return decodedLitSize; + *litStart = oend - decodedLitSize; + *litSize = decodedLitSize; + ip += litcSize; + break; + } + case bt_end: + default: + return (size_t)-ZSTD_ERROR_GENERIC; + } + + return ip-istart; +} + + +static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + const BYTE* const iend = istart + srcSize; + U32 LLtype, Offtype, MLtype; + U32 LLlog, Offlog, MLlog; + size_t dumpsLength; + + /* check */ + if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize; + + /* SeqHead */ + *nbSeq = ZSTD_readLE16(ip); ip+=2; + LLtype = *ip >> 6; + Offtype = (*ip >> 4) & 3; + MLtype = (*ip >> 2) & 3; + if (*ip & 2) + { + dumpsLength = ip[2]; + dumpsLength += ip[1] << 8; + ip += 3; + } + else + { + dumpsLength = ip[1]; + dumpsLength += (ip[0] & 1) << 8; + ip += 2; + } + *dumpsPtr = ip; + ip += dumpsLength; + *dumpsLengthPtr = dumpsLength; + + /* check */ + if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + + /* sequences */ + { + S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ + size_t headerSize; + + /* Build DTables */ + switch(LLtype) + { + U32 max; + case bt_rle : + LLlog = 0; + FSE_buildDTable_rle(DTableLL, *ip++); break; + case bt_raw : + LLlog = LLbits; + FSE_buildDTable_raw(DTableLL, LLbits); break; + default : + max = MaxLL; + headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC; + if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption; + ip += headerSize; + FSE_buildDTable(DTableLL, norm, 
max, LLlog); + } + + switch(Offtype) + { + U32 max; + case bt_rle : + Offlog = 0; + if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableOffb, *ip++); break; + case bt_raw : + Offlog = Offbits; + FSE_buildDTable_raw(DTableOffb, Offbits); break; + default : + max = MaxOff; + headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); + if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC; + if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption; + ip += headerSize; + FSE_buildDTable(DTableOffb, norm, max, Offlog); + } + + switch(MLtype) + { + U32 max; + case bt_rle : + MLlog = 0; + if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableML, *ip++); break; + case bt_raw : + MLlog = MLbits; + FSE_buildDTable_raw(DTableML, MLbits); break; + default : + max = MaxML; + headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC; + if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption; + ip += headerSize; + FSE_buildDTable(DTableML, norm, max, MLlog); + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t offset; + size_t matchLength; +} seq_t; + +typedef struct { + FSE_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + size_t prevOffset; + const BYTE* dumps; + const BYTE* dumpsEnd; +} seqState_t; + + +static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) +{ + size_t litLength; + size_t prevOffset; + size_t offset; + size_t matchLength; + const BYTE* dumps = seqState->dumps; + const BYTE* const de = seqState->dumpsEnd; + + /* Literal length */ + litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); + prevOffset = litLength ? 
seq->offset : seqState->prevOffset; + seqState->prevOffset = seq->offset; + if (litLength == MaxLL) + { + U32 add = dumps 1 byte */ + dumps += 3; + } + } + } + + /* Offset */ + { + U32 offsetCode, nbBits; + offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); + if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); + nbBits = offsetCode - 1; + if (offsetCode==0) nbBits = 0; /* cmove */ + offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits); + if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); + if (offsetCode==0) offset = prevOffset; + } + + /* MatchLength */ + matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); + if (matchLength == MaxML) + { + U32 add = dumps 1 byte */ + dumps += 3; + } + } + } + matchLength += MINMATCH; + + /* save result */ + seq->litLength = litLength; + seq->offset = offset; + seq->matchLength = matchLength; + seqState->dumps = dumps; +} + + +static size_t ZSTD_execSequence(BYTE* op, + seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + BYTE* const base, BYTE* const oend) +{ + static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ + static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */ + const BYTE* const ostart = op; + const size_t litLength = sequence.litLength; + BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ + const BYTE* const litEnd = *litPtr + litLength; + + /* check */ + if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite beyond dst buffer */ + if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption; + if (sequence.matchLength > (size_t)(*litPtr-op)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite literal segment */ + + /* copy Literals */ + if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8)) + memmove(op, *litPtr, 
litLength); /* overwrite risk */ + else + ZSTD_wildcopy(op, *litPtr, litLength); + op += litLength; + *litPtr = litEnd; /* update for next sequence */ + + /* check : last match must be at a minimum distance of 8 from end of dest buffer */ + if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; + + /* copy Match */ + { + const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12); + const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */ + size_t qutt = 12; + U64 saved[2]; + + /* check */ + if (match < base) return (size_t)-ZSTD_ERROR_corruption; + if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption; + + /* save beginning of literal sequence, in case of write overlap */ + if (overlapRisk) + { + if ((endMatch + qutt) > oend) qutt = oend-endMatch; + memcpy(saved, endMatch, qutt); + } + + if (sequence.offset < 8) + { + const int dec64 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= dec64; + } else { ZSTD_copy8(op, match); } + op += 8; match += 8; + + if (endMatch > oend-12) + { + if (op < oend-8) + { + ZSTD_wildcopy(op, match, (oend-8) - op); + match += (oend-8) - op; + op = oend-8; + } + while (opLLTable; + U32* DTableML = dctx->MLTable; + U32* DTableOffb = dctx->OffTable; + BYTE* const base = (BYTE*) (dctx->base); + + /* Build Decoding Tables */ + errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + DTableLL, DTableML, DTableOffb, + ip, iend-ip); + if (ZSTD_isError(errorCode)) return errorCode; + ip += errorCode; + + /* Regen sequences */ + { + seq_t sequence; + seqState_t seqState; + + memset(&sequence, 0, sizeof(sequence)); + seqState.dumps = dumps; + seqState.dumpsEnd = dumps + dumpsLength; + seqState.prevOffset = 1; + errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip); + if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption; + 
FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); + FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); + FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); + + for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; ) + { + size_t oneSeqSize; + nbSeq--; + ZSTD_decodeSequence(&sequence, &seqState); + oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + + /* check if reached exact end */ + if ( !FSE_endOfDStream(&(seqState.DStream)) ) return (size_t)-ZSTD_ERROR_corruption; /* requested too much : data is corrupted */ + if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption; /* requested too many sequences : data is corrupted */ + + /* last literal segment */ + { + size_t lastLLSize = litEnd - litPtr; + if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; + if (op != litPtr) memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + + +static size_t ZSTD_decompressBlock( + void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + /* blockType == blockCompressed, srcSize is trusted */ + const BYTE* ip = (const BYTE*)src; + const BYTE* litPtr = NULL; + size_t litSize = 0; + size_t errorCode; + + /* Decode literals sub-block */ + errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize); + if (ZSTD_isError(errorCode)) return errorCode; + ip += errorCode; + srcSize -= errorCode; + + return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize); +} + + +size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* iend = ip + srcSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + maxDstSize; + size_t remainingSize = srcSize; 
+ U32 magicNumber; + size_t errorCode=0; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize; + magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber; + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); + if (ZSTD_isError(blockSize)) return blockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize; + + switch(blockProperties.blockType) + { + case bt_compressed: + errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize); + break; + case bt_raw : + errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); + break; + case bt_rle : + return (size_t)-ZSTD_ERROR_GENERIC; /* not yet supported */ + break; + case bt_end : + /* end of frame */ + if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize; + break; + default: + return (size_t)-ZSTD_ERROR_GENERIC; + } + if (blockSize == 0) break; /* bt_end */ + + if (ZSTD_isError(errorCode)) return errorCode; + op += errorCode; + ip += blockSize; + remainingSize -= blockSize; + } + + return op-ostart; +} + +size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + dctx_t ctx; + ctx.base = dst; + return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); +} + + +/******************************* +* Streaming Decompression API +*******************************/ + +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize; + dctx->phase = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + return 0; +} + +ZSTDv01_Dctx* ZSTDv01_createDCtx(void) +{ + ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); + if (dctx==NULL) return NULL; + 
ZSTDv01_resetDCtx(dctx); + return dctx; +} + +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) +{ + free(dctx); + return 0; +} + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx) +{ + return ((dctx_t*)dctx)->expected; +} + +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + dctx_t* ctx = (dctx_t*)dctx; + + /* Sanity check */ + if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize; + if (dst != ctx->previousDstEnd) /* not contiguous */ + ctx->base = dst; + + /* Decompress : frame header */ + if (ctx->phase == 0) + { + /* Check frame magic header */ + U32 magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber; + ctx->phase = 1; + ctx->expected = ZSTD_blockHeaderSize; + return 0; + } + + /* Decompress : block header */ + if (ctx->phase == 1) + { + blockProperties_t bp; + size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(blockSize)) return blockSize; + if (bp.blockType == bt_end) + { + ctx->expected = 0; + ctx->phase = 0; + } + else + { + ctx->expected = blockSize; + ctx->bType = bp.blockType; + ctx->phase = 2; + } + + return 0; + } + + /* Decompress : block content */ + { + size_t rSize; + switch(ctx->bType) + { + case bt_compressed: + rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); + break; + case bt_raw : + rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); + break; + case bt_rle : + return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */ + break; + case bt_end : /* should never happen (filtered at phase 1) */ + rSize = 0; + break; + default: + return (size_t)-ZSTD_ERROR_GENERIC; + } + ctx->phase = 1; + ctx->expected = ZSTD_blockHeaderSize; + ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); + return rSize; + } + +} + + diff --git a/contrib/libs/zstd06/legacy/zstd_v01.h b/contrib/libs/zstd06/legacy/zstd_v01.h index c90731b2b1c..2b0bea28570 100644 
--- a/contrib/libs/zstd06/legacy/zstd_v01.h +++ b/contrib/libs/zstd06/legacy/zstd_v01.h @@ -1,101 +1,101 @@ #include -/* - zstd - standard compression library - Header File - Copyright (C) 2014-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd - - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - -/* ************************************* -* Includes -***************************************/ -#include /* size_t */ - - -/* ************************************* -* Simple one-step function -***************************************/ -/** -ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format - compressedSize : is the exact source size - maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. - It must be equal or larger than originalSize, otherwise decompression will fail. - return : the number of bytes decompressed into destination buffer (originalSize) - or an errorCode if it fails (which can be tested using ZSTDv01_isError()) -*/ -size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, - const void* src, size_t compressedSize); - -/** -ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error -*/ -unsigned ZSTDv01_isError(size_t code); - - -/* ************************************* -* Advanced functions -***************************************/ -typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; -ZSTDv01_Dctx* ZSTDv01_createDCtx(void); -size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); - -size_t ZSTDv01_decompressDCtx(void* ctx, - void* dst, size_t maxOriginalSize, - const void* src, size_t compressedSize); - -/* ************************************* -* Streaming functions -***************************************/ -size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); - -size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); -size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); -/** - Use above functions alternatively. 
- ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. - Result is the number of bytes regenerated within 'dst'. - It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. -*/ - -/* ************************************* -* Prefix - version detection -***************************************/ -#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ -#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ - - -#if defined (__cplusplus) -} -#endif +/* + zstd - standard compression library + Header File + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#pragma once + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
+*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ + + +#if defined (__cplusplus) +} +#endif diff --git a/contrib/libs/zstd06/legacy/zstd_v02.c b/contrib/libs/zstd06/legacy/zstd_v02.c index ffc6c9d61ef..2d4cfa59c43 100644 --- a/contrib/libs/zstd06/legacy/zstd_v02.c +++ b/contrib/libs/zstd06/legacy/zstd_v02.c @@ -1,3748 +1,3748 @@ -/* ****************************************************************** - Error codes and messages - Copyright (C) 2013-2015, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ -#ifndef ERROR_H_MODULE -#define ERROR_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - -#include /* size_t, ptrdiff_t */ -#include "zstd_v02.h" - -/****************************************** -* Compiler-specific -******************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define ERR_STATIC static inline -#elif defined(_MSC_VER) -# define ERR_STATIC static __inline -#elif defined(__GNUC__) -# define ERR_STATIC static __attribute__((unused)) -#else -# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - - -/****************************************** -* Error Management -******************************************/ -#define PREFIX(name) ZSTD_error_##name - -#define ERROR(name) (size_t)-PREFIX(name) - -#define ERROR_LIST(ITEM) \ - ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ - ITEM(PREFIX(memory_allocation)) \ - ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ - ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ - ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) 
ITEM(PREFIX(maxSymbolValue_tooSmall)) \ - ITEM(PREFIX(maxCode)) - -#define ERROR_GENERATE_ENUM(ENUM) ENUM, -typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ - -#define ERROR_CONVERTTOSTRING(STRING) #STRING, -#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) - -ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } - - -#if defined (__cplusplus) -} -#endif - -#endif /* ERROR_H_MODULE */ - - -/* ****************************************************************** - mem.h - low-level memory access routines - Copyright (C) 2013-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ -#ifndef MEM_H_MODULE -#define MEM_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - -/****************************************** -* Includes -******************************************/ -#include /* size_t, ptrdiff_t */ -#include /* memcpy */ - - -/****************************************** -* Compiler-specific -******************************************/ -#if defined(__GNUC__) -# define MEM_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - - -/**************************************************************** -* Basic Types -*****************************************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef int16_t S16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef int64_t S64; 
-#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef signed short S16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; - typedef signed long long S64; -#endif - - -/**************************************************************** -* Memory I/O -*****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
- * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } -MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } - -MEM_STATIC unsigned MEM_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard on structure alignment. 
-Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } - -#else - -/* default method, safe and standard. 
- can sometimes prove slower */ - -MEM_STATIC U16 MEM_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U32 MEM_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U64 MEM_read64(const void* memPtr) -{ - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write32(void* memPtr, U32 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write64(void* memPtr, U64 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -#endif // MEM_FORCE_MEMORY_ACCESS - - -MEM_STATIC U16 MEM_readLE16(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read16(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)(p[0] + (p[1]<<8)); - } -} - -MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) -{ - if (MEM_isLittleEndian()) - { - MEM_write16(memPtr, val); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val; - p[1] = (BYTE)(val>>8); - } -} - -MEM_STATIC U32 MEM_readLE32(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read32(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); - } -} - -MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) -{ - if (MEM_isLittleEndian()) - { - MEM_write32(memPtr, val32); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val32; - p[1] = (BYTE)(val32>>8); - p[2] = (BYTE)(val32>>16); - p[3] = (BYTE)(val32>>24); - } -} - -MEM_STATIC U64 MEM_readLE64(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read64(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) - + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); - } -} - -MEM_STATIC void 
MEM_writeLE64(void* memPtr, U64 val64) -{ - if (MEM_isLittleEndian()) - { - MEM_write64(memPtr, val64); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val64; - p[1] = (BYTE)(val64>>8); - p[2] = (BYTE)(val64>>16); - p[3] = (BYTE)(val64>>24); - p[4] = (BYTE)(val64>>32); - p[5] = (BYTE)(val64>>40); - p[6] = (BYTE)(val64>>48); - p[7] = (BYTE)(val64>>56); - } -} - -MEM_STATIC size_t MEM_readLEST(const void* memPtr) -{ - if (MEM_32bits()) - return (size_t)MEM_readLE32(memPtr); - else - return (size_t)MEM_readLE64(memPtr); -} - -MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeLE32(memPtr, (U32)val); - else - MEM_writeLE64(memPtr, (U64)val); -} - -#if defined (__cplusplus) -} -#endif - -#endif /* MEM_H_MODULE */ - - -/* ****************************************************************** - bitstream - Part of NewGen Entropy library - header file (to include) - Copyright (C) 2013-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ -#ifndef BITSTREAM_H_MODULE -#define BITSTREAM_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - - -/* -* This API consists of small unitary functions, which highly benefit from being inlined. -* Since link-time-optimization is not available for all compilers, -* these functions are defined into a .h to be included. -*/ - - -/********************************************** -* bitStream decompression API (read backward) -**********************************************/ -typedef struct -{ - size_t bitContainer; - unsigned bitsConsumed; - const char* ptr; - const char* start; -} BIT_DStream_t; - -typedef enum { BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ - /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... 
:( */ - -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); - - -/* -* Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is manually filled from memory by the BIT_reloadDStream() method. -* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream() -*/ - - -/****************************************** -* unsafe API -******************************************/ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 */ - - - -/**************************************************************** -* Helper functions -****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (register U32 val) -{ -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - _BitScanReverse ( &r, val ); - return (unsigned) r; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - unsigned r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; - return r; -# endif -} - - - 
-/********************************************************** -* bitStream decoding -**********************************************************/ - -/*!BIT_initDStream -* Initialize a BIT_DStream_t. -* @bitD : a pointer to an already allocated BIT_DStream_t structure -* @srcBuffer must point at the beginning of a bitStream -* @srcSize must be the exact size of the bitStream -* @result : size of stream (== srcSize) or an errorCode if a problem is detected -*/ -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) -{ - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } - - if (srcSize >= sizeof(size_t)) /* normal case */ - { - U32 contain32; - bitD->start = (const char*)srcBuffer; - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); - bitD->bitContainer = MEM_readLEST(bitD->ptr); - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(contain32); - } - else - { - U32 contain32; - bitD->start = (const char*)srcBuffer; - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; - default:; - } - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(contain32); - bitD->bitsConsumed += 
(U32)(sizeof(size_t) - srcSize)*8; - } - - return srcSize; -} - -/*!BIT_lookBits - * Provides next n bits from local register - * local register is not modified (bits are still present for next read/look) - * On 32-bits, maxNbBits==25 - * On 64-bits, maxNbBits==57 - * @return : value extracted - */ -MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) -{ - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); -} - -/*! BIT_lookBitsFast : -* unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) -{ - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; - return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); -} - -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) -{ - bitD->bitsConsumed += nbBits; -} - -/*!BIT_readBits - * Read next n bits from local register. - * pay attention to not read more than nbBits contained into local register. - * @return : extracted value. 
- */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) -{ - size_t value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*!BIT_readBitsFast : -* unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) -{ - size_t value = BIT_lookBitsFast(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ - return BIT_DStream_overflow; - - if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) - { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; - } - if (bitD->ptr == bitD->start) - { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } - { - U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) - { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = BIT_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ - return result; - } -} - -/*! 
BIT_endOfDStream -* @return Tells if DStream has reached its exact end -*/ -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) -{ - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); -} - -#if defined (__cplusplus) -} -#endif - -#endif /* BITSTREAM_H_MODULE */ -/* ****************************************************************** - Error codes and messages - Copyright (C) 2013-2015, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ -#ifndef ERROR_H_MODULE -#define ERROR_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - - -/****************************************** -* Compiler-specific -******************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define ERR_STATIC static inline -#elif defined(_MSC_VER) -# define ERR_STATIC static __inline -#elif defined(__GNUC__) -# define ERR_STATIC static __attribute__((unused)) -#else -# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - - -/****************************************** -* Error Management -******************************************/ -#define PREFIX(name) ZSTD_error_##name - -#define ERROR(name) (size_t)-PREFIX(name) - -#define ERROR_LIST(ITEM) \ - ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ - ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ - ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ - ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ - ITEM(PREFIX(maxCode)) - -#define ERROR_GENERATE_ENUM(ENUM) ENUM, -typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ - -#define ERROR_CONVERTTOSTRING(STRING) #STRING, -#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) -static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) }; - -ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } - -ERR_STATIC const char* ERR_getErrorName(size_t code) -{ - static const char* codeError = "Unspecified 
error code"; - if (ERR_isError(code)) return ERR_strings[-(int)(code)]; - return codeError; -} - - -#if defined (__cplusplus) -} -#endif - -#endif /* ERROR_H_MODULE */ -/* -Constructor and Destructor of type FSE_CTable - Note that its size depends on 'tableLog' and 'maxSymbolValue' */ -typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ - - -/* ****************************************************************** - FSE : Finite State Entropy coder - header file for static linking (only) - Copyright (C) 2013-2015, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ -#if defined (__cplusplus) -extern "C" { -#endif - - -/****************************************** -* Static allocation -******************************************/ -/* FSE buffer bounds */ -#define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7)) -#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= BIT_DStream_completed - -When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -Checking if DStream has reached its end is performed by : - BIT_endOfDStream(&DStream); -Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
- FSE_endOfDState(&DState); -*/ - - -/****************************************** -* FSE unsafe API -******************************************/ -static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); -/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ - - -/****************************************** -* Implementation of inline functions -******************************************/ - -/* decompression */ - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ - -typedef struct -{ - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - -MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) -{ - FSE_DTableHeader DTableH; - memcpy(&DTableH, dt, sizeof(DTableH)); - DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = BIT_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = BIT_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) -{ - return DStatePtr->state == 0; -} - - -#if defined (__cplusplus) -} -#endif -/* ****************************************************************** - Huff0 : Huffman coder, part of New Generation Entropy library 
- header file for static linking (only) - Copyright (C) 2013-2015, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ - -#if defined (__cplusplus) -extern "C" { -#endif - -/****************************************** -* Static allocation macros -******************************************/ -/* Huff0 buffer bounds */ -#define HUF_CTABLEBOUND 129 -#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ -#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* static allocation of Huff0's DTable */ -#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1< /* size_t */ - - -/* ************************************* -* Version -***************************************/ -#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ -#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */ -#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) - - -/* ************************************* -* Advanced functions -***************************************/ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */ - -#if defined (__cplusplus) -} -#endif -/* - zstd - standard compression library - Header File for static linking only - Copyright (C) 2014-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd - - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - -/* The objects defined into this file should be considered experimental. - * They are not labelled stable, as their prototype may change in the future. - * You can use them for tests, provide feedback, or if you can endure risk of future changes. - */ - -#if defined (__cplusplus) -extern "C" { -#endif - -/* ************************************* -* Streaming functions -***************************************/ - -typedef struct ZSTD_DCtx_s ZSTD_DCtx; - -/* - Use above functions alternatively. - ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. - Result is the number of bytes regenerated within 'dst'. 
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. -*/ - -/* ************************************* -* Prefix - version detection -***************************************/ -#define ZSTD_magicNumber 0xFD2FB522 /* v0.2 (current)*/ - - -#if defined (__cplusplus) -} -#endif -/* ****************************************************************** - FSE : Finite State Entropy coder - Copyright (C) 2013-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ - -#ifndef FSE_COMMONDEFS_ONLY - -/**************************************************************** -* Tuning parameters -****************************************************************/ -/* MEMORY_USAGE : -* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -#define FSE_MAX_MEMORY_USAGE 14 -#define FSE_DEFAULT_MEMORY_USAGE 13 - -/* FSE_MAX_SYMBOL_VALUE : -* Maximum symbol value authorized. -* Required for proper stack allocation */ -#define FSE_MAX_SYMBOL_VALUE 255 - - -/**************************************************************** -* template functions type & suffix -****************************************************************/ -#define FSE_FUNCTION_TYPE BYTE -#define FSE_FUNCTION_EXTENSION - - -/**************************************************************** -* Byte symbol type -****************************************************************/ -#endif /* !FSE_COMMONDEFS_ONLY */ - - -/**************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -#else -# ifdef __GNUC__ -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else 
-# define FORCE_INLINE static inline -# endif -#endif - - -/**************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ -#include /* printf (debug) */ - -/**************************************************************** -* Constants -*****************************************************************/ -#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) -#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX -#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -#endif - - -/**************************************************************** -* Error Management -****************************************************************/ -#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/**************************************************************** -* Complex types -****************************************************************/ -typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; - - -/**************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - - -/* Function templates */ - +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License 
(http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +#include /* size_t, ptrdiff_t */ +#include "zstd_v02.h" + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#elif defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/****************************************** +* Error Management +******************************************/ +#define PREFIX(name) ZSTD_error_##name + +#define ERROR(name) (size_t)-PREFIX(name) + +#define ERROR_LIST(ITEM) \ + ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(memory_allocation)) \ + ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ + ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ + ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ + ITEM(PREFIX(maxCode)) + +#define ERROR_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ + +#define ERROR_CONVERTTOSTRING(STRING) #STRING, +#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ + + +/* 
****************************************************************** + mem.h + low-level memory access routines + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/****************************************** +* Includes +******************************************/ +#include /* size_t, ptrdiff_t */ +#include /* memcpy */ + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define MEM_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/**************************************************************** +* Basic Types +*****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/**************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. 
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard on structure alignment. 
+Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif // MEM_FORCE_MEMORY_ACCESS + + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) + { + MEM_write16(memPtr, val); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + { + MEM_write32(memPtr, val32); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val32; + p[1] = (BYTE)(val32>>8); + p[2] = (BYTE)(val32>>16); + p[3] = (BYTE)(val32>>24); + } +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + +MEM_STATIC void 
MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + { + MEM_write64(memPtr, val64); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val64; + p[1] = (BYTE)(val64>>8); + p[2] = (BYTE)(val64>>16); + p[3] = (BYTE)(val64>>24); + p[4] = (BYTE)(val64>>32); + p[5] = (BYTE)(val64>>40); + p[6] = (BYTE)(val64>>48); + p[7] = (BYTE)(val64>>56); + } +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ + + +/* ****************************************************************** + bitstream + Part of NewGen Entropy library + header file (to include) + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which highly benefit from being inlined. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + + +/********************************************** +* bitStream decompression API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... 
:( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* +* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is manually filled from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream() +*/ + + +/****************************************** +* unsafe API +******************************************/ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/**************************************************************** +* Helper functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (register U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + + 
+/********************************************************** +* bitStream decoding +**********************************************************/ + +/*!BIT_initDStream +* Initialize a BIT_DStream_t. +* @bitD : a pointer to an already allocated BIT_DStream_t structure +* @srcBuffer must point at the beginning of a bitStream +* @srcSize must be the exact size of the bitStream +* @result : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + if (srcSize >= sizeof(size_t)) /* normal case */ + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + bitD->bitsConsumed += 
(U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + +/*!BIT_lookBits + * Provides next n bits from local register + * local register is not modified (bits are still present for next read/look) + * On 32-bits, maxNbBits==25 + * On 64-bits, maxNbBits==57 + * @return : value extracted + */ +MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +/*! BIT_lookBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*!BIT_readBits + * Read next n bits from local register. + * pay attention to not read more than nbBits contained into local register. + * @return : extracted value. 
+ */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*!BIT_readBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + +/*! 
BIT_endOfDStream +* @return Tells if DStream has reached its exact end +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#elif defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/****************************************** +* Error Management +******************************************/ +#define PREFIX(name) ZSTD_error_##name + +#define ERROR(name) (size_t)-PREFIX(name) + +#define ERROR_LIST(ITEM) \ + ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ + ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ + ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ + ITEM(PREFIX(maxCode)) + +#define ERROR_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ + +#define ERROR_CONVERTTOSTRING(STRING) #STRING, +#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) +static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) }; + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + static const char* codeError = "Unspecified 
error code"; + if (ERR_isError(code)) return ERR_strings[-(int)(code)]; + return codeError; +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/* +Constructor and Destructor of type FSE_CTable + Note that its size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ + + +/* ****************************************************************** + FSE : Finite State Entropy coder + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Static allocation +******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
+ FSE_endOfDState(&DState); +*/ + + +/****************************************** +* FSE unsafe API +******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/****************************************** +* Implementation of inline functions +******************************************/ + +/* decompression */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, sizeof(DTableH)); + DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library 
+ header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/****************************************** +* Static allocation macros +******************************************/ +/* Huff0 buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of Huff0's DTable */ +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1< /* size_t */ + + +/* ************************************* +* Version +***************************************/ +#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ +#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */ + +#if defined (__cplusplus) +} +#endif +/* + zstd - standard compression library + Header File for static linking only + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/* The objects defined into this file should be considered experimental. + * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Streaming functions +***************************************/ + +typedef struct ZSTD_DCtx_s ZSTD_DCtx; + +/* + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. 
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTD_magicNumber 0xFD2FB522 /* v0.2 (current)*/ + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + FSE : Finite State Entropy coder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +#ifndef FSE_COMMONDEFS_ONLY + +/**************************************************************** +* Tuning parameters +****************************************************************/ +/* MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#define FSE_MAX_MEMORY_USAGE 14 +#define FSE_DEFAULT_MEMORY_USAGE 13 + +/* FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. +* Required for proper stack allocation */ +#define FSE_MAX_SYMBOL_VALUE 255 + + +/**************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION + + +/**************************************************************** +* Byte symbol type +****************************************************************/ +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else 
+# define FORCE_INLINE static inline +# endif +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + +/**************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + + +/**************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/**************************************************************** +* Complex types +****************************************************************/ +typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + + +/**************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + #define FSE_DECODE_TYPE FSE_decode_t - -static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } - + +static U32 FSE_tableStep(U32 tableSize) { 
return (tableSize>>1) + (tableSize>>3) + 3; } + static size_t FSE_buildDTable -(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - void* ptr = dt+1; - FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr; - FSE_DTableHeader DTableH; - const U32 tableSize = 1 << tableLog; - const U32 tableMask = tableSize-1; - const U32 step = FSE_tableStep(tableSize); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; - U32 position = 0; - U32 highThreshold = tableSize-1; - const S16 largeLimit= (S16)(1 << (tableLog-1)); - U32 noLarge = 1; - U32 s; - - /* Sanity Checks */ - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - - /* Init, lay down lowprob symbols */ - DTableH.tableLog = (U16)tableLog; - for (s=0; s<=maxSymbolValue; s++) - { - if (normalizedCounter[s]==-1) - { - tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; - symbolNext[s] = 1; - } - else - { - if (normalizedCounter[s] >= largeLimit) noLarge=0; - symbolNext[s] = normalizedCounter[s]; - } - } - - /* Spread symbols */ - for (s=0; s<=maxSymbolValue; s++) - { - int i; - for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } - } - - if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - - /* Build Decoding table */ - { - U32 i; - for (i=0; i FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + DTableH.tableLog = (U16)tableLog; + for (s=0; s<=maxSymbolValue; s++) + { + if (normalizedCounter[s]==-1) + { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) noLarge=0; + symbolNext[s] = normalizedCounter[s]; + } + } + + /* Spread symbols */ + for (s=0; 
s<=maxSymbolValue; s++) + { + int i; + for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } + } + + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + + /* Build Decoding table */ + { + U32 i; + for (i=0; i FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); - bitStream >>= 4; - bitCount = 4; - *tableLogPtr = nbBits; - remaining = (1<1) && (charnum<=*maxSVPtr)) - { - if (previous0) - { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) - { - n0+=24; - if (ip < iend-5) - { - ip+=2; - bitStream = MEM_readLE32(ip) >> bitCount; - } - else - { - bitStream >>= 16; - bitCount+=16; - } - } - while ((bitStream & 3) == 3) - { - n0+=3; - bitStream>>=2; - bitCount+=2; - } - n0 += bitStream & 3; - bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) - { - ip += bitCount>>3; - bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; - } - else - bitStream >>= 2; - } - { - const short max = (short)((2*threshold-1)-remaining); - short count; - - if ((bitStream & (threshold-1)) < (U32)max) - { - count = (short)(bitStream & (threshold-1)); - bitCount += nbBits-1; - } - else - { - count = (short)(bitStream & (2*threshold-1)); - if (count >= threshold) count -= max; - bitCount += nbBits; - } - - count--; /* extra accuracy */ - remaining -= FSE_abs(count); - normalizedCounter[charnum++] = count; - previous0 = !count; - while (remaining < threshold) - { - nbBits--; - threshold >>= 1; - } - - { - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) - { - ip += bitCount>>3; - bitCount &= 7; - } - else - { - bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } - } - } - if (remaining != 1) return ERROR(GENERIC); - *maxSVPtr = charnum-1; - - ip += 
(bitCount+7)>>3; - if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong); - return ip-istart; -} - - -/********************************************************* -* Decompression (Byte symbols) -*********************************************************/ -static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ - - DTableH->tableLog = 0; - DTableH->fastMode = 0; - - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - - return 0; -} - - -static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSymbolValue = tableMask; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s=0; s<=maxSymbolValue; s++) - { - dinfo[s].newState = 0; - dinfo[s].symbol = (BYTE)s; - dinfo[s].nbBits = (BYTE)nbBits; - } - - return 0; -} - -FORCE_INLINE size_t FSE_decompress_usingDTable_generic( - void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt, const unsigned fast) -{ - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-3; - - BIT_DStream_t bitD; - FSE_DState_t state1; - FSE_DState_t state2; - size_t errorCode; - - /* Init */ - errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ - if (FSE_isError(errorCode)) return errorCode; - - FSE_initDState(&state1, &bitD, dt); - FSE_initDState(&state2, &bitD, dt); - -#define 
FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) - - /* 4 symbols per loop */ - for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[1] = FSE_GETSYMBOL(&state2); - - if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } - - op[2] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[3] = FSE_GETSYMBOL(&state2); - } - - /* tail */ - /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ - while (1) - { - if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) - break; - - *op++ = FSE_GETSYMBOL(&state1); - - if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) - break; - - *op++ = FSE_GETSYMBOL(&state2); - } - - /* end ? 
*/ - if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) - return op-ostart; - - if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */ - - return ERROR(corruption_detected); -} - - -static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt) -{ - FSE_DTableHeader DTableH; - memcpy(&DTableH, dt, sizeof(DTableH)); - - /* select fast mode (static) */ - if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} - - -static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) -{ - const BYTE* const istart = (const BYTE*)cSrc; - const BYTE* ip = istart; - short counting[FSE_MAX_SYMBOL_VALUE+1]; - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - size_t errorCode; - - if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ - - /* normal FSE decoding mode */ - errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); - if (FSE_isError(errorCode)) return errorCode; - if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ - ip += errorCode; - cSrcSize -= errorCode; - - errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); - if (FSE_isError(errorCode)) return errorCode; - - /* always return, even if it is an error code */ - return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); -} - - - -#endif /* FSE_COMMONDEFS_ONLY */ -/* ****************************************************************** - Huff0 : Huffman coder, part of New Generation Entropy library - Copyright (C) 2013-2015, Yann Collet. 
- - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -****************************************************************** */ - -/**************************************************************** -* Compiler specifics -****************************************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -/* inline is defined */ -#elif defined(_MSC_VER) -# define inline __inline -#else -# define inline /* disable inline */ -#endif - - -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#else -# ifdef __GNUC__ -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - - -/**************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ -#include /* printf (debug) */ - -/**************************************************************** -* Error Management -****************************************************************/ -#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/****************************************** -* Helper functions -******************************************/ -static unsigned HUF_isError(size_t code) { return ERR_isError(code); } - -#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. 
Beyond that value, code does not work */ -#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */ -#define HUF_MAX_SYMBOL_VALUE 255 -#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) -# error "HUF_MAX_TABLELOG is too large !" -#endif - - - -/********************************************************* -* Huff0 : Huffman block decompression -*********************************************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ - -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ - -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; - -/*! HUF_readStats - Read compact Huffman tree, saved by HUF_writeCTable - @huffWeight : destination buffer - @return : size read from `src` -*/ -static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, - U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize) -{ - U32 weightTotal; - U32 tableLog; - const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; - size_t oSize; - U32 n; - - //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... 
*/ - - if (iSize >= 128) /* special header */ - { - if (iSize >= (242)) /* RLE */ - { - static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; - oSize = l[iSize-242]; - memset(huffWeight, 1, hwSize); - iSize = 0; - } - else /* Incompressible */ - { - oSize = iSize - 127; - iSize = ((oSize+1)/2); - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - if (oSize >= hwSize) return ERROR(corruption_detected); - ip += 1; - for (n=0; n> 4; - huffWeight[n+1] = ip[n/2] & 15; - } - } - } - else /* header compressed with FSE (normal case) */ - { - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ - if (FSE_isError(oSize)) return oSize; - } - - /* collect weight stats */ - memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32)); - weightTotal = 0; - for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); - rankStats[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } - - /* get last non-null symbol weight (implied, total must be 2^n) */ - tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); - { - U32 total = 1 << tableLog; - U32 rest = total - weightTotal; - U32 verif = 1 << BIT_highbit32(rest); - U32 lastWeight = BIT_highbit32(rest) + 1; - if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; - } - - /* check tree construction validity */ - if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ - - /* results */ - *nbSymbolsPtr = (U32)(oSize+1); - *tableLogPtr = tableLog; - return iSize+1; -} - - -/**************************/ -/* single-symbol decoding */ -/**************************/ - -static size_t HUF_readDTableX2 (U16* 
DTable, const void* src, size_t srcSize) -{ - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ - U32 tableLog = 0; - const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; - U32 nbSymbols = 0; - U32 n; - U32 nextRankStart; - void* ptr = DTable+1; - HUF_DEltX2* const dt = (HUF_DEltX2*)ptr; - - HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ - //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ - - iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; - - /* check result */ - if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */ - DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */ - - /* Prepare ranks */ - nextRankStart = 0; - for (n=1; n<=tableLog; n++) - { - U32 current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } - - /* fill DTable */ - for (n=0; n> 1; - U32 i; - HUF_DEltX2 D; - D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); - for (i = rankVal[w]; i < rankVal[w] + length; i++) - dt[i] = D; - rankVal[w] += length; - } - - return iSize; -} - -static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) -{ - const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - const BYTE c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; -} - -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) - -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - 
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) - -static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; - - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) - { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } - - /* closer to the end */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - /* no more data to retrieve from bitstream, hence no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - return pEnd-pStart; -} - - -static size_t HUF_decompress4X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const U16* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { - const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - - const void* ptr = DTable; - const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1; - const U32 dtLog = DTable[0]; - size_t errorCode; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - const size_t length1 = MEM_readLE16(istart); - const size_t length2 = MEM_readLE16(istart+2); - const size_t length3 = MEM_readLE16(istart+4); - size_t length4; - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - 
BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal; - - length4 = cSrcSize - (length1 + length2 + length3 + 6); - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) return errorCode; - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) - { - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - 
HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endSignal) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; - } -} - - -static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); - const BYTE* ip = (const BYTE*) cSrc; - size_t errorCode; - - errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); - ip += errorCode; - cSrcSize -= errorCode; - - return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); -} - - -/***************************/ -/* double-symbols decoding */ -/***************************/ - -static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq) -{ - HUF_DEltX4 DElt; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; - U32 s; - - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill skipped values */ - if (minWeight>1) - { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; - } - - /* fill DTable */ - for (s=0; s= 1 */ - - rankVal[weight] += length; - } -} - -typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1]; - -static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, - const U32* rankStart, rankVal_t 
rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; - const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill DTable */ - for (s=0; s= minBits) /* enough room for a second symbol */ - { - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol); - } - else - { - U32 i; - const U32 end = start + length; - HUF_DEltX4 DElt; - - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - for (i = start; i < end; i++) - DTable[i] = DElt; - } - rankVal[weight] += length; - } -} - -static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) -{ - BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; - sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; - U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; - U32* const rankStart = rankStart0+1; - rankVal_t rankVal; - U32 tableLog, maxW, sizeOfSort, nbSymbols; - const U32 memLog = DTable[0]; - const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; - void* ptr = DTable; - HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1; - - HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ - if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); - //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... 
*/ - - iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; - - /* check result */ - if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ - - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) - {if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */ - - /* Get start index of each weight */ - { - U32 w, nextRankStart = 0; - for (w=1; w<=maxW; w++) - { - U32 current = nextRankStart; - nextRankStart += rankStats[w]; - rankStart[w] = current; - } - rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ - sizeOfSort = nextRankStart; - } - - /* sort symbols by weight */ - { - U32 s; - for (s=0; s> consumed; - } - } - } - - HUF_fillDTableX4(dt, memLog, - sortedSymbol, sizeOfSort, - rankStart0, rankVal, maxW, - tableLog+1); - - return iSize; -} - - -static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) -{ - const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} - -static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) -{ - const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else - { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) - { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ - } - } - return 1; -} - - -#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; - - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7)) - { - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_1(p, bitDPtr); - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - } - - /* closer to the end */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2)) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - - if (p < pEnd) - p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); - - return p-pStart; -} - - - -static size_t HUF_decompress4X4_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const U32* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { - const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - - const void* ptr = DTable; - const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1; - const U32 dtLog = DTable[0]; - size_t errorCode; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - const size_t length1 = MEM_readLE16(istart); - const size_t length2 = 
MEM_readLE16(istart+2); - const size_t length3 = MEM_readLE16(istart+4); - size_t length4; - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal; - - length4 = cSrcSize - (length1 + length2 + length3 + 6); - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) return errorCode; - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) - { - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_1(op1, &bitD1); - HUF_DECODE_SYMBOLX4_1(op2, &bitD2); - HUF_DECODE_SYMBOLX4_1(op3, &bitD3); - HUF_DECODE_SYMBOLX4_1(op4, &bitD4); - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_0(op1, &bitD1); - HUF_DECODE_SYMBOLX4_0(op2, &bitD2); - HUF_DECODE_SYMBOLX4_0(op3, &bitD3); - 
HUF_DECODE_SYMBOLX4_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); - - /* check */ - endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endSignal) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; - } -} - - -static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); - const BYTE* ip = (const BYTE*) cSrc; - - size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable); -} - - -/**********************************/ -/* quad-symbol decoding */ -/**********************************/ -typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6; -typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6; - -/* recursive, up to level 3; may benefit from